library(tidyverse)
library(plotly)
library(sf)
library(mapview)
library(tigris)
library(censusapi)
library(leaflet)
library(lehdr)
library(usmap)
library(lmtest)
library(pracma)
library(lmtest)
library(forecast)
library(vars)
library(rvest)
library(RSelenium)
library(seleniumPipes)
library(dLagM)
library(jsonlite)
library(rgdal)
library(esri2sf)
library(readr)

options(
  tigris_class = "sf",
  tigris_use_cache = TRUE
)

Sys.setenv(CENSUS_KEY="10dcd73d7c043e91bac9fb8d3989cbff54b08790")

# Load the processing functions. This also loads the normalization functions.
source("safegraph_process_patterns_functions.R")

# SET your path here to the covid19analysis folder.
sg_path <- '/Users/simonespeizer/pCloud Drive/Shared/SFBI/Restricted Data Library/Safegraph/covid19analysis/'

# SET your path here to the github covid folder
github_path  <- ''

Zip code level analysis

Santa Clara County and San Francisco zip code cases

scc_map_cases <- esri2sf("https://services2.arcgis.com/RiZWfy7B1r76pKTz/arcgis/rest/services/COVID_zip_FC/FeatureServer/0")
## [1] "Feature Layer"
## [1] "esriGeometryPolygon"
sf_place_cases <- 
  read_csv("https://raw.githubusercontent.com/datadesk/california-coronavirus-data/master/latimes-place-totals.csv") %>%
  filter(county == 'San Francisco')

Get social distancing data

scc_blockgroups <-
  block_groups("CA","Santa Clara", cb=F, progress_bar=F) %>% 
  st_transform('+proj=longlat +datum=WGS84')

sf_blockgroups <- 
  block_groups("CA","San Francisco", cb=F, progress_bar=F) %>% 
  st_transform('+proj=longlat +datum=WGS84')

# zip code areas
zctas_94 <- 
  zctas(cb = F, starts_with = "94") %>% 
  st_transform('+proj=longlat +datum=WGS84') %>%
  dplyr::select(ZCTA5CE10, geometry)

zctas_95 <- 
  zctas(cb = F, starts_with = "95") %>% 
  st_transform('+proj=longlat +datum=WGS84') %>%
  dplyr::select(ZCTA5CE10, geometry)

zctas_94_95 <- rbind(zctas_94, zctas_95)

# join with block groups
scc_bg_zctas <- scc_blockgroups %>% 
  st_centroid() %>%
  st_join(zctas_94_95) %>% 
  dplyr::select(GEOID, ZCTA5CE10) %>%
  rename(blockgroup = GEOID, zipcode = ZCTA5CE10)

sf_bg_zctas <- sf_blockgroups %>% 
  st_centroid() %>%
  st_join(zctas_94) %>% 
  dplyr::select(GEOID, ZCTA5CE10) %>%
  rename(blockgroup = GEOID, zipcode = ZCTA5CE10)

bay_sd <- readRDS("/Users/simonespeizer/pCloud Drive/Shared/SFBI/Restricted Data Library/Safegraph/covid19analysis/bay_socialdistancing_v2.rds") %>% 
  mutate(date = date_range_start %>%  substr(1,10) %>% as.Date())

# obtaining weekends
weekends <- bay_sd %>% 
  filter(!duplicated(date)) %>% 
  arrange(date) %>% 
  mutate(weekend = ifelse((date %>% as.numeric()) %% 7 %in% c(2,3), T, F)) %>% 
  dplyr::select(date,weekend)

bay_sd <- bay_sd %>% left_join(weekends)

SF zip code over time analysis

Over time cases by zip code

First just plot the cases over time by zip code

# obtain the saved census data 
acs_vars = readRDS("/Users/simonespeizer/Documents/2020 Spring Quarter/CEE 218Z/censusData2018_acs_acs5.rds")

# define a function for pulling census data
pullCensus <- function(variableToPull, county) {
    regionString <- paste0("state:06+county:", county)
    censusData <- getCensus(
      name = "acs/acs5",
      vintage = 2018,
      region = "block group:*", 
      regionin = regionString,
      vars = variableToPull
    ) %>%
    mutate(blockgroup = paste0(state,county,tract,block_group)) %>%
      select_if(!names(.) %in% c("GEO_ID","state","county","tract","block_group","NAME"))
  
  return(censusData)
}


# get population data for San Francisco
sf_fips <- fips("CA", "San Francisco") %>% substr(3,5)

sf_pop_zip <- pullCensus("B01003_001E", sf_fips) %>% 
  left_join(sf_bg_zctas %>% dplyr::select(blockgroup, zipcode)) %>% 
  group_by(zipcode) %>%
  summarize(total_pop_zip = sum(B01003_001E))

sf_cases_zip <- sf_place_cases %>% left_join(sf_pop_zip, by = c("place" = "zipcode")) %>%
  mutate(cases_by_pop = confirmed_cases / total_pop_zip) %>%
  rename(zipcode = place)

sf_cases_zip %>% ggplot(
  aes(x = date)) +
    geom_line(aes(y = cases_by_pop, color=zipcode))

Something weird is going on with case counts on May 7th and 11th, so I remove those dates manually. Also adjust the day in zip code 94117 that is off.

sf_cases_zip_edit <- sf_cases_zip %>% filter(date != "2020-05-07" & date != "2020-05-11") %>% mutate(confirmed_cases = ifelse(zipcode == "94117" & confirmed_cases == 1, confirmed_cases + 37, confirmed_cases), cases_by_pop = confirmed_cases / total_pop_zip)

sf_cases_zip_edit %>% ggplot(
  aes(x = date)) +
    geom_line(aes(y = cases_by_pop, color=zipcode))

sf_cases_zip_edit %>% ggplot(
  aes(x = date)) +
    geom_line(aes(y = confirmed_cases, color=zipcode))

Bucket by movement right before shelter in place

Get social distancing data by zip code for the first few days before shelter-in-place

# relevant dates
pre_sd_date_cutoff <- as.Date("2020-03-01")
shelter_date <- as.Date("2020-03-17") # note that this is the day the order went into effect

# get daily social distancing data for SF by date
sf_sd_zip_by_date <- bay_sd %>%
  filter(origin_census_block_group %in% sf_blockgroups$GEOID) %>%
  left_join(sf_bg_zctas %>% dplyr::select(blockgroup, zipcode), by = c("origin_census_block_group" = "blockgroup")) %>%
  group_by(zipcode, date) %>%
  summarize(total_at_home_zip = sum(completely_home_device_count), total_devices = sum(device_count)) %>%
  ungroup()

# get block group averages leaving home before shelter in place/COVID-19
sf_pre_sd_leaving_avg <- bay_sd %>%
  filter(origin_census_block_group %in% sf_blockgroups$GEOID) %>%
  filter(date <= pre_sd_date_cutoff)  %>%
  left_join(sf_bg_zctas %>% dplyr::select(blockgroup, zipcode), by = c("origin_census_block_group" = "blockgroup")) %>%
  group_by(zipcode) %>%
  summarize(total_at_home_zip = sum(completely_home_device_count), total_devices = sum(device_count)) %>%
  mutate(
    percent_at_home_pre = total_at_home_zip*100/total_devices,
    percent_leaving_home_pre = (100 - percent_at_home_pre)
  )

# get average percent leaving the home for the 3 days before shelter in place
sf_sd_zip_3_days_pre <- sf_sd_zip_by_date %>%
  filter(date < shelter_date & date >= (shelter_date - 3)) %>%
  group_by(zipcode) %>%
  summarize(total_at_home_zip = sum(total_at_home_zip), total_devices = sum(total_devices)) %>% 
  mutate(percent_at_home = total_at_home_zip*100/total_devices, percent_leaving_home = (100 - percent_at_home)) %>% 
  left_join(sf_pre_sd_leaving_avg %>% dplyr::select(zipcode, percent_leaving_home_pre)) %>%
  mutate(dif_percent_leaving = percent_leaving_home - percent_leaving_home_pre)
  
sf_sd_zip_3_days_pre_buckets <- sf_sd_zip_3_days_pre %>% 
  mutate(bucketed_percent_leaving = ifelse(percent_leaving_home < 65, "65% or below", ifelse(percent_leaving_home < 70, "65% to 70%", "70% or more")), bucketed_dif_percent_leaving = ifelse(dif_percent_leaving > -5, "-5% or above", ifelse(dif_percent_leaving > -10, "-5% to -10%", ifelse(dif_percent_leaving > -15, "-10% to -15%", "-15% or below"))))

# join with the case data
sf_cases_sd_3dayspre_bucketed_abs <- left_join(sf_cases_zip_edit, sf_sd_zip_3_days_pre_buckets) %>%
  group_by(date, bucketed_percent_leaving) %>%
  summarize(total_cases_grouped = sum(confirmed_cases), total_pop_combined = sum(total_pop_zip)) %>%
  mutate(cases_by_pop_grouped = total_cases_grouped / total_pop_combined)
  
# plot
sf_cases_sd_3dayspre_bucketed_abs %>% filter(!is.na(bucketed_percent_leaving)) %>% ggplot(aes(x = date)) + 
  geom_line(aes(y = cases_by_pop_grouped, color=bucketed_percent_leaving)) + 
  ggtitle('Cases per capita for zipcodes grouped by % leaving in the 3 days before SIP') + labs(y = "Cases per capita", x = "Date", color = "% leaving home")

Pretty much the opposite of what we expect. But note that the differences between the groups do not seem to be growing much in the time period we see here - maybe there were pre-existing differences between these groups in terms of their baseline case counts?

Now look at the difference in percent leaving relative to pre SIP.

# try with differences in percent leaving
sf_cases_sd_3dayspre_bucketed_rel <- left_join(sf_cases_zip_edit, sf_sd_zip_3_days_pre_buckets) %>%
  group_by(date, bucketed_dif_percent_leaving) %>%
  summarize(total_cases_grouped = sum(confirmed_cases), total_pop_combined = sum(total_pop_zip)) %>%
  mutate(cases_by_pop_grouped = total_cases_grouped / total_pop_combined)
  
# plot
sf_cases_sd_3dayspre_bucketed_rel %>% filter(!is.na(bucketed_dif_percent_leaving)) %>% ggplot(aes(x = date)) + geom_line(aes(y = cases_by_pop_grouped, color=bucketed_dif_percent_leaving)) + ggtitle('Cases per capita for zipcodes grouped by % leaving in the 3 days before SIP') + labs(y = "Cases per capita", x = "Date", color = "dif in % leaving home relative to pre SIP")

This actually looks more like what we would expect, except for the -5% or greater bucket, which do note is only composed of 3 zip codes.

Bucket by movement right after shelter in place

Try now with the 3 days after SIP.

# get average percent leaving the home for the 3 days afterter in place
sf_sd_zip_3_days_post <- sf_sd_zip_by_date %>%
  filter(date >= shelter_date & date < (shelter_date + 3)) %>%
  group_by(zipcode) %>%
  summarize(total_at_home_zip = sum(total_at_home_zip), total_devices = sum(total_devices)) %>% 
  mutate(percent_at_home = total_at_home_zip*100/total_devices, percent_leaving_home = (100 - percent_at_home)) %>% 
  left_join(sf_pre_sd_leaving_avg %>% dplyr::select(zipcode, percent_leaving_home_pre)) %>%
  mutate(dif_percent_leaving = percent_leaving_home - percent_leaving_home_pre)
  

sf_sd_zip_3_days_post %>% ggplot(aes(x = zipcode, y = percent_leaving_home)) + geom_point()

sf_sd_zip_3_days_post %>% ggplot(aes(x = zipcode, y = dif_percent_leaving)) + geom_point()

sf_sd_zip_3_days_post_buckets <- sf_sd_zip_3_days_post %>% 
  mutate(bucketed_percent_leaving = ifelse(percent_leaving_home < 55, "55% or below", ifelse(percent_leaving_home < 60, "55% to 60%", ifelse(percent_leaving_home < 65, "60% to 65%","65% or more"))), bucketed_dif_percent_leaving = ifelse(dif_percent_leaving > -10, "-10% or above", ifelse(dif_percent_leaving > -15, "-10% to -15%", ifelse(dif_percent_leaving > -20, "-15% to -20%", ifelse(dif_percent_leaving > -25, "-20% to -25%", "-25% or below")))))

# join with the case data
sf_cases_sd_3dayspost_bucketed_abs <- left_join(sf_cases_zip_edit, sf_sd_zip_3_days_post_buckets) %>%
  group_by(date, bucketed_percent_leaving) %>%
  summarize(total_cases_grouped = sum(confirmed_cases), total_pop_combined = sum(total_pop_zip)) %>%
  mutate(cases_by_pop_grouped = total_cases_grouped / total_pop_combined)
  
# plot
sf_cases_sd_3dayspost_bucketed_abs %>% filter(!is.na(bucketed_percent_leaving)) %>% ggplot(aes(x = date)) + geom_line(aes(y = cases_by_pop_grouped, color=bucketed_percent_leaving)) + ggtitle('Cases per capita for zipcodes grouped by % leaving in the 3 days on and after SIP') + labs(y = "Cases per capita", x = "Date", color = "% leaving home")

Again pretty much the opposite of expectations.

# try with differences in percent leaving
sf_cases_sd_3dayspost_bucketed_rel <- left_join(sf_cases_zip_edit, sf_sd_zip_3_days_post_buckets) %>%
  group_by(date, bucketed_dif_percent_leaving) %>%
  summarize(total_cases_grouped = sum(confirmed_cases), total_pop_combined = sum(total_pop_zip)) %>%
  mutate(cases_by_pop_grouped = total_cases_grouped / total_pop_combined)
  
# plot
sf_cases_sd_3dayspost_bucketed_rel %>% filter(!is.na(bucketed_dif_percent_leaving)) %>% ggplot(aes(x = date)) + geom_line(aes(y = cases_by_pop_grouped, color=bucketed_dif_percent_leaving)) + ggtitle('Cases per capita for zipcodes grouped by % leaving in the 3 days on and after SIP') + labs(y = "Cases per capita", x = "Date", color = "dif in % leaving home relative to pre SIP")

Slightly more as expected, but still weird.

Bucket by movement through April 20th

Try the whole time from SIP through April 20th, the first day that we have case data

# get average percent leaving the home for the 3 days after shelter in place
sf_sd_zip_till420 <- sf_sd_zip_by_date %>%
  filter(date >= shelter_date & date < ("2020-04-20")) %>%
  group_by(zipcode) %>%
  summarize(total_at_home_zip = sum(total_at_home_zip), total_devices = sum(total_devices)) %>% 
  mutate(percent_at_home = total_at_home_zip*100/total_devices, percent_leaving_home = (100 - percent_at_home)) %>% 
  left_join(sf_pre_sd_leaving_avg %>% dplyr::select(zipcode, percent_leaving_home_pre)) %>%
  mutate(dif_percent_leaving = percent_leaving_home - percent_leaving_home_pre)
  

sf_sd_zip_till420 %>% ggplot(aes(x = zipcode, y = percent_leaving_home)) + geom_point()

sf_sd_zip_till420 %>% ggplot(aes(x = zipcode, y = dif_percent_leaving)) + geom_point()

sf_sd_zip_till420_buckets <- sf_sd_zip_3_days_post %>% 
  mutate(bucketed_percent_leaving = ifelse(percent_leaving_home < 50, "50% or below", ifelse(percent_leaving_home < 55, "50% to 55%", ifelse(percent_leaving_home < 60, "55% to 60%", "60% or more"))), bucketed_dif_percent_leaving = ifelse(dif_percent_leaving > -15, "-15% or above", ifelse(dif_percent_leaving > -20, "-15% to -20%", ifelse(dif_percent_leaving > -25, "-20% to -25%", "-25% or below"))))

# join with the case data
sf_cases_sd_till420_bucketed_abs <- left_join(sf_cases_zip_edit, sf_sd_zip_till420_buckets) %>%
  group_by(date, bucketed_percent_leaving) %>%
  summarize(total_cases_grouped = sum(confirmed_cases), total_pop_combined = sum(total_pop_zip)) %>%
  mutate(cases_by_pop_grouped = total_cases_grouped / total_pop_combined)
  
# plot
sf_cases_sd_till420_bucketed_abs %>% filter(!is.na(bucketed_percent_leaving)) %>% ggplot(aes(x = date)) + geom_line(aes(y = cases_by_pop_grouped, color=bucketed_percent_leaving)) + ggtitle('Cases per capita for zipcodes grouped by % leaving since SIP') + labs(y = "Cases per capita", x = "Date", color = "% leaving home")

Same surprising trend, but again the differences between the top two groups don’t seem to grow a ton (though they do relative to the bottom one).

Let’s look at this in scatter plot form. In the plot below each point represents a date, and again we’re looking at bucketed zip codes by their movement.

# scatter plot 
sf_cases_sd_till420_bucketed_abs %>% filter(!is.na(bucketed_percent_leaving)) %>% ggplot(aes(x = bucketed_percent_leaving, y = cases_by_pop_grouped)) + geom_point() + ggtitle('Cases by population through current date, grouped by movement, SF')

Again look at differences in percent leaving.

# try with differences in percent leaving
sf_cases_sd_till420_bucketed_rel <- left_join(sf_cases_zip_edit, sf_sd_zip_till420_buckets) %>%
  group_by(date, bucketed_dif_percent_leaving) %>%
  summarize(total_cases_grouped = sum(confirmed_cases), total_pop_combined = sum(total_pop_zip)) %>%
  mutate(cases_by_pop_grouped = total_cases_grouped / total_pop_combined)
  
# plot
sf_cases_sd_till420_bucketed_rel %>% filter(!is.na(bucketed_dif_percent_leaving)) %>% ggplot(aes(x = date)) + geom_line(aes(y = cases_by_pop_grouped, color=bucketed_dif_percent_leaving)) + ggtitle('Cases per capita for zipcodes grouped by % leaving since SIP') + labs(y = "Cases per capita", x = "Date", color = "dif in % leaving home relative to pre SIP")

Except for the -15% or above, more as we’d expect.

Bucket by case growth and look at movement

Looking back at the original case growth over time by zip codes, it looks like there are generally two groups–one set of zip codes that have >0.003 cases per capita, and one set that have <0.002 cases per capita by May 18th. The first set have rapid growths, while those in the second set generally have less if any growth. Let’s try breaking it up by cases.

sf_cases_zip_bucketed <- sf_cases_zip_edit %>% 
  filter(date == max(date)) %>%
  mutate(bucketed_case_level = ifelse(cases_by_pop <=0.002, "< 0.002", "> 0.002"))

sf_sd_zip_cases_bucket <- left_join(sf_cases_zip_bucketed %>% dplyr::select(-date), sf_sd_zip_by_date)

sf_sd_zip_cases_bucket_movement <- sf_sd_zip_cases_bucket %>% filter(!is.na(bucketed_case_level)) %>%
  group_by(bucketed_case_level, date) %>%
  summarize(total_at_home_bucket = sum(total_at_home_zip), total_devices = sum(total_devices)) %>% 
  mutate(percent_at_home = total_at_home_bucket*100/total_devices, percent_leaving_home = (100 - percent_at_home))

sf_sd_zip_cases_bucket_movement %>% filter(!is.na(bucketed_case_level)) %>%  ggplot(aes(x = date)) + 
  geom_line(aes(y = percent_leaving_home, color=bucketed_case_level)) + ggtitle('% leaving home over time grouped by cases per capita') + labs(y = "% leaving home", x = "Date", color = "cases per capita by May 18th")

Same figure but with plot_ly for more interactivity.

fig_sf_movt_case_bucket <- sf_sd_zip_cases_bucket_movement %>% filter(!is.na(bucketed_case_level)) %>% 
  plot_ly() %>%
  add_lines(x = ~date, y = ~percent_leaving_home, color = ~bucketed_case_level) %>%
  layout(xaxis = list(title = 'Date'), yaxis = list(title = '% leaving home'), legend = list(title = list(text = "Cases per capita by May 18th")), title = "% leaving home over time grouped by cases per capita")

fig_sf_movt_case_bucket

Also pretty weird. Try looking just at after shelter in place, and the difference relative to before.

# try looking just after shelter in place
# get pre shelter in place averages
sf_sd_zip_cases_bucket_movement_pre_avg <- sf_sd_zip_cases_bucket_movement %>% filter(date < pre_sd_date_cutoff) %>% ungroup() %>%
  group_by(bucketed_case_level) %>%
  summarize(total_at_home_pre = sum(total_at_home_bucket), total_devices = sum(total_devices)) %>%
  mutate(percent_at_home= total_at_home_pre*100/total_devices, percent_leaving_home_pre = (100-percent_at_home))

sf_sd_zip_cases_bucket_movement_post <- sf_sd_zip_cases_bucket_movement %>% filter(date >= shelter_date) %>% 
  left_join(sf_sd_zip_cases_bucket_movement_pre_avg %>% dplyr::select(bucketed_case_level, percent_leaving_home_pre)) %>%
  mutate(dif_percent_leaving = percent_leaving_home - percent_leaving_home_pre)

sf_sd_zip_cases_bucket_movement_post %>% filter(!is.na(bucketed_case_level)) %>% ggplot(aes(x = date)) + 
  geom_line(aes(y = dif_percent_leaving, color=bucketed_case_level)) + ggtitle('% leaving home over time (relative to pre SIP) grouped by cases per capita') + labs(y = "difference in % leaving home", x = "Date", color = "cases per capita by May 18th")

fig_sf_movt_case_bucket_post <- sf_sd_zip_cases_bucket_movement_post %>% filter(!is.na(bucketed_case_level)) %>% 
  plot_ly() %>%
  add_lines(x = ~date, y = ~dif_percent_leaving, color = ~bucketed_case_level) %>%
  layout(xaxis = list(title = 'Date'), yaxis = list(title = 'difference in % leaving home'), legend = list(title = list(text = "Cases per capita by May 18th")), title = "% leaving home over time (relative to pre SIP) grouped by cases per capita")

fig_sf_movt_case_bucket_post

That does look more like what we would expect.

Santa Clara County case data

First just visualize what the case data looks like.

scc_cases_zip <- scc_map_cases %>% 
  dplyr::select(Zipcode, Cases, Population) %>% 
  st_drop_geometry() %>%
  rename(zipcode = Zipcode, cases = Cases, pop = Population) %>%
  mutate(cases = replace_na(cases, 0)) %>%
  mutate(cases_by_pop = cases/pop) %>%
  filter(is.numeric(cases_by_pop))

scc_cases_zip %>% ggplot(aes(x = zipcode, y = cases_by_pop)) + geom_point()

Visualize zip code level leaving home data for SCC.

# get leaving home data on zip code level
scc_sd_zip_by_date <- bay_sd %>%
  filter(origin_census_block_group %in% scc_blockgroups$GEOID) %>%
  left_join(scc_bg_zctas %>% dplyr::select(blockgroup, zipcode), by = c("origin_census_block_group" = "blockgroup")) %>% 
  group_by(zipcode, date) %>%
  summarize(total_at_home_zip = sum(completely_home_device_count), total_devices = sum(device_count)) %>% 
  mutate(
    percent_at_home = total_at_home_zip*100/total_devices, 
    percent_leaving_home = (100 - percent_at_home)
  )

scc_sd_zip_by_date %>% ggplot(aes(x = date)) + geom_line(aes(y = percent_leaving_home, color = zipcode))

Try first looking at the overall % leaving home since SIP and cases as of pulled on May 20th.

# get zip code averages leaving home before SIP
scc_sd_zip_pre_avgs <- scc_sd_zip_by_date %>%
  filter(date < pre_sd_date_cutoff) %>%
  group_by(zipcode) %>%
  summarize(total_at_home_pre = sum(total_at_home_zip), total_devices = sum(total_devices)) %>%
  mutate(percent_at_home_pre = total_at_home_pre*100/total_devices, percent_leaving_home_pre = (100 - percent_at_home_pre))

scc_sd_zip_post <- scc_sd_zip_by_date %>%
  filter(date >= shelter_date) %>%
  group_by(zipcode) %>%
  summarize(total_at_home = sum(total_at_home_zip), total_devices = sum(total_devices)) %>%
  mutate(percent_at_home = total_at_home*100/total_devices, percent_leaving_home = (100 - percent_at_home)) %>%
  left_join(scc_sd_zip_pre_avgs %>% dplyr::select(zipcode, percent_leaving_home_pre)) %>%
  mutate(dif_percent_leaving = percent_leaving_home - percent_leaving_home_pre) %>%
  left_join(scc_cases_zip)

# absolute percent leaving
scc_sd_zip_post %>% ggplot(
  aes(x = percent_leaving_home, y = cases_by_pop)) + geom_point() + ggtitle("Santa Clara County") + geom_smooth(method=lm)

scc_zip_cases_sd_cor_abs <- lm(cases_by_pop ~ percent_leaving_home, scc_sd_zip_post , na.action = na.omit)

summary(scc_zip_cases_sd_cor_abs)
## 
## Call:
## lm(formula = cases_by_pop ~ percent_leaving_home, data = scc_sd_zip_post, 
##     na.action = na.omit)
## 
## Residuals:
##        Min         1Q     Median         3Q        Max 
## -0.0016790 -0.0005276 -0.0001715  0.0002346  0.0029655 
## 
## Coefficients:
##                       Estimate Std. Error t value Pr(>|t|)
## (Intercept)          3.615e-04  8.709e-04   0.415    0.680
## percent_leaving_home 1.898e-05  1.725e-05   1.100    0.276
## 
## Residual standard error: 0.0008858 on 53 degrees of freedom
##   (2 observations deleted due to missingness)
## Multiple R-squared:  0.02232,    Adjusted R-squared:  0.003877 
## F-statistic:  1.21 on 1 and 53 DF,  p-value: 0.2763
# try with difference in percent leaving
scc_sd_zip_post %>% ggplot(
  aes(x = dif_percent_leaving, y = cases_by_pop)) + geom_point() + ggtitle("Santa Clara County") + geom_smooth(method=lm)

scc_zip_cases_sd_cor <- lm(cases_by_pop ~ dif_percent_leaving, scc_sd_zip_post , na.action = na.omit)

summary(scc_zip_cases_sd_cor)
## 
## Call:
## lm(formula = cases_by_pop ~ dif_percent_leaving, data = scc_sd_zip_post, 
##     na.action = na.omit)
## 
## Residuals:
##        Min         1Q     Median         3Q        Max 
## -1.865e-03 -4.305e-04 -7.476e-05  2.154e-04  3.022e-03 
## 
## Coefficients:
##                      Estimate Std. Error t value Pr(>|t|)    
## (Intercept)         2.450e-03  4.086e-04   5.996 1.84e-07 ***
## dif_percent_leaving 4.302e-05  1.483e-05   2.900  0.00542 ** 
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.0008323 on 53 degrees of freedom
##   (2 observations deleted due to missingness)
## Multiple R-squared:  0.137,  Adjusted R-squared:  0.1207 
## F-statistic: 8.411 on 1 and 53 DF,  p-value: 0.005417

Bucketing cases

Try bucketing by cases per capita.

# bucket case counts
scc_cases_bucketed <- scc_cases_zip %>%
  mutate(bucketed_cases_by_pop = ifelse(cases_by_pop > 0.002, "0.002 or more", ifelse(cases_by_pop > 0.0015, "0.0015 to 0.002", ifelse(cases_by_pop > 0.001, "0.001 to 0.0015", ifelse(cases_by_pop > 0, "0.00001 to 0.001", "0")))))

scc_sd_zip_by_date_case_bucket <- left_join(scc_cases_bucketed, scc_sd_zip_by_date) %>%
  filter(!is.na(bucketed_cases_by_pop) & !is.na(date)) %>%
  group_by(date, bucketed_cases_by_pop) %>%
  summarize(total_at_home_bucket = sum(total_at_home_zip), total_devices = sum(total_devices)) %>% 
  mutate(percent_at_home = total_at_home_bucket*100/total_devices, percent_leaving_home = (100 - percent_at_home)) %>% ungroup()

fig_scc_movt_case_bucket <- plot_ly(scc_sd_zip_by_date_case_bucket) %>%
  add_trace(x = ~date, y = ~percent_leaving_home, color = ~bucketed_cases_by_pop, type = 'scatter', mode = 'lines') %>%
  layout(xaxis = list(title = 'Date'), yaxis = list(title = '% leaving home'), legend = list(title = list(text = "Cases per capita")), title = "% leaving home over time grouped by cases per capita, Santa Clara County")

fig_scc_movt_case_bucket
# try looking at this in scatter plot form
scc_sd_zip_by_date_case_bucket_pre <- scc_sd_zip_by_date_case_bucket %>%
  filter(date < pre_sd_date_cutoff) %>%
  group_by(bucketed_cases_by_pop) %>%
  summarize(total_at_home_bucket = sum(total_at_home_bucket), total_devices = sum(total_devices)) %>% 
  mutate(percent_at_home_pre = total_at_home_bucket*100/total_devices, percent_leaving_home_pre = (100 - percent_at_home_pre)) %>% ungroup()

# join with pre data
scc_sd_zip_by_date_case_bucket_post <- scc_sd_zip_by_date_case_bucket %>%
  filter(date >= shelter_date) %>%
  group_by(bucketed_cases_by_pop) %>%
  summarize(total_at_home_bucket = sum(total_at_home_bucket), total_devices = sum(total_devices)) %>% 
  mutate(percent_at_home = total_at_home_bucket*100/total_devices, percent_leaving_home = (100 - percent_at_home)) %>% ungroup() %>%
  left_join(scc_sd_zip_by_date_case_bucket_pre %>% dplyr::select(bucketed_cases_by_pop, percent_leaving_home_pre)) %>%
  mutate(dif_percent_leaving = percent_leaving_home - percent_leaving_home_pre)

# plot - absolute metric
scc_sd_zip_by_date_case_bucket_post %>% ggplot(aes(x = percent_leaving_home, y = bucketed_cases_by_pop)) + geom_point() + ggtitle('Percent leaving home and cases per capita for buckets of cases per capita, Santa Clara')

# plot - relative metric
scc_sd_zip_by_date_case_bucket_post %>% ggplot(aes(x = dif_percent_leaving, y = bucketed_cases_by_pop)) + geom_point() + ggtitle('Percent leaving home and cases per capita for buckets of cases per capita, Santa Clara')

That generally looks as we’d expect it to–except for the zip codes with zero cases, but it seems like a fair hypothesis that something different might be going on in those counties anyway.

Bucketing movement

Now let’s look at bucketing movement into categories, like we did for SF, but here we can only look at the outcome of cases at a specific date not over time.

To start, just looking at bucketing overall movement since SIP.

ggplot(scc_sd_zip_post, aes(x = zipcode, y = percent_leaving_home)) + geom_point()

ggplot(scc_sd_zip_post, aes(x = zipcode, y = dif_percent_leaving)) + geom_point()

scc_sd_zip_post_bucketed <- scc_sd_zip_post %>%
  mutate(bucketed_percent_leaving = ifelse(percent_leaving_home > 60, "60% or more", ifelse(percent_leaving_home > 55, "55% to 60%", ifelse(percent_leaving_home > 50, "50% to 55%", ifelse(percent_leaving_home > 45, "45% to 50%", ifelse(percent_leaving_home > 40, "40% to 45%", "40% or less"))))), bucketed_dif_leaving = ifelse(dif_percent_leaving > 0, "0% or above", ifelse(dif_percent_leaving > -20, "-0% to -20%", ifelse(dif_percent_leaving > -25, "-20% to -25%", ifelse(dif_percent_leaving > -30, "-25% to -30%", ifelse(dif_percent_leaving > -35, "-30% to -35%", ifelse(dif_percent_leaving > -40, "-35% to -40%", "-40% or lower"))))))) %>%
  left_join(scc_cases_zip) %>%
  filter(!is.na(cases))

scc_sd_cases_post_bucketed_abs <- scc_sd_zip_post_bucketed %>% 
  group_by(bucketed_percent_leaving) %>%
  summarize(total_cases = sum(cases), total_pop = sum(pop)) %>%
  mutate(cases_by_pop_grouped = total_cases/total_pop)

scc_sd_cases_post_bucketed_abs %>% ggplot(aes(x = bucketed_percent_leaving, y = cases_by_pop_grouped)) + geom_point() + ggtitle('Santa Clara')

Wow! A trend that (generally) makes sense!

Look at the difference in percent leaving.

scc_sd_cases_post_bucketed_rel <- scc_sd_zip_post_bucketed %>% 
  group_by(bucketed_dif_leaving) %>%
  summarize(total_cases = sum(cases), total_pop = sum(pop)) %>%
  mutate(cases_by_pop_grouped = total_cases/total_pop)

scc_sd_cases_post_bucketed_rel %>% ggplot(aes(x = bucketed_dif_leaving, y = cases_by_pop_grouped)) + geom_point() + ggtitle('Santa Clara')

Not quite as much as of the expected trend, but appears for -25 to -40 or below buckets.

Updates 5/25/20

Total cases and movement

Looking at the number of cases and movement through 1 week before the most recent date on the case data.

San Francisco

sf_case_date_max <- max(sf_cases_zip$date)

sf_sd_cases_curr <- sf_sd_zip_by_date %>%
  filter(date >= shelter_date & date <= sf_case_date_max - 7) %>%
  group_by(zipcode) %>%
  summarize(total_at_home = sum(total_at_home_zip), total_devices = sum(total_devices)) %>%
  mutate(percent_at_home = total_at_home*100/total_devices, percent_leaving_home = (100 - percent_at_home)) %>%
  left_join(sf_pre_sd_leaving_avg %>% dplyr::select(zipcode, percent_leaving_home_pre)) %>%
  mutate(dif_percent_leaving = percent_leaving_home - percent_leaving_home_pre) %>%
  left_join(sf_cases_zip_edit %>% filter(date == sf_case_date_max)) %>%
  filter(!is.na(confirmed_cases))

fig_sf_sd_cases_curr <- plot_ly(sf_sd_cases_curr) %>%
  add_trace(x = ~percent_leaving_home, y = ~cases_by_pop, type = 'scatter', mode = 'markers') %>%
  add_trace(x = ~percent_leaving_home, y = fitted(lm(sf_sd_cases_curr$cases_by_pop~ sf_sd_cases_curr$percent_leaving_home)), mode = 'lines', showlegend = F) %>%
  layout(xaxis = list(title = 'Percent of devices leaving home'), yaxis = list(title = 'Cases per capita'), title = "SFC, cases per capita and % leaving home through 1 week before case data")

model_sf_curr <- lm(cases_by_pop ~ percent_leaving_home, sf_sd_cases_curr)

summary(model_sf_curr)
## 
## Call:
## lm(formula = cases_by_pop ~ percent_leaving_home, data = sf_sd_cases_curr)
## 
## Residuals:
##        Min         1Q     Median         3Q        Max 
## -0.0019316 -0.0012394 -0.0007684  0.0011670  0.0031014 
## 
## Coefficients:
##                        Estimate Std. Error t value Pr(>|t|)
## (Intercept)           0.0063697  0.0060659   1.050    0.306
## percent_leaving_home -0.0000788  0.0001190  -0.662    0.515
## 
## Residual standard error: 0.001741 on 21 degrees of freedom
## Multiple R-squared:  0.02045,    Adjusted R-squared:  -0.0262 
## F-statistic: 0.4384 on 1 and 21 DF,  p-value: 0.5151
fig_sf_sd_cases_curr
fig_sf_sd_cases_curr_dif <- plot_ly(sf_sd_cases_curr) %>%
  add_trace(x = ~dif_percent_leaving, y = ~cases_by_pop, type = 'scatter', mode = 'markers') %>% 
  add_trace(x = ~dif_percent_leaving, y = fitted(lm(sf_sd_cases_curr$cases_by_pop~ sf_sd_cases_curr$dif_percent_leaving )), mode = 'lines', showlegend = F) %>%
  layout(xaxis = list(title = 'Difference in % of devices leaving home'), yaxis = list(title = 'Cases per capita'), title = "SFC, cases per capita and difference in % leaving home through 1 week before case data")

fig_sf_sd_cases_curr_dif
model_sf_curr_dif <- lm(cases_by_pop ~ dif_percent_leaving, sf_sd_cases_curr)

summary(model_sf_curr_dif)
## 
## Call:
## lm(formula = cases_by_pop ~ dif_percent_leaving, data = sf_sd_cases_curr)
## 
## Residuals:
##        Min         1Q     Median         3Q        Max 
## -0.0019606 -0.0012696 -0.0008445  0.0013284  0.0031583 
## 
## Coefficients:
##                      Estimate Std. Error t value Pr(>|t|)  
## (Intercept)         4.599e-03  2.234e-03   2.058   0.0522 .
## dif_percent_leaving 9.479e-05  9.339e-05   1.015   0.3217  
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.001718 on 21 degrees of freedom
## Multiple R-squared:  0.04676,    Adjusted R-squared:  0.001368 
## F-statistic:  1.03 on 1 and 21 DF,  p-value: 0.3217

Santa Clara

Note the need to manually include and update the date here.

scc_case_date_max <- as.Date("2020-05-26") # need to manually include and update this

scc_sd_cases_curr <- scc_sd_zip_by_date %>%
  filter(date >= shelter_date & date <= scc_case_date_max - 7) %>%
  group_by(zipcode) %>%
  summarize(total_at_home = sum(total_at_home_zip), total_devices = sum(total_devices)) %>%
  mutate(percent_at_home = total_at_home*100/total_devices, percent_leaving_home = (100 - percent_at_home)) %>%
  left_join(scc_sd_zip_pre_avgs %>% dplyr::select(zipcode, percent_leaving_home_pre)) %>%
  mutate(dif_percent_leaving = percent_leaving_home - percent_leaving_home_pre) %>%
  left_join(scc_cases_zip) %>%
  filter(!is.na(cases))

fig_scc_sd_cases_curr <- plot_ly(scc_sd_cases_curr) %>%
  add_trace(x = ~percent_leaving_home, y = ~cases_by_pop, type = 'scatter', mode = 'markers') %>%
  add_trace(x = ~percent_leaving_home, y = fitted(lm(scc_sd_cases_curr$cases_by_pop~ scc_sd_cases_curr$percent_leaving_home)), mode = 'lines', showlegend = F) %>%
  layout(xaxis = list(title = 'Percent of devices leaving home'), yaxis = list(title = 'Cases per capita'), title = "SCC, cases per capita and % leaving home through 1 week before case data")

fig_scc_sd_cases_curr
model_scc_curr <- lm(cases_by_pop ~ percent_leaving_home, scc_sd_cases_curr)

summary(model_scc_curr)
## 
## Call:
## lm(formula = cases_by_pop ~ percent_leaving_home, data = scc_sd_cases_curr)
## 
## Residuals:
##        Min         1Q     Median         3Q        Max 
## -0.0016871 -0.0005254 -0.0001648  0.0002365  0.0029581 
## 
## Coefficients:
##                       Estimate Std. Error t value Pr(>|t|)
## (Intercept)          3.869e-04  8.484e-04   0.456    0.650
## percent_leaving_home 1.856e-05  1.688e-05   1.099    0.277
## 
## Residual standard error: 0.0008859 on 53 degrees of freedom
## Multiple R-squared:  0.0223, Adjusted R-squared:  0.003853 
## F-statistic: 1.209 on 1 and 53 DF,  p-value: 0.2765
fig_scc_sd_cases_curr_dif <- plot_ly(scc_sd_cases_curr) %>%
  add_trace(x = ~dif_percent_leaving, y = ~cases_by_pop, type = 'scatter', mode = 'markers') %>%
  add_trace(x = ~dif_percent_leaving, y = fitted(lm(scc_sd_cases_curr$cases_by_pop~ scc_sd_cases_curr$dif_percent_leaving)), mode = 'lines', showlegend = F) %>%
  layout(xaxis = list(title = 'Difference in % of devices leaving home'), yaxis = list(title = 'Cases per capita'), title = "SCC, cases per capita and difference in % leaving home through 1 week before case data")

fig_scc_sd_cases_curr_dif
model_scc_curr_dif <- lm(cases_by_pop ~ dif_percent_leaving, scc_sd_cases_curr)

summary(model_scc_curr_dif)
## 
## Call:
## lm(formula = cases_by_pop ~ dif_percent_leaving, data = scc_sd_cases_curr)
## 
## Residuals:
##        Min         1Q     Median         3Q        Max 
## -1.884e-03 -4.369e-04 -9.157e-05  2.132e-04  3.002e-03 
## 
## Coefficients:
##                      Estimate Std. Error t value Pr(>|t|)    
## (Intercept)         2.423e-03  4.050e-04   5.983 1.93e-07 ***
## dif_percent_leaving 4.164e-05  1.456e-05   2.859  0.00606 ** 
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.0008339 on 53 degrees of freedom
## Multiple R-squared:  0.1337, Adjusted R-squared:  0.1173 
## F-statistic: 8.176 on 1 and 53 DF,  p-value: 0.006057

Look at the outliers in this data. The zip codes with no cases are, with their populations:

# find zip codes with 0 cases
zero_case_scc <- scc_sd_cases_curr %>% filter(cases == 0)

print(zero_case_scc$zipcode)
## [1] "95053"

These zip codes have populations:

print(zero_case_scc$pop)
## [1] 3678

Some of these are very small but some are large. Let’s map these with their populations.

for_mapping_zero_case_scc <- left_join(zctas_94_95, zero_case_scc %>% dplyr::select(zipcode, pop), by = c("ZCTA5CE10" = "zipcode")) %>% filter(!is.na(pop))
mapview(for_mapping_zero_case_scc %>% dplyr::select(pop))

Try excluding these.

scc_sd_cases_curr_filtered <- scc_sd_cases_curr %>% 
  filter(cases != 0) %>%
  mutate(log_cases = log(cases), log_cases_by_pop = log(cases_by_pop))

fig_scc_sd_cases_curr_filt <- plot_ly(scc_sd_cases_curr_filtered) %>%
  add_trace(x = ~percent_leaving_home, y = ~cases_by_pop, type = 'scatter', mode = 'markers') %>%
  add_trace(x = ~percent_leaving_home, y = fitted(lm(scc_sd_cases_curr_filtered$cases_by_pop~ scc_sd_cases_curr_filtered$percent_leaving_home)), mode = 'lines', showlegend = F) %>%
  layout(xaxis = list(title = 'Percent of devices leaving home'), yaxis = list(title = 'Cases per capita'), title = "SCC, cases per capita and % leaving home through 1 week before case data")

fig_scc_sd_cases_curr_filt
model_scc_curr_filt <- lm(cases_by_pop ~ percent_leaving_home, scc_sd_cases_curr_filtered)

summary(model_scc_curr_filt)
## 
## Call:
## lm(formula = cases_by_pop ~ percent_leaving_home, data = scc_sd_cases_curr_filtered)
## 
## Residuals:
##        Min         1Q     Median         3Q        Max 
## -0.0016498 -0.0005012 -0.0001875  0.0001966  0.0028503 
## 
## Coefficients:
##                        Estimate Std. Error t value Pr(>|t|)  
## (Intercept)          -3.193e-04  8.834e-04  -0.361   0.7193  
## percent_leaving_home  3.349e-05  1.773e-05   1.889   0.0645 .
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.0008568 on 52 degrees of freedom
## Multiple R-squared:  0.06421,    Adjusted R-squared:  0.04621 
## F-statistic: 3.568 on 1 and 52 DF,  p-value: 0.06449
fig_scc_sd_cases_curr_dif_filt <- plot_ly(scc_sd_cases_curr_filtered) %>%
  add_trace(x = ~dif_percent_leaving, y = ~cases_by_pop, type = 'scatter', mode = 'markers') %>%
  add_trace(x = ~dif_percent_leaving, y = fitted(lm(scc_sd_cases_curr_filtered$cases_by_pop~ scc_sd_cases_curr_filtered$dif_percent_leaving)), mode = 'lines', showlegend = F) %>%
  layout(xaxis = list(title = 'Difference in % of devices leaving home'), yaxis = list(title = 'Cases per capita'), title = "SCC, cases per capita and difference in % leaving home through 1 week before case data")

fig_scc_sd_cases_curr_dif_filt
model_scc_curr_filt_dif <- lm(cases_by_pop ~ dif_percent_leaving, scc_sd_cases_curr_filtered)

summary(model_scc_curr_filt_dif)
## 
## Call:
## lm(formula = cases_by_pop ~ dif_percent_leaving, data = scc_sd_cases_curr_filtered)
## 
## Residuals:
##        Min         1Q     Median         3Q        Max 
## -0.0016915 -0.0004076 -0.0001050  0.0002747  0.0029563 
## 
## Coefficients:
##                      Estimate Std. Error t value Pr(>|t|)    
## (Intercept)         2.689e-03  4.018e-04   6.692 1.53e-08 ***
## dif_percent_leaving 5.020e-05  1.434e-05   3.500 0.000963 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.0007968 on 52 degrees of freedom
## Multiple R-squared:  0.1907, Adjusted R-squared:  0.1751 
## F-statistic: 12.25 on 1 and 52 DF,  p-value: 0.0009631

Adjusting counting of cases by date for SF

Looking at the growth of cases from some baseline level over time. I will try using 0.0028 cases per capita as the baseline rate and seeing how much various zip codes exceed that in the week folloing when they reach that level, as dependent on their movement in the two weeks before the date they reached that level.

# set the cutoff level
cutoff_cases_by_pop <- 0.0028
# how many days after cutoff level to look
later_date_sep = 7
# how many days prior to look at movement averages
prior_movement_time = 14

# filter to just when cases are above the cutoff
sf_cases_cutoff <- sf_cases_zip_edit %>% filter(cases_by_pop >= cutoff_cases_by_pop)

zipcodes_in_cutoff <- unique(sf_cases_cutoff$zipcode)

later_cases_change <- vector(mode = "numeric", length = length(zipcodes_in_cutoff))
leaving_avg <- vector(mode = "numeric", length = length(zipcodes_in_cutoff))

for (i in 1:length(zipcodes_in_cutoff)) {
  curr_zip <- zipcodes_in_cutoff[i]
  curr_vals <- sf_cases_cutoff %>% filter(zipcode == curr_zip)
  later_cases_curr = NA
  leaving_avg_val = NA
  if (curr_vals$date[1] != sf_cases_zip_edit$date[1]) { # need to make sure it didn't start above the cutoff
    later_cases_curr = curr_vals$cases_by_pop[later_date_sep] - curr_vals$cases_by_pop[1] # change in cases an amount of time later
    # get the percent of devices leaving home in an amount of time before the date that cases went above the threshold level
    leaving_computed = sf_sd_zip_by_date %>% 
      filter(zipcode == curr_zip & date < curr_vals$date[1] & (date >= curr_vals$date[1] - prior_movement_time)) %>% 
      summarize(total_at_home = sum(total_at_home_zip), total_devices = sum(total_devices)) %>%
  mutate(percent_at_home = total_at_home*100/total_devices, percent_leaving_home = (100 - percent_at_home)) 
    leaving_avg_val = leaving_computed$percent_leaving_home
  }  
  later_cases_change[i] = later_cases_curr
  leaving_avg[i] = leaving_avg_val
}

# combine
collected_cases_growth_baseline <- data.frame(zipcodes_in_cutoff, later_cases_change, leaving_avg) %>% filter(!is.na(later_cases_change))

# plot
fig_collected_cases_growth_baseline <- plot_ly(collected_cases_growth_baseline) %>%
  add_trace(x = ~leaving_avg, y = ~later_cases_change, type = 'scatter', mode = 'markers') %>%
  add_trace(x = ~leaving_avg, y = fitted(lm(collected_cases_growth_baseline$later_cases_change~ collected_cases_growth_baseline$leaving_avg)), mode = 'lines', showlegend = F) %>%
  layout(xaxis = list(title = '% of devices leaving home in 2 weeks before'), yaxis = list(title = 'Change in cases per capita 1 week later'), title = "SF, change in cases/person in week after >0.0028 cases/person vs % leaving in 2 weeks before")

fig_collected_cases_growth_baseline
model_collected <- lm(collected_cases_growth_baseline$later_cases_change~ collected_cases_growth_baseline$leaving_avg)

summary(model_collected)
## 
## Call:
## lm(formula = collected_cases_growth_baseline$later_cases_change ~ 
##     collected_cases_growth_baseline$leaving_avg)
## 
## Residuals:
##          1          2          3          4          5          6          7 
## -1.075e-04  8.919e-05 -3.617e-04  5.133e-04  5.867e-05 -1.235e-04 -6.844e-05 
## 
## Coefficients:
##                                               Estimate Std. Error t value
## (Intercept)                                 -2.396e-03  1.994e-03  -1.202
## collected_cases_growth_baseline$leaving_avg  5.882e-05  3.977e-05   1.479
##                                             Pr(>|t|)
## (Intercept)                                    0.283
## collected_cases_growth_baseline$leaving_avg    0.199
## 
## Residual standard error: 0.0002957 on 5 degrees of freedom
## Multiple R-squared:  0.3043, Adjusted R-squared:  0.1652 
## F-statistic: 2.187 on 1 and 5 DF,  p-value: 0.1992

Compare this to just the metric we were looking at before (number of cases and movement through 1 week before the most recent date on the case data) for just these zip codes.

# plot just these zip codes from the figure before
fig_sf_sd_cases_curr_limit <- plot_ly(sf_sd_cases_curr %>% filter(zipcode %in% zipcodes_in_cutoff)) %>%
  add_trace(x = ~percent_leaving_home, y = ~cases_by_pop, type = 'scatter', mode = 'markers') %>%
  add_trace(x = ~percent_leaving_home, y = fitted(lm(cases_by_pop~ percent_leaving_home, sf_sd_cases_curr %>% filter(zipcode %in% zipcodes_in_cutoff))), mode = 'lines', showlegend = F) %>%
  layout(xaxis = list(title = 'Percent of devices leaving home'), yaxis = list(title = 'Cases per capita'), title = "SFC, cases/person and % leaving through 1 week before case data, just zip codes in previous analysis")

fig_sf_sd_cases_curr_limit
model_cases_limit <- lm(cases_by_pop~ percent_leaving_home, sf_sd_cases_curr %>% filter(zipcode %in% zipcodes_in_cutoff))

summary(model_cases_limit)
## 
## Call:
## lm(formula = cases_by_pop ~ percent_leaving_home, data = sf_sd_cases_curr %>% 
##     filter(zipcode %in% zipcodes_in_cutoff))
## 
## Residuals:
##        Min         1Q     Median         3Q        Max 
## -0.0009547 -0.0006777 -0.0001398  0.0006776  0.0012809 
## 
## Coefficients:
##                        Estimate Std. Error t value Pr(>|t|)
## (Intercept)          -0.0035723  0.0080033  -0.446    0.671
## percent_leaving_home  0.0001611  0.0001592   1.012    0.351
## 
## Residual standard error: 0.0009088 on 6 degrees of freedom
## Multiple R-squared:  0.1458, Adjusted R-squared:  0.00338 
## F-statistic: 1.024 on 1 and 6 DF,  p-value: 0.3507

These two plots are not that similar, which is interesting but makes sense–more evidence that it’s likely important to take into account the original number of cases in a zip code.

Change in cases and movement

For San Francisco, since that’s the data on cases over time that we have. Below I look at the change in cases from April 21st through present. Movement is averaged from 2 weeks before April 21st through 1 week before present.

# get original levels of cases
original_cases <- sf_cases_zip %>% filter(date == (min(date)+1)) %>%
  mutate(log_orig_cases = log(confirmed_cases), log_orig_cases_by_pop = log(cases_by_pop)) %>% 
  dplyr::select(date, zipcode, confirmed_cases, cases_by_pop, log_orig_cases, log_orig_cases_by_pop) %>%
  rename(orig_date = date, orig_cases = confirmed_cases, orig_cases_by_pop = cases_by_pop)

# join with current levels of cases and movement averaged from 2 weeks before start of case data through 1 week before most recent case data
sf_sd_cases_curr_orig <- left_join(original_cases, sf_sd_cases_curr %>% dplyr::select(date, confirmed_cases, cases_by_pop, zipcode)) %>%
  mutate(log_cases = log(confirmed_cases), 
         log_cases_by_pop = log(cases_by_pop), 
         change_cases = confirmed_cases - orig_cases, 
         change_cases_by_pop = cases_by_pop - orig_cases_by_pop, 
         change_log_cases = log_cases - log_orig_cases, 
         change_log_cases_by_pop = log_cases_by_pop - log_orig_cases_by_pop, 
         perc_change_cases = change_cases*100 / orig_cases,
         perc_change_cases_by_pop = change_cases_by_pop*100/orig_cases_by_pop,
         perc_change_log_cases = change_log_cases*100/log_orig_cases,
         perc_change_log_cases_by_pop = change_log_cases_by_pop*100/log_orig_cases_by_pop) %>%
  left_join(sf_sd_zip_by_date %>%
  filter((date >= original_cases$orig_date[1] - 14) & (date <= sf_case_date_max - 7)) %>%
  group_by(zipcode) %>%
  summarize(total_at_home = sum(total_at_home_zip), total_devices = sum(total_devices)) %>%
  mutate(percent_at_home = total_at_home*100/total_devices, percent_leaving_home = (100 - percent_at_home))) %>%
  filter(!is.na(confirmed_cases))


# plot
fig_sf_sd_cases_change <- plot_ly(sf_sd_cases_curr_orig) %>%
  add_trace(x = ~percent_leaving_home, y = ~change_cases_by_pop, type = 'scatter', mode = 'markers') %>%
  add_trace(x = ~percent_leaving_home, y = fitted(lm(change_cases_by_pop~ percent_leaving_home, sf_sd_cases_curr_orig)), mode = 'lines', showlegend = F) %>%
  layout(xaxis = list(title = 'Percent of devices leaving home'), yaxis = list(title = 'Change in cases per capita from April 21st to present'), title = "SFC, change in cases/person and % leaving home April 7th through 1 week before case data")

fig_sf_sd_cases_change
model_cases_change <- lm(change_cases_by_pop ~ percent_leaving_home, sf_sd_cases_curr_orig)

summary(model_cases_change)
## 
## Call:
## lm(formula = change_cases_by_pop ~ percent_leaving_home, data = sf_sd_cases_curr_orig)
## 
## Residuals:
##        Min         1Q     Median         3Q        Max 
## -0.0010124 -0.0006528 -0.0004590  0.0006735  0.0019122 
## 
## Coefficients:
##                        Estimate Std. Error t value Pr(>|t|)
## (Intercept)           2.843e-03  3.216e-03   0.884    0.387
## percent_leaving_home -3.645e-05  6.437e-05  -0.566    0.577
## 
## Residual standard error: 0.0009496 on 21 degrees of freedom
## Multiple R-squared:  0.01505,    Adjusted R-squared:  -0.03186 
## F-statistic: 0.3208 on 1 and 21 DF,  p-value: 0.5771
# plot change in log of cases
fig_sf_sd_cases_change_log <- plot_ly(sf_sd_cases_curr_orig) %>%
  add_trace(x = ~percent_leaving_home, y = ~change_log_cases_by_pop, type = 'scatter', mode = 'markers') %>%
  add_trace(x = ~percent_leaving_home, y = fitted(lm(change_log_cases_by_pop~ percent_leaving_home, sf_sd_cases_curr_orig)), mode = 'lines', showlegend = F) %>%
  layout(xaxis = list(title = 'Percent of devices leaving home'), yaxis = list(title = 'Change in log of cases per capita from April 21st to present'), title = "SFC, change in cases/person and % leaving home April 7th through 1 week before case data")

fig_sf_sd_cases_change_log
model_cases_change_log <- lm(change_log_cases_by_pop ~ percent_leaving_home, sf_sd_cases_curr_orig)

summary(model_cases_change_log)
## 
## Call:
## lm(formula = change_log_cases_by_pop ~ percent_leaving_home, 
##     data = sf_sd_cases_curr_orig)
## 
## Residuals:
##      Min       1Q   Median       3Q      Max 
## -0.56519 -0.19932 -0.07670  0.06448  1.90013 
## 
## Coefficients:
##                      Estimate Std. Error t value Pr(>|t|)
## (Intercept)           1.85980    1.59946   1.163    0.258
## percent_leaving_home -0.02573    0.03201  -0.804    0.431
## 
## Residual standard error: 0.4723 on 21 degrees of freedom
## Multiple R-squared:  0.02984,    Adjusted R-squared:  -0.01636 
## F-statistic: 0.6459 on 1 and 21 DF,  p-value: 0.4306
# plot percent change in cases, filtering outliers of very very high percent changes
fig_sf_sd_cases_change_perc <- plot_ly(sf_sd_cases_curr_orig %>% filter(perc_change_cases_by_pop < 1000)) %>%
  add_trace(x = ~percent_leaving_home, y = ~perc_change_cases_by_pop, type = 'scatter', mode = 'markers') %>%
  add_trace(x = ~percent_leaving_home, y = fitted(lm(perc_change_cases_by_pop~ percent_leaving_home, sf_sd_cases_curr_orig %>% filter(perc_change_cases_by_pop < 1000))), mode = 'lines', showlegend = F) %>%
  layout(xaxis = list(title = 'Percent of devices leaving home'), yaxis = list(title = 'Percent change in cases per capita from April 21st to present'), title = "SFC, % change in cases/person and % leaving home April 7th through 1 week before most case data")

fig_sf_sd_cases_change_perc
model_cases_change_perc <- lm(perc_change_cases_by_pop ~ percent_leaving_home, sf_sd_cases_curr_orig %>% filter(perc_change_cases_by_pop < 1000))

summary(model_cases_change_perc)
## 
## Call:
## lm(formula = perc_change_cases_by_pop ~ percent_leaving_home, 
##     data = sf_sd_cases_curr_orig %>% filter(perc_change_cases_by_pop < 
##         1000))
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -67.642 -21.207  -4.874  17.523  74.659 
## 
## Coefficients:
##                      Estimate Std. Error t value Pr(>|t|)  
## (Intercept)           239.530    120.123   1.994    0.060 .
## percent_leaving_home   -3.458      2.403  -1.439    0.166  
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 35.45 on 20 degrees of freedom
## Multiple R-squared:  0.09382,    Adjusted R-squared:  0.04851 
## F-statistic: 2.071 on 1 and 20 DF,  p-value: 0.1656

This still is weird.

Correlations

Try just looking at a few correlations between demographic variables, movement, and change in cases per capita since April 21st. The movement and cases metrics are the same as in the previous section. Demographic information from ACS census data.

First occupants per room.

sf_occupants_zip <- pullCensus("group(B25014)", sf_fips) %>% 
  dplyr::select(-c(contains("EA"),contains("MA"),contains("M"))) %>% 
  gather(key = "variable", value = "estimate", -blockgroup) %>% 
  mutate(label = acs_vars$label[match(variable,acs_vars$name)]) %>% 
  dplyr::select(-variable) %>% 
  separate(label, into = c(NA, NA, NA,"occupants per room"), sep = "!!") %>% 
  filter(!is.na(`occupants per room`)) %>%
  group_by(blockgroup, `occupants per room`) %>%
  summarize(estimate_tot = sum(estimate)) %>% 
  spread(key = `occupants per room`, value = estimate_tot) %>%
  mutate(total_nums = `0.50 or less occupants per room` + `0.51 to 1.00 occupants per room` + `1.01 to 1.50 occupants per room` + `1.51 to 2.00 occupants per room` + `2.01 or more occupants per room`) %>%
  left_join(sf_bg_zctas %>% dplyr::select(blockgroup, zipcode)) %>% 
  group_by(zipcode) %>%
  summarize(total_nums = sum(total_nums), total_0.5less = sum(`0.50 or less occupants per room`), total_0.5to1 = sum(`0.51 to 1.00 occupants per room`), total_1to1.5 = sum(`1.01 to 1.50 occupants per room`), total_1.5to2 = sum(`1.51 to 2.00 occupants per room`), total_2more = sum(`2.01 or more occupants per room`)) %>%
  mutate(`percent more than 1` = (total_1to1.5 + total_1.5to2 + total_2more) * 100/ total_nums, `percent less than 1` = (100-`percent more than 1`), `percent 0.5 or less` = total_0.5less*100/total_nums, `percent more than 0.5` = (100-`percent 0.5 or less`), `percent more than 2` = total_2more*100/total_nums)

sf_occupants_sd_cases_change <- left_join(sf_occupants_zip, sf_sd_cases_curr_orig) %>% filter(!is.na(confirmed_cases))

# plot
fig_sf_cases_change_occupants <- plot_ly(sf_occupants_sd_cases_change) %>%
  add_trace(x = ~`percent more than 1`, y = ~change_cases_by_pop, type = 'scatter', mode = 'markers') %>%
  add_trace(x = ~`percent more than 1`, y = fitted(lm(change_cases_by_pop~ `percent more than 1`, sf_occupants_sd_cases_change)), mode = 'lines', showlegend = F) %>%
  layout(xaxis = list(title = 'Percent of households with more than 1 occupants per room'), yaxis = list(title = 'Change in cases per capita from April 21st to present'), title = "SFC, change in cases per capita and occupants per room")

fig_sf_cases_change_occupants
model_cases_change_occupants <- lm(change_cases_by_pop ~ `percent more than 1`, sf_occupants_sd_cases_change)

summary(model_cases_change_occupants)
## 
## Call:
## lm(formula = change_cases_by_pop ~ `percent more than 1`, data = sf_occupants_sd_cases_change)
## 
## Residuals:
##        Min         1Q     Median         3Q        Max 
## -0.0013355 -0.0004266 -0.0002839  0.0005227  0.0015590 
## 
## Coefficients:
##                        Estimate Std. Error t value Pr(>|t|)  
## (Intercept)           2.944e-04  3.193e-04   0.922    0.367  
## `percent more than 1` 1.161e-04  4.276e-05   2.714    0.013 *
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.0008233 on 21 degrees of freedom
## Multiple R-squared:  0.2597, Adjusted R-squared:  0.2245 
## F-statistic: 7.367 on 1 and 21 DF,  p-value: 0.01299
# try with more than 2 people
fig_sf_cases_change_occupants_2 <- plot_ly(sf_occupants_sd_cases_change) %>%
  add_trace(x = ~`percent more than 2`, y = ~change_cases_by_pop, type = 'scatter', mode = 'markers') %>%
  add_trace(x = ~`percent more than 2`, y = fitted(lm(change_cases_by_pop~ `percent more than 2`, sf_occupants_sd_cases_change)), mode = 'lines', showlegend = F) %>%
  layout(xaxis = list(title = 'Percent of households with more than 2 occupants per room'), yaxis = list(title = 'Change in cases per capita from April 21st to present'), title = "SFC, change in cases per capita and occupants per room")

fig_sf_cases_change_occupants_2
model_cases_change_occupants_2 <- lm(change_cases_by_pop ~ `percent more than 2`, sf_occupants_sd_cases_change)

summary(model_cases_change_occupants)
## 
## Call:
## lm(formula = change_cases_by_pop ~ `percent more than 1`, data = sf_occupants_sd_cases_change)
## 
## Residuals:
##        Min         1Q     Median         3Q        Max 
## -0.0013355 -0.0004266 -0.0002839  0.0005227  0.0015590 
## 
## Coefficients:
##                        Estimate Std. Error t value Pr(>|t|)  
## (Intercept)           2.944e-04  3.193e-04   0.922    0.367  
## `percent more than 1` 1.161e-04  4.276e-05   2.714    0.013 *
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.0008233 on 21 degrees of freedom
## Multiple R-squared:  0.2597, Adjusted R-squared:  0.2245 
## F-statistic: 7.367 on 1 and 21 DF,  p-value: 0.01299
# combine with social distancing
model_cases_change_occupants_sd <- lm(change_cases_by_pop ~ `percent more than 1` + percent_leaving_home, sf_occupants_sd_cases_change)

summary(model_cases_change_occupants_sd)
## 
## Call:
## lm(formula = change_cases_by_pop ~ `percent more than 1` + percent_leaving_home, 
##     data = sf_occupants_sd_cases_change)
## 
## Residuals:
##        Min         1Q     Median         3Q        Max 
## -0.0013552 -0.0004288 -0.0002279  0.0005456  0.0014971 
## 
## Coefficients:
##                         Estimate Std. Error t value Pr(>|t|)  
## (Intercept)           -5.502e-04  3.138e-03  -0.175   0.8626  
## `percent more than 1`  1.203e-04  4.645e-05   2.590   0.0175 *
## percent_leaving_home   1.640e-05  6.061e-05   0.271   0.7895  
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.000842 on 20 degrees of freedom
## Multiple R-squared:  0.2624, Adjusted R-squared:  0.1886 
## F-statistic: 3.558 on 2 and 20 DF,  p-value: 0.04766
# try with absolute numbers of cases
model_cases_change_occupants_abs <- lm(change_cases ~ `percent more than 2`, sf_occupants_sd_cases_change)

summary(model_cases_change_occupants_abs)
## 
## Call:
## lm(formula = change_cases ~ `percent more than 2`, data = sf_occupants_sd_cases_change)
## 
## Residuals:
##    Min     1Q Median     3Q    Max 
## -57.83 -28.29 -18.42  13.64 165.44 
## 
## Coefficients:
##                       Estimate Std. Error t value Pr(>|t|)  
## (Intercept)             36.418     13.708   2.657   0.0148 *
## `percent more than 2`    7.791     10.001   0.779   0.4447  
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 52.98 on 21 degrees of freedom
## Multiple R-squared:  0.02809,    Adjusted R-squared:  -0.01819 
## F-statistic: 0.6069 on 1 and 21 DF,  p-value: 0.4447

That doesn’t seem significant.

Next try population density, using the area of the zip code regions

sf_pop_density_zip <- sf_cases_zip %>% filter(date == max(date)) %>%
  dplyr::select(zipcode, total_pop_zip) %>%
  left_join(zctas_94, by = c("zipcode" = "ZCTA5CE10")) %>%
  st_as_sf() %>%
  mutate(zip_area = st_area(.), pop_density = total_pop_zip / zip_area) %>%
  filter(!is.na(pop_density))

mapview(sf_pop_density_zip %>% dplyr::select(pop_density))
sf_pop_density_zip_sd_cases <- left_join(sf_sd_cases_curr_orig, sf_pop_density_zip) 

# plot
fig_sf_cases_change_pop_density <- plot_ly(sf_pop_density_zip_sd_cases) %>%
  add_trace(x = ~pop_density, y = ~change_cases_by_pop, type = 'scatter', mode = 'markers') %>%
  add_trace(x = ~pop_density, y = fitted(lm(change_cases_by_pop~ pop_density, sf_pop_density_zip_sd_cases)), mode = 'lines', showlegend = F) %>%
  layout(xaxis = list(title = 'Population density'), yaxis = list(title = 'Change in cases per capita from April 21st to present'), title = "SFC, change in cases per capita and population density")

fig_sf_cases_change_pop_density
model_cases_change_pop_density <- lm(change_cases_by_pop ~ pop_density, sf_pop_density_zip_sd_cases)

summary(model_cases_change_pop_density)
## 
## Call:
## lm(formula = change_cases_by_pop ~ pop_density, data = sf_pop_density_zip_sd_cases)
## 
## Residuals:
##        Min         1Q     Median         3Q        Max 
## -0.0009348 -0.0006967 -0.0005629  0.0007396  0.0017506 
## 
## Coefficients:
##              Estimate Std. Error t value Pr(>|t|)  
## (Intercept) 0.0009020  0.0004133   2.182   0.0406 *
## pop_density 0.0135048  0.0397646   0.340   0.7375  
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.0009542 on 21 degrees of freedom
## Multiple R-squared:  0.005462,   Adjusted R-squared:  -0.0419 
## F-statistic: 0.1153 on 1 and 21 DF,  p-value: 0.7375
model_cases_change_pop_density_sd <- lm(change_cases_by_pop ~ pop_density + percent_leaving_home, sf_pop_density_zip_sd_cases)

summary(model_cases_change_pop_density_sd)
## 
## Call:
## lm(formula = change_cases_by_pop ~ pop_density + percent_leaving_home, 
##     data = sf_pop_density_zip_sd_cases)
## 
## Residuals:
##        Min         1Q     Median         3Q        Max 
## -0.0010235 -0.0006783 -0.0004324  0.0006923  0.0018714 
## 
## Coefficients:
##                        Estimate Std. Error t value Pr(>|t|)
## (Intercept)           2.924e-03  3.285e-03   0.890    0.384
## pop_density           1.779e-02  4.095e-02   0.434    0.669
## percent_leaving_home -4.133e-05  6.660e-05  -0.621    0.542
## 
## Residual standard error: 0.0009685 on 20 degrees of freedom
## Multiple R-squared:  0.02425,    Adjusted R-squared:  -0.07332 
## F-statistic: 0.2485 on 2 and 20 DF,  p-value: 0.7823
# absolute cases
model_cases_change_pop_density_abs <- lm(change_cases ~ pop_density, sf_pop_density_zip_sd_cases)

summary(model_cases_change_pop_density_abs)
## 
## Call:
## lm(formula = change_cases ~ pop_density, data = sf_pop_density_zip_sd_cases)
## 
## Residuals:
##    Min     1Q Median     3Q    Max 
## -51.64 -31.47 -23.70  14.30 162.23 
## 
## Coefficients:
##             Estimate Std. Error t value Pr(>|t|)
## (Intercept)    31.19      23.10    1.35    0.191
## pop_density  1267.69    2222.52    0.57    0.574
## 
## Residual standard error: 53.33 on 21 degrees of freedom
## Multiple R-squared:  0.01526,    Adjusted R-squared:  -0.03164 
## F-statistic: 0.3253 on 1 and 21 DF,  p-value: 0.5745

Not significant.

Try number of people per household.

sf_house_size_zip <- pullCensus("group(B11016)", sf_fips) %>% 
  dplyr::select(-c(contains("EA"),contains("MA"),contains("M"))) %>% 
  gather(key = "variable", value = "estimate", -blockgroup) %>% 
  mutate(label = acs_vars$label[match(variable,acs_vars$name)]) %>% 
  dplyr::select(-variable) %>% 
  separate(label, into = c(NA, NA, "type", "size"), sep = "!!") %>% 
  filter(!is.na(type)) %>%
  filter(!is.na(size)) %>% 
  dplyr::select(-type) %>%
  group_by(blockgroup, size) %>%
  summarize(`total of this size` = sum(estimate)) %>% 
  spread(key = size, value = `total of this size`) %>%
  mutate(total_nums = `1-person household` + `2-person household` + `3-person household` + `4-person household` + `5-person household`+ `6-person household` + `7-or-more person household`) %>%
  left_join(sf_bg_zctas %>% dplyr::select(blockgroup, zipcode)) %>% 
  group_by(zipcode) %>%
  summarize(total_nums = sum(total_nums), total_1 = sum(`1-person household`), total_2 = sum(`2-person household`), total_3 = sum(`3-person household`), total_4 = sum(`4-person household`), total_5 = sum(`5-person household`), total_6 = sum(`6-person household`), total_7more = sum(`7-or-more person household`)) %>%
  mutate(`percent 5 or more` = (total_5 + total_6 + total_7more)* 100/ total_nums, `percent 1 or 2 only` = (total_1 + total_2)*100/total_nums)

sf_size_sd_cases_change <- left_join(sf_sd_cases_curr_orig, sf_house_size_zip)

# plot
fig_sf_cases_change_size <- plot_ly(sf_size_sd_cases_change) %>%
  add_trace(x = ~`percent 5 or more`, y = ~change_cases_by_pop, type = 'scatter', mode = 'markers') %>%
  add_trace(x = ~`percent 5 or more`, y = fitted(lm(change_cases_by_pop~ `percent 5 or more`, sf_size_sd_cases_change)), mode = 'lines', showlegend = F) %>%
  layout(xaxis = list(title = 'Percent of households with 5 or more members'), yaxis = list(title = 'Change in cases per capita from April 21st to present'), title = "SFC, change in cases per capita and household size")

fig_sf_cases_change_size
model_cases_change_size <- lm(change_cases_by_pop ~ `percent 5 or more`, sf_size_sd_cases_change)

summary(model_cases_change_size)
## 
## Call:
## lm(formula = change_cases_by_pop ~ `percent 5 or more`, data = sf_size_sd_cases_change)
## 
## Residuals:
##        Min         1Q     Median         3Q        Max 
## -0.0009727 -0.0007008 -0.0003047  0.0004362  0.0018114 
## 
## Coefficients:
##                      Estimate Std. Error t value Pr(>|t|)  
## (Intercept)         7.336e-04  2.852e-04   2.572   0.0178 *
## `percent 5 or more` 4.583e-05  3.329e-05   1.376   0.1832  
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.0009164 on 21 degrees of freedom
## Multiple R-squared:  0.08275,    Adjusted R-squared:  0.03908 
## F-statistic: 1.895 on 1 and 21 DF,  p-value: 0.1832
model_cases_change_size_sd <- lm(change_cases_by_pop ~ `percent 5 or more` + percent_leaving_home, sf_size_sd_cases_change)

summary(model_cases_change_size_sd)
## 
## Call:
## lm(formula = change_cases_by_pop ~ `percent 5 or more` + percent_leaving_home, 
##     data = sf_size_sd_cases_change)
## 
## Residuals:
##        Min         1Q     Median         3Q        Max 
## -0.0010071 -0.0007005 -0.0003095  0.0004346  0.0018160 
## 
## Coefficients:
##                        Estimate Std. Error t value Pr(>|t|)
## (Intercept)           1.339e-03  3.405e-03   0.393    0.698
## `percent 5 or more`   4.392e-05  3.573e-05   1.229    0.233
## percent_leaving_home -1.190e-05  6.666e-05  -0.179    0.860
## 
## Residual standard error: 0.0009383 on 20 degrees of freedom
## Multiple R-squared:  0.08421,    Adjusted R-squared:  -0.007365 
## F-statistic: 0.9196 on 2 and 20 DF,  p-value: 0.4149
# testing with number of cases
fig_sf_cases_change_size <- plot_ly(sf_size_sd_cases_change) %>%
  add_trace(x = ~`percent 5 or more`, y = ~change_cases, type = 'scatter', mode = 'markers') %>%
  add_trace(x = ~`percent 5 or more`, y = fitted(lm(change_cases~ `percent 5 or more`, sf_size_sd_cases_change)), mode = 'lines', showlegend = F) %>%
  layout(xaxis = list(title = 'Percent of households with 5 or more members'), yaxis = list(title = 'Change in cases per capita from April 21st to present'), title = "SFC, change in cases per capita and household size")

fig_sf_cases_change_size
model_cases_change_size_abs <- lm(change_cases ~ `percent 5 or more`, sf_size_sd_cases_change)

summary(model_cases_change_size_abs)
## 
## Call:
## lm(formula = change_cases ~ `percent 5 or more`, data = sf_size_sd_cases_change)
## 
## Residuals:
##    Min     1Q Median     3Q    Max 
## -52.21 -25.08 -13.87  18.41 156.55 
## 
## Coefficients:
##                     Estimate Std. Error t value Pr(>|t|)  
## (Intercept)           15.342     14.661   1.046    0.307  
## `percent 5 or more`    4.308      1.711   2.517    0.020 *
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 47.1 on 21 degrees of freedom
## Multiple R-squared:  0.2318, Adjusted R-squared:  0.1952 
## F-statistic: 6.337 on 1 and 21 DF,  p-value: 0.02002

Also not very significant.

Try units in structure.

sf_units_zip <- pullCensus("group(B25024)", sf_fips) %>% dplyr::select(-c(contains("EA"),contains("MA"),contains("M"))) %>% 
  gather(key = "variable", value = "estimate", -blockgroup) %>% 
  mutate(label = acs_vars$label[match(variable,acs_vars$name)]) %>% 
  dplyr::select(-variable) %>% 
  separate(label, into = c(NA, NA, "units"), sep = "!!") %>% 
  filter(!is.na(units)) %>%
  spread(key = units, value = estimate) %>%
  mutate(total_nums = `1, attached` + `1, detached` + `10 to 19` + `2` + `20 to 49`+ `3 or 4` + `5 to 9`+ `50 or more`+ `Boat, RV, van, etc.`+ `Mobile home`, total_20more = `20 to 49`+`50 or more`, total_10more = `10 to 19` + `20 to 49`+`50 or more`, total_1 = `1, attached` + `1, detached`) %>%
  left_join(sf_bg_zctas %>% dplyr::select(blockgroup, zipcode)) %>% 
  group_by(zipcode) %>%
  summarize(total_nums = sum(total_nums), total_20more = sum(total_20more), total_10more = sum(total_20more), total_1_only = sum(total_1)) %>%
  mutate(`percent 10 or more` = total_10more*100/total_nums, `percent 20 or more` = total_20more*100/total_nums, `percent 1 only` = total_1_only*100/total_nums, `percent more than 1` = (100 - `percent 1 only`))

sf_units_sd_cases_change <- left_join(sf_sd_cases_curr_orig, sf_units_zip)

# plot with more than 20 first
fig_sf_cases_change_units <- plot_ly(sf_units_sd_cases_change) %>%
  add_trace(x = ~`percent 20 or more`, y = ~change_cases_by_pop, type = 'scatter', mode = 'markers') %>%
  add_trace(x = ~`percent 20 or more`, y = fitted(lm(change_cases_by_pop~ `percent 20 or more`, sf_units_sd_cases_change)), mode = 'lines', showlegend = F) %>%
  layout(xaxis = list(title = 'Percent of structures with 20 or more units'), yaxis = list(title = 'Change in cases per capita from April 21st to present'), title = "SFC, change in cases per capita and units per structure")

fig_sf_cases_change_units
model_cases_change_units <- lm(change_cases_by_pop ~ `percent 20 or more`, sf_units_sd_cases_change)

summary(model_cases_change_units)
## 
## Call:
## lm(formula = change_cases_by_pop ~ `percent 20 or more`, data = sf_units_sd_cases_change)
## 
## Residuals:
##        Min         1Q     Median         3Q        Max 
## -0.0012250 -0.0006349 -0.0004288  0.0007245  0.0018698 
## 
## Coefficients:
##                       Estimate Std. Error t value Pr(>|t|)   
## (Intercept)          8.888e-04  2.796e-04   3.179  0.00452 **
## `percent 20 or more` 4.574e-06  6.651e-06   0.688  0.49920   
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.0009462 on 21 degrees of freedom
## Multiple R-squared:  0.02202,    Adjusted R-squared:  -0.02455 
## F-statistic: 0.4729 on 1 and 21 DF,  p-value: 0.4992
model_cases_change_units_sd <- lm(change_cases_by_pop ~ `percent 20 or more` + percent_leaving_home, sf_units_sd_cases_change)

summary(model_cases_change_size_sd)
## 
## Call:
## lm(formula = change_cases_by_pop ~ `percent 5 or more` + percent_leaving_home, 
##     data = sf_size_sd_cases_change)
## 
## Residuals:
##        Min         1Q     Median         3Q        Max 
## -0.0010071 -0.0007005 -0.0003095  0.0004346  0.0018160 
## 
## Coefficients:
##                        Estimate Std. Error t value Pr(>|t|)
## (Intercept)           1.339e-03  3.405e-03   0.393    0.698
## `percent 5 or more`   4.392e-05  3.573e-05   1.229    0.233
## percent_leaving_home -1.190e-05  6.666e-05  -0.179    0.860
## 
## Residual standard error: 0.0009383 on 20 degrees of freedom
## Multiple R-squared:  0.08421,    Adjusted R-squared:  -0.007365 
## F-statistic: 0.9196 on 2 and 20 DF,  p-value: 0.4149
# try with more than 1 unit per structure
fig_sf_cases_change_units_1 <- plot_ly(sf_units_sd_cases_change) %>%
  add_trace(x = ~`percent more than 1`, y = ~change_cases_by_pop, type = 'scatter', mode = 'markers') %>%
  add_trace(x = ~`percent more than 1`, y = fitted(lm(change_cases_by_pop~ `percent more than 1`, sf_units_sd_cases_change)), mode = 'lines', showlegend = F) %>%
  layout(xaxis = list(title = 'Percent of structures with more than 1 unit'), yaxis = list(title = 'Change in cases per capita from April 21st to present'), title = "SFC, change in cases per capita and units per structure")

fig_sf_cases_change_units_1
model_cases_change_units_1 <- lm(change_cases_by_pop ~ `percent more than 1`, sf_units_sd_cases_change)

summary(model_cases_change_units_1)
## 
## Call:
## lm(formula = change_cases_by_pop ~ `percent more than 1`, data = sf_units_sd_cases_change)
## 
## Residuals:
##        Min         1Q     Median         3Q        Max 
## -0.0009561 -0.0007078 -0.0004629  0.0007276  0.0017919 
## 
## Coefficients:
##                        Estimate Std. Error t value Pr(>|t|)  
## (Intercept)           9.785e-04  4.971e-04   1.968   0.0624 .
## `percent more than 1` 6.932e-07  6.784e-06   0.102   0.9196  
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.0009566 on 21 degrees of freedom
## Multiple R-squared:  0.0004969,  Adjusted R-squared:  -0.0471 
## F-statistic: 0.01044 on 1 and 21 DF,  p-value: 0.9196
model_cases_change_units_sd_1 <- lm(change_cases_by_pop ~ `percent more than 1` + percent_leaving_home, sf_units_sd_cases_change)

summary(model_cases_change_units_sd_1)
## 
## Call:
## lm(formula = change_cases_by_pop ~ `percent more than 1` + percent_leaving_home, 
##     data = sf_units_sd_cases_change)
## 
## Residuals:
##        Min         1Q     Median         3Q        Max 
## -0.0010645 -0.0006709 -0.0004462  0.0006910  0.0019181 
## 
## Coefficients:
##                         Estimate Std. Error t value Pr(>|t|)
## (Intercept)            2.886e-03  3.298e-03   0.875    0.392
## `percent more than 1`  1.518e-06  7.035e-06   0.216    0.831
## percent_leaving_home  -3.936e-05  6.724e-05  -0.585    0.565
## 
## Residual standard error: 0.0009719 on 20 degrees of freedom
## Multiple R-squared:  0.01733,    Adjusted R-squared:  -0.08093 
## F-statistic: 0.1764 on 2 and 20 DF,  p-value: 0.8396

That was unfortunately not very fruitful so far.

Visits

San Francisco, initial analysis

Starting to look at visits to locations, beginning with SFC.

poi_ca <- readRDS(paste0(sg_path,'ca_poi.rds'))

Start with the week before shelter-in-place started.

# week <- "2020-03-09"
# filter_loc <- "San Francisco"
# filter_loc_abr <- "sfc"
# 
# patterns <- 
#   readRDS(paste0(sg_path,'weekly-patterns/v2/main-file/',week,'-weekly-patterns-ca.rds'))
# 
# hps <- 
#   readRDS(paste0(sg_path,'weekly-patterns/v2/home-summary-file/',week,'-home-panel-summary.rds'))
# 
# sfc_patterns <-
#   patterns %>% 
#   left_join(
#     poi_ca %>%
#       dplyr::select(
#         safegraph_place_id,
#         longitude,
#         latitude,
#         naics_code
#       ), 
#     by = "safegraph_place_id"
#   ) %>% 
#   filter(!is.na(longitude)) %>% 
#   mutate(
#     long = longitude,
#     lat = latitude
#   ) %>% 
#   st_as_sf(coords = c("long","lat")) %>% 
#   st_set_crs(4326) %>% 
#   st_join(
#     counties("CA", cb=F, progress_bar=F) %>% 
#       filter(NAME == filter_loc) %>% 
#       st_transform(4326) %>% 
#       dplyr::select(NAME),
#     left=F
#   )
# 
# sfc_patterns_origins_daily <-
#   process_patterns_origins_daily(sfc_patterns,hps)
# 
# sfc_patterns_origins_daily <-
#   sfc_patterns_origins_daily %>% 
#   left_join(
#     sfc_patterns %>% 
#       as.data.frame() %>% 
#       dplyr::select(
#         safegraph_place_id,
#         location_name,
#         street_address,
#         naics_code
#       )
#   )
# 
# saveRDS(sfc_patterns_origins_daily,paste0(sg_path,"weekly-patterns/v2/",filter_loc_abr,"_patterns_",week,"_origins_daily.rds"))

sfc_patterns_origins_daily <- readRDS(paste0(sg_path,"weekly-patterns/v2/sfc_patterns_2020-03-09_origins_daily.rds"))

Try looking at the absolute number of visits in each zip code in the week before SIP and the absolute number of cases to date in each county in SF.

sf_visits_week_pre <- sfc_patterns_origins_daily %>%
  left_join(sf_bg_zctas, by = c("origin_census_block_group" = "blockgroup")) %>%
  group_by(zipcode) %>%
  summarize(visit_counts_high_zip = sum(visit_counts_high), visit_counts_low_zip = sum(visit_counts_low)) %>%
  filter(!is.na(visit_counts_high_zip))

# combine with case data
sf_visits_week_pre_cases <- left_join(sf_visits_week_pre, sf_cases_zip %>% filter(date == max(date))) %>% filter(!is.na(confirmed_cases))
  
# plot 
fig_sf_cases_visits_week_pre <- plot_ly(sf_visits_week_pre_cases) %>%
  add_trace(x = ~visit_counts_high_zip, y = ~confirmed_cases, type = 'scatter', mode = 'markers') %>%
  add_trace(x = ~visit_counts_high_zip, y = fitted(lm(confirmed_cases~ visit_counts_high_zip, sf_visits_week_pre_cases)), mode = 'lines', showlegend = F) %>%
  layout(xaxis = list(title = 'Number of visits in week before SIP (high)'), yaxis = list(title = 'Current cases'), title = "SFC, cases and visits in week before SIP")

fig_sf_cases_visits_week_pre
model_cases_visits_week_pre <- lm(confirmed_cases ~ visit_counts_high_zip, sf_visits_week_pre_cases)

summary(model_cases_visits_week_pre)
## 
## Call:
## lm(formula = confirmed_cases ~ visit_counts_high_zip, data = sf_visits_week_pre_cases)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -114.53  -31.25  -13.72   45.15  170.21 
## 
## Coefficients:
##                         Estimate Std. Error t value Pr(>|t|)    
## (Intercept)           -1.472e+01  3.053e+01  -0.482   0.6348    
## visit_counts_high_zip  5.558e-04  1.378e-04   4.033   0.0006 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 71.79 on 21 degrees of freedom
## Multiple R-squared:  0.4365, Adjusted R-squared:  0.4096 
## F-statistic: 16.27 on 1 and 21 DF,  p-value: 0.0006004
# try with low just for comparison
fig_sf_cases_visits_week_pre_low <- plot_ly(sf_visits_week_pre_cases) %>%
  add_trace(x = ~visit_counts_low_zip, y = ~confirmed_cases, type = 'scatter', mode = 'markers') %>%
  add_trace(x = ~visit_counts_low_zip, y = fitted(lm(confirmed_cases~ visit_counts_low_zip, sf_visits_week_pre_cases)), mode = 'lines', showlegend = F) %>%
  layout(xaxis = list(title = 'Number of visits in week before SIP (low)'), yaxis = list(title = 'Current cases'), title = "SFC, cases and visits in week before SIP")

fig_sf_cases_visits_week_pre_low
model_cases_visits_week_pre_low <- lm(confirmed_cases ~ visit_counts_low_zip, sf_visits_week_pre_cases)

summary(model_cases_visits_week_pre_low)
## 
## Call:
## lm(formula = confirmed_cases ~ visit_counts_low_zip, data = sf_visits_week_pre_cases)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -115.43  -31.74  -11.01   45.60  169.46 
## 
## Coefficients:
##                        Estimate Std. Error t value Pr(>|t|)    
## (Intercept)          -1.732e+01  3.099e+01  -0.559 0.582116    
## visit_counts_low_zip  9.250e-04  2.285e-04   4.049 0.000578 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 71.66 on 21 degrees of freedom
## Multiple R-squared:  0.4384, Adjusted R-squared:  0.4117 
## F-statistic: 16.39 on 1 and 21 DF,  p-value: 0.0005778

That is very different from the results when we looked at percent leaving home! Interesting. Results are similar with high and low, so I’m just going to keep using the high counts for now.

But could this just be a population thing? I.e. higher population correlates with higher cases and more visits?

fig_sf_cases_pop_visits_week_pre <- plot_ly(sf_visits_week_pre_cases) %>%
  add_trace(x = ~visit_counts_high_zip, y = ~cases_by_pop, type = 'scatter', mode = 'markers') %>%
  add_trace(x = ~visit_counts_high_zip, y = fitted(lm(cases_by_pop~ visit_counts_high_zip, sf_visits_week_pre_cases)), mode = 'lines', showlegend = F) %>%
  layout(xaxis = list(title = 'Number of visits in week before SIP (high)'), yaxis = list(title = 'Current cases per capita'), title = "SFC, cases and visits in week before SIP")

fig_sf_cases_pop_visits_week_pre
model_cases_pop_visits_week_pre <- lm(cases_by_pop ~ visit_counts_high_zip, sf_visits_week_pre_cases)

summary(model_cases_pop_visits_week_pre)
## 
## Call:
## lm(formula = cases_by_pop ~ visit_counts_high_zip, data = sf_visits_week_pre_cases)
## 
## Residuals:
##        Min         1Q     Median         3Q        Max 
## -0.0017862 -0.0012636 -0.0006584  0.0012414  0.0032200 
## 
## Coefficients:
##                        Estimate Std. Error t value Pr(>|t|)  
## (Intercept)           1.857e-03  7.375e-04   2.518    0.020 *
## visit_counts_high_zip 2.608e-09  3.329e-09   0.784    0.442  
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.001734 on 21 degrees of freedom
## Multiple R-squared:  0.0284, Adjusted R-squared:  -0.01786 
## F-statistic: 0.6139 on 1 and 21 DF,  p-value: 0.4421

The relationship is definitely not as strong if you look at cases per capita. What if you pair with visits per capita?

sf_visits_week_pre_cases <- sf_visits_week_pre_cases %>%
  mutate(visits_per_capita_high = visit_counts_high_zip / total_pop_zip, visits_per_capita_low = visit_counts_low_zip / total_pop_zip)

# plot
fig_sf_cases_pop_visits_pop_week_pre <- plot_ly(sf_visits_week_pre_cases) %>%
  add_trace(x = ~visits_per_capita_high, y = ~cases_by_pop, type = 'scatter', mode = 'markers') %>%
  add_trace(x = ~visits_per_capita_high, y = fitted(lm(cases_by_pop~ visits_per_capita_high, sf_visits_week_pre_cases)), mode = 'lines', showlegend = F) %>%
  layout(xaxis = list(title = 'Number of visits per capita in week before SIP (high)'), yaxis = list(title = 'Current cases per capita'), title = "SFC, cases and visits in week before SIP")

fig_sf_cases_pop_visits_pop_week_pre
model_cases_pop_visits_pop_week_pre <- lm(cases_by_pop ~ visits_per_capita_high, sf_visits_week_pre_cases)

summary(model_cases_pop_visits_pop_week_pre)
## 
## Call:
## lm(formula = cases_by_pop ~ visits_per_capita_high, data = sf_visits_week_pre_cases)
## 
## Residuals:
##        Min         1Q     Median         3Q        Max 
## -0.0017718 -0.0013751 -0.0007039  0.0011794  0.0032135 
## 
## Coefficients:
##                         Estimate Std. Error t value Pr(>|t|)
## (Intercept)            0.0013798  0.0020578   0.670    0.510
## visits_per_capita_high 0.0001942  0.0004010   0.484    0.633
## 
## Residual standard error: 0.001749 on 21 degrees of freedom
## Multiple R-squared:  0.01104,    Adjusted R-squared:  -0.03605 
## F-statistic: 0.2345 on 1 and 21 DF,  p-value: 0.6332

Again not significant results, but at least the trend is in the direction we would expect.

Also do multiple regression with absolute cases, absolute visits, and population

cases_pop_visits_sf <- lm(confirmed_cases ~ visit_counts_high_zip + total_pop_zip, sf_visits_week_pre_cases)

summary(cases_pop_visits_sf)
## 
## Call:
## lm(formula = confirmed_cases ~ visit_counts_high_zip + total_pop_zip, 
##     data = sf_visits_week_pre_cases)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -111.87  -48.78  -12.61   40.92  160.44 
## 
## Coefficients:
##                         Estimate Std. Error t value Pr(>|t|)
## (Intercept)           -3.364e+01  3.510e+01  -0.959    0.349
## visit_counts_high_zip  4.132e-05  4.957e-04   0.083    0.934
## total_pop_zip          3.190e-03  2.953e-03   1.080    0.293
## 
## Residual standard error: 71.5 on 20 degrees of freedom
## Multiple R-squared:  0.4675, Adjusted R-squared:  0.4143 
## F-statistic: 8.781 on 2 and 20 DF,  p-value: 0.001832

Hmm…

Let’s do exactly the same thing but with SCC just for comparison.

Santa Clara County, initial analysis

# week <- "2020-03-09"
# filter_loc <- "Santa Clara"
# filter_loc_abr <- "scc"
# 
# patterns <- 
#   readRDS(paste0(sg_path,'weekly-patterns/v2/main-file/',week,'-weekly-patterns-ca.rds'))
# 
# hps <- 
#   readRDS(paste0(sg_path,'weekly-patterns/v2/home-summary-file/',week,'-home-panel-summary.rds'))
# 
# scc_patterns <-
#   patterns %>% 
#   left_join(
#     poi_ca %>%
#       dplyr::select(
#         safegraph_place_id,
#         longitude,
#         latitude,
#         naics_code
#       ), 
#     by = "safegraph_place_id"
#   ) %>% 
#   filter(!is.na(longitude)) %>% 
#   mutate(
#     long = longitude,
#     lat = latitude
#   ) %>% 
#   st_as_sf(coords = c("long","lat")) %>% 
#   st_set_crs(4326) %>% 
#   st_join(
#     counties("CA", cb=F, progress_bar=F) %>% 
#       filter(NAME == filter_loc) %>% 
#       st_transform(4326) %>% 
#       dplyr::select(NAME),
#     left=F
#   )
# 
# scc_patterns_origins_daily <-
#   process_patterns_origins_daily(scc_patterns,hps)
# 
# scc_patterns_origins_daily <-
#   scc_patterns_origins_daily %>% 
#   left_join(
#     scc_patterns %>% 
#       as.data.frame() %>% 
#       dplyr::select(
#         safegraph_place_id,
#         location_name,
#         street_address,
#         naics_code
#       )
#   )
# 
# saveRDS(scc_patterns_origins_daily,paste0(sg_path,"weekly-patterns/v2/",filter_loc_abr,"_patterns_",week,"_origins_daily.rds"))

scc_patterns_origins_daily <- readRDS(paste0(sg_path,"weekly-patterns/v2/scc_patterns_2020-03-09_origins_daily.rds"))

Try looking at the absolute number of visits in each zip code in the week before SIP and the absolute number of cases to date in each county in SCC.

scc_visits_week_pre <- scc_patterns_origins_daily %>%
  left_join(scc_bg_zctas, by = c("origin_census_block_group" = "blockgroup")) %>%
  group_by(zipcode) %>%
  summarize(visit_counts_high_zip = sum(visit_counts_high), visit_counts_low_zip = sum(visit_counts_low)) %>%
  filter(!is.na(visit_counts_high_zip))

# combine with case data
scc_visits_week_pre_cases <- left_join(scc_visits_week_pre, scc_cases_zip) %>%
  filter(!is.na(cases))
  
# plot 
fig_scc_cases_visits_week_pre <- plot_ly(scc_visits_week_pre_cases) %>%
  add_trace(x = ~visit_counts_high_zip, y = ~cases, type = 'scatter', mode = 'markers') %>%
  add_trace(x = ~visit_counts_high_zip, y = fitted(lm(cases~ visit_counts_high_zip, scc_visits_week_pre_cases)), mode = 'lines', showlegend = F) %>%
  layout(xaxis = list(title = 'Number of visits in week before SIP (high)'), yaxis = list(title = 'Current cases'), title = "SCC, cases and visits in week before SIP")

fig_scc_cases_visits_week_pre
scc_model_cases_visits_week_pre <- lm(cases ~ visit_counts_high_zip, scc_visits_week_pre_cases)

summary(scc_model_cases_visits_week_pre)
## 
## Call:
## lm(formula = cases ~ visit_counts_high_zip, data = scc_visits_week_pre_cases)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -57.097 -21.226  -4.106   9.912 155.987 
## 
## Coefficients:
##                         Estimate Std. Error t value Pr(>|t|)    
## (Intercept)           -8.5710741  9.6466801  -0.888    0.378    
## visit_counts_high_zip  0.0001807  0.0000276   6.545 2.43e-08 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 36.06 on 53 degrees of freedom
## Multiple R-squared:  0.447,  Adjusted R-squared:  0.4366 
## F-statistic: 42.84 on 1 and 53 DF,  p-value: 2.429e-08
# try with low just for comparison
fig_scc_cases_visits_week_pre_low <- plot_ly(scc_visits_week_pre_cases) %>%
  add_trace(x = ~visit_counts_low_zip, y = ~cases, type = 'scatter', mode = 'markers') %>%
  add_trace(x = ~visit_counts_low_zip, y = fitted(lm(cases~ visit_counts_low_zip, scc_visits_week_pre_cases)), mode = 'lines', showlegend = F) %>%
  layout(xaxis = list(title = 'Number of visits in week before SIP (low)'), yaxis = list(title = 'Current cases'), title = "SCC, cases and visits in week before SIP")

fig_scc_cases_visits_week_pre_low
scc_model_cases_visits_week_pre_low <- lm(cases ~ visit_counts_low_zip, scc_visits_week_pre_cases)

summary(scc_model_cases_visits_week_pre_low)
## 
## Call:
## lm(formula = cases ~ visit_counts_low_zip, data = scc_visits_week_pre_cases)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -54.728 -20.929  -5.711   8.191 160.517 
## 
## Coefficients:
##                        Estimate Std. Error t value Pr(>|t|)    
## (Intercept)          -5.831e+00  9.902e+00  -0.589    0.558    
## visit_counts_low_zip  2.578e-04  4.248e-05   6.069  1.4e-07 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 37.24 on 53 degrees of freedom
## Multiple R-squared:   0.41,  Adjusted R-squared:  0.3989 
## F-statistic: 36.84 on 1 and 53 DF,  p-value: 1.404e-07

Cases per capita:

fig_scc_cases_pop_visits_week_pre <- plot_ly(scc_visits_week_pre_cases) %>%
  add_trace(x = ~visit_counts_high_zip, y = ~cases_by_pop, type = 'scatter', mode = 'markers') %>%
  add_trace(x = ~visit_counts_high_zip, y = fitted(lm(cases_by_pop~ visit_counts_high_zip, scc_visits_week_pre_cases)), mode = 'lines', showlegend = F) %>%
  layout(xaxis = list(title = 'Number of visits in week before SIP (high)'), yaxis = list(title = 'Current cases per capita'), title = "SCC, cases and visits in week before SIP")

fig_scc_cases_pop_visits_week_pre
scc_model_cases_pop_visits_week_pre <- lm(cases_by_pop ~ visit_counts_high_zip, scc_visits_week_pre_cases)

summary(scc_model_cases_pop_visits_week_pre)
## 
## Call:
## lm(formula = cases_by_pop ~ visit_counts_high_zip, data = scc_visits_week_pre_cases)
## 
## Residuals:
##        Min         1Q     Median         3Q        Max 
## -0.0012683 -0.0005813 -0.0002036  0.0002581  0.0030920 
## 
## Coefficients:
##                        Estimate Std. Error t value Pr(>|t|)    
## (Intercept)           1.262e-03  2.396e-04   5.267 2.58e-06 ***
## visit_counts_high_zip 1.613e-10  6.854e-10   0.235    0.815    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.0008954 on 53 degrees of freedom
## Multiple R-squared:  0.001044,   Adjusted R-squared:  -0.0178 
## F-statistic: 0.0554 on 1 and 53 DF,  p-value: 0.8148

Visits per capita

scc_visits_week_pre_cases <- scc_visits_week_pre_cases %>%
  mutate(visits_per_capita_high = visit_counts_high_zip / pop, visits_per_capita_low = visit_counts_low_zip / pop)

# plot
fig_scc_cases_pop_visits_pop_week_pre <- plot_ly(scc_visits_week_pre_cases) %>%
  add_trace(x = ~visits_per_capita_high, y = ~cases_by_pop, type = 'scatter', mode = 'markers') %>%
  add_trace(x = ~visits_per_capita_high, y = fitted(lm(cases_by_pop~ visits_per_capita_high, scc_visits_week_pre_cases)), mode = 'lines', showlegend = F) %>%
  layout(xaxis = list(title = 'Number of visits per capita in week before SIP (high)'), yaxis = list(title = 'Current cases per capita'), title = "SCC, cases and visits in week before SIP")

fig_scc_cases_pop_visits_pop_week_pre
scc_model_cases_pop_visits_pop_week_pre <- lm(cases_by_pop ~ visits_per_capita_high, scc_visits_week_pre_cases)

summary(scc_model_cases_pop_visits_pop_week_pre)
## 
## Call:
## lm(formula = cases_by_pop ~ visits_per_capita_high, data = scc_visits_week_pre_cases)
## 
## Residuals:
##        Min         1Q     Median         3Q        Max 
## -0.0011584 -0.0005801 -0.0001982  0.0002486  0.0027879 
## 
## Coefficients:
##                          Estimate Std. Error t value Pr(>|t|)   
## (Intercept)             1.919e-03  6.428e-04   2.985  0.00429 **
## visits_per_capita_high -6.928e-05  7.197e-05  -0.963  0.34005   
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.0008882 on 53 degrees of freedom
## Multiple R-squared:  0.01719,    Adjusted R-squared:  -0.001356 
## F-statistic: 0.9269 on 1 and 53 DF,  p-value: 0.3401

That’s very weird…

Also do multiple regression with absolute cases, absolute visits, and population

cases_pop_visits_scc <- lm(cases ~ visit_counts_high_zip + pop, scc_visits_week_pre_cases)

summary(cases_pop_visits_scc)
## 
## Call:
## lm(formula = cases ~ visit_counts_high_zip + pop, data = scc_visits_week_pre_cases)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -64.716 -17.865  -3.391  10.695 149.452 
## 
## Coefficients:
##                         Estimate Std. Error t value Pr(>|t|)  
## (Intercept)           -1.300e+01  9.770e+00  -1.331   0.1890  
## visit_counts_high_zip -4.365e-06  1.068e-04  -0.041   0.9675  
## pop                    1.746e-03  9.748e-04   1.791   0.0791 .
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 35.33 on 52 degrees of freedom
## Multiple R-squared:  0.4791, Adjusted R-squared:  0.4591 
## F-statistic: 23.92 on 2 and 52 DF,  p-value: 4.313e-08

Comparing visits and % leaving home

Looking at the differences in visits and % leaving home for the week before SIP.

# save previous results as another name since I edit that name below
sfc_patterns_origins_daily_week_pre <- sfc_patterns_origins_daily

# get % leaving in week before SIP
sf_sd_zip_week_pre <- sf_sd_zip_by_date %>%
  filter(date < shelter_date & date >= (shelter_date - 7)) %>%
  group_by(zipcode) %>%
  summarize(total_at_home_zip = sum(total_at_home_zip), total_devices = sum(total_devices)) %>% 
  mutate(percent_at_home = total_at_home_zip*100/total_devices, percent_leaving_home = (100 - percent_at_home)) %>% 
  left_join(sf_pre_sd_leaving_avg %>% dplyr::select(zipcode, percent_leaving_home_pre)) %>%
  mutate(dif_percent_leaving = percent_leaving_home - percent_leaving_home_pre)

sf_sd_visits_week_pre <- left_join(sf_sd_zip_week_pre, sf_visits_week_pre_cases) %>% filter(!is.na(percent_leaving_home) & !is.na(visits_per_capita_high))

# plot
fig_sfc_visits_pop_leaving_week_pre <- plot_ly(sf_sd_visits_week_pre) %>%
  add_trace(x = ~visits_per_capita_high, y = ~percent_leaving_home, type = 'scatter', mode = 'markers') %>%
  add_trace(x = ~visits_per_capita_high, y = fitted(lm(percent_leaving_home~ visits_per_capita_high, sf_sd_visits_week_pre)), mode = 'lines', showlegend = F) %>%
  layout(xaxis = list(title = 'Number of visits per capita in week before SIP (high)'), yaxis = list(title = '% leaving home in week before SIP'), title = "SFC, leaving home and visits in week before SIP")

fig_sfc_visits_pop_leaving_week_pre

That’s odd, but makes sense why the two metrics give us different results.

What about total devices leaving and total visits?

sf_sd_visits_week_pre <- sf_sd_visits_week_pre %>% mutate(total_leaving_home_zip = total_devices - total_at_home_zip)

fig_sfc_visits_leaving_week_pre <- plot_ly(sf_sd_visits_week_pre) %>%
  add_trace(x = ~visit_counts_high_zip, y = ~total_leaving_home_zip, type = 'scatter', mode = 'markers') %>%
  add_trace(x = ~visit_counts_high_zip, y = fitted(lm(total_leaving_home_zip~ visit_counts_high_zip, sf_sd_visits_week_pre)), mode = 'lines', showlegend = F) %>%
  layout(xaxis = list(title = 'Number of visits in week before SIP (high)'), yaxis = list(title = 'Number of devices leaving home in week before SIP'), title = "SFC, leaving home and visits in week before SIP")

fig_sfc_visits_leaving_week_pre

Well that’s a relationship we would expect. Interesting.

Try SCC.

# get % leaving in week before SIP
scc_sd_zip_week_pre <- scc_sd_zip_by_date %>%
  filter(date < shelter_date & date >= (shelter_date - 7)) %>%
  group_by(zipcode) %>%
  summarize(total_at_home_zip = sum(total_at_home_zip), total_devices = sum(total_devices)) %>% 
  mutate(percent_at_home = total_at_home_zip*100/total_devices, percent_leaving_home = (100 - percent_at_home)) %>% 
  left_join(sf_pre_sd_leaving_avg %>% dplyr::select(zipcode, percent_leaving_home_pre)) %>%
  mutate(dif_percent_leaving = percent_leaving_home - percent_leaving_home_pre)

scc_sd_visits_week_pre <- left_join(scc_sd_zip_week_pre, scc_visits_week_pre_cases) %>% filter(!is.na(percent_leaving_home) & !is.na(visits_per_capita_high))

# plot
fig_scc_visits_pop_leaving_week_pre <- plot_ly(scc_sd_visits_week_pre) %>%
  add_trace(x = ~visits_per_capita_high, y = ~percent_leaving_home, type = 'scatter', mode = 'markers') %>%
  add_trace(x = ~visits_per_capita_high, y = fitted(lm(percent_leaving_home~ visits_per_capita_high, scc_sd_visits_week_pre)), mode = 'lines', showlegend = F) %>%
  layout(xaxis = list(title = 'Number of visits per capita in week before SIP (high)'), yaxis = list(title = '% leaving home in week before SIP'), title = "SCC, leaving home and visits in week before SIP")

fig_scc_visits_pop_leaving_week_pre

Remove outlier at bottom right that is barely leaving home and plot again

# plot without outliers
fig_scc_visits_pop_leaving_week_pre_edit <- plot_ly(scc_sd_visits_week_pre %>% filter(percent_leaving_home > 50)) %>%
  add_trace(x = ~visits_per_capita_high, y = ~percent_leaving_home, type = 'scatter', mode = 'markers') %>%
  add_trace(x = ~visits_per_capita_high, y = fitted(lm(percent_leaving_home~ visits_per_capita_high, scc_sd_visits_week_pre %>% filter(percent_leaving_home > 50))), mode = 'lines', showlegend = F) %>%
  layout(xaxis = list(title = 'Number of visits per capita in week before SIP (high)'), yaxis = list(title = '% leaving home in week before SIP'), title = "SCC, leaving home and visits in week before SIP")

fig_scc_visits_pop_leaving_week_pre_edit

Still not totally what we would expect (a linear relationship).

Total devices leaving and total visits.

scc_sd_visits_week_pre <- scc_sd_visits_week_pre %>% mutate(total_leaving_home_zip = total_devices - total_at_home_zip)

fig_scc_visits_leaving_week_pre <- plot_ly(scc_sd_visits_week_pre) %>%
  add_trace(x = ~visit_counts_high_zip, y = ~total_leaving_home_zip, type = 'scatter', mode = 'markers') %>%
  add_trace(x = ~visit_counts_high_zip, y = fitted(lm(total_leaving_home_zip~ visit_counts_high_zip, scc_sd_visits_week_pre)), mode = 'lines', showlegend = F) %>%
  layout(xaxis = list(title = 'Number of visits in week before SIP (high)'), yaxis = list(title = 'Number of devices leaving home in week before SIP'), title = "SCC, leaving home and visits in week before SIP")

fig_scc_visits_leaving_week_pre

Also expected. Interesting that these metrics diverge.

Would it be more appropriate to normalize number of visits by number of devices rather than population, then?

Preparing more data

## San Francisco
# for (i in 1:9) {
# week_init <- as.Date("2020-03-09")
# week <- week_init + 7*i
# filter_loc <- "San Francisco"
# filter_loc_abr <- "sfc"
# 
# patterns <- 
#   readRDS(paste0(sg_path,'weekly-patterns/v2/main-file/',week,'-weekly-patterns-ca.rds'))
# 
# hps <- 
#   readRDS(paste0(sg_path,'weekly-patterns/v2/home-summary-file/',week,'-home-panel-summary.rds'))
# 
# sfc_patterns <-
#   patterns %>% 
#   left_join(
#     poi_ca %>%
#       dplyr::select(
#         safegraph_place_id,
#         longitude,
#         latitude,
#         naics_code
#       ), 
#     by = "safegraph_place_id"
#   ) %>% 
#   filter(!is.na(longitude)) %>% 
#   mutate(
#     long = longitude,
#     lat = latitude
#   ) %>% 
#   st_as_sf(coords = c("long","lat")) %>% 
#   st_set_crs(4326) %>% 
#   st_join(
#     counties("CA", cb=F, progress_bar=F) %>% 
#       filter(NAME == filter_loc) %>% 
#       st_transform(4326) %>% 
#       dplyr::select(NAME),
#     left=F
#   )
# 
# sfc_patterns_origins_daily <-
#   process_patterns_origins_daily(sfc_patterns,hps)
# 
# sfc_patterns_origins_daily <-
#   sfc_patterns_origins_daily %>% 
#   left_join(
#     sfc_patterns %>% 
#       as.data.frame() %>% 
#       dplyr::select(
#         safegraph_place_id,
#         location_name,
#         street_address,
#         naics_code
#       )
#   )
# 
# saveRDS(sfc_patterns_origins_daily,paste0(sg_path,"weekly-patterns/v2/",filter_loc_abr,"_patterns_",week,"_origins_daily.rds"))
# }

Total visits since SIP, SFC

Let’s look at total visits since SIP.

sfc_patterns_origins_daily_week1 <- readRDS(paste0(sg_path,"weekly-patterns/v2/sfc_patterns_2020-03-16_origins_daily.rds"))

sfc_patterns_origins_daily_week2 <- readRDS(paste0(sg_path,"weekly-patterns/v2/sfc_patterns_2020-03-23_origins_daily.rds"))

sfc_patterns_origins_daily_week3 <- readRDS(paste0(sg_path,"weekly-patterns/v2/sfc_patterns_2020-03-30_origins_daily.rds"))

sfc_patterns_origins_daily_week4 <- readRDS(paste0(sg_path,"weekly-patterns/v2/sfc_patterns_2020-04-06_origins_daily.rds"))

sfc_patterns_origins_daily_week5 <- readRDS(paste0(sg_path,"weekly-patterns/v2/sfc_patterns_2020-04-13_origins_daily.rds"))

sfc_patterns_origins_daily_week6 <- readRDS(paste0(sg_path,"weekly-patterns/v2/sfc_patterns_2020-04-20_origins_daily.rds"))

sfc_patterns_origins_daily_week7 <- readRDS(paste0(sg_path,"weekly-patterns/v2/sfc_patterns_2020-04-27_origins_daily.rds"))

sfc_patterns_origins_daily_week8 <- readRDS(paste0(sg_path,"weekly-patterns/v2/sfc_patterns_2020-05-04_origins_daily.rds"))

sfc_patterns_origins_daily_week9 <- readRDS(paste0(sg_path,"weekly-patterns/v2/sfc_patterns_2020-05-11_origins_daily.rds"))

# combine
sfc_patterns_origins_daily_since_SIP <- rbind(sfc_patterns_origins_daily_week1, sfc_patterns_origins_daily_week2,sfc_patterns_origins_daily_week3,sfc_patterns_origins_daily_week4,sfc_patterns_origins_daily_week5,sfc_patterns_origins_daily_week6,sfc_patterns_origins_daily_week7,sfc_patterns_origins_daily_week8,sfc_patterns_origins_daily_week9)

# summarize
sf_visits_since_SIP <- sfc_patterns_origins_daily_since_SIP %>%
  left_join(sf_bg_zctas, by = c("origin_census_block_group" = "blockgroup")) %>%
  group_by(zipcode) %>%
  summarize(visit_counts_high_zip = sum(visit_counts_high), visit_counts_low_zip = sum(visit_counts_low)) %>%
  filter(!is.na(visit_counts_high_zip))

sf_visits_since_SIP_cases <- left_join(sf_visits_since_SIP, sf_cases_zip %>% filter(date == max(date))) %>% 
  mutate(visits_per_capita_high = visit_counts_high_zip / total_pop_zip, visits_per_capita_low = visit_counts_low_zip / total_pop_zip) %>% filter(!is.na(confirmed_cases))
  
# plot 
fig_sf_cases_visits_since_SIP <- plot_ly(sf_visits_since_SIP_cases) %>%
  add_trace(x = ~visit_counts_high_zip, y = ~confirmed_cases, type = 'scatter', mode = 'markers') %>%
  add_trace(x = ~visit_counts_high_zip, y = fitted(lm(confirmed_cases~ visit_counts_high_zip, sf_visits_since_SIP_cases)), mode = 'lines', showlegend = F) %>%
  layout(xaxis = list(title = 'Number of visits since SIP (high)'), yaxis = list(title = 'Current cases'), title = "SFC, cases and visits since SIP")

fig_sf_cases_visits_since_SIP
model_cases_visits_since_SIP <- lm(confirmed_cases ~ visit_counts_high_zip, sf_visits_since_SIP_cases)

summary(model_cases_visits_since_SIP)
## 
## Call:
## lm(formula = confirmed_cases ~ visit_counts_high_zip, data = sf_visits_since_SIP_cases)
## 
## Residuals:
##      Min       1Q   Median       3Q      Max 
## -127.210  -30.544   -6.309   40.793  160.271 
## 
## Coefficients:
##                         Estimate Std. Error t value Pr(>|t|)    
## (Intercept)           -1.815e+01  2.958e+01  -0.614 0.546018    
## visit_counts_high_zip  9.972e-05  2.319e-05   4.300 0.000317 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 69.74 on 21 degrees of freedom
## Multiple R-squared:  0.4682, Adjusted R-squared:  0.4429 
## F-statistic: 18.49 on 1 and 21 DF,  p-value: 0.0003174
# try with low just for comparison
fig_sf_cases_visits_since_SIP_low <- plot_ly(sf_visits_since_SIP_cases) %>%
  add_trace(x = ~visit_counts_low_zip, y = ~confirmed_cases, type = 'scatter', mode = 'markers') %>%
  add_trace(x = ~visit_counts_low_zip, y = fitted(lm(confirmed_cases~ visit_counts_low_zip, sf_visits_since_SIP_cases)), mode = 'lines', showlegend = F) %>%
  layout(xaxis = list(title = 'Number of visits since SIP (low)'), yaxis = list(title = 'Current cases'), title = "SFC, cases and visits since SIP")

fig_sf_cases_visits_since_SIP_low
model_cases_visits_since_SIP_low <- lm(confirmed_cases ~ visit_counts_low_zip, sf_visits_since_SIP_cases)

summary(model_cases_visits_since_SIP_low)
## 
## Call:
## lm(formula = confirmed_cases ~ visit_counts_low_zip, data = sf_visits_since_SIP_cases)
## 
## Residuals:
##      Min       1Q   Median       3Q      Max 
## -125.827  -29.393   -4.704   39.462  159.397 
## 
## Coefficients:
##                        Estimate Std. Error t value Pr(>|t|)    
## (Intercept)          -2.066e+01  2.990e+01  -0.691 0.497285    
## visit_counts_low_zip  1.550e-04  3.579e-05   4.330 0.000295 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 69.51 on 21 degrees of freedom
## Multiple R-squared:  0.4717, Adjusted R-squared:  0.4466 
## F-statistic: 18.75 on 1 and 21 DF,  p-value: 0.0002949

Looks pretty similar to the week before SIP figure.

What about just the week after/around SIP? (3/16-3/22)

sf_visits_week1 <- sfc_patterns_origins_daily_week1 %>%
  left_join(sf_bg_zctas, by = c("origin_census_block_group" = "blockgroup")) %>%
  group_by(zipcode) %>%
  summarize(visit_counts_high_zip = sum(visit_counts_high), visit_counts_low_zip = sum(visit_counts_low)) %>%
  filter(!is.na(visit_counts_high_zip))

sf_visits_week1_cases <- left_join(sf_visits_week1, sf_cases_zip %>% filter(date == max(date))) %>% filter(!is.na(confirmed_cases))
  
# plot 
fig_sf_cases_visits_week1 <- plot_ly(sf_visits_week1_cases) %>%
  add_trace(x = ~visit_counts_high_zip, y = ~confirmed_cases, type = 'scatter', mode = 'markers') %>%
  add_trace(x = ~visit_counts_high_zip, y = fitted(lm(confirmed_cases~ visit_counts_high_zip, sf_visits_week1_cases)), mode = 'lines', showlegend = F) %>%
  layout(xaxis = list(title = 'Number of visits since SIP (high)'), yaxis = list(title = 'Current cases'), title = "SFC, cases and visits in week after SIP ( 3/16-3/22)")

fig_sf_cases_visits_week1
model_cases_visits_week1 <- lm(confirmed_cases ~ visit_counts_high_zip, sf_visits_week1_cases)

summary(model_cases_visits_week1)
## 
## Call:
## lm(formula = confirmed_cases ~ visit_counts_high_zip, data = sf_visits_week1_cases)
## 
## Residuals:
##      Min       1Q   Median       3Q      Max 
## -121.581  -38.174    1.435   41.284  163.241 
## 
## Coefficients:
##                         Estimate Std. Error t value Pr(>|t|)    
## (Intercept)           -1.932e+01  2.989e+01  -0.646 0.524980    
## visit_counts_high_zip  8.561e-04  1.996e-04   4.288 0.000326 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 69.82 on 21 degrees of freedom
## Multiple R-squared:  0.4669, Adjusted R-squared:  0.4415 
## F-statistic: 18.39 on 1 and 21 DF,  p-value: 0.0003261

Note that my “1 week post” actually starts 3/16 instead of 3/17, but that could be adjusted, I just kept it like this now since it was easier since that’s already how the data was bucketed. Could easily adjust this later though.

Updates 5/28/20

Mapping the movement data

Thinking about whether it’s possible that we’re missing visits by just looking at origins/how much that is relevent, I map the zip codes and their movement.

# add average visits per capita
sf_sd_visits_week_pre <- sf_sd_visits_week_pre %>% mutate(avg_visits_per_capita = (visits_per_capita_high + visits_per_capita_low)/2)

sf_visits_since_SIP_cases <- sf_visits_since_SIP_cases %>% mutate(avg_visits_per_capita = (visits_per_capita_high + visits_per_capita_low)/2)

scc_visits_week_pre_cases <- scc_visits_week_pre_cases %>% mutate(avg_visits_per_capita = (visits_per_capita_high + visits_per_capita_low)/2)

pal_1 <-
  colorNumeric(
    palette = "Blues",
    domain =
      c(0,sf_sd_visits_week_pre %>%
      pull(avg_visits_per_capita) %>%
      unique())
  )

pal_2 <-
  colorNumeric(
    palette = "Reds",
    domain =
      c(0,sf_sd_visits_week_pre %>%
      pull(percent_leaving_home) %>%
      unique())
  )

pal_3 <-
  colorNumeric(
    palette = "Blues",
    domain =
      c(0,sf_visits_since_SIP_cases %>%
      pull(avg_visits_per_capita) %>%
      unique())
  )

pal_4 <-
  colorNumeric(
    palette = "Reds",
    domain =
      c(0,sf_sd_cases_curr %>%
      pull(percent_leaving_home) %>%
      unique())
  )

pal_5 <-
  colorNumeric(
    palette = "Blues",
    domain =
      c(0,sf_visits_since_SIP_cases %>%
      pull(cases_by_pop) %>%
      unique())
  )

leaflet() %>%
  addProviderTiles(providers$CartoDB.Positron) %>%
  addPolygons(
    data = sf_sd_visits_week_pre %>% left_join(zctas_94_95, by = c("zipcode" = "ZCTA5CE10")) %>% st_as_sf() %>% st_transform(4326),
    fill = TRUE,
    fillColor = ~pal_1(avg_visits_per_capita),
    color = "white",
    weight = 0.5,
    opacity = 0.5,
    fillOpacity = 0.5,
    group = "Average visits per capita, 1 week pre SIP",
    label = sf_sd_visits_week_pre$avg_visits_per_capita
  ) %>%
    addPolygons(
    data = sf_sd_visits_week_pre %>% left_join(zctas_94_95, by = c("zipcode" = "ZCTA5CE10")) %>% st_as_sf() %>% st_transform(4326),
    fill = TRUE,
    fillColor = ~pal_2(percent_leaving_home),
    color = "white",
    weight = 0.5,
    opacity = 0.5,
    fillOpacity = 0.5,
    group = "Percent leaving home, 1 week pre SIP",
    label = sf_sd_visits_week_pre$percent_leaving_home
  ) %>%
  addPolygons(
    data = sf_visits_since_SIP_cases %>% left_join(zctas_94_95, by = c("zipcode" = "ZCTA5CE10")) %>% st_as_sf() %>% st_transform(4326),
    fill = TRUE,
    fillColor = ~pal_3(avg_visits_per_capita),
    color = "white",
    weight = 0.5,
    opacity = 0.5,
    fillOpacity = 0.5,
    group = "Average visits per capita since SIP",
    label = sf_visits_since_SIP_cases$avg_visits_per_capita
  ) %>%
  addPolygons(
    data = sf_sd_cases_curr %>% left_join(zctas_94_95, by = c("zipcode" = "ZCTA5CE10")) %>% st_as_sf() %>% st_transform(4326),
    fill = TRUE,
    fillColor = ~pal_4(percent_leaving_home),
    color = "white",
    weight = 0.5,
    opacity = 0.5,
    fillOpacity = 0.5,
    group = "Percent leaving home, SIP through 1 week ago",
    label = sf_sd_cases_curr$percent_leaving_home
  ) %>%
  addPolygons(
    data = sf_visits_since_SIP_cases %>% left_join(zctas_94_95, by = c("zipcode" = "ZCTA5CE10")) %>% st_as_sf() %>% st_transform(4326),
    fill = TRUE,
    fillColor = ~pal_5(cases_by_pop),
    color = "white",
    weight = 0.5,
    opacity = 0.5,
    fillOpacity = 0.5,
    group = "Cases per capita",
    label = sf_visits_since_SIP_cases$cases_by_pop
  ) %>%
  addLayersControl(
    overlayGroups = c("Average visits per capita, 1 week pre SIP", "Percent leaving home, 1 week pre SIP", "Average visits per capita since SIP", "Percent leaving home, SIP through 1 week ago","Cases per capita")
  )

Processing visits from SF block groups to other counties for pre SIP

saveCountyPatternsOrigin <- function(week, filter_loc, filter_loc_abr) {

  patterns <-
  readRDS(paste0(sg_path,'weekly-patterns/v2/main-file/',week,'-weekly-patterns-ca.rds'))

hps <-
  readRDS(paste0(sg_path,'weekly-patterns/v2/home-summary-file/',week,'-home-panel-summary.rds'))

county_patterns <-
  patterns %>%
  left_join(
    poi_ca %>%
      dplyr::select(
        safegraph_place_id,
        longitude,
        latitude,
        naics_code
      ),
    by = "safegraph_place_id"
  ) %>%
  filter(!is.na(longitude)) %>%
  mutate(
    long = longitude,
    lat = latitude
  ) %>%
  st_as_sf(coords = c("long","lat")) %>%
  st_set_crs(4326) %>%
  st_join(
    counties("CA", cb=F, progress_bar=F) %>%
      filter(NAME == filter_loc) %>%
      st_transform(4326) %>%
      dplyr::select(NAME),
    left=F
  )

county_patterns_origins_daily <-
  process_patterns_origins_daily(county_patterns,hps)

county_patterns_origins_daily <-
  county_patterns_origins_daily %>%
  left_join(
    county_patterns %>%
      as.data.frame() %>%
      dplyr::select(
        safegraph_place_id,
        location_name,
        street_address,
        naics_code
      )
  )

saveRDS(county_patterns_origins_daily,paste0(sg_path,"weekly-patterns/v2/",filter_loc_abr,"_patterns_",week,"_origins_daily.rds"))
}

# saveCountyPatternsOrigin("2020-03-09", "Marin", "mac")
# saveCountyPatternsOrigin("2020-03-09", "San Mateo", "smc")
# saveCountyPatternsOrigin("2020-03-09", "Contra Costa", "ccc")
# saveCountyPatternsOrigin("2020-03-09", "Alameda", "alc")
# saveCountyPatternsOrigin("2020-03-16", "Santa Clara", "scc")
# saveCountyPatternsOrigin("2020-03-16", "Marin", "mac")
# saveCountyPatternsOrigin("2020-03-16", "San Mateo", "smc")
# saveCountyPatternsOrigin("2020-03-16", "Contra Costa", "ccc")
# saveCountyPatternsOrigin("2020-03-16", "Alameda", "alc")

Obtain any SF block group movement from the Marin, SMC, SCC, Contra Costa, and Alameda data, for the week before SIP.

marin_patterns_origins_daily_week_pre <- readRDS(paste0(sg_path,"weekly-patterns/v2/mac_patterns_2020-03-09_origins_daily.rds"))

smc_patterns_origins_daily_week_pre <- readRDS(paste0(sg_path,"weekly-patterns/v2/smc_patterns_2020-03-09_origins_daily.rds"))

scc_patterns_origins_daily_week_pre <- readRDS(paste0(sg_path,"weekly-patterns/v2/scc_patterns_2020-03-09_origins_daily.rds"))

ccc_patterns_origins_daily_week_pre <- readRDS(paste0(sg_path,"weekly-patterns/v2/ccc_patterns_2020-03-09_origins_daily.rds"))

alc_patterns_origins_daily_week_pre <- readRDS(paste0(sg_path,"weekly-patterns/v2/alc_patterns_2020-03-09_origins_daily.rds"))

# see how many of these results are in SF blockgroups
marin_origins_week_pre_in_sf <- marin_patterns_origins_daily_week_pre %>%
  left_join(sf_bg_zctas, by = c("origin_census_block_group" = "blockgroup")) %>%
  group_by(zipcode) %>%
  summarize(visit_counts_high_zip = sum(visit_counts_high), visit_counts_low_zip = sum(visit_counts_low)) %>%
  filter(!is.na(visit_counts_high_zip)) %>%
  mutate(visit_counts_avg = (visit_counts_high_zip + visit_counts_low_zip)/2)

smc_origins_week_pre_in_sf <- smc_patterns_origins_daily_week_pre %>%
  left_join(sf_bg_zctas, by = c("origin_census_block_group" = "blockgroup")) %>%
  group_by(zipcode) %>%
  summarize(visit_counts_high_zip = sum(visit_counts_high), visit_counts_low_zip = sum(visit_counts_low)) %>%
  filter(!is.na(visit_counts_high_zip)) %>%
  mutate(visit_counts_avg = (visit_counts_high_zip + visit_counts_low_zip)/2)

scc_origins_week_pre_in_sf <- scc_patterns_origins_daily_week_pre %>%
  left_join(sf_bg_zctas, by = c("origin_census_block_group" = "blockgroup")) %>%
  group_by(zipcode) %>%
  summarize(visit_counts_high_zip = sum(visit_counts_high), visit_counts_low_zip = sum(visit_counts_low)) %>%
  filter(!is.na(visit_counts_high_zip)) %>%
  mutate(visit_counts_avg = (visit_counts_high_zip + visit_counts_low_zip)/2)

ccc_origins_week_pre_in_sf <- ccc_patterns_origins_daily_week_pre %>%
  left_join(sf_bg_zctas, by = c("origin_census_block_group" = "blockgroup")) %>%
  group_by(zipcode) %>%
  summarize(visit_counts_high_zip = sum(visit_counts_high), visit_counts_low_zip = sum(visit_counts_low)) %>%
  filter(!is.na(visit_counts_high_zip)) %>%
  mutate(visit_counts_avg = (visit_counts_high_zip + visit_counts_low_zip)/2)

alc_origins_week_pre_in_sf <- alc_patterns_origins_daily_week_pre %>%
  left_join(sf_bg_zctas, by = c("origin_census_block_group" = "blockgroup")) %>%
  group_by(zipcode) %>%
  summarize(visit_counts_high_zip = sum(visit_counts_high), visit_counts_low_zip = sum(visit_counts_low)) %>%
  filter(!is.na(visit_counts_high_zip)) %>%
  mutate(visit_counts_avg = (visit_counts_high_zip + visit_counts_low_zip)/2)

kable(marin_origins_week_pre_in_sf %>% dplyr::select(zipcode, visit_counts_avg))
zipcode visit_counts_avg
94102 475.97292
94103 789.51439
94105 258.97058
94107 315.39244
94108 1604.53485
94109 5002.97164
94110 5710.96985
94111 67.43151
94112 3533.98184
94114 1526.43155
94115 2959.83404
94116 2679.65911
94117 1940.38967
94118 8369.04381
94121 3049.50423
94122 5178.30592
94123 3741.92898
94124 287.55985
94127 1323.31935
94131 1867.63030
94132 1086.23054
94133 1510.32035
94134 766.19675
94158 98.10206
kable(smc_origins_week_pre_in_sf %>% dplyr::select(zipcode, visit_counts_avg))
zipcode visit_counts_avg
94102 8822.9942
94103 9445.9243
94105 4743.6358
94107 10827.6865
94108 3274.3621
94109 13889.0510
94110 51052.2628
94111 767.8747
94112 147166.6894
94114 11950.1894
94115 13134.4186
94116 62232.3329
94117 17343.8670
94118 22733.6120
94121 27290.3088
94122 55861.6978
94123 7899.8366
94124 44383.8291
94127 16828.2167
94131 25830.8609
94132 50896.3940
94133 8440.6738
94134 74335.7403
94158 8250.9984
kable(scc_origins_week_pre_in_sf %>% dplyr::select(zipcode, visit_counts_avg))
zipcode visit_counts_avg
94102 681.6278
94103 1593.4132
94105 1399.6867
94107 2417.7694
94108 902.2337
94109 3250.7088
94110 5295.8956
94112 6500.4987
94114 1494.4945
94115 3631.1729
94116 5285.5545
94117 5251.1729
94118 4615.9181
94121 3848.4594
94122 6241.3229
94123 1245.2389
94124 4479.2548
94127 999.4394
94131 2819.7143
94132 4129.0364
94133 1792.9758
94134 3180.9474
94158 1779.9106
kable(ccc_origins_week_pre_in_sf %>% dplyr::select(zipcode, visit_counts_avg))
zipcode visit_counts_avg
94102 427.88525
94103 1839.44877
94104 15.27273
94105 496.92492
94107 1342.25672
94108 1244.02409
94109 2186.76344
94110 4305.36111
94111 76.44088
94112 5427.29499
94114 718.77727
94115 3125.59312
94116 1903.14038
94117 2048.10992
94118 2151.35352
94121 2057.74163
94122 2739.95306
94123 778.72194
94124 2900.84678
94127 746.78034
94131 1941.41929
94132 1413.69885
94133 1563.17602
94134 2959.91844
94158 683.09438
kable(alc_origins_week_pre_in_sf %>% dplyr::select(zipcode, visit_counts_avg))
zipcode visit_counts_avg
94102 3557.1429
94103 6661.3442
94104 264.6690
94105 3991.7308
94107 4663.0451
94108 3079.6958
94109 8576.2139
94110 13280.1975
94111 471.0046
94112 16708.3392
94114 2920.4335
94115 3034.3397
94116 5947.1702
94117 7028.2598
94118 7522.3348
94121 8761.4736
94122 10245.1295
94123 2888.1686
94124 7845.0807
94127 2320.1625
94131 4460.3709
94132 7488.0836
94133 3665.8908
94134 6392.1383
94158 2128.0052

Compare to the numbers we had before in SF.

sf_visits_week_pre <- sf_visits_week_pre %>% mutate(visit_counts_avg = (visit_counts_high_zip + visit_counts_low_zip)/2)

kable(sf_visits_week_pre %>% dplyr::select(zipcode, visit_counts_avg))
zipcode visit_counts_avg
94102 110456.1680
94103 97654.8866
94104 669.2848
94105 30200.8654
94107 87742.3934
94108 58180.4614
94109 182261.6564
94110 325142.3979
94111 6392.1666
94112 384496.7076
94114 105694.0045
94115 136504.0604
94116 214065.0066
94117 146914.1251
94118 222036.5605
94121 205688.9278
94122 280201.0580
94123 72071.9397
94124 197262.4305
94127 83415.7547
94131 137383.4522
94132 152203.8776
94133 109806.4387
94134 194210.6322
94158 53867.1903

Definitely smaller numbers, but not insignificant.

Combine with the SF data.

combined_sf_week_pre <- rbind(sf_visits_week_pre, marin_origins_week_pre_in_sf, smc_origins_week_pre_in_sf, scc_origins_week_pre_in_sf, ccc_origins_week_pre_in_sf, alc_origins_week_pre_in_sf) %>%
  group_by(zipcode) %>%
  summarize(total_visit_counts_high = sum(visit_counts_high_zip), total_visit_counts_low = sum(visit_counts_low_zip)) %>%
  mutate(total_visit_counts_avg = (total_visit_counts_high + total_visit_counts_low)/2) %>%
  left_join(sf_cases_zip_edit %>% filter(date == max(date)) %>% dplyr::select(zipcode, confirmed_cases, total_pop_zip, cases_by_pop)) %>%
  mutate(total_visits_per_capita_avg = total_visit_counts_avg / total_pop_zip) %>% filter(!is.na(confirmed_cases))

Look at the differences in visits per capita when using the combined data and the original just SF one, again for the week before SIP.

dif_visits_week_pre <- left_join(combined_sf_week_pre, sf_visits_week_pre %>% dplyr::select(zipcode, visit_counts_avg)) %>%
  mutate(visits_per_capita_avg = visit_counts_avg / total_pop_zip, dif_visits_per_capita = total_visits_per_capita_avg - visits_per_capita_avg) %>% 
  left_join(zctas_94_95, by = c("zipcode" = "ZCTA5CE10")) %>% st_as_sf() %>% st_transform(4326)

mapview(dif_visits_week_pre %>% dplyr::select(dif_visits_per_capita))

Let’s compare these patterns to the week after SIP.

marin_patterns_origins_daily_week1 <- readRDS(paste0(sg_path,"weekly-patterns/v2/mac_patterns_2020-03-16_origins_daily.rds"))

smc_patterns_origins_daily_week1 <- readRDS(paste0(sg_path,"weekly-patterns/v2/smc_patterns_2020-03-16_origins_daily.rds"))

scc_patterns_origins_daily_week1 <- readRDS(paste0(sg_path,"weekly-patterns/v2/scc_patterns_2020-03-16_origins_daily.rds"))

ccc_patterns_origins_daily_week1 <- readRDS(paste0(sg_path,"weekly-patterns/v2/ccc_patterns_2020-03-16_origins_daily.rds"))

alc_patterns_origins_daily_week1 <- readRDS(paste0(sg_path,"weekly-patterns/v2/alc_patterns_2020-03-16_origins_daily.rds"))

# see how many of these results are in SF blockgroups
marin_origins_week1_in_sf <- marin_patterns_origins_daily_week1 %>%
  left_join(sf_bg_zctas, by = c("origin_census_block_group" = "blockgroup")) %>%
  group_by(zipcode) %>%
  summarize(visit_counts_high_zip = sum(visit_counts_high), visit_counts_low_zip = sum(visit_counts_low)) %>%
  filter(!is.na(visit_counts_high_zip)) %>%
  mutate(visit_counts_avg = (visit_counts_high_zip + visit_counts_low_zip)/2)

smc_origins_week1_in_sf <- smc_patterns_origins_daily_week1 %>%
  left_join(sf_bg_zctas, by = c("origin_census_block_group" = "blockgroup")) %>%
  group_by(zipcode) %>%
  summarize(visit_counts_high_zip = sum(visit_counts_high), visit_counts_low_zip = sum(visit_counts_low)) %>%
  filter(!is.na(visit_counts_high_zip)) %>%
  mutate(visit_counts_avg = (visit_counts_high_zip + visit_counts_low_zip)/2)

scc_origins_week1_in_sf <- scc_patterns_origins_daily_week1 %>%
  left_join(sf_bg_zctas, by = c("origin_census_block_group" = "blockgroup")) %>%
  group_by(zipcode) %>%
  summarize(visit_counts_high_zip = sum(visit_counts_high), visit_counts_low_zip = sum(visit_counts_low)) %>%
  filter(!is.na(visit_counts_high_zip)) %>%
  mutate(visit_counts_avg = (visit_counts_high_zip + visit_counts_low_zip)/2)

ccc_origins_week1_in_sf <- ccc_patterns_origins_daily_week1 %>%
  left_join(sf_bg_zctas, by = c("origin_census_block_group" = "blockgroup")) %>%
  group_by(zipcode) %>%
  summarize(visit_counts_high_zip = sum(visit_counts_high), visit_counts_low_zip = sum(visit_counts_low)) %>%
  filter(!is.na(visit_counts_high_zip)) %>%
  mutate(visit_counts_avg = (visit_counts_high_zip + visit_counts_low_zip)/2)

alc_origins_week1_in_sf <- alc_patterns_origins_daily_week1 %>%
  left_join(sf_bg_zctas, by = c("origin_census_block_group" = "blockgroup")) %>%
  group_by(zipcode) %>%
  summarize(visit_counts_high_zip = sum(visit_counts_high), visit_counts_low_zip = sum(visit_counts_low)) %>%
  filter(!is.na(visit_counts_high_zip)) %>%
  mutate(visit_counts_avg = (visit_counts_high_zip + visit_counts_low_zip)/2)

# combine with SF
combined_sf_week1 <- rbind(sf_visits_week1 %>% mutate(visit_counts_avg = (visit_counts_high_zip + visit_counts_low_zip)/2), marin_origins_week1_in_sf, smc_origins_week1_in_sf, scc_origins_week1_in_sf, ccc_origins_week1_in_sf, alc_origins_week1_in_sf) %>%
  group_by(zipcode) %>%
  summarize(total_visit_counts_high = sum(visit_counts_high_zip), total_visit_counts_low = sum(visit_counts_low_zip)) %>%
  mutate(total_visit_counts_avg = (total_visit_counts_high + total_visit_counts_low)/2) %>%
  left_join(sf_cases_zip_edit %>% filter(date == max(date)) %>% dplyr::select(zipcode, confirmed_cases, total_pop_zip, cases_by_pop)) %>%
  mutate(total_visits_per_capita_avg = total_visit_counts_avg / total_pop_zip) %>% filter(!is.na(confirmed_cases))

# differences
dif_visits_week1 <- left_join(combined_sf_week1, sf_visits_week1 %>% mutate(visit_counts_avg = (visit_counts_high_zip + visit_counts_low_zip)/2) %>% dplyr::select(zipcode, visit_counts_avg)) %>%
  mutate(visits_per_capita_avg = visit_counts_avg / total_pop_zip, dif_visits_per_capita = total_visits_per_capita_avg - visits_per_capita_avg) %>% 
  left_join(zctas_94_95, by = c("zipcode" = "ZCTA5CE10")) %>% st_as_sf() %>% st_transform(4326)

mapview(dif_visits_week1 %>% dplyr::select(dif_visits_per_capita))

It looks like the differences between block groups are similar (relatively) to the week before SIP.

Let’s try with these more accurate numbers.

Repeating visit analyses

Total visits and total cases

# plot 
fig_sf_cases_visits_week_pre_combined <- plot_ly(combined_sf_week_pre) %>%
  add_trace(x = ~total_visit_counts_avg, y = ~confirmed_cases, type = 'scatter', mode = 'markers') %>%
  add_trace(x = ~total_visit_counts_avg, y = fitted(lm(confirmed_cases~ total_visit_counts_avg, combined_sf_week_pre)), mode = 'lines', showlegend = F) %>%
  layout(xaxis = list(title = 'Number of visits in week before SIP'), yaxis = list(title = 'Current cases'), title = "SFC, cases and visits in week before SIP, combined data")

fig_sf_cases_visits_week_pre_combined
model_cases_visits_week_pre_combined <- lm(confirmed_cases ~ total_visit_counts_avg, combined_sf_week_pre)

summary(model_cases_visits_week_pre_combined)
## 
## Call:
## lm(formula = confirmed_cases ~ total_visit_counts_avg, data = combined_sf_week_pre)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -107.28  -27.91  -10.03   37.92  186.99 
## 
## Coefficients:
##                          Estimate Std. Error t value Pr(>|t|)    
## (Intercept)            -5.4077470 28.9449346  -0.187 0.853588    
## total_visit_counts_avg  0.0004902  0.0001236   3.967 0.000703 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 72.3 on 21 degrees of freedom
## Multiple R-squared:  0.4284, Adjusted R-squared:  0.4011 
## F-statistic: 15.74 on 1 and 21 DF,  p-value: 0.0007032

Cases per capita

fig_sfc_cases_pop_visits_week_pre_combined <- plot_ly(combined_sf_week_pre) %>%
  add_trace(x = ~total_visit_counts_avg, y = ~cases_by_pop, type = 'scatter', mode = 'markers') %>%
  add_trace(x = ~total_visit_counts_avg, y = fitted(lm(cases_by_pop~ total_visit_counts_avg, combined_sf_week_pre)), mode = 'lines', showlegend = F) %>%
  layout(xaxis = list(title = 'Number of visits in week before SIP'), yaxis = list(title = 'Current cases per capita'), title = "SFC, cases and visits in week before SIP, combined data")

fig_sfc_cases_pop_visits_week_pre_combined
sfc_model_cases_pop_visits_week_pre_combined <- lm(cases_by_pop ~ total_visit_counts_avg, combined_sf_week_pre)

summary(sfc_model_cases_pop_visits_week_pre_combined)
## 
## Call:
## lm(formula = cases_by_pop ~ total_visit_counts_avg, data = combined_sf_week_pre)
## 
## Residuals:
##        Min         1Q     Median         3Q        Max 
## -0.0018096 -0.0012676 -0.0006227  0.0012212  0.0032183 
## 
## Coefficients:
##                         Estimate Std. Error t value Pr(>|t|)  
## (Intercept)            1.902e-03  6.944e-04   2.739   0.0123 *
## total_visit_counts_avg 2.293e-09  2.965e-09   0.773   0.4479  
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.001735 on 21 degrees of freedom
## Multiple R-squared:  0.0277, Adjusted R-squared:  -0.01861 
## F-statistic: 0.5982 on 1 and 21 DF,  p-value: 0.4479

Visits per capita

fig_sfc_cases_pop_visits_pop_week_pre_combined <- plot_ly(combined_sf_week_pre) %>%
  add_trace(x = ~total_visits_per_capita_avg, y = ~cases_by_pop, type = 'scatter', mode = 'markers') %>%
  add_trace(x = ~total_visits_per_capita_avg, y = fitted(lm(cases_by_pop~ total_visits_per_capita_avg, combined_sf_week_pre)), mode = 'lines', showlegend = F) %>%
  layout(xaxis = list(title = 'Number of visits per capita in week before SIP'), yaxis = list(title = 'Current cases per capita'), title = "SFC, cases and visits in week before SIP, combined data")

fig_sfc_cases_pop_visits_pop_week_pre_combined
sfc_model_cases_pop_visits_pop_week_pre <- lm(cases_by_pop ~ total_visits_per_capita_avg, combined_sf_week_pre)

summary(scc_model_cases_pop_visits_pop_week_pre)
## 
## Call:
## lm(formula = cases_by_pop ~ visits_per_capita_high, data = scc_visits_week_pre_cases)
## 
## Residuals:
##        Min         1Q     Median         3Q        Max 
## -0.0011584 -0.0005801 -0.0001982  0.0002486  0.0027879 
## 
## Coefficients:
##                          Estimate Std. Error t value Pr(>|t|)   
## (Intercept)             1.919e-03  6.428e-04   2.985  0.00429 **
## visits_per_capita_high -6.928e-05  7.197e-05  -0.963  0.34005   
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.0008882 on 53 degrees of freedom
## Multiple R-squared:  0.01719,    Adjusted R-squared:  -0.001356 
## F-statistic: 0.9269 on 1 and 53 DF,  p-value: 0.3401

Also do multiple regression with absolute cases, absolute visits, and population

cases_pop_visits_sfc_combined <- lm(confirmed_cases ~ total_visit_counts_avg + total_pop_zip, combined_sf_week_pre)

summary(cases_pop_visits_sfc_combined)
## 
## Call:
## lm(formula = confirmed_cases ~ total_visit_counts_avg + total_pop_zip, 
##     data = combined_sf_week_pre)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -111.70  -48.29  -12.48   39.98  162.22 
## 
## Coefficients:
##                          Estimate Std. Error t value Pr(>|t|)
## (Intercept)            -3.229e+01  3.609e+01  -0.895    0.381
## total_visit_counts_avg  6.244e-05  3.706e-04   0.168    0.868
## total_pop_zip           3.032e-03  2.480e-03   1.223    0.236
## 
## Residual standard error: 71.47 on 20 degrees of freedom
## Multiple R-squared:  0.4681, Adjusted R-squared:  0.4149 
## F-statistic: 8.801 on 2 and 20 DF,  p-value: 0.001812

Adding cases to % leaving and visits

First just look at % leaving and visits again with the updates to visits to see if this changed anything.

# get % leaving in week before SIP
sf_sd_visits_week_pre_update <- left_join(sf_sd_zip_week_pre, combined_sf_week_pre) %>% filter(!is.na(percent_leaving_home) & !is.na(total_visits_per_capita_avg))

# plot
fig_sfc_visits_pop_leaving_week_pre_update <- plot_ly(sf_sd_visits_week_pre_update) %>%
  add_trace(x = ~total_visits_per_capita_avg, y = ~percent_leaving_home, type = 'scatter', mode = 'markers') %>%
  add_trace(x = ~total_visits_per_capita_avg, y = fitted(lm(percent_leaving_home~ total_visits_per_capita_avg, sf_sd_visits_week_pre_update)), mode = 'lines', showlegend = F) %>%
  layout(xaxis = list(title = 'Number of visits per capita in week before SIP'), yaxis = list(title = '% leaving home in week before SIP'), title = "SFC, leaving home and visits in week before SIP, updated")

fig_sfc_visits_pop_leaving_week_pre_update

Didn’t alter the trend all that much.

Total devices leaving and total visits.

sf_sd_visits_week_pre_update <- sf_sd_visits_week_pre_update %>% mutate(total_leaving_home_zip = total_devices - total_at_home_zip)

fig_sfc_visits_leaving_week_pre_update <- plot_ly(sf_sd_visits_week_pre_update) %>%
  add_trace(x = ~total_visit_counts_avg, y = ~total_leaving_home_zip, type = 'scatter', mode = 'markers') %>%
  add_trace(x = ~total_visit_counts_avg, y = fitted(lm(total_leaving_home_zip~ total_visit_counts_avg, sf_sd_visits_week_pre_update)), mode = 'lines', showlegend = F) %>%
  layout(xaxis = list(title = 'Number of visits in week before SIP'), yaxis = list(title = 'Number of devices leaving home in week before SIP'), title = "SFC, leaving home and visits in week before SIP")

fig_sfc_visits_leaving_week_pre_update

Adding the cases to the first plot.

fig_sfc_visits_pop_leaving_week_pre_update_cases <- plot_ly(sf_sd_visits_week_pre_update) %>%
  add_trace(x = ~total_visits_per_capita_avg, y = ~percent_leaving_home, type = 'scatter', mode = 'markers', color = ~cases_by_pop) %>%
  add_trace(x = ~total_visits_per_capita_avg, y = fitted(lm(percent_leaving_home~ total_visits_per_capita_avg, sf_sd_visits_week_pre_update)), mode = 'lines', showlegend = F) %>%
  layout(xaxis = list(title = 'Number of visits per capita in week before SIP'), yaxis = list(title = '% leaving home in week before SIP'), title = "SFC, leaving home and visits in week before SIP, updated")

fig_sfc_visits_pop_leaving_week_pre_update_cases

Looking at case growth from baseline in 1 week and visits 2 weeks prior to that week

Same analysis done previously for % at home, now with visits.

# # set the cutoff level
# cutoff_cases_by_pop <- 0.0028
# # how many days after cutoff level to look
# later_date_sep = 7
# # how many days prior to look at movement averages
# prior_movement_time = 14

# # filter to just when cases are above the cutoff
# sf_cases_cutoff <- sf_cases_zip_edit %>% filter(cases_by_pop >= cutoff_cases_by_pop)
# 
# zipcodes_in_cutoff <- unique(sf_cases_cutoff$zipcode)
# 
later_cases_change2 <- vector(mode = "numeric", length = length(zipcodes_in_cutoff))
visits_tot <- vector(mode = "numeric", length = length(zipcodes_in_cutoff))
visits_by_pop <- vector(mode = "numeric", length = length(zipcodes_in_cutoff))

# get the visits data in a form where it is averaged by day for each zipcode
sf_visits_since_SIP_by_day <- sfc_patterns_origins_daily_since_SIP %>%
  left_join(sf_bg_zctas, by = c("origin_census_block_group" = "blockgroup")) %>%
  group_by(zipcode, date) %>%
  summarize(visit_counts_high_zip = sum(visit_counts_high), visit_counts_low_zip = sum(visit_counts_low)) %>%
  filter(!is.na(visit_counts_high_zip)) %>%
  mutate(visit_counts_avg = (visit_counts_high_zip + visit_counts_low_zip)/2) %>%
  left_join(sf_visits_since_SIP_cases %>% dplyr::select(zipcode, total_pop_zip)) %>%
  mutate(visits_per_capita_avg = visit_counts_avg / total_pop_zip)

for (i in 1:length(zipcodes_in_cutoff)) {
  curr_zip <- zipcodes_in_cutoff[i]
  curr_vals <- sf_cases_cutoff %>% filter(zipcode == curr_zip)
  later_cases_curr = NA
  visits_count_sum = NA
  visits_per_cap = NA
  if (curr_vals$date[1] != sf_cases_zip_edit$date[1]) { # need to make sure it didn't start above the cutoff
    later_cases_curr = curr_vals$cases_by_pop[later_date_sep] - curr_vals$cases_by_pop[1] # change in cases an amount of time later
    # get the percent of devices leaving home in an amount of time before the date that cases went above the threshold level
    visits_computed = sf_visits_since_SIP_by_day %>% 
      filter(zipcode == curr_zip & date < curr_vals$date[1] & (date >= curr_vals$date[1] - prior_movement_time)) %>% 
      summarize(total_visits = sum(visit_counts_avg)) %>% 
      left_join(sf_visits_since_SIP_cases %>% dplyr::select(zipcode, total_pop_zip)) %>%
  mutate(visits_per_capita = total_visits/total_pop_zip) 
    visits_count_sum = visits_computed$total_visits
    visits_per_cap = visits_computed$visits_per_capita
  }  
  later_cases_change2[i] = later_cases_curr
  visits_tot[i] = visits_count_sum
  visits_by_pop[i] = visits_per_cap
}

# combine
cases_growth_baseline_visits <- data.frame(zipcodes_in_cutoff, later_cases_change2, visits_tot, visits_by_pop) %>% filter(!is.na(later_cases_change))

# plot
fig_growth_visits <- plot_ly(cases_growth_baseline_visits) %>%
  add_trace(x = ~visits_tot, y = ~later_cases_change2, type = 'scatter', mode = 'markers') %>%
  add_trace(x = ~visits_tot, y = fitted(lm(cases_growth_baseline_visits$later_cases_change2~ cases_growth_baseline_visits$visits_tot)), mode = 'lines', showlegend = F) %>%
  layout(xaxis = list(title = 'number of visits in 2 weeks before'), yaxis = list(title = 'Change in cases per capita 1 week later'), title = "SF, change in cases/person in week after >0.0028 cases/person vs visits in 2 weeks before")

fig_growth_visits
model_growth_visits <- lm(cases_growth_baseline_visits$later_cases_change2~ cases_growth_baseline_visits$visits_tot)

summary(model_growth_visits)
## 
## Call:
## lm(formula = cases_growth_baseline_visits$later_cases_change2 ~ 
##     cases_growth_baseline_visits$visits_tot)
## 
## Residuals:
##          1          2          3          4          5          6          7 
##  7.667e-05 -1.224e-04 -5.557e-05  4.982e-04  2.298e-04 -2.442e-04 -3.825e-04 
## 
## Coefficients:
##                                          Estimate Std. Error t value Pr(>|t|)
## (Intercept)                             2.819e-04  3.044e-04   0.926    0.397
## cases_growth_baseline_visits$visits_tot 9.717e-10  1.014e-09   0.958    0.382
## 
## Residual standard error: 0.0003259 on 5 degrees of freedom
## Multiple R-squared:  0.1551, Adjusted R-squared:  -0.01391 
## F-statistic: 0.9177 on 1 and 5 DF,  p-value: 0.3821

Also plot with visits per capita.

fig_growth_visits_pop <- plot_ly(cases_growth_baseline_visits) %>%
  add_trace(x = ~visits_by_pop, y = ~later_cases_change2, type = 'scatter', mode = 'markers') %>%
  add_trace(x = ~visits_by_pop, y = fitted(lm(cases_growth_baseline_visits$later_cases_change2~ cases_growth_baseline_visits$visits_by_pop)), mode = 'lines', showlegend = F) %>%
  layout(xaxis = list(title = 'number of visits per capita in 2 weeks before'), yaxis = list(title = 'Change in cases per capita 1 week later'), title = "SF, change in cases/person in week after >0.0028 cases/person vs visits/person in 2 weeks before")

fig_growth_visits_pop
model_growth_visits_pop <- lm(cases_growth_baseline_visits$later_cases_change2~ cases_growth_baseline_visits$visits_by_pop)

summary(model_growth_visits_pop)
## 
## Call:
## lm(formula = cases_growth_baseline_visits$later_cases_change2 ~ 
##     cases_growth_baseline_visits$visits_by_pop)
## 
## Residuals:
##          1          2          3          4          5          6          7 
##  0.0000301 -0.0002855 -0.0001264  0.0006412  0.0001137 -0.0002476 -0.0001255 
## 
## Coefficients:
##                                              Estimate Std. Error t value
## (Intercept)                                -0.0002843  0.0017013  -0.167
## cases_growth_baseline_visits$visits_by_pop  0.0001424  0.0002900   0.491
##                                            Pr(>|t|)
## (Intercept)                                   0.874
## cases_growth_baseline_visits$visits_by_pop    0.644
## 
## Residual standard error: 0.0003463 on 5 degrees of freedom
## Multiple R-squared:  0.046,  Adjusted R-squared:  -0.1448 
## F-statistic: 0.2411 on 1 and 5 DF,  p-value: 0.6442

Visits over time

Looking at the trends in visits over time in each zip code in SFC (noting the limitation that this only includes visits to other locations in SF with the current data)

fig_sf_visits_by_day <- sf_visits_since_SIP_by_day %>% plot_ly() %>% add_lines(x = ~date, y = ~visits_per_capita_avg, color = ~zipcode) %>% 
  layout(xaxis = list(title = 'Date'), yaxis = list(title = 'visits per capita'), title = "SFC visits per day per capita to SFC locations")

fig_sf_visits_by_day

Compare to case growth over time, like I have done previously for % leaving home.

# overall cases in SFC by date
sf_cases_by_date <- sf_cases_zip %>% group_by(date) %>% 
  summarize(total_cases = sum(confirmed_cases))

# also from LA times separately
ca_county_cases <- 
  read_csv("https://raw.githubusercontent.com/datadesk/california-coronavirus-data/master/latimes-county-totals.csv")
sf_county_cases <-
  ca_county_cases %>% 
  filter(county == "San Francisco") %>% 
  filter(!is.na(confirmed_cases)) %>% left_join(sf_cases_by_date) %>%
  mutate(dif_cases = confirmed_cases - total_cases)

# the zip code data has some holes and doesn't go back as far in time so will use the LA Times overall data
sf_county_cases <- sf_county_cases %>%
  mutate(dcases = c(NA,diff(confirmed_cases)), 
         past_cases = c(NA, sf_county_cases$confirmed_cases[1:(nrow(sf_county_cases)-1)]), 
         cases_growth_daily = (dcases / past_cases)*100)

sf_visits_since_SIP_by_day_total_cases <- sf_visits_since_SIP_by_day %>%
  group_by(date) %>%
  summarize(total_visits = sum(visit_counts_avg)) %>% 
  left_join(sf_county_cases)

ay <- list(
  tickfont = list(color = "red"),
  overlaying = "y",
  side = "right",
  title = "case growth rate"
)

fig_sf_visits_by_day <- plot_ly(sf_visits_since_SIP_by_day_total_cases) %>% add_lines(x = ~date, y = ~total_visits, name = "total visits") %>% add_lines(x = ~date, y = ~cases_growth_daily, name = "case growth rate", yaxis = "y2") %>% layout(title = "Visits and Case Growth Rate for SFC", yaxis2 = ay, xaxis = list(title = "Date"))

fig_sf_visits_by_day

Visits in the week before and after SIP. This does include visits from SF block groups to other counties.

# get only visits relevant to SFC
sfc_patterns_origins_daily_week_pre_in_sf <- sfc_patterns_origins_daily_week_pre %>%
  filter(origin_census_block_group %in% sf_bg_zctas$blockgroup)

sfc_patterns_origins_daily_week1_in_sf <- sfc_patterns_origins_daily_week1 %>%
  filter(origin_census_block_group %in% sf_bg_zctas$blockgroup)

marin_patterns_origins_daily_week1_in_sf <- marin_patterns_origins_daily_week1 %>%
  filter(origin_census_block_group %in% sf_bg_zctas$blockgroup)

marin_patterns_origins_daily_week_pre_in_sf <- marin_patterns_origins_daily_week_pre %>%
  filter(origin_census_block_group %in% sf_bg_zctas$blockgroup)

smc_patterns_origins_daily_week1_in_sf <- smc_patterns_origins_daily_week1 %>%
  filter(origin_census_block_group %in% sf_bg_zctas$blockgroup)

smc_patterns_origins_daily_week_pre_in_sf <- smc_patterns_origins_daily_week_pre %>%
  filter(origin_census_block_group %in% sf_bg_zctas$blockgroup)

scc_patterns_origins_daily_week1_in_sf <- scc_patterns_origins_daily_week1 %>%
  filter(origin_census_block_group %in% sf_bg_zctas$blockgroup)

scc_patterns_origins_daily_week_pre_in_sf <- scc_patterns_origins_daily_week_pre %>%
  filter(origin_census_block_group %in% sf_bg_zctas$blockgroup)

ccc_patterns_origins_daily_week1_in_sf <- ccc_patterns_origins_daily_week1 %>%
  filter(origin_census_block_group %in% sf_bg_zctas$blockgroup)

ccc_patterns_origins_daily_week_pre_in_sf <- ccc_patterns_origins_daily_week_pre %>%
  filter(origin_census_block_group %in% sf_bg_zctas$blockgroup)

alc_patterns_origins_daily_week1_in_sf <- alc_patterns_origins_daily_week1 %>%
  filter(origin_census_block_group %in% sf_bg_zctas$blockgroup)

alc_patterns_origins_daily_week_pre_in_sf <- alc_patterns_origins_daily_week_pre %>%
  filter(origin_census_block_group %in% sf_bg_zctas$blockgroup)

combined_sf_week_pre_1_by_date <- rbind(sfc_patterns_origins_daily_week_pre_in_sf, sfc_patterns_origins_daily_week1_in_sf, marin_patterns_origins_daily_week1_in_sf, marin_patterns_origins_daily_week_pre_in_sf, smc_patterns_origins_daily_week1_in_sf, smc_patterns_origins_daily_week_pre_in_sf, scc_patterns_origins_daily_week1_in_sf, scc_patterns_origins_daily_week_pre_in_sf, ccc_patterns_origins_daily_week1_in_sf, ccc_patterns_origins_daily_week_pre_in_sf, alc_patterns_origins_daily_week1_in_sf, alc_patterns_origins_daily_week_pre_in_sf) %>%
  left_join(sf_bg_zctas, by = c("origin_census_block_group" = "blockgroup")) %>%
  group_by(zipcode, date) %>%
  summarize(visit_counts_high_zip = sum(visit_counts_high), visit_counts_low_zip = sum(visit_counts_low)) %>%
  filter(!is.na(visit_counts_high_zip)) %>%
  mutate(visit_counts_avg = (visit_counts_high_zip + visit_counts_low_zip)/2) %>%
  left_join(sf_cases_zip_edit %>% filter(date == max(date)) %>% dplyr::select(zipcode, confirmed_cases, total_pop_zip, cases_by_pop)) %>%
  mutate(total_visits_per_capita_avg = visit_counts_avg / total_pop_zip) %>% filter(!is.na(confirmed_cases))

fig_sf_visits_by_day_early <- combined_sf_week_pre_1_by_date %>% plot_ly() %>% add_lines(x = ~date, y = ~total_visits_per_capita_avg, color = ~zipcode) %>% 
  layout(xaxis = list(title = 'Date'), yaxis = list(title = 'visits per capita'), title = "SFC visits per day per capita")

fig_sf_visits_by_day_early

That’s funny. Though not too surprising.

Absolute number of visits:

fig_sf_visits_by_day_early_abs <- combined_sf_week_pre_1_by_date %>% plot_ly() %>% add_lines(x = ~date, y = ~visit_counts_avg, color = ~zipcode) %>% 
  layout(xaxis = list(title = 'Date'), yaxis = list(title = 'total visits'), title = "SFC visits per day")

fig_sf_visits_by_day_early_abs

Visits per person leaving

Visits per person leaving home in this time frame. Combining the social distancing data with the visits data, looking over the whole time frame of the two weeks of visit data.

combined_sf_week_pre_1_total <- combined_sf_week_pre_1_by_date %>%
  group_by(zipcode) %>%
  summarize(total_visits = sum(visit_counts_avg), total_visits_per_capita = sum(total_visits_per_capita_avg))

sf_sd_week_pre_1 <- bay_sd %>% 
  filter(origin_census_block_group %in% sf_blockgroups$GEOID) %>%
  filter(date >= min(combined_sf_week_pre_1_by_date$date) & date <= max(combined_sf_week_pre_1_by_date$date)) %>%
  left_join(sf_bg_zctas %>% dplyr::select(blockgroup, zipcode), by = c("origin_census_block_group" = "blockgroup")) %>%
  group_by(zipcode) %>%
  summarize(total_at_home_zip = sum(completely_home_device_count), total_devices = sum(device_count)) %>%
  mutate(
    percent_at_home = total_at_home_zip*100/total_devices,
    percent_leaving_home = (100 - percent_at_home)
  )

sf_sd_visits_week_pre_1 <- left_join(sf_sd_week_pre_1, combined_sf_week_pre_1_total) %>% 
  left_join(sf_cases_zip %>% filter(date == max(date)) %>% dplyr::select(zipcode, total_pop_zip, confirmed_cases, cases_by_pop)) %>% 
  mutate(num_leaving_home = percent_leaving_home*total_pop_zip/100, visits_per_person_leaving = total_visits / num_leaving_home) %>%
  filter(!is.na(confirmed_cases))

Graph cases and visits per person leaving.

fig_cases_visits_leaving_abs <- plot_ly(sf_sd_visits_week_pre_1 ) %>%
  add_trace(x = ~visits_per_person_leaving, y = ~confirmed_cases, type = 'scatter', mode = 'markers') %>%
  add_trace(x = ~visits_per_person_leaving, y = fitted(lm(confirmed_cases~ visits_per_person_leaving, sf_sd_visits_week_pre_1 )), mode = 'lines', showlegend = F) %>%
  layout(xaxis = list(title = 'Number of visits per person leaving home 3/9-3/22'), yaxis = list(title = 'Current cases'), title = "SFC, cases and visits per person leaving home")

fig_cases_visits_leaving_abs
sfc_model_fig_cases_visits_leaving_abs <- lm(confirmed_cases ~ visits_per_person_leaving, sf_sd_visits_week_pre_1 )

summary(sfc_model_fig_cases_visits_leaving_abs)
## 
## Call:
## lm(formula = confirmed_cases ~ visits_per_person_leaving, data = sf_sd_visits_week_pre_1)
## 
## Residuals:
##    Min     1Q Median     3Q    Max 
## -90.79 -60.08 -13.12  39.64 283.85 
## 
## Coefficients:
##                           Estimate Std. Error t value Pr(>|t|)
## (Intercept)                -44.340     86.116  -0.515    0.612
## visits_per_person_leaving    9.981      6.125   1.630    0.118
## 
## Residual standard error: 90.1 on 21 degrees of freedom
## Multiple R-squared:  0.1123, Adjusted R-squared:  0.06998 
## F-statistic: 2.655 on 1 and 21 DF,  p-value: 0.1181
fig_cases_visits_leaving <- plot_ly(sf_sd_visits_week_pre_1 ) %>%
  add_trace(x = ~visits_per_person_leaving, y = ~cases_by_pop, type = 'scatter', mode = 'markers') %>%
  add_trace(x = ~visits_per_person_leaving, y = fitted(lm(cases_by_pop~ visits_per_person_leaving, sf_sd_visits_week_pre_1 )), mode = 'lines', showlegend = F) %>%
  layout(xaxis = list(title = 'Number of visits per person leaving home 3/9-3/22'), yaxis = list(title = 'Current cases per capita'), title = "SFC, cases and visits per person leaving home")

fig_cases_visits_leaving
sfc_model_fig_cases_visits_leaving <- lm(cases_by_pop ~ visits_per_person_leaving, sf_sd_visits_week_pre_1 )

summary(sfc_model_fig_cases_visits_leaving)
## 
## Call:
## lm(formula = cases_by_pop ~ visits_per_person_leaving, data = sf_sd_visits_week_pre_1)
## 
## Residuals:
##        Min         1Q     Median         3Q        Max 
## -0.0020099 -0.0011592 -0.0005651  0.0009938  0.0032518 
## 
## Coefficients:
##                            Estimate Std. Error t value Pr(>|t|)
## (Intercept)               0.0006089  0.0016351   0.372    0.713
## visits_per_person_leaving 0.0001277  0.0001163   1.098    0.285
## 
## Residual standard error: 0.001711 on 21 degrees of freedom
## Multiple R-squared:  0.05427,    Adjusted R-squared:  0.009233 
## F-statistic: 1.205 on 1 and 21 DF,  p-value: 0.2847
# model with % completely at home and visits per person leaving home
sfc_model_cases_visits_leaving_perc <- lm(cases_by_pop ~ visits_per_person_leaving + percent_at_home, sf_sd_visits_week_pre_1 )

summary(sfc_model_cases_visits_leaving_perc)
## 
## Call:
## lm(formula = cases_by_pop ~ visits_per_person_leaving + percent_at_home, 
##     data = sf_sd_visits_week_pre_1)
## 
## Residuals:
##        Min         1Q     Median         3Q        Max 
## -0.0022632 -0.0010210 -0.0004345  0.0008065  0.0034636 
## 
## Coefficients:
##                             Estimate Std. Error t value Pr(>|t|)  
## (Intercept)               -9.018e-03  4.788e-03  -1.884   0.0742 .
## visits_per_person_leaving  2.445e-07  1.233e-04   0.002   0.9984  
## percent_at_home            3.149e-04  1.486e-04   2.119   0.0468 *
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.001584 on 20 degrees of freedom
## Multiple R-squared:  0.2277, Adjusted R-squared:  0.1505 
## F-statistic: 2.949 on 2 and 20 DF,  p-value: 0.07546

I’m not sure if that is quite the right metric that I want yet, but there is something I’m trying to get at here that might be useful.