library(tidyverse)
library(plotly)
library(sf)
library(mapview)
library(tigris)
library(censusapi)
library(leaflet)
library(lehdr)
library(usmap)
library(lmtest)
library(pracma)
library(lmtest)
library(forecast)
library(vars)
library(rvest)
library(RSelenium)
library(seleniumPipes)
library(dLagM)
library(jsonlite)
library(rgdal)
library(esri2sf)
library(readr)

options(
  tigris_class = "sf",
  tigris_use_cache = TRUE
)

Sys.setenv(CENSUS_KEY="10dcd73d7c043e91bac9fb8d3989cbff54b08790")

Get the cumulative case data, first for SCC.

# remDr <- remoteDriver(
#   remoteServerAddr = "192.168.86.25",
#   port = 4445L
# )
# remDr$open()
# 
# remDr$navigate("https://app.powerbigov.us/view?r=eyJrIjoiZTg2MTlhMWQtZWE5OC00ZDI3LWE4NjAtMTU3YWYwZDRlOTNmIiwidCI6IjBhYzMyMDJmLWMzZTktNGY1Ni04MzBkLTAxN2QwOWQxNmIzZiJ9")
# 
# webElem <- remDr$findElements(using = "class", value = "column")
# 
# cases <-
#   1:length(webElem) %>%
#   map(function(x){
#     webElem[[x]]$getElementAttribute("aria-label") %>% as.character()
#   }) %>%
#   unlist() %>%
#   as.data.frame()
# 
# scc_cumulative_cases <-
#   cases %>%
#   rename(text = ".") %>%
#   filter(grepl("Total_cases",text)) %>%
#   separate(text, c("date","cases"), sep = "\\.") %>%
#   mutate(
#     date =
#       substr(date,6,nchar(.)) %>%
#       as.Date("%A, %B %d, %Y"),
#     cases =
#       substr(cases,13,nchar(.)) %>%
#       as.numeric()
#   )
# 
# saveRDS(scc_cumulative_cases, file = "/Users/simonespeizer/Documents/2020 Spring Quarter/CEE 218Z/covid19/Simone_Speizer/scc_cumulative_cases.rds")

scc_cumulative_cases <- readRDS("/Users/simonespeizer/Documents/2020 Spring Quarter/CEE 218Z/covid19/Simone_Speizer/scc_cumulative_cases.rds")

Also for SMC.

# remDr$navigate("https://app.powerbigov.us/view?r=eyJrIjoiMWI5NmE5M2ItOTUwMC00NGNmLWEzY2UtOTQyODA1YjQ1NWNlIiwidCI6IjBkZmFmNjM1LWEwNGQtNDhjYy1hN2UzLTZkYTFhZjA4ODNmOSJ9")
# 
# webElem <- remDr$findElements(using = "class", value = "column")
# 
# tests <-
#   1:length(webElem) %>%
#   map(function(x){
#     webElem[[x]]$getElementAttribute("aria-label") %>% as.character()
#   }) %>%
#   unlist() %>%
#   as.data.frame()
# 
# tests_clean <-
#   tests %>%
#   rename(text = ".") %>%
#   separate(text, c("date","test_text"), sep = "\\.") %>%
#   separate(test_text, c(NA,"type",NA,"tests")) %>%
#   mutate(
#     date =
#       substr(date,23,nchar(.)) %>%
#       as.Date("%A, %B %d, %Y"),
#     tests =
#       tests %>%
#       as.numeric()
#   ) %>%
#   spread(
#     key = type,
#     value = tests
#   ) %>%
#   mutate(
#     total = Negative + Pending + Positive,
#     perc_positive = Positive/total,
#     perc_positive_movavg = movavg(perc_positive, 7, type = "s")
#   )
# 
# smc_cumulative_cases <- tests_clean %>%
#   mutate(cumulative_cases = cumsum(Positive), cumulative_negative = cumsum(Negative), cumulative_total = cumulative_cases+cumulative_negative, perc_positive_cumulative = cumulative_cases*100 / cumulative_total, perc_positive_cumulative_mov = movavg(perc_positive_cumulative, 7, type = "s"))
# 
# saveRDS(smc_cumulative_cases, file = "/Users/simonespeizer/Documents/2020 Spring Quarter/CEE 218Z/covid19/Simone_Speizer/smc_cumulative_cases.rds")

smc_cumulative_cases <- readRDS("/Users/simonespeizer/Documents/2020 Spring Quarter/CEE 218Z/covid19/Simone_Speizer/smc_cumulative_cases.rds")

Get social distancing data.

scc_blockgroups <-
  block_groups("CA","Santa Clara", cb=F, progress_bar=F) %>% 
  st_transform('+proj=longlat +datum=WGS84')

smc_blockgroups <-
  block_groups("CA","San Mateo", cb=F, progress_bar=F) %>% 
  st_transform('+proj=longlat +datum=WGS84')

bay_sd <- readRDS("/Users/simonespeizer/pCloud Drive/Shared/SFBI/Restricted Data Library/Safegraph/covid19analysis/bay_socialdistancing_v2.rds") %>% 
  mutate(date = date_range_start %>%  substr(1,10) %>% as.Date())

# obtaining weekends
weekends <- bay_sd %>% 
  filter(!duplicated(date)) %>% 
  arrange(date) %>% 
  mutate(weekend = ifelse((date %>% as.numeric()) %% 7 %in% c(2,3), T, F)) %>% 
  dplyr::select(date,weekend)

bay_sd <- bay_sd %>% left_join(weekends)

SCC data processing.

scc_cases_sd_daily <- bay_sd %>% 
  filter(origin_census_block_group %in% scc_blockgroups$GEOID) %>%
  group_by(date) %>%
  summarize(total_at_home = sum(completely_home_device_count), total_devices = sum(device_count)) %>%
  mutate(
    percent_at_home = total_at_home*100/total_devices, 
    percent_leaving_home = (100 - percent_at_home),
  ) %>% 
  left_join(
    scc_cumulative_cases
  ) %>% 
  filter(date >= min(scc_cumulative_cases$date))

# get the baseline percent of people at home
pre_case_growth_at_home_scc <- bay_sd %>%
  filter(date < min(scc_cumulative_cases$date)) %>%
  filter(origin_census_block_group %in% scc_blockgroups$GEOID) %>%
  summarize(total_at_home = sum(completely_home_device_count), total_devices = sum(device_count)) %>%
  mutate(percent_at_home = total_at_home*100/total_devices, percent_leaving_home = (100 - percent_at_home))

# include change in percent leaving home
scc_cases_sd_daily <- scc_cases_sd_daily %>%
  mutate(leaving_home_dif = percent_leaving_home - pre_case_growth_at_home_scc$percent_leaving_home[1],
         log_cases = log(cases))

# compute number of differences for stationarity
ndiffs(scc_cases_sd_daily$cases)
## [1] 2
ndiffs(scc_cases_sd_daily$log_cases[-1])
## [1] 2
ndiffs(scc_cases_sd_daily$leaving_home_dif)
## [1] 1
scc_test_big <- scc_cases_sd_daily %>%
  mutate(
    dlog_cases = c(NA,diff(log_cases)),
    d2log_cases = c(NA,NA,diff(log_cases, differences = 2)),
    dcases = c(NA,diff(cases)),
    d2cases = c(NA,NA,diff(dcases, differences = 2)),
    dleaving = c(NA,diff(leaving_home_dif)),
    d2leaving = c(NA,NA,diff(leaving_home_dif, differences = 2)),
    cases_mov = movavg(cases, 7, type = "s"),
    log_cases_mov = movavg(log_cases, 7, type = "s"),
    dlog_cases_mov = c(NA,diff(log_cases_mov)),
    d2log_cases_mov = c(NA,NA,diff(log_cases_mov, differences = 2)),
    dcases_mov = c(NA,diff(cases_mov)),
    d2cases_mov = c(NA,diff(dcases_mov)),
    leaving_mov = movavg(leaving_home_dif, 7, type = "s"),
    dleaving_mov = c(NA,diff(leaving_mov)),
    d2leaving_mov = c(NA,diff(dleaving_mov)),
    leaving4 = c(rep(NA,4), scc_cases_sd_daily$leaving_home_dif[1:(nrow(scc_cases_sd_daily)-4)]),
    dleaving4 = c(NA,diff(leaving4)),
    d2leaving4 = c(NA,NA,diff(leaving4, differences = 2)),
    leaving3 = c(rep(NA,3), scc_cases_sd_daily$leaving_home_dif[1:(nrow(scc_cases_sd_daily)-3)]),
    leaving3_mov = movavg(leaving3, 7, type = "s"),
    dleaving3_mov = c(NA,diff(leaving3_mov)),
    d2leaving3_mov = c(NA,NA,diff(leaving3_mov, differences = 2)),
    leaving4_mov = movavg(leaving4, 7, type = "s"),
    dleaving4_mov = c(NA,diff(leaving4_mov)),
    d2leaving4_mov = c(NA,NA,diff(leaving4_mov, differences = 2)),
    leaving5 = c(rep(NA,5), scc_cases_sd_daily$leaving_home_dif[1:(nrow(scc_cases_sd_daily)-5)]),
    leaving5_mov = movavg(leaving5, 7, type = "s"),
    dleaving5_mov = c(NA,diff(leaving5_mov)),
    d2leaving5_mov = c(NA,NA,diff(leaving5_mov, differences = 2)),
    leaving6 = c(rep(NA,6), scc_cases_sd_daily$leaving_home_dif[1:(nrow(scc_cases_sd_daily)-6)]),
    leaving6_mov = movavg(leaving6, 7, type = "s"),
    dleaving6_mov = c(NA,diff(leaving6_mov)),
    d2leaving6_mov = c(NA,NA,diff(leaving6_mov, differences = 2)),
    leaving7 = c(rep(NA,7), scc_cases_sd_daily$leaving_home_dif[1:(nrow(scc_cases_sd_daily)-7)]),
    leaving7_mov = movavg(leaving7, 7, type = "s"),
    dleaving7_mov = c(NA,diff(leaving7_mov)),
    d2leaving7_mov = c(NA,NA,diff(leaving7_mov, differences = 2)),
    leaving8 = c(rep(NA,8), scc_cases_sd_daily$leaving_home_dif[1:(nrow(scc_cases_sd_daily)-8)]),
    leaving8_mov = movavg(leaving8, 7, type = "s"),
    dleaving8_mov = c(NA,diff(leaving8_mov)),
    d2leaving8_mov = c(NA,NA,diff(leaving8_mov, differences = 2)),
    leaving9 = c(rep(NA,9), scc_cases_sd_daily$leaving_home_dif[1:(nrow(scc_cases_sd_daily)-9)]),
    leaving9_mov = movavg(leaving9, 7, type = "s"),
    dleaving9_mov = c(NA,diff(leaving9_mov)),
    d2leaving9_mov = c(NA,NA,diff(leaving9_mov, differences = 2)),
    leaving10 = c(rep(NA,10), scc_cases_sd_daily$leaving_home_dif[1:(nrow(scc_cases_sd_daily)-10)]),
    leaving10_mov = movavg(leaving10, 7, type = "s"),
    dleaving10_mov = c(NA,diff(leaving10_mov)),
    d2leaving10_mov = c(NA,NA,diff(leaving10_mov, differences = 2)),
    leaving11 = c(rep(NA,11), scc_cases_sd_daily$leaving_home_dif[1:(nrow(scc_cases_sd_daily)-11)]),
    leaving11_mov = movavg(leaving11, 7, type = "s"),
    dleaving11_mov = c(NA,diff(leaving11_mov)),
    d2leaving11_mov = c(NA,NA,diff(leaving11_mov, differences = 2)),
    leaving12 = c(rep(NA,12), scc_cases_sd_daily$leaving_home_dif[1:(nrow(scc_cases_sd_daily)-12)]),
    leaving12_mov = movavg(leaving12, 7, type = "s"),
    dleaving12_mov = c(NA,diff(leaving12_mov)),
    d2leaving12_mov = c(NA,NA,diff(leaving12_mov, differences = 2)),
    leaving13 = c(rep(NA,13), scc_cases_sd_daily$leaving_home_dif[1:(nrow(scc_cases_sd_daily)-13)]),
    leaving13_mov = movavg(leaving13, 7, type = "s"),
    dleaving13_mov = c(NA,diff(leaving13_mov)),
    d2leaving13_mov = c(NA,NA,diff(leaving13_mov, differences = 2)),
    leaving14 = c(rep(NA,14), scc_cases_sd_daily$leaving_home_dif[1:(nrow(scc_cases_sd_daily)-14)]),
    leaving14_mov = movavg(leaving14, 7, type = "s"),
    dleaving14_mov = c(NA,diff(leaving14_mov)),
    d2leaving14_mov = c(NA,NA,diff(leaving14_mov, differences = 2)),
    leaving18 = c(rep(NA,18), scc_cases_sd_daily$leaving_home_dif[1:(nrow(scc_cases_sd_daily)-18)]),
    leaving18_mov = movavg(leaving14, 7, type = "s"),
    dleaving18_mov = c(NA,diff(leaving18_mov)),
    d2leaving18_mov = c(NA,NA,diff(leaving18_mov, differences = 2)),
    leaving21 = c(rep(NA,21), scc_cases_sd_daily$leaving_home_dif[1:(nrow(scc_cases_sd_daily)-21)]),
    leaving21_mov = movavg(leaving21, 7, type = "s"),
    dleaving21_mov = c(NA,diff(leaving21_mov)),
    d2leaving21_mov = c(NA,NA,diff(leaving21_mov, differences = 2)),
    leaving28 = c(rep(NA,28), scc_cases_sd_daily$leaving_home_dif[1:(nrow(scc_cases_sd_daily)-28)]),
    leaving28_mov = movavg(leaving28, 7, type = "s"),
    dleaving28_mov = c(NA,diff(leaving28_mov)),
    d2leaving28_mov = c(NA,NA,diff(leaving28_mov, differences = 2)),
    past_cases = c(NA, scc_cases_sd_daily$cases[1:(nrow(scc_cases_sd_daily)-1)]), 
    cases_growth_daily = (dcases / past_cases)*100,
    cases_growth_daily_mov = movavg(cases_growth_daily, 7, type = "s")
  ) %>% 
  filter(date >= "2020-03-01")

ndiffs(scc_test_big$cases_growth_daily)
## [1] 1
scc_test_big_pre415 <- scc_test_big %>% filter(date <= "2020-04-15")

Lots of plots of movement and cases

Plots testing

# raw, no shifts
scc_test_big %>% ggplot(
  aes(x = date)) +
    geom_line(aes(y = leaving_mov, color="Leaving home")) +
  geom_line(aes(y = cases_growth_daily_mov-30, color = "Cases")) + 
  scale_y_continuous(sec.axis = sec_axis(~.*1+30, name = "Daily case growth rate (%), 7 day moving average")) + 
  scale_colour_manual(values = c("red", "blue")) +
  labs(y = "Change in percent of devices leaving home relative to before, 7 day moving average", x = "Date", color = "Data", title = "Santa Clara County - Growth Rate, No Lag")

scc_test_big %>% ggplot(
  aes(x = date)) +
    geom_line(aes(y = leaving_mov, color="Leaving home")) +
  geom_line(aes(y = dcases_mov-40, color = "Cases")) + 
  scale_y_continuous(sec.axis = sec_axis(~.*1+30, name = "Daily new cases, 7 day moving average")) + 
  scale_colour_manual(values = c("red", "blue")) +
  labs(y = "Change in percent of devices leaving home relative to before, 7 day moving average", x = "Date", color = "Data", title = "Santa Clara County - Cases, No Lag")

scc_test_big %>% ggplot(
  aes(x = date)) +
    geom_line(aes(y = leaving_mov, color="Leaving home")) +
  geom_line(aes(y = d2cases_mov-30, color = "Cases")) + 
  scale_y_continuous(sec.axis = sec_axis(~.*1+30, name = "Change in change in cases, 7 day moving average")) + 
  scale_colour_manual(values = c("red", "blue")) +
  labs(y = "Change in percent of devices leaving home relative to before, 7 day moving average", x = "Date", color = "Data", title = "Santa Clara County - Change in Cases, No Lag")

# log cases
scc_test_big %>% ggplot(
  aes(x = date)) +
    geom_line(aes(y = leaving_mov, color="Leaving home")) +
  geom_line(aes(y = log_cases_mov-30, color = "Cases")) + 
  scale_y_continuous(sec.axis = sec_axis(~.*1+30, name = "Log of cases, 7 day moving average")) + 
  scale_colour_manual(values = c("red", "blue")) +
  labs(y = "Change in percent of devices leaving home relative to before, 7 day moving average", x = "Date", color = "Data", title = "Santa Clara County - log(cases), No Lag")

scc_test_big %>% ggplot(
  aes(x = date)) +
    geom_line(aes(y = leaving_mov, color="Leaving home")) +
  geom_line(aes(y = dlog_cases_mov*100-30, color = "Cases")) + 
  scale_y_continuous(sec.axis = sec_axis(~(.+30)/100, name = "Change in log of cases, 7 day moving average")) + 
  scale_colour_manual(values = c("red", "blue")) +
  labs(y = "Change in percent of devices leaving home relative to before, 7 day moving average", x = "Date", color = "Data", title = "Santa Clara County  - change in log of cases, no lag")

# raw, no shifts, pre april 15
scc_test_big_pre415 %>% ggplot(
  aes(x = date)) +
    geom_line(aes(y = leaving_mov, color="Leaving home")) +
  geom_line(aes(y = cases_growth_daily_mov-30, color = "Cases")) + 
  scale_y_continuous(sec.axis = sec_axis(~.*1+30, name = "Daily case growth rate (%), 7 day moving average")) + 
  scale_colour_manual(values = c("red", "blue")) +
  labs(y = "Change in percent of devices leaving home relative to before, 7 day moving average", x = "Date", color = "Data", title = "Santa Clara County - Case Growth Rate, no Lag, pre 4/15")

scc_test_big_pre415 %>% ggplot(
  aes(x = date)) +
    geom_line(aes(y = leaving_mov, color="Leaving home")) +
  geom_line(aes(y = dlog_cases_mov*100-30, color = "Cases")) + 
  scale_y_continuous(sec.axis = sec_axis(~./100+30, name = "Change in log of cases, 7 day moving average")) + 
  scale_colour_manual(values = c("red", "blue")) +
  labs(y = "Change in percent of devices leaving home relative to before, 7 day moving average", x = "Date", color = "Data", title = "Santa Clara County - Change in Log of Cases, no Lag, pre 4/15")

14 day lag

# 14 day shift
scc_test_big %>% ggplot(
  aes(x = date)) +
    geom_line(aes(y = leaving14_mov, color="Leaving home")) +
  geom_line(aes(y = cases_growth_daily_mov-30, color = "Cases")) + 
  scale_y_continuous(sec.axis = sec_axis(~.*1+30, name = "Daily case growth rate (%), 7 day moving average")) + 
  scale_colour_manual(values = c("red", "blue")) +
  labs(y = "Change in percent of devices leaving home 14 days ago relative to before, 7 day moving average", x = "Date", color = "Data", title = "Santa Clara County - growth rate, 14 Day Lag")

scc_test_big_pre415 %>% ggplot(
  aes(x = date)) +
    geom_line(aes(y = leaving14_mov, color="Leaving home")) +
  geom_line(aes(y = cases_growth_daily_mov-30, color = "Cases")) + 
  scale_y_continuous(sec.axis = sec_axis(~.*1+30, name = "Daily case growth rate (%), 7 day moving average")) + 
  scale_colour_manual(values = c("red", "blue")) +
  labs(y = "Change in percent of devices leaving home 14 days ago relative to before, 7 day moving average", x = "Date", color = "Data", title = "Santa Clara County - growth rate, 14 Day Lag, pre 4/15")

scc_test_big %>% ggplot(
  aes(x = date)) +
    geom_line(aes(y = leaving14_mov, color="Leaving home")) +
  geom_line(aes(y = dcases_mov-40, color = "Cases")) + 
  scale_y_continuous(sec.axis = sec_axis(~.*1+40, name = "Daily new cases, 7 day moving average")) + 
  scale_colour_manual(values = c("red", "blue")) +
  labs(y = "Change in percent of devices leaving home 14 days ago relative to before, 7 day moving average", x = "Date", color = "Data", title = "Santa Clara County - change in cases, 14 Day Lag")

scc_test_big_pre415 %>% ggplot(
  aes(x = date)) +
    geom_line(aes(y = leaving14_mov, color="Leaving home")) +
  geom_line(aes(y = dcases_mov-40, color = "Cases")) + 
  scale_y_continuous(sec.axis = sec_axis(~.*1+40, name = "Daily new cases, 7 day moving average")) + 
  scale_colour_manual(values = c("red", "blue")) +
  labs(y = "Change in percent of devices leaving home 14 days ago relative to before, 7 day moving average", x = "Date", color = "Data", title = "Santa Clara County - change in cases, 14 Day Lag, pre 4/15")

scc_test_big %>% ggplot(
  aes(x = date)) +
    geom_line(aes(y = leaving14_mov, color="Leaving home")) +
  geom_line(aes(y = d2cases_mov-30, color = "Cases")) + 
  scale_y_continuous(sec.axis = sec_axis(~.*1+30, name = "Change in change in cases, 7 day moving average")) + 
  scale_colour_manual(values = c("red", "blue")) +
  labs(y = "Change in percent of devices leaving home 14 days ago relative to before, 7 day moving average", x = "Date", color = "Data", title = "Santa Clara County - change in change of cases, 14 Day Lag")

scc_test_big_pre415 %>% ggplot(
  aes(x = date)) +
    geom_line(aes(y = leaving14_mov, color="Leaving home")) +
  geom_line(aes(y = d2cases_mov-30, color = "Cases")) + 
  scale_y_continuous(sec.axis = sec_axis(~.*1+30, name = "Change in change in cases, 7 day moving average")) + 
  scale_colour_manual(values = c("red", "blue")) +
  labs(y = "Change in percent of devices leaving home 14 days ago relative to before, 7 day moving average", x = "Date", color = "Data", title = "Santa Clara County - change in change of cases, 14 Day Lag, pre 4/15")

scc_test_big %>% ggplot(
  aes(x = date)) +
    geom_line(aes(y = leaving14_mov, color="Leaving home")) +
  geom_line(aes(y = dlog_cases_mov*100-30, color = "Cases")) + 
  scale_y_continuous(sec.axis = sec_axis(~(.+30)/100, name = "Change in log of cases, 7 day moving average")) + 
  scale_colour_manual(values = c("red", "blue")) +
  labs(y = "Change in percent of devices leaving home 14 days ago relative to before, 7 day moving average", x = "Date", color = "Data", title = "Santa Clara County - change in log of cases, 14 day lag")

10 day lag

# 10 day lag
scc_test_big %>% ggplot(
  aes(x = date)) +
    geom_line(aes(y = leaving10_mov, color="Leaving home")) +
  geom_line(aes(y = cases_growth_daily_mov-30, color = "Cases")) + 
  scale_y_continuous(sec.axis = sec_axis(~.*1+30, name = "Daily case growth rate (%), 7 day moving average")) + 
  scale_colour_manual(values = c("red", "blue")) +
  labs(y = "Change in percent of devices leaving home 10 days ago relative to before, 7 day moving average", x = "Date", color = "Data", title = "Santa Clara County  - case growth rate, 10 Day Lag")

scc_test_big_pre415 %>% ggplot(
  aes(x = date)) +
    geom_line(aes(y = leaving10_mov, color="Leaving home")) +
  geom_line(aes(y = cases_growth_daily_mov-30, color = "Cases")) + 
  scale_y_continuous(sec.axis = sec_axis(~.*1+30, name = "Daily case growth rate (%), 7 day moving average")) + 
  scale_colour_manual(values = c("red", "blue")) +
  labs(y = "Change in percent of devices leaving home 10 days ago relative to before, 7 day moving average", x = "Date", color = "Data", title = "Santa Clara County - case growth rate, 10 Day Lag, pre 4/15")

scc_test_big %>% ggplot(
  aes(x = date)) +
    geom_line(aes(y = leaving10_mov, color="Leaving home")) +
  geom_line(aes(y = dcases_mov-40, color = "Cases")) + 
  scale_y_continuous(sec.axis = sec_axis(~.*1+40, name = "Daily new cases, 7 day moving average")) + 
  scale_colour_manual(values = c("red", "blue")) +
  labs(y = "Change in percent of devices leaving home 10 days ago relative to before, 7 day moving average", x = "Date", color = "Data", title = "Santa Clara County - change in cases, 10 Day Lag")

scc_test_big_pre415 %>% ggplot(
  aes(x = date)) +
    geom_line(aes(y = leaving10_mov, color="Leaving home")) +
  geom_line(aes(y = dcases_mov-40, color = "Cases")) + 
  scale_y_continuous(sec.axis = sec_axis(~.*1+40, name = "Daily new cases, 7 day moving average")) + 
  scale_colour_manual(values = c("red", "blue")) +
  labs(y = "Change in percent of devices leaving home 10 days ago relative to before, 7 day moving average", x = "Date", color = "Data", title = "Santa Clara County - change in cases, 10 Day Lag, pre 4/15")

scc_test_big %>% ggplot(
  aes(x = date)) +
    geom_line(aes(y = leaving10_mov, color="Leaving home")) +
  geom_line(aes(y = dlog_cases_mov*100-30, color = "Cases")) + 
  scale_y_continuous(sec.axis = sec_axis(~(.+30)/100, name = "Change in log of cases, 7 day moving average")) + 
  scale_colour_manual(values = c("red", "blue")) +
  labs(y = "Change in percent of devices leaving home 10 days ago relative to before, 7 day moving average", x = "Date", color = "Data", title = "Santa Clara County - change in log of cases, 10 day lag")

18 day lag

# 18 day lag 
scc_test_big %>% ggplot(
  aes(x = date)) +
    geom_line(aes(y = leaving18_mov, color="Leaving home")) +
  geom_line(aes(y = cases_growth_daily_mov-30, color = "Cases")) + 
  scale_y_continuous(sec.axis = sec_axis(~.*1+30, name = "Daily case growth rate (%), 7 day moving average")) + 
  scale_colour_manual(values = c("red", "blue")) +
  labs(y = "Change in percent of devices leaving home 18 days ago relative to before, 7 day moving average", x = "Date", color = "Data", title = "Santa Clara County - growth rate, 18 Day Lag")

scc_test_big_pre415 %>% ggplot(
  aes(x = date)) +
    geom_line(aes(y = leaving18_mov, color="Leaving home")) +
  geom_line(aes(y = cases_growth_daily_mov-30, color = "Cases")) + 
  scale_y_continuous(sec.axis = sec_axis(~.*1+30, name = "Daily case growth rate (%), 7 day moving average")) + 
  scale_colour_manual(values = c("red", "blue")) +
  labs(y = "Change in percent of devices leaving home 18 days ago relative to before, 7 day moving average", x = "Date", color = "Data", title = "Santa Clara County - growth rate, 18 Day Lag, pre 4/15")

scc_test_big %>% ggplot(
  aes(x = date)) +
    geom_line(aes(y = leaving18_mov, color="Leaving home")) +
  geom_line(aes(y = dcases_mov-40, color = "Cases")) + 
  scale_y_continuous(sec.axis = sec_axis(~.*1+40, name = "Daily new cases, 7 day moving average")) + 
  scale_colour_manual(values = c("red", "blue")) +
  labs(y = "Change in percent of devices leaving home 18 days ago relative to before, 7 day moving average", x = "Date", color = "Data", title = "Santa Clara County - change in cases, 18 Day Lag")

scc_test_big_pre415 %>% ggplot(
  aes(x = date)) +
    geom_line(aes(y = leaving18_mov, color="Leaving home")) +
  geom_line(aes(y = dcases_mov-40, color = "Cases")) + 
  scale_y_continuous(sec.axis = sec_axis(~.*1+40, name = "Daily new cases, 7 day moving average")) + 
  scale_colour_manual(values = c("red", "blue")) +
  labs(y = "Change in percent of devices leaving home 18 days ago relative to before, 7 day moving average", x = "Date", color = "Data", title = "Santa Clara County - change in cases, 18 Day Lag, pre 4/15")

21 day lag

# 21 day lag??
scc_test_big %>% ggplot(
  aes(x = date)) +
    geom_line(aes(y = leaving21_mov, color="Leaving home")) +
  geom_line(aes(y = cases_growth_daily_mov-30, color = "Cases")) + 
  scale_y_continuous(sec.axis = sec_axis(~.*1+30, name = "Daily case growth rate (%), 7 day moving average")) + 
  scale_colour_manual(values = c("red", "blue")) +
  labs(y = "Change in percent of devices leaving home 21 days ago relative to before, 7 day moving average", x = "Date", color = "Data", title = "Santa Clara County - growth rate, 21 Day Lag")

scc_test_big %>% ggplot(
  aes(x = date)) +
    geom_line(aes(y = leaving21_mov, color="Leaving home")) +
  geom_line(aes(y = dcases_mov-40, color = "Cases")) + 
  scale_y_continuous(sec.axis = sec_axis(~.*1+40, name = "Daily new cases, 7 day moving average")) + 
  scale_colour_manual(values = c("red", "blue")) +
  labs(y = "Change in percent of devices leaving home 21 days ago relative to before, 7 day moving average", x = "Date", color = "Data", title = "Santa Clara County - change in cases, 21 Day Lag")

28 day lag…

# going wild with a 28 day lag
scc_test_big %>% ggplot(
  aes(x = date)) +
    geom_line(aes(y = leaving28_mov, color="Leaving home")) +
  geom_line(aes(y = cases_growth_daily_mov-30, color = "Cases")) + 
  scale_y_continuous(sec.axis = sec_axis(~.*1+30, name = "Daily case growth rate (%), 7 day moving average")) + 
  scale_colour_manual(values = c("red", "blue")) +
  labs(y = "Change in percent of devices leaving home 28 days ago relative to before, 7 day moving average", x = "Date", color = "Data", title = "Santa Clara County - growth rate, 28 Day Lag")

scc_test_big %>% ggplot(
  aes(x = date)) +
    geom_line(aes(y = leaving28_mov, color="Leaving home")) +
  geom_line(aes(y = dcases_mov-40, color = "Cases")) + 
  scale_y_continuous(sec.axis = sec_axis(~.*1+40, name = "Daily new cases, 7 day moving average")) + 
  scale_colour_manual(values = c("red", "blue")) +
  labs(y = "Change in percent of devices leaving home 28 days ago relative to before, 7 day moving average", x = "Date", color = "Data", title = "Santa Clara County - change in cases, 28 Day Lag")

Just log, 7 days

# trying just log plot with 7 day lag
scc_test_big %>% ggplot(
  aes(x = date)) +
    geom_line(aes(y = leaving7_mov, color="Leaving home")) +
  geom_line(aes(y = dlog_cases_mov*100-30, color = "Cases")) + 
  scale_y_continuous(sec.axis = sec_axis(~(.+30)/100, name = "Change in log of cases, 7 day moving average")) + 
  scale_colour_manual(values = c("red", "blue")) +
  labs(y = "Change in percent of devices leaving home 7 days ago relative to before, 7 day moving average", x = "Date", color = "Data", title = "Santa Clara County - change in log of cases, 7 day lag")

Testing ardlm

dleaving and d2logcases for 4 lags, moving average

testing_ardl4 = ardlDlm(x = scc_test_big$dleaving_mov, y = scc_test_big$d2log_cases_mov, p = 4, q = 4, remove = list(p = c(0,1,2,3), q=c()))
summary(testing_ardl4)
## 
## Time series regression with "ts" data:
## Start = 5, End = 83
## 
## Call:
## dynlm(formula = as.formula(model.text), data = data, start = 1)
## 
## Residuals:
##        Min         1Q     Median         3Q        Max 
## -0.0213610 -0.0014294  0.0006449  0.0016122  0.0266611 
## 
## Coefficients:
##               Estimate Std. Error t value Pr(>|t|)    
## (Intercept) -0.0009501  0.0008704  -1.092    0.279    
## X.4         -0.0006286  0.0012117  -0.519    0.605    
## Y.1         -0.0364700  0.1170492  -0.312    0.756    
## Y.2          0.2251433  0.1197267   1.880    0.064 .  
## Y.3          0.4227324  0.1001782   4.220 6.94e-05 ***
## Y.4          0.0537929  0.1099349   0.489    0.626    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.007004 on 73 degrees of freedom
## Multiple R-squared:  0.2386, Adjusted R-squared:  0.1865 
## F-statistic: 4.576 on 5 and 73 DF,  p-value: 0.001098
testing_ardl4_1 = ardlDlm(x = scc_test_big$dleaving_mov, y = scc_test_big$d2log_cases_mov, p = 4, q = 4, remove = list(p = c(0,1,2,3), q=c(2)))
summary(testing_ardl4_1)
## 
## Time series regression with "ts" data:
## Start = 5, End = 83
## 
## Call:
## dynlm(formula = as.formula(model.text), data = data, start = 1)
## 
## Residuals:
##        Min         1Q     Median         3Q        Max 
## -0.0191064 -0.0023746  0.0007522  0.0016163  0.0304296 
## 
## Coefficients:
##               Estimate Std. Error t value Pr(>|t|)    
## (Intercept) -0.0012538  0.0008698  -1.441 0.153688    
## X.4          0.0007032  0.0009998   0.703 0.484073    
## Y.1         -0.1035140  0.1133816  -0.913 0.364223    
## Y.3          0.3864274  0.0999705   3.865 0.000236 ***
## Y.4          0.0420932  0.1116238   0.377 0.707180    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.007123 on 74 degrees of freedom
## Multiple R-squared:  0.2017, Adjusted R-squared:  0.1586 
## F-statistic: 4.676 on 4 and 74 DF,  p-value: 0.002027
testing_ardl4_2 = ardlDlm(x = scc_test_big$dleaving_mov, y = scc_test_big$d2log_cases_mov, p = 4, q = 4, remove = list(p = c(0,1,2,3), q=c(2,3)))
summary(testing_ardl4_2)
## 
## Time series regression with "ts" data:
## Start = 5, End = 83
## 
## Call:
## dynlm(formula = as.formula(model.text), data = data, start = 1)
## 
## Residuals:
##       Min        1Q    Median        3Q       Max 
## -0.025948 -0.003072  0.000670  0.002360  0.038103 
## 
## Coefficients:
##               Estimate Std. Error t value Pr(>|t|)  
## (Intercept) -0.0017258  0.0009378  -1.840   0.0697 .
## X.4          0.0018170  0.0010426   1.743   0.0855 .
## Y.1         -0.0717088  0.1231452  -0.582   0.5621  
## Y.4         -0.0283038  0.1199277  -0.236   0.8141  
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.007757 on 75 degrees of freedom
## Multiple R-squared:  0.04057,    Adjusted R-squared:  0.002194 
## F-statistic: 1.057 on 3 and 75 DF,  p-value: 0.3725
testing_ardl4_3 = ardlDlm(x = scc_test_big$dleaving_mov, y = scc_test_big$d2log_cases_mov, p = 4, q = 4, remove = list(p = c(0,1,2,3), q=c(2,3,4)))
summary(testing_ardl4_3)
## 
## Time series regression with "ts" data:
## Start = 5, End = 83
## 
## Call:
## dynlm(formula = as.formula(model.text), data = data, start = 1)
## 
## Residuals:
##       Min        1Q    Median        3Q       Max 
## -0.026160 -0.002982  0.000637  0.002331  0.037846 
## 
## Coefficients:
##               Estimate Std. Error t value Pr(>|t|)  
## (Intercept) -0.0017159  0.0009310  -1.843   0.0692 .
## X.4          0.0017349  0.0009767   1.776   0.0797 .
## Y.1         -0.0800756  0.1171969  -0.683   0.4965  
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.007708 on 76 degrees of freedom
## Multiple R-squared:  0.03986,    Adjusted R-squared:  0.01459 
## F-statistic: 1.578 on 2 and 76 DF,  p-value: 0.2132
testing_ardl4_4 = ardlDlm(x = scc_test_big$dleaving_mov, y = scc_test_big$d2log_cases_mov, p = 4, q = 4, remove = list(p = c(), q=c(2,3,4)))
summary(testing_ardl4_4)
## 
## Time series regression with "ts" data:
## Start = 5, End = 83
## 
## Call:
## dynlm(formula = as.formula(model.text), data = data, start = 1)
## 
## Residuals:
##        Min         1Q     Median         3Q        Max 
## -0.0144878 -0.0033536 -0.0005134  0.0033817  0.0200148 
## 
## Coefficients:
##               Estimate Std. Error t value Pr(>|t|)    
## (Intercept) -0.0011730  0.0007628  -1.538 0.128489    
## X.t         -0.0001007  0.0014843  -0.068 0.946073    
## X.1          0.0006811  0.0017657   0.386 0.700811    
## X.2          0.0048748  0.0014204   3.432 0.000996 ***
## X.3          0.0024134  0.0015035   1.605 0.112841    
## X.4         -0.0032499  0.0012177  -2.669 0.009396 ** 
## Y.1         -0.2120443  0.1069383  -1.983 0.051198 .  
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.006182 on 72 degrees of freedom
## Multiple R-squared:  0.415,  Adjusted R-squared:  0.3662 
## F-statistic: 8.512 on 6 and 72 DF,  p-value: 5.426e-07
testing_ardl4_5 = ardlDlm(x = scc_test_big$dleaving_mov, y = scc_test_big$d2log_cases_mov, p = 4, q = 4, remove = list(p = c(0,1,2,3), q=c(2,3,4)))
summary(testing_ardl4_5)
## 
## Time series regression with "ts" data:
## Start = 5, End = 83
## 
## Call:
## dynlm(formula = as.formula(model.text), data = data, start = 1)
## 
## Residuals:
##       Min        1Q    Median        3Q       Max 
## -0.026160 -0.002982  0.000637  0.002331  0.037846 
## 
## Coefficients:
##               Estimate Std. Error t value Pr(>|t|)  
## (Intercept) -0.0017159  0.0009310  -1.843   0.0692 .
## X.4          0.0017349  0.0009767   1.776   0.0797 .
## Y.1         -0.0800756  0.1171969  -0.683   0.4965  
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.007708 on 76 degrees of freedom
## Multiple R-squared:  0.03986,    Adjusted R-squared:  0.01459 
## F-statistic: 1.578 on 2 and 76 DF,  p-value: 0.2132
testing_ardl4_6= ardlDlm(x = scc_test_big$dleaving_mov, y = scc_test_big$d2log_cases_mov, p = 4, q = 4, remove = list(p = c(0,1,2), q=c(2,3,4)))
summary(testing_ardl4_6)
## 
## Time series regression with "ts" data:
## Start = 5, End = 83
## 
## Call:
## dynlm(formula = as.formula(model.text), data = data, start = 1)
## 
## Residuals:
##        Min         1Q     Median         3Q        Max 
## -0.0151043 -0.0039623 -0.0002028  0.0022976  0.0315384 
## 
## Coefficients:
##               Estimate Std. Error t value Pr(>|t|)    
## (Intercept) -0.0015637  0.0008312  -1.881   0.0638 .  
## X.3          0.0061272  0.0013532   4.528  2.2e-05 ***
## X.4         -0.0022470  0.0012379  -1.815   0.0735 .  
## Y.1         -0.2898320  0.1143508  -2.535   0.0133 *  
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.006877 on 75 degrees of freedom
## Multiple R-squared:  0.246,  Adjusted R-squared:  0.2158 
## F-statistic: 8.156 on 3 and 75 DF,  p-value: 9.064e-05
testing_ardl4_7= ardlDlm(x = scc_test_big$dleaving_mov, y = scc_test_big$d2log_cases_mov, p = 4, q = 4, remove = list(p = c(0,1), q=c(2,3,4)))
summary(testing_ardl4_7)
## 
## Time series regression with "ts" data:
## Start = 5, End = 83
## 
## Call:
## dynlm(formula = as.formula(model.text), data = data, start = 1)
## 
## Residuals:
##        Min         1Q     Median         3Q        Max 
## -0.0147484 -0.0031604 -0.0004453  0.0031843  0.0193759 
## 
## Coefficients:
##               Estimate Std. Error t value Pr(>|t|)    
## (Intercept) -0.0012013  0.0007424  -1.618  0.10988    
## X.2          0.0052293  0.0011385   4.593 1.75e-05 ***
## X.3          0.0024392  0.0014453   1.688  0.09567 .  
## X.4         -0.0031711  0.0011176  -2.837  0.00586 ** 
## Y.1         -0.2020174  0.1033346  -1.955  0.05436 .  
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.006107 on 74 degrees of freedom
## Multiple R-squared:  0.4133, Adjusted R-squared:  0.3816 
## F-statistic: 13.03 on 4 and 74 DF,  p-value: 4.407e-08
testing_ardl4_7= ardlDlm(x = scc_test_big$dleaving_mov, y = scc_test_big$d2log_cases_mov, p = 4, q = 4, remove = list(p = c(0), q=c(2,3,4)))
summary(testing_ardl4_7)
## 
## Time series regression with "ts" data:
## Start = 5, End = 83
## 
## Call:
## dynlm(formula = as.formula(model.text), data = data, start = 1)
## 
## Residuals:
##        Min         1Q     Median         3Q        Max 
## -0.0145645 -0.0033280 -0.0005126  0.0033991  0.0200407 
## 
## Coefficients:
##               Estimate Std. Error t value Pr(>|t|)    
## (Intercept) -0.0011659  0.0007505  -1.554 0.124612    
## X.1          0.0006026  0.0013245   0.455 0.650483    
## X.2          0.0048628  0.0013997   3.474 0.000866 ***
## X.3          0.0023910  0.0014569   1.641 0.105068    
## X.4         -0.0032203  0.0011288  -2.853 0.005635 ** 
## Y.1         -0.2115972  0.1060050  -1.996 0.049653 *  
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.00614 on 73 degrees of freedom
## Multiple R-squared:  0.4149, Adjusted R-squared:  0.3749 
## F-statistic: 10.35 on 5 and 73 DF,  p-value: 1.57e-07

dleaving and d2logcases for 4 lags, no moving average

testing_ardl4 = ardlDlm(x = scc_test_big$dleaving, y = scc_test_big$d2log_cases, p = 4, q = 4, remove = list(p = c(0,1,2,3), q=c()))
summary(testing_ardl4)
## 
## Time series regression with "ts" data:
## Start = 5, End = 83
## 
## Call:
## dynlm(formula = as.formula(model.text), data = data, start = 1)
## 
## Residuals:
##       Min        1Q    Median        3Q       Max 
## -0.110146 -0.008604  0.001436  0.009713  0.095544 
## 
## Coefficients:
##              Estimate Std. Error t value Pr(>|t|)    
## (Intercept) -0.005328   0.003753  -1.420  0.15996    
## X.4          0.001879   0.001196   1.572  0.12026    
## Y.1         -0.648722   0.114527  -5.664 2.75e-07 ***
## Y.2         -0.575380   0.123212  -4.670 1.34e-05 ***
## Y.3         -0.419613   0.128582  -3.263  0.00168 ** 
## Y.4         -0.166485   0.093575  -1.779  0.07938 .  
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.03261 on 73 degrees of freedom
## Multiple R-squared:  0.3428, Adjusted R-squared:  0.2978 
## F-statistic: 7.616 on 5 and 73 DF,  p-value: 8.376e-06
testing_ardl4_1 = ardlDlm(x = scc_test_big$dleaving, y = scc_test_big$d2log_cases, p = 4, q = 4, remove = list(p = c(0,1,2,3), q=c(2)))
summary(testing_ardl4_1)
## 
## Time series regression with "ts" data:
## Start = 5, End = 83
## 
## Call:
## dynlm(formula = as.formula(model.text), data = data, start = 1)
## 
## Residuals:
##       Min        1Q    Median        3Q       Max 
## -0.145947 -0.005744  0.001111  0.006201  0.154961 
## 
## Coefficients:
##              Estimate Std. Error t value Pr(>|t|)   
## (Intercept) -0.002286   0.004183  -0.546  0.58641   
## X.4          0.001054   0.001338   0.788  0.43327   
## Y.1         -0.348503   0.107281  -3.249  0.00175 **
## Y.3         -0.014330   0.107387  -0.133  0.89421   
## Y.4          0.003563   0.097568   0.037  0.97097   
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.03691 on 74 degrees of freedom
## Multiple R-squared:  0.1465, Adjusted R-squared:  0.1003 
## F-statistic: 3.175 on 4 and 74 DF,  p-value: 0.0183
testing_ardl4_2 = ardlDlm(x = scc_test_big$dleaving, y = scc_test_big$d2log_cases, p = 4, q = 4, remove = list(p = c(0,1,2,3), q=c(2,3)))
summary(testing_ardl4_2)
## 
## Time series regression with "ts" data:
## Start = 5, End = 83
## 
## Call:
## dynlm(formula = as.formula(model.text), data = data, start = 1)
## 
## Residuals:
##       Min        1Q    Median        3Q       Max 
## -0.147662 -0.005583  0.001611  0.006141  0.154145 
## 
## Coefficients:
##              Estimate Std. Error t value Pr(>|t|)   
## (Intercept) -0.002255   0.004149  -0.543  0.58845   
## X.4          0.001021   0.001306   0.782  0.43681   
## Y.1         -0.346777   0.105798  -3.278  0.00159 **
## Y.4          0.013241   0.064828   0.204  0.83871   
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.03667 on 75 degrees of freedom
## Multiple R-squared:  0.1463, Adjusted R-squared:  0.1121 
## F-statistic: 4.283 on 3 and 75 DF,  p-value: 0.007597
testing_ardl4_3 = ardlDlm(x = scc_test_big$dleaving, y = scc_test_big$d2log_cases, p = 4, q = 4, remove = list(p = c(0,1,2,3), q=c(2,3,4)))
summary(testing_ardl4_3)
## 
## Time series regression with "ts" data:
## Start = 5, End = 83
## 
## Call:
## dynlm(formula = as.formula(model.text), data = data, start = 1)
## 
## Residuals:
##       Min        1Q    Median        3Q       Max 
## -0.146828 -0.005788  0.001912  0.006288  0.154503 
## 
## Coefficients:
##              Estimate Std. Error t value Pr(>|t|)   
## (Intercept) -0.002278   0.004121  -0.553  0.58207   
## X.4          0.001140   0.001163   0.980  0.33024   
## Y.1         -0.350385   0.103653  -3.380  0.00115 **
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.03643 on 76 degrees of freedom
## Multiple R-squared:  0.1458, Adjusted R-squared:  0.1233 
## F-statistic: 6.486 on 2 and 76 DF,  p-value: 0.002508
testing_ardl4_4 = ardlDlm(x = scc_test_big$dleaving, y = scc_test_big$d2log_cases, p = 4, q = 4, remove = list(p = c(), q=c(2,3,4)))
summary(testing_ardl4_4)
## 
## Time series regression with "ts" data:
## Start = 5, End = 83
## 
## Call:
## dynlm(formula = as.formula(model.text), data = data, start = 1)
## 
## Residuals:
##       Min        1Q    Median        3Q       Max 
## -0.116748 -0.015674 -0.002089  0.006839  0.130480 
## 
## Coefficients:
##               Estimate Std. Error t value Pr(>|t|)    
## (Intercept)  0.0004892  0.0038336   0.128 0.898806    
## X.t          0.0048828  0.0012044   4.054 0.000126 ***
## X.1          0.0029664  0.0013313   2.228 0.028987 *  
## X.2          0.0012681  0.0012156   1.043 0.300358    
## X.3         -0.0003461  0.0011465  -0.302 0.763649    
## X.4          0.0008240  0.0011210   0.735 0.464669    
## Y.1         -0.3973830  0.1055672  -3.764 0.000338 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.033 on 72 degrees of freedom
## Multiple R-squared:  0.336,  Adjusted R-squared:  0.2806 
## F-statistic: 6.071 on 6 and 72 DF,  p-value: 3.505e-05
testing_ardl4_5 = ardlDlm(x = scc_test_big$dleaving, y = scc_test_big$d2log_cases, p = 4, q = 4, remove = list(p = c(0,1,2,3), q=c(2,3,4)))
summary(testing_ardl4_5)
## 
## Time series regression with "ts" data:
## Start = 5, End = 83
## 
## Call:
## dynlm(formula = as.formula(model.text), data = data, start = 1)
## 
## Residuals:
##       Min        1Q    Median        3Q       Max 
## -0.146828 -0.005788  0.001912  0.006288  0.154503 
## 
## Coefficients:
##              Estimate Std. Error t value Pr(>|t|)   
## (Intercept) -0.002278   0.004121  -0.553  0.58207   
## X.4          0.001140   0.001163   0.980  0.33024   
## Y.1         -0.350385   0.103653  -3.380  0.00115 **
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.03643 on 76 degrees of freedom
## Multiple R-squared:  0.1458, Adjusted R-squared:  0.1233 
## F-statistic: 6.486 on 2 and 76 DF,  p-value: 0.002508
testing_ardl4_6= ardlDlm(x = scc_test_big$dleaving, y = scc_test_big$d2log_cases, p = 4, q = 4, remove = list(p = c(0,1,2), q=c(2,3,4)))
summary(testing_ardl4_6)
## 
## Time series regression with "ts" data:
## Start = 5, End = 83
## 
## Call:
## dynlm(formula = as.formula(model.text), data = data, start = 1)
## 
## Residuals:
##       Min        1Q    Median        3Q       Max 
## -0.146758 -0.005950  0.001806  0.006341  0.154456 
## 
## Coefficients:
##               Estimate Std. Error t value Pr(>|t|)   
## (Intercept) -2.288e-03  4.163e-03  -0.550  0.58415   
## X.3         -3.637e-05  1.214e-03  -0.030  0.97617   
## X.4          1.134e-03  1.184e-03   0.958  0.34101   
## Y.1         -3.504e-01  1.043e-01  -3.358  0.00124 **
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.03668 on 75 degrees of freedom
## Multiple R-squared:  0.1458, Adjusted R-squared:  0.1116 
## F-statistic: 4.267 on 3 and 75 DF,  p-value: 0.007743
testing_ardl4_7= ardlDlm(x = scc_test_big$dleaving, y = scc_test_big$d2log_cases, p = 4, q = 4, remove = list(p = c(0,1), q=c(2,3,4)))
summary(testing_ardl4_7)
## 
## Time series regression with "ts" data:
## Start = 5, End = 83
## 
## Call:
## dynlm(formula = as.formula(model.text), data = data, start = 1)
## 
## Residuals:
##       Min        1Q    Median        3Q       Max 
## -0.146824 -0.005837  0.001871  0.006411  0.154204 
## 
## Coefficients:
##               Estimate Std. Error t value Pr(>|t|)   
## (Intercept) -2.315e-03  4.228e-03  -0.548  0.58559   
## X.2         -6.382e-05  1.319e-03  -0.048  0.96154   
## X.3         -4.342e-05  1.231e-03  -0.035  0.97195   
## X.4          1.120e-03  1.231e-03   0.910  0.36598   
## Y.1         -3.501e-01  1.052e-01  -3.326  0.00137 **
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.03692 on 74 degrees of freedom
## Multiple R-squared:  0.1458, Adjusted R-squared:  0.09966 
## F-statistic: 3.159 on 4 and 74 DF,  p-value: 0.01875
testing_ardl4_7= ardlDlm(x = scc_test_big$dleaving, y = scc_test_big$d2log_cases, p = 4, q = 4, remove = list(p = c(0), q=c(2,3,4)))
summary(testing_ardl4_7)
## 
## Time series regression with "ts" data:
## Start = 5, End = 83
## 
## Call:
## dynlm(formula = as.formula(model.text), data = data, start = 1)
## 
## Residuals:
##       Min        1Q    Median        3Q       Max 
## -0.141973 -0.012206 -0.000937  0.008027  0.155641 
## 
## Coefficients:
##               Estimate Std. Error t value Pr(>|t|)    
## (Intercept) -0.0014129  0.0041878  -0.337 0.736802    
## X.1          0.0027185  0.0014637   1.857 0.067313 .  
## X.2          0.0002834  0.0013110   0.216 0.829483    
## X.3          0.0004723  0.0012422   0.380 0.704871    
## X.4          0.0006855  0.0012333   0.556 0.580002    
## Y.1         -0.4450172  0.1154721  -3.854 0.000248 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.03633 on 73 degrees of freedom
## Multiple R-squared:  0.1844, Adjusted R-squared:  0.1285 
## F-statistic:   3.3 on 5 and 73 DF,  p-value: 0.009638

6 lags

testing_ardl6 = ardlDlm(x = scc_test_big$dleaving, y = scc_test_big$d2log_cases, p = 6, q = 1, remove = list(p = c(0,1,2,3,4,5), q=c()))
summary(testing_ardl6)
## 
## Time series regression with "ts" data:
## Start = 7, End = 83
## 
## Call:
## dynlm(formula = as.formula(model.text), data = data, start = 1)
## 
## Residuals:
##       Min        1Q    Median        3Q       Max 
## -0.137747 -0.008051  0.003462  0.012650  0.147615 
## 
## Coefficients:
##              Estimate Std. Error t value Pr(>|t|)    
## (Intercept) -0.005865   0.003852  -1.523   0.1321    
## X.6         -0.002136   0.001132  -1.887   0.0631 .  
## Y.1         -0.517877   0.103287  -5.014 3.54e-06 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.03348 on 74 degrees of freedom
## Multiple R-squared:  0.2566, Adjusted R-squared:  0.2365 
## F-statistic: 12.77 on 2 and 74 DF,  p-value: 1.717e-05
testing_ardl6_1 = ardlDlm(x = scc_test_big$dleaving, y = scc_test_big$d2log_cases, p = 6, q = 1, remove = list(p = c(0,1,2,3,4), q=c()))
summary(testing_ardl6_1)
## 
## Time series regression with "ts" data:
## Start = 7, End = 83
## 
## Call:
## dynlm(formula = as.formula(model.text), data = data, start = 1)
## 
## Residuals:
##       Min        1Q    Median        3Q       Max 
## -0.126104 -0.010476  0.002082  0.013289  0.150996 
## 
## Coefficients:
##              Estimate Std. Error t value Pr(>|t|)    
## (Intercept) -0.006368   0.003814  -1.670   0.0992 .  
## X.5         -0.001909   0.001114  -1.715   0.0907 .  
## X.6         -0.002315   0.001123  -2.062   0.0428 *  
## Y.1         -0.488432   0.103395  -4.724 1.09e-05 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.03304 on 73 degrees of freedom
## Multiple R-squared:  0.2854, Adjusted R-squared:  0.256 
## F-statistic: 9.718 on 3 and 73 DF,  p-value: 1.79e-05
testing_ardl6_2 = ardlDlm(x = scc_test_big$dleaving, y = scc_test_big$d2log_cases, p = 6, q = 1, remove = list(p = c(0,1,2,3), q=c()))
summary(testing_ardl6_2)
## 
## Time series regression with "ts" data:
## Start = 7, End = 83
## 
## Call:
## dynlm(formula = as.formula(model.text), data = data, start = 1)
## 
## Residuals:
##       Min        1Q    Median        3Q       Max 
## -0.127410 -0.011333  0.002135  0.013349  0.149788 
## 
## Coefficients:
##               Estimate Std. Error t value Pr(>|t|)    
## (Intercept) -0.0059533  0.0038808  -1.534   0.1294    
## X.4          0.0007883  0.0012051   0.654   0.5151    
## X.5         -0.0018371  0.0011235  -1.635   0.1064    
## X.6         -0.0020925  0.0011770  -1.778   0.0797 .  
## Y.1         -0.4775477  0.1051277  -4.543 2.18e-05 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.03317 on 72 degrees of freedom
## Multiple R-squared:  0.2896, Adjusted R-squared:  0.2502 
## F-statistic: 7.339 on 4 and 72 DF,  p-value: 5.147e-05
testing_ardl6_3 = ardlDlm(x = scc_test_big$dleaving, y = scc_test_big$d2log_cases, p = 6, q = 1, remove = list(p = c(0,1,2), q=c()))
summary(testing_ardl6_3)
## 
## Time series regression with "ts" data:
## Start = 7, End = 83
## 
## Call:
## dynlm(formula = as.formula(model.text), data = data, start = 1)
## 
## Residuals:
##       Min        1Q    Median        3Q       Max 
## -0.124954 -0.010884  0.004603  0.013471  0.145748 
## 
## Coefficients:
##               Estimate Std. Error t value Pr(>|t|)    
## (Intercept) -0.0068364  0.0038837  -1.760   0.0827 .  
## X.3         -0.0018820  0.0012039  -1.563   0.1224    
## X.4          0.0005702  0.0012013   0.475   0.6365    
## X.5         -0.0022063  0.0011372  -1.940   0.0563 .  
## X.6         -0.0018226  0.0011781  -1.547   0.1263    
## Y.1         -0.4816791  0.1041227  -4.626 1.63e-05 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.03285 on 71 degrees of freedom
## Multiple R-squared:  0.3133, Adjusted R-squared:  0.2649 
## F-statistic: 6.477 on 5 and 71 DF,  p-value: 5.161e-05
testing_ardl6_4 = ardlDlm(x = scc_test_big$dleaving, y = scc_test_big$d2log_cases, p = 6, q = 1, remove = list(p = c(0,1), q=c()))
summary(testing_ardl6_4)
## 
## Time series regression with "ts" data:
## Start = 7, End = 83
## 
## Call:
## dynlm(formula = as.formula(model.text), data = data, start = 1)
## 
## Residuals:
##       Min        1Q    Median        3Q       Max 
## -0.125837 -0.010159  0.005325  0.013508  0.143649 
## 
## Coefficients:
##               Estimate Std. Error t value Pr(>|t|)    
## (Intercept) -0.0070547  0.0039491  -1.786   0.0784 .  
## X.2         -0.0004600  0.0012070  -0.381   0.7043    
## X.3         -0.0019397  0.0012206  -1.589   0.1165    
## X.4          0.0004654  0.0012395   0.375   0.7084    
## X.5         -0.0021401  0.0011572  -1.849   0.0686 .  
## X.6         -0.0018265  0.0011853  -1.541   0.1278    
## Y.1         -0.4823447  0.1047698  -4.604  1.8e-05 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.03305 on 70 degrees of freedom
## Multiple R-squared:  0.3147, Adjusted R-squared:  0.2559 
## F-statistic: 5.357 on 6 and 70 DF,  p-value: 0.0001342
testing_ardl6_5 = ardlDlm(x = scc_test_big$dleaving, y = scc_test_big$d2log_cases, p = 6, q = 1, remove = list(p = c(0), q=c()))
summary(testing_ardl6_5)
## 
## Time series regression with "ts" data:
## Start = 7, End = 83
## 
## Call:
## dynlm(formula = as.formula(model.text), data = data, start = 1)
## 
## Residuals:
##       Min        1Q    Median        3Q       Max 
## -0.125840 -0.013734  0.002542  0.012845  0.143593 
## 
## Coefficients:
##               Estimate Std. Error t value Pr(>|t|)    
## (Intercept) -0.0061492  0.0039348  -1.563   0.1227    
## X.1          0.0022711  0.0013459   1.687   0.0961 .  
## X.2         -0.0002032  0.0012010  -0.169   0.8661    
## X.3         -0.0015382  0.0012281  -1.253   0.2146    
## X.4          0.0001645  0.0012364   0.133   0.8945    
## X.5         -0.0019201  0.0011496  -1.670   0.0994 .  
## X.6         -0.0014612  0.0011898  -1.228   0.2236    
## Y.1         -0.5581706  0.1127559  -4.950 5.04e-06 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.03262 on 69 degrees of freedom
## Multiple R-squared:  0.3418, Adjusted R-squared:  0.2751 
## F-statistic:  5.12 on 7 and 69 DF,  p-value: 0.0001007

leaving and dcases for 4 lags

# keeping only 4 x lags, removing different y lags
testing_ardl4 = ardlDlm(x = scc_test_big$leaving_home_dif, y = scc_test_big$dcases, p = 4, q = 4, remove = list(p = c(0,1,2,3), q=c()))
summary(testing_ardl4)
## 
## Time series regression with "ts" data:
## Start = 5, End = 83
## 
## Call:
## dynlm(formula = as.formula(model.text), data = data, start = 1)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -27.138 -10.351   0.307   6.675  41.859 
## 
## Coefficients:
##             Estimate Std. Error t value Pr(>|t|)    
## (Intercept)  9.30235    4.22315   2.203   0.0308 *  
## X.4          0.14646    0.16042   0.913   0.3643    
## Y.1          0.51796    0.11584   4.471  2.8e-05 ***
## Y.2          0.13820    0.13235   1.044   0.2998    
## Y.3          0.05266    0.13239   0.398   0.6920    
## Y.4          0.11643    0.11735   0.992   0.3244    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 14.28 on 73 degrees of freedom
## Multiple R-squared:  0.5417, Adjusted R-squared:  0.5103 
## F-statistic: 17.26 on 5 and 73 DF,  p-value: 3.076e-11
testing_ardl4_1 = ardlDlm(x = scc_test_big$leaving_home_dif, y = scc_test_big$dcases, p = 4, q = 4, remove = list(p = c(0,1,2,3), q=c(2)))
summary(testing_ardl4_1)
## 
## Time series regression with "ts" data:
## Start = 5, End = 83
## 
## Call:
## dynlm(formula = as.formula(model.text), data = data, start = 1)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -26.144 -10.278  -0.513   7.007  41.133 
## 
## Coefficients:
##             Estimate Std. Error t value Pr(>|t|)    
## (Intercept)   9.5673     4.2181   2.268   0.0262 *  
## X.4           0.1371     0.1603   0.855   0.3951    
## Y.1           0.5738     0.1028   5.581 3.74e-07 ***
## Y.3           0.1028     0.1234   0.833   0.4074    
## Y.4           0.1350     0.1161   1.163   0.2484    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 14.29 on 74 degrees of freedom
## Multiple R-squared:  0.5349, Adjusted R-squared:  0.5097 
## F-statistic: 21.27 on 4 and 74 DF,  p-value: 1.042e-11
testing_ardl4_2 = ardlDlm(x = scc_test_big$leaving_home_dif, y = scc_test_big$dcases, p = 4, q = 4, remove = list(p = c(0,1,2,3), q=c(2,3)))
summary(testing_ardl4_2)
## 
## Time series regression with "ts" data:
## Start = 5, End = 83
## 
## Call:
## dynlm(formula = as.formula(model.text), data = data, start = 1)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -26.098 -11.370  -1.555   6.895  42.376 
## 
## Coefficients:
##             Estimate Std. Error t value Pr(>|t|)    
## (Intercept)  9.77916    4.20184   2.327   0.0226 *  
## X.4          0.12249    0.15898   0.770   0.4434    
## Y.1          0.60536    0.09540   6.345 1.52e-08 ***
## Y.4          0.19013    0.09518   1.997   0.0494 *  
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 14.26 on 75 degrees of freedom
## Multiple R-squared:  0.5305, Adjusted R-squared:  0.5117 
## F-statistic: 28.25 on 3 and 75 DF,  p-value: 2.493e-12
testing_ardl4_3 = ardlDlm(x = scc_test_big$leaving_home_dif, y = scc_test_big$dcases, p = 4, q = 4, remove = list(p = c(0,1,2,3), q=c(2,3,4)))
summary(testing_ardl4_3)
## 
## Time series regression with "ts" data:
## Start = 5, End = 83
## 
## Call:
## dynlm(formula = as.formula(model.text), data = data, start = 1)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -27.158  -9.306   0.076   6.320  51.786 
## 
## Coefficients:
##             Estimate Std. Error t value Pr(>|t|)    
## (Intercept) 10.71800    4.25681   2.518   0.0139 *  
## X.4          0.04611    0.15732   0.293   0.7703    
## Y.1          0.70893    0.08164   8.684 5.39e-13 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 14.54 on 76 degrees of freedom
## Multiple R-squared:  0.5055, Adjusted R-squared:  0.4925 
## F-statistic: 38.85 on 2 and 76 DF,  p-value: 2.382e-12
# all x lags, only 1 y lag
testing_ardl4_4 = ardlDlm(x = scc_test_big$leaving_home_dif, y = scc_test_big$dcases, p = 4, q = 4, remove = list(p = c(), q=c(2,3,4)))
summary(testing_ardl4_4)
## 
## Time series regression with "ts" data:
## Start = 5, End = 83
## 
## Call:
## dynlm(formula = as.formula(model.text), data = data, start = 1)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -25.510  -9.509  -0.167   8.030  47.065 
## 
## Coefficients:
##             Estimate Std. Error t value Pr(>|t|)    
## (Intercept) 10.01316    4.63573   2.160   0.0341 *  
## X.t          1.02445    0.55073   1.860   0.0669 .  
## X.1         -1.46539    0.68853  -2.128   0.0367 *  
## X.2          0.06459    0.66124   0.098   0.9225    
## X.3         -0.46978    0.68276  -0.688   0.4936    
## X.4          0.86883    0.48962   1.774   0.0802 .  
## Y.1          0.71039    0.08201   8.663 8.88e-13 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 14.21 on 72 degrees of freedom
## Multiple R-squared:  0.5523, Adjusted R-squared:  0.515 
## F-statistic:  14.8 on 6 and 72 DF,  p-value: 6.109e-11
# removing x lags with only 1 y lag
testing_ardl4_5 = ardlDlm(x = scc_test_big$leaving_home_dif, y = scc_test_big$dcases, p = 4, q = 4, remove = list(p = c(0,1,2,3), q=c(2,3,4)))
summary(testing_ardl4_5)
## 
## Time series regression with "ts" data:
## Start = 5, End = 83
## 
## Call:
## dynlm(formula = as.formula(model.text), data = data, start = 1)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -27.158  -9.306   0.076   6.320  51.786 
## 
## Coefficients:
##             Estimate Std. Error t value Pr(>|t|)    
## (Intercept) 10.71800    4.25681   2.518   0.0139 *  
## X.4          0.04611    0.15732   0.293   0.7703    
## Y.1          0.70893    0.08164   8.684 5.39e-13 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 14.54 on 76 degrees of freedom
## Multiple R-squared:  0.5055, Adjusted R-squared:  0.4925 
## F-statistic: 38.85 on 2 and 76 DF,  p-value: 2.382e-12
testing_ardl4_6= ardlDlm(x = scc_test_big$leaving_home_dif, y = scc_test_big$dcases, p = 4, q = 4, remove = list(p = c(0,1,2), q=c(2,3,4)))
summary(testing_ardl4_6)
## 
## Time series regression with "ts" data:
## Start = 5, End = 83
## 
## Call:
## dynlm(formula = as.formula(model.text), data = data, start = 1)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -25.647 -10.337   0.241   7.421  51.455 
## 
## Coefficients:
##             Estimate Std. Error t value Pr(>|t|)    
## (Intercept) 10.21141    4.27294   2.390   0.0194 *  
## X.3         -0.55253    0.49015  -1.127   0.2632    
## X.4          0.55895    0.48128   1.161   0.2492    
## Y.1          0.69368    0.08261   8.397 2.09e-12 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 14.51 on 75 degrees of freedom
## Multiple R-squared:  0.5138, Adjusted R-squared:  0.4943 
## F-statistic: 26.42 on 3 and 75 DF,  p-value: 9.134e-12
testing_ardl4_7= ardlDlm(x = scc_test_big$leaving_home_dif, y = scc_test_big$dcases, p = 4, q = 4, remove = list(p = c(0,1), q=c(2,3,4)))
summary(testing_ardl4_7)
## 
## Time series regression with "ts" data:
## Start = 5, End = 83
## 
## Call:
## dynlm(formula = as.formula(model.text), data = data, start = 1)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -25.629 -11.017   0.545   7.257  50.039 
## 
## Coefficients:
##             Estimate Std. Error t value Pr(>|t|)    
## (Intercept)  9.20622    4.36471   2.109   0.0383 *  
## X.2         -0.56589    0.51641  -1.096   0.2767    
## X.3         -0.05961    0.66478  -0.090   0.9288    
## X.4          0.58609    0.48128   1.218   0.2272    
## Y.1          0.68726    0.08271   8.310 3.36e-12 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 14.49 on 74 degrees of freedom
## Multiple R-squared:  0.5215, Adjusted R-squared:  0.4957 
## F-statistic: 20.17 on 4 and 74 DF,  p-value: 2.895e-11
# looking at different y lags included on their own with all x lags
testing_ardl4_7= ardlDlm(x = scc_test_big$leaving_home_dif, y = scc_test_big$dcases, p = 4, q = 4, remove = list(p = c(0), q=c(2,3,4)))
summary(testing_ardl4_7)
## 
## Time series regression with "ts" data:
## Start = 5, End = 83
## 
## Call:
## dynlm(formula = as.formula(model.text), data = data, start = 1)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -24.550 -11.440  -0.434   8.004  49.430 
## 
## Coefficients:
##             Estimate Std. Error t value Pr(>|t|)    
## (Intercept)  7.67194    4.53616   1.691    0.095 .  
## X.1         -0.64379    0.53702  -1.199    0.234    
## X.2         -0.05349    0.66918  -0.080    0.937    
## X.3         -0.09676    0.66355  -0.146    0.884    
## X.4          0.69998    0.48917   1.431    0.157    
## Y.1          0.68791    0.08247   8.342 3.21e-12 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 14.45 on 73 degrees of freedom
## Multiple R-squared:  0.5308, Adjusted R-squared:  0.4986 
## F-statistic: 16.52 on 5 and 73 DF,  p-value: 7.072e-11
testing_ardl4_8= ardlDlm(x = scc_test_big$leaving_home_dif, y = scc_test_big$dcases, p = 4, q = 4, remove = list(p = c(0), q=c(1,2,3)))
summary(testing_ardl4_8)
## 
## Time series regression with "ts" data:
## Start = 5, End = 83
## 
## Call:
## dynlm(formula = as.formula(model.text), data = data, start = 1)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -33.605 -11.441  -3.405   7.935  44.179 
## 
## Coefficients:
##              Estimate Std. Error t value Pr(>|t|)    
## (Intercept) 17.653294   5.250084   3.362  0.00123 ** 
## X.1         -0.003434   0.675157  -0.005  0.99596    
## X.2         -0.152528   0.824765  -0.185  0.85379    
## X.3         -0.435355   0.815715  -0.534  0.59516    
## X.4          0.613803   0.608463   1.009  0.31641    
## Y.4          0.491134   0.107465   4.570 1.94e-05 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 17.81 on 73 degrees of freedom
## Multiple R-squared:  0.2874, Adjusted R-squared:  0.2386 
## F-statistic: 5.888 on 5 and 73 DF,  p-value: 0.0001261
testing_ardl4_9= ardlDlm(x = scc_test_big$leaving_home_dif, y = scc_test_big$dcases, p = 4, q = 4, remove = list(p = c(0), q=c(1,2,4)))
summary(testing_ardl4_9)
## 
## Time series regression with "ts" data:
## Start = 5, End = 83
## 
## Call:
## dynlm(formula = as.formula(model.text), data = data, start = 1)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -36.010 -12.295  -2.812   9.074  48.404 
## 
## Coefficients:
##             Estimate Std. Error t value Pr(>|t|)    
## (Intercept)  15.4530     5.1167   3.020  0.00348 ** 
## X.1           0.1272     0.6512   0.195  0.84570    
## X.2          -0.3449     0.7915  -0.436  0.66429    
## X.3          -0.9082     0.7880  -1.153  0.25283    
## X.4           1.1362     0.5774   1.968  0.05291 .  
## Y.3           0.5397     0.1010   5.345 9.88e-07 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 17.12 on 73 degrees of freedom
## Multiple R-squared:  0.3413, Adjusted R-squared:  0.2962 
## F-statistic: 7.565 on 5 and 73 DF,  p-value: 9.044e-06
testing_ardl4_10= ardlDlm(x = scc_test_big$leaving_home_dif, y = scc_test_big$dcases, p = 4, q = 4, remove = list(p = c(0), q=c(1,3,4)))
summary(testing_ardl4_10)
## 
## Time series regression with "ts" data:
## Start = 5, End = 83
## 
## Call:
## dynlm(formula = as.formula(model.text), data = data, start = 1)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -36.559  -9.336  -3.185   7.394  36.846 
## 
## Coefficients:
##             Estimate Std. Error t value Pr(>|t|)    
## (Intercept) 12.01964    4.92482   2.441   0.0171 *  
## X.1          0.04675    0.60711   0.077   0.9388    
## X.2         -0.94255    0.74948  -1.258   0.2125    
## X.3         -0.06290    0.74056  -0.085   0.9325    
## X.4          0.92419    0.54338   1.701   0.0932 .  
## Y.2          0.60176    0.09306   6.467 9.99e-09 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 16.1 on 73 degrees of freedom
## Multiple R-squared:  0.4173, Adjusted R-squared:  0.3774 
## F-statistic: 10.46 on 5 and 73 DF,  p-value: 1.365e-07

leaving and dlog_cases for up to 5 lags

# all y lags, only x lag of 5
testing_ardl5 = ardlDlm(x = scc_test_big$leaving_home_dif, y = scc_test_big$dlog_cases, p = 5, q = 5, remove = list(p = c(0,1,2,3,4), q=c()))
summary(testing_ardl5)
## 
## Time series regression with "ts" data:
## Start = 6, End = 83
## 
## Call:
## dynlm(formula = as.formula(model.text), data = data, start = 1)
## 
## Residuals:
##       Min        1Q    Median        3Q       Max 
## -0.108106 -0.006086 -0.000262  0.007073  0.111391 
## 
## Coefficients:
##               Estimate Std. Error t value Pr(>|t|)    
## (Intercept)  0.0068988  0.0246441   0.280  0.78034    
## X.5          0.0002445  0.0007904   0.309  0.75798    
## Y.1          0.3067529  0.1047435   2.929  0.00457 ** 
## Y.2          0.1859873  0.1059526   1.755  0.08351 .  
## Y.3         -0.0460042  0.1024420  -0.449  0.65474    
## Y.4          0.4148357  0.0849609   4.883 6.23e-06 ***
## Y.5          0.0041610  0.0808347   0.051  0.95909    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.0291 on 71 degrees of freedom
## Multiple R-squared:  0.8557, Adjusted R-squared:  0.8435 
## F-statistic: 70.17 on 6 and 71 DF,  p-value: < 2.2e-16
# all x lags, only y lag 1
testing_ardl5_1 = ardlDlm(x = scc_test_big$leaving_home_dif, y = scc_test_big$dlog_cases, p = 5, q = 5, remove = list(p = c(), q=c(2,3,4,5)))
summary(testing_ardl5_1)
## 
## Time series regression with "ts" data:
## Start = 6, End = 83
## 
## Call:
## dynlm(formula = as.formula(model.text), data = data, start = 1)
## 
## Residuals:
##       Min        1Q    Median        3Q       Max 
## -0.113862 -0.014509 -0.004236  0.013900  0.093738 
## 
## Coefficients:
##               Estimate Std. Error t value Pr(>|t|)    
## (Intercept)  1.172e-01  2.539e-02   4.617 1.72e-05 ***
## X.t          4.420e-03  1.183e-03   3.738 0.000376 ***
## X.1         -2.029e-03  1.563e-03  -1.298 0.198478    
## X.2          3.729e-05  1.456e-03   0.026 0.979634    
## X.3         -1.564e-03  1.460e-03  -1.071 0.287756    
## X.4          3.189e-03  1.454e-03   2.193 0.031652 *  
## X.5         -2.444e-04  1.089e-03  -0.224 0.823131    
## Y.1          4.700e-01  1.057e-01   4.445 3.22e-05 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.03006 on 70 degrees of freedom
## Multiple R-squared:  0.8482, Adjusted R-squared:  0.833 
## F-statistic: 55.87 on 7 and 70 DF,  p-value: < 2.2e-16
# all x lags, only y lag 2
testing_ardl5_2 = ardlDlm(x = scc_test_big$leaving_home_dif, y = scc_test_big$dlog_cases, p = 5, q = 5, remove = list(p = c(), q=c(1,3,4,5)))
summary(testing_ardl5_2)
## 
## Time series regression with "ts" data:
## Start = 6, End = 83
## 
## Call:
## dynlm(formula = as.formula(model.text), data = data, start = 1)
## 
## Residuals:
##       Min        1Q    Median        3Q       Max 
## -0.143847 -0.013093 -0.002237  0.013402  0.069980 
## 
## Coefficients:
##               Estimate Std. Error t value Pr(>|t|)    
## (Intercept)  0.1259712  0.0237767   5.298 1.29e-06 ***
## X.t          0.0039304  0.0011738   3.348  0.00131 ** 
## X.1          0.0010518  0.0014714   0.715  0.47708    
## X.2         -0.0019492  0.0015316  -1.273  0.20736    
## X.3         -0.0015801  0.0014634  -1.080  0.28397    
## X.4          0.0023156  0.0014536   1.593  0.11566    
## X.5          0.0003678  0.0010576   0.348  0.72909    
## Y.2          0.4454660  0.1011695   4.403 3.75e-05 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.03012 on 70 degrees of freedom
## Multiple R-squared:  0.8476, Adjusted R-squared:  0.8323 
## F-statistic:  55.6 on 7 and 70 DF,  p-value: < 2.2e-16
# adding in x lags
testing_ardl5_3 = ardlDlm(x = scc_test_big$leaving_home_dif, y = scc_test_big$dlog_cases, p = 5, q = 5, remove = list(p = c(0), q=c(1,3,4,5)))
summary(testing_ardl5_3)
## 
## Time series regression with "ts" data:
## Start = 6, End = 83
## 
## Call:
## dynlm(formula = as.formula(model.text), data = data, start = 1)
## 
## Residuals:
##       Min        1Q    Median        3Q       Max 
## -0.151173 -0.016913 -0.004573  0.012925  0.080272 
## 
## Coefficients:
##               Estimate Std. Error t value Pr(>|t|)    
## (Intercept)  0.1172594  0.0252765   4.639 1.56e-05 ***
## X.1          0.0040260  0.0012546   3.209 0.002000 ** 
## X.2         -0.0022846  0.0016346  -1.398 0.166568    
## X.3         -0.0001919  0.0015010  -0.128 0.898610    
## X.4          0.0019706  0.0015507   1.271 0.207958    
## X.5          0.0002684  0.0011307   0.237 0.813074    
## Y.2          0.4267191  0.1080348   3.950 0.000182 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.03222 on 71 degrees of freedom
## Multiple R-squared:  0.8231, Adjusted R-squared:  0.8082 
## F-statistic: 55.08 on 6 and 71 DF,  p-value: < 2.2e-16
testing_ardl5_4 = ardlDlm(x = scc_test_big$leaving_home_dif, y = scc_test_big$dlog_cases, p = 5, q = 5, remove = list(p = c(0,1), q=c(1,3,4,5)))
summary(testing_ardl5_4)
## 
## Time series regression with "ts" data:
## Start = 6, End = 83
## 
## Call:
## dynlm(formula = as.formula(model.text), data = data, start = 1)
## 
## Residuals:
##       Min        1Q    Median        3Q       Max 
## -0.156426 -0.013784 -0.005954  0.011286  0.088204 
## 
## Coefficients:
##               Estimate Std. Error t value Pr(>|t|)    
## (Intercept)  0.1184173  0.0268562   4.409 3.56e-05 ***
## X.2          0.0010913  0.0013294   0.821  0.41439    
## X.3         -0.0005294  0.0015910  -0.333  0.74029    
## X.4          0.0034968  0.0015684   2.230  0.02890 *  
## X.5         -0.0002933  0.0011870  -0.247  0.80556    
## Y.2          0.3706024  0.1132847   3.271  0.00164 ** 
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.03423 on 72 degrees of freedom
## Multiple R-squared:  0.7975, Adjusted R-squared:  0.7834 
## F-statistic: 56.71 on 5 and 72 DF,  p-value: < 2.2e-16
testing_ardl5_5 = ardlDlm(x = scc_test_big$leaving_home_dif, y = scc_test_big$dlog_cases, p = 5, q = 5, remove = list(p = c(0,1,2), q=c(1,3,4,5)))
summary(testing_ardl5_5)
## 
## Time series regression with "ts" data:
## Start = 6, End = 83
## 
## Call:
## dynlm(formula = as.formula(model.text), data = data, start = 1)
## 
## Residuals:
##      Min       1Q   Median       3Q      Max 
## -0.15622 -0.01280 -0.00626  0.01237  0.09327 
## 
## Coefficients:
##               Estimate Std. Error t value Pr(>|t|)    
## (Intercept)  0.1104363  0.0249789   4.421 3.36e-05 ***
## X.3          0.0002322  0.0012897   0.180 0.857639    
## X.4          0.0034391  0.0015633   2.200 0.030979 *  
## X.5         -0.0001679  0.0011745  -0.143 0.886738    
## Y.2          0.3959393  0.1087558   3.641 0.000505 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.03416 on 73 degrees of freedom
## Multiple R-squared:  0.7956, Adjusted R-squared:  0.7844 
## F-statistic: 71.04 on 4 and 73 DF,  p-value: < 2.2e-16
testing_ardl5_6 = ardlDlm(x = scc_test_big$leaving_home_dif, y = scc_test_big$dlog_cases, p = 5, q = 5, remove = list(p = c(0,1,2,3), q=c(1,3,4,5)))
summary(testing_ardl5_6)
## 
## Time series regression with "ts" data:
## Start = 6, End = 83
## 
## Call:
## dynlm(formula = as.formula(model.text), data = data, start = 1)
## 
## Residuals:
##       Min        1Q    Median        3Q       Max 
## -0.155635 -0.012711 -0.006336  0.012753  0.094686 
## 
## Coefficients:
##               Estimate Std. Error t value Pr(>|t|)    
## (Intercept)  0.1086353  0.0227376   4.778 8.76e-06 ***
## X.4          0.0036217  0.0011815   3.065 0.003034 ** 
## X.5         -0.0001774  0.0011656  -0.152 0.879445    
## Y.2          0.4024312  0.1019295   3.948 0.000178 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.03393 on 74 degrees of freedom
## Multiple R-squared:  0.7955, Adjusted R-squared:  0.7872 
## F-statistic: 95.96 on 3 and 74 DF,  p-value: < 2.2e-16
testing_ardl5_7 = ardlDlm(x = scc_test_big$leaving_home_dif, y = scc_test_big$dlog_cases, p = 5, q = 5, remove = list(p = c(0,1,2,3,4), q=c(1,3,4,5)))
summary(testing_ardl5_7)
## 
## Time series regression with "ts" data:
## Start = 6, End = 83
## 
## Call:
## dynlm(formula = as.formula(model.text), data = data, start = 1)
## 
## Residuals:
##       Min        1Q    Median        3Q       Max 
## -0.156542 -0.012049 -0.005309  0.009135  0.128707 
## 
## Coefficients:
##              Estimate Std. Error t value Pr(>|t|)    
## (Intercept) 0.0849432  0.0225490   3.767 0.000327 ***
## X.5         0.0026653  0.0007446   3.579 0.000608 ***
## Y.2         0.4925323  0.1029179   4.786 8.35e-06 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.03578 on 75 degrees of freedom
## Multiple R-squared:  0.7695, Adjusted R-squared:  0.7634 
## F-statistic: 125.2 on 2 and 75 DF,  p-value: < 2.2e-16
# trying different single y lags with only one x lag (4)
testing_ardl5_9 = ardlDlm(x = scc_test_big$leaving_home_dif, y = scc_test_big$dlog_cases, p = 5, q = 5, remove = list(p = c(0,1,2,3,5), q=c(2,3,4,5)))
summary(testing_ardl5_9)
## 
## Time series regression with "ts" data:
## Start = 5, End = 83
## 
## Call:
## dynlm(formula = as.formula(model.text), data = data, start = 1)
## 
## Residuals:
##       Min        1Q    Median        3Q       Max 
## -0.128342 -0.013379 -0.006118  0.008368  0.123906 
## 
## Coefficients:
##              Estimate Std. Error t value Pr(>|t|)    
## (Intercept) 0.0959359  0.0213823   4.487 2.53e-05 ***
## X.4         0.0030422  0.0007068   4.304 4.94e-05 ***
## Y.1         0.4995973  0.0975490   5.122 2.23e-06 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.03397 on 76 degrees of freedom
## Multiple R-squared:  0.8076, Adjusted R-squared:  0.8026 
## F-statistic: 159.5 on 2 and 76 DF,  p-value: < 2.2e-16
testing_ardl5_10 = ardlDlm(x = scc_test_big$leaving_home_dif, y = scc_test_big$dlog_cases, p = 5, q = 5, remove = list(p = c(0,1,2,3,5), q=c(1,3,4,5)))
summary(testing_ardl5_10)
## 
## Time series regression with "ts" data:
## Start = 5, End = 83
## 
## Call:
## dynlm(formula = as.formula(model.text), data = data, start = 1)
## 
## Residuals:
##       Min        1Q    Median        3Q       Max 
## -0.159944 -0.013787 -0.007115  0.012500  0.129400 
## 
## Coefficients:
##              Estimate Std. Error t value Pr(>|t|)    
## (Intercept) 0.1246404  0.0232551   5.360 8.63e-07 ***
## X.4         0.0039543  0.0007644   5.173 1.81e-06 ***
## Y.2         0.3540167  0.1056597   3.351  0.00126 ** 
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.03677 on 76 degrees of freedom
## Multiple R-squared:  0.7745, Adjusted R-squared:  0.7686 
## F-statistic: 130.5 on 2 and 76 DF,  p-value: < 2.2e-16
testing_ardl5_11 = ardlDlm(x = scc_test_big$leaving_home_dif, y = scc_test_big$dlog_cases, p = 5, q = 5, remove = list(p = c(0,1,2,3,5), q=c(1,2,4,5)))
summary(testing_ardl5_11)
## 
## Time series regression with "ts" data:
## Start = 5, End = 83
## 
## Call:
## dynlm(formula = as.formula(model.text), data = data, start = 1)
## 
## Residuals:
##       Min        1Q    Median        3Q       Max 
## -0.146867 -0.013290 -0.006858  0.006968  0.117459 
## 
## Coefficients:
##              Estimate Std. Error t value Pr(>|t|)    
## (Intercept) 0.1268207  0.0219254   5.784 1.53e-07 ***
## X.4         0.0040316  0.0007228   5.578 3.57e-07 ***
## Y.3         0.3261027  0.0935727   3.485  0.00082 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.03658 on 76 degrees of freedom
## Multiple R-squared:  0.7769, Adjusted R-squared:  0.771 
## F-statistic: 132.3 on 2 and 76 DF,  p-value: < 2.2e-16
testing_ardl5_12 = ardlDlm(x = scc_test_big$leaving_home_dif, y = scc_test_big$dlog_cases, p = 5, q = 5, remove = list(p = c(0,1,2,3,5), q=c(1,2,3,5)))
summary(testing_ardl5_12)
## 
## Time series regression with "ts" data:
## Start = 5, End = 83
## 
## Call:
## dynlm(formula = as.formula(model.text), data = data, start = 1)
## 
## Residuals:
##      Min       1Q   Median       3Q      Max 
## -0.12448 -0.01460 -0.00846  0.01312  0.16135 
## 
## Coefficients:
##              Estimate Std. Error t value Pr(>|t|)    
## (Intercept) 0.1321876  0.0215795   6.126 3.69e-08 ***
## X.4         0.0041998  0.0007128   5.892 9.82e-08 ***
## Y.4         0.3024591  0.0921180   3.283  0.00155 ** 
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.03687 on 76 degrees of freedom
## Multiple R-squared:  0.7734, Adjusted R-squared:  0.7674 
## F-statistic: 129.7 on 2 and 76 DF,  p-value: < 2.2e-16
testing_ardl5_13 = ardlDlm(x = scc_test_big$leaving_home_dif, y = scc_test_big$dlog_cases, p = 5, q = 5, remove = list(p = c(0,1,2,3,5), q=c(1,2,3,4)))
summary(testing_ardl5_13)
## 
## Time series regression with "ts" data:
## Start = 6, End = 83
## 
## Call:
## dynlm(formula = as.formula(model.text), data = data, start = 1)
## 
## Residuals:
##       Min        1Q    Median        3Q       Max 
## -0.109916 -0.016075 -0.007203  0.011628  0.128675 
## 
## Coefficients:
##              Estimate Std. Error t value Pr(>|t|)    
## (Intercept) 0.1491966  0.0176869   8.435 1.77e-12 ***
## X.4         0.0047611  0.0005858   8.128 6.82e-12 ***
## Y.5         0.2057945  0.0751397   2.739   0.0077 ** 
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.03554 on 75 degrees of freedom
## Multiple R-squared:  0.7727, Adjusted R-squared:  0.7666 
## F-statistic: 127.5 on 2 and 75 DF,  p-value: < 2.2e-16
# compare to just having different x values on their own with single y lag
testing_ardl5_14 = ardlDlm(x = scc_test_big$leaving_home_dif, y = scc_test_big$dlog_cases, p = 5, q = 5, remove = list(p = c(0,4,2,3,5), q=c(1,2,3,4)))
summary(testing_ardl5_14)
## 
## Time series regression with "ts" data:
## Start = 6, End = 83
## 
## Call:
## dynlm(formula = as.formula(model.text), data = data, start = 1)
## 
## Residuals:
##       Min        1Q    Median        3Q       Max 
## -0.097008 -0.020340 -0.005162  0.017580  0.116528 
## 
## Coefficients:
##              Estimate Std. Error t value Pr(>|t|)    
## (Intercept) 0.1505429  0.0154646   9.735 6.04e-15 ***
## X.1         0.0049111  0.0005201   9.442 2.16e-14 ***
## Y.5         0.3136484  0.0589194   5.323 1.02e-06 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.03294 on 75 degrees of freedom
## Multiple R-squared:  0.8047, Adjusted R-squared:  0.7995 
## F-statistic: 154.5 on 2 and 75 DF,  p-value: < 2.2e-16
testing_ardl5_15 = ardlDlm(x = scc_test_big$leaving_home_dif, y = scc_test_big$dlog_cases, p = 5, q = 5, remove = list(p = c(0,4,1,3,5), q=c(1,2,3,4)))
summary(testing_ardl5_15)
## 
## Time series regression with "ts" data:
## Start = 6, End = 83
## 
## Call:
## dynlm(formula = as.formula(model.text), data = data, start = 1)
## 
## Residuals:
##       Min        1Q    Median        3Q       Max 
## -0.110907 -0.018323 -0.006812  0.013075  0.122121 
## 
## Coefficients:
##              Estimate Std. Error t value Pr(>|t|)    
## (Intercept) 0.1421199  0.0170519   8.335 2.75e-12 ***
## X.2         0.0045664  0.0005687   8.030 1.05e-11 ***
## Y.5         0.2959853  0.0676737   4.374 3.88e-05 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.03574 on 75 degrees of freedom
## Multiple R-squared:  0.7701, Adjusted R-squared:  0.764 
## F-statistic: 125.6 on 2 and 75 DF,  p-value: < 2.2e-16
testing_ardl5_16 = ardlDlm(x = scc_test_big$leaving_home_dif, y = scc_test_big$dlog_cases, p = 5, q = 5, remove = list(p = c(0,4,1,2,5), q=c(1,2,3,4)))
summary(testing_ardl5_16)
## 
## Time series regression with "ts" data:
## Start = 6, End = 83
## 
## Call:
## dynlm(formula = as.formula(model.text), data = data, start = 1)
## 
## Residuals:
##       Min        1Q    Median        3Q       Max 
## -0.114060 -0.016927 -0.006633  0.009952  0.122076 
## 
## Coefficients:
##              Estimate Std. Error t value Pr(>|t|)    
## (Intercept) 0.1393408  0.0169013   8.244 4.09e-12 ***
## X.3         0.0044608  0.0005618   7.940 1.55e-11 ***
## Y.5         0.2761199  0.0699722   3.946 0.000177 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.03592 on 75 degrees of freedom
## Multiple R-squared:  0.7677, Adjusted R-squared:  0.7615 
## F-statistic:   124 on 2 and 75 DF,  p-value: < 2.2e-16
testing_ardl5_17 = ardlDlm(x = scc_test_big$leaving_home_dif, y = scc_test_big$dlog_cases, p = 5, q = 5, remove = list(p = c(0,4,1,2,3), q=c(1,2,3,4)))
summary(testing_ardl5_17)
## 
## Time series regression with "ts" data:
## Start = 6, End = 83
## 
## Call:
## dynlm(formula = as.formula(model.text), data = data, start = 1)
## 
## Residuals:
##       Min        1Q    Median        3Q       Max 
## -0.107410 -0.019604 -0.008113  0.010532  0.173450 
## 
## Coefficients:
##              Estimate Std. Error t value Pr(>|t|)    
## (Intercept) 0.1476161  0.0234921   6.284 1.98e-08 ***
## X.5         0.0046328  0.0007758   5.971 7.30e-08 ***
## Y.5         0.1697628  0.1003031   1.692   0.0947 .  
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.04012 on 75 degrees of freedom
## Multiple R-squared:  0.7102, Adjusted R-squared:  0.7025 
## F-statistic: 91.92 on 2 and 75 DF,  p-value: < 2.2e-16

Compare with up to 10 lags in x, 5 in y

# all y, all x
testing_ardl10 = ardlDlm(x = scc_test_big$leaving_home_dif, y = scc_test_big$dlog_cases, p = 10, q = 5, remove = list(p = c(), q=c()))
summary(testing_ardl10)
## 
## Time series regression with "ts" data:
## Start = 11, End = 83
## 
## Call:
## dynlm(formula = as.formula(model.text), data = data, start = 1)
## 
## Residuals:
##       Min        1Q    Median        3Q       Max 
## -0.052327 -0.009648 -0.001067  0.009745  0.040967 
## 
## Coefficients:
##               Estimate Std. Error t value Pr(>|t|)    
## (Intercept)  6.641e-02  2.633e-02   2.522  0.01453 *  
## X.t          2.840e-03  8.595e-04   3.304  0.00167 ** 
## X.1         -1.285e-05  1.018e-03  -0.013  0.98997    
## X.2         -1.956e-05  9.888e-04  -0.020  0.98429    
## X.3         -5.707e-04  1.008e-03  -0.566  0.57341    
## X.4         -2.560e-04  1.000e-03  -0.256  0.79893    
## X.5         -5.262e-04  1.011e-03  -0.521  0.60472    
## X.6         -1.029e-03  9.663e-04  -1.064  0.29174    
## X.7          2.332e-03  9.977e-04   2.338  0.02300 *  
## X.8         -1.902e-04  1.030e-03  -0.185  0.85414    
## X.9         -8.042e-04  9.888e-04  -0.813  0.41951    
## X.10         5.414e-04  7.989e-04   0.678  0.50072    
## Y.1          3.822e-01  1.324e-01   2.885  0.00554 ** 
## Y.2          1.441e-01  1.166e-01   1.236  0.22151    
## Y.3         -1.363e-01  9.442e-02  -1.444  0.15440    
## Y.4          3.329e-01  9.024e-02   3.689  0.00051 ***
## Y.5         -3.219e-04  8.465e-02  -0.004  0.99698    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.01745 on 56 degrees of freedom
## Multiple R-squared:  0.937,  Adjusted R-squared:  0.919 
## F-statistic: 52.03 on 16 and 56 DF,  p-value: < 2.2e-16
# all x, only one y
testing_ardl10_1 = ardlDlm(x = scc_test_big$leaving_home_dif, y = scc_test_big$dlog_cases, p = 10, q = 5, remove = list(p = c(), q=c(1,2,3,4)))
summary(testing_ardl10_1)
## 
## Time series regression with "ts" data:
## Start = 11, End = 83
## 
## Call:
## dynlm(formula = as.formula(model.text), data = data, start = 1)
## 
## Residuals:
##       Min        1Q    Median        3Q       Max 
## -0.040717 -0.012324 -0.002844  0.010063  0.082275 
## 
## Coefficients:
##               Estimate Std. Error t value Pr(>|t|)    
## (Intercept)  0.1626336  0.0205950   7.897 7.35e-11 ***
## X.t          0.0026675  0.0010335   2.581  0.01231 *  
## X.1          0.0009585  0.0012016   0.798  0.42823    
## X.2          0.0009341  0.0011911   0.784  0.43598    
## X.3         -0.0011408  0.0011812  -0.966  0.33804    
## X.4          0.0012945  0.0011562   1.120  0.26731    
## X.5         -0.0015152  0.0012510  -1.211  0.23056    
## X.6         -0.0007200  0.0011749  -0.613  0.54230    
## X.7          0.0008758  0.0012106   0.723  0.47221    
## X.8          0.0016138  0.0011908   1.355  0.18041    
## X.9         -0.0008440  0.0012060  -0.700  0.48677    
## X.10         0.0013512  0.0009003   1.501  0.13865    
## Y.5          0.2704950  0.0820016   3.299  0.00164 ** 
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.02196 on 60 degrees of freedom
## Multiple R-squared:  0.893,  Adjusted R-squared:  0.8716 
## F-statistic: 41.75 on 12 and 60 DF,  p-value: < 2.2e-16
# testing single x values with only one y
testing_ardl10_2 = ardlDlm(x = scc_test_big$leaving_home_dif, y = scc_test_big$dlog_cases, p = 10, q = 5, remove = list(p = c(0,1,2,3,4,5,6,7,8,9), q=c(1,2,3,4)))
summary(testing_ardl10_2)
## 
## Time series regression with "ts" data:
## Start = 11, End = 83
## 
## Call:
## dynlm(formula = as.formula(model.text), data = data, start = 1)
## 
## Residuals:
##       Min        1Q    Median        3Q       Max 
## -0.071381 -0.008366 -0.002362  0.007912  0.161808 
## 
## Coefficients:
##              Estimate Std. Error t value Pr(>|t|)    
## (Intercept) 0.0866566  0.0176744   4.903 5.90e-06 ***
## X.10        0.0027729  0.0005866   4.727 1.14e-05 ***
## Y.5         0.3389148  0.0861173   3.936 0.000193 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.03005 on 70 degrees of freedom
## Multiple R-squared:  0.7664, Adjusted R-squared:  0.7598 
## F-statistic: 114.9 on 2 and 70 DF,  p-value: < 2.2e-16
testing_ardl10_3 = ardlDlm(x = scc_test_big$leaving_home_dif, y = scc_test_big$dlog_cases, p = 10, q = 5, remove = list(p = c(0,1,2,3,4,5,6,7,8,10), q=c(1,2,3,4)))
summary(testing_ardl10_3)
## 
## Time series regression with "ts" data:
## Start = 10, End = 83
## 
## Call:
## dynlm(formula = as.formula(model.text), data = data, start = 1)
## 
## Residuals:
##       Min        1Q    Median        3Q       Max 
## -0.075595 -0.009953 -0.002685  0.009153  0.157696 
## 
## Coefficients:
##              Estimate Std. Error t value Pr(>|t|)    
## (Intercept) 0.0927292  0.0190856   4.859 6.83e-06 ***
## X.9         0.0029620  0.0006303   4.699 1.24e-05 ***
## Y.5         0.3308754  0.0886666   3.732  0.00038 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.03002 on 71 degrees of freedom
## Multiple R-squared:  0.783,  Adjusted R-squared:  0.7769 
## F-statistic: 128.1 on 2 and 71 DF,  p-value: < 2.2e-16
testing_ardl10_4 = ardlDlm(x = scc_test_big$leaving_home_dif, y = scc_test_big$dlog_cases, p = 10, q = 5, remove = list(p = c(0,1,2,3,4,5,6,7,9,10), q=c(1,2,3,4)))
summary(testing_ardl10_4)
## 
## Time series regression with "ts" data:
## Start = 9, End = 83
## 
## Call:
## dynlm(formula = as.formula(model.text), data = data, start = 1)
## 
## Residuals:
##       Min        1Q    Median        3Q       Max 
## -0.073893 -0.012403 -0.003338  0.007672  0.140461 
## 
## Coefficients:
##              Estimate Std. Error t value Pr(>|t|)    
## (Intercept) 0.1147839  0.0201761   5.689 2.57e-07 ***
## X.8         0.0037118  0.0006638   5.592 3.80e-07 ***
## Y.5         0.2834895  0.0919710   3.082  0.00291 ** 
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.03162 on 72 degrees of freedom
## Multiple R-squared:  0.7912, Adjusted R-squared:  0.7854 
## F-statistic: 136.4 on 2 and 72 DF,  p-value: < 2.2e-16
testing_ardl10_5 = ardlDlm(x = scc_test_big$leaving_home_dif, y = scc_test_big$dlog_cases, p = 10, q = 5, remove = list(p = c(0,1,2,3,4,5,6,8,9,10), q=c(1,2,3,4)))
summary(testing_ardl10_5)
## 
## Time series regression with "ts" data:
## Start = 8, End = 83
## 
## Call:
## dynlm(formula = as.formula(model.text), data = data, start = 1)
## 
## Residuals:
##       Min        1Q    Median        3Q       Max 
## -0.064374 -0.015042 -0.001944  0.009414  0.125368 
## 
## Coefficients:
##              Estimate Std. Error t value Pr(>|t|)    
## (Intercept) 0.1297144  0.0186653   6.950 1.29e-09 ***
## X.7         0.0042094  0.0006127   6.870 1.81e-09 ***
## Y.5         0.2252478  0.0847603   2.657  0.00967 ** 
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.02931 on 73 degrees of freedom
## Multiple R-squared:  0.8184, Adjusted R-squared:  0.8134 
## F-statistic: 164.4 on 2 and 73 DF,  p-value: < 2.2e-16
testing_ardl10_6 = ardlDlm(x = scc_test_big$leaving_home_dif, y = scc_test_big$dlog_cases, p = 10, q = 5, remove = list(p = c(0,1,2,3,4,5,7,8,9,10), q=c(1,2,3,4)))
summary(testing_ardl10_6)
## 
## Time series regression with "ts" data:
## Start = 7, End = 83
## 
## Call:
## dynlm(formula = as.formula(model.text), data = data, start = 1)
## 
## Residuals:
##       Min        1Q    Median        3Q       Max 
## -0.068762 -0.012592 -0.005739  0.007397  0.163328 
## 
## Coefficients:
##              Estimate Std. Error t value Pr(>|t|)    
## (Intercept) 0.1021358  0.0194043   5.264 1.33e-06 ***
## X.6         0.0032769  0.0006406   5.115 2.38e-06 ***
## Y.5         0.3481950  0.0828006   4.205 7.22e-05 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.03235 on 74 degrees of freedom
## Multiple R-squared:  0.7886, Adjusted R-squared:  0.7829 
## F-statistic:   138 on 2 and 74 DF,  p-value: < 2.2e-16

Zip code social distancing data and case data from SMC (old case data)

zctas_94 <- 
  zctas(cb = F, starts_with = "94") %>% 
  st_transform('+proj=longlat +datum=WGS84') %>%
  dplyr::select(ZCTA5CE10, geometry)

smc_bg_zctas <- smc_blockgroups %>% 
  st_centroid() %>%
  st_join(zctas_94) %>% 
  dplyr::select(GEOID, ZCTA5CE10) %>%
  rename(blockgroup = GEOID, zipcode = ZCTA5CE10)

sd_date_cutoff <- as.Date("2020-05-10")
pre_sd_date_cutoff <- as.Date("2020-03-01")
shelter_date <- as.Date("2020-03-16")

smc_sd_zip <- bay_sd %>%
  filter(origin_census_block_group %in% smc_blockgroups$GEOID) %>%
  filter(date <= sd_date_cutoff & date >= shelter_date)  %>% 
  left_join(smc_bg_zctas %>% dplyr::select(blockgroup, zipcode), by = c("origin_census_block_group" = "blockgroup")) %>% 
  group_by(zipcode) %>%
  summarize(total_at_home_zip = sum(completely_home_device_count), total_devices = sum(device_count)) %>% 
  mutate(
    percent_at_home = total_at_home_zip*100/total_devices, 
    percent_leaving_home = (100 - percent_at_home)
  )

# get block group averages leaving home before shelter in place/COVID-19
smc_pre_sd_leaving_avg <- bay_sd %>%
  filter(origin_census_block_group %in% smc_blockgroups$GEOID) %>%
  filter(date <= pre_sd_date_cutoff)  %>% 
  left_join(smc_bg_zctas %>% dplyr::select(blockgroup, zipcode), by = c("origin_census_block_group" = "blockgroup")) %>% 
  group_by(zipcode) %>%
  summarize(total_at_home_zip = sum(completely_home_device_count), total_devices = sum(device_count)) %>% 
  mutate(
    percent_at_home_pre = total_at_home_zip*100/total_devices, 
    percent_leaving_home_pre = (100 - percent_at_home_pre)
  )

# combine
smc_sd_zip <- smc_sd_zip %>% 
  left_join(smc_pre_sd_leaving_avg %>% dplyr::select(zipcode, percent_leaving_home_pre)) %>%
  mutate(dif_percent_leaving = percent_leaving_home - percent_leaving_home_pre)

# load case data, stored from what was released through 4/30
smc_zip_cases <- read_csv("smc_cases_4.29.2020.csv")

# join case data and social distancing data
smc_sd_cases_zip <- left_join(smc_zip_cases, smc_sd_zip)

smc_sd_cases_zip[smc_sd_cases_zip == "<10"] <- "10"

smc_sd_cases_zip <- smc_sd_cases_zip %>% mutate(cases = as.numeric(cases))

smc_sd_cases_zip %>% ggplot(
  aes(x = dif_percent_leaving, y = cases)) + geom_point() + geom_smooth(method=lm)

testing_zip_cases_sd_abs <- lm(cases ~ dif_percent_leaving, smc_sd_cases_zip, na.action = na.omit)

summary(testing_zip_cases_sd_abs)
## 
## Call:
## lm(formula = cases ~ dif_percent_leaving, data = smc_sd_cases_zip, 
##     na.action = na.omit)
## 
## Residuals:
##    Min     1Q Median     3Q    Max 
## -31.40 -28.69 -12.14  15.01 117.76 
## 
## Coefficients:
##                     Estimate Std. Error t value Pr(>|t|)
## (Intercept)          36.7619    34.9110   1.053    0.302
## dif_percent_leaving  -0.1736     1.3192  -0.132    0.896
## 
## Residual standard error: 36.82 on 25 degrees of freedom
##   (4 observations deleted due to missingness)
## Multiple R-squared:  0.0006919,  Adjusted R-squared:  -0.03928 
## F-statistic: 0.01731 on 1 and 25 DF,  p-value: 0.8964

Compare with demographic variables (also normalize by population). The chart below shows the difference in percent leaving home from shelter in place through 5/10 averaged over that time period relative to Jan+Feb 2020 and the cases normalized by population, with each point a zip code in San Mateo County.

# obtain the saved census data 
setwd("~/Documents/2020 Spring Quarter/CEE 218Z")
acs_vars = readRDS("censusData2018_acs_acs5.rds")
setwd("~/Documents/2020 Spring Quarter/CEE 218Z/covid19")

# define a function for pulling census data
pullCensus <- function(variableToPull, county) {
    regionString <- paste0("state:06+county:", county)
    censusData <- getCensus(
      name = "acs/acs5",
      vintage = 2018,
      region = "block group:*", 
      regionin = regionString,
      vars = variableToPull
    ) %>%
    mutate(blockgroup = paste0(state,county,tract,block_group)) %>%
      select_if(!names(.) %in% c("GEO_ID","state","county","tract","block_group","NAME"))
  
  return(censusData)
}

# get population data
smc_fips <- fips("CA", "San Mateo") %>% substr(3,5)

smc_pop_zip <- pullCensus("B01003_001E", smc_fips) %>% 
  left_join(smc_bg_zctas %>% dplyr::select(blockgroup, zipcode)) %>% 
  group_by(zipcode) %>%
  summarize(total_pop_zip = sum(B01003_001E))

smc_sd_cases_zip <- smc_sd_cases_zip %>% left_join(smc_pop_zip) %>%
  mutate(cases_by_pop = cases / total_pop_zip)

smc_sd_cases_zip %>% ggplot(
  aes(x = dif_percent_leaving, y = cases_by_pop)) + geom_point() + ggtitle("San Mateo County") + geom_smooth(method=lm)

testing_zip_cases_sd <- lm(cases_by_pop ~ dif_percent_leaving, smc_sd_cases_zip, na.action = na.omit)

summary(testing_zip_cases_sd)
## 
## Call:
## lm(formula = cases_by_pop ~ dif_percent_leaving, data = smc_sd_cases_zip, 
##     na.action = na.omit)
## 
## Residuals:
##        Min         1Q     Median         3Q        Max 
## -0.0033257 -0.0009319 -0.0005497  0.0000597  0.0153501 
## 
## Coefficients:
##                      Estimate Std. Error t value Pr(>|t|)   
## (Intercept)         0.0100169  0.0033124   3.024   0.0057 **
## dif_percent_leaving 0.0002906  0.0001252   2.322   0.0287 * 
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.003493 on 25 degrees of freedom
##   (4 observations deleted due to missingness)
## Multiple R-squared:  0.1774, Adjusted R-squared:  0.1445 
## F-statistic: 5.392 on 1 and 25 DF,  p-value: 0.02867
# try absolute percents leaving
smc_sd_cases_zip %>% ggplot(
  aes(x = percent_leaving_home, y = cases_by_pop)) + geom_point() + ggtitle("San Mateo County") + geom_smooth(method=lm)

testing_zip_cases_sd_abs <- lm(cases_by_pop ~ percent_leaving_home, smc_sd_cases_zip, na.action = na.omit)

summary(testing_zip_cases_sd_abs)
## 
## Call:
## lm(formula = cases_by_pop ~ percent_leaving_home, data = smc_sd_cases_zip, 
##     na.action = na.omit)
## 
## Residuals:
##        Min         1Q     Median         3Q        Max 
## -0.0033897 -0.0013731 -0.0008541  0.0003338  0.0154641 
## 
## Coefficients:
##                        Estimate Std. Error t value Pr(>|t|)  
## (Intercept)          -0.0175050  0.0095295  -1.837   0.0781 .
## percent_leaving_home  0.0003882  0.0001846   2.103   0.0457 *
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.00355 on 25 degrees of freedom
##   (4 observations deleted due to missingness)
## Multiple R-squared:  0.1503, Adjusted R-squared:  0.1163 
## F-statistic: 4.423 on 1 and 25 DF,  p-value: 0.04568

What if we look at the change in leaving home just in the first week after shelter in place? Inspired by the recent articles on how interventions in the immediate time frame might have been most important.

First let’s just see how the change in leaving home compares across zip codes.

# preserve dates 
smc_sd_zip_by_date <- bay_sd %>%
  filter(origin_census_block_group %in% smc_blockgroups$GEOID) %>%
  filter(date <= sd_date_cutoff & date >= shelter_date)  %>% 
  left_join(smc_bg_zctas %>% dplyr::select(blockgroup, zipcode), by = c("origin_census_block_group" = "blockgroup")) %>% 
  group_by(zipcode, date) %>%
  summarize(total_at_home_zip = sum(completely_home_device_count), total_devices = sum(device_count)) %>% 
  mutate(
    percent_at_home = total_at_home_zip*100/total_devices, 
    percent_leaving_home = (100 - percent_at_home)
  )

# combine with pre shelter in place data
smc_sd_zip_by_date <- smc_sd_zip_by_date %>% 
  left_join(smc_pre_sd_leaving_avg %>% dplyr::select(zipcode, percent_leaving_home_pre)) %>%
  mutate(dif_percent_leaving = percent_leaving_home - percent_leaving_home_pre)

smc_sd_zip_by_date %>% ggplot(
  aes(x = date)) +
    geom_line(aes(y = dif_percent_leaving, color=zipcode))

That’s a mess. But there is definitely variability.

# just look at the first week after the shelter in place order
smc_zip_week_post <- smc_sd_zip_by_date %>%
  dplyr::select(-c(percent_at_home, percent_leaving_home, dif_percent_leaving)) %>%
  filter(date <= shelter_date + 7) %>%
  ungroup() %>%
  group_by(zipcode) %>%
  summarize(total_at_home_week = sum(total_at_home_zip), total_devices_week = sum(total_devices)) %>%
  left_join(smc_sd_cases_zip %>% dplyr::select(zipcode, cases, cases_by_pop, percent_leaving_home_pre)) %>%
  mutate(percent_at_home_week = total_at_home_week*100/total_devices_week, 
    percent_leaving_home_week = (100 - percent_at_home_week),
    dif_percent_leaving_week = percent_leaving_home_week - percent_leaving_home_pre)

# plot
smc_zip_week_post %>% ggplot(
  aes(x = dif_percent_leaving_week, y = cases_by_pop)) + geom_point() + ggtitle("San Mateo County, difference in percent leaving for the week after shelter in place")

testing_zip_cases_sd <- lm(cases_by_pop ~ dif_percent_leaving_week, smc_zip_week_post, na.action = na.omit)

summary(testing_zip_cases_sd)
## 
## Call:
## lm(formula = cases_by_pop ~ dif_percent_leaving_week, data = smc_zip_week_post, 
##     na.action = na.omit)
## 
## Residuals:
##        Min         1Q     Median         3Q        Max 
## -0.0050029 -0.0011430 -0.0003392 -0.0000695  0.0146226 
## 
## Coefficients:
##                           Estimate Std. Error t value Pr(>|t|)   
## (Intercept)              0.0082126  0.0027149   3.025  0.00569 **
## dif_percent_leaving_week 0.0002549  0.0001170   2.179  0.03898 * 
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.003531 on 25 degrees of freedom
##   (2 observations deleted due to missingness)
## Multiple R-squared:  0.1596, Adjusted R-squared:  0.126 
## F-statistic: 4.747 on 1 and 25 DF,  p-value: 0.03898

Try adding in San Francisco county.

# geometries 
sf_blockgroups <- 
  block_groups("CA","San Francisco", cb=F, progress_bar=F) %>% 
  st_transform('+proj=longlat +datum=WGS84')

sf_bg_zctas <- sf_blockgroups %>% 
  st_centroid() %>%
  st_join(zctas_94) %>% 
  dplyr::select(GEOID, ZCTA5CE10) %>%
  rename(blockgroup = GEOID, zipcode = ZCTA5CE10)

# get cases from LA Times
sf_place_cases_max <- 
  read_csv("https://raw.githubusercontent.com/datadesk/california-coronavirus-data/master/latimes-place-totals.csv") %>%
  filter(county == 'San Francisco') %>%
  filter(date == max(date))

# get population data
sf_fips <- fips("CA", "San Francisco") %>% substr(3,5)

sf_pop_zip <- pullCensus("B01003_001E", sf_fips) %>% 
  left_join(sf_bg_zctas %>% dplyr::select(blockgroup, zipcode)) %>% 
  group_by(zipcode) %>%
  summarize(total_pop_zip = sum(B01003_001E))

sf_cases_zip <- sf_place_cases_max %>% left_join(sf_pop_zip, by = c("place" = "zipcode")) %>%
  mutate(cases_by_pop = confirmed_cases / total_pop_zip) %>%
  rename(zipcode = place)

# social distancing data
sf_sd_zip <- bay_sd %>%
  filter(origin_census_block_group %in% sf_blockgroups$GEOID) %>%
  filter(date <= sd_date_cutoff & date >= shelter_date)  %>% 
  left_join(sf_bg_zctas %>% dplyr::select(blockgroup, zipcode), by = c("origin_census_block_group" = "blockgroup")) %>% 
  group_by(zipcode) %>%
  summarize(total_at_home_zip = sum(completely_home_device_count), total_devices = sum(device_count)) %>% 
  mutate(
    percent_at_home = total_at_home_zip*100/total_devices, 
    percent_leaving_home = (100 - percent_at_home)
  )

# get block group averages leaving home before shelter in place/COVID-19
sf_pre_sd_leaving_avg <- bay_sd %>%
  filter(origin_census_block_group %in% sf_blockgroups$GEOID) %>%
  filter(date <= pre_sd_date_cutoff)  %>% 
  left_join(sf_bg_zctas %>% dplyr::select(blockgroup, zipcode), by = c("origin_census_block_group" = "blockgroup")) %>% 
  group_by(zipcode) %>%
  summarize(total_at_home_zip = sum(completely_home_device_count), total_devices = sum(device_count)) %>% 
  mutate(
    percent_at_home_pre = total_at_home_zip*100/total_devices, 
    percent_leaving_home_pre = (100 - percent_at_home_pre)
  )

# combine
sf_sd_zip <- sf_sd_zip %>% 
  left_join(sf_pre_sd_leaving_avg %>% dplyr::select(zipcode, percent_leaving_home_pre)) %>%
  mutate(dif_percent_leaving = percent_leaving_home - percent_leaving_home_pre)

sf_sd_cases_zip <- left_join(sf_cases_zip, sf_sd_zip)

sf_sd_cases_zip %>% ggplot(
  aes(x = dif_percent_leaving, y = cases_by_pop)) + geom_point() + ggtitle("San Francisco County") + geom_smooth(method=lm)

testing_zip_cases_sd_sf <- lm(cases_by_pop ~ dif_percent_leaving, sf_sd_cases_zip, na.action = na.omit)

summary(testing_zip_cases_sd_sf)
## 
## Call:
## lm(formula = cases_by_pop ~ dif_percent_leaving, data = sf_sd_cases_zip, 
##     na.action = na.omit)
## 
## Residuals:
##        Min         1Q     Median         3Q        Max 
## -0.0020105 -0.0011944 -0.0009303  0.0012579  0.0032346 
## 
## Coefficients:
##                      Estimate Std. Error t value Pr(>|t|)
## (Intercept)         2.393e-03  1.841e-03    1.30    0.207
## dif_percent_leaving 7.247e-06  8.061e-05    0.09    0.929
## 
## Residual standard error: 0.001748 on 23 degrees of freedom
##   (2 observations deleted due to missingness)
## Multiple R-squared:  0.0003513,  Adjusted R-squared:  -0.04311 
## F-statistic: 0.008083 on 1 and 23 DF,  p-value: 0.9291
# absolute cases
sf_sd_cases_zip %>% ggplot(
  aes(x = dif_percent_leaving, y = confirmed_cases)) + geom_point() + ggtitle("San Francisco County") + geom_smooth(method=lm)

# absolute percents
sf_sd_cases_zip %>% ggplot(
  aes(x = percent_leaving_home, y = cases_by_pop)) + geom_point() + ggtitle("San Francisco County") + geom_smooth(method=lm) 

testing_zip_cases_sd_sf_abs <- lm(cases_by_pop ~ percent_leaving_home, sf_sd_cases_zip, na.action = na.omit)

summary(testing_zip_cases_sd_sf_abs)
## 
## Call:
## lm(formula = cases_by_pop ~ percent_leaving_home, data = sf_sd_cases_zip, 
##     na.action = na.omit)
## 
## Residuals:
##        Min         1Q     Median         3Q        Max 
## -0.0020452 -0.0011647 -0.0006851  0.0007240  0.0031585 
## 
## Coefficients:
##                        Estimate Std. Error t value Pr(>|t|)  
## (Intercept)           7.961e-03  4.083e-03   1.950   0.0635 .
## percent_leaving_home -1.095e-04  7.778e-05  -1.408   0.1724  
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.001677 on 23 degrees of freedom
##   (2 observations deleted due to missingness)
## Multiple R-squared:  0.07938,    Adjusted R-squared:  0.03936 
## F-statistic: 1.983 on 1 and 23 DF,  p-value: 0.1724

Combine both SMC and SF

smc_sf_combined <- rbind(sf_sd_cases_zip %>% dplyr::select(dif_percent_leaving, cases_by_pop), smc_sd_cases_zip %>% dplyr::select(dif_percent_leaving, cases_by_pop))

smc_sf_combined %>% ggplot(
  aes(x = dif_percent_leaving, y = cases_by_pop)) + geom_point() + ggtitle("San Francisco County and San Mateo County") + geom_smooth(method=lm)

smc_sf_combined_lm <- lm(cases_by_pop ~ dif_percent_leaving, smc_sf_combined, na.action = na.omit)

summary(smc_sf_combined_lm)
## 
## Call:
## lm(formula = cases_by_pop ~ dif_percent_leaving, data = smc_sf_combined, 
##     na.action = na.omit)
## 
## Residuals:
##        Min         1Q     Median         3Q        Max 
## -0.0037344 -0.0012153 -0.0005591  0.0004768  0.0163492 
## 
## Coefficients:
##                      Estimate Std. Error t value Pr(>|t|)   
## (Intercept)         6.122e-03  1.888e-03   3.242  0.00211 **
## dif_percent_leaving 1.551e-04  7.617e-05   2.036  0.04702 * 
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.002857 on 50 degrees of freedom
##   (6 observations deleted due to missingness)
## Multiple R-squared:  0.07659,    Adjusted R-squared:  0.05812 
## F-statistic: 4.147 on 1 and 50 DF,  p-value: 0.04702

Santa Clara County

scc_map_cases <- esri2sf("https://services2.arcgis.com/RiZWfy7B1r76pKTz/arcgis/rest/services/COVID_zip_FC/FeatureServer/0")
## [1] "Feature Layer"
## [1] "esriGeometryPolygon"