#Question 1

library(tidyverse)
## ── Attaching packages ──────────────────────── tidyverse 1.3.0 ──
## ✓ ggplot2 3.3.2     ✓ purrr   0.3.4
## ✓ tibble  3.0.3     ✓ dplyr   1.0.1
## ✓ tidyr   1.1.1     ✓ stringr 1.4.0
## ✓ readr   1.3.1     ✓ forcats 0.5.0
## ── Conflicts ─────────────────────────── tidyverse_conflicts() ──
## x dplyr::filter() masks stats::filter()
## x dplyr::lag()    masks stats::lag()
library(knitr)
library(readxl)
library(zoo)
## 
## Attaching package: 'zoo'
## The following objects are masked from 'package:base':
## 
##     as.Date, as.Date.numeric
pop = read_excel('../data/PopulationEstimates.xls',skip = 2)%>%
  select(pop2019 = POP_ESTIMATE_2019, fips = FIPStxt)

url = 'https://raw.githubusercontent.com/nytimes/covid-19-data/master/us-counties.csv'
covid = read_csv(url)
## Parsed with column specification:
## cols(
##   date = col_date(format = ""),
##   county = col_character(),
##   state = col_character(),
##   fips = col_character(),
##   cases = col_double(),
##   deaths = col_double()
## )
state1 = 'California'

newdata = inner_join(pop, covid, by = 'fips')%>%
  filter(state == state1)%>%
  group_by(county)%>%
  mutate(new_cases = cases - lag(cases))

cumulative cases in the 5 worst counties

worst = newdata%>%
  group_by(county)%>%
  summarise(cases = sum(cases, na.rm = TRUE))%>%
  ungroup()%>%
  arrange(-cases)%>%
  slice_max(cases,n = 5)
## `summarise()` ungrouping output (override with `.groups` argument)
knitr::kable(worst, caption = paste(state1, 'counties with most cumulative cases'), col.names = c('county', 'cases'))
California counties with most cumulative cases
county cases
Los Angeles 11684573
Riverside 2150109
Orange 1938741
San Diego 1749473
San Bernardino 1653067

5 counties with most new cases

newcases = newdata%>%
  select(county, new_cases)%>%
  group_by(county)%>%
  summarise(new_cases = sum(new_cases, na.rm = TRUE))%>%
  arrange(-new_cases)%>%
  slice_max(new_cases, n = 5)
## `summarise()` ungrouping output (override with `.groups` argument)
knitr::kable(newcases, caption = paste(state1, 'counties with most new cases'), col.names = c('county','New Cases'))
California counties with most new cases
county New Cases
Los Angeles 221949
Riverside 46719
Orange 43708
San Bernardino 41230
San Diego 34740

The California Department of Public Health maintains a watch list of counties that are being monitored for worsening coronavirus trends. Thre are 13 counties that are concerned to be safe based on their criteria.

day14 = newdata%>%
  filter(date > max(date) - 13)%>%
  group_by(county, pop2019)%>%
  summarise(new_cases = sum(new_cases))%>%
  ungroup()%>%
  mutate(caseper100 = new_cases / (pop2019 / 100000))%>%
  filter(caseper100 <= 100)%>%
  pull(county)
## `summarise()` regrouping output by 'county' (override with `.groups` argument)

#Question 2

fourstate = covid%>%
  group_by(state, date)%>%
  summarise(cases = sum(cases))%>%
  ungroup()%>%
  filter(state %in% c("California","New York","Florida","Louisiana"))%>%
  group_by(state)%>%
  mutate(newcases = cases - lag(cases))%>%
  ungroup()%>%
  mutate(avg = rollmean(newcases,7,fill=NA,allign='right'))
## `summarise()` regrouping output by 'state' (override with `.groups` argument)
ggplot(data = fourstate, aes(x = date))+
  geom_col(aes(y = cases))+
  facet_wrap(~state)+
  geom_line(aes(y = avg), col = "darkred", size = 1)