#Question 1
library(tidyverse)
## ── Attaching packages ──────────────────────── tidyverse 1.3.0 ──
## ✓ ggplot2 3.3.2 ✓ purrr 0.3.4
## ✓ tibble 3.0.3 ✓ dplyr 1.0.1
## ✓ tidyr 1.1.1 ✓ stringr 1.4.0
## ✓ readr 1.3.1 ✓ forcats 0.5.0
## ── Conflicts ─────────────────────────── tidyverse_conflicts() ──
## x dplyr::filter() masks stats::filter()
## x dplyr::lag() masks stats::lag()
library(knitr)
library(readxl)
library(zoo)
##
## Attaching package: 'zoo'
## The following objects are masked from 'package:base':
##
## as.Date, as.Date.numeric
pop = read_excel('../data/PopulationEstimates.xls',skip = 2)%>%
select(pop2019 = POP_ESTIMATE_2019, fips = FIPStxt)
url = 'https://raw.githubusercontent.com/nytimes/covid-19-data/master/us-counties.csv'
covid = read_csv(url)
## Parsed with column specification:
## cols(
## date = col_date(format = ""),
## county = col_character(),
## state = col_character(),
## fips = col_character(),
## cases = col_double(),
## deaths = col_double()
## )
state1 = 'California'
newdata = inner_join(pop, covid, by = 'fips')%>%
filter(state == state1)%>%
group_by(county)%>%
mutate(new_cases = cases - lag(cases))
cumulative cases in the 5 worst counties
worst = newdata%>%
group_by(county)%>%
summarise(cases = sum(cases, na.rm = TRUE))%>%
ungroup()%>%
arrange(-cases)%>%
slice_max(cases,n = 5)
## `summarise()` ungrouping output (override with `.groups` argument)
knitr::kable(worst, caption = paste(state1, 'counties with most cumulative cases'), col.names = c('county', 'cases'))
county | cases |
---|---|
Los Angeles | 11684573 |
Riverside | 2150109 |
Orange | 1938741 |
San Diego | 1749473 |
San Bernardino | 1653067 |
5 counties with most new cases
newcases = newdata%>%
select(county, new_cases)%>%
group_by(county)%>%
summarise(new_cases = sum(new_cases, na.rm = TRUE))%>%
arrange(-new_cases)%>%
slice_max(new_cases, n = 5)
## `summarise()` ungrouping output (override with `.groups` argument)
knitr::kable(newcases, caption = paste(state1, 'counties with most new cases'), col.names = c('county','New Cases'))
county | New Cases |
---|---|
Los Angeles | 221949 |
Riverside | 46719 |
Orange | 43708 |
San Bernardino | 41230 |
San Diego | 34740 |
The California Department of Public Health maintains a watch list of counties that are being monitored for worsening coronavirus trends. Thre are 13 counties that are concerned to be safe based on their criteria.
day14 = newdata%>%
filter(date > max(date) - 13)%>%
group_by(county, pop2019)%>%
summarise(new_cases = sum(new_cases))%>%
ungroup()%>%
mutate(caseper100 = new_cases / (pop2019 / 100000))%>%
filter(caseper100 <= 100)%>%
pull(county)
## `summarise()` regrouping output by 'county' (override with `.groups` argument)
#Question 2
fourstate = covid%>%
group_by(state, date)%>%
summarise(cases = sum(cases))%>%
ungroup()%>%
filter(state %in% c("California","New York","Florida","Louisiana"))%>%
group_by(state)%>%
mutate(newcases = cases - lag(cases))%>%
ungroup()%>%
mutate(avg = rollmean(newcases,7,fill=NA,allign='right'))
## `summarise()` regrouping output by 'state' (override with `.groups` argument)
ggplot(data = fourstate, aes(x = date))+
geom_col(aes(y = cases))+
facet_wrap(~state)+
geom_line(aes(y = avg), col = "darkred", size = 1)