This is the core of the empirical side of our anti-immigration legislation study. To examine the impact of the law, we employed the modular approach in programming to conduct regression analysis on an extensive amount of county pairs. We defined two functions that does the regression and organizes the output, respectively. Doing so enables us to obtain any set of results we are interested by simply adjusting the input of the function.
Here I am only applying the functions to a small subset of our data. For more details on the regression results and our interpretation, please refer to the paper: The Labor Market Effect of State Level Anti-immigration Legislation in the United States
In this project, we have to run tens of thousands of similar regressions. The best approach is to construct a function that takes a table as input, in which each row specifies a certain regression analysis to be conducted, and return a list of regression results that correspond to the table’s specification. This is the modular approach in programming, and the function we have here is called hb56reg:
hb56reg = function(hb56_dt, qwi, countypairs) {
try(file.remove(paste0('reg_result/logs/parlog.txt')))
cl = makePSOCKcluster(ncore, outfile = paste0('reg_result/logs/parlog.txt'))
registerDoParallel(cl)
x = foreach(i = 1:nrow(hb56_dt),
.packages = c('data.table', 'lfe', 'optiRum','tibble', 'stats'),
.errorhandling = 'pass'
) %dopar% {
# define the following items for easier reference
i.state = hb56_dt[i,]$state
i.race = hb56_dt[i,]$race
i.ethnicity = hb56_dt[i,]$ethnicity
i.industry = hb56_dt[i,]$industry
i.education = hb56_dt[i,]$education
i.lawStart = hb56_dt[i,]$lawStart
i.lawEnd = hb56_dt[i,]$lawEnd
i.depdVar = hb56_dt[i,]$depdVar
i.regModel = hb56_dt[i,]$regModel
i.nlead = hb56_dt[i,]$nlead
# pull the group of states that this iteration will be looking at
if (i.state == 1) {
state_control = c(1,12,28,47)
} else if (i.state == 4) {
state_control = c(4,32,8,35,6)
} else if (i.state == 13) {
state_control = c(13,37,12,47)
} else if (i.state == 18) {
state_control = c(18,26,17,21,39)
} else if (i.state == 45) {
state_control = c(45,37)
} else {
state_control = c(49,32,16,56,8)
}
# create a dummy variable called treat_it that indicates if a county is treated at a certain time
# treat_it will be the main independent variable in our regression
z = qwi[list(state_control)] # uses key here, somewhat faster
z = z[race == i.race & ethnicity == i.ethnicity
& industry == i.industry & education == i.education]
z = z[, treat_i := 0][, treat_t := 0]
z[statefipsn %in% (c(1,4,13,18,45,49)), treat_i := 1]
z[dt >= i.lawStart & dt <= i.lawEnd, treat_t := 1]
z[, treat_it := treat_i * treat_t]
z = z[, c(c("dt","geo","industry","race","ethnicity",
"education","treat_i","treat_t","treat_it","population", 'statefipsn'),
i.depdVar), with = FALSE]
names(z)[ncol(z)] = 'depdVar'
# for the dependent variables whose unit is person, we divide them by the population of the county to normalize the variable
if (i.depdVar == 'EarnS' | i.depdVar == 'EarnS_adjusted') {
z = z[, rate := depdVar]
} else {
z = z[, rate := depdVar/population]
}
# we have four regression models here, regModel 1 simply regresses y (e.g., leaded employment) on treat_it
# with fixed effects on dt and geo, and use statefipsn to compute cluster robust standard errors.
# regModel 2, 3, and 4 employs the difference-in-difference technique, these three models still regress
# y (e.g., leaded employment) on treat_it, yet they utilize the county pairs we obtained before (where diff-in-diff comes in)
# and they differ from each other by specifying what factors to project out and what factors to use for computing
# cluster robust standard errors.
if (i.regModel == 1) {
z = z[, y := shift(rate, n = i.nlead, type ='lead'),
by = c('geo','industry','education','race','ethnicity')]
regResult=tryCatch({
reg = felm(y ~ treat_it | dt + geo| 0 | statefipsn , data = z)
conf = confint(reg, 'treat_it', level=0.90)
regResult = c(summary(reg)$coefficients[1,1],
summary(reg)$coefficients[1,4], conf[1,1], conf[1,2])
}, error = function(e){
regResult = e
})
} else {
z1 = CJ.dt(hb56_dt[i,c(2:5)], countypairs[control == i.state][,c(1,2)])
z1 = rowid_to_column(z1, "pairs")
z1 = CJ.dt(as.data.table(unique(z$dt)), z1)
names(z1)[1] = 'dt'
z1 = melt(z1, id.vars = 1:6, variable.name = 'county', value.name = 'countyfipsn')
setcolorder(z1, neworder = c('pairs',"county","dt","countyfipsn",
"industry","education","race","ethnicity"))
z = merge(z1, z, by.x = c('dt','industry','education','race','ethnicity','countyfipsn'),
by.y = c('dt','industry','education','race','ethnicity','geo'), all.x = T)
z = z[, y := shift(rate, n = i.nlead, type ='lead'),
by = c('pairs','county','industry','education','race','ethnicity')]
if (i.regModel == 2) {
regResult=tryCatch({
reg = felm(y ~ treat_it | dt + countyfipsn + pairs| 0 | pairs + statefipsn, data = z)
conf = confint(reg, 'treat_it', level=0.90)
regResult = c(summary(reg)$coefficients[1,1],
summary(reg)$coefficients[1,4], conf[1,1], conf[1,2])
}, error = function(e){
regResult = e
})
} else if (i.regModel == 3) {
regResult=tryCatch({
reg = felm(y ~ treat_it | dt + countyfipsn| 0 | pairs + statefipsn, data = z)
conf = confint(reg, 'treat_it', level=0.90)
regResult = c(summary(reg)$coefficients[1,1],
summary(reg)$coefficients[1,4], conf[1,1], conf[1,2])
}, error = function(e){
regResult = e
})
} else if (i.regModel == 4) {
z = z[, period_pairs := paste0(dt, "_", pairs)]
regResult=tryCatch({
reg = felm(y ~ treat_it | period_pairs + countyfipsn | 0 | pairs + statefipsn, data = z)
conf = confint(reg, 'treat_it', level=0.90)
regResult = c(summary(reg)$coefficients[1,1],
summary(reg)$coefficients[1,4], conf[1,1], conf[1,2])
}, error = function(e){
regResult = e
})
}
}
gc()
return(regResult)
}
stopCluster(cl)
l.output = list(inputs = hb56_dt, results = x)
return(l.output)
}
Now let’s test hb56reg, we pass the following table to the function:
qwi = fread('qwiNeededAdjusted.csv')
qwi = qwi[industry == '00' & education == 'E0' & ethnicity == 'A0' & race == 'A0'] # this is to save computing power since it's only a demonstration
qwi[, dt := ymd(dt)]
qwi[,statefipsn := floor(geo/1000)]
qwi[,cntyfipsn := geo - statefipsn*1000]
setkey(qwi, statefipsn)
countypairs = fread('countypairs.csv')
names(countypairs)[1:2] = c(1,2)
naics = unique(qwi$industry)
g = unique(qwi$geo)
edu = unique(qwi$education)
r = unique(qwi$race)
eth = unique(qwi$ethnicity)
var = unique(qwi$variable)
stateIN = c(18,26,17,21,39)
stateAZ = c(4,32,8,35,6)
stateUT = c(49,32,16,56,8)
stateAL = c(1,12,28,47)
stateGA = c(13,37,12,47)
stateSC = c(45,37)
# Specify how many cores to use
setDTthreads(4)
ncore = 6
# Run the function on a sample of data
lawdates = readRDS('law_start_dates.rds')
hb56_dt = as.data.table(expand.grid(state = c(1,4,13,18,45,49), race = r[1], ethnicity = eth[1],
industry = naics[1],
education = edu[1],
depdVar = c('Emp_adjusted'),
regModel = 4,
nlead = c(-2,-1,0,1,2,3,4), stringsAsFactors = F))
hb56_dt = merge(hb56_dt, lawdates[, .(state, passqtr)], all.x=T)
setnames(hb56_dt, 'passqtr', 'lawStart')
hb56_dt$lawEnd = hb56_dt$lawStart
year(hb56_dt$lawEnd) = year(hb56_dt$lawEnd) + 1
print(hb56_dt[1:10])
## state race ethnicity industry education depdVar regModel nlead
## 1: 1 A0 A0 00 E0 Emp_adjusted 4 -2
## 2: 1 A0 A0 00 E0 Emp_adjusted 4 -1
## 3: 1 A0 A0 00 E0 Emp_adjusted 4 0
## 4: 1 A0 A0 00 E0 Emp_adjusted 4 1
## 5: 1 A0 A0 00 E0 Emp_adjusted 4 2
## 6: 1 A0 A0 00 E0 Emp_adjusted 4 3
## 7: 1 A0 A0 00 E0 Emp_adjusted 4 4
## 8: 4 A0 A0 00 E0 Emp_adjusted 4 -2
## 9: 4 A0 A0 00 E0 Emp_adjusted 4 -1
## 10: 4 A0 A0 00 E0 Emp_adjusted 4 0
## lawStart lawEnd
## 1: 2011-07-01 2012-07-01
## 2: 2011-07-01 2012-07-01
## 3: 2011-07-01 2012-07-01
## 4: 2011-07-01 2012-07-01
## 5: 2011-07-01 2012-07-01
## 6: 2011-07-01 2012-07-01
## 7: 2011-07-01 2012-07-01
## 8: 2010-07-01 2011-07-01
## 9: 2010-07-01 2011-07-01
## 10: 2010-07-01 2011-07-01
And this is the output of hb56reg:
x = hb56reg(hb56_dt, qwi, countypairs)
print(x$inputs[1:6])
## state race ethnicity industry education depdVar regModel nlead
## 1: 1 A0 A0 00 E0 Emp_adjusted 4 -2
## 2: 1 A0 A0 00 E0 Emp_adjusted 4 -1
## 3: 1 A0 A0 00 E0 Emp_adjusted 4 0
## 4: 1 A0 A0 00 E0 Emp_adjusted 4 1
## 5: 1 A0 A0 00 E0 Emp_adjusted 4 2
## 6: 1 A0 A0 00 E0 Emp_adjusted 4 3
## lawStart lawEnd
## 1: 2011-07-01 2012-07-01
## 2: 2011-07-01 2012-07-01
## 3: 2011-07-01 2012-07-01
## 4: 2011-07-01 2012-07-01
## 5: 2011-07-01 2012-07-01
## 6: 2011-07-01 2012-07-01
print(x$results[1:6])
## [[1]]
## [1] -0.004126427 0.006826559 -0.005579031 -0.002673822
##
## [[2]]
## [1] -0.004879526 0.008745811 -0.006754088 -0.003004965
##
## [[3]]
## [1] -0.005667440 0.011394437 -0.008060033 -0.003274848
##
## [[4]]
## [1] -0.005996597 0.015809309 -0.008846704 -0.003146490
##
## [[5]]
## [1] -0.006186906 0.017050271 -0.009209998 -0.003163815
##
## [[6]]
## [1] -0.006157318 0.018765094 -0.009274071 -0.003040564
Define function getGSTable that converts the output of function hb56reg, which is a list that includes both meta data and the reg results list, into a data.table:
getGSTable = function(l.output) {
inputdt = l.output[[1]]
results = l.output[[2]]
# Error Check
errorchk = function(x) {
if (class(x) == 'numeric') {
if (length(is.na(x)) == 4) {
return('good')
} else {
return('incomplete')
}
} else {
return(as.character(paste0(x), collapse = '_'))
}
}
err = unlist(lapply(results, errorchk))
inputdt = cbind(inputdt, err)
pct_good = sum(err=='good')/length(err)
# in the ouput, each element is a vector, here we convert them to data.table first, so later on we can just use rbindList
results_good = list()
for (i in 1:nrow(inputdt)) {
#print(i)
if (inputdt[i, err] == 'good') {
results_good[[i]] = as.data.frame(t(results[[i]]))
} else {
results_good[[i]] = as.data.frame(t(c(NA, NA, NA, NA)))
}
}
outdt = rbindlist(results_good)
names(outdt) = c('est','p_value','conf_lower','conf_upper')
outdt = cbind(inputdt, outdt)
outdt = merge(outdt, pctHis, by.x = c('state','industry'),
by.y = c('statefipsn','industry'), all.x = T)
outdt = merge(outdt, pctEdu, by.x = c('state','industry'),
by.y = c('statefipsn','industry'), all.x = T)
outdt = merge(outdt, indusNames, by = 'industry', all.x = T)
outdt = merge(outdt, pop[, .(statefipsn, population)], by.x = 'state',
by.y = 'statefipsn', all.x = T)
outdt[, est_pop := round(est*population, digits= 0)]
outdt$population = NULL
return(outdt)
}
We also add the percentage of Hispanic population (for a certain state and industry), the percentage of less than college educated people (for a certain state and industry), and industries’ description into the output data.table:
pctHis = fread('pct_general_hispanic_versus_nonhispanic.csv')[, 1:3]
pctEdu = fread('pct_general_college_versus_noncollege.csv')[, 1:3]
indusNames = fread('naics_code_and_indus_names.csv')
pop = readRDS('pop_cnty_yr.rds')
# this is to get the population of the states of interest at the time they passed the law
pop = pop[, list(population = sum(population)), by = c('year','statefipsn')]
pop = merge(pop, lawdates[, .(state, passqtr)], by.x='statefipsn', by.y='state', all.y=T)
pop[, passyr := year(passqtr)]
pop=pop[year == passyr]
Now test function getGSTable:
x_dt = getGSTable(x)
print(x_dt)
## state industry race ethnicity education depdVar regModel nlead
## 1: 1 00 A0 A0 E0 Emp_adjusted 4 -2
## 2: 1 00 A0 A0 E0 Emp_adjusted 4 -1
## 3: 1 00 A0 A0 E0 Emp_adjusted 4 0
## 4: 1 00 A0 A0 E0 Emp_adjusted 4 1
## 5: 1 00 A0 A0 E0 Emp_adjusted 4 2
## 6: 1 00 A0 A0 E0 Emp_adjusted 4 3
## 7: 1 00 A0 A0 E0 Emp_adjusted 4 4
## 8: 4 00 A0 A0 E0 Emp_adjusted 4 -2
## 9: 4 00 A0 A0 E0 Emp_adjusted 4 -1
## 10: 4 00 A0 A0 E0 Emp_adjusted 4 0
## 11: 4 00 A0 A0 E0 Emp_adjusted 4 1
## 12: 4 00 A0 A0 E0 Emp_adjusted 4 2
## 13: 4 00 A0 A0 E0 Emp_adjusted 4 3
## 14: 4 00 A0 A0 E0 Emp_adjusted 4 4
## 15: 13 00 A0 A0 E0 Emp_adjusted 4 -2
## 16: 13 00 A0 A0 E0 Emp_adjusted 4 -1
## 17: 13 00 A0 A0 E0 Emp_adjusted 4 0
## 18: 13 00 A0 A0 E0 Emp_adjusted 4 1
## 19: 13 00 A0 A0 E0 Emp_adjusted 4 2
## 20: 13 00 A0 A0 E0 Emp_adjusted 4 3
## 21: 13 00 A0 A0 E0 Emp_adjusted 4 4
## 22: 18 00 A0 A0 E0 Emp_adjusted 4 -2
## 23: 18 00 A0 A0 E0 Emp_adjusted 4 -1
## 24: 18 00 A0 A0 E0 Emp_adjusted 4 0
## 25: 18 00 A0 A0 E0 Emp_adjusted 4 1
## 26: 18 00 A0 A0 E0 Emp_adjusted 4 2
## 27: 18 00 A0 A0 E0 Emp_adjusted 4 3
## 28: 18 00 A0 A0 E0 Emp_adjusted 4 4
## 29: 45 00 A0 A0 E0 Emp_adjusted 4 -2
## 30: 45 00 A0 A0 E0 Emp_adjusted 4 -1
## 31: 45 00 A0 A0 E0 Emp_adjusted 4 0
## 32: 45 00 A0 A0 E0 Emp_adjusted 4 1
## 33: 45 00 A0 A0 E0 Emp_adjusted 4 2
## 34: 45 00 A0 A0 E0 Emp_adjusted 4 3
## 35: 45 00 A0 A0 E0 Emp_adjusted 4 4
## 36: 49 00 A0 A0 E0 Emp_adjusted 4 -2
## 37: 49 00 A0 A0 E0 Emp_adjusted 4 -1
## 38: 49 00 A0 A0 E0 Emp_adjusted 4 0
## 39: 49 00 A0 A0 E0 Emp_adjusted 4 1
## 40: 49 00 A0 A0 E0 Emp_adjusted 4 2
## 41: 49 00 A0 A0 E0 Emp_adjusted 4 3
## 42: 49 00 A0 A0 E0 Emp_adjusted 4 4
## state industry race ethnicity education depdVar regModel nlead
## lawStart lawEnd err est p_value conf_lower
## 1: 2011-07-01 2012-07-01 good -0.0041264267 0.006826559 -5.579031e-03
## 2: 2011-07-01 2012-07-01 good -0.0048795264 0.008745811 -6.754088e-03
## 3: 2011-07-01 2012-07-01 good -0.0056674405 0.011394437 -8.060033e-03
## 4: 2011-07-01 2012-07-01 good -0.0059965973 0.015809309 -8.846704e-03
## 5: 2011-07-01 2012-07-01 good -0.0061869063 0.017050271 -9.209998e-03
## 6: 2011-07-01 2012-07-01 good -0.0061573177 0.018765094 -9.274071e-03
## 7: 2011-07-01 2012-07-01 good -0.0055262222 0.019138962 -8.344011e-03
## 8: 2010-07-01 2011-07-01 good -0.0068748515 0.172863384 -1.571992e-02
## 9: 2010-07-01 2011-07-01 good -0.0045570619 0.294409542 -1.261524e-02
## 10: 2010-07-01 2011-07-01 good -0.0047161494 0.289340120 -1.295457e-02
## 11: 2010-07-01 2011-07-01 good -0.0039318549 0.318538231 -1.129544e-02
## 12: 2010-07-01 2011-07-01 good -0.0056628503 0.174784723 -1.299017e-02
## 13: 2010-07-01 2011-07-01 good -0.0020829629 0.567440385 -9.218271e-03
## 14: 2010-07-01 2011-07-01 good 0.0013247919 0.822900379 -1.049550e-02
## 15: 2011-07-01 2012-07-01 good -0.0006835257 0.567093840 -3.192777e-03
## 16: 2011-07-01 2012-07-01 good -0.0008768215 0.549653611 -3.947042e-03
## 17: 2011-07-01 2012-07-01 good -0.0009011883 0.590511937 -4.432988e-03
## 18: 2011-07-01 2012-07-01 good -0.0013415725 0.437648627 -4.877051e-03
## 19: 2011-07-01 2012-07-01 good -0.0016150335 0.344221823 -5.007960e-03
## 20: 2011-07-01 2012-07-01 good -0.0013670660 0.402661829 -4.676296e-03
## 21: 2011-07-01 2012-07-01 good -0.0011085437 0.461491402 -4.205953e-03
## 22: 2011-07-01 2012-07-01 good 0.0009440780 0.370437894 -1.052382e-03
## 23: 2011-07-01 2012-07-01 good 0.0010260706 0.337940838 -9.852755e-04
## 24: 2011-07-01 2012-07-01 good 0.0013828221 0.226296960 -6.806028e-04
## 25: 2011-07-01 2012-07-01 good 0.0018154738 0.109305266 -6.963504e-05
## 26: 2011-07-01 2012-07-01 good 0.0021221688 0.043582329 5.686535e-04
## 27: 2011-07-01 2012-07-01 good 0.0022237146 0.031844260 7.579999e-04
## 28: 2011-07-01 2012-07-01 good 0.0024753919 0.027049532 9.274347e-04
## 29: 2011-07-01 2012-07-01 good 0.0002802729 0.730595191 -3.648687e-03
## 30: 2011-07-01 2012-07-01 good -0.0004996922 0.567234970 -4.402828e-03
## 31: 2011-07-01 2012-07-01 good -0.0016482674 0.227885638 -5.541202e-03
## 32: 2011-07-01 2012-07-01 good -0.0024318460 0.149909963 -6.115736e-03
## 33: 2011-07-01 2012-07-01 good -0.0019433834 0.172162249 -5.344922e-03
## 34: 2011-07-01 2012-07-01 good -0.0010057120 0.293311922 -4.157522e-03
## 35: 2011-07-01 2012-07-01 good 0.0002260844 0.722443980 -2.837858e-03
## 36: 2011-04-01 2012-04-01 good -0.0033763200 0.198528292 -8.051846e-03
## 37: 2011-04-01 2012-04-01 good -0.0096108223 0.004422290 -1.314946e-02
## 38: 2011-04-01 2012-04-01 good -0.0132822021 0.001195106 -1.672780e-02
## 39: 2011-04-01 2012-04-01 good -0.0153992614 0.001911763 -1.992130e-02
## 40: 2011-04-01 2012-04-01 good -0.0162483763 0.005057275 -2.245611e-02
## 41: 2011-04-01 2012-04-01 good -0.0159550992 0.007982560 -2.288160e-02
## 42: 2011-04-01 2012-04-01 good -0.0162528996 0.004064969 -2.210056e-02
## lawStart lawEnd err est p_value conf_lower
## conf_upper pctHispanic pctNonCollege industry_name est_pop
## 1: -0.002673822 0.03398677 0.8033123 All Industries -19590
## 2: -0.003004965 0.03398677 0.8033123 All Industries -23165
## 3: -0.003274848 0.03398677 0.8033123 All Industries -26906
## 4: -0.003146490 0.03398677 0.8033123 All Industries -28468
## 5: -0.003163815 0.03398677 0.8033123 All Industries -29372
## 6: -0.003040564 0.03398677 0.8033123 All Industries -29231
## 7: -0.002708434 0.03398677 0.8033123 All Industries -26235
## 8: 0.001970212 0.27982479 0.7881099 All Industries -42946
## 9: 0.003501118 0.27982479 0.7881099 All Industries -28467
## 10: 0.003522272 0.27982479 0.7881099 All Industries -29461
## 11: 0.003431732 0.27982479 0.7881099 All Industries -24562
## 12: 0.001664471 0.27982479 0.7881099 All Industries -35375
## 13: 0.005052345 0.27982479 0.7881099 All Industries -13012
## 14: 0.013145084 0.27982479 0.7881099 All Industries 8276
## 15: 0.001825725 0.06648705 0.7738238 All Industries -6562
## 16: 0.002193399 0.06648705 0.7738238 All Industries -8418
## 17: 0.002630612 0.06648705 0.7738238 All Industries -8652
## 18: 0.002193906 0.06648705 0.7738238 All Industries -12880
## 19: 0.001777893 0.06648705 0.7738238 All Industries -15505
## 20: 0.001942164 0.06648705 0.7738238 All Industries -13125
## 21: 0.001988866 0.06648705 0.7738238 All Industries -10643
## 22: 0.002940538 0.05115713 0.8057191 All Industries 6093
## 23: 0.003037417 0.05115713 0.8057191 All Industries 6623
## 24: 0.003446247 0.05115713 0.8057191 All Industries 8925
## 25: 0.003700583 0.05115713 0.8057191 All Industries 11718
## 26: 0.003675684 0.05115713 0.8057191 All Industries 13697
## 27: 0.003689429 0.05115713 0.8057191 All Industries 14352
## 28: 0.004023349 0.05115713 0.8057191 All Industries 15977
## 29: 0.004209233 0.03974096 0.8043821 All Industries 1282
## 30: 0.003403444 0.03974096 0.8043821 All Industries -2287
## 31: 0.002244667 0.03974096 0.8043821 All Industries -7542
## 32: 0.001252044 0.03974096 0.8043821 All Industries -11128
## 33: 0.001458155 0.03974096 0.8043821 All Industries -8893
## 34: 0.002146098 0.03974096 0.8043821 All Industries -4602
## 35: 0.003290026 0.03974096 0.8043821 All Industries 1035
## 36: 0.001299206 0.11766882 0.7627860 All Industries -9168
## 37: -0.006072189 0.11766882 0.7627860 All Industries -26097
## 38: -0.009836602 0.11766882 0.7627860 All Industries -36066
## 39: -0.010877221 0.11766882 0.7627860 All Industries -41815
## 40: -0.010040642 0.11766882 0.7627860 All Industries -44120
## 41: -0.009028596 0.11766882 0.7627860 All Industries -43324
## 42: -0.010405240 0.11766882 0.7627860 All Industries -44133
## conf_upper pctHispanic pctNonCollege industry_name est_pop
Finally we can visualize the output of the regression function. The line chart displayed below can be interpreted as the impact of the anti-immigration laws over time, where lag = 0 is the time that the law took effect:
visuals_lag_data = fread('reg_result/v16_edu_table.csv')
visuals_lag_data = visuals_lag_data[lawduration == '1 year']
visuals_lag_data = visuals_lag_data[education != 'E0']
visuals_lag_data = visuals_lag_data[education != 'E5' & regModel == 4 & industry == '00']
visual_lag_input = as.data.table(expand.grid(depdVar = unique(visuals_lag_data$depdVar),stringsAsFactors = F))
i = 3
i.depdVar = visual_lag_input[i,]$depdVar
plot_dt = visuals_lag_data[depdVar == i.depdVar]
plot_dt[, state_names := state]
plot_dt$state_names = mapvalues(plot_dt$state_names, from = c(1,4,13,18,45,49), to = c('Alabama','Arizona','Georgia','Indiana','South Carolina','Utah'))
ggplot(plot_dt, aes(x=nlead, y=est, color= education, group = education)) +
geom_line() +
geom_point(aes(shape = education), size = 3.4, position=position_dodge(.21), alpha=4) +
scale_shape_manual(
"Education Levels",
values=c(0, 1, 2, 18),
breaks = c("E1", "E2", "E3", "E4"),
labels = c("<HS", "HS", "2Yr College","BA & More")) +
scale_colour_manual(
"Education Levels",
values = c("coral4","bisque4","darkolivegreen4","darkcyan"),
breaks = c("E1", "E2", "E3", "E4"),
labels = c("<HS", "HS", "2Yr College","BA & More")
) +
geom_vline(xintercept=0, linetype="dashed", color = "darkgrey", size = 0.75) +
geom_errorbar(aes(ymin=conf_lower, ymax=conf_upper), width=0.33, position=position_dodge(.21)) +
facet_wrap(~state_names, scales='free_y') +
theme(legend.position="bottom") +
xlab("Lag (Quarters)") + ylab("Estimate")