cd "C:\Users\David\Dropbox\Documents\Work\Clients & prospects\GiveWell\Criminal justice\Replications"

***
*** Routines to make impulse-response function after (panel) Granger regressions
***

cap mata mata drop myIRF()
cap mata mata drop myIRFMC()
cap mata mata drop myIRFCI()

mata
// After a regression on `lags' lags of depvar and indepvar, construct a transition matrix M that represents the
// forward transformation implied by the estimates in e(b), and take successive powers of it to trace the response
// of depvar to an impulse in the latest lag of indepvar.
//   depvar, indepvar  strmatch() patterns applied to the column names of e(b); together they must select exactly
//                     2*lags coefficients. Coefficients are taken in e(b) order and interpreted as
//                     (Y_1,...,Y_lags,X_1,...,X_lags), so the depvar lags must precede the indepvar lags in e(b).
//   lags              number of lags of each variable in the regression (>=1)
//   periods           number of periods to trace
// Returns a periods x 1 vector whose i-th entry is the derivative of Y_i with respect to X_1.
real colvector myIRF(string scalar depvar, string scalar indepvar, real scalar lags, real scalar periods) {
	real colvector retval, p; string matrix colstripe; real matrix M, P; real scalar i
	colstripe = st_matrixcolstripe("e(b)")
	p = selectindex( strmatch(colstripe[,2], depvar) + strmatch(colstripe[,2], indepvar) ) // indexes of entries in e(b) for all lags of indepvar & depvar
	if (length(p) != 2*lags)
		_error(3200, sprintf("myIRF(): patterns matched %g coefficients in e(b); expected %g", length(p), 2*lags))
	M = J(lags, lags, 0)
	if (lags > 1) M[|2,1 \ lags,lags-1|] = I(lags-1) // ones on the subdiagonal shift each lag down one slot; nothing to shift when lags==1
	M = blockdiag(M, M) // transition matrix to lag-shift (Y_1,...,Y_lags,X_1,...,X_lags)
	M[1,] = st_matrix("e(b)")[p] // top row of M projects next Y from current Y's and X's
	retval = J(periods, 1, .)
	P = M
	for (i=1; i<=periods; i++) {
		retval[i] = P[1,lags+1] // derivative of Y_i wrt X_1
		P = P * M
	}
	return (retval)
}

// Monte Carlo variant of myIRF(), for constructing confidence intervals.
// Takes `reps' draws of the relevant coefficients from a normal distribution with mean e(b) and variance e(V)
// (restricted to the selected coefficients) and computes the impulse-response path for each draw.
//   depvar, indepvar  strmatch() patterns applied to the column names of e(b); together they must select exactly
//                     2*lags coefficients, taken in e(b) order as (Y_1,...,Y_lags,X_1,...,X_lags)
//   lags, periods     as in myIRF()
//   reps              number of simulation draws
// Returns a periods x reps matrix; column j holds the path for draw j.
real matrix myIRFMC(string scalar depvar, string scalar indepvar, real scalar lags, real scalar periods, real scalar reps) {
	real colvector p; string matrix colstripe; real matrix M, P, C, retval, X, L; real rowvector mu; real scalar j, i
	retval = J(periods, reps, .)
	colstripe = st_matrixcolstripe("e(b)")
	p = selectindex( strmatch(colstripe[,2], depvar) + strmatch(colstripe[,2], indepvar) ) // indexes of entries in e(b) for all lags of indepvar & depvar
	if (length(p) != 2*lags)
		_error(3200, sprintf("myIRFMC(): patterns matched %g coefficients in e(b); expected %g", length(p), 2*lags))
	mu = st_matrix("e(b)")[p]
	symeigensystem(st_matrix("e(V)")[p,p], X, L)
	C = diag(sqrt(edittozero(L,10))) * X' // C'C reproduces the selected block of e(V); edittozero() zeroes eigenvalues that are numerically negligible
	M = J(lags, lags, 0)
	if (lags > 1) M[|2,1 \ lags,lags-1|] = I(lags-1) // ones on the subdiagonal; nothing to shift when lags==1
	M = blockdiag(M, M) // transition matrix to lag (Y_1,...,Y_lags,X_1,...,X_lags)
	for (j = reps; j; j--) {
		M[1,] = mu + rnormal(1,2*lags,0,1) * C  // top row of M projects next Y, using one simulated coefficient vector
		P = M
		for (i=1; i<=periods; i++) {
			retval[i,j] = P[1,lags+1] // derivative of Y_i wrt X_1
			P = P * M
		}
	}
	return (retval)
}

// Extract percentiles 2.5, 7.5, ..., 97.5 from simulation results produced by myIRFMC().
//   yhatMC  periods x reps matrix of simulated impulse-response paths (one column per draw)
// Returns a (periods+1) x 20 matrix: a leading row of zeros for t=0, then one row per period holding the
// 20 order statistics of that period's draws.
real matrix myIRFCI(real matrix yhatMC) {
	real matrix retval; real colvector tmp, idx; real scalar t
	if (cols(yhatMC) == 0) _error(3200, "myIRFCI(): no simulation draws supplied")
	retval = J(rows(yhatMC), 20, .)
	idx = round(range(0.025, 0.975, .05) * cols(yhatMC)) // positions of the order statistics; the same for every period
	idx = idx + (idx :< 1) // with fewer than 20 draws the lowest position rounds to 0, which is not a valid subscript; use the minimum instead
	for (t=rows(retval); t; t--) {
		tmp = yhatMC[t,]'
		_sort(tmp,1)
		retval[t,] = tmp[idx]'
	}
	return (J(1,cols(retval),0) \ retval) // create entry for t=0
}
end

cap program drop myIRF
program define myIRF
	* Graph the impulse-response function implied by the estimates currently in e(), with a fan of Monte Carlo bands.
	*   depvar(), indepvar()  patterns passed through to the Mata routines myIRF() and myIRFMC()
	*   lags()                number of lags in the regression
	*   periods()             number of periods to plot
	*   reps()                number of Monte Carlo draws
	*   any other options are handed to -twoway-
	* Side effects: creates/replaces variables yhat, CIBounds1-CIBounds20 and t, and Mata globals yhat and CIBounds.
	syntax, depvar(string) indepvar(string) lags(integer) periods(integer) reps(integer) [*]

	* Point estimates first, then simulated percentile bounds; both get a leading row for t=0
	mata: yhat = 0 \ myIRF("`depvar'", "`indepvar'", `lags', `periods')
	mata: CIBounds = myIRFCI(myIRFMC("`depvar'", "`indepvar'", `lags', `periods', `reps'))
	getmata yhat (CIBounds*)=CIBounds, force replace

	capture drop t
	generate int t = _n-1 if _n <= `periods' + 1

	* Ten nested bands, from the outermost pair (columns 1 and 20, palest) to the innermost (columns 10 and 11, darkest)
	local bands
	forvalues lo = 1/10 {
		local hi = 21 - `lo'
		local shade .`lo'
		if `lo' == 10 local shade 1
		local bands `bands' rarea CIBounds`lo' CIBounds`hi' t, color(green*`shade') fintensity(100) ||
	}

	twoway `bands' line yhat t, legend(off) lcolor(black) || if t<=`periods'+1, ///
		ylabel(0 0 `=ln(1.1)' "+10%" `=ln(.9)' "-10%", angle(horizontal)) ///
		`options' ///
		yline(0)
end


***
*** First, work with public Abrams data
***

*** Adaptation of Abrams Figure 5 (event study). "Gun" dots in plots 1 and 3 of first row should match central lines in figure 5

global range 6 // time range of graphs
global Quarterly 0 // work with annual data
global Per 12 // months per period (annual data); the xline() expression in the event-study plots below divides by $Per, which is otherwise not set until the MI event-study section

* Load the Abrams data and bin event time: relyr below -7 is set to -7 and relyr above 6 is set to 6 (missing values are left alone)
use "C:\Users\David\Dropbox\Documents\Work\Library\Sentencing reform\AbramsDeterrenceAEJApp2011-0005Data\AbramsDeterrence2", clear
replace relyr = -7 if relyr < -7 & relyr != .
replace relyr = 6 if relyr > 6 & relyr != .
* Re-code event time as consecutive positive integers and store in $rel0 the code that corresponds to relyr==0
egen _relyr = group(relyr), label // "i." operator doesn't like negative values in relyr
levelsof _relyr if relyr==0
global rel0 `r(levels)'
* Event-study regressions with relyr==0 as the base category: first with state and year dummies only, then adding state-specific linear trends
reg lnpcrrobgun ib$rel0._relyr i.FSTATE         i.year dc* [aw=statepop], cluster(FSTATE)
est store NoTrends
reg lnpcrrobgun ib$rel0._relyr i.FSTATE##c.year i.year dc* [aw=statepop], cluster(FSTATE)
* Plot the event-time coefficients from the stored (no trends) and the active (trends) estimates
* NOTE(review): the range here runs to $rel0+7, while the loop further down uses $rel0+6; after the binning above the largest relyr is 6 -- confirm the upper bound
fvexpand i(`=$rel0-6'/`=$rel0+7')._relyr
coefplot ///
		(NoTrends, keep(`=subinstr("`r(varlist)'","b.",".",.)') base label("No controls for state linear trends")) ///
		(        , keep(`=subinstr("`r(varlist)'","b.",".",.)') base label("Controls for state linear trends")), ///
	scheme(s1color) vertical cismooth ///
	xtitle(Years until/since passage of gun add-on law, margin(small)) ///
	ylabel(0 "0" `=ln(1.5)' "+50%" `=ln(.5)' "-50%", labsize(small)) ///
	legend(region(style(none)) margin(zero) pos(8) ring(0))

* Eight event-study panels (g1-g8) on the Abrams data: 2 samples by year (all, post-1974) x 2 specifications
* (without/with state linear trends) x 2 samples by event time (all, within [-7,6] years of passage).
* The panel titles below label panels 1 and 3 "All data" and panels 3 and 4 "Controlling for state linear trends",
* and the note at the top of this section says plots 1 and 3 are the all-data ones. That requires the event-time
* sample to vary fastest, so the -timetrend- loop is nested outside the -balanced- loop.
* The xline() expression relies on the globals range, Per (months per period) and Quarterly having been set beforehand.
set scheme s1color
local i 0
foreach post74 in 1 "year>1974" {
	foreach timetrend in "" i.FSTATE#c.year {
		foreach balanced in 1 "relyr>=-7 & relyr<=6" {
			local ++i
			use  "C:\Users\David\Dropbox\Documents\Work\Library\Sentencing reform\AbramsDeterrenceAEJApp2011-0005Data\AbramsDeterrence2" if `post74' & `balanced', clear
*			replace relyr = -7 if relyr < -7 & relyr != .
*			replace relyr =  6 if relyr >  6 & relyr != .
			egen _relyr = group(relyr), label // "i." operator doesn't like negative values in relyr
			levelsof _relyr if relyr==0
			global rel0 `r(levels)'
			// gun robbery estimates are stored; gun assault estimates stay active for coefplot
			reg lnpcrrobgun  ib$rel0._relyr i.FSTATE `timetrend' i.year dc* [aw=statepop], cluster(FSTATE)
			est store lnpcrrobgun
			reg lnpcrasltgun ib$rel0._relyr i.FSTATE `timetrend' i.year dc* [aw=statepop], cluster(FSTATE)
			fvexpand i(`=$rel0-6'/`=$rel0+6')._relyr
			// x labels only on the bottom row (i>=5), column titles only on the top row (i<5), y title only in the first column, legend only in panel 1
			coefplot ///
				(lnpcrrobgun, keep(`=subinstr("`r(varlist)'","b.",".",.)') base cismooth(n(20) lwidth(1 10) color(blue)) offset(-0.2) label(Robbery))  ///
				(           , keep(`=subinstr("`r(varlist)'","b.",".",.)') base cismooth(n(20) lwidth(1 10) color(blue)) offset( 0.2) label(Assault)),  vertical ///
				xlabel(`=cond(`i'<5, `""""', `"`=cond($Quarterly, `"4 "-5" 8 "-4" 12 "-3" 16 "-2" 20 "-1" 24 "0" 28 "1" 32 "2" 36 "3" 40 "4" 44 "5""', `"1 "-6" 2 "-5" 3 "-4" 4 "-3" 5 "-2" 6 "-1" 7 "0" 8 "1" 9 "2" 10 "3" 11 "4" 12 "5" 13 "6""')', labsize(vsmall)"')') ///
				ylabel(0 "0%" `=ln(1.5)' "+50%" `=ln(.5)' "–50%", tlen(*.5) labgap(0) angle(horizontal) labsize(small)) `=cond(mod(`i',4)==1, `"ytitle(`"`=cond(`i'<5,"1965","1975")'-2002 data"', bmargin(zero))"', "")' ///
				xline(`=$range*(12/$Per)+!$Quarterly') ///
				`=cond(`i'<5, `"title(`"`=cond(inlist(`i',1,3),"All data","Data within 6 years of passage")'"' `"`=cond(inlist(`i',1,2)," ","Controlling for state linear trends")'"', size(medsmall) span)"', "")' ///
				mcolor(none) ///
				graphregion(margin(zero)) ///
				legend(`=cond(`i'==1, "region(style(none)) margin(zero) pos(6) ring(0) size(small) keygap(*.2)" , "off")') ///
				msize(small) ///
				yscale(range(`=ln(.4)' `=ln(1.8)')) ///
				name(g`i', replace)
		}
	}
}
graph combine g1 g2 g3 g4 g5 g6 g7 g8, cols(4) title("Years until/since passage of gun add-on law", margin(small) pos(6) size(small))  graphregion(margin(zero)) name(EventOriginal, replace)
graph export "Abrams Figure 5.png", replace width(1000)


***
*** Abrams Table 3
***

cd "C:\Users\David\Dropbox\Documents\Work\Library\Sentencing reform\AbramsDeterrenceAEJApp2011-0005Data"

* balpan is 0 for state level data, 1 for balanced panel of precincts using dates of implementation (as in Table 3), & 2 for balanced panel using dates of passage
global balpan 1
* Row-title wording follows the choice of data set: "implementation" when balpan is 1, "passage" otherwise
local datename = cond($balpan==1, "implementation", "passage")

use AbramsDeterrence$balpan, clear // agency-level data--AbramsDeterrence1 based on date in effect, AbramsDeterrence2 based on date adopted
gen double lnpcrrobnongun = ln(pcrrobtot - pcrrobgun)
label var lnpcrrobgun    "Gun robbery rate"
label var lnpcrrobnongun "Non-gun robbery rate"
label var lnpcrasltgun   "Gun assault rate"

* Declared with no arguments: uses the panel settings already stored in the data set
xtset

* Build the table in three row groups, one per lag length; within each, eight columns come from
* 2 (all years / post-1974) x 2 (all event times / within [-7,6]) x 2 (without / with state linear trends)
outreg, clear(Table3)
qui forvalues lag=1/3 {
	cap drop Dyaddon
	// Dyaddon is 1 where the add-on law is in force now but was not `lag' years earlier
	gen byte Dyaddon = yaddon & !L`lag'.yaddon // repeals in CA and TN (see notes to Table 1) not included in treatment variable
	outreg, clear
	foreach post74 in 1 "year>1974" {
		foreach balanced in 1 "relyr>=-7 & relyr<=6" {
			foreach timetrend in "" c.year#i.FSTATE {
				reg lnpcrrobgun Dyaddon ymm dc* i.FSTATE i.year `timetrend' [pw = statepop] if `post74' & `balanced', cluster(FSTATE)
				// each regression is merged in as a new column; only the last row group (lag 3) carries the summary statistics
				outreg, merge keep(Dyaddon ymm) rtitle("`lag' `=plural(`lag',"year")' after add-on law" \ "  `datename' date" \ "`lag' `=plural(`lag',"year")' after MM law" \ "  `datename' date") ctitle("","") bdec(4) starloc(1) se `=cond(`lag'<3,`"addrows("") noauto"',"")' summstat("_" \ N \ r2)
			}
		}
	}
	// append this lag's row group beneath those accumulated so far
	outreg, append replay(Table3) store(Table3)
}
outreg, replay(Table3)


***
*** Granger test
*** 

* use data set based on dates of passage
use "C:\Users\David\Dropbox\Documents\Work\Library\Sentencing reform\AbramsDeterrenceAEJApp2011-0005Data\AbramsDeterrence2", clear
xtset

* Lag-length search: for 1 to 20 lags, regress the gun robbery rate on lags of itself and of the two law dummies
* (L.F. makes the first "lag" of each dummy its current value) and show information criteria scaled by sample size
forvalues lag=1/20 { // favors 8 lags? Going by AIC / N...
	qui reg lnpcrrobgun L(1/`lag').(lnpcrrobgun F.(yaddon ymm)) dc* i.FSTATE i.year [aw=statepop], cluster(FSTATE)
	di `lag'
	estat ic
	// columns 5 and 6 of r(S) hold AIC and BIC
	mat S = r(S)
	di "AIC/N=" S[1,5]/e(N) " BIC/N=" S[1,6]/e(N)
}

* Add one extra lag because treatment is I(1) (http://davegiles.blogspot.com/2011/04/testing-for-granger-causality.html). 
reg lnpcrrobgun L(1/9).(lnpcrrobgun F.(yaddon ymm)) dc* i.FSTATE i.year [aw=statepop], cluster(FSTATE)
* Impulse response of the gun robbery rate to the add-on dummy over 20 periods, with bands from 10000 simulated coefficient draws
myIRF, depvar(*lnpcrrobgun) indepvar(*yaddon) lags(9) periods(20) reps(10000) xtitle(Years since enactment of gun add-on law)
graph export "Abrams IRF original.png", width(1000) replace


***
*** Build and impute data set
***

cd "C:\Users\David\Dropbox\Documents\Work\Clients & prospects\GiveWell\Criminal justice\Replications"
set more off

/*
odbc load, clear dsn(Crime) exec("select * from [Abrams replication]")
saveold "Abrams state-level", replace version(12)
*/


global AbramsLEAsOnly 1 // restrict to Abrams LEAs (approximately, as I did not match quite all)

*** Generate CDF of year-2000 population of LEAs and determine population minimum for 80% population coverage

* odbc load, clear dsn(UCR) exec("SELECT ORICode, UPOPTOT AS Pop FROM [LEA crosswalk] INNER JOIN (SELECT DISTINCT ORICode FROM Crimes) t ON [LEA crosswalk].ORI7=t.ORICode where UPOPTOT>0")
* saveold "Abrams year-2000 LEA populations", version(12)
use "Abrams year-2000 LEA populations", clear

* Running total of population from smallest to largest LEA. Stored as double: a float holds integers exactly
* only up to 2^24 (about 16.8 million), which a cumulative population total can exceed.
sort Pop
gen double Cum = sum(Pop)
gen double CDF = Cum / Cum[_N]
* Smallest LEA among those above the bottom 20% of cumulative population
sum Pop if CDF > .2 // sample representing 80% of population
global MinPop `r(min)'

* Base data set: to control file size, retains only LEAs with population>=10000
/*#delimit ;
odbc load, clear dsn(Crime) exec(`"
	SELECT       UCR.dbo.Crimes.PostalCode, [Sin taxes].dbo.States.FIPS AS FIPSState, UCR.dbo.Crimes.ORICode, UCR.dbo.Crimes.Month, UCR.dbo.Crimes.Year, UCR.dbo.Crimes.ActNumGunRobber AS rrobgun, 
													 UCR.dbo.Crimes.ActNumRobbryTot AS rrobtot, UCR.dbo.Crimes.ActNumGunAssaul AS rasltgun, UCR.dbo.Crimes.ActNumAssltTota AS raslttot, UCR.dbo.Crimes.Population AS Pop, 
													 cast(CASE WHEN UCR.dbo.Crimes.ActNumAllFields > 0 THEN 1 ELSE 0 END as bit) AS HasTotal, 
													 [Abrams chronology].[Add-on passage] as AddOnPassDate, [Abrams city code crosswalk].city AS AbramsCity, UCR.dbo.[LEA crosswalk].UPOPTOT as Pop2000,
													 case when (UCR.dbo.[Targonski missingness].CI IS NULL OR (UCR.dbo.[Targonski missingness].CI NOT IN (-99, -98, -93, -94, -90, -85, -80)
																								`=cond($Quarterly     , "AND (UCR.dbo.[Targonski missingness].CI>-100 OR (-UCR.dbo.[Targonski missingness].CI-101)/3=(UCR.dbo.Crimes.Month-1)/3)","")')) then 0 else 1 end as TargonskiMissing
	FROM            UCR.dbo.Crimes INNER JOIN
													 UCR.dbo.[LEA crosswalk] ON UCR.dbo.Crimes.ORICode = UCR.dbo.[LEA crosswalk].ORI7 INNER JOIN
													 [Sin taxes].dbo.States ON UCR.dbo.Crimes.PostalCode = [Sin taxes].dbo.States.[Postal code] INNER JOIN
													 [Abrams chronology] ON [Sin taxes].dbo.States.[Postal code] = [Abrams chronology].[Postal code] AND UCR.dbo.Crimes.PostalCode = [Abrams chronology].[Postal code] LEFT OUTER JOIN
													 [Abrams city code crosswalk] ON UCR.dbo.Crimes.ORICode = [Abrams city code crosswalk].ORICode LEFT OUTER JOIN
													 UCR.dbo.[Targonski missingness] ON UCR.dbo.Crimes.ORICode = UCR.dbo.[Targonski missingness].ORICode AND UCR.dbo.Crimes.Month = UCR.dbo.[Targonski missingness].Month AND 
													 UCR.dbo.Crimes.Year = UCR.dbo.[Targonski missingness].Year LEFT OUTER JOIN
													 [Abrams city code crosswalk] AS [Abrams city code crosswalk_1] ON UCR.dbo.Crimes.ORICode = [Abrams city code crosswalk_1].ORICode
	WHERE        UCR.dbo.Crimes.Year>=1965 AND ([Abrams city code crosswalk].city IS NOT NULL OR UCR.dbo.[LEA crosswalk].UPOPTOT>=10000)
"');
#delimit cr
compress
saveold Abrams, replace version(12)*/

* Load the LEA-month data, keeping either LEAs matched to an Abrams city or LEAs with year-2000 population of at least $MinPop
use Abrams if `=cond($AbramsLEAsOnly, "AbramsCity<.", "Pop2000>=$MinPop")', clear
ren Year ty
* Convert the passage date from a datetime to a monthly date, and build a monthly date for each observation
replace AddOnPassDate = mofd(dofc(AddOnPassDate))
gen tm = ym(ty, Month)
format %tm AddOnPassDate tm
egen id = group(ORICode)
* Treat a population of zero as missing so that its log is missing too
recode Pop 0 = .
gen double lnPop = ln(Pop)

* Estimate population-weighted share of Abrams data set that was imputed (for gun robbery)
* Step 1: save the Abrams city-year gun robbery counts and populations under underscore-prefixed names
preserve
use "C:\Users\David\Dropbox\Documents\Work\Library\Sentencing reform\AbramsDeterrenceAEJApp2011-0005Data\AbramsDeterrence2", clear
keep year rrobgun statepop city
ren (year rrobgun statepop city) (ty _rrobgun _Pop AbramsCity)
tempfile file
save "`file'"
restore
* Step 2: total the raw monthly counts by city and year, merge with the Abrams figures, and flag city-years where either the count or the population differs
preserve
collapse (sum) rrobgun  (first) FIPSState AddOnPassDate Pop, by(AbramsCity ty) fast
merge 1:1 AbramsCity ty using "`file'", keep(match using)
gen byte imputed = rrobgun!=_rrobgun | Pop!=_Pop
di "Population-weighted share of annual totals that differ in Abrams and raw FBI data:"
sum imputed [aw=Pop]
sum imputed [aw=Pop] if ty>=1970 & ty<=1999
restore

* Mark an assault or robbery entry as missing if
*    it is negative;
*    if the count for the gun subcategory exceeds that for the larger category, or if it equals the count for the larger category and both exceed 100;
*    if total crime in a given year does not follow a perfect monthly, quarterly, semiannual, or annual pattern of non-zeroness
*    or if the total crime count is identified as missing by Targonski (2004, covering 1977–2000; codes -99, -98, -93, -94, -90, -85, -80 in a “CI” field).
* As coded below, a gun count is also set to missing when it is zero while the same month's total is at least 50, and when
* the year's gun total is zero or equals the year's overall total while that overall total is at least 100.
* Each HasTotalXXX flag is 1 for an LEA-year only if, in every month, HasTotal equals the indicator that Month is a
* multiple of 1, 3, 6 or 12 respectively -- i.e. totals are present in exactly the months that pattern implies.
egen HasTotalMonthly    = min(HasTotal == !mod(Month,1 )), by(id ty) // crime data for every month this year?
egen HasTotalQuarterly  = min(HasTotal == !mod(Month,3 )), by(id ty)
egen HasTotalSemiannual = min(HasTotal == !mod(Month,6 )), by(id ty)
egen HasTotalAnnual     = min(HasTotal == !mod(Month,12)), by(id ty)
foreach var in rob aslt {
	// yearly sums are taken before any counts are blanked out
	egen Sumr`var'tot = sum(r`var'tot), by(id ty)
	egen Sumr`var'gun = sum(r`var'gun), by(id ty)
	replace r`var'tot=. if TargonskiMissing | r`var'tot < 0
	replace r`var'gun=. if TargonskiMissing | r`var'gun < 0 | r`var'gun > r`var'tot | (r`var'gun==0 & r`var'tot>=50) | ((Sumr`var'gun==0 | Sumr`var'gun==Sumr`var'tot) & Sumr`var'tot>=100)
}

* For each reporting frequency (m = monthly, q = quarterly, y = annual): blank out counts from LEA-years whose
* reporting pattern is too coarse for that frequency, fill out a balanced LEA-month panel, multiply impute the
* missing counts, aggregate to the state level, merge in state-level variables, and save one file per frequency.
* Each pass starts from the same LEA-month data thanks to preserve/restore.
foreach Freq in m q y {
	preserve
	foreach var in rob aslt {
		if "`Freq'"=="y" {
			replace r`var'tot = . if !(HasTotalSemiannual | HasTotalAnnual | HasTotalQuarterly | HasTotalMonthly)
			replace r`var'gun = . if !(HasTotalSemiannual | HasTotalAnnual | HasTotalQuarterly | HasTotalMonthly)
		}
		else if "`Freq'"=="q" {
			replace r`var'tot = . if !(HasTotalQuarterly | HasTotalMonthly)
			replace r`var'gun = . if !(HasTotalQuarterly | HasTotalMonthly)
		}
		else {
			replace r`var'tot = . if !HasTotalMonthly
			replace r`var'gun = . if !HasTotalMonthly
		}
	}

	keep FIPSState PostalCode ORICode AbramsCity Month ty tm rrobgun rrobtot rasltgun raslttot id Pop lnPop AddOnPassDate

	xtset id tm
	tsfill, full // fill out to balanced panel
	foreach var in FIPSState AddOnPassDate { // fill in certain state-level vars in new observations
		replace `var' = L.`var' if `var'==. // carry forward within LEA...
		by id: replace `var' = `var'[_N]    // ...then spread the LEA's last value to all its observations
	}
	replace ty = yofd(dofm(tm))
	gen tq = floor(tm/3)
	replace Month = mod(tm,12)+1
	gen byte T = tm >= AddOnPassDate // month of passage is viewed as partially treated

	if "`Freq'"=="m" { // cited in journal supplementary materials
		gen byte miss = rrobgun==. | Pop==0 | Pop==.
		noi di "Missing fraction of gun robberies. Abrams LEAs only = $AbramsLEAsOnly"
		sum miss [aw=Pop] if ty>=1970 & ty<=1999
	}
	
	** Before imputation, compare missing-coded data to Abrams
/*	if $AbramsLEAsOnly & "`Freq'"=="y" {
		preserve
		use "C:\Users\David\Dropbox\Documents\Work\Library\Sentencing reform\AbramsDeterrenceAEJApp2011-0005Data\AbramsDeterrence2", clear
		keep year FSTATE rrobtot rrobgun raslttot rasltgun statepop lnpc* dc* relyr city
		ren (year FSTATE rrobtot rrobgun raslttot rasltgun statepop lnpc* dc* relyr city) (ty FIPSState _rrobtot _rrobgun _raslttot _rasltgun _Pop _lnpc* _dc* _relyr AbramsCity)
		tempfile file
		save "`file'"
		restore
		preserve
		collapse (sum) rrobgun rrobtot rasltgun raslttot (count) Nrrobgun=rrobgun (first) FIPSState AddOnPassDate Pop, by(AbramsCity ty PostalCode ORICode) fast
		replace rrobgun=. if Nrrobgun<12
		merge 1:1 AbramsCity ty using "`file'"
		gen lnpcrrobgun = ln(rrobgun/Pop)
		gen str9 label = ORICode + substr(string(ty),3,2)
		scatter *lnpcrrobgun if lnpcrrobgun<., mlab(label)
		restore
	}*/

	mi set flong
	mi xtset id tm
	mi register imputed lnPop rrobtot raslttot rrobgun rasltgun

	* Force monotonicity in missingness to reduce computational burden of multivariate imputation; Pop < rrobtot < raslttot < rrobgun < rasltgun. Before these changes, missingness is nearly monotone
	replace rrobtot  = . if lnPop    == . & rrobtot  != .
	replace raslttot = . if rrobtot  == . & raslttot != .
	replace rrobgun  = . if raslttot == . & rrobgun  != .
	replace rasltgun = . if rrobgun  == . & rasltgun != .

	* number of entries to be imputed
	foreach var in lnPop rrobtot raslttot rrobgun {
		count if `var'==.
	}

	* Imputation model: Enter previous count vars in logs, zero-inflated; interact all terms in crime eqs with post-Add-On dummy. Zeroness dummies for rXXXtot essentially unidentified, thus excluded, in rXXXgun eqs since they should only be 1 when rXXXgun=0.
	* The seed is fixed so that the five imputations are reproducible.
	set matsize 4000
	set seed 987654321
	mi impute monotone (reg lnPop i.ty i.id) ///
	                   (poisson rrobtot  T#i.ty T#i.Month T                                                                                                                                                                                 , off(lnPop)) ///
	                   (poisson raslttot T#i.ty T#i.Month T (cond(rrobtot,  ln(rrobtot),-1,-1)) (cond(rrobtot,0,1,1))                                                                                                                               ///
	                                                        (cond(rrobtot,T*ln(rrobtot),-T,-T)) (cond(rrobtot,0,T,T))                                                                                                                       , off(lnPop)) ///
	                   (poisson rrobgun  T#i.ty T#i.Month T (cond(rrobtot,  ln(rrobtot),-1,-1))                       (cond(raslttot,  ln(raslttot),-1,-1)) (cond(raslttot,0,1,1))                                                                    ///
	                                                        (cond(rrobtot,T*ln(rrobtot),-T,-T))                       (cond(raslttot,T*ln(raslttot),-T,-T)) (cond(raslttot,0,T,T))                                                          , off(lnPop)) ///
	                   (poisson rasltgun T#i.ty T#i.Month T (cond(rrobtot,  ln(rrobtot),-1,-1)) (cond(rrobtot,0,1,1)) (cond(raslttot,  ln(raslttot),-1,-1))                        (cond(rrobgun,  ln(rrobgun),-1,-1)) (cond(rrobgun,0,1,1))            ///
	                                                        (cond(rrobtot,T*ln(rrobtot),-T,-T)) (cond(rrobtot,0,T,T)) (cond(raslttot,T*ln(raslttot),-T,-T))                        (cond(rrobgun,T*ln(rrobgun),-T,-T)) (cond(rrobgun,0,T,T)), off(lnPop)) ///
		, custom add(5) force noi dots noupdate nomonotonechk

	replace Pop = exp(lnPop)

	* Having imputed, for speed, trick mi into working with state-aggregated data
	mi unregister rrobgun rrobtot rasltgun raslttot lnPop
	collapse (sum) Pop rrobgun rrobtot rasltgun raslttot (max) _mi_miss, fast by(_mi_m  FIPSState ty `=cond("`Freq'"=="y","",cond("`Freq'"=="q","tq","Month tm"))')
	replace Pop = Pop / 12 // Pop was summed over the months in each period; dividing by 12 at every frequency puts crimes/Pop on an annual-rate footing
	by _mi_m: gen _mi_id = _n
	mi xtset FIPSState t`Freq'
	ren ty Year // the state-level file is keyed on Year
	mi merge m:1 FIPSState Year using "Abrams state-level", noupdate keep(match master)
	ren Year ty
	* Event time in units of the current frequency; dates at or before 1 Jan 1900 are treated as "no law"
	gen relAddOn = t`Freq' - `Freq'ofd(dofc(AddOnPassDate)) if AddOnPassDate>tc(1jan1900 00:00:00)
	gen relMM    = t`Freq' - `Freq'ofd(dofc(       MMDate)) if        MMDate>tc(1jan1900 00:00:00)
	gen rrobnongun = rrobtot - rrobgun
	gen rasltnongun = raslttot - rasltgun // was misnamed "rrasltnongun", which the raslt* wildcard below did not pick up, so no log per-capita version was ever created
	foreach var of varlist rrob* raslt* {
		gen double lnpc`var' = ln(`var'/Pop)
	}

	* Mark, in all imputations, observations of gun crimes that are zero for at least one. -mi estimate: regress lnpcXXX - wants exact same sample for each imputation.
	foreach crime in aslt rob {
		egen r`crime'gunImputedZeroes = max(r`crime'gun==0 & _mi_m), by(FIPSState `_dta[tis]')
	}

	save `"Abrams MI collapsed`=cond($AbramsLEAsOnly," Abrams LEAs","")' `Freq'"', replace
	restore
}

***
*** Compare averaged MI data sets to Abrams state panel
***

global AbramsLEAsOnly 1

* Abrams side: state-year totals of crime counts and population, plus the demographic controls, saved under underscore-prefixed names
use "C:\Users\David\Dropbox\Documents\Work\Library\Sentencing reform\AbramsDeterrenceAEJApp2011-0005Data\AbramsDeterrence2", clear
collapse (sum) rrobtot rrobgun raslttot rasltgun statepop (first) dc* relyr, by(FSTATE year) fast
ren (year FSTATE rrobtot rrobgun raslttot rasltgun statepop dc* relyr) (ty FIPSState _rrobtot _rrobgun _raslttot _rasltgun _Pop _dc* _relyr)
tempfile file
save "`file'"

* Replication side: annual MI data for 1970-99, states with an add-on law. Compute log rates within each imputation
* (mi_m>0), then average across imputations. dc* is carried through both collapses because the final loop plots it.
use `"Abrams MI collapsed`=cond($AbramsLEAsOnly," Abrams LEAs","")' y"' if ty>=1970 & ty<=1999 & AddOnPassDate>tc(01jan1900 00:00:00) & AddOnPassDate<., clear
mi unset
order PostalCode
collapse Pop rrobgun rrobtot rasltgun raslttot dc* if mi_m, by(mi_m FIPSState PostalCode ty) fast
foreach var in rrobtot rrobgun raslttot rasltgun {
	gen double lnpc`var' = ln(`var'/Pop)
}
collapse Pop lnpc* dc*, by(FIPSState PostalCode ty) fast
joinby FIPSState ty using "`file'", unmatched(master)

label var lnpcrrobtot "Robberies"
label var lnpcrrobgun "Robberies with gun"
label var lnpcraslttot "Assaults"
label var lnpcrasltgun "Assaults with gun"
gen str4 label = PostalCode + substr(string(ty),3,2) // marker label: postal code plus two-digit year
local i 0
set scheme s1color
* One scatter per crime series, new data against Abrams data, annotated with the population-weighted correlation
foreach var in rrobtot rrobgun raslttot rasltgun {
	local ++i
	gen double _lnpc`var' = ln(_`var'/_Pop)
	corr lnpc`var' _lnpc`var' [aw = Pop]
	scatter lnpc`var' _lnpc`var', ///
		name(`var', replace) mlab(label) msym(none) mlabpos(0) mlabsize(small) ///
		ytitle(`"`=cond(mod(`i',2),"New data","")'"') xtitle(`"`=cond(`i'>2,"Abrams data","")'"') title("`:var label lnpc`var'' ", size(medlarge)) ///
		note(`"Correlation = `=string(r(rho),"%4.2f")'"', pos(5) ring(0)) ylabel(, tlen(*.5) labgap(0) angle(horizontal) labsize(small)) xlabel(, labsize(small))
}
graph combine rrobtot rrobgun raslttot rasltgun, imargin(zero) graphregion(margin(zero)) name(all, replace)
graph export "Abrams vs. replication scatters.png", replace width(1000)

* Compare each demographic control with its Abrams counterpart, skipping any that has none
foreach var of varlist dc* {
	capture confirm variable _`var'
	if _rc {
		di as txt "(no Abrams counterpart _`var' for `var'; skipped)"
		continue
	}
	scatter `var' _`var', mlab(label) name(`var', replace)
}

***
*** MI event study
***

cd "C:\Users\David\Dropbox\Documents\Work\Clients & prospects\GiveWell\Criminal justice\Replications"
global Freq m
global AbramsLEAsOnly 1
global Preferred 1
global range 6 // time range of graphs

global Per = cond("$Freq"=="m", 1, cond("$Freq"=="q", 3, 12)) // months per period at the chosen frequency

use `"Abrams MI collapsed`=cond($AbramsLEAsOnly," Abrams LEAs","")' $Freq"', clear
* In the non-preferred variant, bin event time at 7 years before and 6 years after passage
if !$Preferred {
	replace relAddOn = -7*(12/$Per) if relAddOn < -7*(12/$Per) & relAddOn != .
	replace relAddOn =  6*(12/$Per) if relAddOn >  6*(12/$Per) & relAddOn != .
}

egen _rel = group(relAddOn), label // "i." operator doesn't like negative values in relAddOn
levelsof _rel if relAddOn==0
global rel0 `r(levels)'

replace ty = ty - 1960 // helps with convergence when regressing on i.FIPSState#c.ty

* Eight panels (g1-g8): 2 samples by year x 2 specifications (without/with state linear trends) x 2 samples by event time.
* The panel titles below label panels 1 and 3 "All data" and panels 3 and 4 "Controlling for state linear trends",
* which requires the event-time sample to vary fastest, so the -timetrend- loop is nested outside the -balanced- loop.
* NOTE(review): the x-axis labels and the xline() offset depend on $Quarterly, which this section does not set -- confirm it matches $Freq.
set scheme s1color
local i 0
foreach post74 in 1 "ty>1974-1960" {
	foreach timetrend in "" i.FIPSState#c.t$Freq {
		foreach balanced in 1 "relAddOn>=-7*(12/$Per) & relAddOn<=6*(12/$Per)" {
			local ++i

			// gun robbery and gun assault equations estimated jointly, combining results across imputations
			mi estimate, post noupdate nowarning esampvaryok: cmp (lnpcrrobgun  = ib${rel0}._rel i.t$Freq i.FIPSState dc* `timetrend') ///
			                                                      (lnpcrasltgun = ib${rel0}._rel i.t$Freq i.FIPSState dc* `timetrend') [pw=Pop] if `post74' & `balanced', cluster(FIPSState) ind(1 1) qui nolr

			fvexpand i(`=$rel0-$range*(12/$Per)'/`=$rel0+$range*(12/$Per)')._rel
			// x labels only on the bottom row (i>=5), column titles only on the top row (i<5), y labels and title only in the first column, legend only in panel 1
			coefplot ///
				(, keep(lnpcrrobgun :`=subinstr("`r(varlist)'","b.",".",.)') base cismooth(n(10) lwidth(1 10) color(blue)) offset(-0.2) label(Robbery))  ///
				(, keep(lnpcrasltgun:`=subinstr("`r(varlist)'","b.",".",.)') base cismooth(n(10) lwidth(1 10) color(blue)) offset( 0.2) label(Assault)), vertical ///
				xlabel(`=cond(`i'<5, `""""', `"`=cond($Quarterly, `"4 "-5" 8 "-4" 12 "-3" 16 "-2" 20 "-1" 24 "0" 28 "1" 32 "2" 36 "3" 40 "4" 44 "5""', `"1 "-6" 2 "-5" 3 "-4" 4 "-3" 5 "-2" 6 "-1" 7 "0" 8 "1" 9 "2" 10 "3" 11 "4" 12 "5" 13 "6""')', labsize(vsmall)"')') ///
				`=cond(mod(`i',4)==1, `"ylabel(0 "0%" `=ln(1.5)' "+50%" `=ln(.5)' "–50%", tlen(*.5) labgap(0) angle(horizontal) labsize(small)) ytitle(`"`=cond(`i'<5,"1965","1975")'-2006 data"', bmargin(zero))"', `"ylabel("")"')' ///
				xline(`=$range*(12/$Per)+!$Quarterly') ///
				`=cond(`i'<5, `"title(`"`=cond(inlist(`i',1,3),"All data","Data within 6 years of passage")'"' `"`=cond(inlist(`i',1,2)," ","Controlling for state linear trends")'"', size(medsmall) span)"', "")' ///
				graphregion(margin(zero)) ///
				legend(`=cond(`i'==1, "region(style(none)) margin(zero) pos(6) ring(0) size(small) keygap(*.2)" , "off")') ///
				msize(small) ///
				yscale(range(-.62 .64)) ///
				name(g`i', replace) replace
		}
	}
}
graph combine g1 g2 g3 g4 g5 g6 g7 g8, cols(4) title("Years until/since passage of gun add-on law", margin(small) size(small) pos(6)) graphregion(margin(zero)) name(EventReplication, replace)
graph export `"Abrams Figure 5 replication`=cond($AbramsLEAsOnly," Abrams LEAs","")'`=cond($Quarterly," quarterly","")'.png"', replace width(1000)


***
*** MI panel estimation
***

global AbramsLEAsOnly 1
global Freq y
global EventType Pass // Pass or Eff for passage or effective date

* Row-title wording follows the event type: "implementation" for Eff, "passage" otherwise
local datename = cond("$EventType"=="Eff", "implementation", "passage")

cd "C:\Users\David\Dropbox\Documents\Work\Clients & prospects\GiveWell\Criminal justice\Replications"
use `"Abrams MI collapsed`=cond($AbramsLEAsOnly," Abrams LEAs","")' $Freq"', clear

* Reduce the law dates from datetimes to calendar years, and build "law in force" dummies by comparing with the observation year
replace AddOn${EventType}Date = yofd(dofc(AddOn${EventType}Date))
replace MMDate                = yofd(dofc(MMDate))
gen byte yAddOn = ty >= AddOn${EventType}Date
gen byte yMM    = ty >= MMDate
* Re-base years after the dummies are built; the sample conditions below are written on the re-based scale (ty>1974-1960)
replace ty = ty - 1960

label var lnpcrrobgun    "Gun robbery rate"
label var lnpcrrobnongun "Non-gun robbery rate"
label var lnpcrasltgun   "Gun assault rate"

* Build the table in three row groups, one per lag length; within each, eight columns come from
* 2 (all years / post-1974) x 2 (all event times / within [-7,6]) x 2 (without / with state linear trends)
outreg, clear(MITable3)
forvalues lag=1/3 {
	outreg, clear
	cap drop DyAddOn
	// relies on observations being in state-by-year order, so that [_n-`lag'] is the same state `lag' years earlier -- TODO confirm sort order
	gen byte DyAddOn = yAddOn & !yAddOn[_n-`lag'] & FIPSState==FIPSState[_n-`lag'] // equivalent to "yAddOn & !L`lag'.yAddOn" while avoiding lag operators, which don't work in mi data
	foreach post74 in 1 "ty>1974-1960" {
		foreach balanced in 1 "relAddOn>=-7 & relAddOn<=6" {
			foreach timetrend in "" c.ty#i.FIPSState {
				mi estimate, noupdate post: regress lnpcrrobgun DyAddOn yMM dc* i.ty i.FIPSState `timetrend' [pw=Pop] if `post74' & `balanced', cluster(FIPSState)
				// each regression is merged in as a new column; only the last row group (lag 3) carries the summary statistics
				outreg, merge keep(DyAddOn yMM) coljust(l{c}c) rtitle("`lag' `=plural(`lag',"year")' after add-on law" \ "   `datename' date" \ "`lag' `=plural(`lag',"year")' after MM law" \ "  `datename' date") ctitle("","") bdec(4) starloc(1) starlevels(10 5 1) se `=cond(`lag'<3,`"addrows("") noauto"',"")' summstat("_" \ N) nodisplay
			}
		}
	}
	// append this lag's row group beneath those accumulated so far
	outreg, append replay(MITable3) store(MITable3) coljust(l{c}c)
}
outreg using "Abrams replication", replace replay(MITable3)


***
*** Granger test--not used in text
*** 

global AbramsLEAsOnly 0
global Freq y
global EventType Eff // Pass or Eff for passage or effective date

global Per = cond("$Freq"=="m", 1, cond("$Freq"=="q", 3, 12))

cd "C:\Users\David\Dropbox\Documents\Work\Clients & prospects\GiveWell\Criminal justice\Replications"
use `"Abrams MI collapsed`=cond($AbramsLEAsOnly," Abrams LEAs","")' $Freq"', clear

* Reduce the law dates from datetimes to calendar years, and build "law in force" dummies by comparing with the observation year
replace AddOn${EventType}Date = yofd(dofc(AddOn${EventType}Date))
replace MMDate                = yofd(dofc(MMDate))

gen byte yAddOn = ty >= AddOn${EventType}Date
gen byte yMM    = ty >= MMDate

preserve // to select lag limit based on AIC/BIC, collapse to single imputation
mi unset
* Average the imputations (mi_m>0) within state and period
* NOTE(review): `_dta[tis]' is expected to expand to the panel time variable; with annual data that would repeat ty in by() -- confirm this runs as intended after -mi unset-
collapse lnpcrrobgun y* Pop dc* if mi_m, by(FIPSState ty `_dta[tis]')
xtset FIPSState `_dta[tis]'
forvalues lag=1/30 { // on annual data, favors 10 lags?
	qui reg lnpcrrobgun L(1/`lag').(lnpcrrobgun F.(yAddOn yMM)) dc* i.FIPSState i.ty [aw=Pop], cluster(FIPSState)
	di `lag'
	estat ic
	// columns 5 and 6 of r(S) hold AIC and BIC
	mat S = r(S)
	di "AIC/N=" S[1,5]/e(N) " BIC/N=" S[1,6]/e(N)
}
restore

* Granger tests. Add one extra lag because treatment is I(1) (http://davegiles.blogspot.com/2011/04/testing-for-granger-causality.html). 
* Do with and without fixed effects, mindful of dynamic panel bias. Coefficients should tend to bracket true value (http://www.cemmap.ac.uk/wps/cwp0209.pdf#page=7)

* Once a state's series is flagged as containing an imputed zero, flag every later observation of that state as well
replace rrobgunImputedZeroes = 1 if rrobgunImputedZeroes[_n-1]==1 & FIPSState==FIPSState[_n-1] // zap time series once and if they have log of zero

mi estimate, post: reg lnpcrrobgun L(1/9).(lnpcrrobgun F.(yAddOn yMM)) dc* i.FIPSState i.ty [aw=Pop] if !rrobgunImputedZeroes, cluster(FIPSState)
* Impulse response of the gun robbery rate to the add-on dummy over 20 periods, with bands from 10000 simulated coefficient draws
myIRF, depvar(*lnpcrrobgun) indepvar(*yAddOn) lags(9) periods(20) reps(10000) xtitle(Years since enactment of gun add-on law)
graph export "Abrams IRF replication.png", width(1000) replace


----

* journal graphs

set scheme s1color
global range 6 // time range of graphs

cd "C:\Users\David\Dropbox\Documents\Work\Clients & prospects\GiveWell\Criminal justice\Replications"
use  "C:\Users\David\Dropbox\Documents\Work\Library\Sentencing reform\AbramsDeterrenceAEJApp2011-0005Data\AbramsDeterrence2", clear

* Bin event time: values below -7 become -7, nonmissing values above 6 become 6; missing values stay missing
replace relyr = -7 if relyr < -7
replace relyr =  6 if relyr >  6 & !missing(relyr)
* recode  relyr (. = 99)
egen _rel = group(relyr), label // "i." operator doesn't like negative values in relyr

* Record the _rel level that corresponds to event time 0, and the highest _rel level
levelsof _rel if relyr==0, local(zeroLevel)
global rel0 `zeroLevel'
summarize _rel, meanonly
global relmax `r(max)'

* For window widths of 1, 2 and 3 years: swap the event-time dummies inside the window for a pre-event and a post-event linear term,
* test whether the two slopes are equal, and keep the p-value (locals p1-p3) and both slopes (scalars PreSpline1-3, PostSpline1-3) for the plot
forvalues w = 1/3 { // replace dummies for immediate pre- and post- w-year ranges with linear splines and test for difference in slopes
	capture drop PreSpline PostSpline
	generate byte PreSpline  = relyr * (relyr >= -`w' & relyr < 0)
	generate byte PostSpline = relyr * (relyr > 0 & relyr <= `w')
	regress lnpcrrobgun *Spline i(1/`=$rel0-`w'-1' `=$rel0+`w'+1'/$relmax)b$rel0._rel i.FSTATE i.year dc* [pweight=statepop], vce(cluster FSTATE)
	test PreSpline = PostSpline
	local p`w' = string(`r(p)', "%4.2f")
	scalar PreSpline`w'  = _b[PreSpline]
	scalar PostSpline`w' = _b[PostSpline]
}

* Event-study regression on the Abrams data: one dummy per _rel level with the event-time-0 level as base, plus state and year dummies and dc* controls,
* weighted by statepop and clustered on FSTATE. The base level is written in the documented ib#. form, matching the multiply-imputed regression further down.
reg lnpcrrobgun ib$rel0._rel i.FSTATE i.year dc* [pw=statepop], cluster(FSTATE)

* Build the x-axis ticks from the range global instead of hard-coding 13 positions: the coefficient for event time k is plotted at position k + range + 1,
* the same mapping the xline and the scatteri overlays below already use. With range equal to 6 this yields exactly 1 "-6" through 13 "6".
local xlab
forvalues k = -$range/$range {
	local xlab `xlab' `=`k'+$range+1' "`k'"
}

* Plot the coefficients for event times -range through +range (base included), with smoothed confidence bands; generate replace stores the plot data
* and r(graph) is then re-run with the three pre/post spline segments overlaid in red
fvexpand i(`=$rel0-$range'/`=$rel0+$range')._rel
coefplot , keep(`=subinstr("`r(varlist)'","b.",".",.)') base cismooth(n(10) lwidth(2 11) intensity(10 100) color(blue)) vertical ///
	xlabel(`xlab', labsize(small)) ///
	ylabel(0 "0%" `=ln(1.5)' "+50%" `=ln(.75)' "-25%", tlen(*.5) labgap(0) angle(horizontal) labsize(small)) title("Abrams data", bmargin(zero)) subtitle(" ") ///
	xline(`=$range+1') ///
	mcolor(none) ///
	graphregion(margin(zero)) legend(off) msize(small) ///
	text(`=ln(.56)' .35 "p = `p1', `p2', `p3'", place(e) margin(zero)) ///
	text(`=ln(1.7)' .6 "A", place(e) margin(zero)) /// 
	yscale(range(`=ln(.55)' `=ln(1.7)')) ///
	generate replace

`r(graph)' || scatteri `=PreSpline3*-3' `=($range-3)+1' 0 `=($range-0)+1' `=PostSpline3*3' `=($range+3)+1', recast(line) lcolor(red) || ///
              scatteri `=PreSpline2*-2' `=($range-2)+1' 0 `=($range-0)+1' `=PostSpline2*2' `=($range+2)+1', recast(line) lcolor(red) || ///
              scatteri `=PreSpline1*-1' `=($range-1)+1' 0 `=($range-0)+1' `=PostSpline1*1' `=($range+1)+1', recast(line) lcolor(red)    ///
	name(g, replace)

* Panel-letter counter. The coefplot call below pre-increments it and passes the result to a char tag, so the panels drawn in this loop use codes 66, 67, and so on
* (the earlier graph named g is labeled A directly).
local char 65
* One pass per data frequency (y, then m) and, within each, per sample (AbramsLEAsOnly 1, then 0). Each pass stores a graph named g + Freq + AbramsLEAsOnly.
foreach Freq in y m {
	* Months per period: 1 for m, 3 for q, 12 otherwise. 12/Per is therefore the number of periods per year used to scale every window below.
	global Per = cond("`Freq'"=="m", 1, cond("`Freq'"=="q", 3, 12))
	* Upper line width passed to cismooth: 12 when Freq is y, 3 otherwise
	local width = 3 + 9 * ("`Freq'"=="y")

	foreach AbramsLEAsOnly in 1 0 {
		* Load the multiply-imputed file for this frequency; the file name gains the suffix Abrams LEAs when AbramsLEAsOnly is 1
		use `"Abrams MI collapsed`=cond(`AbramsLEAsOnly'," Abrams LEAs","")' `Freq'"', clear
		* Bin event time at plus and minus range years (expressed in periods); missing event time is recoded to 99 so it gets its own _rel level
		replace relAddOn = -$range*(12/$Per) if relAddOn < -$range*(12/$Per)
		replace relAddOn =  $range*(12/$Per) if relAddOn >  $range*(12/$Per) & relAddOn < .
		recode  relAddOn (. = 99)
		egen _rel = group(relAddOn), label // "i." operator doesn't like negative values in relAddOn
		* Record the _rel level that corresponds to event time 0, and the highest _rel level
		levelsof _rel if relAddOn==0
		global rel0 `r(levels)'
		sum _rel, meanonly
		global relmax `r(max)'

		* Results kept for the plot: locals p1-p3 hold the formatted p-values, scalars PreSpline1-3 and PostSpline1-3 hold the slopes
		forvalues r=1/3 { // replace dummies for immediate pre- and post- r-year ranges with linear splines and test for difference in slopes
			cap drop PreSpline PostSpline
			gen byte PreSpline  = (relAddOn<0) * (relAddOn>=-`r'*(12/$Per)) * relAddOn
			gen byte PostSpline = (relAddOn>0) * (relAddOn<= `r'*(12/$Per)) * relAddOn
			mi estimate, post noupdate esampvaryok: regress lnpcrrobgun *Spline i(1/`=$rel0-`r'*(12/$Per)-1' `=$rel0+`r'*(12/$Per)+1'/$relmax)b$rel0._rel i.t`Freq' i.FIPSState dc* [pw=Pop] if ty>=1970 & ty<=1999, cluster(FIPSState)
			test PreSpline = PostSpline
			local p`r' = string(`r(p)', "%4.2f")
			scalar PreSpline`r'  = _b[PreSpline]
			scalar PostSpline`r' = _b[PostSpline]
		}

		* Full event-study regression combined across imputations: one dummy per _rel level (base = event time 0), period and state dummies, dc* controls, years 1970-1999
		mi estimate, post noupdate esampvaryok: regress lnpcrrobgun ib$rel0._rel i.t`Freq' i.FIPSState dc* [pw=Pop] if ty>=1970 & ty<=1999, cluster(FIPSState)

		* Plot the coefficients within range years of the event. Tick positions are computed as periods-per-year times (range + k) + 1 for k = -6..6;
		* the y-axis labels are blanked, and the title and subtitle are added only when Freq is not m.
		fvexpand i(`=$rel0-$range*(12/$Per)'/`=$rel0+$range*(12/$Per)')._rel
		coefplot, keep(`=subinstr("`r(varlist)'","b.",".",.)') base cismooth(n(10) lwidth(2 `width') intensity(10 100) color(blue)) vertical ///
			xlabel(`=12/$Per*($range-6)+1' "-6" `=12/$Per*($range-5)+1' "-5" `=12/$Per*($range-4)+1' "-4" `=12/$Per*($range-3)+1' "-3" `=12/$Per*($range-2)+1' "-2" `=12/$Per*($range-1)+1' "-1" `=12/$Per*($range+0)+1' "0" `=12/$Per*($range+1)+1' "1" `=12/$Per*($range+2)+1' "2" `=12/$Per*($range+3)+1' "3" `=12/$Per*($range+4)+1' "4" `=12/$Per*($range+5)+1' "5" `=12/$Per*($range+6)+1' "6", labsize(small)) ///
			ylabel("") ///
			`=cond("`Freq'"=="m","",`"title("Multiply imputed data", bmargin(zero)) subtitle("`=cond(`AbramsLEAsOnly'," ","Doubled population coverage")'")"')' ///
			xline(`=$range*(12/$Per)+1') ///
			mcolor(none) ///
			graphregion(margin(zero)) legend(off) msize(small) ///
			text(`=ln(.56)' .75 "p = `p1', `p2', `p3'", place(e) margin(zero)) ///
			text(`=ln(1.7)' .6 "{char `++char'}", place(e) margin(zero)) /// 
			yscale(range(`=ln(.55)' `=ln(1.7)')) ///
			generate replace

		* Re-run the graph command returned by coefplot with three red line segments overlaid, one per window width:
		* each runs from slope times minus the window length, through 0 at the event, to slope times plus the window length
		`r(graph)' || scatteri `=PreSpline3*-3*(12/$Per)' `=12/$Per*($range-3)+1' 0 `=12/$Per*($range-0)+1' `=PostSpline3*3*(12/$Per)' `=12/$Per*($range+3)+1', recast(line) lcolor(red) || ///
									scatteri `=PreSpline2*-2*(12/$Per)' `=12/$Per*($range-2)+1' 0 `=12/$Per*($range-0)+1' `=PostSpline2*2*(12/$Per)' `=12/$Per*($range+2)+1', recast(line) lcolor(red) || ///
									scatteri `=PreSpline1*-1*(12/$Per)' `=12/$Per*($range-1)+1' 0 `=12/$Per*($range-0)+1' `=PostSpline1*1*(12/$Per)' `=12/$Per*($range+1)+1', recast(line) lcolor(red)  ///
			name(g`Freq'`AbramsLEAsOnly', replace)
	}
}
* Assemble the five stored panels in a 3-column grid, leaving cell 4 empty so the two monthly panels sit under the two annual multiply-imputed panels.
* The combined graph is named EventOriginal. The name no longer appends the Freq macro: Freq is the loop macro of the preceding foreach
* and has no meaning once that loop has finished, so the suffix was a stale reference.
graph combine g gy1 gy0 gm1 gm0, cols(3) hole(4) title("Years until/since passage of gun add-on law", margin(small) pos(6) size(small)) graphregion(margin(zero)) name(EventOriginal, replace)
* Save the combined graph as a PNG 1000 pixels wide, overwriting any existing file
graph export "Abrams journal.png", replace width(1000)
