#=========================================================
#Part 2 - code for Chapter 4 "Propensity Score Stratification" of book:
#Leite, W. L. (2017). Practical propensity score methods using R.
#Thousand Oaks, CA: Sage.
#
#this is the code that was used to generate the example results in the book
#As the R software and the R packages used in this example are updated frequently
#some incompatibilities between the current code and new R versions or package versions
#may appear
#Any updates to the code will be posted at:
# http://www.practicalpropensityscore.com
#=========================================================
#this code is for estimating the effect of schools
#having at least one full-time security guards
#on the frequency of harsh punishment
#using propensity score stratification
#using data from the School Survey on Crime and Safety
#Part 2 - perform propensity score stratification
#load data
load(file="Chapter4_ssocs08_with_propensity_scores.Rdata")
#cut propensity scores into five strata for estimating the ATT
SSOCS.data$subclass <- cut(x=SSOCS.data$pScores,
breaks=quantile(SSOCS.data$pScores,
prob = seq(0, 1, 1/5)),include.lowest=T)
#rename the strata labels
levels(SSOCS.data$subclass) <- 1:length(levels(SSOCS.data$subclass))
#examine common support
xtabs(~treat+subclass,SSOCS.data) #table of counts per stratum
#=========================================================================
#evaluate covariate balance within strata separately for categorical and continuous variables
library(MatchIt)
SSOCS.data2 <- SSOCS.data[,c("SCHID", "treat","percHarsh", "pScores","pScores",
"STRATA", "FINALWGT",covariateNames)]
balanceFormula <- paste(covariateNames, collapse="+")
balanceFormula <- formula(paste("treat~",balanceFormula,sep=""))
stratification <- matchit(balanceFormula,distance=SSOCS.data2$pScores,
data = SSOCS.data2,
method = "subclass", sub.by = "treat", subclass=5)
#the code above gives a warning that does not apply
data.stratification <- match.data(stratification)
data.stratification$treat <- factor(data.stratification$treat, levels=c(0,1),
labels=c("Untreated","Treated"))
data.stratification$subclass <- factor(data.stratification$subclass)
xtabs(~treat+subclass, data.stratification)
#diagnose covariate balance
balance.stratification <- summary(stratification, standardize=T)
#obtain standardized mean differences for all strata
strataDifferences <- data.frame(balance.stratification$q.table[,3,])
summaryStrataDifferences <- summary(strataDifferences)
#write.csv(data.frame(summaryStrataDifferences), file="summary_strata_differences.csv")
#obtain the summary of balance after stratification combining all strata
summary(abs(balance.stratification$sum.subclass$"Std. Mean Diff."))
table(abs(balance.stratification$sum.subclass$"Std. Mean Diff.") > 0.1)
#save dataset with strata membership
save(data.stratification, file="Chapter4_ssocs08_with_stratification.Rdata")
#=================================================================================
#Estimating and Pooling Stratum-specific effects
#obtain means for treated and untreated groups by stratum
library(survey)
#define survey design
surveyDesign <- svydesign(ids=~0,strata=~STRATA,weights=~FINALWGT, data=data.stratification)
#add replication weights for 1000 bootstrapped samples to the design object
surveyDesignBoot <- as.svrepdesign(surveyDesign, type=c("bootstrap"),replicates=1000)
#obtain means and standard errors by stratum and treatment group combinations
(subclassMeans <- svyby(formula=~percHarsh, by=~treat+subclass,
design=surveyDesignBoot,FUN=svymean, covmat=TRUE))
#obtain the ATE pooling strata-specific ATEs
#the weights are for each group defined by combining stratum and treatment group
#the weights are (stratum size)/(total sample size)
(pooledEffects <- svycontrast(subclassMeans, list(
ATE=c(-0.5,0.5,-0.18,0.18,-0.13,0.13,-0.10,0.10,-0.09,0.09),
ATT=c(-0.2,0.2,-0.2,0.2,-0.2,0.2,-0.2,0.2,-0.2,0.2))))