This R script will replicate the results in ‘Elections, Ethnicity and Political Instability’. Files should be placed in the project directory, or set working directory to the location where the files are stored.

Required Packages and customised functions

require(ggplot2) # Version 2.1.0
## Loading required package: ggplot2
## Warning: package 'ggplot2' was built under R version 3.2.4
require(Amelia) # Version 1.7.4
## Loading required package: Amelia
## Loading required package: Rcpp
## ## 
## ## Amelia II: Multiple Imputation
## ## (Version 1.7.4, built: 2015-12-05)
## ## Copyright (C) 2005-2016 James Honaker, Gary King and Matthew Blackwell
## ## Refer to http://gking.harvard.edu/amelia/ for more information
## ##
require(Zelig) # Version 4.2.1 - crucial - major changes to syntax have occured since version 5.  
## Loading required package: Zelig
## Loading required package: boot
## Loading required package: MASS
## Loading required package: sandwich
## ZELIG (Versions 4.2-1, built: 2013-09-12)
## 
## +----------------------------------------------------------------+
## |  Please refer to http://gking.harvard.edu/zelig for full       |
## |  documentation or help.zelig() for help with commands and      |
## |  models support by Zelig.                                      |
## |                                                                |
## |  Zelig project citations:                                      |
## |    Kosuke Imai, Gary King, and Olivia Lau.  (2009).            |
## |    ``Zelig: Everyone's Statistical Software,''                 |
## |    http://gking.harvard.edu/zelig                              |
## |   and                                                          |
## |    Kosuke Imai, Gary King, and Olivia Lau. (2008).             |
## |    ``Toward A Common Framework for Statistical Analysis        |
## |    and Development,'' Journal of Computational and             |
## |    Graphical Statistics, Vol. 17, No. 4 (December)             |
## |    pp. 892-913.                                                |
## |                                                                |
## |   To cite individual Zelig models, please use the citation     |
## |   format printed with each model run and in the documentation. |
## +----------------------------------------------------------------+
## 
## Attaching package: 'Zelig'
## The following object is masked from 'package:ggplot2':
## 
##     alpha
## The following object is masked from 'package:utils':
## 
##     cite
library(Hmisc) # Version 3.17-2
## Loading required package: lattice
## 
## Attaching package: 'lattice'
## The following object is masked from 'package:boot':
## 
##     melanoma
## Loading required package: survival
## 
## Attaching package: 'survival'
## The following object is masked from 'package:boot':
## 
##     aml
## Loading required package: Formula
## 
## Attaching package: 'Hmisc'
## The following objects are masked from 'package:Zelig':
## 
##     combine, describe, describe.default, summarize
## The following objects are masked from 'package:base':
## 
##     format.pval, round.POSIXt, trunc.POSIXt, units
require(texreg) # Version 1.36.4
## Loading required package: texreg
## Version:  1.36.4
## Date:     2016-02-16
## Author:   Philip Leifeld (Eawag & University of Bern)
## 
## Please cite the JSS article in your publications -- see citation("texreg").
require(ZeligMultiLevel) # Version 0.7-1 - crucial
## Loading required package: ZeligMultiLevel
## Warning in library(package, lib.loc = lib.loc, character.only = TRUE,
## logical.return = TRUE, : there is no package called 'ZeligMultiLevel'
# Customised functions

# Function to extract results from logit analyses of multiply imputed data with texreg

require(texreg)
extract.mi.logit <- function(model, ...) {
  
  s <- summary(model, ...) # save the summary statistics
  names <- rownames(s$coefficients) # extract coefficient names
  co <- s$coefficients[,1] # extract the coefficient values
  se <- s$coefficients[,2] # extract the standard errors
  pval <- s$coefficients[,4] # extract the p-values
  aic <- mean(s$all[[1]][[5]], s$all[[2]][[5]], s$all[[3]][[5]], s$all[[4]][[5]], s$all[[5]][[5]])
  rs <- s$r.squared # extract R-squared
  adj <- s$adj.r.squared # extract adjusted R-squared
  n <- nrow(model$imp1$data) # extract number of observations
  gof <- c(aic, n) #create a vector of GOF statistics
  gof.names <- c("AIC", "Num. obs.") #names of GOFs
  decimal.places <- c(TRUE, FALSE) #the last one is a count variable
  
  tr <- createTexreg( # create a texreg object
    coef.names=names,
    coef=co,
    se=se,
    pvalues=pval, # p-values are only needed when
    gof.names=gof.names, # signif. stars shall be printed
    gof=gof,
    gof.decimal=decimal.places # (optional)
    
  )
  return(tr) # return texreg object to texreg
}

setMethod("extract", signature=className("mi", "zelig"),
          definition = extract.mi.logit)
## [1] "extract"
# Function to simulate and extract first differences with multiply imputed data and Zelig models.
# Requires the name of the model (model), the names of the variables that need to be simulated as a strong vector (names), lower and upper bound of the confidence intervals (c1, c2), a label for the model name (label) and the number of imputed datasets (imps). 

sims.mi.fd <- function (model, names, c1, c2, label, imps) {
  
  results <- NULL
  results.f <- NULL
  margins <- NULL
  model.name <- as.character(label)
  names(names) <- "v1"
  names <- as.data.frame(names)
  names$v2 <- paste(names$v1,".x1", sep="")
  names$v3 <- paste(names$v1,".x2", sep="")
  
  
  ## sims for each variable in the names
  for (i in 1:nrow(names)) {
    ### take the first variable
    
    x<-get(names[i,2])
    x1<-get(names[i,3])
    c <- sim(model, x=x, x1=x1)
    object <- c
    
    ### now extract the mi simulations from qi
    
    for (i in 1:imps) { 
      a <- object[[i]]$qi[[5]]
      results <- rbind(a, results)
    }
    
    results.f <- cbind(results.f, results)
    results <- NULL
    
  }
  
  results.f <- as.data.frame(results.f)
  titles <- as.character(names[,1])
  names(results.f) <- titles
  
  for (i in 1:ncol(results.f)){
    mean <- mean(results.f[,i])
    ci1 <- quantile(results.f[,i], prob=c1)
    ci2 <- quantile(results.f[,i], prob=c2)
    d <- as.data.frame(cbind(mean, ci1, ci2))
    d$variable <- as.character(names(results.f[i]))
    margins <- rbind(margins, d)
    
  }
  
  margins <- as.data.frame(margins)
  names(margins) <- c("mean", "low", "high", "variable")
  margins$model <- model.name
  return(margins) 
}



# Multiplot function from Chang 2013 - 'R Grpahics Cookbook'. 


multiplot <- function(..., plotlist=NULL, file, cols=1, layout=NULL) {
  require(grid)
  
  # Make a list from the ... arguments and plotlist
  plots <- c(list(...), plotlist)
  
  numPlots = length(plots)
  
  # If layout is NULL, then use 'cols' to determine layout
  if (is.null(layout)) {
    # Make the panel
    # ncol: Number of columns of plots
    # nrow: Number of rows needed, calculated from # of cols
    layout <- matrix(seq(1, cols * ceiling(numPlots/cols)),
                     ncol = cols, nrow = ceiling(numPlots/cols))
  }
  
  if (numPlots==1) {
    print(plots[[1]])
    
  } else {
    # Set up the page
    grid.newpage()
    pushViewport(viewport(layout = grid.layout(nrow(layout), ncol(layout))))
    
    # Make each plot, in the correct location
    for (i in 1:numPlots) {
      # Get the i,j matrix positions of the regions that contain this subplot
      matchidx <- as.data.frame(which(layout == i, arr.ind = TRUE))
      
      print(plots[[i]], vp = viewport(layout.pos.row = matchidx$row,
                                      layout.pos.col = matchidx$col))
    }
  }
}


# Function to generate robust standard errors. From King and Roberts (2015)

robust.se <- function(model, cluster){
  require(sandwich)
  require(lmtest)
  M <- length(unique(cluster))
  N <- length(cluster)
  K <- model$rank
  dfc <- (M/(M - 1)) * ((N - 1)/(N - K))
  uj <- apply(estfun(model), 2, function(x) tapply(x, cluster, sum));
  rcse.cov <- dfc * sandwich(model, meat = crossprod(uj)/N)
  rcse.se <- coeftest(model, rcse.cov)
  return(list(rcse.cov, rcse.se))
}

# Function to generate robust, clustered standard errors. From King and Roberts (2015) 

cl   <- function(dat,fm, cluster){
  require(sandwich, quietly = TRUE)
  require(lmtest, quietly = TRUE)
  M <- length(unique(cluster))
  N <- length(cluster)
  K <- fm$rank
  dfc <- (M/(M-1))*((N-1)/(N-K))
  uj  <- apply(estfun(fm),2, function(x) tapply(x, cluster, sum));
  vcovCL <- dfc*sandwich(fm, meat=crossprod(uj)/N)
  coeftest(fm, vcovCL) }




sims <- function (model, names, c1, c2, label, imps) {
  
  results <- NULL
  results.f <- NULL
  margins <- NULL
  model.name <- as.character(label)
  names(names) <- "v1"
  names <- as.data.frame(names)
  names$v2 <- paste(names$v1,".x1", sep="")
  names$v3 <- paste(names$v1,".x2", sep="")
  
  
  ## sims for each variable in the names
  for (i in 1:nrow(names)) {
    ### take the first variable
    
    x<-get(names[i,2])
    x1<-get(names[i,3])
    c <- sim(model, x=x, x1=x1)
    object <- c
    
    ### now extract the mi simulations from qi
    
    for (i in 1:imps) { 
      a <- object[[i]]$qi[[5]]
      results <- rbind(a, results)
    }
    
    results.f <- cbind(results.f, results)
    results <- NULL
    
  }
  
  results.f <- as.data.frame(results.f)
  titles <- as.character(names[,1])
  names(results.f) <- titles
  
  for (i in 1:ncol(results.f)){
    mean <- mean(results.f[,i])
    ci1 <- quantile(results.f[,i], prob=c1)
    ci2 <- quantile(results.f[,i], prob=c2)
    d <- as.data.frame(cbind(mean, ci1, ci2))
    d$variable <- as.character(names(results.f[i]))
    margins <- rbind(margins, d)
    
  }
  
  margins <- as.data.frame(margins)
  names(margins) <- c("mean", "low", "high", "variable")
  margins$model <- model.name
  return(margins) 
}

Read in the main dataframe

df <- read.csv("df-02-08-2016.csv")

Begin the analysis

Create 4 samples, 1 each for whether there was an election at t-1, t, and t+1, and a ‘control’ when there was no election at t-1, t, and t+1. Create Figure 1 which shows the distribution of intstability onsets across

election.l1 <- subset(df, nld.election.l1==1)
election.t <- subset(df, nld.election==1)
election.f1 <- subset(df, nld.election.f1==1)
control <- subset(df, nld.election.l1==0 & nld.election==0 & nld.election.f1==0)



# Distribution of instability onsets across fractionalization and polarization scores - 'Control' group. 

control.fig <- ggplot(control, aes(x=ef, y=polarization, size=as.numeric(instab.st))) + 
  geom_point(position=position_jitter(width=.05, height=0.05))+
  xlab("Ethnic Fractionalization")+ylab("Ethnic Polarization")+
  guides(size=FALSE) + ggtitle("Control") + annotate("rect", xmin=0.5, xmax=1.2, ymin=-0.2, ymax=0.6, alpha=.1,fill="green") + 
  annotate("rect", xmin=-0.2, xmax=0.5, ymin=-0.2, ymax=0.6, alpha=.1,fill="blue") + annotate("rect", xmin=-0.2, xmax=1.2, ymin=0.6, ymax=1.2, alpha=.1,fill="red") +
  annotate("text", x=0.1, y=-0.1, label="Homogenous", size=3)+
  annotate("text", x=0.75, y=0.18, label="Fractionalized", size=3) + annotate("text", x=0.5, y=1.1, label="Polarized", , size=3) 

# Election in the previous year

election.l1.fig <- ggplot(election.l1, aes(x=ef, y=polarization, size=as.numeric(instab.st))) + 
  geom_point(position=position_jitter(width=.05, height=0.05))+
  xlab("Ethnic Fractionalization")+ylab("Ethnic Polarization")+
  guides(size=FALSE) + ggtitle("Election at t-1") + annotate("rect", xmin=0.5, xmax=1.2, ymin=-0.2, ymax=0.6, alpha=.1,fill="green") + 
  annotate("rect", xmin=-0.2, xmax=0.5, ymin=-0.2, ymax=0.6, alpha=.1,fill="blue") + annotate("rect", xmin=-0.2, xmax=1.2, ymin=0.6, ymax=1.2, alpha=.1,fill="red")

# Election in that year

election.t.fig <- ggplot(election.t, aes(x=ef, y=polarization, size=as.numeric(instab.st))) + 
  geom_point(position=position_jitter(width=.05, height=0.05))+
  xlab("Ethnic Fractionalization")+ylab("Ethnic Polarization")+
  guides(size=FALSE) + ggtitle("Election at t") + annotate("rect", xmin=0.5, xmax=1.2, ymin=-0.2, ymax=0.6, alpha=.1,fill="green") + 
  annotate("rect", xmin=-0.2, xmax=0.5, ymin=-0.2, ymax=0.6, alpha=.1,fill="blue") + annotate("rect", xmin=-0.2, xmax=1.2, ymin=0.6, ymax=1.2, alpha=.1,fill="red")

# Election next year

election.f1.fig <- ggplot(election.f1, aes(x=ef, y=polarization, size=as.numeric(instab.st))) + 
  geom_point(position=position_jitter(width=.05, height=0.05))+
  xlab("Ethnic Fractionalization")+ylab("Ethnic Polarization")+
  guides(size=FALSE) + ggtitle("Election at t+1") + annotate("rect", xmin=0.5, xmax=1.2, ymin=-0.2, ymax=0.6, alpha=.1,fill="green") + 
  annotate("rect", xmin=-0.2, xmax=0.5, ymin=-0.2, ymax=0.6, alpha=.1,fill="blue") + annotate("rect", xmin=-0.2, xmax=1.2, ymin=0.6, ymax=1.2, alpha=.1,fill="red")

# Combine the three figures into figure 1


multiplot(control.fig, election.f1.fig, election.t.fig, election.l1.fig, cols=3, layout=matrix(c(2,3,4,1,1,1), nrow=2, byrow=TRUE), linetype="dashed")
## Loading required package: grid
## Warning: Removed 120 rows containing missing values (geom_point).
## Warning: Removed 35 rows containing missing values (geom_point).
## Warning: Removed 36 rows containing missing values (geom_point).
## Warning: Removed 35 rows containing missing values (geom_point).

## [1] "dashed"

Now split the sample into ‘homogenous’, ‘polarized’ and ‘fractionalized’ states. The cutoffs are the same as the colours in Figure 1. Homogenous = frac >0.5 & pol >0.6, Polarized = pol>=0.6, fractionalized = frac >=0.5 & pol >=0.6. Then report the mean instability onset rates for these different samples and the correlations between the presence of elections at t-1, t, and t+1 and instability onsets.

sample <- na.omit(subset(df, select=c("instab.st", "ef", "polarization", "nld.election.l1", "nld.election", "nld.election.f1", "nld.election.period", "ccode", "year")))
sample$instab.st <- as.numeric(sample$instab.st)
sample$nld.election.f1 <- as.numeric(sample$nld.election.f1)
sample$nld.election.l1 <- as.numeric(sample$nld.election.l1)-1
sample$nld.election <- as.numeric(sample$nld.election)

homogenous <- subset(sample, ef<0.5 & polarization<0.6)
polarized <- subset(sample, polarization>=0.6)
fractionalized <- subset(sample, ef>=0.5 & polarization<0.6 )



# Instability rates and election rates across homogenous, polarized and fractionalized settings
# As reported in Table 3. 

# Mean instability 
mean(homogenous$instab.st)
## [1] 0.01900041
mean(polarized$instab.st)
## [1] 0.03807723
mean(fractionalized$instab.st)
## [1] 0.03951368
# Mean elections 
mean(as.numeric(homogenous$nld.election))
## [1] 0.2866584
mean(as.numeric(polarized$nld.election))
## [1] 0.231704
mean(as.numeric(fractionalized$nld.election))
## [1] 0.2294833
# Pearson correlations between elections and instability rates across homogneous, polarized and fractionalized
# settings

library(Hmisc)

hom.f1 <- subset(homogenous, nld.election.f1==1 | nld.election.period==0, select=c("instab.st", "nld.election.f1"))
hom.chi.f1 <- rcorr(as.matrix(hom.f1), type="pearson")
hom.chi.f1
##                 instab.st nld.election.f1
## instab.st            1.00           -0.01
## nld.election.f1     -0.01            1.00
## 
## n= 1416 
## 
## 
## P
##                 instab.st nld.election.f1
## instab.st                 0.6381         
## nld.election.f1 0.6381
hom.t <- subset(homogenous, nld.election==1 | nld.election.period==0, select=c("instab.st", "nld.election"))
hom.chi.t <- rcorr(as.matrix(hom.t), type="pearson")
hom.chi.t
##              instab.st nld.election
## instab.st         1.00        -0.02
## nld.election     -0.02         1.00
## 
## n= 1414 
## 
## 
## P
##              instab.st nld.election
## instab.st              0.5062      
## nld.election 0.5062
hom.l1 <- subset(homogenous, nld.election.l1==1 | nld.election.period==0, select=c("instab.st", "nld.election.l1"))
hom.chi.l1 <- rcorr(as.matrix(hom.l1), type="pearson")
hom.chi.l1
##                 instab.st nld.election.l1
## instab.st               1             NaN
## nld.election.l1       NaN               1
## 
## n= 720 
## 
## 
## P
##                 instab.st nld.election.l1
## instab.st                                
## nld.election.l1
##### Polarized countries
pol.f1 <- subset(polarized, nld.election.f1==1 | nld.election.period==0, select=c("instab.st", "nld.election.f1"))
pol.chi.f1 <- rcorr(as.matrix(pol.f1), type="pearson")
pol.chi.f1
##                 instab.st nld.election.f1
## instab.st            1.00           -0.05
## nld.election.f1     -0.05            1.00
## 
## n= 2369 
## 
## 
## P
##                 instab.st nld.election.f1
## instab.st                 0.0201         
## nld.election.f1 0.0201
pol.t <- subset(polarized, nld.election==1 | nld.election.period==0, select=c("instab.st", "nld.election"))
pol.chi.t <- rcorr(as.matrix(pol.t), type="pearson")
pol.chi.t
##              instab.st nld.election
## instab.st         1.00        -0.01
## nld.election     -0.01         1.00
## 
## n= 2364 
## 
## 
## P
##              instab.st nld.election
## instab.st              0.567       
## nld.election 0.567
pol.l1 <- subset(polarized, nld.election.l1==1 | nld.election.period==0, select=c("instab.st", "nld.election.l1"))
pol.chi.l1 <- rcorr(as.matrix(pol.l1), type="pearson")
pol.chi.l1
##                 instab.st nld.election.l1
## instab.st               1             NaN
## nld.election.l1       NaN               1
## 
## n= 1506 
## 
## 
## P
##                 instab.st nld.election.l1
## instab.st                                
## nld.election.l1
##### Fractionalized countries
frac.f1 <- subset(fractionalized, nld.election.f1==1 | nld.election.period==0, select=c("instab.st", "nld.election.f1"))
frac.chi.f1 <- rcorr(as.matrix(frac.f1), type="pearson")
frac.chi.f1
##                 instab.st nld.election.f1
## instab.st             1.0            -0.1
## nld.election.f1      -0.1             1.0
## 
## n= 409 
## 
## 
## P
##                 instab.st nld.election.f1
## instab.st                 0.0485         
## nld.election.f1 0.0485
frac.t <- subset(fractionalized, nld.election==1 | nld.election.period==0, select=c("instab.st", "nld.election"))
frac.chi.t <- rcorr(as.matrix(frac.t), type="pearson")
frac.chi.t
##              instab.st nld.election
## instab.st         1.00        -0.03
## nld.election     -0.03         1.00
## 
## n= 408 
## 
## 
## P
##              instab.st nld.election
## instab.st              0.5026      
## nld.election 0.5026
frac.l1 <- subset(fractionalized, nld.election.l1==1 | nld.election.period==0, select=c("instab.st", "nld.election.l1"))
frac.chi.l1 <- rcorr(as.matrix(frac.l1), type="pearson")
frac.chi.l1
##                 instab.st nld.election.l1
## instab.st               1             NaN
## nld.election.l1       NaN               1
## 
## n= 257 
## 
## 
## P
##                 instab.st nld.election.l1
## instab.st                                
## nld.election.l1

The next section shows the results of the regression analysis. The first step is to use multiple imputation to fill missing values of the data for three different samples (1) where there was an election at t-1 and then no election in time t and t+1, and (2) where there was an election at t and then no election in time t+1 and t-1, and (3) where there was an election at t+1 and then no election in time t and t-1. The next step runs the probit models and displays the results, and the final section simulated the marginal effects.

# We have selected all variables in the model. Three different samples are imputed (1) where there was an election at t-1 and then no election in time t and t+1, and (2) where there was an election at t and then no election in time t+1 and t-1, and (3) where there was an election at t+1 and then no election in time t and t-1. 

# The following code creates 5 multiply imputed datasets for these 3 samples. It uses a quadratic for imputing across time. 




require(Amelia)
set.seed(12345)
a.out.l1 <- amelia(subset(df, select=c("instab.st", "nld.election.l1", "ef", "polarization", 
                                    "ln.wdi.imr.l1", "polity2.lag.1", "part.dem.fac.l1", "stabyrs", "stabyrs.2", "stabyrs.3", # Structural Controls
                                    "ln.wdi.pop.l1",  "nac.l1", "ccode", "year", "pr.l1", 
                                    "nld.earlylate.l1",  
                                    "nld.suspend.l1"), year<=2010 & is.na(ef)==F & nld.election==0 & nld.election.f1==0), idvars=c("stabyrs", "stabyrs.2", "stabyrs.3"), m = 5, cs="ccode", ts="year", polytime=2, noms=c("instab.st", "nld.election.l1" ))
## -- Imputation 1 --
## 
##   1  2  3  4  5
## 
## -- Imputation 2 --
## 
##   1  2  3  4
## 
## -- Imputation 3 --
## 
##   1  2  3  4  5
## 
## -- Imputation 4 --
## 
##   1  2  3  4  5
## 
## -- Imputation 5 --
## 
##   1  2  3  4  5
a.out <- amelia(subset(df, select=c("instab.st", "ef", "polarization", "nld.election",
                                    "ln.wdi.imr.l1", "polity2.lag.1", "part.dem.fac.l1", "stabyrs", "stabyrs.2", "stabyrs.3", # Structural Controls
                                    "ln.wdi.pop.l1",  "nac.l1", "ccode", "year", "pr.l1", 
                                    "nld.earlylate",  
                                    "nld.suspend"), year<=2010 & is.na(ef)==F & nld.election.l1==0 & nld.election.f1==0), idvars=c("stabyrs", "stabyrs.2", "stabyrs.3"), m = 5, cs="ccode", ts="year", polytime=2, noms=c("instab.st", "nld.election" ))
## -- Imputation 1 --
## 
##   1  2  3  4  5
## 
## -- Imputation 2 --
## 
##   1  2  3  4  5
## 
## -- Imputation 3 --
## 
##   1  2  3  4  5
## 
## -- Imputation 4 --
## 
##   1  2  3  4  5
## 
## -- Imputation 5 --
## 
##   1  2  3  4  5
a.out.f1 <- amelia(subset(df, select=c("instab.st", "ef", "polarization", "nld.election.f1", 
                                    "ln.wdi.imr.l1", "polity2.lag.1", "part.dem.fac.l1", "stabyrs", "stabyrs.2", "stabyrs.3", # Structural Controls
                                    "ln.wdi.pop.l1",  "nac.l1", "ccode", "year", "pr.l1",
                                    "nld.earlylate.f1",  
                                    "nld.suspend.f1"), year<=2010 & is.na(ef)==F & nld.election.l1==0 & nld.election==0), idvars=c("stabyrs", "stabyrs.2", "stabyrs.3"), m = 5, cs="ccode", ts="year", polytime=2, noms=c("instab.st", "nld.election.f1" ))
## -- Imputation 1 --
## 
##   1  2  3  4  5
## 
## -- Imputation 2 --
## 
##   1  2  3  4
## 
## -- Imputation 3 --
## 
##   1  2  3  4
## 
## -- Imputation 4 --
## 
##   1  2  3  4  5
## 
## -- Imputation 5 --
## 
##   1  2  3  4  5
### No controls, election at t-1

m.nc.l1 <- zelig(instab.st ~ nld.election.l1*ef + nld.election.l1*polarization, model="probit", data=a.out.l1)
## 
## 
##  How to cite this model in Zelig:
##   Kosuke Imai, Gary King, and Olivia Lau. 2016.
##   "probit: Probit Regression for Dichotomous Dependent Variables"
##   in Kosuke Imai, Gary King, and Olivia Lau, "Zelig: Everyone's Statistical Software,"
##   http://gking.harvard.edu/zelig
## 
tbl.1.1 <- extract.mi.logit(m.nc.l1)



### Controls, election at t-1

m.l1 <- zelig(instab.st ~ nld.election.l1*ef + nld.election.l1*polarization + 
                 ln.wdi.imr.l1 + polity2.lag.1 + part.dem.fac.l1 + stabyrs + stabyrs.2 +stabyrs.3 + # Structural Controls
                 ln.wdi.pop.l1 + nac.l1 + pr.l1 + nld.earlylate.l1 + nld.suspend.l1, model="probit", data=a.out.l1)
## 
## 
##  How to cite this model in Zelig:
##   Kosuke Imai, Gary King, and Olivia Lau. 2016.
##   "probit: Probit Regression for Dichotomous Dependent Variables"
##   in Kosuke Imai, Gary King, and Olivia Lau, "Zelig: Everyone's Statistical Software,"
##   http://gking.harvard.edu/zelig
## 
tbl.1.2 <- extract.mi.logit(m.l1)

### Controls, election at t

m.t <- zelig(instab.st ~ nld.election*ef + nld.election*polarization + 
                ln.wdi.imr.l1 + polity2.lag.1 + part.dem.fac.l1 + stabyrs + stabyrs.2 +stabyrs.3 + # Structural Controls
                ln.wdi.pop.l1 + nac.l1 + pr.l1 + nld.earlylate + nld.suspend, model="probit", data=a.out)
## 
## 
##  How to cite this model in Zelig:
##   Kosuke Imai, Gary King, and Olivia Lau. 2016.
##   "probit: Probit Regression for Dichotomous Dependent Variables"
##   in Kosuke Imai, Gary King, and Olivia Lau, "Zelig: Everyone's Statistical Software,"
##   http://gking.harvard.edu/zelig
## 
tbl.1.3 <- extract.mi.logit(m.t)

### Controls, election at t+1

m.f1 <- zelig(instab.st ~ nld.election.f1*ef + nld.election.f1*polarization + 
               ln.wdi.imr.l1 + polity2.lag.1 + part.dem.fac.l1 + stabyrs + stabyrs.2 +stabyrs.3 + # Structural Controls
               ln.wdi.pop.l1 + nac.l1 + pr.l1 + nld.earlylate.f1 + nld.suspend.f1, model="probit", data=a.out.f1)
## 
## 
##  How to cite this model in Zelig:
##   Kosuke Imai, Gary King, and Olivia Lau. 2016.
##   "probit: Probit Regression for Dichotomous Dependent Variables"
##   in Kosuke Imai, Gary King, and Olivia Lau, "Zelig: Everyone's Statistical Software,"
##   http://gking.harvard.edu/zelig
## 
tbl.1.4 <- extract.mi.logit(m.f1)

model.names <- c("No Controls, Election t-1", "Election t+1", "Election t", "Election t-1")

table.1.1 <- htmlreg(list(tbl.1.1, tbl.1.4, tbl.1.3, tbl.1.2), custom.model.names = model.names, caption="Elections and Violent Political Instability, Base Model (shown in main text)", omit.coef = "(stab)" , stars = c(0.01, 
    0.05, 0.1), caption.above=TRUE)

Show regression table with the results

table.1.1
Elections and Violent Political Instability, Base Model (shown in main text)
No Controls, Election t-1 Election t+1 Election t Election t-1
(Intercept) -1.99*** -5.69*** -5.37*** -6.01***
(0.15) (0.69) (0.72) (0.72)
nld.election.l1 -0.25 -0.11
(0.27) (0.33)
ef 0.58*** 0.16 0.03 0.03
(0.19) (0.23) (0.22) (0.23)
polarization -0.11 0.12 0.13 0.16
(0.23) (0.26) (0.27) (0.27)
nld.election.l1:ef -1.16** -1.63***
(0.51) (0.59)
nld.election.l1:polarization 0.99* 1.03*
(0.52) (0.61)
nld.election.f1 -0.32
(0.34)
ln.wdi.imr.l1 0.37*** 0.45*** 0.45***
(0.08) (0.09) (0.09)
polity2.lag.1 0.01 0.02** 0.02*
(0.01) (0.01) (0.01)
part.dem.fac.l1 0.48*** 0.34*** 0.48***
(0.13) (0.13) (0.13)
ln.wdi.pop.l1 0.12*** 0.07** 0.12***
(0.03) (0.03) (0.03)
nac.l1 0.04 0.09*** 0.06*
(0.03) (0.03) (0.03)
pr.l1 0.06 0.02 0.05
(0.05) (0.05) (0.05)
nld.earlylate.f1 0.25
(0.22)
nld.suspend.f1 0.51**
(0.21)
nld.election.f1:ef -0.31
(0.46)
nld.election.f1:polarization 0.08
(0.54)
nld.election -0.46
(0.35)
nld.earlylate -0.12
(0.23)
nld.suspend 0.19
(0.19)
nld.election:ef 0.33
(0.42)
nld.election:polarization 0.28
(0.49)
nld.earlylate.l1 -0.32
(0.28)
nld.suspend.l1 0.28
(0.21)
AIC 1105.99 1011.17 1078.80 1000.78
Num. obs. 3633 3710 3713 3633
p < 0.01, p < 0.05, p < 0.1

Now simulate the marginal effects for each model. This section simulates the marginal effects for different scenarios in the data, moving from ethnically homogenized countries to ethnically heterogenous ones. We’ve picked cases from each section of the polarization/fractionalization sactter (Figure 1) that reflect escalating fractionalization. These cases are Greece, Parguay, Vietnam, Swaziland, Dominican Republic, Guatemala, Brazil, Peru, Afghanistan, Lebanon, Cameroon, Uganda

# Simulate the marginal effects for different scenarios in the data, moving from ethnically homogenized countries to ethnically heterogenous ones. We've picked cases from each section of the polarization/fractionalization sactter (Figure 1) that reflect escalating fractionalization. These cases are Greece, Parguay, Vietnam, Swaziland, Dominican Republic, Guatemala, Brazil, Peru, Afghanistan, Lebanon, Cameroon, Uganda

require(foreign)
## Loading required package: foreign
fearon <- read.dta("fearon.dta")
fearon <- subset(fearon, select=c("ef", "polarization", "country", "ccodebg"))

fracmes <- c(fearon$ef[fearon$country=="GREECE"], fearon$ef[fearon$country=="PARAGUAY"], fearon$ef[fearon$country=="VIETNAM"], fearon$ef[fearon$country=="SWAZILAND"], fearon$ef[fearon$country=="DOMINICAN REP."],
             fearon$ef[fearon$country=="GUATEMALA"], fearon$ef[fearon$country=="BRAZIL"], fearon$ef[fearon$country=="PERU"], fearon$ef[fearon$country=="AFGHANISTAN"], fearon$ef[fearon$country=="CENTRAL AFRICAN REP."],
             fearon$ef[fearon$country=="CAMEROON"],fearon$ef[fearon$country=="UGANDA"])
polmes <- c(fearon$polarization[fearon$country=="GREECE"], fearon$polarization[fearon$country=="PARAGUAY"], fearon$polarization[fearon$country=="VIETNAM"], fearon$polarization[fearon$country=="SWAZILAND"], fearon$polarization[fearon$country=="DOMINICAN REP."],
            fearon$polarization[fearon$country=="GUATEMALA"], fearon$polarization[fearon$country=="BRAZIL"], fearon$polarization[fearon$country=="PERU"], fearon$polarization[fearon$country=="AFGHANISTAN"], fearon$polarization[fearon$country=="CENTRAL AFRICAN REP."],
            fearon$polarization[fearon$country=="CAMEROON"],fearon$polarization[fearon$country=="UGANDA"])


# mes is an object with the values of polarization and fractionalization for each of these scenarios. 


mes <- as.data.frame(cbind(fracmes, polmes))

fearon$sim <- 0
fearon$sim[fearon$country=="GREECE"|fearon$country=="PARAGUAY"|fearon$country=="VIETNAM"|fearon$country=="SWAZILAND"|fearon$country=="DOMINICAN REP."|fearon$country=="GUATEMALA"|
           fearon$country=="BRAZIL"|fearon$country=="PERU"|fearon$country=="AFGHANISTAN"|fearon$country=="CENTRAL AFRICAN REP."|fearon$country=="CAMEROON"|fearon$country=="UGANDA"] <- 1

fearon$label <- NA
for (i in 1:nrow(fearon)) { fearon$label[i][fearon$sim[i]==1] <- fearon$country[i]}

# Now create the figure that shows which cases are being simulated (Figure 3)


ggplot(fearon, aes(x=ef, y=polarization, size=sim))+ geom_point()+
  geom_text(aes(label=label), size=4, vjust=2) +
  xlab("Ethnic Fractionalization")+ylab("Ethnic Polarization")+
  guides(size=FALSE) + annotate("rect", xmin=0.5, xmax=1.2, ymin=-0.2, ymax=0.6, alpha=.1,fill="green") + 
  annotate("rect", xmin=-0.2, xmax=0.5, ymin=-0.2, ymax=0.6, alpha=.1,fill="blue") + annotate("rect", xmin=-0.2, xmax=1.2, ymin=0.6, ymax=1.2, alpha=.1,fill="red")
## Warning: Removed 148 rows containing missing values (geom_text).

# The next set of code generates expected values for each of these data points in election and non-election periods
# and stores the first difference between these values. 

# for election at t-1. 
set.seed(12345)

# 95% confidence intervals

  c1 <- 0.025
  c2 <- 0.975

result.election.l1 <- NULL

for (i in 1:nrow(mes)) {
  no.elect.x  <- setx(m.l1, nld.election.l1=0, ef=mes$fracmes[i], polarization=mes$polmes[i])
  elect.x     <- setx(m.l1, nld.election.l1=1, ef=mes$fracmes[i], polarization=mes$polmes[i])
  no.elect.s  <- sim(m.l1, x=no.elect.x, x1=elect.x, num=10000)
  
  mean.n <- as.numeric(c(mean
                         (rbind(no.elect.s$imp1$qi[[5]], no.elect.s$imp2$qi[[5]], no.elect.s$imp3$qi[[5]], no.elect.s$imp4$qi[[5]], no.elect.s$imp5$qi[[5]]))))
  
  ci1.n <- as.numeric(c(quantile(rbind(no.elect.s$imp1$qi[[5]], no.elect.s$imp2$qi[[5]], no.elect.s$imp3$qi[[5]], no.elect.s$imp4$qi[[5]], no.elect.s$imp5$qi[[5]]), prob=c1)))
  ci2.n <- as.numeric(c(quantile(rbind(no.elect.s$imp1$qi[[5]], no.elect.s$imp2$qi[[5]], no.elect.s$imp3$qi[[5]], no.elect.s$imp4$qi[[5]], no.elect.s$imp5$qi[[5]]), prob=c2)))
  result.inside <- as.data.frame(cbind(mean.n,ci1.n, ci2.n))
  result.inside$scen <- i
  result.election.l1 <- as.data.frame(rbind(result.inside, result.election.l1)) }

# Create the figure for simulations at time t-1

margins.2.1 <- ggplot(data = result.election.l1, aes(x = scen, y = mean.n, ymin = ci1.n, ymax = ci2.n, label="Election (t-1)")) + 
  geom_point()+
  geom_point(position = position_dodge(width = 0.2)) +
  geom_errorbar(position = position_dodge(width = 0.2), width = 0.1) +
  theme_bw() +
  theme(panel.grid.major.x = element_blank(),
        panel.grid.minor.x = element_blank(),
        panel.grid.major.y = element_line(colour="grey60", linetype="dashed")) + geom_hline(yintercept=0, linetype="dotted") + xlab("") + ylab("Marginal Effect on the Probability of Instability")+
  scale_x_continuous(breaks=c(1, 6, 9, 12), labels=c("Greece", "Guatemala", "Afghanistan", "Uganda") ) + xlab("Scenario") + ylab("")+
  ylim(-0.1, 0.1)+ggtitle("Election t-1") 



# For election at time t 

set.seed(12345)

result.election <- NULL

for (i in 1:nrow(mes)) {
  no.elect.x  <- setx(m.t, nld.election=0, ef=mes$fracmes[i], polarization=mes$polmes[i])
  elect.x     <- setx(m.t, nld.election=1, ef=mes$fracmes[i], polarization=mes$polmes[i])
  no.elect.s  <- sim(m.t, x=no.elect.x, x1=elect.x, num=10000)
  
  mean.n <- as.numeric(c(mean
                         (rbind(no.elect.s$imp1$qi[[5]], no.elect.s$imp2$qi[[5]], no.elect.s$imp3$qi[[5]], no.elect.s$imp4$qi[[5]], no.elect.s$imp5$qi[[5]]))))
  
  ci1.n <- as.numeric(c(quantile(rbind(no.elect.s$imp1$qi[[5]], no.elect.s$imp2$qi[[5]], no.elect.s$imp3$qi[[5]], no.elect.s$imp4$qi[[5]], no.elect.s$imp5$qi[[5]]), prob=c1)))
  ci2.n <- as.numeric(c(quantile(rbind(no.elect.s$imp1$qi[[5]], no.elect.s$imp2$qi[[5]], no.elect.s$imp3$qi[[5]], no.elect.s$imp4$qi[[5]], no.elect.s$imp5$qi[[5]]), prob=c2)))
  result.inside <- as.data.frame(cbind(mean.n,ci1.n, ci2.n))
  result.inside$scen <- i
  result.election <- as.data.frame(rbind(result.inside, result.election)) }




margins.2.2 <- ggplot(data = result.election, aes(x = scen, y = mean.n, ymin = ci1.n, ymax = ci2.n, label="Election (t-1)")) + 
  geom_point()+
  geom_point(position = position_dodge(width = 0.2)) +
  geom_errorbar(position = position_dodge(width = 0.2), width = 0.1) +
  theme_bw() +
  theme(panel.grid.major.x = element_blank(),
        panel.grid.minor.x = element_blank(),
        panel.grid.major.y = element_line(colour="grey60", linetype="dashed")) + geom_hline(yintercept=0, linetype="dotted") + xlab("") + ylab("Marginal Effect on the Probability of Instability")+
  scale_x_continuous(breaks=c(1, 6, 9, 12), labels=c("Greece", "Guatemala", "Afghanistan", "Uganda") ) + xlab("Scenario") + ylab("")+
  ylim(-0.1, 0.1)+ggtitle("Election t") 


#### For election at time t+1 

set.seed(12345)

result.election.f1 <- NULL


for (i in 1:nrow(mes)) {
  no.elect.x  <- setx(m.f1, nld.election.f1=0, ef=mes$fracmes[i], polarization=mes$polmes[i])
  elect.x     <- setx(m.f1, nld.election.f1=1, ef=mes$fracmes[i], polarization=mes$polmes[i])
  no.elect.s  <- sim(m.f1, x=no.elect.x, x1=elect.x, num=10000)
  
  mean.n <- as.numeric(c(mean
                         (rbind(no.elect.s$imp1$qi[[5]], no.elect.s$imp2$qi[[5]], no.elect.s$imp3$qi[[5]], no.elect.s$imp4$qi[[5]], no.elect.s$imp5$qi[[5]]))))
  
  ci1.n <- as.numeric(c(quantile(rbind(no.elect.s$imp1$qi[[5]], no.elect.s$imp2$qi[[5]], no.elect.s$imp3$qi[[5]], no.elect.s$imp4$qi[[5]], no.elect.s$imp5$qi[[5]]), prob=c1)))
  ci2.n <- as.numeric(c(quantile(rbind(no.elect.s$imp1$qi[[5]], no.elect.s$imp2$qi[[5]], no.elect.s$imp3$qi[[5]], no.elect.s$imp4$qi[[5]], no.elect.s$imp5$qi[[5]]), prob=c2)))
  result.inside <- as.data.frame(cbind(mean.n,ci1.n, ci2.n))
  result.inside$scen <- i
  result.election.f1 <- as.data.frame(rbind(result.inside, result.election.f1)) }


margins.2.3 <- ggplot(data = result.election.f1, aes(x = scen, y = mean.n, ymin = ci1.n, ymax = ci2.n, label="Election (t-1)")) + 
  geom_point()+
  geom_point(position = position_dodge(width = 0.2)) +
  geom_errorbar(position = position_dodge(width = 0.2), width = 0.1) +
  theme_bw() +
  theme(panel.grid.major.x = element_blank(),
        panel.grid.minor.x = element_blank(),
        panel.grid.major.y = element_line(colour="grey60", linetype="dashed")) + geom_hline(yintercept=0, linetype="dotted") + xlab("") + ylab("Marginal Effect on the Probability of Instability")+
  scale_x_continuous(breaks=c(1, 6, 9, 12), labels=c("Greece", "Guatemala", "Afghanistan", "Uganda") ) + xlab("Scenario") + ylab("")+
  ylim(-0.1, 0.1)+ggtitle("Election t+1")


result.election.l1$model <- "election.l1"
result.election$model <- "election.t"
result.election.f1$model <- "election.f1"

# bind all of the simualtion results together

sims.results <- as.data.frame(rbind(result.election.l1, result.election, result.election.f1))

write.csv(sims.results, "sims.csv")

# Produce Figure 4


multiplot(margins.2.3, margins.2.2, margins.2.1, cols=3)

Next is showing the marginal effects of moving each variable in the model from the 10th to the 90th percentile value as reported in Figure 2. These are the effects for models 1-4. First process is to set the x’s for each variable in the model, then the first differences are simlated for each x variable, stored, and recalled at the end to create Figure 2.

# Now, we are using the three probit models above to extract the mean, and 95% CIs for the first difference for each variable in these models after moving them from their 10th (q1) to their 90th percentile value (q2) in the data. 

# first step is to set the x's for every variable in the model. 

q1 <- 0.10
q2 <- 0.90

nld.election.l1.x1 <- setx(m.nc.l1, nld.election.l1=0)
nld.election.l1.x2 <- setx(m.nc.l1, nld.election.l1=1)

ef.x1 <- setx(m.nc.l1, nld.election.l1=0, ef=quantile(na.omit(df$ef), prob=q1))
ef.x2 <- setx(m.nc.l1, nld.election.l1=0, ef=quantile(na.omit(df$ef), prob=q2))

polarization.x1 <- setx(m.nc.l1, nld.election.l1=0, polarization=quantile(na.omit(df$polarization), prob=q1))
polarization.x2 <- setx(m.nc.l1, nld.election.l1=0, polarization=quantile(na.omit(df$polarization), prob=q2))

# note here we are simulating the effects for an election in a 90th percentile state on the fractionalization index. 
nld.election.l1Xef.x1 <- setx(m.nc.l1, nld.election.l1=0, ef=quantile(na.omit(df$ef), prob=q2))
nld.election.l1Xef.x2 <- setx(m.nc.l1, nld.election.l1=1, ef=quantile(na.omit(df$ef), prob=q2))


# as above, election in 90th percentile state on the polarization scale. 
nld.election.l1Xpolarization.x1 <- setx(m.nc.l1, nld.election.l1=0, polarization=quantile(na.omit(df$polarization), prob=q2))
nld.election.l1Xpolarization.x2 <- setx(m.nc.l1, nld.election.l1=1, polarization=quantile(na.omit(df$polarization), prob=q2))

model.2.1.names <- data.frame(c("nld.election.l1", "ef", "polarization", "nld.election.l1Xef", "nld.election.l1Xpolarization"))

model.2.1.s <- sims.mi.fd(m.nc.l1, model.2.1.names, 0.025, 0.975, "No Controls, Election t-1", 5)





### election at t-1

q1 <- 0.10
q2 <- 0.90

nld.election.l1.x1 <- setx(m.l1, nld.election.l1=0)
nld.election.l1.x2 <- setx(m.l1, nld.election.l1=1)

ef.x1 <- setx(m.l1, nld.election.l1=0, ef=quantile(na.omit(df$ef), prob=q1))
ef.x2 <- setx(m.l1, nld.election.l1=0, ef=quantile(na.omit(df$ef), prob=q2))

polarization.x1 <- setx(m.l1, nld.election.l1=0, polarization=quantile(na.omit(df$polarization), prob=q1))
polarization.x2 <- setx(m.l1, nld.election.l1=0, polarization=quantile(na.omit(df$polarization), prob=q2))

nld.election.l1Xef.x1 <- setx(m.l1, nld.election.l1=0, ef=quantile(na.omit(df$ef), prob=q2))
nld.election.l1Xef.x2 <- setx(m.l1, nld.election.l1=1, ef=quantile(na.omit(df$ef), prob=q2))

nld.election.l1Xpolarization.x1 <- setx(m.l1, nld.election.l1=0, polarization=quantile(na.omit(df$polarization), prob=q2))
nld.election.l1Xpolarization.x2 <- setx(m.l1, nld.election.l1=1, polarization=quantile(na.omit(df$polarization), prob=q2))

ln.wdi.pop.l1.x1 <- setx(m.l1, ln.wdi.pop.l1=quantile(na.omit(df$ln.wdi.pop.l1), prob=q1), nld.election.l1=0)
ln.wdi.pop.l1.x2 <- setx(m.l1, ln.wdi.pop.l1=quantile(na.omit(df$ln.wdi.pop.l1), prob=q2), nld.election.l1=0)

ln.wdi.imr.l1.x1 <- setx(m.l1, ln.wdi.imr.l1=quantile(na.omit(df$ln.wdi.imr.l1), prob=q1), nld.election.l1=0)
ln.wdi.imr.l1.x2 <- setx(m.l1, ln.wdi.imr.l1=quantile(na.omit(df$ln.wdi.imr.l1), prob=q2), nld.election.l1=0)

nac.l1.x1 <- setx(m.l1, nac.l1=quantile(na.omit(df$nac.l1), prob=q1), nld.election.l1=0)
nac.l1.x2 <- setx(m.l1, nac.l1=quantile(na.omit(df$nac.l1), prob=q2), nld.election.l1=0)

polity2.lag.1.x1 <- setx(m.l1, polity2.lag.1=quantile(na.omit(df$polity2.lag.1), prob=q1), nld.election.l1=0)
polity2.lag.1.x2 <- setx(m.l1, polity2.lag.1=quantile(na.omit(df$polity2.lag.1), prob=q2), nld.election.l1=0)

part.dem.fac.l1.x1 <- setx(m.l1, part.dem.fac.l1=quantile(na.omit(df$part.dem.fac.l1), prob=q1), nld.election.l1=0)
part.dem.fac.l1.x2 <- setx(m.l1, part.dem.fac.l1=quantile(na.omit(df$part.dem.fac.l1), prob=q2), nld.election.l1=0)

pr.l1.x1 <- setx(m.l1, pr.l1=quantile(na.omit(df$pr.l1), prob=q1), nld.election.l1=0)
pr.l1.x2 <- setx(m.l1, pr.l1=quantile(na.omit(df$pr.l1), prob=q2), nld.election.l1=0)

nld.earlylate.l1.x1 <- setx(m.l1, nld.earlylate.l1=0, nld.election.l1=1)
nld.earlylate.l1.x2 <- setx(m.l1, nld.earlylate.l1=1, nld.election.l1=1)

nld.suspend.l1.x1 <- setx(m.l1, nld.suspend.l1=0, nld.election.l1=1)
nld.suspend.l1.x2 <- setx(m.l1, nld.suspend.l1=1, nld.election.l1=1)

model.2.2.names <- data.frame(c("nld.election.l1", "ef", "polarization", "nld.election.l1Xef", "nld.election.l1Xpolarization", "ln.wdi.pop.l1", "ln.wdi.imr.l1", "nac.l1", "polity2.lag.1",
                                "part.dem.fac.l1", "pr.l1", "nld.earlylate.l1", "nld.suspend.l1"))

model.2.2.s <- sims.mi.fd(m.l1, model.2.2.names, 0.025, 0.975, "Election t-1", 5)


# Election at t


nld.election.x1 <- setx(m.t, nld.election=0)
nld.election.x2 <- setx(m.t, nld.election=1)

ef.x1 <- setx(m.t, nld.election=0, ef=quantile(na.omit(df$ef), prob=q1))
ef.x2 <- setx(m.t, nld.election=0, ef=quantile(na.omit(df$ef), prob=q2))

polarization.x1 <- setx(m.t, nld.election=0, polarization=quantile(na.omit(df$polarization), prob=q1))
polarization.x2 <- setx(m.t, nld.election=0, polarization=quantile(na.omit(df$polarization), prob=q2))

nld.electionXef.x1 <- setx(m.t, nld.election=0, ef=quantile(na.omit(df$ef), prob=q2))
nld.electionXef.x2 <- setx(m.t, nld.election=1, ef=quantile(na.omit(df$ef), prob=q2))

nld.electionXpolarization.x1 <- setx(m.t, nld.election=0, polarization=quantile(na.omit(df$polarization), prob=q2))
nld.electionXpolarization.x2 <- setx(m.t, nld.election=1, polarization=quantile(na.omit(df$polarization), prob=q2))

ln.wdi.pop.l1.x1 <- setx(m.t, ln.wdi.pop.l1=quantile(na.omit(df$ln.wdi.pop.l1), prob=q1), nld.election=0)
ln.wdi.pop.l1.x2 <- setx(m.t, ln.wdi.pop.l1=quantile(na.omit(df$ln.wdi.pop.l1), prob=q2), nld.election=0)

ln.wdi.imr.l1.x1 <- setx(m.t, ln.wdi.imr.l1=quantile(na.omit(df$ln.wdi.imr.l1), prob=q1), nld.election=0)
ln.wdi.imr.l1.x2 <- setx(m.t, ln.wdi.imr.l1=quantile(na.omit(df$ln.wdi.imr.l1), prob=q2), nld.election=0)

nac.l1.x1 <- setx(m.t, nac.l1=quantile(na.omit(df$nac.l1), prob=q1), nld.election=0)
nac.l1.x2 <- setx(m.t, nac.l1=quantile(na.omit(df$nac.l1), prob=q2), nld.election=0)

polity2.lag.1.x1 <- setx(m.t, polity2.lag.1=quantile(na.omit(df$polity2.lag.1), prob=q1), nld.election=0)
polity2.lag.1.x2 <- setx(m.t, polity2.lag.1=quantile(na.omit(df$polity2.lag.1), prob=q2), nld.election=0)

part.dem.fac.l1.x1 <- setx(m.t, part.dem.fac.l1=quantile(na.omit(df$part.dem.fac.l1), prob=q1), nld.election=0)
part.dem.fac.l1.x2 <- setx(m.t, part.dem.fac.l1=quantile(na.omit(df$part.dem.fac.l1), prob=q2), nld.election=0)

pr.l1.x1 <- setx(m.t, pr.l1=quantile(na.omit(df$pr.l1), prob=q1), nld.election=0)
pr.l1.x2 <- setx(m.t, pr.l1=quantile(na.omit(df$pr.l1), prob=q2), nld.election=0)

nld.earlylate.x1 <- setx(m.t, nld.earlylate=0, nld.election=1)
nld.earlylate.x2 <- setx(m.t, nld.earlylate=1, nld.election=1)

nld.suspend.x1 <- setx(m.t, nld.suspend=0, nld.election=1)
nld.suspend.x2 <- setx(m.t, nld.suspend=1, nld.election=1)

model.2.3.names <- data.frame(c("nld.election", "ef", "polarization", "nld.electionXef", "nld.electionXpolarization", "ln.wdi.pop.l1", "ln.wdi.imr.l1", "nac.l1", "polity2.lag.1",
                                "part.dem.fac.l1", "pr.l1", "nld.earlylate", "nld.suspend"))

model.2.3.s <- sims.mi.fd(m.t, model.2.3.names, 0.025, 0.975, "Election t", 5)


# Election at t+1

nld.election.f1.x1 <- setx(m.f1, nld.election.f1=0)
nld.election.f1.x2 <- setx(m.f1, nld.election.f1=1)

ef.x1 <- setx(m.f1, nld.election.f1=0, ef=quantile(na.omit(df$ef), prob=q1))
ef.x2 <- setx(m.f1, nld.election.f1=0, ef=quantile(na.omit(df$ef), prob=q2))

polarization.x1 <- setx(m.f1, nld.election.f1=0, polarization=quantile(na.omit(df$polarization), prob=q1))
polarization.x2 <- setx(m.f1, nld.election.f1=0, polarization=quantile(na.omit(df$polarization), prob=q2))

nld.election.f1Xef.x1 <- setx(m.f1, nld.election.f1=0, ef=quantile(na.omit(df$ef), prob=q2))
nld.election.f1Xef.x2 <- setx(m.f1, nld.election.f1=1, ef=quantile(na.omit(df$ef), prob=q2))

nld.election.f1Xpolarization.x1 <- setx(m.f1, nld.election.f1=0, polarization=quantile(na.omit(df$polarization), prob=q2))
nld.election.f1Xpolarization.x2 <- setx(m.f1, nld.election.f1=1, polarization=quantile(na.omit(df$polarization), prob=q2))

ln.wdi.pop.l1.x1 <- setx(m.f1, ln.wdi.pop.l1=quantile(na.omit(df$ln.wdi.pop.l1), prob=q1), nld.election.f1=0)
ln.wdi.pop.l1.x2 <- setx(m.f1, ln.wdi.pop.l1=quantile(na.omit(df$ln.wdi.pop.l1), prob=q2), nld.election.f1=0)

ln.wdi.imr.l1.x1 <- setx(m.f1, ln.wdi.imr.l1=quantile(na.omit(df$ln.wdi.imr.l1), prob=q1), nld.election.f1=0)
ln.wdi.imr.l1.x2 <- setx(m.f1, ln.wdi.imr.l1=quantile(na.omit(df$ln.wdi.imr.l1), prob=q2), nld.election.f1=0)

nac.l1.x1 <- setx(m.f1, nac.l1=quantile(na.omit(df$nac.l1), prob=q1), nld.election.f1=0)
nac.l1.x2 <- setx(m.f1, nac.l1=quantile(na.omit(df$nac.l1), prob=q2), nld.election.f1=0)

polity2.lag.1.x1 <- setx(m.f1, polity2.lag.1=quantile(na.omit(df$polity2.lag.1), prob=q1), nld.election.f1=0)
polity2.lag.1.x2 <- setx(m.f1, polity2.lag.1=quantile(na.omit(df$polity2.lag.1), prob=q2), nld.election.f1=0)

part.dem.fac.l1.x1 <- setx(m.f1, part.dem.fac.l1=quantile(na.omit(df$part.dem.fac.l1), prob=q1), nld.election.f1=0)
part.dem.fac.l1.x2 <- setx(m.f1, part.dem.fac.l1=quantile(na.omit(df$part.dem.fac.l1), prob=q2), nld.election.f1=0)

pr.l1.x1 <- setx(m.f1, pr.l1=quantile(na.omit(df$pr.l1), prob=q1), nld.election.f1=0)
pr.l1.x2 <- setx(m.f1, pr.l1=quantile(na.omit(df$pr.l1), prob=q2), nld.election.f1=0)

nld.earlylate.f1.x1 <- setx(m.f1, nld.earlylate.f1=0, nld.election.f1=1)
nld.earlylate.f1.x2 <- setx(m.f1, nld.earlylate.f1=1, nld.election.f1=1)

nld.suspend.f1.x1 <- setx(m.f1, nld.suspend.f1=0, nld.election.f1=1)
nld.suspend.f1.x2 <- setx(m.f1, nld.suspend.f1=1, nld.election.f1=1)

model.2.4.names <- data.frame(c("nld.election.f1", "ef", "polarization", "nld.election.f1Xef", "nld.election.f1Xpolarization", "ln.wdi.pop.l1", "ln.wdi.imr.l1", "nac.l1", "polity2.lag.1",
                                "part.dem.fac.l1", "pr.l1", "nld.earlylate.f1", "nld.suspend.f1"))

model.2.4.s <- sims.mi.fd(m.f1, model.2.4.names, 0.025, 0.975, "Election t+1", 5)



figure1 <- as.data.frame(rbind(model.2.1.s, model.2.2.s,model.2.3.s,model.2.4.s))
figure1$variable <- sub(".f1", replacement="", x=figure1$variable)
figure1$variable <- sub(".lag.1", replacement="", x=figure1$variable)
figure1$variable <- sub(".l1", replacement="", x=figure1$variable)
order.f1 <- rev(c("nld.election", "ef", "polarization", "nld.electionXef", "nld.electionXpolarization", "ln.wdi.pop", "ln.wdi.imr", "nac", "polity2", "part.dem.fac", "pr",
                  "nld.earlylate", "nld.suspend"))
figure1$model2 <- factor(figure1$model, levels = c("No Controls, Election t-1", "Election t+1", "Election t", "Election t-1"))
figure1$variable <- factor(figure1$variable, levels=order.f1, labels = rev(c("Election", "Fractionalization", "Polarization",  "Election X Fractionalization", "Election X Polarization", 
                                                                             "Population (log)", "Infant Mortality", "Neighboring Conflicts", "Polity", "Partial Dem. with Factions", "Prop. Rep.",
                                                                             "Election Early/Late", "Election Suspended")))

# This is storing the effect for the summary of robustness tests figure. 
base <- subset(figure1, variable=="Election X Fractionalization")

# Create Figure 2


ggplot(data = figure1, aes(x = variable, y = mean, ymin = low, ymax = high)) + 
  geom_point()+facet_grid(~model2) +
  geom_point(position = position_dodge(width = 0.2)) +
  geom_errorbar(position = position_dodge(width = 0.2), width = 0.1) +
  coord_flip() +
  theme_bw() +
  theme(panel.grid.major.x = element_blank(),
        panel.grid.minor.x = element_blank(),
        panel.grid.major.y = element_line(colour="grey60", linetype="dashed")) + geom_hline(yintercept=0, linetype="dotted") + xlab("") + ylab("Marginal Effect on the Probability of Instability") 

This final section shows the table of most frationalized states with the numbers of onests of each type (Table 2)

# Table 2 -  Instability onsets and types in fractionalized states

require(dplyr)
## Loading required package: dplyr
## 
## Attaching package: 'dplyr'
## The following objects are masked from 'package:Hmisc':
## 
##     combine, src, summarize
## The following objects are masked from 'package:Zelig':
## 
##     combine, summarize
## The following object is masked from 'package:MASS':
## 
##     select
## The following objects are masked from 'package:stats':
## 
##     filter, lag
## The following objects are masked from 'package:base':
## 
##     intersect, setdiff, setequal, union
fractionalized <- subset(df, ef>=0.5 & polarization<0.6)
fractionalized$instab.st <- as.numeric(as.character(fractionalized$instab.st))
fractionalized <- fractionalized %>% tbl_df() %>% group_by(cname) %>% summarise(instab.sum=sum(instab.st), rwar.sum=sum(rwar.st),
                                                                                ewar.sum=sum(ewar.st), areg.sum=sum(areg.st),
                                                                                gpol.sum=sum(gpol.st))
# show the table
fractionalized
## Source: local data frame [14 x 6]
## 
##                               cname instab.sum rwar.sum ewar.sum areg.sum
##                              (fctr)      (dbl)    (int)    (int)    (int)
## 1                      Burkina Faso          1        0        0        1
## 2                          Cameroon          0        0        0        0
## 3  Democratic Republic of the Congo          3        1        2        1
## 4                             Gabon          0        0        0        0
## 5                             Ghana          2        0        0        2
## 6                             India          4        1        3        0
## 7                             Kenya          3        0        2        1
## 8                           Liberia          4        3        0        1
## 9                        Madagascar          1        0        0        1
## 10                 Papua New Guinea          1        0        1        0
## 11                     South Africa          2        1        1        0
## 12                         Tanzania          0        0        0        0
## 13                             Togo          0        0        0        0
## 14                           Uganda          5        1        2        2
## Variables not shown: gpol.sum (int)

The code below creates Figure 4, which shows he cases simulated, along with their distribution of ethnic groups represented as increasingly large circles depending upon their proportion of the total population.

# Fearon graph reported in figure 4.
require(foreign)


fearon <- read.dta("fearon.dta")
require(dplyr)
fearon <- rename(fearon, ccode=ccodebg)
fearon.grp <- read.csv("fearongroupdata.csv")

fearon.pol <- subset(fearon, select=c("ccode", "polarization"))
fearon <- merge(fearon.grp, fearon.pol, all.x=T)

fracmes <- c(fearon$ef[fearon$country=="GREECE"], fearon$ef[fearon$country=="PARAGUAY"], fearon$ef[fearon$country=="VIETNAM"], fearon$ef[fearon$country=="SWAZILAND"], fearon$ef[fearon$country=="DOMINICAN REP."],
             fearon$ef[fearon$country=="GUATEMALA"], fearon$ef[fearon$country=="BRAZIL"], fearon$ef[fearon$country=="PERU"], fearon$ef[fearon$country=="AFGHANISTAN"], fearon$ef[fearon$country=="CENTRAL AFRICAN REP."],
             fearon$ef[fearon$country=="CAMEROON"],fearon$ef[fearon$country=="UGANDA"])
polmes <- c(fearon$polarization[fearon$country=="GREECE"], fearon$polarization[fearon$country=="PARAGUAY"], fearon$polarization[fearon$country=="VIETNAM"], fearon$polarization[fearon$country=="SWAZILAND"], fearon$polarization[fearon$country=="DOMINICAN REP."],
            fearon$polarization[fearon$country=="GUATEMALA"], fearon$polarization[fearon$country=="BRAZIL"], fearon$polarization[fearon$country=="PERU"], fearon$polarization[fearon$country=="AFGHANISTAN"], fearon$polarization[fearon$country=="CENTRAL AFRICAN REP."],
            fearon$polarization[fearon$country=="CAMEROON"],fearon$polarization[fearon$country=="UGANDA"])

mes <- as.data.frame(cbind(fracmes, polmes))

fearon$sim <- 0
fearon$sim[fearon$country=="GREECE"|fearon$country=="PARAGUAY"|fearon$country=="VIETNAM"|fearon$country=="SWAZILAND"|fearon$country=="DOMINICAN REP."|fearon$country=="GUATEMALA"|
             fearon$country=="BRAZIL"|fearon$country=="PERU"|fearon$country=="AFGHANISTAN"|fearon$country=="CENTRAL AFRICAN REP."|fearon$country=="CAMEROON"|fearon$country=="UGANDA"] <- 1

fearon$label <- NA
for (i in 1:nrow(fearon)) { fearon$label[i][fearon$sim[i]==1] <- fearon$country[i]}

fearon.fig <- subset(fearon, sim==1)


# Create the plot (Figure 3)

ggplot(fearon.fig, aes(x=ef, y=polarization, size=gpro)) + geom_point(position=position_jitter(width=0.05, height=0.01), alpha=.8)+
  geom_text(aes(label=country), size=4, vjust=-1) +
  xlab("Ethnic Fractionalization")+ylab("Ethnic Polarization")+
  guides(size=FALSE) + annotate("rect", xmin=0.5, xmax=1.2, ymin=-0.2, ymax=0.6, alpha=.1,fill="green") + 
  annotate("rect", xmin=-0.2, xmax=0.5, ymin=-0.2, ymax=0.6, alpha=.1,fill="blue") + annotate("rect", xmin=-0.2, xmax=1.2, ymin=0.6, ymax=1.2, alpha=.1,fill="red")