Extension-SCM-without-PI.Rmd

---
output:
  pdf_document:
    latex_engine: xelatex
header-includes:
  - "\\usepackage{listings}"
  - "\\lstset{breaklines=true}"
---

# Extension: Synthetic Control without Piauí

```{r setup, include=FALSE}
#-----------------------------------------------------------------------------------------#
# Initial Setup
#-----------------------------------------------------------------------------------------#

library(dplyr)
library(ggplot2)
library(Synth)
library(xtable)
library(ggpubr)

# Run the project-local script that is expected to define plot_scm(), which is
# called further down in this document (presumably defined there -- confirm).
source("functions/plot_scm.R")

# Restore saved workspace objects. DATA_COMPLETE and abbr_state are used later
# in this document and are presumably provided by these two files -- confirm.
load("data/DATA_COMPLETE.RData")
load("data/abbr_code.RData")
```

```{r}
#-----------------------------------------------------------------------------------------#
# Data Subsetting
#-----------------------------------------------------------------------------------------#

# One plain data.frame per grade ("P", "LS") and subject ("math", "port");
# as.data.frame() drops any tibble class before the data reach dataprep().
PRIMARY_M_PI <- DATA_COMPLETE %>%
  filter(grade == "P" & subject == "math") %>%
  as.data.frame()

PRIMARY_P_PI <- DATA_COMPLETE %>%
  filter(grade == "P" & subject == "port") %>%
  as.data.frame()

LOWERS_M_PI <- DATA_COMPLETE %>%
  filter(grade == "LS" & subject == "math") %>%
  as.data.frame()

LOWERS_P_PI <- DATA_COMPLETE %>%
  filter(grade == "LS" & subject == "port") %>%
  as.data.frame()

#-----------------------------------------------------------------------------------------#
# Preparing Data for Synth (Excluding Piauí)
#-----------------------------------------------------------------------------------------#

#' Prepare Synth input with selected states removed from the donor pool
#'
#' Wraps `Synth::dataprep()` for one grade/subject data set. With the default
#' arguments the treated unit is Ceará (code 23) and Piauí (code 22) is left
#' out of the control units.
#'
#' @param data Data frame containing the columns `score`, `code_state`,
#'   `abbr_state`, `year` and the predictor columns listed in the body.
#' @param treated_code Numeric code of the treated unit. Defaults to 23.
#' @param excluded_codes Numeric codes removed from the donor pool in addition
#'   to the treated unit. Defaults to 22.
#' @return The object returned by `Synth::dataprep()`.
prepare_p_ls_PI <- function(data, treated_code = 23, excluded_codes = 22) {
  stopifnot(is.data.frame(data))

  predictors <- c("homicides", "TWh", "ln_pop", "unemployment", "edu_invest_pc")

  # Fail early with a readable message instead of deep inside dataprep().
  required_cols <- c(predictors, "score", "code_state", "abbr_state", "year")
  missing_cols <- setdiff(required_cols, names(data))
  if (length(missing_cols) > 0) {
    stop(
      "`data` is missing required column(s): ",
      paste(missing_cols, collapse = ", "),
      call. = FALSE
    )
  }

  # Full list of state codes; the donor pool is everything except the treated
  # unit and the explicitly excluded states (setdiff() keeps the order).
  all_codes <- c(11:17, 21:29, 31:33, 35, 41:43, 50:53)
  control_codes <- setdiff(all_codes, c(treated_code, excluded_codes))

  # Years used both for averaging the predictors and for the fit.
  pre_period <- seq(1995, 2007, 2)

  # Namespaced call: no library() inside the function, so calling it does not
  # alter the search path.
  Synth::dataprep(
    foo = data,
    predictors = predictors,
    dependent = "score",
    unit.variable = "code_state",
    time.variable = "year",
    unit.names.variable = "abbr_state",
    treatment.identifier = treated_code,
    controls.identifier = control_codes,
    time.predictors.prior = pre_period,
    time.optimize.ssr = pre_period,
    time.plot = seq(1995, 2019, 2)
  )
}

#-----------------------------------------------------------------------------------------#
# Execution of Synthetic Control Method
#-----------------------------------------------------------------------------------------#

# Primary education, mathematics
DATA_PM_PI <- prepare_p_ls_PI(data = PRIMARY_M_PI)
SCM_PM_PI  <- synth(data.prep.obj = DATA_PM_PI)

# Primary education, Portuguese
DATA_PP_PI <- prepare_p_ls_PI(data = PRIMARY_P_PI)
SCM_PP_PI  <- synth(data.prep.obj = DATA_PP_PI)

# Lower secondary education, mathematics
DATA_LSM_PI <- prepare_p_ls_PI(data = LOWERS_M_PI)
SCM_LSM_PI  <- synth(data.prep.obj = DATA_LSM_PI)

# Lower secondary education, Portuguese
DATA_LSP_PI <- prepare_p_ls_PI(data = LOWERS_P_PI)
SCM_LSP_PI  <- synth(data.prep.obj = DATA_LSP_PI)

#-----------------------------------------------------------------------------------------#
# Graphs Generation in ggplot (Excluding Piauí)
#-----------------------------------------------------------------------------------------#

# Build the plot objects for each grade/subject pair: plot_scm() receives the
# subset data and the synth.tab() summary of the corresponding fit.
PM_PI <- plot_scm(
  PRIMARY_M_PI,
  synth.tab(synth.res = SCM_PM_PI, dataprep.res = DATA_PM_PI)
)
PP_PI <- plot_scm(
  PRIMARY_P_PI,
  synth.tab(synth.res = SCM_PP_PI, dataprep.res = DATA_PP_PI)
)
LSM_PI <- plot_scm(
  LOWERS_M_PI,
  synth.tab(synth.res = SCM_LSM_PI, dataprep.res = DATA_LSM_PI)
)
LSP_PI <- plot_scm(
  LOWERS_P_PI,
  synth.tab(synth.res = SCM_LSP_PI, dataprep.res = DATA_LSP_PI)
)

#-----------------------------------------------------------------------------------------#
# Combining Data for Graphs and Plot Adjustments
#-----------------------------------------------------------------------------------------#

# Stack the first element of each plot_scm() result into one data set.
DATA_GRAPH_PI <- rbind(PM_PI[[1]], PP_PI[[1]], LSM_PI[[1]], LSP_PI[[1]])

# Map the short codes to display labels and fix the facet order in one step.
# factor(levels =, labels =) works whether the column arrives as character or
# as factor; assigning new strings into a factor column would produce NAs.
# Only "P" and "LS" are analysed here, so no upper-secondary label is needed
# (any other code becomes NA, exactly as it did before).
DATA_GRAPH_PI$grade <- factor(
  DATA_GRAPH_PI$grade,
  levels = c("P", "LS"),
  labels = c("Primary Education", "Lower Secondary Education")
)

DATA_GRAPH_PI$subject <- factor(
  DATA_GRAPH_PI$subject,
  levels = c("math", "port"),
  labels = c("Mathematics", "Portuguese")
)

#-----------------------------------------------------------------------------------------#
# Final Plotting and Saving
#-----------------------------------------------------------------------------------------#

# DATA_GRAPH_PI is the combined, relabelled plot data built above.

# Draw Ceará against its synthetic counterpart for one education level.
#   graph_data   : combined plot data with columns year, score, unit, grade
#                  and subject.
#   grade_label  : value of `grade` to keep (one facet row).
#   second_vline : year of the second dashed reference line; the first one is
#                  always drawn at 2008.
#   x_label      : x-axis title.
# Returns a ggplot object faceted by grade (rows) and subject (columns).
# NOTE(review): the legend labels are matched to the values of `unit` by
# position -- confirm that the observed series sorts before the synthetic one.
plot_grade_without_PI <- function(graph_data, grade_label, second_vline,
                                  x_label) {
  ggplot(data = filter(graph_data, grade == grade_label),
         aes(x = year, y = score, color = unit)) +
    geom_vline(xintercept = 2008, color = "#636363", linetype = "dashed",
               size = 0.9) +
    geom_vline(xintercept = second_vline, color = "#636363",
               linetype = "dashed", size = 0.9) +
    geom_line(size = 0.9) +
    scale_color_manual(
      values = c("#01665e", "#d8b365"),
      labels = c("Ceará", "Synthetic Ceará (without Piauí)"),
      name = ""
    ) +
    ylab("Score") +
    xlab(x_label) +
    theme_bw() +
    theme(panel.grid.major = element_blank(),
          panel.grid.minor = element_blank(),
          legend.position = "bottom") +
    facet_grid(vars(grade), vars(subject))
}

# Figure for Primary Education
a_without_PI <- plot_grade_without_PI(
  DATA_GRAPH_PI, "Primary Education",
  second_vline = 2011, x_label = ""
)

# Figure for Lower Secondary Education
b_without_PI <- plot_grade_without_PI(
  DATA_GRAPH_PI, "Lower Secondary Education",
  second_vline = 2015, x_label = "Year"
)

# Arrange the two panels, show the result in the document, and save it.
fig_without_PI <- ggarrange(a_without_PI, b_without_PI, ncol = 1, nrow = 2,
                            common.legend = TRUE, legend = "bottom")
print(fig_without_PI)

# Pass the arranged figure explicitly instead of relying on last_plot(), and
# make sure the output directory exists before writing.
dir.create("plots", showWarnings = FALSE, recursive = TRUE)
ggsave(filename = "figures_without_PI.png", plot = fig_without_PI,
       path = "plots", width = 21, height = 15, units = "cm")

# Summary tables for each fit (excluding Piauí). The SCM_*_PI fits were
# already estimated earlier in this chunk from the same dataprep objects, so
# they are reused here rather than repeating the synth() optimisation.
TABLES_PM_PI  <- synth.tab(dataprep.res = DATA_PM_PI, synth.res = SCM_PM_PI)
TABLES_PP_PI  <- synth.tab(dataprep.res = DATA_PP_PI, synth.res = SCM_PP_PI)
TABLES_LSM_PI <- synth.tab(dataprep.res = DATA_LSM_PI, synth.res = SCM_LSM_PI)
TABLES_LSP_PI <- synth.tab(dataprep.res = DATA_LSP_PI, synth.res = SCM_LSP_PI)

# Extract the donor weights from a synth.tab() result.
#   tables     : result of synth.tab(); its `tab.w` element is used.
#   label      : name given to the weight column in the output.
#   min_weight : only weights strictly greater than this value are kept.
# Returns a data frame with the weight column (named `label`) and
# `abbreviation` (the unit names).
extract_weights_PI <- function(tables, label, min_weight = 0.01) {
  weights <- as.data.frame(tables$tab.w) %>%
    filter(w.weights > min_weight) %>%
    rename(abbreviation = unit.names) %>%
    dplyr::select(-unit.numbers)
  names(weights)[names(weights) == "w.weights"] <- label
  weights
}

# Extracting Weights and Renaming
W_PM_PI  <- extract_weights_PI(TABLES_PM_PI, "PM_PI")
W_PP_PI  <- extract_weights_PI(TABLES_PP_PI, "PP_PI")
W_LSM_PI <- extract_weights_PI(TABLES_LSM_PI, "LSM_PI")
W_LSP_PI <- extract_weights_PI(TABLES_LSP_PI, "LSP_PI")

#-----------------------------------------------------------------------------------------#
# Weights Table
#-----------------------------------------------------------------------------------------#

# Attach the four weight columns to the state lookup table, one left join at
# a time, always matching on the state abbreviation.
TABLE_W_PI <- Reduce(
  function(acc, weights) left_join(acc, weights, by = "abbreviation"),
  list(W_PM_PI, W_PP_PI, W_LSM_PI, W_LSP_PI),
  abbr_state
)

# States without a retained weight in a given fit come out of the joins as NA;
# show them as zero.
TABLE_W_PI[is.na(TABLE_W_PI)] <- 0

# Export the combined table.
write.csv(TABLE_W_PI, file = "table_weights_without_PI.csv")
```