\[ \begin{split} \Bigg( \sum_{i = 1}^N \begin{pmatrix} 1 & W_i \\ W_i & W_i \end{pmatrix} \Bigg)^{-1} \\ \times \Bigg( \sum_{g = 1}^G \sum_{i: G_{ig} = 1} \begin{pmatrix} \hat{\epsilon}_i\\ W_i \cdot \hat{\epsilon}_i \end{pmatrix} \sum_{i: G_{ig} = 1} \begin{pmatrix} \hat{\epsilon}_i\\ W_i \cdot \hat{\epsilon}_i \end{pmatrix}' \Bigg) \\ \times \Bigg( \sum_{i = 1}^N \begin{pmatrix} 1 & W_i \\ W_i & W_i \end{pmatrix} \Bigg)^{-1}. \end{split} \]
set.seed(1) G <- 100 G_t <- 50 N_g <- rpois(G, 99) + 1 N <- sum(N_g) R <- 1000 tau_g <- abs(rnorm(G))
outcome <- foreach (g = 1:G, .combine = "rbind") %do% { outcome_g <- data.frame( g = g, y_0 = rnorm(N_g[g], mean = 0, sd = 1), y_1 = rnorm(N_g[g], mean = tau_g[g], sd = 1) ) return(outcome_g) } head(outcome)
## g y_0 y_1 ## 1 1 1.4085634 0.4271008 ## 2 1 -0.5417604 0.4635871 ## 3 1 0.2786647 1.6584401 ## 4 1 -0.1939727 -0.2107222 ## 5 1 1.5761582 1.0671814 ## 6 1 -1.4755476 -0.3604636
tau_fs <- outcome %>% dplyr::summarise(tau = mean(y_1 - y_0)) %>% dplyr::pull(tau) tau_fs
## [1] 0.7909974
tau_C <- outcome %>% dplyr::group_by(g) %>% dplyr::summarise(tau = mean(y_1 - y_0)) %>% dplyr::ungroup() %>% dplyr::summarise(tau = mean(tau)) %>% dplyr::pull(tau) tau_C
## [1] 0.7942535
assignment <- sample(G, G_t) outcome <- outcome %>% dplyr::mutate(w = (g %in% assignment)) head(outcome)
## g y_0 y_1 w ## 1 1 1.4085634 0.4271008 TRUE ## 2 1 -0.5417604 0.4635871 TRUE ## 3 1 0.2786647 1.6584401 TRUE ## 4 1 -0.1939727 -0.2107222 TRUE ## 5 1 1.5761582 1.0671814 TRUE ## 6 1 -1.4755476 -0.3604636 TRUE
outcome_observed <- outcome %>% dplyr::mutate(y = y_0 * (1 - w) + y_1 * w) %>% dplyr::select(g, y, w) head(outcome_observed)
## g y w ## 1 1 0.4271008 TRUE ## 2 1 0.4635871 TRUE ## 3 1 1.6584401 TRUE ## 4 1 -0.2107222 TRUE ## 5 1 1.0671814 TRUE ## 6 1 -0.3604636 TRUE
weight <- outcome_observed %>% dplyr::group_by(g) %>% dplyr::summarise( e = sum(w) / length(w), q = length(g) ) %>% dplyr::ungroup() %>% dplyr::mutate( q = q / sum(q) ) weight
## # A tibble: 100 x 3 ## g e q ## <int> <dbl> <dbl> ## 1 1 1 0.00919 ## 2 2 0 0.0112 ## 3 3 1 0.0111 ## 4 4 1 0.0103 ## 5 5 0 0.00830 ## 6 6 1 0.0103 ## 7 7 1 0.0106 ## 8 8 0 0.0104 ## 9 9 1 0.00949 ## 10 10 0 0.00909 ## # ... with 90 more rows
tau_c_hat <- outcome_observed %>% dplyr::group_by(g) %>% dplyr::summarise( w = mean(w), y = mean(y) ) %>% dplyr::ungroup() %>% dplyr::summarise( tau = sum(y * w) / sum(w) - sum(y * (1 - w)) / sum(1 - w) ) tau_c_hat
## # A tibble: 1 x 1 ## tau ## <dbl> ## 1 0.802
tau_c_hat_se <- outcome_observed %>% dplyr::group_by(g) %>% dplyr::summarise( w = mean(w), y = mean(y) ) %>% dplyr::ungroup() %>% dplyr::group_by(w) %>% dplyr::summarise( G_w = length(y), variance = sum((y - mean(y))^2 / (G_w - 1)) ) %>% dplyr::ungroup() %>% dplyr::summarise( se = sum(variance / G_w) %>% sqrt() ) tau_c_hat_se
## # A tibble: 1 x 1 ## se ## <dbl> ## 1 0.0922
tau_c_hat_regression <- outcome_observed %>% dplyr::group_by(g) %>% dplyr::summarise( w = mean(w), y = mean(y) ) %>% dplyr::ungroup() %>% lm( data = ., formula = y ~ w )
tau_c_hat_regression %>% modelsummary(fmt = 6)
Model 1 | |
---|---|
(Intercept) | 0.018931 |
(0.065214) | |
w | 0.801823 |
(0.092227) | |
Num.Obs. | 100 |
R2 | 0.435 |
R2 Adj. | 0.430 |
AIC | 133.0 |
BIC | 140.8 |
Log.Lik. | -63.477 |
F | 75.586 |
tau_fs_hat_regression <- outcome_observed %>% dplyr::mutate(w = as.integer(w)) %>% lm( data = ., formula = y ~ w )
tau_fs_hat_regression %>% modelsummary(vcov = ~ g, fmt = 6)
Model 1 | |
---|---|
(Intercept) | 0.018490 |
(0.014696) | |
w | 0.799357 |
(0.091399) | |
Num.Obs. | 10116 |
R2 | 0.118 |
R2 Adj. | 0.118 |
AIC | 30514.8 |
BIC | 30536.4 |
Log.Lik. | -15254.385 |
F | 1352.176 |
Std.Errors | C: g |
se_cluster_robust <- outcome_observed %>% dplyr::mutate( constant = 1, epsilon = tau_fs_hat_regression$residuals ) term_1 <- se_cluster_robust %>% dplyr::select(constant, w) %>% as.matrix() term_1 <- crossprod(term_1, term_1)
term_2 <- se_cluster_robust %>% dplyr::group_split(g) %>% purrr::map( ., function(df) { df <- df %>% dplyr::mutate(w_epsilon = w * epsilon) %>% dplyr::select(epsilon, w_epsilon) %>% dplyr::summarise_all(sum) %>% as.matrix() df <- crossprod(df, df) } ) %>% purrr::reduce(`+`)
se_cluster_robust <- solve(term_1, term_2) %*% solve(term_1) se_cluster_robust %>% diag() %>% sqrt()
## constant w ## 0.01462145 0.09093666