\[ \begin{split} \Bigg( \sum_{i = 1}^N \begin{pmatrix} 1 & W_i \\ W_i & W_i \end{pmatrix} \Bigg)^{-1} \\ \times \Bigg( \sum_{g = 1}^G \sum_{i: G_{ig} = 1} \begin{pmatrix} \hat{\epsilon}_i\\ W_i \cdot \hat{\epsilon}_i \end{pmatrix} \sum_{i: G_{ig} = 1} \begin{pmatrix} \hat{\epsilon}_i\\ W_i \cdot \hat{\epsilon}_i \end{pmatrix}' \Bigg) \\ \times \Bigg( \sum_{i = 1}^N \begin{pmatrix} 1 & W_i \\ W_i & W_i \end{pmatrix} \Bigg)^{-1}. \end{split} \]
set.seed(1) G <- 100 G_t <- 50 N_g <- rpois(G, 99) + 1 N <- sum(N_g) R <- 1000 tau_g <- abs(rnorm(G))
outcome <-
foreach (g = 1:G, .combine = "rbind") %do% {
outcome_g <-
data.frame(
g = g,
y_0 = rnorm(N_g[g], mean = 0, sd = 1),
y_1 = rnorm(N_g[g], mean = tau_g[g], sd = 1)
)
return(outcome_g)
}
head(outcome)
## g y_0 y_1 ## 1 1 1.4085634 0.4271008 ## 2 1 -0.5417604 0.4635871 ## 3 1 0.2786647 1.6584401 ## 4 1 -0.1939727 -0.2107222 ## 5 1 1.5761582 1.0671814 ## 6 1 -1.4755476 -0.3604636
tau_fs <- outcome %>% dplyr::summarise(tau = mean(y_1 - y_0)) %>% dplyr::pull(tau) tau_fs
## [1] 0.7909974
tau_C <- outcome %>% dplyr::group_by(g) %>% dplyr::summarise(tau = mean(y_1 - y_0)) %>% dplyr::ungroup() %>% dplyr::summarise(tau = mean(tau)) %>% dplyr::pull(tau) tau_C
## [1] 0.7942535
assignment <- sample(G, G_t) outcome <- outcome %>% dplyr::mutate(w = (g %in% assignment)) head(outcome)
## g y_0 y_1 w ## 1 1 1.4085634 0.4271008 TRUE ## 2 1 -0.5417604 0.4635871 TRUE ## 3 1 0.2786647 1.6584401 TRUE ## 4 1 -0.1939727 -0.2107222 TRUE ## 5 1 1.5761582 1.0671814 TRUE ## 6 1 -1.4755476 -0.3604636 TRUE
outcome_observed <- outcome %>% dplyr::mutate(y = y_0 * (1 - w) + y_1 * w) %>% dplyr::select(g, y, w) head(outcome_observed)
## g y w ## 1 1 0.4271008 TRUE ## 2 1 0.4635871 TRUE ## 3 1 1.6584401 TRUE ## 4 1 -0.2107222 TRUE ## 5 1 1.0671814 TRUE ## 6 1 -0.3604636 TRUE
weight <-
outcome_observed %>%
dplyr::group_by(g) %>%
dplyr::summarise(
e = sum(w) / length(w),
q = length(g)
) %>%
dplyr::ungroup() %>%
dplyr::mutate(
q = q / sum(q)
)
weight
## # A tibble: 100 x 3 ## g e q ## <int> <dbl> <dbl> ## 1 1 1 0.00919 ## 2 2 0 0.0112 ## 3 3 1 0.0111 ## 4 4 1 0.0103 ## 5 5 0 0.00830 ## 6 6 1 0.0103 ## 7 7 1 0.0106 ## 8 8 0 0.0104 ## 9 9 1 0.00949 ## 10 10 0 0.00909 ## # ... with 90 more rows
tau_c_hat <-
outcome_observed %>%
dplyr::group_by(g) %>%
dplyr::summarise(
w = mean(w),
y = mean(y)
) %>%
dplyr::ungroup() %>%
dplyr::summarise(
tau = sum(y * w) / sum(w) - sum(y * (1 - w)) / sum(1 - w)
)
tau_c_hat
## # A tibble: 1 x 1 ## tau ## <dbl> ## 1 0.802
tau_c_hat_se <-
outcome_observed %>%
dplyr::group_by(g) %>%
dplyr::summarise(
w = mean(w),
y = mean(y)
) %>%
dplyr::ungroup() %>%
dplyr::group_by(w) %>%
dplyr::summarise(
G_w = length(y),
variance = sum((y - mean(y))^2 / (G_w - 1))
) %>%
dplyr::ungroup() %>%
dplyr::summarise(
se = sum(variance / G_w) %>% sqrt()
)
tau_c_hat_se
## # A tibble: 1 x 1 ## se ## <dbl> ## 1 0.0922
tau_c_hat_regression <-
outcome_observed %>%
dplyr::group_by(g) %>%
dplyr::summarise(
w = mean(w),
y = mean(y)
) %>%
dplyr::ungroup() %>%
lm(
data = .,
formula = y ~ w
)
tau_c_hat_regression %>% modelsummary(fmt = 6)
| Model 1 | |
|---|---|
| (Intercept) | 0.018931 |
| (0.065214) | |
| w | 0.801823 |
| (0.092227) | |
| Num.Obs. | 100 |
| R2 | 0.435 |
| R2 Adj. | 0.430 |
| AIC | 133.0 |
| BIC | 140.8 |
| Log.Lik. | -63.477 |
| F | 75.586 |
tau_fs_hat_regression <-
outcome_observed %>%
dplyr::mutate(w = as.integer(w)) %>%
lm(
data = .,
formula = y ~ w
)
tau_fs_hat_regression %>% modelsummary(vcov = ~ g, fmt = 6)
| Model 1 | |
|---|---|
| (Intercept) | 0.018490 |
| (0.014696) | |
| w | 0.799357 |
| (0.091399) | |
| Num.Obs. | 10116 |
| R2 | 0.118 |
| R2 Adj. | 0.118 |
| AIC | 30514.8 |
| BIC | 30536.4 |
| Log.Lik. | -15254.385 |
| F | 1352.176 |
| Std.Errors | C: g |
se_cluster_robust <-
outcome_observed %>%
dplyr::mutate(
constant = 1,
epsilon = tau_fs_hat_regression$residuals
)
term_1 <-
se_cluster_robust %>%
dplyr::select(constant, w) %>%
as.matrix()
term_1 <- crossprod(term_1, term_1)
term_2 <-
se_cluster_robust %>%
dplyr::group_split(g) %>%
purrr::map(
.,
function(df) {
df <-
df %>%
dplyr::mutate(w_epsilon = w * epsilon) %>%
dplyr::select(epsilon, w_epsilon) %>%
dplyr::summarise_all(sum) %>%
as.matrix()
df <- crossprod(df, df)
}
) %>%
purrr::reduce(`+`)
se_cluster_robust <- solve(term_1, term_2) %*% solve(term_1) se_cluster_robust %>% diag() %>% sqrt()
## constant w ## 0.01462145 0.09093666