The average outcomes by treatment assignment status: \[ \overline{Y}_0^{obs} \equiv \frac{1}{N_0} \sum_{i = 1}^N (1 - Z_i) \cdot Y_i^{obs}, \overline{Y}_1^{obs} \equiv \frac{1}{N_1} \sum_{i = 1}^N Z_i \cdot Y_i^{obs}. \]
The average treatment receipt by treatment assignment status: \[ \overline{W}_0^{obs} \equiv \frac{1}{N_0} \sum_{i = 1}^N (1 - Z_i) \cdot W_i^{obs}, \overline{W}_1^{obs} \equiv \frac{1}{N_1} \sum_{i = 1}^N Z_i \cdot W_i^{obs}. \]
Subjects are divided into latent compliance status according to their value of \(W_i(z)\). \[ G_i \equiv \begin{cases} \text{co if } W_i(1) = 1 \\ \text{nc if } W_i(1) = 0. \end{cases} \]
Underlying compliance status:
\(Z_i = 0\) | \(Z_i = 1\) | |
---|---|---|
\(W_i^{obs} = 0\) | nc or co | nc |
\(W_i^{obs} = 1\) | - | co |
\(Z_i = 0\) | \(Z_i = 1\) | |
---|---|---|
\(W_i^{obs} = 0\) | nt/co | nt/df |
\(W_i^{obs} = 1\) | at/df | at/co |
\[ \begin{split} ITT_{Y, nt} & = \frac{1}{N_{nt}} \sum_{i: G_i = nt}\{Y_i[1, W_i(1)] - Y_i[0, W_i(0)] \}\\ &=\frac{1}{N_{nt}} \sum_{i: G_i = nt}\{Y_i(1, 0) - Y_i(0, 0) \}\\ &=0. \end{split} \]
\[ \begin{split} ITT_{Y, at} & = \frac{1}{N_{at}} \sum_{i: G_i = at}\{Y_i[1, W_i(1)] - Y_i[0, W_i(0)] \}\\ &=\frac{1}{N_{at}} \sum_{i: G_i = at}\{Y_i(1, 1) - Y_i(0, 1) \}\\ &=0. \end{split} \] - Therefore, \(ITT_Y\) is a mixture of compliers and defiers.
\[ \begin{split} \mathbb{E}[Y|e(Z) = e] &= \mathbb{E}[Y(0)| e(z) = e]\\ &+ \mathbb{E}\{D\cdot[Y(1) - Y(0)]| e(z) = e\} \\ &= \mathbb{E}[Y(0)| e(z) = e]\\ &+ \mathbb{E}[Y(1) - Y(0)| D = 1, e(z) = e] \cdot \mathbb{P}[D = 1|e(z) = e]\\ &= \mathbb{E}[Y(0)| e(z) = e] + \mathbb{E}[Y(1) - Y(0)| D = 1, e(z) = e] \cdot e\\ &= \mathbb{E}[Y(0)] + \mathbb{E}[Y(1) - Y(0)| D = 1, e(z) = e] \cdot e\\ &= \mathbb{E}[Y(0)] + (\mu_1 - \mu_0) \cdot e\\ &+ \mathbb{E}[U_1 - U_0| D = 1, e(z) = e] \cdot e\\ &= \mathbb{E}[Y(0)] + (\mu_1 - \mu_0) \cdot e + \mathbb{E}[U_1 - U_0| U_D \le e] \cdot e\\ &= \mathbb{E}[Y(0)] + (\mu_1 - \mu_0) \cdot e\\ &+ \int_0^e \mathbb{E}[U_1 - U_0| U_d = u_D] d u_D. \end{split} \]
set.seed(1) N <- 1000 e <- 0.5 tau <- c(1, 2)
df_latent <- dplyr::bind_rows( tibble::tibble( g = "nc", y_0 = rnorm(N), y_1 = tau[1] + rnorm(N) ), tibble::tibble( g = "co", y_0 = rnorm(N), y_1 = tau[2] + rnorm(N) ) ) df_latent
## # A tibble: 2,000 x 3 ## g y_0 y_1 ## <chr> <dbl> <dbl> ## 1 nc -0.626 2.13 ## 2 nc 0.184 2.11 ## 3 nc -0.836 0.129 ## 4 nc 1.60 1.21 ## 5 nc 0.330 1.07 ## 6 nc -0.820 -0.663 ## 7 nc 0.487 1.81 ## 8 nc 0.738 -0.912 ## 9 nc 0.576 -0.247 ## 10 nc -0.305 2.00 ## # ... with 1,990 more rows
df_latent %>% dplyr::filter(g == "co") %>% dplyr::summarise(mean(y_1) - mean(y_0))
## # A tibble: 1 x 1 ## `mean(y_1) - mean(y_0)` ## <dbl> ## 1 2.00
df_latent <- df_latent %>% dplyr::mutate(z = (runif(length(g)) < e)) df_latent
## # A tibble: 2,000 x 4 ## g y_0 y_1 z ## <chr> <dbl> <dbl> <lgl> ## 1 nc -0.626 2.13 TRUE ## 2 nc 0.184 2.11 TRUE ## 3 nc -0.836 0.129 FALSE ## 4 nc 1.60 1.21 TRUE ## 5 nc 0.330 1.07 FALSE ## 6 nc -0.820 -0.663 TRUE ## 7 nc 0.487 1.81 TRUE ## 8 nc 0.738 -0.912 TRUE ## 9 nc 0.576 -0.247 TRUE ## 10 nc -0.305 2.00 FALSE ## # ... with 1,990 more rows
df_latent <- df_latent %>% dplyr::mutate( w = ifelse( g == "nc", FALSE, z ) ) df_latent
## # A tibble: 2,000 x 5 ## g y_0 y_1 z w ## <chr> <dbl> <dbl> <lgl> <lgl> ## 1 nc -0.626 2.13 TRUE FALSE ## 2 nc 0.184 2.11 TRUE FALSE ## 3 nc -0.836 0.129 FALSE FALSE ## 4 nc 1.60 1.21 TRUE FALSE ## 5 nc 0.330 1.07 FALSE FALSE ## 6 nc -0.820 -0.663 TRUE FALSE ## 7 nc 0.487 1.81 TRUE FALSE ## 8 nc 0.738 -0.912 TRUE FALSE ## 9 nc 0.576 -0.247 TRUE FALSE ## 10 nc -0.305 2.00 FALSE FALSE ## # ... with 1,990 more rows
df_observed <- df_latent %>% dplyr::mutate(y = y_0 * (1 - w) + y_1 * w) %>% dplyr::select(y, z, w) head(df_observed)
## # A tibble: 6 x 3 ## y z w ## <dbl> <lgl> <lgl> ## 1 -0.626 TRUE FALSE ## 2 0.184 TRUE FALSE ## 3 -0.836 FALSE FALSE ## 4 1.60 TRUE FALSE ## 5 0.330 FALSE FALSE ## 6 -0.820 TRUE FALSE
itt_w <- df_observed %>% dplyr::filter(z == 1) %>% dplyr::summarise(w = sum(w) / length(w)) %>% dplyr::pull(w) itt_w
## [1] 0.5024777
df_observed %>% lm( data = ., formula = w ~ z ) %>% modelsummary(fmt = 6)
Model 1 | |
---|---|
(Intercept) | 0.000000 |
(0.011287) | |
zTRUE | 0.502478 |
(0.015891) | |
Num.Obs. | 2000 |
R2 | 0.334 |
R2 Adj. | 0.333 |
AIC | 1540.7 |
BIC | 1557.5 |
Log.Lik. | -767.371 |
F | 999.870 |
itt_y <- df_observed %>% dplyr::group_by(z) %>% dplyr::summarise(y = mean(y)) %>% dplyr::ungroup() %>% dplyr::summarise(y = sum(y * z) - sum(y * (1 - z))) %>% dplyr::pull(y) itt_y
## [1] 1.014518
df_observed %>% lm( data = ., formula = y ~ z ) %>% modelsummary(fmt = 6)
Model 1 | |
---|---|
(Intercept) | 0.007606 |
(0.040564) | |
zTRUE | 1.014518 |
(0.057109) | |
Num.Obs. | 2000 |
R2 | 0.136 |
R2 Adj. | 0.136 |
AIC | 6657.7 |
BIC | 6674.5 |
Log.Lik. | -3325.828 |
F | 315.577 |
tau_late <- itt_y / itt_w tau_late
## [1] 2.019031
df_observed %>% estimatr::iv_robust( data = ., formula = y ~ w | z ) %>% modelsummary(fmt = 6)
Model 1 | |
---|---|
(Intercept) | 0.007606 |
(0.033751) | |
wTRUE | 2.019031 |
(0.093112) | |
Num.Obs. | 2000 |
R2 | 0.421 |
R2 Adj. | 0.420 |
p.value.endogeneity | |
p.value.overid | |
p.value.weakinst | |
se_type | HC2 |
statistic.endogeneity | |
statistic.overid | |
statistic.weakinst |