set.seed(1) N <- 1000 J <- 4 M <- 1 beta_t <- c(1, 0.1) beta_c <- c(0, 0.05)
outcome <-
tibble::tibble(
x = rnorm(N),
y_0 = beta_c[1] + beta_c[2] * x + rnorm(N),
y_1 = beta_t[1] + beta_t[2] * x + rnorm(N)
)
head(outcome)
## # A tibble: 6 x 3 ## x y_0 y_1 ## <dbl> <dbl> <dbl> ## 1 -0.626 1.10 0.0512 ## 2 0.184 1.12 -0.904 ## 3 -0.836 -0.913 2.54 ## 4 1.60 0.290 1.68 ## 5 0.330 0.0859 0.977 ## 6 -0.820 -1.70 1.61
tau_fs <- outcome %>% dplyr::summarise(tau = mean(y_1 - y_0)) %>% dplyr::pull(tau) tau_fs
## [1] 1.030989
outcome <-
outcome %>%
dplyr::mutate(
e = exp(x) / (1 + exp(x)),
w = (runif(N) < e)
)
head(outcome)
## # A tibble: 6 x 5 ## x y_0 y_1 e w ## <dbl> <dbl> <dbl> <dbl> <lgl> ## 1 -0.626 1.10 0.0512 0.348 FALSE ## 2 0.184 1.12 -0.904 0.546 FALSE ## 3 -0.836 -0.913 2.54 0.302 FALSE ## 4 1.60 0.290 1.68 0.831 TRUE ## 5 0.330 0.0859 0.977 0.582 FALSE ## 6 -0.820 -1.70 1.61 0.306 TRUE
outcome_observed <- outcome %>% dplyr::mutate(y = y_0 * (1 - w) + y_1 * w) %>% dplyr::select(y, w, x) head(outcome_observed)
## # A tibble: 6 x 3 ## y w x ## <dbl> <lgl> <dbl> ## 1 1.10 FALSE -0.626 ## 2 1.12 FALSE 0.184 ## 3 -0.913 FALSE -0.836 ## 4 1.68 TRUE 1.60 ## 5 0.0859 FALSE 0.330 ## 6 1.61 TRUE -0.820
result <-
lm(
data = outcome_observed,
formula = y ~ w + w:x
)
result %>%
modelsummary(fmt = 6)
| Model 1 | |
|---|---|
| (Intercept) | -0.017528 |
| (0.049717) | |
| wTRUE | 1.103605 |
| (0.073258) | |
| wFALSE × x | 0.088752 |
| (0.047710) | |
| wTRUE × x | 0.068441 |
| (0.052452) | |
| Num.Obs. | 1000 |
| R2 | 0.238 |
| R2 Adj. | 0.236 |
| AIC | 2949.7 |
| BIC | 2974.3 |
| Log.Lik. | -1469.856 |
| F | 103.888 |
result <-
c(
y ~ w + w:exp(x),
y ~ w + w:x^2,
y ~ w + w:(x + x^2)
) %>%
purrr::map(
.,
~ lm(
data = outcome_observed,
formula = .
)
)
result %>% modelsummary(fmt = 6)
| Model 1 | Model 2 | Model 3 | |
|---|---|---|---|
| (Intercept) | -0.125049 | -0.017528 | -0.017528 |
| (0.057698) | (0.049717) | (0.049717) | |
| wTRUE | 1.179050 | 1.103605 | 1.103605 |
| (0.084518) | (0.073258) | (0.073258) | |
| wFALSE × exp(x) | 0.066445 | ||
| (0.032783) | |||
| wTRUE × exp(x) | 0.026059 | ||
| (0.016001) | |||
| wFALSE × x | 0.088752 | 0.088752 | |
| (0.047710) | (0.047710) | ||
| wTRUE × x | 0.068441 | 0.068441 | |
| (0.052452) | (0.052452) | ||
| Num.Obs. | 1000 | 1000 | 1000 |
| R2 | 0.240 | 0.238 | 0.238 |
| R2 Adj. | 0.237 | 0.236 | 0.236 |
| AIC | 2948.1 | 2949.7 | 2949.7 |
| BIC | 2972.7 | 2974.3 | 2974.3 |
| Log.Lik. | -1469.059 | -1469.856 | -1469.856 |
| F | 104.583 | 103.888 | 103.888 |
propensity_score <-
glm(
data = outcome_observed,
formula = w ~ x,
family = binomial("logit")
)
propensity_score %>%
modelsummary(fmt = 6)
| Model 1 | |
|---|---|
| (Intercept) | -0.139605 |
| (0.069562) | |
| x | 0.951090 |
| (0.080246) | |
| Num.Obs. | 1000 |
| AIC | 1204.7 |
| BIC | 1214.5 |
| Log.Lik. | -600.337 |
outcome_observed <-
outcome_observed %>%
dplyr::mutate(e = predict(propensity_score, type = "response"))
outcome_observed %>%
ggplot(
aes(
x = x,
y = e
)
) +
geom_line() +
theme_classic()
outcome_observed %>% dplyr::mutate(weight = (w / e + (1 - w) / (1 - e)) / length(w)) %>% dplyr::summarise(y = sum(w * y * weight) - sum((1 - w) * y * weight))
## # A tibble: 1 x 1 ## y ## <dbl> ## 1 1.08
blocking_estimator <-
outcome_observed %>%
dplyr::mutate(j = dplyr::ntile(outcome_observed$e, J)) %>%
dplyr::group_by(j) %>%
dplyr::summarise(
N_j = length(y),
y = sum(y * w) / sum(w) - sum(y * (1 - w)) / sum(1 - w)
) %>%
dplyr::ungroup()
blocking_estimator
## # A tibble: 4 x 3 ## j N_j y ## <int> <int> <dbl> ## 1 1 250 1.09 ## 2 2 250 1.21 ## 3 3 250 1.00 ## 4 4 250 1.17
blocking_estimator %>% dplyr::summarise(y = sum(y * N_j) / sum(N_j))
## # A tibble: 1 x 1 ## y ## <dbl> ## 1 1.12
matched <-
MatchIt::matchit(
data = outcome_observed,
formula = w ~ x,
method = "nearest",
distance = "glm",
ratio = M
)
matched
## A matchit object ## - method: 1:1 nearest neighbor matching without replacement ## - distance: Propensity score ## - estimated with logistic regression ## - number of obs.: 1000 (original), 938 (matched) ## - target estimand: ATT ## - covariates: x
matched_observed <- MatchIt::match.data(matched) matched_observed
## # A tibble: 938 x 7 ## y w x e distance weights subclass ## <dbl> <lgl> <dbl> <dbl> <dbl> <dbl> <fct> ## 1 1.10 FALSE -0.626 0.324 0.324 1 424 ## 2 1.12 FALSE 0.184 0.509 0.509 1 59 ## 3 -0.913 FALSE -0.836 0.282 0.282 1 363 ## 4 1.68 TRUE 1.60 0.799 0.799 1 158 ## 5 0.0859 FALSE 0.330 0.543 0.543 1 39 ## 6 1.61 TRUE -0.820 0.285 0.285 1 265 ## 7 1.10 TRUE 0.487 0.580 0.580 1 314 ## 8 -0.236 TRUE 0.738 0.637 0.637 1 366 ## 9 -1.07 TRUE 0.576 0.601 0.601 1 412 ## 10 0.983 FALSE -0.305 0.394 0.394 1 113 ## # ... with 928 more rows
matched_observed %>% dplyr::group_by(subclass) %>% dplyr::summarise(y = sum(y * w) - sum(y * (1 - w))) %>% dplyr::ungroup() %>% dplyr::summarise(y = mean(y))
## # A tibble: 1 x 1 ## y ## <dbl> ## 1 1.15