set.seed(1)
N <- 1000             # sample size
J <- 4                # number of strata for the blocking estimator
M <- 1                # matching ratio (1:1)
beta_t <- c(1, 0.1)   # intercept and slope of the treated potential outcome
beta_c <- c(0, 0.05)  # intercept and slope of the control potential outcome
outcome <- tibble::tibble(
  x = rnorm(N),
  y_0 = beta_c[1] + beta_c[2] * x + rnorm(N),
  y_1 = beta_t[1] + beta_t[2] * x + rnorm(N)
)
head(outcome)
## # A tibble: 6 x 3
##        x    y_0     y_1
##    <dbl>  <dbl>   <dbl>
## 1 -0.626  1.10   0.0512
## 2  0.184  1.12  -0.904
## 3 -0.836 -0.913  2.54
## 4  1.60   0.290  1.68
## 5  0.330  0.0859 0.977
## 6 -0.820 -1.70   1.61
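Written out, the simulated potential outcomes follow a pair of linear models,

$$
Y_i(0) = \beta_{c,1} + \beta_{c,2} X_i + \varepsilon_{0i}, \qquad
Y_i(1) = \beta_{t,1} + \beta_{t,2} X_i + \varepsilon_{1i}, \qquad
\varepsilon_{0i}, \varepsilon_{1i} \sim N(0, 1),
$$

with $X_i \sim N(0, 1)$, $\beta_c = (0, 0.05)$ and $\beta_t = (1, 0.1)$.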
tau_fs <- outcome %>%
  dplyr::summarise(tau = mean(y_1 - y_0)) %>%
  dplyr::pull(tau)
tau_fs
## [1] 1.030989
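Because both potential outcomes are observed in the simulated data, the finite-sample average treatment effect can be computed directly:

$$
\hat{\tau}_{fs} = \frac{1}{N} \sum_{i=1}^{N} \bigl( Y_i(1) - Y_i(0) \bigr) \approx 1.03,
$$

close to the population value of $1$ implied by the coefficients, since $E[X_i] = 0$.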
outcome <- outcome %>%
  dplyr::mutate(
    e = exp(x) / (1 + exp(x)),
    w = (runif(N) < e)
  )
head(outcome)
## # A tibble: 6 x 5
##        x    y_0     y_1     e w
##    <dbl>  <dbl>   <dbl> <dbl> <lgl>
## 1 -0.626  1.10   0.0512 0.348 FALSE
## 2  0.184  1.12  -0.904  0.546 FALSE
## 3 -0.836 -0.913  2.54   0.302 FALSE
## 4  1.60   0.290  1.68   0.831 TRUE
## 5  0.330  0.0859 0.977  0.582 FALSE
## 6 -0.820 -1.70   1.61   0.306 TRUE
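Treatment is assigned by a Bernoulli draw whose success probability is the true propensity score, a logistic function of $x$:

$$
e(X_i) = \Pr(W_i = 1 \mid X_i) = \frac{\exp(X_i)}{1 + \exp(X_i)}, \qquad
W_i \mid X_i \sim \mathrm{Bernoulli}\bigl(e(X_i)\bigr).
$$

Because the assignment probability depends on $x$, which also enters both outcome equations, a simple comparison of treated and control means is confounded.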
outcome_observed <- outcome %>%
  dplyr::mutate(y = y_0 * (1 - w) + y_1 * w) %>%
  dplyr::select(y, w, x)
head(outcome_observed)
## # A tibble: 6 x 3
##         y w          x
##     <dbl> <lgl>  <dbl>
## 1  1.10   FALSE -0.626
## 2  1.12   FALSE  0.184
## 3 -0.913  FALSE -0.836
## 4  1.68   TRUE   1.60
## 5  0.0859 FALSE  0.330
## 6  1.61   TRUE  -0.820
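The observed outcome is built from the potential outcomes via the usual consistency (switching) equation, after which $y_0$ and $y_1$ are dropped from the analysis data:

$$
Y_i = (1 - W_i)\, Y_i(0) + W_i\, Y_i(1).
$$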
result <- lm(
  data = outcome_observed,
  formula = y ~ w + w:x
)
result %>%
  modelsummary(fmt = 6)
|             | Model 1    |
|-------------|------------|
| (Intercept) | -0.017528  |
|             | (0.049717) |
| wTRUE       | 1.103605   |
|             | (0.073258) |
| wFALSE × x  | 0.088752   |
|             | (0.047710) |
| wTRUE × x   | 0.068441   |
|             | (0.052452) |
| Num.Obs.    | 1000       |
| R2          | 0.238      |
| R2 Adj.     | 0.236      |
| AIC         | 2949.7     |
| BIC         | 2974.3     |
| Log.Lik.    | -1469.856  |
| F           | 103.888    |
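With the `y ~ w + w:x` specification, the `wTRUE` coefficient is the estimated treated-minus-control gap at $x = 0$. One way to turn the fitted model into an average effect over the sample is to average the model-implied difference in predictions; the lines below are a minimal sketch of that calculation using base `predict()`, an illustrative aside rather than one of the estimators compared here.

# Sketch (not part of the original comparison): average the model-implied
# treated-vs-control difference over the observed covariate distribution.
pred_treated <- predict(result, newdata = dplyr::mutate(outcome_observed, w = TRUE))
pred_control <- predict(result, newdata = dplyr::mutate(outcome_observed, w = FALSE))
mean(pred_treated - pred_control)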
result <- c(
  y ~ w + w:exp(x),
  # note: inside a formula, x^2 and x + x^2 collapse to x unless wrapped in I(),
  # so the next two specifications are equivalent to y ~ w + w:x
  y ~ w + w:x^2,
  y ~ w + w:(x + x^2)
) %>%
  purrr::map(
    .,
    ~ lm(
      data = outcome_observed,
      formula = .
    )
  )
result %>% modelsummary(fmt = 6)
|                 | Model 1    | Model 2    | Model 3    |
|-----------------|------------|------------|------------|
| (Intercept)     | -0.125049  | -0.017528  | -0.017528  |
|                 | (0.057698) | (0.049717) | (0.049717) |
| wTRUE           | 1.179050   | 1.103605   | 1.103605   |
|                 | (0.084518) | (0.073258) | (0.073258) |
| wFALSE × exp(x) | 0.066445   |            |            |
|                 | (0.032783) |            |            |
| wTRUE × exp(x)  | 0.026059   |            |            |
|                 | (0.016001) |            |            |
| wFALSE × x      |            | 0.088752   | 0.088752   |
|                 |            | (0.047710) | (0.047710) |
| wTRUE × x       |            | 0.068441   | 0.068441   |
|                 |            | (0.052452) | (0.052452) |
| Num.Obs.        | 1000       | 1000       | 1000       |
| R2              | 0.240      | 0.238      | 0.238      |
| R2 Adj.         | 0.237      | 0.236      | 0.236      |
| AIC             | 2948.1     | 2949.7     | 2949.7     |
| BIC             | 2972.7     | 2974.3     | 2974.3     |
| Log.Lik.        | -1469.059  | -1469.856  | -1469.856  |
| F               | 104.583    | 103.888    | 103.888    |
propensity_score <- glm(
  data = outcome_observed,
  formula = w ~ x,
  family = binomial("logit")
)
propensity_score %>%
  modelsummary(fmt = 6)
|             | Model 1    |
|-------------|------------|
| (Intercept) | -0.139605  |
|             | (0.069562) |
| x           | 0.951090   |
|             | (0.080246) |
| Num.Obs.    | 1000       |
| AIC         | 1204.7     |
| BIC         | 1214.5     |
| Log.Lik.    | -600.337   |
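The logistic regression estimates the propensity score as

$$
\hat{e}(x) = \frac{\exp(\hat{\alpha}_0 + \hat{\alpha}_1 x)}{1 + \exp(\hat{\alpha}_0 + \hat{\alpha}_1 x)},
$$

and the fitted coefficients $\hat{\alpha}_0 \approx -0.14$ and $\hat{\alpha}_1 \approx 0.95$ are close to the true values of $0$ and $1$ used when assigning treatment.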
outcome_observed <- outcome_observed %>%
  dplyr::mutate(e = predict(propensity_score, type = "response"))
outcome_observed %>%
  ggplot(aes(x = x, y = e)) +
  geom_line() +
  theme_classic()
outcome_observed %>%
  # inverse-probability weights: 1 / e for treated, 1 / (1 - e) for control, scaled by 1 / N
  dplyr::mutate(weight = (w / e + (1 - w) / (1 - e)) / length(w)) %>%
  dplyr::summarise(y = sum(w * y * weight) - sum((1 - w) * y * weight))
## # A tibble: 1 x 1
##       y
##   <dbl>
## 1  1.08
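The weighting step above implements the inverse-probability-weighted (Horvitz-Thompson) estimator, expressed in the code through a single `weight` column:

$$
\hat{\tau}_{\mathrm{IPW}}
= \frac{1}{N} \sum_{i=1}^{N} \frac{W_i Y_i}{\hat{e}(X_i)}
- \frac{1}{N} \sum_{i=1}^{N} \frac{(1 - W_i) Y_i}{1 - \hat{e}(X_i)}
\approx 1.08.
$$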
blocking_estimator <- outcome_observed %>%
  # assign each observation to one of J strata by quantile of the estimated propensity score
  dplyr::mutate(j = dplyr::ntile(e, J)) %>%
  dplyr::group_by(j) %>%
  dplyr::summarise(
    N_j = length(y),
    y = sum(y * w) / sum(w) - sum(y * (1 - w)) / sum(1 - w)
  ) %>%
  dplyr::ungroup()
blocking_estimator
## # A tibble: 4 x 3
##       j   N_j     y
##   <int> <int> <dbl>
## 1     1   250  1.09
## 2     2   250  1.21
## 3     3   250  1.00
## 4     4   250  1.17
blocking_estimator %>%
  dplyr::summarise(y = sum(y * N_j) / sum(N_j))
## # A tibble: 1 x 1
##       y
##   <dbl>
## 1  1.12
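The blocking (stratification) estimator splits the sample into $J = 4$ strata by quartiles of the estimated propensity score, takes the within-stratum difference in treated and control means, and averages those differences with weights proportional to stratum size:

$$
\hat{\tau}_{\mathrm{block}}
= \sum_{j=1}^{J} \frac{N_j}{N}
\left( \bar{Y}_{j,\mathrm{treated}} - \bar{Y}_{j,\mathrm{control}} \right)
\approx 1.12.
$$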
matched <- MatchIt::matchit(
  data = outcome_observed,
  formula = w ~ x,
  method = "nearest",
  distance = "glm",
  ratio = M
)
matched
## A matchit object
##  - method: 1:1 nearest neighbor matching without replacement
##  - distance: Propensity score
##              - estimated with logistic regression
##  - number of obs.: 1000 (original), 938 (matched)
##  - target estimand: ATT
##  - covariates: x
matched_observed <- MatchIt::match.data(matched)
matched_observed
## # A tibble: 938 x 7
##          y w          x     e distance weights subclass
##      <dbl> <lgl>  <dbl> <dbl>    <dbl>   <dbl> <fct>
##  1  1.10   FALSE -0.626 0.324    0.324       1 424
##  2  1.12   FALSE  0.184 0.509    0.509       1 59
##  3 -0.913  FALSE -0.836 0.282    0.282       1 363
##  4  1.68   TRUE   1.60  0.799    0.799       1 158
##  5  0.0859 FALSE  0.330 0.543    0.543       1 39
##  6  1.61   TRUE  -0.820 0.285    0.285       1 265
##  7  1.10   TRUE   0.487 0.580    0.580       1 314
##  8 -0.236  TRUE   0.738 0.637    0.637       1 366
##  9 -1.07   TRUE   0.576 0.601    0.601       1 412
## 10  0.983  FALSE -0.305 0.394    0.394       1 113
## # ... with 928 more rows
matched_observed %>%
  dplyr::group_by(subclass) %>%
  # treated-minus-control difference within each matched pair
  dplyr::summarise(y = sum(y * w) - sum(y * (1 - w))) %>%
  dplyr::ungroup() %>%
  dplyr::summarise(y = mean(y))
## # A tibble: 1 x 1
##       y
##   <dbl>
## 1  1.15
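With 1:1 nearest-neighbour matching on the estimated propensity score, each matched pair (`subclass`) contributes the difference between its treated and control outcomes, and the estimator averages these within-pair differences over the 469 pairs (938 matched observations), targeting the ATT:

$$
\hat{\tau}_{\mathrm{match}}
= \frac{1}{\#\text{pairs}} \sum_{p}
\left( Y_{p,\mathrm{treated}} - Y_{p,\mathrm{control}} \right)
\approx 1.15.
$$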