vignettes/ffv_opt_sobin_rkone_allrw_training_wage.Rmd
ffv_opt_sobin_rkone_allrw_training_wage.Rmd
Linear wage regression estimation of A and alpha from the Lalonde Training Dataset (722 Observations). Solve for optimal binary allocation queues. Test binary allocation queue with Lalonde training dataset. There are 722 observations, 297 in the treatment group, 425 in the control group. Following Lalonde, regressions are in terms of wage levels (not log wage)
Generate four categories by initial height and mother’s education levels combinations.
# Dataset
data(df_opt_lalonde_training)
# Add a binary variable for if there are wage in year 1975
dft <- df_opt_lalonde_training %>%
mutate(re75_zero = case_when(re75 <= 1 ~ 1,
TRUE ~ 0)) %>%
select(trt, re75_zero, everything())
# summary
REconTools::ff_summ_percentiles(dft)
## Warning: `funs()` was deprecated in dplyr 0.8.0.
## Please use a list of either functions or lambdas:
##
## # Simple named list:
## list(mean = mean, median = median)
##
## # Auto named with `tibble::lst()`:
## tibble::lst(mean, median)
##
## # Using lambdas
## list(~ mean(., trim = .2), ~ median(., na.rm = TRUE))
## This warning is displayed once every 8 hours.
## Call `lifecycle::last_lifecycle_warnings()` to see where this warning was generated.
## Warning: `as.tibble()` was deprecated in tibble 2.0.0.
## Please use `as_tibble()` instead.
## The signature and semantics have changed, see `?as_tibble`.
## This warning is displayed once every 8 hours.
## Call `lifecycle::last_lifecycle_warnings()` to see where this warning was generated.
## Warning: attributes are not identical across measure variables;
## they will be dropped
## Warning: The `x` argument of `as_tibble.matrix()` must have unique column names if `.name_repair` is omitted as of tibble 2.0.0.
## Using compatibility `.name_repair`.
## This warning is displayed once every 8 hours.
## Call `lifecycle::last_lifecycle_warnings()` to see where this warning was generated.
## # A tibble: 18 x 13
## stats age black educ hisp marr nodeg re74 re75 re75.zero re78 trt
## <chr> <chr> <chr> <chr> <chr> <chr> <chr> <chr> <chr> <chr> <chr> <chr>
## 1 n "722" "722" "722" "722" "722" "722" "722" "722" "722" "722" "722"
## 2 unique " 35" " 2" " 14" " 2" " 2" " 2" "116" "424" " 2" "524" " 2"
## 3 NAobs " 0" " 0" " 0" " 0" " 0" " 0" "277" " 0" " 0" " 0" " 0"
## 4 ZEROobs " 0" "144" " 0" "646" "605" "159" NA "289" "433" "196" "425"
## 5 mean " 2~ " ~ " 1~ " ~ " ~ " ~ "210~ "304~ " 0.40~ "545~ " ~
## 6 sd " ~ " ~ " ~ " ~ " ~ " ~ "536~ "506~ " 0.49~ "625~ " ~
## 7 cv "0.2~ "0.4~ "0.1~ "2.9~ "2.2~ "0.5~ "2.5~ "1.6~ "1.22488~ "1.1~ "1.1~
## 8 min "17" " 0" " 3" " 0" " 0" " 0" " 0" " 0" " 0" " 0" " 0"
## 9 p01 "17.~ " 0.~ " 5.~ " 0.~ " 0.~ " 0.~ " 0.~ " 0.~ " 0.00" " 0.~ " 0.~
## 10 p05 "17.~ " 0.~ " 8.~ " 0.~ " 0.~ " 0.~ " 0.~ " 0.~ " 0.00" " 0.~ " 0.~
## 11 p10 "18.~ " 0.~ " 8.~ " 0.~ " 0.~ " 0.~ " 0.~ " 0.~ " 0.0" " 0.~ " 0.~
## 12 p25 " 19~ " 1~ " 9~ " 0~ " 0~ " 1~ " 0~ " 0~ " 0.00" " 0~ " 0~
## 13 p50 " 2~ " ~ " 1~ " ~ " ~ " ~ " ~ " 93~ " 0.00~ "395~ " ~
## 14 p75 " 2~ " ~ " 1~ " ~ " ~ " ~ " 82~ "399~ " 1.00~ "877~ " ~
## 15 p90 " ~ " ~ " ~ " ~ " ~ " ~ " 78~ " 89~ " 1.0~ "128~ " ~
## 16 p95 " ~ " ~ " ~ " ~ " ~ " ~ "122~ "122~ " 1.0~ "166~ " ~
## 17 p99 " ~ " ~ " ~ " ~ " ~ " ~ "270~ "242~ " 1.0~ "260~ " ~
## 18 max " ~ " ~ " ~ " ~ " ~ " ~ "395~ "374~ " 1.0~ "603~ " ~
## # ... with 1 more variable: X <chr>
# summary treated only
REconTools::ff_summ_percentiles(dft %>% filter(trt == 1))
## Warning: attributes are not identical across measure variables;
## they will be dropped
## # A tibble: 18 x 13
## stats age black educ hisp marr nodeg re74 re75 re75.zero re78 trt
## <chr> <chr> <chr> <chr> <chr> <chr> <chr> <chr> <chr> <chr> <chr> <chr>
## 1 n "297" "297" "297" "297" "297" "297" "297" "297" "297" "297" "297"
## 2 unique " 30" " 2" " 13" " 2" " 2" " 2" " 54" "184" " 2" "230" " 1"
## 3 NAobs " 0" " 0" " 0" " 0" " 0" " 0" "112" " 0" " 0" " 0" " 0"
## 4 ZEROobs " 0" " 59" " 0" "269" "247" " 80" NA "111" "186" " 67" " 0"
## 5 mean "2.4~ "8.0~ "1.0~ "9.4~ "1.6~ "7.3~ "2.0~ "3.0~ "3.73737~ "5.9~ "1.0~
## 6 sd " ~ " ~ " ~ " ~ " ~ " ~ "488~ "487~ " 0.48~ "692~ " ~
## 7 cv "0.2~ "0.4~ "0.1~ "3.1~ "2.2~ "0.6~ "2.3~ "1.5~ "1.29666~ "1.1~ "0.0~
## 8 min " 17" " 0" " 4" " 0" " 0" " 0" " 0" " 0" " 0" " 0" " 1"
## 9 p01 " 17~ " 0~ " 4~ " 0~ " 0~ " 0~ " 0~ " 0~ " 0.0" " 0~ " 1~
## 10 p05 " 17" " 0" " 8" " 0" " 0" " 0" " 0" " 0" " 0" " 0" " 1"
## 11 p10 " 18" " 0" " 8" " 0" " 0" " 0" " 0" " 0" " 0" " 0" " 1"
## 12 p25 " 2~ " ~ " ~ " ~ " ~ " ~ " ~ " ~ " 0.00~ " 54~ " ~
## 13 p50 " 2~ " ~ " 1~ " ~ " ~ " ~ " ~ "111~ " 0.00~ "423~ " ~
## 14 p75 " 2~ " ~ " 1~ " ~ " ~ " ~ "129~ "431~ " 1.00~ "938~ " ~
## 15 p90 " ~ " ~ " ~ " ~ " ~ " ~ " 84~ " 84~ " 1.0~ "134~ " ~
## 16 p95 " ~ " ~ " ~ " ~ " ~ " ~ "123~ "122~ " 1.0~ "173~ " ~
## 17 p99 " ~ " ~ " ~ " ~ " ~ " ~ "211~ "231~ " 1.0~ "271~ " ~
## 18 max " ~ " ~ " ~ " ~ " ~ " ~ "350~ "374~ " 1.0~ "603~ " ~
## # ... with 1 more variable: X <chr>
REconTools::ff_summ_percentiles(dft %>% filter(trt == 0))
## Warning: attributes are not identical across measure variables;
## they will be dropped
## # A tibble: 18 x 13
## stats age black educ hisp marr nodeg re74 re75 re75.zero re78 trt
## <chr> <chr> <chr> <chr> <chr> <chr> <chr> <chr> <chr> <chr> <chr> <chr>
## 1 n "425" "425" "425" "425" "425" "425" "425" "425" "425" "425" "425"
## 2 unique " 31" " 2" " 12" " 2" " 2" " 2" " 67" "245" " 2" "297" " 1"
## 3 NAobs " 0" " 0" " 0" " 0" " 0" " 0" "165" " 0" " 0" " 0" " 0"
## 4 ZEROobs " 0" " 85" " 0" "377" "358" " 79" NA "178" "247" "129" "425"
## 5 mean " 2~ " ~ " 1~ " ~ " ~ " ~ "210~ "302~ " 0.41~ "509~ " ~
## 6 sd " ~ " ~ " ~ " ~ " ~ " ~ "568~ "520~ " 0.49~ "571~ " ~
## 7 cv "0.2~ "0.5~ "0.1~ "2.8~ "2.3~ "0.4~ "2.6~ "1.7~ "1.17936~ "1.1~ NA
## 8 min "17" " 0" " 3" " 0" " 0" " 0" " 0" " 0" " 0" " 0" " 0"
## 9 p01 "17.~ " 0.~ " 5.~ " 0.~ " 0.~ " 0.~ " 0.~ " 0.~ " 0.00" " 0.~ " 0.~
## 10 p05 "17.~ " 0.~ " 8.~ " 0.~ " 0.~ " 0.~ " 0.~ " 0.~ " 0.0" " 0.~ " 0.~
## 11 p10 "18.~ " 0.~ " 8.~ " 0.~ " 0.~ " 0.~ " 0.~ " 0.~ " 0.0" " 0.~ " 0.~
## 12 p25 " 19" " 1" " 9" " 0" " 0" " 1" " 0" " 0" " 0" " 0" " 0"
## 13 p50 " 2~ " ~ " 1~ " ~ " ~ " ~ " ~ " 82~ " 0.00~ "374~ " ~
## 14 p75 " 2~ " ~ " 1~ " ~ " ~ " ~ " 13~ "364~ " 1.00~ "832~ " ~
## 15 p90 " ~ " ~ " ~ " ~ " ~ " ~ " 76~ " 91~ " 1.0~ "124~ " ~
## 16 p95 " ~ " ~ " ~ " ~ " ~ " ~ "106~ "121~ " 1.0~ "162~ " ~
## 17 p99 " ~ " ~ " ~ " ~ " ~ " ~ "295~ "242~ " 1.0~ "209~ " ~
## 18 max " ~ " ~ " ~ " ~ " ~ " ~ "395~ "369~ " 1.0~ "394~ " ~
## # ... with 1 more variable: X <chr>
# within 297 treated, 111 (37%) had zero wage in 1975
# within 425 untreated, 178 (42%) had zero wage in 1975
# Load Data
dft <- dft %>% mutate(id = X) %>%
select(-X) %>%
select(id, everything())
# Summarize
str(dft)
## 'data.frame': 722 obs. of 12 variables:
## $ id : int 1 2 3 4 5 6 7 8 9 10 ...
## $ trt : int 0 0 0 0 0 0 0 0 0 0 ...
## $ re75_zero: num 1 1 1 0 1 1 1 0 1 1 ...
## $ age : int 23 26 22 34 18 45 18 27 24 34 ...
## $ educ : int 10 12 9 9 9 11 9 12 8 11 ...
## $ black : int 1 0 1 1 1 1 1 1 0 1 ...
## $ hisp : int 0 0 0 0 0 0 0 0 0 0 ...
## $ marr : int 0 0 0 0 0 0 0 0 0 1 ...
## $ nodeg : int 1 0 1 1 1 1 1 0 1 1 ...
## $ re74 : num 0 0 0 NA 0 0 0 NA 0 0 ...
## $ re75 : num 0 0 0 4368 0 ...
## $ re78 : num 0 12384 0 14051 10740 ...
summary(dft)
## id trt re75_zero age
## Min. : 1.0 Min. :0.0000 Min. :0.0000 Min. :17.00
## 1st Qu.: 181.2 1st Qu.:0.0000 1st Qu.:0.0000 1st Qu.:19.00
## Median : 361.5 Median :0.0000 Median :0.0000 Median :23.00
## Mean : 854.4 Mean :0.4114 Mean :0.4003 Mean :24.52
## 3rd Qu.:1458.5 3rd Qu.:1.0000 3rd Qu.:1.0000 3rd Qu.:27.00
## Max. :4110.0 Max. :1.0000 Max. :1.0000 Max. :55.00
##
## educ black hisp marr
## Min. : 3.00 Min. :0.0000 Min. :0.0000 Min. :0.000
## 1st Qu.: 9.00 1st Qu.:1.0000 1st Qu.:0.0000 1st Qu.:0.000
## Median :10.00 Median :1.0000 Median :0.0000 Median :0.000
## Mean :10.27 Mean :0.8006 Mean :0.1053 Mean :0.162
## 3rd Qu.:11.00 3rd Qu.:1.0000 3rd Qu.:0.0000 3rd Qu.:0.000
## Max. :16.00 Max. :1.0000 Max. :1.0000 Max. :1.000
##
## nodeg re74 re75 re78
## Min. :0.0000 Min. : 0.0 Min. : 0.0 Min. : 0
## 1st Qu.:1.0000 1st Qu.: 0.0 1st Qu.: 0.0 1st Qu.: 0
## Median :1.0000 Median : 0.0 Median : 936.3 Median : 3952
## Mean :0.7798 Mean : 2102.3 Mean : 3042.9 Mean : 5455
## 3rd Qu.:1.0000 3rd Qu.: 824.4 3rd Qu.: 3993.2 3rd Qu.: 8772
## Max. :1.0000 Max. :39570.7 Max. :37431.7 Max. :60308
## NA's :277
# Generate Employment 0 or 1 status Variable, and non-zero wages variable
dft <- dft %>% mutate(emp78 =
case_when(re78 <= 0 ~ 0,
TRUE ~ 1)) %>%
mutate(emp75 =
case_when(re75 <= 0 ~ 0,
TRUE ~ 1)) %>%
mutate(emp74 =
case_when(re74 <= 0 ~ 0,
TRUE ~ 1))
# Generate combine black + hispanic status
# 0 = white, 1 = black, 2 = hispanics
dft <- dft %>%
mutate(race =
case_when(black == 1 ~ 1,
hisp == 1 ~ 2,
TRUE ~ 0))
dft <- dft %>%
mutate(age_m2 =
case_when(age <= 23 ~ 1,
age > 23~ 2)) %>%
mutate(age_m3 =
case_when(age <= 20 ~ 1,
age > 20 & age <= 26 ~ 2,
age > 26 ~ 3))
# filter(re78 != 0) %>%
# mutate(re78_log = log(re78))
# Exclude zeros
# when this is on, both linear and log linear results exclude wage = 0
# dft <- dft %>%
# filter(re78 > 0)
# Generate Discrete Version of continuous variables
# dft <- dft %>%
# mutate(momwgtLowHigh = cut(lwt,
# breaks=c(-Inf, 129, Inf),
# labels=c("LW","HW"))) %>%
# mutate(mombirthage = cut(age,
# breaks=c(-Inf, 24, Inf),
# labels=c("young","older")))
What is the average difference in wage between treatment and control, do they match what is reported in Lalonde (1986)?
# Summarize average for all variables grouping by treatment status
# re78 is significantly different
round(t(dft %>% group_by(trt) %>%
summarise_if(is.numeric, funs(mean = mean), na.rm = TRUE)), digits=3)
## [,1] [,2]
## trt 0.000 1.000
## id_mean 213.000 1772.316
## re75_zero_mean 0.419 0.374
## age_mean 24.447 24.626
## educ_mean 10.188 10.380
## black_mean 0.800 0.801
## hisp_mean 0.113 0.094
## marr_mean 0.158 0.168
## nodeg_mean 0.814 0.731
## re74_mean 2107.027 2095.574
## re75_mean 3026.683 3066.098
## re78_mean 5090.048 5976.352
## emp78_mean 0.696 0.774
## emp75_mean 0.581 0.626
## emp74_mean 0.541 0.559
## race_mean 1.026 0.990
## age_m2_mean 1.489 1.481
## age_m3_mean 1.948 1.990
round(t(dft %>% group_by(marr, age_m2) %>%
summarise_if(is.numeric, funs(mean = mean), na.rm = TRUE)), digits=3)
## [,1] [,2] [,3] [,4]
## marr 0.000 0.000 1.000 1.000
## age_m2 1.000 2.000 1.000 2.000
## id_mean 840.140 823.626 1083.179 928.270
## trt_mean 0.411 0.405 0.464 0.416
## re75_zero_mean 0.350 0.489 0.214 0.393
## age_mean 19.569 29.485 20.929 30.124
## educ_mean 10.105 10.412 10.143 10.506
## black_mean 0.778 0.824 0.607 0.876
## hisp_mean 0.134 0.069 0.250 0.056
## nodeg_mean 0.845 0.698 0.821 0.753
## re74_mean 1991.445 1509.914 2758.832 3996.214
## re75_mean 2304.886 2950.030 5827.808 5284.371
## re78_mean 5219.173 5516.690 7153.395 5644.975
## emp78_mean 0.752 0.683 0.893 0.719
## emp75_mean 0.650 0.511 0.786 0.607
## emp74_mean 0.615 0.447 0.714 0.539
## race_mean 1.047 0.962 1.107 0.989
## age_m3_mean 1.309 2.630 1.607 2.652
# Summarize by finer sub groups: RACE
# big increase for black, but not for other group
round(t(dft %>% group_by(trt, race) %>%
summarise_if(is.numeric, funs(mean = mean), na.rm = TRUE)), digits=3)
## [,1] [,2] [,3] [,4] [,5] [,6]
## trt 0.000 0.000 0.000 1.000 1.000 1.000
## race 0.000 1.000 2.000 0.000 1.000 2.000
## id_mean 233.027 209.679 221.083 1700.129 1782.723 1763.786
## re75_zero_mean 0.243 0.435 0.438 0.323 0.403 0.179
## age_mean 23.351 24.812 22.708 25.677 24.853 21.536
## educ_mean 10.486 10.285 9.271 10.839 10.382 9.857
## black_mean 0.000 1.000 0.000 0.000 1.000 0.000
## hisp_mean 0.000 0.000 1.000 0.000 0.000 1.000
## marr_mean 0.135 0.162 0.146 0.161 0.168 0.179
## nodeg_mean 0.649 0.821 0.896 0.548 0.739 0.857
## re74_mean 3791.631 2086.477 1242.022 1305.037 2155.013 2546.219
## re75_mean 5373.717 2862.948 2377.296 4038.830 2785.916 4370.691
## re78_mean 7114.471 4654.996 6611.178 7034.041 5814.419 6181.772
## emp78_mean 0.838 0.659 0.854 0.935 0.739 0.893
## emp75_mean 0.757 0.565 0.562 0.677 0.597 0.821
## emp74_mean 0.676 0.521 0.583 0.548 0.534 0.786
## age_m2_mean 1.514 1.506 1.354 1.484 1.513 1.214
## age_m3_mean 1.946 1.988 1.667 2.065 2.021 1.643
# Summarize by finer sub groups: MARRIAGE
# big increase for black, but not for other group
round(t(dft %>% group_by(trt, marr) %>%
summarise_if(is.numeric, funs(mean = mean), na.rm = TRUE)), digits=3)
## [,1] [,2] [,3] [,4]
## trt 0.000 0.000 1.000 1.000
## marr 0.000 1.000 0.000 1.000
## id_mean 210.176 228.090 1735.688 1953.260
## re75_zero_mean 0.427 0.373 0.385 0.320
## age_mean 23.933 27.194 23.761 28.900
## educ_mean 10.131 10.493 10.393 10.320
## black_mean 0.796 0.821 0.802 0.800
## hisp_mean 0.115 0.104 0.093 0.100
## nodeg_mean 0.818 0.791 0.729 0.740
## re74_mean 1834.893 3603.759 1646.427 4020.488
## re75_mean 2465.121 6027.269 2756.966 4593.212
## re78_mean 5105.237 5008.892 5699.897 7342.041
## emp78_mean 0.701 0.672 0.753 0.880
## emp75_mean 0.573 0.627 0.615 0.680
## emp74_mean 0.534 0.582 0.555 0.580
## race_mean 1.025 1.030 0.988 1.000
## age_m2_mean 1.436 1.776 1.429 1.740
## age_m3_mean 1.866 2.388 1.903 2.420
# Summarize by finer sub groups: AGE GROUPS
round(t(dft %>% group_by(trt, age_m2) %>%
summarise_if(is.numeric, funs(mean = mean), na.rm = TRUE)), digits=3)
## [,1] [,2] [,3] [,4]
## trt 0.000 0.000 1.000 1.000
## age_m2 1.000 2.000 1.000 2.000
## id_mean 241.908 182.841 1727.292 1820.804
## re75_zero_mean 0.382 0.457 0.279 0.476
## age_mean 19.539 29.567 19.857 29.762
## educ_mean 9.977 10.409 10.292 10.476
## black_mean 0.774 0.827 0.753 0.853
## hisp_mean 0.143 0.082 0.143 0.042
## marr_mean 0.069 0.250 0.084 0.259
## nodeg_mean 0.899 0.726 0.766 0.692
## re74_mean 2333.261 1900.754 1571.490 2486.164
## re75_mean 2313.545 3770.677 2933.215 3209.204
## re78_mean 5095.991 5083.849 5744.425 6226.119
## emp78_mean 0.728 0.663 0.812 0.734
## emp75_mean 0.618 0.543 0.721 0.524
## emp74_mean 0.590 0.490 0.669 0.441
## race_mean 1.060 0.990 1.039 0.937
## age_m3_mean 1.290 2.635 1.390 2.636
round(t(dft %>% group_by(trt, age_m3) %>%
summarise_if(is.numeric, funs(mean = mean), na.rm = TRUE)), digits=3)
## [,1] [,2] [,3] [,4] [,5] [,6]
## trt 0.000 0.000 0.000 1.000 1.000 1.000
## age_m3 1.000 2.000 3.000 1.000 2.000 3.000
## id_mean 261.675 180.475 190.462 1646.723 1918.705 1721.879
## re75_zero_mean 0.383 0.453 0.424 0.266 0.366 0.495
## age_mean 18.532 23.626 32.212 18.479 23.402 32.484
## educ_mean 9.877 10.209 10.530 9.851 10.902 10.286
## black_mean 0.740 0.835 0.833 0.777 0.777 0.857
## hisp_mean 0.175 0.072 0.083 0.160 0.071 0.055
## marr_mean 0.039 0.209 0.242 0.053 0.170 0.286
## nodeg_mean 0.922 0.806 0.697 0.883 0.607 0.725
## re74_mean 827.694 3297.704 2095.804 1466.314 2840.083 1794.839
## re75_mean 1564.343 3604.432 4124.358 2302.777 4136.499 2537.167
## re78_mean 4987.829 5320.403 4966.734 5498.803 5744.032 6755.577
## emp78_mean 0.760 0.676 0.644 0.777 0.795 0.747
## emp75_mean 0.617 0.547 0.576 0.734 0.634 0.505
## emp74_mean 0.584 0.532 0.500 0.670 0.598 0.396
## race_mean 1.091 0.978 1.000 1.096 0.920 0.967
## age_m2_mean 1.000 1.547 2.000 1.000 1.464 2.000
round(t(dft %>% group_by(trt, marr, age_m2) %>%
summarise_if(is.numeric, funs(mean = mean), na.rm = TRUE)), digits=3)
## [,1] [,2] [,3] [,4] [,5] [,6] [,7]
## trt 0.000 0.000 0.000 0.000 1.000 1.000 1.000
## marr 0.000 0.000 1.000 1.000 0.000 0.000 1.000
## age_m2 1.000 2.000 1.000 2.000 1.000 2.000 1.000
## id_mean 243.302 167.282 223.133 229.519 1695.184 1789.566 2075.538
## re75_zero_mean 0.391 0.474 0.267 0.404 0.291 0.509 0.154
## age_mean 19.450 29.737 20.733 29.058 19.738 29.113 21.154
## educ_mean 9.965 10.346 10.133 10.596 10.305 10.509 10.154
## black_mean 0.782 0.814 0.667 0.865 0.773 0.840 0.538
## hisp_mean 0.139 0.083 0.200 0.077 0.128 0.047 0.308
## nodeg_mean 0.896 0.718 0.933 0.750 0.773 0.670 0.692
## re74_mean 2215.155 1386.863 5144.184 3383.698 1621.704 1669.249 1055.009
## re75_mean 2033.499 3024.016 6084.840 6010.662 2693.681 2841.146 5531.232
## re78_mean 5009.475 5229.235 6261.061 4647.690 5519.591 5939.737 8183.010
## emp78_mean 0.723 0.673 0.800 0.635 0.794 0.698 1.000
## emp75_mean 0.609 0.526 0.733 0.596 0.709 0.491 0.846
## emp74_mean 0.579 0.474 0.733 0.538 0.667 0.406 0.692
## race_mean 1.059 0.981 1.067 1.019 1.028 0.934 1.154
## age_m3_mean 1.267 2.641 1.600 2.615 1.369 2.613 1.615
## [,8]
## trt 1.000
## marr 1.000
## age_m2 2.000
## id_mean 1910.297
## re75_zero_mean 0.378
## age_mean 31.622
## educ_mean 10.378
## black_mean 0.892
## hisp_mean 0.027
## nodeg_mean 0.757
## re74_mean 4761.858
## re75_mean 4263.638
## re78_mean 7046.565
## emp78_mean 0.838
## emp75_mean 0.622
## emp74_mean 0.541
## race_mean 0.946
## age_m3_mean 2.703
# Tabulate groups, how many in each group, enough for group heterogeneity in effects?
dft %>%
group_by(trt, marr) %>%
summarize(freq = n()) %>%
pivot_wider(names_from = trt, values_from = freq)
## `summarise()` has grouped output by 'trt'. You can override using the `.groups` argument.
## # A tibble: 2 x 3
## marr `0` `1`
## <int> <int> <int>
## 1 0 358 247
## 2 1 67 50
# Tabulate groups, how many in each group, enough for group heterogeneity in effects?
dft %>%
group_by(trt, age_m2, marr) %>%
summarize(freq = n()) %>%
pivot_wider(names_from = trt, values_from = freq)
## `summarise()` has grouped output by 'trt', 'age_m2'. You can override using the `.groups` argument.
## # A tibble: 4 x 4
## # Groups: age_m2 [2]
## age_m2 marr `0` `1`
## <dbl> <int> <int> <int>
## 1 1 0 202 141
## 2 1 1 15 13
## 3 2 0 156 106
## 4 2 1 52 37
# As noted in Lalonde (1986), functional form assumptions do not matter much
# Dummies, treatment effect average about 801 dollars
summary(lm(re78 ~ factor(age)
+ factor(educ)
+ factor(race)
+ factor(marr)
+ factor(nodeg)
+ factor(trt) - 1,
data = dft))
##
## Call:
## lm(formula = re78 ~ factor(age) + factor(educ) + factor(race) +
## factor(marr) + factor(nodeg) + factor(trt) - 1, data = dft)
##
## Residuals:
## Min 1Q Median 3Q Max
## -14412 -4409 -1265 3139 54012
##
## Coefficients: (2 not defined because of singularities)
## Estimate Std. Error t value Pr(>|t|)
## factor(age)17 2290.8 6400.4 0.358 0.7205
## factor(age)18 2445.2 6380.0 0.383 0.7016
## factor(age)19 3540.8 6384.6 0.555 0.5794
## factor(age)20 3558.8 6389.4 0.557 0.5777
## factor(age)21 4670.1 6412.9 0.728 0.4667
## factor(age)22 2975.7 6262.4 0.475 0.6348
## factor(age)23 2278.3 6416.7 0.355 0.7227
## factor(age)24 2362.0 6417.2 0.368 0.7129
## factor(age)25 2430.8 6395.4 0.380 0.7040
## factor(age)26 4223.8 6419.2 0.658 0.5108
## factor(age)27 2720.7 6413.6 0.424 0.6716
## factor(age)28 4311.2 6428.8 0.671 0.5027
## factor(age)29 2895.7 6475.4 0.447 0.6549
## factor(age)30 2044.7 6593.6 0.310 0.7566
## factor(age)31 3847.6 6471.3 0.595 0.5523
## factor(age)32 4379.5 6762.9 0.648 0.5175
## factor(age)33 3880.4 6645.4 0.584 0.5595
## factor(age)34 2741.6 6613.4 0.415 0.6786
## factor(age)35 2397.2 6770.4 0.354 0.7234
## factor(age)36 4081.3 6758.7 0.604 0.5461
## factor(age)37 1315.2 7296.6 0.180 0.8570
## factor(age)38 1610.3 6740.8 0.239 0.8113
## factor(age)39 -545.5 6937.2 -0.079 0.9374
## factor(age)40 14642.9 7741.1 1.892 0.0590 .
## factor(age)41 -1436.5 6972.1 -0.206 0.8368
## factor(age)42 3079.3 6992.4 0.440 0.6598
## factor(age)43 1562.5 7719.2 0.202 0.8397
## factor(age)44 3459.6 7062.6 0.490 0.6244
## factor(age)45 6597.1 7347.2 0.898 0.3696
## factor(age)46 -1436.8 7354.4 -0.195 0.8452
## factor(age)48 5423.7 9353.1 0.580 0.5622
## factor(age)49 12421.9 8898.2 1.396 0.1632
## factor(age)50 2091.2 7718.9 0.271 0.7865
## factor(age)54 6628.8 8864.7 0.748 0.4549
## factor(age)55 7456.5 6243.0 1.194 0.2328
## factor(educ)4 1939.6 6912.8 0.281 0.7791
## factor(educ)5 2173.8 6919.6 0.314 0.7535
## factor(educ)6 496.9 6723.6 0.074 0.9411
## factor(educ)7 3107.2 6476.6 0.480 0.6316
## factor(educ)8 2867.3 6322.4 0.454 0.6503
## factor(educ)9 3836.5 6298.3 0.609 0.5426
## factor(educ)10 3028.1 6288.2 0.482 0.6303
## factor(educ)11 2796.4 6283.5 0.445 0.6564
## factor(educ)12 3351.0 6291.2 0.533 0.5945
## factor(educ)13 4125.2 6420.0 0.643 0.5207
## factor(educ)14 11173.8 6586.6 1.696 0.0903 .
## factor(educ)15 9616.1 7679.7 1.252 0.2110
## factor(educ)16 NA NA NA NA
## factor(race)1 -1612.7 822.1 -1.962 0.0502 .
## factor(race)2 -95.0 1082.8 -0.088 0.9301
## factor(marr)1 626.9 670.8 0.935 0.3504
## factor(nodeg)1 NA NA NA NA
## factor(trt)1 801.0 488.2 1.641 0.1013
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 6189 on 671 degrees of freedom
## Multiple R-squared: 0.4826, Adjusted R-squared: 0.4433
## F-statistic: 12.27 on 51 and 671 DF, p-value: < 2.2e-16
# cts Controls and treatment effects about 761 dollars
summary(lm(re78 ~ age + I(age^2) +
educ + I(educ^2) +
+ factor(race)
+ factor(marr)
+ factor(nodeg)
+ factor(trt) - 1,
data = dft))
##
## Call:
## lm(formula = re78 ~ age + I(age^2) + educ + I(educ^2) + +factor(race) +
## factor(marr) + factor(nodeg) + factor(trt) - 1, data = dft)
##
## Residuals:
## Min 1Q Median 3Q Max
## -9154 -4677 -1556 3246 54687
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## age -29.1127 213.9845 -0.136 0.8918
## I(age^2) 0.6658 3.5882 0.186 0.8529
## educ -1150.6075 1021.2110 -1.127 0.2602
## I(educ^2) 76.1351 56.0966 1.357 0.1751
## factor(race)0 10204.2957 5360.7995 1.904 0.0574 .
## factor(race)1 8421.7101 5328.1587 1.581 0.1144
## factor(race)2 10059.7028 5298.4854 1.899 0.0580 .
## factor(marr)1 579.8210 650.3352 0.892 0.3729
## factor(nodeg)1 175.0729 913.5232 0.192 0.8481
## factor(trt)1 761.4122 472.5196 1.611 0.1075
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 6206 on 712 degrees of freedom
## Multiple R-squared: 0.448, Adjusted R-squared: 0.4402
## F-statistic: 57.78 on 10 and 712 DF, p-value: < 2.2e-16
# Treatment interactions by marriage status, 476 unmarried, vs 2216 for married
summary(lm(re78 ~ age + I(age^2) +
educ + I(educ^2) +
+ factor(race)
+ factor(marr)
+ factor(nodeg)
+ factor(marr):factor(trt) - 1,
data = dft))
##
## Call:
## lm(formula = re78 ~ age + I(age^2) + educ + I(educ^2) + +factor(race) +
## factor(marr) + factor(nodeg) + factor(marr):factor(trt) -
## 1, data = dft)
##
## Residuals:
## Min 1Q Median 3Q Max
## -9999 -4554 -1552 3132 54838
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## age -14.6509 214.1161 -0.068 0.9455
## I(age^2) 0.3773 3.5922 0.105 0.9164
## educ -1124.9544 1020.7625 -1.102 0.2708
## I(educ^2) 75.1305 56.0673 1.340 0.1807
## factor(race)0 9986.4688 5359.9102 1.863 0.0628 .
## factor(race)1 8214.9578 5327.0673 1.542 0.1235
## factor(race)2 9845.8189 5297.5758 1.859 0.0635 .
## factor(marr)1 -156.2047 844.1521 -0.185 0.8532
## factor(nodeg)1 176.8634 912.9685 0.194 0.8464
## factor(marr)0:factor(trt)1 476.6372 516.1797 0.923 0.3561
## factor(marr)1:factor(trt)1 2216.3634 1164.8513 1.903 0.0575 .
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 6202 on 711 degrees of freedom
## Multiple R-squared: 0.4494, Adjusted R-squared: 0.4409
## F-statistic: 52.76 on 11 and 711 DF, p-value: < 2.2e-16
# Treatment interactions by marriage status, 453 vs 1070 each age group
summary(lm(re78 ~ age + I(age^2) + factor(age_m2) +
educ + I(educ^2) +
+ factor(race)
+ factor(marr)
+ factor(nodeg)
+ factor(age_m2):factor(trt) - 1,
data = dft))
##
## Call:
## lm(formula = re78 ~ age + I(age^2) + factor(age_m2) + educ +
## I(educ^2) + +factor(race) + factor(marr) + factor(nodeg) +
## factor(age_m2):factor(trt) - 1, data = dft)
##
## Residuals:
## Min 1Q Median 3Q Max
## -9281 -4664 -1521 3260 54557
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## age 73.5664 317.3603 0.232 0.817
## I(age^2) -0.7038 4.7486 -0.148 0.882
## factor(age_m2)1 8890.8921 6129.7194 1.450 0.147
## factor(age_m2)2 8237.1456 6661.6040 1.237 0.217
## educ -1137.5178 1025.7038 -1.109 0.268
## I(educ^2) 75.1320 56.3181 1.334 0.183
## factor(race)1 -1782.6432 804.4433 -2.216 0.027 *
## factor(race)2 -130.3132 1055.7904 -0.123 0.902
## factor(marr)1 590.5769 651.5151 0.906 0.365
## factor(nodeg)1 139.6452 916.7444 0.152 0.879
## factor(age_m2)1:factor(trt)1 453.7963 659.7260 0.688 0.492
## factor(age_m2)2:factor(trt)1 1070.8127 677.9949 1.579 0.115
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 6212 on 710 degrees of freedom
## Multiple R-squared: 0.4484, Adjusted R-squared: 0.4391
## F-statistic: 48.1 on 12 and 710 DF, p-value: < 2.2e-16
# Treatment interactions by marriage status, greater effect for older married + younger married
summary(lm(re78 ~ age + I(age^2) + factor(age_m2) +
educ + I(educ^2) +
+ factor(race)
+ factor(marr)
+ factor(nodeg)
+ factor(marr):factor(age_m2):factor(trt) - 1,
data = dft))
##
## Call:
## lm(formula = re78 ~ age + I(age^2) + factor(age_m2) + educ +
## I(educ^2) + +factor(race) + factor(marr) + factor(nodeg) +
## factor(marr):factor(age_m2):factor(trt) - 1, data = dft)
##
## Residuals:
## Min 1Q Median 3Q Max
## -9725 -4576 -1573 3089 54691
##
## Coefficients: (3 not defined because of singularities)
## Estimate Std. Error t value
## age 37.3770 319.8439 0.117
## I(age^2) -0.2698 4.7852 -0.056
## factor(age_m2)1 10519.8273 6563.8924 1.603
## factor(age_m2)2 9557.6915 6655.2092 1.436
## educ -1107.2556 1026.5070 -1.079
## I(educ^2) 74.1506 56.3551 1.316
## factor(race)1 -1732.3553 806.1276 -2.149
## factor(race)2 -143.3612 1056.8346 -0.136
## factor(marr)1 1217.5255 1196.2563 1.018
## factor(nodeg)1 164.2353 918.1533 0.179
## factor(age_m2)1:factor(marr)0:factor(trt)0 -1378.9391 2164.2994 -0.637
## factor(age_m2)2:factor(marr)0:factor(trt)0 -640.4138 785.5838 -0.815
## factor(age_m2)1:factor(marr)1:factor(trt)0 -1595.6462 2363.5262 -0.675
## factor(age_m2)2:factor(marr)1:factor(trt)0 -2339.1547 1346.3197 -1.737
## factor(age_m2)1:factor(marr)0:factor(trt)1 -1029.3096 2177.4284 -0.473
## factor(age_m2)2:factor(marr)0:factor(trt)1 NA NA NA
## factor(age_m2)1:factor(marr)1:factor(trt)1 NA NA NA
## factor(age_m2)2:factor(marr)1:factor(trt)1 NA NA NA
## Pr(>|t|)
## age 0.9070
## I(age^2) 0.9551
## factor(age_m2)1 0.1095
## factor(age_m2)2 0.1514
## educ 0.2811
## I(educ^2) 0.1887
## factor(race)1 0.0320 *
## factor(race)2 0.8921
## factor(marr)1 0.3091
## factor(nodeg)1 0.8581
## factor(age_m2)1:factor(marr)0:factor(trt)0 0.5242
## factor(age_m2)2:factor(marr)0:factor(trt)0 0.4152
## factor(age_m2)1:factor(marr)1:factor(trt)0 0.4998
## factor(age_m2)2:factor(marr)1:factor(trt)0 0.0827 .
## factor(age_m2)1:factor(marr)0:factor(trt)1 0.6366
## factor(age_m2)2:factor(marr)0:factor(trt)1 NA
## factor(age_m2)1:factor(marr)1:factor(trt)1 NA
## factor(age_m2)2:factor(marr)1:factor(trt)1 NA
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 6215 on 707 degrees of freedom
## Multiple R-squared: 0.4502, Adjusted R-squared: 0.4386
## F-statistic: 38.6 on 15 and 707 DF, p-value: < 2.2e-16
# Store Regression Results
mt_model <- model.matrix( ~ age + I(age^2) + factor(age_m2) +
educ + I(educ^2) +
+ factor(race)
+ factor(marr)
+ factor(nodeg)
+ factor(re75_zero)
+ factor(age_m2):factor(trt),
data = dft)
rs_wage_on_trt = lm(re78 ~ mt_model - 1, data = dft)
print(summary(rs_wage_on_trt))
##
## Call:
## lm(formula = re78 ~ mt_model - 1, data = dft)
##
## Residuals:
## Min 1Q Median 3Q Max
## -9457 -4550 -1555 3407 54173
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## mt_model(Intercept) 10052.3179 6160.1001 1.632 0.1032
## mt_modelage 52.9924 317.1787 0.167 0.8674
## mt_modelI(age^2) -0.2876 4.7488 -0.061 0.9517
## mt_modelfactor(age_m2)2 -588.1949 1031.9301 -0.570 0.5689
## mt_modeleduc -1263.1725 1027.0533 -1.230 0.2191
## mt_modelI(educ^2) 80.5134 56.3343 1.429 0.1534
## mt_modelfactor(race)1 -1676.5079 805.8386 -2.080 0.0378 *
## mt_modelfactor(race)2 -86.6527 1054.7258 -0.082 0.9345
## mt_modelfactor(marr)1 506.4687 652.5609 0.776 0.4379
## mt_modelfactor(nodeg)1 162.4342 915.6448 0.177 0.8592
## mt_modelfactor(re75_zero)1 -823.5052 486.9104 -1.691 0.0912 .
## mt_modelfactor(age_m2)1:factor(trt)1 379.3939 660.3304 0.575 0.5658
## mt_modelfactor(age_m2)2:factor(trt)1 1079.5020 677.1278 1.594 0.1113
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 6204 on 709 degrees of freedom
## Multiple R-squared: 0.4506, Adjusted R-squared: 0.4406
## F-statistic: 44.74 on 13 and 709 DF, p-value: < 2.2e-16
rs_wage_on_trt_tidy = tidy(rs_wage_on_trt)
rs_wage_on_trt_tidy
## # A tibble: 13 x 5
## term estimate std.error statistic p.value
## <chr> <dbl> <dbl> <dbl> <dbl>
## 1 mt_model(Intercept) 10052. 6160. 1.63 0.103
## 2 mt_modelage 53.0 317. 0.167 0.867
## 3 mt_modelI(age^2) -0.288 4.75 -0.0606 0.952
## 4 mt_modelfactor(age_m2)2 -588. 1032. -0.570 0.569
## 5 mt_modeleduc -1263. 1027. -1.23 0.219
## 6 mt_modelI(educ^2) 80.5 56.3 1.43 0.153
## 7 mt_modelfactor(race)1 -1677. 806. -2.08 0.0378
## 8 mt_modelfactor(race)2 -86.7 1055. -0.0822 0.935
## 9 mt_modelfactor(marr)1 506. 653. 0.776 0.438
## 10 mt_modelfactor(nodeg)1 162. 916. 0.177 0.859
## 11 mt_modelfactor(re75_zero)1 -824. 487. -1.69 0.0912
## 12 mt_modelfactor(age_m2)1:factor(trt)1 379. 660. 0.575 0.566
## 13 mt_modelfactor(age_m2)2:factor(trt)1 1080. 677. 1.59 0.111
Multiply coefficient vector by covariate matrix to generate A vector that is child/individual specific.
# Estimates Table
head(rs_wage_on_trt_tidy, 6)
## # A tibble: 6 x 5
## term estimate std.error statistic p.value
## <chr> <dbl> <dbl> <dbl> <dbl>
## 1 mt_model(Intercept) 10052. 6160. 1.63 0.103
## 2 mt_modelage 53.0 317. 0.167 0.867
## 3 mt_modelI(age^2) -0.288 4.75 -0.0606 0.952
## 4 mt_modelfactor(age_m2)2 -588. 1032. -0.570 0.569
## 5 mt_modeleduc -1263. 1027. -1.23 0.219
## 6 mt_modelI(educ^2) 80.5 56.3 1.43 0.153
# Covariates
head(mt_model, 5)
## (Intercept) age I(age^2) factor(age_m2)2 educ I(educ^2) factor(race)1
## 1 1 23 529 0 10 100 1
## 2 1 26 676 1 12 144 0
## 3 1 22 484 0 9 81 1
## 4 1 34 1156 1 9 81 1
## 5 1 18 324 0 9 81 1
## factor(race)2 factor(marr)1 factor(nodeg)1 factor(re75_zero)1
## 1 0 0 1 1
## 2 0 0 0 1
## 3 0 0 1 1
## 4 0 0 1 0
## 5 0 0 1 1
## factor(age_m2)1:factor(trt)1 factor(age_m2)2:factor(trt)1
## 1 0 0
## 2 0 0
## 3 0 0
## 4 0 0
## 5 0 0
# Covariates coefficients from regression (including constant)
ar_fl_cova_esti <- as.matrix(rs_wage_on_trt_tidy %>% filter(!str_detect(term, 'trt')) %>% select(estimate))
ar_fl_main_esti <- as.matrix(rs_wage_on_trt_tidy %>% filter(str_detect(term, 'trt')) %>% select(estimate))
head(ar_fl_cova_esti, 5)
## estimate
## [1,] 10052.3178902
## [2,] 52.9923510
## [3,] -0.2876203
## [4,] -588.1948533
## [5,] -1263.1724715
head(ar_fl_main_esti, 5)
## estimate
## [1,] 379.3939
## [2,] 1079.5020
# Select Matrix subcomponents
mt_cova <- as.matrix(as_tibble(mt_model) %>% select(-contains("trt")))
mt_intr <- model.matrix(~ factor(marr) - 1, data = dft)
# Generate A_i, use mt_cova_wth_const
ar_A_m <- mt_cova %*% ar_fl_cova_esti
head(ar_A_m, 5)
## estimate
## [1,] 4201.030
## [2,] 6259.851
## [3,] 3894.398
## [4,] 4572.335
## [5,] 3728.447
## estimate
## 1 379.3939
## 2 379.3939
## 3 379.3939
## 4 379.3939
## 5 379.3939
# Initate Dataframe that will store all estimates and optimal allocation relevant information
# combine identifying key information along with estimation A, alpha results
# note that we only need indi.id as key
mt_opti <- cbind(ar_alpha_m, ar_A_m, ar_beta_m)
ar_st_varnames <- c('alpha', 'A', 'beta')
df_esti_alpha_A_beta <- as_tibble(mt_opti) %>% rename_all(~c(ar_st_varnames))
tb_key_alpha_A_beta <- bind_cols(dft, df_esti_alpha_A_beta) %>%
select(one_of(c('id', 'trt', 'age', 'educ', 'race', 'marr', 'nodeg', 're78',
ar_st_varnames)))
# Rescale A and alpha to deal more easily with large powers
tb_key_alpha_A_beta <- tb_key_alpha_A_beta %>%
mutate(alpha = alpha/1000, A = A/1000)
# Need to only include the smokers here
# tb_key_alpha_A_beta <- tb_key_alpha_A_beta %>% filter(trt == 0)
# Unique beta, A, and alpha check
tb_opti_unique <- tb_key_alpha_A_beta %>% group_by(!!!syms(ar_st_varnames)) %>%
arrange(!!!syms(ar_st_varnames)) %>%
summarise(n_obs_group=n())
## `summarise()` has grouped output by 'alpha', 'A'. You can override using the `.groups` argument.
# Show cars
head(tb_key_alpha_A_beta, 32)
## id trt age educ race marr nodeg re78 alpha A beta
## 1 1 0 23 10 1 0 1 0.000 0.3793939 4.201030 0.001385042
## 2 2 0 26 12 0 0 0 12383.680 0.3793939 6.259851 0.001385042
## 3 3 0 22 9 1 0 1 0.000 0.3793939 3.894398 0.001385042
## 4 4 0 34 9 1 0 1 14051.160 0.3793939 4.572335 0.001385042
## 5 5 0 18 9 1 0 1 10740.080 0.3793939 3.728447 0.001385042
## 6 6 0 45 11 1 0 1 11796.470 0.3793939 4.775996 0.001385042
## 7 7 0 18 9 1 0 1 9227.052 0.3793939 3.728447 0.001385042
## 8 8 0 27 12 1 0 0 3552.268 0.3793939 5.444597 0.001385042
## 9 9 0 24 8 0 0 1 10569.270 0.3793939 4.956678 0.001385042
## 10 10 0 34 11 1 1 1 6040.335 1.0795020 4.949491 0.001385042
## 11 11 0 20 13 1 0 0 9199.563 0.3793939 6.506135 0.001385042
## 12 12 0 24 4 2 0 1 3880.833 0.3793939 6.058071 0.001385042
## 13 13 0 36 10 1 0 1 0.000 0.3793939 4.081130 0.001385042
## 14 14 0 21 10 1 0 1 0.000 0.3793939 4.943861 0.001385042
## 15 15 0 28 12 1 1 0 10067.430 1.0795020 5.988239 0.001385042
## 16 16 0 21 14 1 0 0 5775.062 0.3793939 6.634520 0.001385042
## 17 17 0 28 9 1 0 1 0.000 0.3793939 3.537871 0.001385042
## 18 18 0 21 11 1 1 1 8976.826 1.0795020 5.877939 0.001385042
## 19 19 0 27 7 1 1 1 0.000 1.0795020 3.957082 0.001385042
## 20 20 0 18 12 0 0 0 2502.526 0.3793939 7.348854 0.001385042
## 21 21 0 19 11 0 0 1 0.000 0.3793939 6.141498 0.001385042
## 22 22 0 30 13 1 0 0 0.000 0.3793939 6.304054 0.001385042
## 23 23 0 26 11 2 0 1 0.000 0.3793939 6.570501 0.001385042
## 24 24 0 20 8 1 0 1 0.000 0.3793939 3.707017 0.001385042
## 25 25 0 31 4 0 0 1 0.000 0.3793939 7.228442 0.001385042
## 26 26 0 18 12 1 0 0 14444.710 0.3793939 5.672347 0.001385042
## 27 27 0 34 12 1 0 0 2113.722 0.3793939 4.869224 0.001385042
## 28 28 0 24 10 1 0 1 7618.639 0.3793939 3.652309 0.001385042
## 29 29 0 22 8 2 0 1 9920.945 0.3793939 5.378697 0.001385042
## 30 30 0 28 8 1 0 1 4623.188 0.3793939 4.255820 0.001385042
## 31 31 0 25 11 1 0 1 4196.375 0.3793939 4.118817 0.001385042
## 32 32 0 39 9 1 0 1 0.000 0.3793939 3.908810 0.001385042
svr_inpalc <- 'opti_alloc_queue'
# Child Count
it_obs = dim(tb_opti_unique)[1]
# Vector of Planner Preference
ar_rho <- c(-100, 0.8)
ar_rho <- c(-50, -25, -10)
ar_rho <- c(-100, -5, -1, 0.1, 0.6, 0.8)
# ar_rho <- c(seq(-200, -100, length.out=5), seq(-100, -25, length.out=5), seq(-25, -5, length.out=5), seq(-5, -1, length.out=5), seq(-1, -0.01, length.out=5), seq(0.01, 0.25, length.out=5), seq(0.25, 0.90, length.out=5))
# ar_rho <- c(-100, -5, -1, 0.1, 0.6, 0.99)
# ar_rho <- c(-20, -1, 0.05, 0.9)
# ar_rho <- c(-50, -40, -30, -20, -15, -10, -7.5, -5,-3,-2,-1)
# ar_rho = c(-100, -0.001, 0.95)
ar_rho = c(-100, -0.001, 0.95)
ar_rho <- c(seq(-200, -100, length.out=5), seq(-100, -25, length.out=5), seq(-25, -5, length.out=5), seq(-5, -1, length.out=5), seq(-1, -0.01, length.out=5), seq(0.01, 0.25, length.out=5), seq(0.25, 0.90, length.out=5))
ar_rho <- 1 - (10^(c(seq(-2,2, length.out=30))))
ar_rho <- unique(ar_rho)
svr_rho_val <- 'rho_val'
ls_bin_solu_all_rhos <-
ffp_opt_anlyz_rhgin_bin(tb_key_alpha_A_beta,
svr_id_i = 'id',
svr_A_i = 'A', svr_alpha_i = 'alpha', svr_beta_i = 'beta',
ar_rho = ar_rho,
svr_rho = 'rho', svr_rho_val = svr_rho_val,
svr_inpalc = svr_inpalc,
svr_expout = 'opti_exp_outcome')
## Joining, by = "id"
df_all_rho <- ls_bin_solu_all_rhos$df_all_rho
df_all_rho_long <- ls_bin_solu_all_rhos$df_all_rho_long
# How many people have different ranks across rhos
it_how_many_vary_rank <- sum(df_all_rho$rank_max - df_all_rho$rank_min)
it_how_many_vary_rank
## [1] -94434
# Change Variable names so that this can becombined with logit file later
df_all_rho <- df_all_rho %>% rename_at(vars(starts_with("rho_")), funs(str_replace(., "rk", "rk_wage")))
df_all_rho <- df_all_rho %>% rename(A_i_wage = A, alpha_i_wage = alpha, beta_i_wage = beta,
rank_min_wage = rank_min, rank_max_wage = rank_max, avg_rank_wage = avg_rank)
# Save File
if (bl_save_rda) {
df_opt_lalonde_training_wage <- df_all_rho
usethis::use_data(df_opt_lalonde_training_wage, df_opt_lalonde_training_wage, overwrite = TRUE)
}
# get rank when wage rho = 1
df_all_rho_rho_c1 <- df_all_rho %>% select(id, rho_c1_rk_wage)
# Merge
df_all_rho_long_more <- df_all_rho_long %>% mutate(rho = as.numeric(rho)) %>%
left_join(df_all_rho_rho_c1, by='id')
# Select subset to graph
df_rank_graph <- df_all_rho_long_more %>%
mutate(id = factor(id)) %>%
filter((id == 167) | # utilitarian rank = 1
(id == 175) | # utilitarian rank = 101
(id == 3710) | # utilitarian rank = 202
(id == 2151) | # utilitarian rank = 306
(id == 27)| # utilitarian rank = 405
(id == 58) | # utilitarian rank = 503
(id == 184) | # utilitarian rank = 603
(id == 1481) # utilitarian rank = 701
) %>%
mutate(one_minus_rho = 1 - !!sym(svr_rho_val)) %>%
mutate(rho_c1_rk_wage = factor(rho_c1_rk_wage))
df_rank_graph$rho_c1_rk_wage
## [1] 405 405 405 405 405 405 405 405 405 405 405 405 405 405 405 405 405 405
## [19] 405 405 405 405 405 405 405 405 405 405 405 405 503 503 503 503 503 503
## [37] 503 503 503 503 503 503 503 503 503 503 503 503 503 503 503 503 503 503
## [55] 503 503 503 503 503 503 1 1 1 1 1 1 1 1 1 1 1 1
## [73] 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1
## [91] 101 101 101 101 101 101 101 101 101 101 101 101 101 101 101 101 101 101
## [109] 101 101 101 101 101 101 101 101 101 101 101 101 603 603 603 603 603 603
## [127] 603 603 603 603 603 603 603 603 603 603 603 603 603 603 603 603 603 603
## [145] 603 603 603 603 603 603 202 202 202 202 202 202 202 202 202 202 202 202
## [163] 202 202 202 202 202 202 202 202 202 202 202 202 202 202 202 202 202 202
## [181] 701 701 701 701 701 701 701 701 701 701 701 701 701 701 701 701 701 701
## [199] 701 701 701 701 701 701 701 701 701 701 701 701 306 306 306 306 306 306
## [217] 306 306 306 306 306 306 306 306 306 306 306 306 306 306 306 306 306 306
## [235] 306 306 306 306 306 306
## Levels: 1 101 202 306 405 503 603 701
df_rank_graph$id <- factor(df_rank_graph$rho_c1_rk_wage,
labels = c('Rank= 1 when λ=0.99', #200
'Rank= 101 when λ=0.99', #110
'Rank= 201 when λ=0.99', #95
'Rank= 301 when λ=0.99', #217
'Rank= 401 when λ=0.99', #274
'Rank= 501 when λ=0.99', #101
'Rank= 601 when λ=0.99', #131
'Rank= 701 when λ=0.99' #3610
))
# x-labels
x.labels <- c('λ=0.99', 'λ=0.90', 'λ=0', 'λ=-10', 'λ=-100')
x.breaks <- c(0.01, 0.10, 1, 10, 100)
# title line 2
title_line1 <- sprintf("LINEAR WAGE: Optimal Tageting Queue, NSW Lalonde (AER, 1986)")
title_line2 <- sprintf("How Do Allocation Rankings (Targeting Queue) Shift with λ?")
# Graph Results--Draw
line.plot <- df_rank_graph %>%
ggplot(aes(x=one_minus_rho, y=!!sym(svr_inpalc),
group=fct_rev(id),
colour=fct_rev(id),
size=2)) +
# geom_line(aes(linetype =fct_rev(id)), size=0.75) +
geom_line(size=0.75) +
geom_vline(xintercept=c(1), linetype="dotted") +
labs(title = paste0(title_line1, '\n', title_line2),
x = 'log10 Rescale of λ, Log10(λ)\nλ=1 Utilitarian, λ=-infty Rawlsian',
y = 'Optimal Targeting Queue Rank (1=highest)',
caption = 'Training RCT with 297 treated, 425 untreated. Optimal Binary Targeting Queue.') +
scale_x_continuous(trans='log10', labels = x.labels, breaks = x.breaks) +
theme_bw()
print(line.plot)
# tb_opti_alloc_all_rho_long %>%
# ggplot(aes(x = rho, y = rank, group = id)) +
# geom_line(aes(color = race, alpha = 1), size = 2) +
# geom_point(aes(color = race, alpha = 1), size = 4) +
# scale_x_discrete(expand = c(0.85,0))+
# scale_y_reverse(breaks = 1:nrow(tb_opti_alloc_all_rho_long))+
# theme(legend.position = "none") +
# labs(x = "Equality vs Efficiency",
# y = "Rank",
# title = "Binary Allocation Rank, which untrained to receive training first") +
# ffy_opt_ghthm_dk() +
# geom_text(data =tb_opti_alloc_all_rho,aes(y=rho_c1_rk,x=0.6,label=id),hjust="right")