Linear wage regression estimation of A and alpha from the Lalonde Training Dataset (722 Observations). Solve for optimal binary allocation queues. Test binary allocation queue with Lalonde training dataset. There are 722 observations, 297 in the treatment group, 425 in the control group. Following Lalonde, regressions are in terms of wage levels (not log wage)

Load Dependencies

rm(list = ls(all.names = TRUE))
options(knitr.duplicate.label = 'allow')

Load Data and Generate New Variables

Generate four categories by initial height and mother’s education levels combinations.

# Dataset
data(df_opt_lalonde_training)

# Add a binary variable for if there are wage in year 1975
dft <- df_opt_lalonde_training %>% 
  mutate(re75_zero = case_when(re75 <= 1 ~ 1, 
                               TRUE ~ 0)) %>%
  select(trt, re75_zero, everything())

# summary
REconTools::ff_summ_percentiles(dft)
## Warning: `funs()` was deprecated in dplyr 0.8.0.
## Please use a list of either functions or lambdas: 
## 
##   # Simple named list: 
##   list(mean = mean, median = median)
## 
##   # Auto named with `tibble::lst()`: 
##   tibble::lst(mean, median)
## 
##   # Using lambdas
##   list(~ mean(., trim = .2), ~ median(., na.rm = TRUE))
## This warning is displayed once every 8 hours.
## Call `lifecycle::last_lifecycle_warnings()` to see where this warning was generated.
## Warning: `as.tibble()` was deprecated in tibble 2.0.0.
## Please use `as_tibble()` instead.
## The signature and semantics have changed, see `?as_tibble`.
## This warning is displayed once every 8 hours.
## Call `lifecycle::last_lifecycle_warnings()` to see where this warning was generated.
## Warning: attributes are not identical across measure variables;
## they will be dropped
## Warning: The `x` argument of `as_tibble.matrix()` must have unique column names if `.name_repair` is omitted as of tibble 2.0.0.
## Using compatibility `.name_repair`.
## This warning is displayed once every 8 hours.
## Call `lifecycle::last_lifecycle_warnings()` to see where this warning was generated.
## # A tibble: 18 x 13
##    stats   age   black educ  hisp  marr  nodeg re74  re75  re75.zero re78  trt  
##    <chr>   <chr> <chr> <chr> <chr> <chr> <chr> <chr> <chr> <chr>     <chr> <chr>
##  1 n       "722" "722" "722" "722" "722" "722" "722" "722" "722"     "722" "722"
##  2 unique  " 35" "  2" " 14" "  2" "  2" "  2" "116" "424" "  2"     "524" "  2"
##  3 NAobs   "  0" "  0" "  0" "  0" "  0" "  0" "277" "  0" "  0"     "  0" "  0"
##  4 ZEROobs "  0" "144" "  0" "646" "605" "159"  NA   "289" "433"     "196" "425"
##  5 mean    "  2~ "   ~ "  1~ "   ~ "   ~ "   ~ "210~ "304~ "   0.40~ "545~ "   ~
##  6 sd      "   ~ "   ~ "   ~ "   ~ "   ~ "   ~ "536~ "506~ "   0.49~ "625~ "   ~
##  7 cv      "0.2~ "0.4~ "0.1~ "2.9~ "2.2~ "0.5~ "2.5~ "1.6~ "1.22488~ "1.1~ "1.1~
##  8 min     "17"  " 0"  " 3"  " 0"  " 0"  " 0"  " 0"  " 0"  " 0"      " 0"  " 0" 
##  9 p01     "17.~ " 0.~ " 5.~ " 0.~ " 0.~ " 0.~ " 0.~ " 0.~ " 0.00"   " 0.~ " 0.~
## 10 p05     "17.~ " 0.~ " 8.~ " 0.~ " 0.~ " 0.~ " 0.~ " 0.~ " 0.00"   " 0.~ " 0.~
## 11 p10     "18.~ " 0.~ " 8.~ " 0.~ " 0.~ " 0.~ " 0.~ " 0.~ " 0.0"    " 0.~ " 0.~
## 12 p25     " 19~ "  1~ "  9~ "  0~ "  0~ "  1~ "  0~ "  0~ "  0.00"  "  0~ "  0~
## 13 p50     "  2~ "   ~ "  1~ "   ~ "   ~ "   ~ "   ~ " 93~ "   0.00~ "395~ "   ~
## 14 p75     "  2~ "   ~ "  1~ "   ~ "   ~ "   ~ " 82~ "399~ "   1.00~ "877~ "   ~
## 15 p90     "   ~ "   ~ "   ~ "   ~ "   ~ "   ~ " 78~ " 89~ "    1.0~ "128~ "   ~
## 16 p95     "   ~ "   ~ "   ~ "   ~ "   ~ "   ~ "122~ "122~ "    1.0~ "166~ "   ~
## 17 p99     "   ~ "   ~ "   ~ "   ~ "   ~ "   ~ "270~ "242~ "    1.0~ "260~ "   ~
## 18 max     "   ~ "   ~ "   ~ "   ~ "   ~ "   ~ "395~ "374~ "    1.0~ "603~ "   ~
## # ... with 1 more variable: X <chr>
# summary treated only
REconTools::ff_summ_percentiles(dft %>% filter(trt == 1))
## Warning: attributes are not identical across measure variables;
## they will be dropped
## # A tibble: 18 x 13
##    stats   age   black educ  hisp  marr  nodeg re74  re75  re75.zero re78  trt  
##    <chr>   <chr> <chr> <chr> <chr> <chr> <chr> <chr> <chr> <chr>     <chr> <chr>
##  1 n       "297" "297" "297" "297" "297" "297" "297" "297" "297"     "297" "297"
##  2 unique  " 30" "  2" " 13" "  2" "  2" "  2" " 54" "184" "  2"     "230" "  1"
##  3 NAobs   "  0" "  0" "  0" "  0" "  0" "  0" "112" "  0" "  0"     "  0" "  0"
##  4 ZEROobs "  0" " 59" "  0" "269" "247" " 80"  NA   "111" "186"     " 67" "  0"
##  5 mean    "2.4~ "8.0~ "1.0~ "9.4~ "1.6~ "7.3~ "2.0~ "3.0~ "3.73737~ "5.9~ "1.0~
##  6 sd      "   ~ "   ~ "   ~ "   ~ "   ~ "   ~ "488~ "487~ "   0.48~ "692~ "   ~
##  7 cv      "0.2~ "0.4~ "0.1~ "3.1~ "2.2~ "0.6~ "2.3~ "1.5~ "1.29666~ "1.1~ "0.0~
##  8 min     " 17" "  0" "  4" "  0" "  0" "  0" "  0" "  0" "  0"     "  0" "  1"
##  9 p01     " 17~ "  0~ "  4~ "  0~ "  0~ "  0~ "  0~ "  0~ "  0.0"   "  0~ "  1~
## 10 p05     " 17" "  0" "  8" "  0" "  0" "  0" "  0" "  0" "  0"     "  0" "  1"
## 11 p10     " 18" "  0" "  8" "  0" "  0" "  0" "  0" "  0" "  0"     "  0" "  1"
## 12 p25     "  2~ "   ~ "   ~ "   ~ "   ~ "   ~ "   ~ "   ~ "   0.00~ " 54~ "   ~
## 13 p50     "  2~ "   ~ "  1~ "   ~ "   ~ "   ~ "   ~ "111~ "   0.00~ "423~ "   ~
## 14 p75     "  2~ "   ~ "  1~ "   ~ "   ~ "   ~ "129~ "431~ "   1.00~ "938~ "   ~
## 15 p90     "   ~ "   ~ "   ~ "   ~ "   ~ "   ~ " 84~ " 84~ "    1.0~ "134~ "   ~
## 16 p95     "   ~ "   ~ "   ~ "   ~ "   ~ "   ~ "123~ "122~ "    1.0~ "173~ "   ~
## 17 p99     "   ~ "   ~ "   ~ "   ~ "   ~ "   ~ "211~ "231~ "    1.0~ "271~ "   ~
## 18 max     "   ~ "   ~ "   ~ "   ~ "   ~ "   ~ "350~ "374~ "    1.0~ "603~ "   ~
## # ... with 1 more variable: X <chr>
REconTools::ff_summ_percentiles(dft %>% filter(trt == 0))
## Warning: attributes are not identical across measure variables;
## they will be dropped
## # A tibble: 18 x 13
##    stats   age   black educ  hisp  marr  nodeg re74  re75  re75.zero re78  trt  
##    <chr>   <chr> <chr> <chr> <chr> <chr> <chr> <chr> <chr> <chr>     <chr> <chr>
##  1 n       "425" "425" "425" "425" "425" "425" "425" "425" "425"     "425" "425"
##  2 unique  " 31" "  2" " 12" "  2" "  2" "  2" " 67" "245" "  2"     "297" "  1"
##  3 NAobs   "  0" "  0" "  0" "  0" "  0" "  0" "165" "  0" "  0"     "  0" "  0"
##  4 ZEROobs "  0" " 85" "  0" "377" "358" " 79"  NA   "178" "247"     "129" "425"
##  5 mean    "  2~ "   ~ "  1~ "   ~ "   ~ "   ~ "210~ "302~ "   0.41~ "509~ "   ~
##  6 sd      "   ~ "   ~ "   ~ "   ~ "   ~ "   ~ "568~ "520~ "   0.49~ "571~ "   ~
##  7 cv      "0.2~ "0.5~ "0.1~ "2.8~ "2.3~ "0.4~ "2.6~ "1.7~ "1.17936~ "1.1~  NA  
##  8 min     "17"  " 0"  " 3"  " 0"  " 0"  " 0"  " 0"  " 0"  " 0"      " 0"  " 0" 
##  9 p01     "17.~ " 0.~ " 5.~ " 0.~ " 0.~ " 0.~ " 0.~ " 0.~ " 0.00"   " 0.~ " 0.~
## 10 p05     "17.~ " 0.~ " 8.~ " 0.~ " 0.~ " 0.~ " 0.~ " 0.~ " 0.0"    " 0.~ " 0.~
## 11 p10     "18.~ " 0.~ " 8.~ " 0.~ " 0.~ " 0.~ " 0.~ " 0.~ " 0.0"    " 0.~ " 0.~
## 12 p25     " 19" "  1" "  9" "  0" "  0" "  1" "  0" "  0" "  0"     "  0" "  0"
## 13 p50     "  2~ "   ~ "  1~ "   ~ "   ~ "   ~ "   ~ " 82~ "   0.00~ "374~ "   ~
## 14 p75     "  2~ "   ~ "  1~ "   ~ "   ~ "   ~ " 13~ "364~ "   1.00~ "832~ "   ~
## 15 p90     "   ~ "   ~ "   ~ "   ~ "   ~ "   ~ " 76~ " 91~ "    1.0~ "124~ "   ~
## 16 p95     "   ~ "   ~ "   ~ "   ~ "   ~ "   ~ "106~ "121~ "    1.0~ "162~ "   ~
## 17 p99     "   ~ "   ~ "   ~ "   ~ "   ~ "   ~ "295~ "242~ "    1.0~ "209~ "   ~
## 18 max     "   ~ "   ~ "   ~ "   ~ "   ~ "   ~ "395~ "369~ "    1.0~ "394~ "   ~
## # ... with 1 more variable: X <chr>
# within 297 treated, 111 (37%) had zero wage in 1975
# within 425 untreated, 178 (42%) had zero wage in 1975
# Load Data
dft <- dft %>% mutate(id = X) %>%
           select(-X) %>%
           select(id, everything())
# Summarize
str(dft)
## 'data.frame':    722 obs. of  12 variables:
##  $ id       : int  1 2 3 4 5 6 7 8 9 10 ...
##  $ trt      : int  0 0 0 0 0 0 0 0 0 0 ...
##  $ re75_zero: num  1 1 1 0 1 1 1 0 1 1 ...
##  $ age      : int  23 26 22 34 18 45 18 27 24 34 ...
##  $ educ     : int  10 12 9 9 9 11 9 12 8 11 ...
##  $ black    : int  1 0 1 1 1 1 1 1 0 1 ...
##  $ hisp     : int  0 0 0 0 0 0 0 0 0 0 ...
##  $ marr     : int  0 0 0 0 0 0 0 0 0 1 ...
##  $ nodeg    : int  1 0 1 1 1 1 1 0 1 1 ...
##  $ re74     : num  0 0 0 NA 0 0 0 NA 0 0 ...
##  $ re75     : num  0 0 0 4368 0 ...
##  $ re78     : num  0 12384 0 14051 10740 ...
summary(dft)
##        id              trt           re75_zero           age       
##  Min.   :   1.0   Min.   :0.0000   Min.   :0.0000   Min.   :17.00  
##  1st Qu.: 181.2   1st Qu.:0.0000   1st Qu.:0.0000   1st Qu.:19.00  
##  Median : 361.5   Median :0.0000   Median :0.0000   Median :23.00  
##  Mean   : 854.4   Mean   :0.4114   Mean   :0.4003   Mean   :24.52  
##  3rd Qu.:1458.5   3rd Qu.:1.0000   3rd Qu.:1.0000   3rd Qu.:27.00  
##  Max.   :4110.0   Max.   :1.0000   Max.   :1.0000   Max.   :55.00  
##                                                                    
##       educ           black             hisp             marr      
##  Min.   : 3.00   Min.   :0.0000   Min.   :0.0000   Min.   :0.000  
##  1st Qu.: 9.00   1st Qu.:1.0000   1st Qu.:0.0000   1st Qu.:0.000  
##  Median :10.00   Median :1.0000   Median :0.0000   Median :0.000  
##  Mean   :10.27   Mean   :0.8006   Mean   :0.1053   Mean   :0.162  
##  3rd Qu.:11.00   3rd Qu.:1.0000   3rd Qu.:0.0000   3rd Qu.:0.000  
##  Max.   :16.00   Max.   :1.0000   Max.   :1.0000   Max.   :1.000  
##                                                                   
##      nodeg             re74              re75              re78      
##  Min.   :0.0000   Min.   :    0.0   Min.   :    0.0   Min.   :    0  
##  1st Qu.:1.0000   1st Qu.:    0.0   1st Qu.:    0.0   1st Qu.:    0  
##  Median :1.0000   Median :    0.0   Median :  936.3   Median : 3952  
##  Mean   :0.7798   Mean   : 2102.3   Mean   : 3042.9   Mean   : 5455  
##  3rd Qu.:1.0000   3rd Qu.:  824.4   3rd Qu.: 3993.2   3rd Qu.: 8772  
##  Max.   :1.0000   Max.   :39570.7   Max.   :37431.7   Max.   :60308  
##                   NA's   :277
# Generate Employment 0 or 1 status Variable, and non-zero wages variable
dft <- dft %>% mutate(emp78 =
                      case_when(re78 <= 0 ~ 0,
                                TRUE ~ 1)) %>%
               mutate(emp75 =
                      case_when(re75 <= 0 ~ 0,
                                TRUE ~ 1)) %>%
               mutate(emp74 =
                      case_when(re74 <= 0 ~ 0,
                                TRUE ~ 1))

# Generate combine black + hispanic status
# 0 = white, 1 = black, 2 = hispanics
dft <- dft %>%
    mutate(race =
             case_when(black == 1 ~ 1,
                       hisp == 1 ~ 2,
                       TRUE ~ 0))

dft <- dft %>%
    mutate(age_m2 =
             case_when(age <= 23 ~ 1,
                       age >  23~ 2)) %>%
    mutate(age_m3 =
             case_when(age <= 20 ~ 1,
                       age > 20 & age <= 26 ~ 2,
                       age > 26 ~ 3))



# filter(re78 != 0) %>%
# mutate(re78_log = log(re78))

# Exclude zeros
# when this is on, both linear and log linear results exclude wage = 0
# dft <- dft %>%
#     filter(re78 > 0)

# Generate Discrete Version of continuous variables
# dft <- dft %>%
#     mutate(momwgtLowHigh = cut(lwt,
#                                breaks=c(-Inf, 129, Inf),
#                                labels=c("LW","HW"))) %>%
#     mutate(mombirthage = cut(age,
#                                breaks=c(-Inf, 24, Inf),
#                                labels=c("young","older")))

Regression with Data and Construct Input Arrays

Tabulate and Summarize Averages

What is the average difference in wage between treatment and control, do they match what is reported in Lalonde (1986)?

# Summarize average for all variables grouping by treatment status
# re78 is significantly different
round(t(dft %>% group_by(trt) %>%
        summarise_if(is.numeric, funs(mean = mean), na.rm = TRUE)), digits=3)
##                    [,1]     [,2]
## trt               0.000    1.000
## id_mean         213.000 1772.316
## re75_zero_mean    0.419    0.374
## age_mean         24.447   24.626
## educ_mean        10.188   10.380
## black_mean        0.800    0.801
## hisp_mean         0.113    0.094
## marr_mean         0.158    0.168
## nodeg_mean        0.814    0.731
## re74_mean      2107.027 2095.574
## re75_mean      3026.683 3066.098
## re78_mean      5090.048 5976.352
## emp78_mean        0.696    0.774
## emp75_mean        0.581    0.626
## emp74_mean        0.541    0.559
## race_mean         1.026    0.990
## age_m2_mean       1.489    1.481
## age_m3_mean       1.948    1.990
round(t(dft %>% group_by(marr, age_m2) %>%
        summarise_if(is.numeric, funs(mean = mean), na.rm = TRUE)), digits=3)
##                    [,1]     [,2]     [,3]     [,4]
## marr              0.000    0.000    1.000    1.000
## age_m2            1.000    2.000    1.000    2.000
## id_mean         840.140  823.626 1083.179  928.270
## trt_mean          0.411    0.405    0.464    0.416
## re75_zero_mean    0.350    0.489    0.214    0.393
## age_mean         19.569   29.485   20.929   30.124
## educ_mean        10.105   10.412   10.143   10.506
## black_mean        0.778    0.824    0.607    0.876
## hisp_mean         0.134    0.069    0.250    0.056
## nodeg_mean        0.845    0.698    0.821    0.753
## re74_mean      1991.445 1509.914 2758.832 3996.214
## re75_mean      2304.886 2950.030 5827.808 5284.371
## re78_mean      5219.173 5516.690 7153.395 5644.975
## emp78_mean        0.752    0.683    0.893    0.719
## emp75_mean        0.650    0.511    0.786    0.607
## emp74_mean        0.615    0.447    0.714    0.539
## race_mean         1.047    0.962    1.107    0.989
## age_m3_mean       1.309    2.630    1.607    2.652
# Summarize by finer sub groups: RACE
# big increase for black, but not for other group
round(t(dft %>% group_by(trt, race) %>%
        summarise_if(is.numeric, funs(mean = mean), na.rm = TRUE)), digits=3)
##                    [,1]     [,2]     [,3]     [,4]     [,5]     [,6]
## trt               0.000    0.000    0.000    1.000    1.000    1.000
## race              0.000    1.000    2.000    0.000    1.000    2.000
## id_mean         233.027  209.679  221.083 1700.129 1782.723 1763.786
## re75_zero_mean    0.243    0.435    0.438    0.323    0.403    0.179
## age_mean         23.351   24.812   22.708   25.677   24.853   21.536
## educ_mean        10.486   10.285    9.271   10.839   10.382    9.857
## black_mean        0.000    1.000    0.000    0.000    1.000    0.000
## hisp_mean         0.000    0.000    1.000    0.000    0.000    1.000
## marr_mean         0.135    0.162    0.146    0.161    0.168    0.179
## nodeg_mean        0.649    0.821    0.896    0.548    0.739    0.857
## re74_mean      3791.631 2086.477 1242.022 1305.037 2155.013 2546.219
## re75_mean      5373.717 2862.948 2377.296 4038.830 2785.916 4370.691
## re78_mean      7114.471 4654.996 6611.178 7034.041 5814.419 6181.772
## emp78_mean        0.838    0.659    0.854    0.935    0.739    0.893
## emp75_mean        0.757    0.565    0.562    0.677    0.597    0.821
## emp74_mean        0.676    0.521    0.583    0.548    0.534    0.786
## age_m2_mean       1.514    1.506    1.354    1.484    1.513    1.214
## age_m3_mean       1.946    1.988    1.667    2.065    2.021    1.643
# Summarize by finer sub groups: MARRIAGE
# big increase for black, but not for other group
round(t(dft %>% group_by(trt, marr) %>%
        summarise_if(is.numeric, funs(mean = mean), na.rm = TRUE)), digits=3)
##                    [,1]     [,2]     [,3]     [,4]
## trt               0.000    0.000    1.000    1.000
## marr              0.000    1.000    0.000    1.000
## id_mean         210.176  228.090 1735.688 1953.260
## re75_zero_mean    0.427    0.373    0.385    0.320
## age_mean         23.933   27.194   23.761   28.900
## educ_mean        10.131   10.493   10.393   10.320
## black_mean        0.796    0.821    0.802    0.800
## hisp_mean         0.115    0.104    0.093    0.100
## nodeg_mean        0.818    0.791    0.729    0.740
## re74_mean      1834.893 3603.759 1646.427 4020.488
## re75_mean      2465.121 6027.269 2756.966 4593.212
## re78_mean      5105.237 5008.892 5699.897 7342.041
## emp78_mean        0.701    0.672    0.753    0.880
## emp75_mean        0.573    0.627    0.615    0.680
## emp74_mean        0.534    0.582    0.555    0.580
## race_mean         1.025    1.030    0.988    1.000
## age_m2_mean       1.436    1.776    1.429    1.740
## age_m3_mean       1.866    2.388    1.903    2.420
# Summarize by finer sub groups: AGE GROUPS
round(t(dft %>% group_by(trt, age_m2) %>%
        summarise_if(is.numeric, funs(mean = mean), na.rm = TRUE)), digits=3)
##                    [,1]     [,2]     [,3]     [,4]
## trt               0.000    0.000    1.000    1.000
## age_m2            1.000    2.000    1.000    2.000
## id_mean         241.908  182.841 1727.292 1820.804
## re75_zero_mean    0.382    0.457    0.279    0.476
## age_mean         19.539   29.567   19.857   29.762
## educ_mean         9.977   10.409   10.292   10.476
## black_mean        0.774    0.827    0.753    0.853
## hisp_mean         0.143    0.082    0.143    0.042
## marr_mean         0.069    0.250    0.084    0.259
## nodeg_mean        0.899    0.726    0.766    0.692
## re74_mean      2333.261 1900.754 1571.490 2486.164
## re75_mean      2313.545 3770.677 2933.215 3209.204
## re78_mean      5095.991 5083.849 5744.425 6226.119
## emp78_mean        0.728    0.663    0.812    0.734
## emp75_mean        0.618    0.543    0.721    0.524
## emp74_mean        0.590    0.490    0.669    0.441
## race_mean         1.060    0.990    1.039    0.937
## age_m3_mean       1.290    2.635    1.390    2.636
round(t(dft %>% group_by(trt, age_m3) %>%
        summarise_if(is.numeric, funs(mean = mean), na.rm = TRUE)), digits=3)
##                    [,1]     [,2]     [,3]     [,4]     [,5]     [,6]
## trt               0.000    0.000    0.000    1.000    1.000    1.000
## age_m3            1.000    2.000    3.000    1.000    2.000    3.000
## id_mean         261.675  180.475  190.462 1646.723 1918.705 1721.879
## re75_zero_mean    0.383    0.453    0.424    0.266    0.366    0.495
## age_mean         18.532   23.626   32.212   18.479   23.402   32.484
## educ_mean         9.877   10.209   10.530    9.851   10.902   10.286
## black_mean        0.740    0.835    0.833    0.777    0.777    0.857
## hisp_mean         0.175    0.072    0.083    0.160    0.071    0.055
## marr_mean         0.039    0.209    0.242    0.053    0.170    0.286
## nodeg_mean        0.922    0.806    0.697    0.883    0.607    0.725
## re74_mean       827.694 3297.704 2095.804 1466.314 2840.083 1794.839
## re75_mean      1564.343 3604.432 4124.358 2302.777 4136.499 2537.167
## re78_mean      4987.829 5320.403 4966.734 5498.803 5744.032 6755.577
## emp78_mean        0.760    0.676    0.644    0.777    0.795    0.747
## emp75_mean        0.617    0.547    0.576    0.734    0.634    0.505
## emp74_mean        0.584    0.532    0.500    0.670    0.598    0.396
## race_mean         1.091    0.978    1.000    1.096    0.920    0.967
## age_m2_mean       1.000    1.547    2.000    1.000    1.464    2.000
round(t(dft %>% group_by(trt, marr, age_m2) %>%
        summarise_if(is.numeric, funs(mean = mean), na.rm = TRUE)), digits=3)
##                    [,1]     [,2]     [,3]     [,4]     [,5]     [,6]     [,7]
## trt               0.000    0.000    0.000    0.000    1.000    1.000    1.000
## marr              0.000    0.000    1.000    1.000    0.000    0.000    1.000
## age_m2            1.000    2.000    1.000    2.000    1.000    2.000    1.000
## id_mean         243.302  167.282  223.133  229.519 1695.184 1789.566 2075.538
## re75_zero_mean    0.391    0.474    0.267    0.404    0.291    0.509    0.154
## age_mean         19.450   29.737   20.733   29.058   19.738   29.113   21.154
## educ_mean         9.965   10.346   10.133   10.596   10.305   10.509   10.154
## black_mean        0.782    0.814    0.667    0.865    0.773    0.840    0.538
## hisp_mean         0.139    0.083    0.200    0.077    0.128    0.047    0.308
## nodeg_mean        0.896    0.718    0.933    0.750    0.773    0.670    0.692
## re74_mean      2215.155 1386.863 5144.184 3383.698 1621.704 1669.249 1055.009
## re75_mean      2033.499 3024.016 6084.840 6010.662 2693.681 2841.146 5531.232
## re78_mean      5009.475 5229.235 6261.061 4647.690 5519.591 5939.737 8183.010
## emp78_mean        0.723    0.673    0.800    0.635    0.794    0.698    1.000
## emp75_mean        0.609    0.526    0.733    0.596    0.709    0.491    0.846
## emp74_mean        0.579    0.474    0.733    0.538    0.667    0.406    0.692
## race_mean         1.059    0.981    1.067    1.019    1.028    0.934    1.154
## age_m3_mean       1.267    2.641    1.600    2.615    1.369    2.613    1.615
##                    [,8]
## trt               1.000
## marr              1.000
## age_m2            2.000
## id_mean        1910.297
## re75_zero_mean    0.378
## age_mean         31.622
## educ_mean        10.378
## black_mean        0.892
## hisp_mean         0.027
## nodeg_mean        0.757
## re74_mean      4761.858
## re75_mean      4263.638
## re78_mean      7046.565
## emp78_mean        0.838
## emp75_mean        0.622
## emp74_mean        0.541
## race_mean         0.946
## age_m3_mean       2.703

Tabulate

# Tabulate groups, how many in each group, enough for group heterogeneity in effects?
dft %>%
  group_by(trt, marr) %>%
  summarize(freq = n()) %>%
  pivot_wider(names_from = trt, values_from = freq)
## `summarise()` has grouped output by 'trt'. You can override using the `.groups` argument.
## # A tibble: 2 x 3
##    marr   `0`   `1`
##   <int> <int> <int>
## 1     0   358   247
## 2     1    67    50
# Tabulate groups, how many in each group, enough for group heterogeneity in effects?
dft %>%
  group_by(trt, age_m2, marr) %>%
  summarize(freq = n()) %>%
  pivot_wider(names_from = trt, values_from = freq)
## `summarise()` has grouped output by 'trt', 'age_m2'. You can override using the `.groups` argument.
## # A tibble: 4 x 4
## # Groups:   age_m2 [2]
##   age_m2  marr   `0`   `1`
##    <dbl> <int> <int> <int>
## 1      1     0   202   141
## 2      1     1    15    13
## 3      2     0   156   106
## 4      2     1    52    37

Regression Testing

# As noted in Lalonde (1986), functional form assumptions do not matter much
# Dummies, treatment effect average about 801 dollars
summary(lm(re78 ~ factor(age)
                  + factor(educ)
                  + factor(race)
                  + factor(marr)
                  + factor(nodeg)
                  + factor(trt) - 1,
                  data = dft))
## 
## Call:
## lm(formula = re78 ~ factor(age) + factor(educ) + factor(race) + 
##     factor(marr) + factor(nodeg) + factor(trt) - 1, data = dft)
## 
## Residuals:
##    Min     1Q Median     3Q    Max 
## -14412  -4409  -1265   3139  54012 
## 
## Coefficients: (2 not defined because of singularities)
##                Estimate Std. Error t value Pr(>|t|)  
## factor(age)17    2290.8     6400.4   0.358   0.7205  
## factor(age)18    2445.2     6380.0   0.383   0.7016  
## factor(age)19    3540.8     6384.6   0.555   0.5794  
## factor(age)20    3558.8     6389.4   0.557   0.5777  
## factor(age)21    4670.1     6412.9   0.728   0.4667  
## factor(age)22    2975.7     6262.4   0.475   0.6348  
## factor(age)23    2278.3     6416.7   0.355   0.7227  
## factor(age)24    2362.0     6417.2   0.368   0.7129  
## factor(age)25    2430.8     6395.4   0.380   0.7040  
## factor(age)26    4223.8     6419.2   0.658   0.5108  
## factor(age)27    2720.7     6413.6   0.424   0.6716  
## factor(age)28    4311.2     6428.8   0.671   0.5027  
## factor(age)29    2895.7     6475.4   0.447   0.6549  
## factor(age)30    2044.7     6593.6   0.310   0.7566  
## factor(age)31    3847.6     6471.3   0.595   0.5523  
## factor(age)32    4379.5     6762.9   0.648   0.5175  
## factor(age)33    3880.4     6645.4   0.584   0.5595  
## factor(age)34    2741.6     6613.4   0.415   0.6786  
## factor(age)35    2397.2     6770.4   0.354   0.7234  
## factor(age)36    4081.3     6758.7   0.604   0.5461  
## factor(age)37    1315.2     7296.6   0.180   0.8570  
## factor(age)38    1610.3     6740.8   0.239   0.8113  
## factor(age)39    -545.5     6937.2  -0.079   0.9374  
## factor(age)40   14642.9     7741.1   1.892   0.0590 .
## factor(age)41   -1436.5     6972.1  -0.206   0.8368  
## factor(age)42    3079.3     6992.4   0.440   0.6598  
## factor(age)43    1562.5     7719.2   0.202   0.8397  
## factor(age)44    3459.6     7062.6   0.490   0.6244  
## factor(age)45    6597.1     7347.2   0.898   0.3696  
## factor(age)46   -1436.8     7354.4  -0.195   0.8452  
## factor(age)48    5423.7     9353.1   0.580   0.5622  
## factor(age)49   12421.9     8898.2   1.396   0.1632  
## factor(age)50    2091.2     7718.9   0.271   0.7865  
## factor(age)54    6628.8     8864.7   0.748   0.4549  
## factor(age)55    7456.5     6243.0   1.194   0.2328  
## factor(educ)4    1939.6     6912.8   0.281   0.7791  
## factor(educ)5    2173.8     6919.6   0.314   0.7535  
## factor(educ)6     496.9     6723.6   0.074   0.9411  
## factor(educ)7    3107.2     6476.6   0.480   0.6316  
## factor(educ)8    2867.3     6322.4   0.454   0.6503  
## factor(educ)9    3836.5     6298.3   0.609   0.5426  
## factor(educ)10   3028.1     6288.2   0.482   0.6303  
## factor(educ)11   2796.4     6283.5   0.445   0.6564  
## factor(educ)12   3351.0     6291.2   0.533   0.5945  
## factor(educ)13   4125.2     6420.0   0.643   0.5207  
## factor(educ)14  11173.8     6586.6   1.696   0.0903 .
## factor(educ)15   9616.1     7679.7   1.252   0.2110  
## factor(educ)16       NA         NA      NA       NA  
## factor(race)1   -1612.7      822.1  -1.962   0.0502 .
## factor(race)2     -95.0     1082.8  -0.088   0.9301  
## factor(marr)1     626.9      670.8   0.935   0.3504  
## factor(nodeg)1       NA         NA      NA       NA  
## factor(trt)1      801.0      488.2   1.641   0.1013  
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 6189 on 671 degrees of freedom
## Multiple R-squared:  0.4826, Adjusted R-squared:  0.4433 
## F-statistic: 12.27 on 51 and 671 DF,  p-value: < 2.2e-16
# cts Controls and treatment effects about 761 dollars
summary(lm(re78 ~  age + I(age^2) +
                   educ + I(educ^2) +
                  + factor(race)
                  + factor(marr)
                  + factor(nodeg)
                  + factor(trt) - 1,
                  data = dft))
## 
## Call:
## lm(formula = re78 ~ age + I(age^2) + educ + I(educ^2) + +factor(race) + 
##     factor(marr) + factor(nodeg) + factor(trt) - 1, data = dft)
## 
## Residuals:
##    Min     1Q Median     3Q    Max 
##  -9154  -4677  -1556   3246  54687 
## 
## Coefficients:
##                  Estimate Std. Error t value Pr(>|t|)  
## age              -29.1127   213.9845  -0.136   0.8918  
## I(age^2)           0.6658     3.5882   0.186   0.8529  
## educ           -1150.6075  1021.2110  -1.127   0.2602  
## I(educ^2)         76.1351    56.0966   1.357   0.1751  
## factor(race)0  10204.2957  5360.7995   1.904   0.0574 .
## factor(race)1   8421.7101  5328.1587   1.581   0.1144  
## factor(race)2  10059.7028  5298.4854   1.899   0.0580 .
## factor(marr)1    579.8210   650.3352   0.892   0.3729  
## factor(nodeg)1   175.0729   913.5232   0.192   0.8481  
## factor(trt)1     761.4122   472.5196   1.611   0.1075  
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 6206 on 712 degrees of freedom
## Multiple R-squared:  0.448,  Adjusted R-squared:  0.4402 
## F-statistic: 57.78 on 10 and 712 DF,  p-value: < 2.2e-16
# Treatment interactions by marriage status, 476 unmarried, vs 2216 for married
summary(lm(re78 ~  age + I(age^2) +
                   educ + I(educ^2) +
                  + factor(race)
                  + factor(marr)
                  + factor(nodeg)
                  + factor(marr):factor(trt) - 1,
                  data = dft))
## 
## Call:
## lm(formula = re78 ~ age + I(age^2) + educ + I(educ^2) + +factor(race) + 
##     factor(marr) + factor(nodeg) + factor(marr):factor(trt) - 
##     1, data = dft)
## 
## Residuals:
##    Min     1Q Median     3Q    Max 
##  -9999  -4554  -1552   3132  54838 
## 
## Coefficients:
##                              Estimate Std. Error t value Pr(>|t|)  
## age                          -14.6509   214.1161  -0.068   0.9455  
## I(age^2)                       0.3773     3.5922   0.105   0.9164  
## educ                       -1124.9544  1020.7625  -1.102   0.2708  
## I(educ^2)                     75.1305    56.0673   1.340   0.1807  
## factor(race)0               9986.4688  5359.9102   1.863   0.0628 .
## factor(race)1               8214.9578  5327.0673   1.542   0.1235  
## factor(race)2               9845.8189  5297.5758   1.859   0.0635 .
## factor(marr)1               -156.2047   844.1521  -0.185   0.8532  
## factor(nodeg)1               176.8634   912.9685   0.194   0.8464  
## factor(marr)0:factor(trt)1   476.6372   516.1797   0.923   0.3561  
## factor(marr)1:factor(trt)1  2216.3634  1164.8513   1.903   0.0575 .
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 6202 on 711 degrees of freedom
## Multiple R-squared:  0.4494, Adjusted R-squared:  0.4409 
## F-statistic: 52.76 on 11 and 711 DF,  p-value: < 2.2e-16
# Treatment interactions by marriage status, 453 vs 1070 each age group
summary(lm(re78 ~  age + I(age^2) + factor(age_m2) +
                   educ + I(educ^2) +
                  + factor(race)
                  + factor(marr)
                  + factor(nodeg)
                  + factor(age_m2):factor(trt) - 1,
                  data = dft))
## 
## Call:
## lm(formula = re78 ~ age + I(age^2) + factor(age_m2) + educ + 
##     I(educ^2) + +factor(race) + factor(marr) + factor(nodeg) + 
##     factor(age_m2):factor(trt) - 1, data = dft)
## 
## Residuals:
##    Min     1Q Median     3Q    Max 
##  -9281  -4664  -1521   3260  54557 
## 
## Coefficients:
##                                Estimate Std. Error t value Pr(>|t|)  
## age                             73.5664   317.3603   0.232    0.817  
## I(age^2)                        -0.7038     4.7486  -0.148    0.882  
## factor(age_m2)1               8890.8921  6129.7194   1.450    0.147  
## factor(age_m2)2               8237.1456  6661.6040   1.237    0.217  
## educ                         -1137.5178  1025.7038  -1.109    0.268  
## I(educ^2)                       75.1320    56.3181   1.334    0.183  
## factor(race)1                -1782.6432   804.4433  -2.216    0.027 *
## factor(race)2                 -130.3132  1055.7904  -0.123    0.902  
## factor(marr)1                  590.5769   651.5151   0.906    0.365  
## factor(nodeg)1                 139.6452   916.7444   0.152    0.879  
## factor(age_m2)1:factor(trt)1   453.7963   659.7260   0.688    0.492  
## factor(age_m2)2:factor(trt)1  1070.8127   677.9949   1.579    0.115  
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 6212 on 710 degrees of freedom
## Multiple R-squared:  0.4484, Adjusted R-squared:  0.4391 
## F-statistic:  48.1 on 12 and 710 DF,  p-value: < 2.2e-16
# Treatment interactions by marriage status, greater effect for older married + younger married
summary(lm(re78 ~  age + I(age^2) + factor(age_m2) +
                   educ + I(educ^2) +
                  + factor(race)
                  + factor(marr)
                  + factor(nodeg)
                  + factor(marr):factor(age_m2):factor(trt) - 1,
                  data = dft))
## 
## Call:
## lm(formula = re78 ~ age + I(age^2) + factor(age_m2) + educ + 
##     I(educ^2) + +factor(race) + factor(marr) + factor(nodeg) + 
##     factor(marr):factor(age_m2):factor(trt) - 1, data = dft)
## 
## Residuals:
##    Min     1Q Median     3Q    Max 
##  -9725  -4576  -1573   3089  54691 
## 
## Coefficients: (3 not defined because of singularities)
##                                              Estimate Std. Error t value
## age                                           37.3770   319.8439   0.117
## I(age^2)                                      -0.2698     4.7852  -0.056
## factor(age_m2)1                            10519.8273  6563.8924   1.603
## factor(age_m2)2                             9557.6915  6655.2092   1.436
## educ                                       -1107.2556  1026.5070  -1.079
## I(educ^2)                                     74.1506    56.3551   1.316
## factor(race)1                              -1732.3553   806.1276  -2.149
## factor(race)2                               -143.3612  1056.8346  -0.136
## factor(marr)1                               1217.5255  1196.2563   1.018
## factor(nodeg)1                               164.2353   918.1533   0.179
## factor(age_m2)1:factor(marr)0:factor(trt)0 -1378.9391  2164.2994  -0.637
## factor(age_m2)2:factor(marr)0:factor(trt)0  -640.4138   785.5838  -0.815
## factor(age_m2)1:factor(marr)1:factor(trt)0 -1595.6462  2363.5262  -0.675
## factor(age_m2)2:factor(marr)1:factor(trt)0 -2339.1547  1346.3197  -1.737
## factor(age_m2)1:factor(marr)0:factor(trt)1 -1029.3096  2177.4284  -0.473
## factor(age_m2)2:factor(marr)0:factor(trt)1         NA         NA      NA
## factor(age_m2)1:factor(marr)1:factor(trt)1         NA         NA      NA
## factor(age_m2)2:factor(marr)1:factor(trt)1         NA         NA      NA
##                                            Pr(>|t|)  
## age                                          0.9070  
## I(age^2)                                     0.9551  
## factor(age_m2)1                              0.1095  
## factor(age_m2)2                              0.1514  
## educ                                         0.2811  
## I(educ^2)                                    0.1887  
## factor(race)1                                0.0320 *
## factor(race)2                                0.8921  
## factor(marr)1                                0.3091  
## factor(nodeg)1                               0.8581  
## factor(age_m2)1:factor(marr)0:factor(trt)0   0.5242  
## factor(age_m2)2:factor(marr)0:factor(trt)0   0.4152  
## factor(age_m2)1:factor(marr)1:factor(trt)0   0.4998  
## factor(age_m2)2:factor(marr)1:factor(trt)0   0.0827 .
## factor(age_m2)1:factor(marr)0:factor(trt)1   0.6366  
## factor(age_m2)2:factor(marr)0:factor(trt)1       NA  
## factor(age_m2)1:factor(marr)1:factor(trt)1       NA  
## factor(age_m2)2:factor(marr)1:factor(trt)1       NA  
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 6215 on 707 degrees of freedom
## Multiple R-squared:  0.4502, Adjusted R-squared:  0.4386 
## F-statistic:  38.6 on 15 and 707 DF,  p-value: < 2.2e-16

Regress Wage on Training Status

Linear Binary Problem

# Store Regression Results
mt_model <- model.matrix( ~ age + I(age^2) + factor(age_m2) +
                            educ + I(educ^2) +
                          + factor(race)
                          + factor(marr)
                          + factor(nodeg)
                          + factor(re75_zero)
                          + factor(age_m2):factor(trt),
                          data = dft)
rs_wage_on_trt = lm(re78 ~ mt_model - 1, data = dft)
print(summary(rs_wage_on_trt))
## 
## Call:
## lm(formula = re78 ~ mt_model - 1, data = dft)
## 
## Residuals:
##    Min     1Q Median     3Q    Max 
##  -9457  -4550  -1555   3407  54173 
## 
## Coefficients:
##                                        Estimate Std. Error t value Pr(>|t|)  
## mt_model(Intercept)                  10052.3179  6160.1001   1.632   0.1032  
## mt_modelage                             52.9924   317.1787   0.167   0.8674  
## mt_modelI(age^2)                        -0.2876     4.7488  -0.061   0.9517  
## mt_modelfactor(age_m2)2               -588.1949  1031.9301  -0.570   0.5689  
## mt_modeleduc                         -1263.1725  1027.0533  -1.230   0.2191  
## mt_modelI(educ^2)                       80.5134    56.3343   1.429   0.1534  
## mt_modelfactor(race)1                -1676.5079   805.8386  -2.080   0.0378 *
## mt_modelfactor(race)2                  -86.6527  1054.7258  -0.082   0.9345  
## mt_modelfactor(marr)1                  506.4687   652.5609   0.776   0.4379  
## mt_modelfactor(nodeg)1                 162.4342   915.6448   0.177   0.8592  
## mt_modelfactor(re75_zero)1            -823.5052   486.9104  -1.691   0.0912 .
## mt_modelfactor(age_m2)1:factor(trt)1   379.3939   660.3304   0.575   0.5658  
## mt_modelfactor(age_m2)2:factor(trt)1  1079.5020   677.1278   1.594   0.1113  
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 6204 on 709 degrees of freedom
## Multiple R-squared:  0.4506, Adjusted R-squared:  0.4406 
## F-statistic: 44.74 on 13 and 709 DF,  p-value: < 2.2e-16
rs_wage_on_trt_tidy = tidy(rs_wage_on_trt)
rs_wage_on_trt_tidy
## # A tibble: 13 x 5
##    term                                  estimate std.error statistic p.value
##    <chr>                                    <dbl>     <dbl>     <dbl>   <dbl>
##  1 mt_model(Intercept)                  10052.      6160.      1.63    0.103 
##  2 mt_modelage                             53.0      317.      0.167   0.867 
##  3 mt_modelI(age^2)                        -0.288      4.75   -0.0606  0.952 
##  4 mt_modelfactor(age_m2)2               -588.      1032.     -0.570   0.569 
##  5 mt_modeleduc                         -1263.      1027.     -1.23    0.219 
##  6 mt_modelI(educ^2)                       80.5       56.3     1.43    0.153 
##  7 mt_modelfactor(race)1                -1677.       806.     -2.08    0.0378
##  8 mt_modelfactor(race)2                  -86.7     1055.     -0.0822  0.935 
##  9 mt_modelfactor(marr)1                  506.       653.      0.776   0.438 
## 10 mt_modelfactor(nodeg)1                 162.       916.      0.177   0.859 
## 11 mt_modelfactor(re75_zero)1            -824.       487.     -1.69    0.0912
## 12 mt_modelfactor(age_m2)1:factor(trt)1   379.       660.      0.575   0.566 
## 13 mt_modelfactor(age_m2)2:factor(trt)1  1080.       677.      1.59    0.111

Construct Input Arrays \(A_i\) and \(\alpha_i\)

Linear Binary Regression

Multiply coefficient vector by covariate matrix to generate A vector that is child/individual specific.

# Estimates Table
head(rs_wage_on_trt_tidy, 6)
## # A tibble: 6 x 5
##   term                     estimate std.error statistic p.value
##   <chr>                       <dbl>     <dbl>     <dbl>   <dbl>
## 1 mt_model(Intercept)     10052.      6160.      1.63     0.103
## 2 mt_modelage                53.0      317.      0.167    0.867
## 3 mt_modelI(age^2)           -0.288      4.75   -0.0606   0.952
## 4 mt_modelfactor(age_m2)2  -588.      1032.     -0.570    0.569
## 5 mt_modeleduc            -1263.      1027.     -1.23     0.219
## 6 mt_modelI(educ^2)          80.5       56.3     1.43     0.153
# Covariates
head(mt_model, 5)
##   (Intercept) age I(age^2) factor(age_m2)2 educ I(educ^2) factor(race)1
## 1           1  23      529               0   10       100             1
## 2           1  26      676               1   12       144             0
## 3           1  22      484               0    9        81             1
## 4           1  34     1156               1    9        81             1
## 5           1  18      324               0    9        81             1
##   factor(race)2 factor(marr)1 factor(nodeg)1 factor(re75_zero)1
## 1             0             0              1                  1
## 2             0             0              0                  1
## 3             0             0              1                  1
## 4             0             0              1                  0
## 5             0             0              1                  1
##   factor(age_m2)1:factor(trt)1 factor(age_m2)2:factor(trt)1
## 1                            0                            0
## 2                            0                            0
## 3                            0                            0
## 4                            0                            0
## 5                            0                            0
# Covariates coefficients from regression (including constant)
ar_fl_cova_esti <- as.matrix(rs_wage_on_trt_tidy %>% filter(!str_detect(term, 'trt')) %>% select(estimate))
ar_fl_main_esti <- as.matrix(rs_wage_on_trt_tidy %>% filter(str_detect(term, 'trt')) %>% select(estimate))
head(ar_fl_cova_esti, 5)
##           estimate
## [1,] 10052.3178902
## [2,]    52.9923510
## [3,]    -0.2876203
## [4,]  -588.1948533
## [5,] -1263.1724715
head(ar_fl_main_esti, 5)
##       estimate
## [1,]  379.3939
## [2,] 1079.5020
# Select Matrix subcomponents
mt_cova <- as.matrix(as_tibble(mt_model) %>% select(-contains("trt")))
mt_intr <- model.matrix(~ factor(marr) - 1, data = dft)

# Generate A_i, use mt_cova_wth_const
ar_A_m <- mt_cova %*% ar_fl_cova_esti
head(ar_A_m, 5)
##      estimate
## [1,] 4201.030
## [2,] 6259.851
## [3,] 3894.398
## [4,] 4572.335
## [5,] 3728.447
# Generate alpha_i
ar_alpha_m <- mt_intr %*% ar_fl_main_esti
head(ar_alpha_m, 5)
##   estimate
## 1 379.3939
## 2 379.3939
## 3 379.3939
## 4 379.3939
## 5 379.3939

Individual Weight

# Child Weight
ar_beta_m <- rep(1/length(ar_A_m), times=length(ar_A_m))

Matrix with Required Inputs for Allocation

Linear Binary Matrix

# Initate Dataframe that will store all estimates and optimal allocation relevant information
# combine identifying key information along with estimation A, alpha results
# note that we only need indi.id as key
mt_opti <- cbind(ar_alpha_m, ar_A_m, ar_beta_m)
ar_st_varnames <- c('alpha', 'A', 'beta')
df_esti_alpha_A_beta <- as_tibble(mt_opti) %>% rename_all(~c(ar_st_varnames))
tb_key_alpha_A_beta <- bind_cols(dft, df_esti_alpha_A_beta) %>%
              select(one_of(c('id', 'trt', 'age', 'educ', 'race', 'marr', 'nodeg', 're78',
                              ar_st_varnames)))

# Rescale A and alpha to deal more easily with large powers
tb_key_alpha_A_beta <- tb_key_alpha_A_beta %>%
                          mutate(alpha = alpha/1000, A = A/1000)

# Need to only include the smokers here
# tb_key_alpha_A_beta <- tb_key_alpha_A_beta %>% filter(trt == 0)

# Unique beta, A, and alpha check
tb_opti_unique <- tb_key_alpha_A_beta %>% group_by(!!!syms(ar_st_varnames)) %>%
                    arrange(!!!syms(ar_st_varnames)) %>%
                    summarise(n_obs_group=n())
## `summarise()` has grouped output by 'alpha', 'A'. You can override using the `.groups` argument.
# Show cars
head(tb_key_alpha_A_beta, 32)
##    id trt age educ race marr nodeg      re78     alpha        A        beta
## 1   1   0  23   10    1    0     1     0.000 0.3793939 4.201030 0.001385042
## 2   2   0  26   12    0    0     0 12383.680 0.3793939 6.259851 0.001385042
## 3   3   0  22    9    1    0     1     0.000 0.3793939 3.894398 0.001385042
## 4   4   0  34    9    1    0     1 14051.160 0.3793939 4.572335 0.001385042
## 5   5   0  18    9    1    0     1 10740.080 0.3793939 3.728447 0.001385042
## 6   6   0  45   11    1    0     1 11796.470 0.3793939 4.775996 0.001385042
## 7   7   0  18    9    1    0     1  9227.052 0.3793939 3.728447 0.001385042
## 8   8   0  27   12    1    0     0  3552.268 0.3793939 5.444597 0.001385042
## 9   9   0  24    8    0    0     1 10569.270 0.3793939 4.956678 0.001385042
## 10 10   0  34   11    1    1     1  6040.335 1.0795020 4.949491 0.001385042
## 11 11   0  20   13    1    0     0  9199.563 0.3793939 6.506135 0.001385042
## 12 12   0  24    4    2    0     1  3880.833 0.3793939 6.058071 0.001385042
## 13 13   0  36   10    1    0     1     0.000 0.3793939 4.081130 0.001385042
## 14 14   0  21   10    1    0     1     0.000 0.3793939 4.943861 0.001385042
## 15 15   0  28   12    1    1     0 10067.430 1.0795020 5.988239 0.001385042
## 16 16   0  21   14    1    0     0  5775.062 0.3793939 6.634520 0.001385042
## 17 17   0  28    9    1    0     1     0.000 0.3793939 3.537871 0.001385042
## 18 18   0  21   11    1    1     1  8976.826 1.0795020 5.877939 0.001385042
## 19 19   0  27    7    1    1     1     0.000 1.0795020 3.957082 0.001385042
## 20 20   0  18   12    0    0     0  2502.526 0.3793939 7.348854 0.001385042
## 21 21   0  19   11    0    0     1     0.000 0.3793939 6.141498 0.001385042
## 22 22   0  30   13    1    0     0     0.000 0.3793939 6.304054 0.001385042
## 23 23   0  26   11    2    0     1     0.000 0.3793939 6.570501 0.001385042
## 24 24   0  20    8    1    0     1     0.000 0.3793939 3.707017 0.001385042
## 25 25   0  31    4    0    0     1     0.000 0.3793939 7.228442 0.001385042
## 26 26   0  18   12    1    0     0 14444.710 0.3793939 5.672347 0.001385042
## 27 27   0  34   12    1    0     0  2113.722 0.3793939 4.869224 0.001385042
## 28 28   0  24   10    1    0     1  7618.639 0.3793939 3.652309 0.001385042
## 29 29   0  22    8    2    0     1  9920.945 0.3793939 5.378697 0.001385042
## 30 30   0  28    8    1    0     1  4623.188 0.3793939 4.255820 0.001385042
## 31 31   0  25   11    1    0     1  4196.375 0.3793939 4.118817 0.001385042
## 32 32   0  39    9    1    0     1     0.000 0.3793939 3.908810 0.001385042

Optimal Linear Allocations

svr_inpalc <- 'opti_alloc_queue'

# Child Count
it_obs = dim(tb_opti_unique)[1]

# Vector of Planner Preference
ar_rho <- c(-100, 0.8)
ar_rho <- c(-50, -25, -10)
ar_rho <- c(-100, -5, -1, 0.1, 0.6, 0.8)
# ar_rho <- c(seq(-200, -100, length.out=5), seq(-100, -25, length.out=5), seq(-25, -5, length.out=5), seq(-5, -1, length.out=5), seq(-1, -0.01, length.out=5), seq(0.01, 0.25, length.out=5), seq(0.25, 0.90, length.out=5))
# ar_rho <- c(-100, -5, -1, 0.1, 0.6, 0.99)
# ar_rho <- c(-20, -1, 0.05, 0.9)
# ar_rho <- c(-50, -40, -30, -20, -15, -10, -7.5, -5,-3,-2,-1)
# ar_rho = c(-100, -0.001,  0.95)
ar_rho = c(-100, -0.001,  0.95)
ar_rho <- c(seq(-200, -100, length.out=5), seq(-100, -25, length.out=5), seq(-25, -5, length.out=5), seq(-5, -1, length.out=5), seq(-1, -0.01, length.out=5), seq(0.01, 0.25, length.out=5), seq(0.25, 0.90, length.out=5))
ar_rho <- 1 - (10^(c(seq(-2,2, length.out=30))))
ar_rho <- unique(ar_rho)

svr_rho_val <- 'rho_val'
ls_bin_solu_all_rhos <-
  ffp_opt_anlyz_rhgin_bin(tb_key_alpha_A_beta,
                          svr_id_i = 'id',
                          svr_A_i = 'A', svr_alpha_i = 'alpha', svr_beta_i = 'beta',
                          ar_rho = ar_rho, 
                          svr_rho = 'rho', svr_rho_val = svr_rho_val,
                          svr_inpalc = svr_inpalc,
                          svr_expout = 'opti_exp_outcome')
## Joining, by = "id"
df_all_rho <- ls_bin_solu_all_rhos$df_all_rho
df_all_rho_long <- ls_bin_solu_all_rhos$df_all_rho_long

# How many people have different ranks across rhos
it_how_many_vary_rank <- sum(df_all_rho$rank_max - df_all_rho$rank_min)
it_how_many_vary_rank
## [1] -94434

Save Results

# Change Variable names so that this can becombined with logit file later
df_all_rho <- df_all_rho %>% rename_at(vars(starts_with("rho_")), funs(str_replace(., "rk", "rk_wage")))
df_all_rho <- df_all_rho %>% rename(A_i_wage = A, alpha_i_wage = alpha, beta_i_wage = beta,
                                    rank_min_wage = rank_min, rank_max_wage = rank_max, avg_rank_wage = avg_rank)

# Save File
if (bl_save_rda) {
  df_opt_lalonde_training_wage <- df_all_rho
  usethis::use_data(df_opt_lalonde_training_wage, df_opt_lalonde_training_wage, overwrite = TRUE)
}

Change in Rank along rho

# get rank when wage rho = 1
df_all_rho_rho_c1 <- df_all_rho %>% select(id, rho_c1_rk_wage)

# Merge
df_all_rho_long_more <- df_all_rho_long %>% mutate(rho = as.numeric(rho)) %>%
                      left_join(df_all_rho_rho_c1, by='id')

# Select subset to graph
df_rank_graph <- df_all_rho_long_more %>%
                    mutate(id = factor(id)) %>%
                    filter((id == 167) |  # utilitarian rank = 1
                           (id == 175) |  # utilitarian rank = 101
                           (id == 3710)  |  # utilitarian rank = 202
                           (id == 2151) |  # utilitarian rank = 306
                           (id == 27)|  # utilitarian rank = 405
                           (id == 58) |  # utilitarian rank = 503
                           (id == 184) |  # utilitarian rank = 603
                           (id == 1481)   # utilitarian rank = 701
                             ) %>%
                    mutate(one_minus_rho = 1 - !!sym(svr_rho_val)) %>%
                    mutate(rho_c1_rk_wage = factor(rho_c1_rk_wage))

df_rank_graph$rho_c1_rk_wage
##   [1] 405 405 405 405 405 405 405 405 405 405 405 405 405 405 405 405 405 405
##  [19] 405 405 405 405 405 405 405 405 405 405 405 405 503 503 503 503 503 503
##  [37] 503 503 503 503 503 503 503 503 503 503 503 503 503 503 503 503 503 503
##  [55] 503 503 503 503 503 503 1   1   1   1   1   1   1   1   1   1   1   1  
##  [73] 1   1   1   1   1   1   1   1   1   1   1   1   1   1   1   1   1   1  
##  [91] 101 101 101 101 101 101 101 101 101 101 101 101 101 101 101 101 101 101
## [109] 101 101 101 101 101 101 101 101 101 101 101 101 603 603 603 603 603 603
## [127] 603 603 603 603 603 603 603 603 603 603 603 603 603 603 603 603 603 603
## [145] 603 603 603 603 603 603 202 202 202 202 202 202 202 202 202 202 202 202
## [163] 202 202 202 202 202 202 202 202 202 202 202 202 202 202 202 202 202 202
## [181] 701 701 701 701 701 701 701 701 701 701 701 701 701 701 701 701 701 701
## [199] 701 701 701 701 701 701 701 701 701 701 701 701 306 306 306 306 306 306
## [217] 306 306 306 306 306 306 306 306 306 306 306 306 306 306 306 306 306 306
## [235] 306 306 306 306 306 306
## Levels: 1 101 202 306 405 503 603 701
df_rank_graph$id <- factor(df_rank_graph$rho_c1_rk_wage,
                           labels = c('Rank= 1 when λ=0.99', #200
                                      'Rank= 101 when λ=0.99', #110
                                      'Rank= 201 when λ=0.99', #95
                                      'Rank= 301 when λ=0.99', #217
                                      'Rank= 401 when λ=0.99', #274
                                      'Rank= 501 when λ=0.99', #101
                                      'Rank= 601 when λ=0.99', #131
                                      'Rank= 701 when λ=0.99' #3610
                                       ))

# x-labels
x.labels <- c('λ=0.99', 'λ=0.90', 'λ=0', 'λ=-10', 'λ=-100')
x.breaks <- c(0.01, 0.10, 1, 10, 100)

# title line 2
title_line1 <- sprintf("LINEAR WAGE: Optimal Tageting Queue, NSW Lalonde (AER, 1986)")
title_line2 <- sprintf("How Do Allocation Rankings (Targeting Queue) Shift with λ?")

# Graph Results--Draw
line.plot <- df_rank_graph %>%
  ggplot(aes(x=one_minus_rho, y=!!sym(svr_inpalc),
             group=fct_rev(id),
             colour=fct_rev(id),
             size=2)) +
  # geom_line(aes(linetype =fct_rev(id)), size=0.75) +
  geom_line(size=0.75) +
  geom_vline(xintercept=c(1), linetype="dotted") +
  labs(title = paste0(title_line1, '\n', title_line2),
       x = 'log10 Rescale of λ, Log10(λ)\nλ=1 Utilitarian, λ=-infty Rawlsian',
       y = 'Optimal Targeting Queue Rank (1=highest)',
       caption = 'Training RCT with 297 treated, 425 untreated. Optimal Binary Targeting Queue.') +
  scale_x_continuous(trans='log10', labels = x.labels, breaks = x.breaks) +
  theme_bw()

print(line.plot)

Bump Plot for Optimal Binary Allocations

# tb_opti_alloc_all_rho_long %>%
#   ggplot(aes(x = rho, y = rank, group = id)) +
#     geom_line(aes(color = race, alpha = 1), size = 2) +
#     geom_point(aes(color = race, alpha = 1), size = 4) +
#     scale_x_discrete(expand = c(0.85,0))+
#     scale_y_reverse(breaks = 1:nrow(tb_opti_alloc_all_rho_long))+
#     theme(legend.position = "none") +
#     labs(x = "Equality vs Efficiency",
#          y = "Rank",
#          title = "Binary Allocation Rank, which untrained to receive training first") +
#     ffy_opt_ghthm_dk() +
#     geom_text(data =tb_opti_alloc_all_rho,aes(y=rho_c1_rk,x=0.6,label=id),hjust="right")