Function Test Binary Allocation with Linear Regression • Optimal Allocation

The effects of mother’s smoking status on birth weight. If we aim to improve health outcomes, what is the priority sequence of mothers whom we would want to discourage from smoking.

Load Packages and Data

Load Dependencies

rm(list = ls(all.names = TRUE))
options(knitr.duplicate.label = 'allow')

library(dplyr)
library(tidyr)
library(tibble)
library(stringr)
library(broom)
library(forcats)

library(ggplot2)

library(REconTools)

library(PrjOptiAlloc)

library(knitr)
library(kableExtra)

Load Data

Generate four categories by initial height and mother’s education levels combinations.

# Load Data
data(df_opt_birthwt)

# Summarize
str(df_opt_birthwt)

## 'data.frame':    189 obs. of  11 variables:
##  $ Index: int  85 86 87 88 89 91 92 93 94 95 ...
##  $ low  : int  0 0 0 0 0 0 0 0 0 0 ...
##  $ age  : int  19 33 20 21 18 21 22 17 29 26 ...
##  $ lwt  : int  182 155 105 108 107 124 118 103 123 113 ...
##  $ race : int  2 3 1 1 1 3 1 3 1 1 ...
##  $ smoke: int  0 0 1 1 1 0 0 0 1 1 ...
##  $ ptl  : int  0 0 0 0 0 0 0 0 0 0 ...
##  $ ht   : int  0 0 0 0 0 0 0 0 0 0 ...
##  $ ui   : int  1 0 0 1 1 0 0 0 0 0 ...
##  $ ftv  : int  0 3 1 2 0 0 1 1 1 0 ...
##  $ bwt  : int  2523 2551 2557 2594 2600 2622 2637 2637 2663 2665 ...

summary(df_opt_birthwt)

##      Index            low              age             lwt       
##  Min.   :  4.0   Min.   :0.0000   Min.   :14.00   Min.   : 80.0  
##  1st Qu.: 68.0   1st Qu.:0.0000   1st Qu.:19.00   1st Qu.:110.0  
##  Median :123.0   Median :0.0000   Median :23.00   Median :121.0  
##  Mean   :121.1   Mean   :0.3122   Mean   :23.24   Mean   :129.8  
##  3rd Qu.:176.0   3rd Qu.:1.0000   3rd Qu.:26.00   3rd Qu.:140.0  
##  Max.   :226.0   Max.   :1.0000   Max.   :45.00   Max.   :250.0  
##       race           smoke             ptl               ht         
##  Min.   :1.000   Min.   :0.0000   Min.   :0.0000   Min.   :0.00000  
##  1st Qu.:1.000   1st Qu.:0.0000   1st Qu.:0.0000   1st Qu.:0.00000  
##  Median :1.000   Median :0.0000   Median :0.0000   Median :0.00000  
##  Mean   :1.847   Mean   :0.3915   Mean   :0.1958   Mean   :0.06349  
##  3rd Qu.:3.000   3rd Qu.:1.0000   3rd Qu.:0.0000   3rd Qu.:0.00000  
##  Max.   :3.000   Max.   :1.0000   Max.   :3.0000   Max.   :1.00000  
##        ui              ftv              bwt      
##  Min.   :0.0000   Min.   :0.0000   Min.   : 709  
##  1st Qu.:0.0000   1st Qu.:0.0000   1st Qu.:2414  
##  Median :0.0000   Median :0.0000   Median :2977  
##  Mean   :0.1481   Mean   :0.7937   Mean   :2945  
##  3rd Qu.:0.0000   3rd Qu.:1.0000   3rd Qu.:3487  
##  Max.   :1.0000   Max.   :6.0000   Max.   :4990

# Generate Discrete Version of continuous variables
df_opt_birthwt <- df_opt_birthwt %>%
    mutate(momwgtLowHigh = cut(lwt,
                               breaks=c(-Inf, 129, Inf),
                               labels=c("LW","HW"))) %>%
    mutate(mombirthage = cut(age,
                               breaks=c(-Inf, 24, Inf),
                               labels=c("young","older"))) %>%
    mutate(ftvm3 = cut(ftv,
                               breaks=c(-Inf, 1, 2, Inf),
                               labels=c("novisit","1visit","morevisit"))) %>%
    mutate(ftvm3 = cut(ftv,
                               breaks=c(-Inf, 1, 2, Inf),
                               labels=c("novisit","1visit","morevisit")))

# Generate Not Smoke Variable
not_smoke_levels <- c("1" = "0", "0" = "1")
df_opt_birthwt <- df_opt_birthwt %>% 
    mutate(notsmoke = case_when(smoke == 0 ~ 1, TRUE ~ 0))

attach(df_opt_birthwt)
summary(df_opt_birthwt)

##      Index            low              age             lwt       
##  Min.   :  4.0   Min.   :0.0000   Min.   :14.00   Min.   : 80.0  
##  1st Qu.: 68.0   1st Qu.:0.0000   1st Qu.:19.00   1st Qu.:110.0  
##  Median :123.0   Median :0.0000   Median :23.00   Median :121.0  
##  Mean   :121.1   Mean   :0.3122   Mean   :23.24   Mean   :129.8  
##  3rd Qu.:176.0   3rd Qu.:1.0000   3rd Qu.:26.00   3rd Qu.:140.0  
##  Max.   :226.0   Max.   :1.0000   Max.   :45.00   Max.   :250.0  
##       race           smoke             ptl               ht         
##  Min.   :1.000   Min.   :0.0000   Min.   :0.0000   Min.   :0.00000  
##  1st Qu.:1.000   1st Qu.:0.0000   1st Qu.:0.0000   1st Qu.:0.00000  
##  Median :1.000   Median :0.0000   Median :0.0000   Median :0.00000  
##  Mean   :1.847   Mean   :0.3915   Mean   :0.1958   Mean   :0.06349  
##  3rd Qu.:3.000   3rd Qu.:1.0000   3rd Qu.:0.0000   3rd Qu.:0.00000  
##  Max.   :3.000   Max.   :1.0000   Max.   :3.0000   Max.   :1.00000  
##        ui              ftv              bwt       momwgtLowHigh mombirthage
##  Min.   :0.0000   Min.   :0.0000   Min.   : 709   LW:110        young:120  
##  1st Qu.:0.0000   1st Qu.:0.0000   1st Qu.:2414   HW: 79        older: 69  
##  Median :0.0000   Median :0.0000   Median :2977                            
##  Mean   :0.1481   Mean   :0.7937   Mean   :2945                            
##  3rd Qu.:0.0000   3rd Qu.:1.0000   3rd Qu.:3487                            
##  Max.   :1.0000   Max.   :6.0000   Max.   :4990                            
##        ftvm3        notsmoke     
##  novisit  :147   Min.   :0.0000  
##  1visit   : 30   1st Qu.:0.0000  
##  morevisit: 12   Median :1.0000  
##                  Mean   :0.6085  
##                  3rd Qu.:1.0000  
##                  Max.   :1.0000

Exploration

Tabulation

Tabulate:

# tabulate
df_opt_birthwt %>%
  group_by(race, smoke, ftv) %>%
  summarize(freq = n()) %>%
  pivot_wider(names_from = race, values_from = freq)

## `summarise()` has grouped output by 'race', 'smoke'. You can override using the `.groups` argument.

## # A tibble: 11 x 5
## # Groups:   smoke [2]
##    smoke   ftv   `1`   `2`   `3`
##    <int> <int> <int> <int> <int>
##  1     0     0    13     8    34
##  2     0     1    20     4    11
##  3     0     2     9     3     7
##  4     0     3     1    NA     2
##  5     0     4     1     1     1
##  6     1     0    30     6     9
##  7     1     1     9     2     1
##  8     1     2     9     1     1
##  9     1     3     3     1    NA
## 10     1     4     1    NA    NA
## 11     1     6    NA    NA     1

Summarize all, conditional on smoking or not:

# summarize all
REconTools::ff_summ_percentiles(df_opt_birthwt, bl_statsasrows = FALSE)

## Warning: `funs()` was deprecated in dplyr 0.8.0.
## Please use a list of either functions or lambdas: 
## 
##   # Simple named list: 
##   list(mean = mean, median = median)
## 
##   # Auto named with `tibble::lst()`: 
##   tibble::lst(mean, median)
## 
##   # Using lambdas
##   list(~ mean(., trim = .2), ~ median(., na.rm = TRUE))
## This warning is displayed once every 8 hours.
## Call `lifecycle::last_lifecycle_warnings()` to see where this warning was generated.

## Warning: `as.tibble()` was deprecated in tibble 2.0.0.
## Please use `as_tibble()` instead.
## The signature and semantics have changed, see `?as_tibble`.
## This warning is displayed once every 8 hours.
## Call `lifecycle::last_lifecycle_warnings()` to see where this warning was generated.

## Warning: attributes are not identical across measure variables;
## they will be dropped

## # A tibble: 12 x 19
##    var          n unique NAobs ZEROobs    mean      sd    cv   min    p01    p05
##    <chr>    <dbl>  <dbl> <dbl>   <dbl>   <dbl>   <dbl> <dbl> <dbl>  <dbl>  <dbl>
##  1 age        189     24     0       0 2.32e+1   5.30  0.228    14   14     16  
##  2 bwt        189    131     0       0 2.94e+3 729.    0.248   709 1121.  1801. 
##  3 ftv        189      6     0     100 7.94e-1   1.06  1.33      0    0      0  
##  4 ht         189      2     0     177 6.35e-2   0.244 3.85      0    0      0  
##  5 Index      189    189     0       0 1.21e+2  63.3   0.523     4   10.9   20.8
##  6 low        189      2     0     130 3.12e-1   0.465 1.49      0    0      0  
##  7 lwt        189     75     0       0 1.30e+2  30.6   0.236    80   85     94.4
##  8 notsmoke   189      2     0      74 6.08e-1   0.489 0.804     0    0      0  
##  9 ptl        189      4     0     159 1.96e-1   0.493 2.52      0    0      0  
## 10 race       189      3     0       0 1.85e+0   0.918 0.497     1    1      1  
## 11 smoke      189      2     0     115 3.92e-1   0.489 1.25      0    0      0  
## 12 ui         189      2     0     161 1.48e-1   0.356 2.40      0    0      0  
## # ... with 8 more variables: p10 <dbl>, p25 <dbl>, p50 <dbl>, p75 <dbl>,
## #   p90 <dbl>, p95 <dbl>, p99 <dbl>, max <dbl>

# summarize smoking only
REconTools::ff_summ_percentiles(df_opt_birthwt %>% filter(smoke == 0), bl_statsasrows = FALSE)

## Warning: attributes are not identical across measure variables;
## they will be dropped

## # A tibble: 12 x 19
##    var        n unique NAobs ZEROobs    mean      sd      cv   min    p01    p05
##    <chr>  <dbl>  <dbl> <dbl>   <dbl>   <dbl>   <dbl>   <dbl> <dbl>  <dbl>  <dbl>
##  1 age      115     23     0       0 2.34e+1   5.47    0.233    14   14.1   16  
##  2 bwt      115     87     0       0 3.06e+3 753.      0.246  1021 1350.  1721. 
##  3 ftv      115      5     0      55 8.17e-1   0.979   1.20      0    0      0  
##  4 ht       115      2     0     108 6.09e-2   0.240   3.95      0    0      0  
##  5 Index    115    115     0       0 1.31e+2  64.8     0.496    10   13.3   18.7
##  6 low      115      2     0      86 2.52e-1   0.436   1.73      0    0      0  
##  7 lwt      115     58     0       0 1.31e+2  28.4     0.217    85   89.8   96.7
##  8 notsm~   115      1     0       0 1   e+0   0       0         1    1      1  
##  9 ptl      115      3     0     103 1.22e-1   0.378   3.11      0    0      0  
## 10 race     115      3     0       0 2.10e+0   0.927   0.442     1    1      1  
## 11 smoke    115      1     0     115 0         0     NaN         0    0      0  
## 12 ui       115      2     0     100 1.30e-1   0.338   2.59      0    0      0  
## # ... with 8 more variables: p10 <dbl>, p25 <dbl>, p50 <dbl>, p75 <dbl>,
## #   p90 <dbl>, p95 <dbl>, p99 <dbl>, max <dbl>

# summarize non-smoking only, 74 smokers
REconTools::ff_summ_percentiles(df_opt_birthwt %>% filter(smoke == 1), bl_statsasrows = FALSE)

## Warning: attributes are not identical across measure variables;
## they will be dropped

## # A tibble: 12 x 19
##    var        n unique NAobs ZEROobs    mean      sd      cv   min    p01    p05
##    <chr>  <dbl>  <dbl> <dbl>   <dbl>   <dbl>   <dbl>   <dbl> <dbl>  <dbl>  <dbl>
##  1 age       74     21     0       0 2.29e+1   5.05    0.220    14 1.55e1   17  
##  2 bwt       74     61     0       0 2.77e+3 660.      0.238   709 1.02e3 1862. 
##  3 ftv       74      6     0      45 7.57e-1   1.18    1.56      0 0         0  
##  4 ht        74      2     0      69 6.76e-2   0.253   3.74      0 0         0  
##  5 Index     74     74     0       0 1.06e+2  58.1     0.548     4 9.11e0   22.6
##  6 low       74      2     0      44 4.05e-1   0.494   1.22      0 0         0  
##  7 lwt       74     45     0       0 1.28e+2  33.8     0.264    80 8.36e1   90  
##  8 notsm~    74      1     0      74 0         0     NaN         0 0         0  
##  9 ptl       74      4     0      56 3.11e-1   0.618   1.99      0 0         0  
## 10 race      74      3     0       0 1.46e+0   0.762   0.522     1 1   e0    1  
## 11 smoke     74      1     0       0 1   e+0   0       0         1 1   e0    1  
## 12 ui        74      2     0      61 1.76e-1   0.383   2.18      0 0         0  
## # ... with 8 more variables: p10 <dbl>, p25 <dbl>, p50 <dbl>, p75 <dbl>,
## #   p90 <dbl>, p95 <dbl>, p99 <dbl>, max <dbl>

Explorational Regressions

# birth weight on mother age, race, ht and ui
summary(lm(bwt ~ lwt + factor(mombirthage) + factor(race) + factor(ht) + factor(ui)))

## 
## Call:
## lm(formula = bwt ~ lwt + factor(mombirthage) + factor(race) + 
##     factor(ht) + factor(ui))
## 
## Residuals:
##      Min       1Q   Median       3Q      Max 
## -1594.45  -453.35    40.06   468.89  1891.04 
## 
## Coefficients:
##                          Estimate Std. Error t value Pr(>|t|)    
## (Intercept)              2572.173    237.094  10.849  < 2e-16 ***
## lwt                         5.216      1.735   3.006  0.00302 ** 
## factor(mombirthage)older -114.841    102.918  -1.116  0.26596    
## factor(race)2            -453.756    150.971  -3.006  0.00302 ** 
## factor(race)3            -216.550    108.069  -2.004  0.04657 *  
## factor(ht)1              -633.028    205.271  -3.084  0.00236 ** 
## factor(ui)1              -563.302    138.153  -4.077  6.8e-05 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 664.4 on 182 degrees of freedom
## Multiple R-squared:  0.1964, Adjusted R-squared:  0.1699 
## F-statistic: 7.412 on 6 and 182 DF,  p-value: 4.131e-07

# # summary(lm(bwt ~ factor(ftv) - 1))
# summary(lm(bwt ~ factor(ftv) + factor(ht) + factor(ui) + factor(ptl) - 1))

# Birth wegith on mother's weight last menstral cycle, and mother age, race and smoking interaction
summary(lm(bwt ~ lwt + age + factor(race) + factor(race):factor(notsmoke) - 1))

## 
## Call:
## lm(formula = bwt ~ lwt + age + factor(race) + factor(race):factor(notsmoke) - 
##     1)
## 
## Residuals:
##      Min       1Q   Median       3Q      Max 
## -2360.77  -434.48    42.18   469.28  1710.36 
## 
## Coefficients:
##                                 Estimate Std. Error t value Pr(>|t|)    
## lwt                                3.711      1.739   2.134 0.034154 *  
## age                               -4.760     10.054  -0.473 0.636500    
## factor(race)1                   2466.776    304.057   8.113 7.29e-14 ***
## factor(race)2                   2089.529    381.625   5.475 1.44e-07 ***
## factor(race)3                   2404.104    345.416   6.960 6.04e-11 ***
## factor(race)1:factor(notsmoke)1  570.605    143.546   3.975 0.000102 ***
## factor(race)2:factor(notsmoke)1  305.314    277.419   1.101 0.272553    
## factor(race)3:factor(notsmoke)1   75.981    216.468   0.351 0.725994    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 678.9 on 181 degrees of freedom
## Multiple R-squared:  0.952,  Adjusted R-squared:  0.9499 
## F-statistic: 448.9 on 8 and 181 DF,  p-value: < 2.2e-16

# summary(lm(bwt ~ lwt + factor(race) + (ftv) - 1))

Regression with Data and Construct Input Arrays

Binary Problem Regress Birth Weight on Mother’s Smoking Status

# A. Regress child birth weight on mother's age, race and mother's weight
# print(summary(lm(bwt ~ lwt + factor(mombirthage) + factor(race))))
# print(summary(lm(bwt ~ lwt + factor(mombirthage) + factor(race) + factor(ht) + factor(ui))))
# print(summary(lm(bwt ~ lwt + age + factor(race) + factor(ht) + factor(ui) + factor(ptl))))
# 
# print(summary(lm(bwt ~ lwt + age + factor(race) + factor(smoke))))
# print(summary(lm(bwt ~ lwt + age + factor(race) + factor(ht) + factor(ui) +  factor(smoke))))
# 
# print(summary(lm(bwt ~ lwt + age + factor(momwgtLowHigh):factor(smoke) - 1)))
# print(summary(lm(bwt ~ lwt + age + factor(ht) + factor(ui) + factor(race) + factor(race):factor(smoke))))
print(summary(lm(bwt ~ lwt + age + factor(race) + factor(race):factor(smoke) - 1)))

## 
## Call:
## lm(formula = bwt ~ lwt + age + factor(race) + factor(race):factor(smoke) - 
##     1)
## 
## Residuals:
##      Min       1Q   Median       3Q      Max 
## -2360.77  -434.48    42.18   469.28  1710.36 
## 
## Coefficients:
##                              Estimate Std. Error t value Pr(>|t|)    
## lwt                             3.711      1.739   2.134 0.034154 *  
## age                            -4.760     10.054  -0.473 0.636500    
## factor(race)1                3037.381    339.863   8.937 4.55e-16 ***
## factor(race)2                2394.843    344.390   6.954 6.25e-11 ***
## factor(race)3                2480.085    293.002   8.464 8.57e-15 ***
## factor(race)1:factor(smoke)1 -570.605    143.546  -3.975 0.000102 ***
## factor(race)2:factor(smoke)1 -305.314    277.419  -1.101 0.272553    
## factor(race)3:factor(smoke)1  -75.981    216.468  -0.351 0.725994    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 678.9 on 181 degrees of freedom
## Multiple R-squared:  0.952,  Adjusted R-squared:  0.9499 
## F-statistic: 448.9 on 8 and 181 DF,  p-value: < 2.2e-16

# Preferred way of appearing, in the following regression we can se:
# 1. more more power less MPG, Straight engine slightly higher MPG
# 2. V-shape engine car, going from auto to manual trans gain 4.1 MPG
# 2. straight shape engine, going from auto to manual trans gain 6.67 MPG

# Store Regression Results
mt_model <- model.matrix(~ lwt + age + factor(race) + factor(notsmoke):factor(race))
rs_wgt_on_smke = lm(bwt ~ mt_model - 1)
print(summary(rs_wgt_on_smke))

## 
## Call:
## lm(formula = bwt ~ mt_model - 1)
## 
## Residuals:
##      Min       1Q   Median       3Q      Max 
## -2360.77  -434.48    42.18   469.28  1710.36 
## 
## Coefficients:
##                                         Estimate Std. Error t value Pr(>|t|)
## mt_model(Intercept)                     2466.776    304.057   8.113 7.29e-14
## mt_modellwt                                3.711      1.739   2.134 0.034154
## mt_modelage                               -4.760     10.054  -0.473 0.636500
## mt_modelfactor(race)2                   -377.246    236.212  -1.597 0.111995
## mt_modelfactor(race)3                    -62.672    217.474  -0.288 0.773538
## mt_modelfactor(race)1:factor(notsmoke)1  570.605    143.546   3.975 0.000102
## mt_modelfactor(race)2:factor(notsmoke)1  305.314    277.419   1.101 0.272553
## mt_modelfactor(race)3:factor(notsmoke)1   75.981    216.468   0.351 0.725994
##                                            
## mt_model(Intercept)                     ***
## mt_modellwt                             *  
## mt_modelage                                
## mt_modelfactor(race)2                      
## mt_modelfactor(race)3                      
## mt_modelfactor(race)1:factor(notsmoke)1 ***
## mt_modelfactor(race)2:factor(notsmoke)1    
## mt_modelfactor(race)3:factor(notsmoke)1    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 678.9 on 181 degrees of freedom
## Multiple R-squared:  0.952,  Adjusted R-squared:  0.9499 
## F-statistic: 448.9 on 8 and 181 DF,  p-value: < 2.2e-16

rs_wgt_on_smke_tidy = tidy(rs_wgt_on_smke)
rs_wgt_on_smke_tidy

## # A tibble: 8 x 5
##   term                                    estimate std.error statistic  p.value
##   <chr>                                      <dbl>     <dbl>     <dbl>    <dbl>
## 1 mt_model(Intercept)                      2467.      304.       8.11  7.29e-14
## 2 mt_modellwt                                 3.71      1.74     2.13  3.42e- 2
## 3 mt_modelage                                -4.76     10.1     -0.473 6.36e- 1
## 4 mt_modelfactor(race)2                    -377.      236.      -1.60  1.12e- 1
## 5 mt_modelfactor(race)3                     -62.7     217.      -0.288 7.74e- 1
## 6 mt_modelfactor(race)1:factor(notsmoke)1   571.      144.       3.98  1.02e- 4
## 7 mt_modelfactor(race)2:factor(notsmoke)1   305.      277.       1.10  2.73e- 1
## 8 mt_modelfactor(race)3:factor(notsmoke)1    76.0     216.       0.351 7.26e- 1

Construct Input Arrays \(A_i\) and \(\alpha_i\)

Multiply coefficient vector by covariate matrix to generate A vector that is child/individual specific.

# Estimates Table
head(rs_wgt_on_smke_tidy, 6)

## # A tibble: 6 x 5
##   term                                    estimate std.error statistic  p.value
##   <chr>                                      <dbl>     <dbl>     <dbl>    <dbl>
## 1 mt_model(Intercept)                      2467.      304.       8.11  7.29e-14
## 2 mt_modellwt                                 3.71      1.74     2.13  3.42e- 2
## 3 mt_modelage                                -4.76     10.1     -0.473 6.36e- 1
## 4 mt_modelfactor(race)2                    -377.      236.      -1.60  1.12e- 1
## 5 mt_modelfactor(race)3                     -62.7     217.      -0.288 7.74e- 1
## 6 mt_modelfactor(race)1:factor(notsmoke)1   571.      144.       3.98  1.02e- 4

# Covariates
head(mt_model, 5)

##   (Intercept) lwt age factor(race)2 factor(race)3
## 1           1 182  19             1             0
## 2           1 155  33             0             1
## 3           1 105  20             0             0
## 4           1 108  21             0             0
## 5           1 107  18             0             0
##   factor(race)1:factor(notsmoke)1 factor(race)2:factor(notsmoke)1
## 1                               0                               1
## 2                               0                               0
## 3                               0                               0
## 4                               0                               0
## 5                               0                               0
##   factor(race)3:factor(notsmoke)1
## 1                               0
## 2                               1
## 3                               0
## 4                               0
## 5                               0

# Covariates coefficients from regression (including constant)
ar_fl_cova_esti <- as.matrix(rs_wgt_on_smke_tidy %>% filter(!str_detect(term, 'notsmoke')) %>% select(estimate))
ar_fl_main_esti <- as.matrix(rs_wgt_on_smke_tidy %>% filter(str_detect(term, 'notsmoke')) %>% select(estimate))
head(ar_fl_cova_esti, 5)

##         estimate
## [1,] 2466.775608
## [2,]    3.710935
## [3,]   -4.759693
## [4,] -377.246401
## [5,]  -62.671843

head(ar_fl_main_esti, 5)

##       estimate
## [1,] 570.60493
## [2,] 305.31426
## [3,]  75.98101

# Select Matrix subcomponents
mt_cova <- as.matrix(as_tibble(mt_model) %>% select(-contains("notsmoke")))
mt_intr <- model.matrix(~ factor(race) - 1)

# Generate A_i, use mt_cova_wth_const
ar_A_m <- mt_cova %*% ar_fl_cova_esti
head(ar_A_m, 5)

##      estimate
## [1,] 2674.485
## [2,] 2822.229
## [3,] 2761.230
## [4,] 2767.603
## [5,] 2778.171

# Generate alpha_i
ar_alpha_m <- mt_intr %*% ar_fl_main_esti
head(ar_alpha_m, 5)

##    estimate
## 1 305.31426
## 2  75.98101
## 3 570.60493
## 4 570.60493
## 5 570.60493

Individual Weight

# Child Weight
ar_beta_m <- rep(1/length(ar_A_m), times=length(ar_A_m))

Matrix with Required Inputs for Allocation

# Initate Dataframe that will store all estimates and optimal allocation relevant information
# combine identifying key information along with estimation A, alpha results
# note that we only need indi.id as key
mt_opti <- cbind(ar_alpha_m*0.01, ar_A_m*0.01, ar_beta_m)
ar_st_varnames <- c('alpha', 'A', 'beta')
df_esti_alpha_A_beta <- as_tibble(mt_opti) %>% rename_all(~c(ar_st_varnames))

## Warning: The `x` argument of `as_tibble.matrix()` must have unique column names if `.name_repair` is omitted as of tibble 2.0.0.
## Using compatibility `.name_repair`.
## This warning is displayed once every 8 hours.
## Call `lifecycle::last_lifecycle_warnings()` to see where this warning was generated.

tb_key_alpha_A_beta <- bind_cols(df_opt_birthwt, df_esti_alpha_A_beta) %>%
              select(one_of(c('Index', 'age', 'lwt', 'race', 'notsmoke', 'ptl', 'ht', 'ui', ar_st_varnames)))

# Need to only include the smokers here
tb_key_alpha_A_beta <- tb_key_alpha_A_beta %>% filter(smoke == 1)

# Randomly select 15 smokers for easier visualization.
set.seed(999)
it_include <- 15
tb_key_alpha_A_beta <- tb_key_alpha_A_beta[sample(dim(tb_key_alpha_A_beta)[1], it_include, replace=FALSE),]

# Unique beta, A, and alpha check
tb_opti_unique <- tb_key_alpha_A_beta %>% group_by(!!!syms(ar_st_varnames)) %>%
                    arrange(!!!syms(ar_st_varnames)) %>%
                    summarise(n_obs_group=n())

## `summarise()` has grouped output by 'alpha', 'A'. You can override using the `.groups` argument.

# Show cars
head(tb_key_alpha_A_beta, 32)

##    Index age lwt race notsmoke ptl ht ui     alpha        A        beta
## 27   159  28 250    3        0   0  0  0 0.7598101 31.98566 0.005291005
## 68    68  17 120    1        0   0  0  0 5.7060493 28.31173 0.005291005
## 61    51  20 121    1        0   1  0  1 5.7060493 28.20605 0.005291005
## 71    78  14 101    3        0   1  0  0 0.7598101 27.12273 0.005291005
## 14   123  29 140    1        0   0  0  0 5.7060493 28.48275 0.005291005
## 65    61  24 105    2        0   0  0  0 3.0531426 23.64945 0.005291005
## 58    44  20  80    3        0   0  0  1 0.7598101 26.05785 0.005291005
## 10   113  17 122    1        0   0  0  0 5.7060493 28.38595 0.005291005
## 22   137  22  85    3        0   0  0  0 0.7598101 26.14820 0.005291005
## 35   187  19 235    1        0   0  1  0 5.7060493 32.48411 0.005291005
## 23   140  22 130    1        0   0  0  0 5.7060493 28.44484 0.005291005
## 56    40  20 120    2        0   0  0  0 3.0531426 24.39648 0.005291005
## 15   124  19 138    1        0   0  0  0 5.7060493 28.88451 0.005291005
## 5     95  26 113    1        0   0  0  0 5.7060493 27.62359 0.005291005
## 7    101  18 100    1        0   0  0  0 5.7060493 27.52195 0.005291005

Optimal Linear Allocations

Parameters for Optimal Allocation

# Child Count
it_obs = dim(tb_opti_unique)[1]

# Vector of Planner Preference
ar_rho <- 1 - (10^(c(seq(-2, 2, length.out=30))))
ar_rho <- unique(ar_rho)
print(ar_rho)

##  [1]   0.9900000   0.9862618   0.9811261   0.9740706   0.9643775   0.9510610
##  [7]   0.9327664   0.9076329   0.8731039   0.8256671   0.7604973   0.6709655
## [13]   0.5479646   0.3789831   0.1468321  -0.1721023  -0.6102620  -1.2122163
## [19]  -2.0391954  -3.1753189  -4.7361525  -6.8804628  -9.8263673 -13.8735211
## [25] -19.4335972 -27.0721620 -37.5662042 -51.9831691 -71.7895384 -99.0000000

Optimal binary Allocation

This also works with any CRS CES.

# Optimal Linear Equation
svr_inpalc <- 'rank'

# Solve with Function
ls_bin_solu_all_rhos <-
  ffp_opt_anlyz_rhgin_bin(tb_key_alpha_A_beta, svr_id_i = 'Index',
                          svr_A_i = 'A', svr_alpha_i = 'alpha', svr_beta_i = 'beta',
                          ar_rho = ar_rho,
                          svr_inpalc = svr_inpalc,
                          svr_expout = 'opti_exp_outcome')

## Joining, by = "Index"

tb_opti_alloc_all_rho <- ls_bin_solu_all_rhos$df_all_rho
tb_opti_alloc_all_rho_long <- ls_bin_solu_all_rhos$df_all_rho_long

# Converge rho to numeric so the graph below sorts along x-axis properly 
tb_opti_alloc_all_rho_long <- tb_opti_alloc_all_rho_long %>% mutate(rho_num = as.numeric(rho))

# Merge to get Race Data
tb_opti_alloc_all_rho_long <- tb_opti_alloc_all_rho_long %>% 
  left_join(tb_key_alpha_A_beta %>% select(Index, race), by='Index')

Bump Plot for Optimal Binary Allocations

st_title = paste0("Binary Deallocation Rank, Birth Weight and Mother Smoking")
st_subtitle = paste0("Which individual to encourage to stop smoking first\n",
                     "The expected outcome of interest is child birth weight\n",
                     "Deallocate smoking among 15 randomly selected smokers\n",
                     "Rank = 1, encourage this woman to stop smoking first")
st_caption = paste0("Linear regression of birth weight on smoking. Data from Hosmer et. al. (1998)")
st_x_lab = paste0("Efficiency (Utilitarian) --> Cobb-Douglas --> Equity (Rawlsian)")
st_y_lab = paste0("Rank Along Binary Allocation Queue ( 1 = highest ranked)")

tb_opti_alloc_all_rho_long %>%
  ggplot(aes(x = rho_num, y = !!sym(svr_inpalc), group = Index)) +
    geom_line(aes(color = race, alpha = 1), size = 2) +
    geom_point(aes(color = race, alpha = 1), size = 4) +
    scale_x_discrete(expand = c(0.85,0))+
    scale_y_reverse(breaks = 1:nrow(tb_opti_alloc_all_rho_long))+
    theme(legend.position = "none") +
    labs(x = st_x_lab,
         y = st_y_lab, 
         title = st_title, 
         subtitle = st_subtitle,
         caption = st_caption) +
    ffy_opt_ghthm_dk() +
    geom_text(data =tb_opti_alloc_all_rho,aes(y=rho_c1_rk,x=0.6,label=Index),hjust="right")