vignettes/ffv_opt_sodis_rkone_casch_allfn.Rmd
ffv_opt_sodis_rkone_casch_allfn.Rmd
Test the discrete optimal allocation solution by invoking package functions. Use the California student test score dataset. Regress student English and math test scores on the student-teacher ratio.
This vignette produces the same results as DISCRETE–Discrete Optimal Allocation California Teacher Student Ratio (Line by Line). The difference is that this file, by invoking functions, is much shorter than the line-by-line program.
library(dplyr)
library(tidyr)
library(tibble)
library(stringr)
library(broom)
library(ggplot2)
library(REconTools)
library(PrjOptiAlloc)
library(knitr)
library(kableExtra)
Set Preference Array:
# Individual identifier (id_i) and identifier for each individual-unit row (id_il)
id_i <- c(1,1,1,2,2,2,3,3,3)
id_il <- c(1,2,3,4,5,6,7,8,9)
# Maximum units each individual can receive, and the index of each incremental unit
D_max_i <- c(3,3,3,3,3,3,3,3,3)
D_il <- c(1,2,3,1,2,3,1,2,3)
# Outcome levels across allocation levels for each of the three individuals
A_il_i1 <- c(4,3,2,1)
A_il_i2 <- c(2.7,2.2,1.5,1.1)
A_il_i3 <- c(1.9,1.8,1.1,0.1)
# A_il: outcome level for each incremental unit (dropping the first element of each vector);
# alpha_il: marginal effect of each incremental unit (the step between successive outcome levels)
A_il <- c(A_il_i1[2:4], A_il_i2[2:4], A_il_i3[2:4])
alpha_il <- c(diff(-A_il_i1), diff(-A_il_i2), diff(-A_il_i3))
# beta_i: planner weights on each individual (equal weights here)
beta_i <- c(1/3,1/3,1/3,1/3,1/3,1/3,1/3,1/3,1/3)
df_handinput_input_il <- as_tibble(cbind(id_i, id_il, D_max_i, D_il, A_il, alpha_il, beta_i))
print(df_handinput_input_il)
## # A tibble: 9 x 7
## id_i id_il D_max_i D_il A_il alpha_il beta_i
## <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
## 1 1 1 3 1 3 1 0.333
## 2 1 2 3 2 2 1 0.333
## 3 1 3 3 3 1 1 0.333
## 4 2 4 3 1 2.2 0.5 0.333
## 5 2 5 3 2 1.5 0.7 0.333
## 6 2 6 3 3 1.1 0.4 0.333
## 7 3 7 3 1 1.8 0.100 0.333
## 8 3 8 3 2 1.1 0.7 0.333
## 9 3 9 3 3 0.1 1 0.333
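The call below uses ar_rho (the planner inequality-aversion parameters) and it_w_agg (the total number of units available), which are not set earlier in this file. A minimal sketch of values consistent with the results printed below (the same rho grid used later in this vignette, and five allocable units, matching the D_star_i totals shown further down):
# Planner inequality-aversion parameters (same grid as used later in this vignette)
ar_rho <- 1 - (10^(c(seq(-2,2, length.out=4))))
ar_rho <- unique(ar_rho)
# Total number of units available to allocate (assumption: five units, matching the D_star_i totals below)
it_w_agg <- 5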
ls_df_queue_hand <- PrjOptiAlloc::ffp_opt_anlyz_rhgin_dis(ar_rho, it_w_agg,
bl_df_alloc_il = TRUE,
bl_return_V = TRUE,
bl_return_allQ_V = TRUE,
bl_return_inner_V = TRUE,
df_input_il = df_handinput_input_il)
## Joining, by = "id_il"
## Adding missing grouping variables: `id_i`
## Warning in ff_dist_gini_vector_pos(.[[svr_D_star_i]]): Data vector has only n=3,
## max-inequality/min-gini=0.5
## Warning in ff_dist_gini_vector_pos(.[[svr_D_star_i]]): Data vector has only n=3,
## max-inequality/min-gini=0.5
## Warning in ff_dist_gini_vector_pos(.[[svr_D_star_i]]): Data vector has only n=3,
## max-inequality/min-gini=0.5
## Warning in ff_dist_gini_vector_pos(.[[svr_D_star_i]]): Data vector has only n=3,
## max-inequality/min-gini=0.5
## Warning in ff_dist_gini_vector_pos(.[[svr_D_star_i_demean]]): Data vector has
## only n=3, max-inequality/min-gini=0.5
## Warning in ff_dist_gini_vector_pos(.[[svr_D_star_i_demean]]): Data vector has
## only n=3, max-inequality/min-gini=0.5
## Warning in ff_dist_gini_vector_pos(.[[svr_D_star_i_demean]]): Data vector has
## only n=3, max-inequality/min-gini=0.5
## Warning in ff_dist_gini_vector_pos(.[[svr_D_star_i_demean]]): Data vector has
## only n=3, max-inequality/min-gini=0.5
## Adding missing grouping variables: `id_i`
## Adding missing grouping variables: `id_i`
## Adding missing grouping variables: `id_i`
## Adding missing grouping variables: `id_i`
## Warning: `cols` is now required when using unnest().
## Please use `cols = c(rev)`
## Warning: `cols` is now required when using unnest().
## Please use `cols = c(alloc_i_upto_Q)`
df_queue_il_long <- ls_df_queue_hand$df_queue_il_long
df_queue_il_wide <- ls_df_queue_hand$df_queue_il_wide
df_alloc_i_long <- ls_df_queue_hand$df_alloc_i_long
df_rho_gini <- ls_df_queue_hand$df_rho_gini
df_alloc_il_long <- ls_df_queue_hand$df_alloc_il_long
print(df_queue_il_long)
## # A tibble: 36 x 11
## rho rho_val id_i id_il D_max_i D_il Q_il D_Wbin_il V_sum_l V_inner_Q_il
## <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <int> <dbl> <dbl> <dbl>
## 1 1 0.99 1 1 3 1 4 1 1.31e+ 0 2.41e+ 0
## 2 2 0.785 1 1 3 1 4 1 9.89e- 1 1.97e+ 0
## 3 3 -3.64 1 1 3 1 9 0 2.14e- 3 4.33e- 2
## 4 4 -99 1 1 3 1 9 0 8.30e-61 8.44e-29
## 5 1 0.99 1 2 3 2 3 1 9.89e- 1 2.08e+ 0
## 6 2 0.785 1 2 3 2 3 1 7.89e- 1 1.77e+ 0
## 7 3 -3.64 1 2 3 2 6 0 6.10e- 3 6.42e- 2
## 8 4 -99 1 2 3 2 7 0 1.94e-48 8.44e-29
## 9 1 0.99 1 3 3 3 2 1 6.62e- 1 1.76e+ 0
## 10 2 0.785 1 3 3 3 2 1 5.74e- 1 1.55e+ 0
## # ... with 26 more rows, and 1 more variable: V_star_Q_il <dbl>
print(df_queue_il_wide)
## # A tibble: 9 x 14
## id_il A_il alpha_il beta_i rank_min rank_max avg_rank id_i D_max_i D_il
## <dbl> <dbl> <dbl> <dbl> <int> <int> <dbl> <dbl> <dbl> <dbl>
## 1 1 3 1 0.333 9 4 6.5 1 3 1
## 2 2 2 1 0.333 7 3 4.75 1 3 2
## 3 3 1 1 0.333 2 2 2 1 3 3
## 4 4 2.2 0.5 0.333 8 7 7.25 2 3 1
## 5 5 1.5 0.7 0.333 6 5 5.5 2 3 2
## 6 6 1.1 0.4 0.333 8 4 6 2 3 3
## 7 7 1.8 0.100 0.333 9 6 8 3 3 1
## 8 8 1.1 0.7 0.333 5 3 4 3 3 2
## 9 9 0.1 1 0.333 1 1 1 3 3 3
## # ... with 4 more variables: rho_c1_rk <int>, rho_c2_rk <int>, rho_c3_rk <int>,
## # rho_c4_rk <int>
print(df_alloc_i_long)
## # A tibble: 12 x 7
## # Groups: id_i [3]
## rho rho_val id_i D_max_i D_star_i F_star_i EH_star_i
## <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
## 1 1 0.99 1 3 3 1 2
## 2 2 0.785 1 3 3 1 2
## 3 3 -3.64 1 3 1 0.333 4
## 4 4 -99 1 3 1 0.333 4
## 5 1 0.99 2 3 0 0 2.2
## 6 2 0.785 2 3 0 0 2.2
## 7 3 -3.64 2 3 2 0.667 2.2
## 8 4 -99 2 3 2 0.667 2.2
## 9 1 0.99 3 3 2 0.667 1.8
## 10 2 0.785 3 3 2 0.667 1.8
## 11 3 -3.64 3 3 2 0.667 1.8
## 12 4 -99 3 3 2 0.667 1.8
print(df_rho_gini)
## # A tibble: 12 x 9
## rho rho_val gini_D_star gini_EH_star atkinson_EH_star sd_D_star sd_EH_star
## <int> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
## 1 1 0.99 0.3 0.0333 0.0000334 1.53 0.2
## 2 2 0.785 0.3 0.0333 0.0000334 1.53 0.2
## 3 3 -3.64 0.1 0.137 0.0000334 0.577 1.17
## 4 4 -99 0.1 0.137 0.000720 0.577 1.17
## 5 5 0.99 0.3 0.0333 0.000720 1.53 0.2
## 6 6 0.785 0.3 0.0333 0.000720 1.53 0.2
## 7 7 -3.64 0.1 0.137 0.189 0.577 1.17
## 8 8 -99 0.1 0.137 0.189 0.577 1.17
## 9 9 0.99 0.3 0.0333 0.189 1.53 0.2
## 10 10 0.785 0.3 0.0333 0.317 1.53 0.2
## 11 11 -3.64 0.1 0.137 0.317 0.577 1.17
## 12 12 -99 0.1 0.137 0.317 0.577 1.17
## # ... with 2 more variables: mean_EH_star <dbl>, min_EH_star <dbl>
print(df_alloc_il_long)
## # A tibble: 36 x 5
## rho Q_il sid_1 sid_2 sid_3
## <dbl> <int> <dbl> <dbl> <dbl>
## 1 1 1 0 0 1
## 2 1 2 1 0 1
## 3 1 3 2 0 1
## 4 1 4 3 0 1
## 5 1 5 3 0 2
## 6 1 6 3 1 2
## 7 1 7 3 2 2
## 8 1 8 3 3 2
## 9 1 9 3 3 3
## 10 2 1 0 0 1
## # ... with 26 more rows
These are data that were generated in the DISCRETE–Discrete Optimal Allocation California Teacher Student Ratio (Line by Line) vignette.
# Load Data
data(df_opt_caschool_prep_i)
data(df_opt_caschool_input_il)
data(df_opt_caschool_input_ib)
# Show Variables
str(df_opt_caschool_prep_i)
## 'data.frame': 420 obs. of 15 variables:
## $ id_i : int 1 2 3 4 5 6 7 8 9 10 ...
## $ D_max_i : num 11 11 83 14 72 6 10 42 19 108 ...
## $ D_o_i : num 2 2 17 3 14 1 2 8 4 22 ...
## $ Omega_i : num 706 673 657 656 656 ...
## $ theta_i : num -0.87 -0.87 -0.87 -0.87 -0.87 ...
## $ beta_i : num 0.00238 0.00238 0.00238 0.00238 0.00238 ...
## $ enrltot : num 195 240 1550 243 1335 ...
## $ teachers: num 11 11 83 14 72 6 10 42 19 108 ...
## $ stravg : num 17.7 21.8 18.7 17.4 18.5 ...
## $ distcod : int 75119 61499 61549 61457 61523 62042 68536 63834 62331 67306 ...
## $ county : Factor w/ 45 levels "Alameda","Butte",..: 1 2 2 2 2 6 29 11 6 25 ...
## $ district: Factor w/ 409 levels "Ackerman Elementary",..: 362 214 367 134 270 53 152 383 263 93 ...
## $ grspan : Factor w/ 2 levels "KK-06","KK-08": 2 2 2 2 2 2 2 2 2 1 ...
## $ avginc : num 22.69 9.82 8.98 8.98 9.08 ...
## $ testscr : num 691 661 644 648 641 ...
str(df_opt_caschool_input_il)
## tibble [54,204 x 7] (S3: tbl_df/tbl/data.frame)
## $ id_i : int [1:54204] 1 1 1 1 1 1 1 1 1 1 ...
## $ id_il : int [1:54204] 1 2 3 4 5 6 7 8 9 10 ...
## $ D_max_i : num [1:54204] 11 11 11 11 11 11 11 11 11 11 ...
## $ D_il : int [1:54204] 1 2 3 4 5 6 7 8 9 10 ...
## $ A_il : num [1:54204] 691 692 693 694 695 ...
## $ alpha_il: num [1:54204] 1.285 1.087 0.932 0.808 0.707 ...
## $ beta_i : num [1:54204] 0.00238 0.00238 0.00238 0.00238 0.00238 ...
str(df_opt_caschool_input_ib)
## 'data.frame': 420 obs. of 4 variables:
## $ id_i : int 1 2 3 4 5 6 7 8 9 10 ...
## $ A_i_l0 : num 691 654 641 641 639 ...
## $ alpha_o_i: num 2.37 2.92 2.76 2.66 2.63 ...
## $ beta_i : num 0.00238 0.00238 0.00238 0.00238 0.00238 ...
Note that the input_il data frame is already constructed based on fl_fi_max.
# 100 percent teacher at most per school, discretize floor as needed
# This is not a parameter that matters here, already a part of the input_il function
fl_fi_max = 1.00
# 20 percent total additional of all teachers
fl_fa_max = 0.20
# What is the number of teachers we can increase by
fl_teacher_increase_number <- sum(df_opt_caschool_prep_i$teachers)*fl_fa_max
fl_teacher_increase_number <- floor(fl_teacher_increase_number)
# Rho values to consider
ar_rho <- 1 - (10^(c(seq(-2,2, length.out=4))))
ar_rho <- unique(ar_rho)
Use the discrete allocation across preferences function ffp_opt_anlyz_rhgin_dis. It computes the optimal allocation for multiple planner inequality-aversion parameters \(\lambda\) at the same time. Note that in the function \(\lambda=\rho\).
# Optimal Allocation
ls_df_queue <- PrjOptiAlloc::ffp_opt_anlyz_rhgin_dis(ar_rho,
fl_teacher_increase_number,
bl_df_alloc_il = TRUE,
df_input_il = df_opt_caschool_input_il,
svr_rho = 'rho',
svr_id_i = 'id_i', svr_id_il = 'id_il',
svr_D_max_i = 'D_max_i', svr_D_il = 'D_il',
svr_D_star_i = 'D_star_i', svr_F_star_i = 'F_star_i',
svr_inpalc = 'Q_il', svr_D_Wbin_il = 'D_Wbin_il',
svr_A_il = 'A_il', svr_alpha_il = 'alpha_il', svr_beta_i = 'beta_i',
svr_expout = 'opti_exp_outcome',
st_idcol_prefix = 'sid_')
## Joining, by = "id_il"
## Adding missing grouping variables: `id_i`
## Adding missing grouping variables: `id_i`
## Adding missing grouping variables: `id_i`
## Adding missing grouping variables: `id_i`
## Adding missing grouping variables: `id_i`
## Warning: `cols` is now required when using unnest().
## Please use `cols = c(rev)`
## Warning: `cols` is now required when using unnest().
## Please use `cols = c(alloc_i_upto_Q)`
df_queue_il_long <- ls_df_queue$df_queue_il_long
df_queue_il_wide <- ls_df_queue$df_queue_il_wide
df_alloc_i_long <- ls_df_queue$df_alloc_i_long
df_rho_gini <- ls_df_queue$df_rho_gini
df_alloc_il_long <- ls_df_queue$df_alloc_il_long
## # A tibble: 10 x 9
## rho rho_val id_i id_il D_max_i D_il Q_il D_Wbin_il V_star_Q_il
## <dbl> <dbl> <int> <int> <dbl> <int> <int> <dbl> <dbl>
## 1 1 0.99 1 1 11 1 138 1 292290.
## 2 2 0.785 1 1 11 1 140 1 1444269.
## 3 3 -3.64 1 1 11 1 244 1 125.
## 4 4 -99 1 1 11 1 36657 0 NA
## 5 1 0.99 1 2 11 2 209 1 292379.
## 6 2 0.785 1 2 11 2 212 1 1444712.
## 7 3 -3.64 1 2 11 2 372 1 125.
## 8 4 -99 1 2 11 2 39093 0 NA
## 9 1 0.99 1 3 11 3 297 1 292474.
## 10 2 0.785 1 3 11 3 305 1 1445198.
## # A tibble: 10 x 14
## id_il A_il alpha_il beta_i rank_min rank_max avg_rank id_i D_max_i D_il
## <int> <dbl> <dbl> <dbl> <int> <int> <dbl> <int> <dbl> <int>
## 1 1 691. 1.29 1 36657 138 9295. 1 11 1
## 2 2 692. 1.09 1 39093 209 9972. 1 11 2
## 3 3 693. 0.932 1 41307 297 10604 1 11 3
## 4 4 694. 0.808 1 43280 404 11190. 1 11 4
## 5 5 695. 0.707 1 45090 522 11750. 1 11 5
## 6 6 696. 0.624 1 46600 658 12248. 1 11 6
## 7 7 696. 0.554 1 47875 813 12704. 1 11 7
## 8 8 697. 0.496 1 48958 978 13115 1 11 8
## 9 9 697. 0.446 1 49841 1165 13495 1 11 9
## 10 10 698. 0.404 1 50512 1357 13826. 1 11 10
## # ... with 4 more variables: rho_c1_rk <int>, rho_c2_rk <int>, rho_c3_rk <int>,
## # rho_c4_rk <int>
## # A tibble: 10 x 7
## # Groups: id_i [3]
## rho rho_val id_i D_max_i D_star_i F_star_i EH_star_i
## <dbl> <dbl> <int> <dbl> <dbl> <dbl> <dbl>
## 1 1 0.99 1 11 11 1 699.
## 2 2 0.785 1 11 11 1 699.
## 3 3 -3.64 1 11 11 1 699.
## 4 4 -99 1 11 0 0 691.
## 5 1 0.99 2 11 11 1 664.
## 6 2 0.785 2 11 11 1 664.
## 7 3 -3.64 2 11 11 1 664.
## 8 4 -99 2 11 6 0.545 661.
## 9 1 0.99 3 83 54 0.651 647.
## 10 2 0.785 3 83 54 0.651 647.
# print(str(df_queue_il_wide))
print(df_rho_gini)
## # A tibble: 1,680 x 9
## rho rho_val gini_D_star gini_EH_star atkinson_EH_star sd_D_star sd_EH_star
## <int> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
## 1 1 0.99 0.445 0.0154 0.00000362 20.1 17.8
## 2 2 0.785 0.445 0.0154 0.00000362 20.2 17.8
## 3 3 -3.64 0.445 0.0151 0.00000362 20.3 17.5
## 4 4 -99 0.821 0.0125 0.00000362 60.8 14.4
## 5 5 0.99 0.445 0.0154 0.00000362 20.1 17.8
## 6 6 0.785 0.445 0.0154 0.00000362 20.2 17.8
## 7 7 -3.64 0.445 0.0151 0.00000362 20.3 17.5
## 8 8 -99 0.821 0.0125 0.00000362 60.8 14.4
## 9 9 0.99 0.445 0.0154 0.00000362 20.1 17.8
## 10 10 0.785 0.445 0.0154 0.00000362 20.2 17.8
## # ... with 1,670 more rows, and 2 more variables: mean_EH_star <dbl>,
## # min_EH_star <dbl>
## # A tibble: 10 x 422
## rho Q_il sid_1 sid_2 sid_3 sid_4 sid_5 sid_6 sid_7 sid_8 sid_9 sid_10
## <dbl> <int> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
## 1 1 1 0 0 0 0 0 0 0 0 0 0
## 2 1 2 0 0 0 0 0 0 0 0 0 0
## 3 1 3 0 0 0 0 0 0 0 0 0 0
## 4 1 4 0 0 0 0 0 0 0 0 0 0
## 5 1 5 0 0 0 0 0 0 0 0 0 0
## 6 1 6 0 0 0 0 0 0 0 0 0 0
## 7 1 7 0 0 0 0 0 0 0 0 0 0
## 8 1 8 0 0 0 0 0 1 0 0 0 0
## 9 1 9 0 0 0 0 0 1 0 0 0 0
## 10 1 10 0 0 0 0 0 1 0 0 0 0
## # ... with 410 more variables: sid_11 <dbl>, sid_12 <dbl>, sid_13 <dbl>,
## # sid_14 <dbl>, sid_15 <dbl>, sid_16 <dbl>, sid_17 <dbl>, sid_18 <dbl>,
## # sid_19 <dbl>, sid_20 <dbl>, sid_21 <dbl>, sid_22 <dbl>, sid_23 <dbl>,
## # sid_24 <dbl>, sid_25 <dbl>, sid_26 <dbl>, sid_27 <dbl>, sid_28 <dbl>,
## # sid_29 <dbl>, sid_30 <dbl>, sid_31 <dbl>, sid_32 <dbl>, sid_33 <dbl>,
## # sid_34 <dbl>, sid_35 <dbl>, sid_36 <dbl>, sid_37 <dbl>, sid_38 <dbl>,
## # sid_39 <dbl>, sid_40 <dbl>, sid_41 <dbl>, sid_42 <dbl>, sid_43 <dbl>, ...
tb_rho_rev <-
PrjOptiAlloc::ffp_opt_anlyz_sodis_rev(ar_rho,
fl_teacher_increase_number,
df_input_ib = df_opt_caschool_input_ib,
df_queue_il_long_with_V = df_queue_il_long)
## Warning: `cols` is now required when using unnest().
## Please use `cols = c(rev)`
## Warning: `cols` is now required when using unnest().
## Please use `cols = c(fl_util_alter_alloc)`
# Display Results
print(tb_rho_rev)
## # A tibble: 4 x 4
## rho rho_val REV AlterOutcome
## <int> <dbl> <dbl> <dbl>
## 1 1 0.99 0.836 293297.
## 2 2 0.785 0.836 1449231.
## 3 3 -3.64 0.831 125.
## 4 4 -99 0.949 601.
To generate graphs and more easily readable results, select a random subset of school districts from the existing data frames rather than using all school districts.
# select four school districts
# cas = california schools
it_O <- 4
set.seed(1)
df_cas_prep_sub_i <- df_opt_caschool_prep_i %>% filter(teachers <= 100 & teachers >= 50)
df_cas_prep_sub_i <- df_cas_prep_sub_i[sample(dim(df_cas_prep_sub_i)[1], it_O, replace=FALSE),]
ar_cas_id_selected <- df_cas_prep_sub_i %>% pull(id_i)
# Select from il and ib only ids that are randomly selected
df_cas_input_sub_il <- df_opt_caschool_input_il %>% filter(id_i %in% ar_cas_id_selected)
df_cas_input_sub_ib <- df_opt_caschool_input_ib %>% filter(id_i %in% ar_cas_id_selected)
# Print
print(df_cas_prep_sub_i)
## id_i D_max_i D_o_i Omega_i theta_i beta_i enrltot teachers stravg
## 4 32 100 20 636.0332 -0.8699505 0.002380952 2102 100 21.02000
## 39 327 51 10 686.9896 -0.8699505 0.002380952 900 51 17.64706
## 1 3 83 17 657.0958 -0.8699505 0.002380952 1550 83 18.67470
## 34 286 65 13 671.6432 -0.8699505 0.002380952 1252 65 19.26154
## distcod county district grspan avginc testscr
## 4 75440 Monterey Soledad Unified KK-08 12.40900 624.95
## 39 68007 San Diego Cardiff Elementary KK-06 21.96700 668.65
## 1 61549 Butte Thermalito Union Elementary KK-08 8.97800 643.60
## 34 71035 Sonoma Wright Elementary KK-06 12.58425 663.85
print(df_cas_input_sub_ib)
## id_i A_i_l0 alpha_o_i beta_i
## 1 3 640.8497 2.761831 1
## 2 32 617.7468 3.047727 1
## 3 286 654.8866 2.792764 1
## 4 327 671.6376 2.516732 1
print(df_cas_input_sub_il)
## # A tibble: 299 x 7
## id_i id_il D_max_i D_il A_il alpha_il beta_i
## <int> <int> <dbl> <int> <dbl> <dbl> <dbl>
## 1 3 23 83 1 641. 0.193 1
## 2 3 24 83 2 641. 0.189 1
## 3 3 25 83 3 641. 0.184 1
## 4 3 26 83 4 641. 0.180 1
## 5 3 27 83 5 642. 0.176 1
## 6 3 28 83 6 642. 0.172 1
## 7 3 29 83 7 642. 0.168 1
## 8 3 30 83 8 642. 0.165 1
## 9 3 31 83 9 642. 0.161 1
## 10 3 32 83 10 642. 0.158 1
## # ... with 289 more rows
Now we also have a new total number of additional teachers, no longer based on all districts. The policy here still potentially increases each school district by up to fl_fi_max; that is already coded into the input_il file, so it cannot change. The total number of available new teachers is fl_fa_max times the existing number of teachers across the selected districts, as sketched below.
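A minimal sketch of this computation, defining fl_teacher_increase_sub_number (the name used by the allocation call below and by later code in this vignette) from the selected districts, following the earlier full-sample computation:
# 20 percent of the existing teachers across the selected districts, rounded down
fl_teacher_increase_sub_number <- sum(df_cas_prep_sub_i$teachers)*fl_fa_max
fl_teacher_increase_sub_number <- floor(fl_teacher_increase_sub_number)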
Use the discrete allocation across preferences function ffp_opt_anlyz_rhgin_dis. It computes the optimal allocation for multiple planner inequality-aversion parameters \(\lambda\) at the same time. Note that in the function \(\lambda=\rho\).
# Rho values to consider
ar_rho <- 1 - (10^(c(seq(-2,2, length.out=4))))
ar_rho <- unique(ar_rho)
# Optimal Allocation
ls_df_sub_queue <- PrjOptiAlloc::ffp_opt_anlyz_rhgin_dis(ar_rho, fl_teacher_increase_sub_number,
df_input_il = df_cas_input_sub_il,
bl_df_alloc_il = TRUE)
## Joining, by = "id_il"
## Adding missing grouping variables: `id_i`
## Warning in ff_dist_gini_vector_pos(.[[svr_D_star_i]]): Data vector has only n=4,
## max-inequality/min-gini=0.6
## Warning in ff_dist_gini_vector_pos(.[[svr_D_star_i]]): Data vector has only n=4,
## max-inequality/min-gini=0.6
## Warning in ff_dist_gini_vector_pos(.[[svr_D_star_i]]): Data vector has only n=4,
## max-inequality/min-gini=0.6
## Warning in ff_dist_gini_vector_pos(.[[svr_D_star_i]]): Data vector has only n=4,
## max-inequality/min-gini=0.6
## Warning in ff_dist_gini_vector_pos(.[[svr_D_star_i_demean]]): Data vector has
## only n=4, max-inequality/min-gini=0.6
## Warning in ff_dist_gini_vector_pos(.[[svr_D_star_i_demean]]): Data vector has
## only n=4, max-inequality/min-gini=0.6
## Warning in ff_dist_gini_vector_pos(.[[svr_D_star_i_demean]]): Data vector has
## only n=4, max-inequality/min-gini=0.6
## Warning in ff_dist_gini_vector_pos(.[[svr_D_star_i_demean]]): Data vector has
## only n=4, max-inequality/min-gini=0.6
## Adding missing grouping variables: `id_i`
## Adding missing grouping variables: `id_i`
## Adding missing grouping variables: `id_i`
## Adding missing grouping variables: `id_i`
## Warning: `cols` is now required when using unnest().
## Please use `cols = c(rev)`
## Warning: `cols` is now required when using unnest().
## Please use `cols = c(alloc_i_upto_Q)`
df_queue_il_sub_long <- ls_df_sub_queue$df_queue_il_long
df_queue_il_sub_wide <- ls_df_sub_queue$df_queue_il_wide
df_alloc_i_long <- ls_df_sub_queue$df_alloc_i_long
df_rho_gini <- ls_df_sub_queue$df_rho_gini
df_alloc_il_long <- ls_df_sub_queue$df_alloc_il_long
# REV
ar_util_rev_loop_func <- ffp_opt_anlyz_sodis_rev(ar_rho,fl_teacher_increase_sub_number,
df_input_ib = df_cas_input_sub_ib,
df_queue_il_long_with_V = df_queue_il_sub_long)
## Warning in min(df_queue_il_with_V %>% filter(!!sym(svr_V_star_Q_il) >=
## fl_util_alter_alloc) %>% : no non-missing arguments to min; returning Inf
## Warning: `cols` is now required when using unnest().
## Please use `cols = c(rev)`
## Warning in min(df_queue_il_with_V %>% filter(!!sym(svr_V_star_Q_il) >=
## fl_util_alter_alloc) %>% : no non-missing arguments to min; returning Inf
## Warning: `cols` is now required when using unnest().
## Please use `cols = c(fl_util_alter_alloc)`
Show Results
# Display Results
print(df_queue_il_sub_long)
## # A tibble: 1,196 x 9
## rho rho_val id_i id_il D_max_i D_il Q_il D_Wbin_il V_star_Q_il
## <dbl> <dbl> <int> <int> <dbl> <int> <int> <dbl> <dbl>
## 1 1 0.99 3 23 83 1 24 1 2627.
## 2 2 0.785 3 23 83 1 23 1 3790.
## 3 3 -3.64 3 23 83 1 19 1 441.
## 4 4 -99 3 23 83 1 101 0 NA
## 5 1 0.99 3 24 83 2 26 1 2628.
## 6 2 0.785 3 24 83 2 26 1 3791.
## 7 3 -3.64 3 24 83 2 22 1 441.
## 8 4 -99 3 24 83 2 102 0 NA
## 9 1 0.99 3 25 83 3 29 1 2628.
## 10 2 0.785 3 25 83 3 29 1 3792.
## # ... with 1,186 more rows
print(df_alloc_i_long)
## # A tibble: 16 x 7
## # Groups: id_i [4]
## rho rho_val id_i D_max_i D_star_i F_star_i EH_star_i
## <dbl> <dbl> <int> <dbl> <dbl> <dbl> <dbl>
## 1 1 0.99 3 83 11 0.133 643.
## 2 2 0.785 3 83 11 0.133 643.
## 3 3 -3.64 3 83 12 0.145 643.
## 4 4 -99 3 83 0 0 641.
## 5 1 0.99 32 100 9 0.09 619.
## 6 2 0.785 32 100 9 0.09 619.
## 7 3 -3.64 32 100 19 0.19 621.
## 8 4 -99 32 100 59 0.59 625.
## 9 1 0.99 286 65 19 0.292 659.
## 10 2 0.785 286 65 19 0.292 659.
## 11 3 -3.64 286 65 15 0.231 658.
## 12 4 -99 286 65 0 0 655.
## 13 1 0.99 327 51 20 0.392 676.
## 14 2 0.785 327 51 20 0.392 676.
## 15 3 -3.64 327 51 13 0.255 675.
## 16 4 -99 327 51 0 0 672.
print(df_rho_gini)
## # A tibble: 16 x 9
## rho rho_val gini_D_star gini_EH_star atkinson_EH_star sd_D_star sd_EH_star
## <int> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
## 1 1 0.99 0.139 0.0143 0.00000519 5.56 24.1
## 2 2 0.785 0.139 0.0143 0.00000519 5.56 24.1
## 3 3 -3.64 0.0780 0.0137 0.00000519 3.10 23.0
## 4 4 -99 0.6 0.0120 0.00000519 29.5 20.1
## 5 5 0.99 0.139 0.0143 0.000112 5.56 24.1
## 6 6 0.785 0.139 0.0143 0.000112 5.56 24.1
## 7 7 -3.64 0.0780 0.0137 0.000112 3.10 23.0
## 8 8 -99 0.6 0.0120 0.000112 29.5 20.1
## 9 9 0.99 0.139 0.0143 0.00220 5.56 24.1
## 10 10 0.785 0.139 0.0143 0.00220 5.56 24.1
## 11 11 -3.64 0.0780 0.0137 0.00220 3.10 23.0
## 12 12 -99 0.6 0.0120 0.00220 29.5 20.1
## 13 13 0.99 0.139 0.0143 0.0234 5.56 24.1
## 14 14 0.785 0.139 0.0143 0.0234 5.56 24.1
## 15 15 -3.64 0.0780 0.0137 0.0234 3.10 23.0
## 16 16 -99 0.6 0.0120 0.0234 29.5 20.1
## # ... with 2 more variables: mean_EH_star <dbl>, min_EH_star <dbl>
print(df_alloc_il_long)
## # A tibble: 1,196 x 6
## rho Q_il sid_3 sid_32 sid_286 sid_327
## <dbl> <int> <dbl> <dbl> <dbl> <dbl>
## 1 1 1 0 0 0 1
## 2 1 2 0 0 0 2
## 3 1 3 0 0 0 3
## 4 1 4 0 0 0 4
## 5 1 5 0 0 0 5
## 6 1 6 0 0 1 5
## 7 1 7 0 0 2 5
## 8 1 8 0 0 2 6
## 9 1 9 0 0 3 6
## 10 1 10 0 0 3 7
## # ... with 1,186 more rows
print(ar_util_rev_loop_func)
## # A tibble: 4 x 4
## rho rho_val REV AlterOutcome
## <int> <dbl> <dbl> <dbl>
## 1 1 0.99 0.0339 2633.
## 2 2 0.785 0.0339 3799.
## 3 3 -3.64 0 443.
## 4 4 -99 0.644 621.
Suppose the population of schools has measure one, there are \(N\) distinct types of schools, and the mass of each unique type differs, with the masses summing to 1. If each school can receive 5 additional teachers, then the mass of available teachers is measure 5.
Use the number of students per school as its mass, and introduce a new vector for the allocation function containing this mass. This is only to illustrate the idea: in principle the mass should be the number of schools of the same type, not the number of students.
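As a small numeric illustration of this accounting, with hypothetical masses:
# Two hypothetical school types whose masses sum to measure one
ar_mass_type <- c(0.3, 0.7)
# If each school can receive 5 additional teachers, the mass of available teachers is measure 5
sum(5 * ar_mass_type)
## [1] 5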
Note that under the discrete count-based allocation, there were 10840 available teachers to be allocated; in the problem below, availability is instead expressed as a total measure.
First, analyze the previous allocation, which is not based on measure, for only one planner.
Note that this step is eventually to be done inside the allocation function: after the queue has been found, cumulatively sum mass along the queue and set the cut-off at the total measure of resources available.
# Cumulative Mass
df_queue_il_long_mass_cumu <- df_queue_il_long %>% filter(rho_val == ar_rho[1]) %>%
select(-rho, -rho_val) %>%
left_join(df_opt_caschool_prep_i %>%
mutate(mass = enrltot/sum(enrltot)) %>%
select(id_i, mass), by='id_i') %>%
arrange(Q_il) %>%
mutate(mass_cumu_queue = cumsum(mass))
# View Around the Teacher Available Cut-off mass
df_queue_il_long_mass_cumu %>%
filter(Q_il >= fl_teacher_increase_number - 5
& Q_il <= fl_teacher_increase_number + 5)
## # A tibble: 11 x 9
## id_i id_il D_max_i D_il Q_il D_Wbin_il V_star_Q_il mass mass_cumu_queue
## <int> <int> <dbl> <int> <int> <dbl> <dbl> <dbl> <dbl>
## 1 97 16260 95 55 10835 1 294663. 0.00167 14.4
## 2 339 47438 135 43 10836 1 294663. 0.00235 14.4
## 3 406 52898 132 37 10837 1 294663. 0.00212 14.4
## 4 282 40715 89 59 10838 1 294663. 0.00162 14.4
## 5 72 13654 141 50 10839 1 294663. 0.00271 14.4
## 6 412 53590 87 40 10840 1 294663. 0.00119 14.4
## 7 45 5664 221 7 10841 0 NA 0.00386 14.4
## 8 411 53444 139 33 10842 0 NA 0.00219 14.4
## 9 351 49277 127 49 10843 0 NA 0.00230 14.4
## 10 404 52726 112 42 10844 0 NA 0.00176 14.4
## 11 373 50222 80 59 10845 0 NA 0.00143 14.4
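The view above is centered on the count-based cut-off Q_il = fl_teacher_increase_number. Under the measure interpretation, the binding constraint would instead be imposed on the cumulative mass along the queue; a minimal sketch, with fl_measure_available standing in for a hypothetical total measure of available teachers:
# fl_measure_available: hypothetical total measure of available teachers (assumption for illustration)
fl_measure_available <- 15
# Queue positions that fit within the available measure
df_queue_il_long_mass_cumu %>%
  filter(mass_cumu_queue <= fl_measure_available) %>%
  arrange(Q_il) %>%
  tail(5)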
# Summarize
REconTools::ff_summ_percentiles(df_queue_il_long_mass_cumu, bl_statsasrows=FALSE)
## Warning: `funs()` was deprecated in dplyr 0.8.0.
## Please use a list of either functions or lambdas:
##
## # Simple named list:
## list(mean = mean, median = median)
##
## # Auto named with `tibble::lst()`:
## tibble::lst(mean, median)
##
## # Using lambdas
## list(~ mean(., trim = .2), ~ median(., na.rm = TRUE))
## This warning is displayed once every 8 hours.
## Call `lifecycle::last_lifecycle_warnings()` to see where this warning was generated.
## Warning: `as.tibble()` was deprecated in tibble 2.0.0.
## Please use `as_tibble()` instead.
## The signature and semantics have changed, see `?as_tibble`.
## This warning is displayed once every 8 hours.
## Call `lifecycle::last_lifecycle_warnings()` to see where this warning was generated.
## Warning: attributes are not identical across measure variables;
## they will be dropped
## # A tibble: 9 x 19
## var n unique NAobs ZEROobs mean sd cv min p01
## <chr> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
## 1 D.il 54204 1429 0 0 2.02e+2 2.25e+2 1.11 1 e+0 2 e+0
## 2 D.max.i 54204 203 0 0 4.02e+2 3.33e+2 0.829 5 e+0 1.2 e+1
## 3 D.Wbin.~ 54204 2 0 43364 2.00e-1 4.00e-1 2.00 0 0
## 4 id.i 54204 420 0 0 1.83e+2 1.16e+2 0.635 1 e+0 1.6 e+1
## 5 id.il 54204 54204 0 0 2.71e+4 1.56e+4 0.577 1 e+0 5.43e+2
## 6 mass 54204 391 0 0 7.51e-3 6.21e-3 0.827 7.34e-5 2.02e-4
## 7 mass.cu~ 54204 54204 0 0 1.22e+2 1.14e+2 0.931 1.17e-4 1.07e-1
## 8 Q.il 54204 54204 0 0 2.71e+4 1.56e+4 0.577 1 e+0 5.43e+2
## 9 V.star.~ 54204 10841 43364 NA 2.94e+5 6.07e+2 0.00206 2.92e+5 2.92e+5
## # ... with 9 more variables: p05 <dbl>, p10 <dbl>, p25 <dbl>, p50 <dbl>,
## # p75 <dbl>, p90 <dbl>, p95 <dbl>, p99 <dbl>, max <dbl>
The mass variable is not contained in df_opt_caschool_input_il, but is contained in df_opt_caschool_prep_i; merge the two together to get the number of students as the mass. This variable does not need to sum to one; reweighting it to sum to one should not change results.
# Summarize
REconTools::ff_summ_percentiles(df_opt_caschool_input_il, bl_statsasrows=FALSE)
## Warning: attributes are not identical across measure variables;
## they will be dropped
## # A tibble: 7 x 19
## var n unique NAobs ZEROobs mean sd cv min p01
## <chr> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
## 1 A.il 54204 54204 0 0 656. 1.69e+1 0.0258 6.13e+2 6.24e+2
## 2 alpha.il 54204 53590 0 0 0.0662 1.44e-1 2.17 2.90e-3 4.14e-3
## 3 beta.i 54204 1 0 0 1 0 0 1 e+0 1 e+0
## 4 D.il 54204 1429 0 0 202. 2.25e+2 1.11 1 e+0 2 e+0
## 5 D.max.i 54204 203 0 0 402. 3.33e+2 0.829 5 e+0 1.2 e+1
## 6 id.i 54204 420 0 0 183. 1.16e+2 0.635 1 e+0 1.6 e+1
## 7 id.il 54204 54204 0 0 27102. 1.56e+4 0.577 1 e+0 5.43e+2
## # ... with 9 more variables: p05 <dbl>, p10 <dbl>, p25 <dbl>, p50 <dbl>,
## # p75 <dbl>, p90 <dbl>, p95 <dbl>, p99 <dbl>, max <dbl>
REconTools::ff_summ_percentiles(df_opt_caschool_input_ib, bl_statsasrows=FALSE)
## Warning: attributes are not identical across measure variables;
## they will be dropped
## # A tibble: 4 x 19
## var n unique NAobs ZEROobs mean sd cv min p01 p05
## <chr> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
## 1 A.i.l0 420 420 0 0 654. 17.2 0.0264 613. 618. 624.
## 2 alpha.o~ 420 410 0 0 2.84 0.327 0.115 1.81 1.95 2.27
## 3 beta.i 420 1 0 0 1 0 0 1 1 1
## 4 id.i 420 420 0 0 210. 121. 0.577 1 5.19 22.0
## # ... with 8 more variables: p10 <dbl>, p25 <dbl>, p50 <dbl>, p75 <dbl>,
## # p90 <dbl>, p95 <dbl>, p99 <dbl>, max <dbl>
# Mass of Students for IL file
df_opt_caschool_input_mass_il <- df_opt_caschool_input_il %>%
left_join(df_opt_caschool_prep_i %>%
mutate(mass_i = enrltot/sum(enrltot)) %>%
select(id_i, mass_i)
, by='id_i') %>% ungroup()
# Mass of Students for IB file
# The IB file is built under the assumption of fl_fa_max additional teachers per school.
# Since the mass per teacher/school is the same, the total mass per school for the ib allocation
# is still determined by mass_i. There is no need to multiply mass by the number of teachers
# assigned.
df_opt_caschool_input_mass_ib <- df_opt_caschool_input_ib %>%
left_join(df_opt_caschool_prep_i %>%
mutate(mass_i = enrltot/sum(enrltot)) %>%
mutate(cumu_sum_group_i = mass_i*round(teachers*fl_fa_max)) %>%
select(id_i, mass_i, cumu_sum_group_i, teachers)
, by='id_i') %>% ungroup()
# Total mass
sum(df_opt_caschool_input_mass_il$mass_i)
## [1] 407.3127
sum(df_opt_caschool_input_mass_ib$cumu_sum_group_i)
## [1] 81.46857
# Alternative Allocation and Mass for Alternative Allocations
REconTools::ff_summ_percentiles(df_opt_caschool_input_mass_il, bl_statsasrows=FALSE)
## Warning: attributes are not identical across measure variables;
## they will be dropped
## # A tibble: 8 x 19
## var n unique NAobs ZEROobs mean sd cv min p01
## <chr> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
## 1 A.il 54204 54204 0 0 656. 1.69e+1 0.0258 6.13e+2 6.24e+2
## 2 alpha.il 54204 53590 0 0 0.0662 1.44e-1 2.17 2.90e-3 4.14e-3
## 3 beta.i 54204 1 0 0 1 0 0 1 e+0 1 e+0
## 4 D.il 54204 1429 0 0 202. 2.25e+2 1.11 1 e+0 2 e+0
## 5 D.max.i 54204 203 0 0 402. 3.33e+2 0.829 5 e+0 1.2 e+1
## 6 id.i 54204 420 0 0 183. 1.16e+2 0.635 1 e+0 1.6 e+1
## 7 id.il 54204 54204 0 0 27102. 1.56e+4 0.577 1 e+0 5.43e+2
## 8 mass.i 54204 391 0 0 0.00751 6.21e-3 0.827 7.34e-5 2.02e-4
## # ... with 9 more variables: p05 <dbl>, p10 <dbl>, p25 <dbl>, p50 <dbl>,
## # p75 <dbl>, p90 <dbl>, p95 <dbl>, p99 <dbl>, max <dbl>
REconTools::ff_summ_percentiles(df_opt_caschool_input_mass_ib, bl_statsasrows=FALSE)
## Warning: attributes are not identical across measure variables;
## they will be dropped
## # A tibble: 7 x 19
## var n unique NAobs ZEROobs mean sd cv min p01
## <chr> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
## 1 A.i.l0 420 420 0 0 6.54e+2 1.72e+1 0.0264 6.13e+2 6.18e+2
## 2 alpha.o.i 420 410 0 0 2.84e+0 3.27e-1 0.115 1.81e+0 1.95e+0
## 3 beta.i 420 1 0 0 1 e+0 0 0 1 e+0 1 e+0
## 4 cumu.sum~ 420 398 0 0 1.94e-1 6.28e-1 3.24 7.34e-5 9.33e-5
## 5 id.i 420 420 0 0 2.10e+2 1.21e+2 0.577 1 e+0 5.19e+0
## 6 mass.i 420 391 0 0 2.38e-3 3.54e-3 1.49 7.34e-5 9.33e-5
## 7 teachers 420 203 0 0 1.29e+2 1.88e+2 1.46 5 e+0 5 e+0
## # ... with 9 more variables: p05 <dbl>, p10 <dbl>, p25 <dbl>, p50 <dbl>,
## # p75 <dbl>, p90 <dbl>, p95 <dbl>, p99 <dbl>, max <dbl>
Within Group Cumulative Mass Sum with Allocations:
# Within Group Cumulative Sum of Mass
# For the value calculation, A and alpha are updated to reflect the allocation given up to that point.
# That means the mass calculation is not the mass at a particular unit of allocation, but the mass
# for that individual group/type accumulated up to that point: a within-group cumulative sum.
# For welfare we therefore need the cumulative within-group mass, while for the allocation queue
# we need the point mass.
df_opt_caschool_input_mass_il %>%
arrange(id_i, D_il) %>%
group_by(id_i) %>%
arrange(D_il) %>%
mutate(cumu_sum_group_i = cumsum(mass_i)) %>%
arrange(id_i, D_il) %>%
ungroup()
## # A tibble: 54,204 x 9
## id_i id_il D_max_i D_il A_il alpha_il beta_i mass_i cumu_sum_group_i
## <int> <int> <dbl> <int> <dbl> <dbl> <dbl> <dbl> <dbl>
## 1 1 1 11 1 691. 1.29 1 0.000177 0.000177
## 2 1 2 11 2 692. 1.09 1 0.000177 0.000353
## 3 1 3 11 3 693. 0.932 1 0.000177 0.000530
## 4 1 4 11 4 694. 0.808 1 0.000177 0.000706
## 5 1 5 11 5 695. 0.707 1 0.000177 0.000883
## 6 1 6 11 6 696. 0.624 1 0.000177 0.00106
## 7 1 7 11 7 696. 0.554 1 0.000177 0.00124
## 8 1 8 11 8 697. 0.496 1 0.000177 0.00141
## 9 1 9 11 9 697. 0.446 1 0.000177 0.00159
## 10 1 10 11 10 698. 0.404 1 0.000177 0.00177
## # ... with 54,194 more rows
If we want to analyze more closely what is happening in a subset of cases, use the subsetting option:
it_O <- 2
set.seed(1)
df_cas_prep_sub_i <- df_opt_caschool_prep_i %>% filter(teachers <= 100 & teachers >= 50)
df_cas_prep_sub_i <- df_cas_prep_sub_i[sample(dim(df_cas_prep_sub_i)[1], it_O, replace=FALSE),]
ar_cas_id_selected <- df_cas_prep_sub_i %>% pull(id_i)
# Select from il and ib only ids that are randomly selected
bl_subsetting <- FALSE
if (bl_subsetting) {
df_opt_caschool_input_mass_il <- df_opt_caschool_input_mass_il %>% filter(id_i %in% ar_cas_id_selected)
df_opt_caschool_input_mass_ib <- df_opt_caschool_input_mass_ib %>% filter(id_i %in% ar_cas_id_selected)
}
# Summarize
REconTools::ff_summ_percentiles(df_opt_caschool_input_mass_il, bl_statsasrows=FALSE)
## Warning: attributes are not identical across measure variables;
## they will be dropped
## # A tibble: 8 x 19
## var n unique NAobs ZEROobs mean sd cv min p01
## <chr> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
## 1 A.il 54204 54204 0 0 656. 1.69e+1 0.0258 6.13e+2 6.24e+2
## 2 alpha.il 54204 53590 0 0 0.0662 1.44e-1 2.17 2.90e-3 4.14e-3
## 3 beta.i 54204 1 0 0 1 0 0 1 e+0 1 e+0
## 4 D.il 54204 1429 0 0 202. 2.25e+2 1.11 1 e+0 2 e+0
## 5 D.max.i 54204 203 0 0 402. 3.33e+2 0.829 5 e+0 1.2 e+1
## 6 id.i 54204 420 0 0 183. 1.16e+2 0.635 1 e+0 1.6 e+1
## 7 id.il 54204 54204 0 0 27102. 1.56e+4 0.577 1 e+0 5.43e+2
## 8 mass.i 54204 391 0 0 0.00751 6.21e-3 0.827 7.34e-5 2.02e-4
## # ... with 9 more variables: p05 <dbl>, p10 <dbl>, p25 <dbl>, p50 <dbl>,
## # p75 <dbl>, p90 <dbl>, p95 <dbl>, p99 <dbl>, max <dbl>
REconTools::ff_summ_percentiles(df_opt_caschool_input_mass_ib, bl_statsasrows=FALSE)
## Warning: attributes are not identical across measure variables;
## they will be dropped
## # A tibble: 7 x 19
## var n unique NAobs ZEROobs mean sd cv min p01
## <chr> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
## 1 A.i.l0 420 420 0 0 6.54e+2 1.72e+1 0.0264 6.13e+2 6.18e+2
## 2 alpha.o.i 420 410 0 0 2.84e+0 3.27e-1 0.115 1.81e+0 1.95e+0
## 3 beta.i 420 1 0 0 1 e+0 0 0 1 e+0 1 e+0
## 4 cumu.sum~ 420 398 0 0 1.94e-1 6.28e-1 3.24 7.34e-5 9.33e-5
## 5 id.i 420 420 0 0 2.10e+2 1.21e+2 0.577 1 e+0 5.19e+0
## 6 mass.i 420 391 0 0 2.38e-3 3.54e-3 1.49 7.34e-5 9.33e-5
## 7 teachers 420 203 0 0 1.29e+2 1.88e+2 1.46 5 e+0 5 e+0
## # ... with 9 more variables: p05 <dbl>, p10 <dbl>, p25 <dbl>, p50 <dbl>,
## # p75 <dbl>, p90 <dbl>, p95 <dbl>, p99 <dbl>, max <dbl>
Copying the code from before, use fl_teacher_increase_sub_number and df_opt_caschool_input_mass_il, without specifying the mass column mass_i.
# Count of Available Teachers (teacher spots)
fl_teacher_increase_sub_number <- sum(df_opt_caschool_input_mass_ib$teachers)*fl_fa_max
fl_teacher_increase_sub_number <- floor(fl_teacher_increase_sub_number)
# Allocate Based on the Count of Available Teachers
ls_df_queue <- PrjOptiAlloc::ffp_opt_anlyz_rhgin_dis(
ar_rho,
fl_teacher_increase_sub_number,
bl_df_alloc_il = TRUE,
df_input_il = df_opt_caschool_input_mass_il,
svr_rho = 'rho',
svr_id_i = 'id_i', svr_id_il = 'id_il',
svr_D_max_i = 'D_max_i', svr_D_il = 'D_il',
svr_D_star_i = 'D_star_i', svr_F_star_i = 'F_star_i',
svr_inpalc = 'Q_il', svr_D_Wbin_il = 'D_Wbin_il',
svr_A_il = 'A_il', svr_alpha_il = 'alpha_il',
svr_beta_i = 'beta_i',
svr_expout = 'opti_exp_outcome',
st_idcol_prefix = 'sid_')
## Joining, by = "id_il"
## Adding missing grouping variables: `id_i`
## Adding missing grouping variables: `id_i`
## Adding missing grouping variables: `id_i`
## Adding missing grouping variables: `id_i`
## Adding missing grouping variables: `id_i`
## Warning: `cols` is now required when using unnest().
## Please use `cols = c(rev)`
## Warning: `cols` is now required when using unnest().
## Please use `cols = c(alloc_i_upto_Q)`
df_queue_il_long_count <- ls_df_queue$df_queue_il_long
df_queue_il_wide_count <- ls_df_queue$df_queue_il_wide
df_alloc_i_long_count <- ls_df_queue$df_alloc_i_long
df_rho_gini_count <- ls_df_queue$df_rho_gini
df_alloc_il_long_count <- ls_df_queue$df_alloc_il_long
Analyze the results around the teacher-count threshold from the count-based allocation above:
# Cumulative Mass
df_queue_il_long_mass_cntcumu <- df_queue_il_long_count %>% filter(rho_val == ar_rho[1]) %>%
select(-rho, -rho_val) %>%
left_join(df_opt_caschool_prep_i %>%
mutate(mass_i = enrltot/sum(enrltot)) %>%
select(id_i, mass_i), by='id_i') %>%
arrange(Q_il) %>%
mutate(mass_cumu_il = cumsum(mass_i))
# View Around the Teacher Available Cut-off mass
df_queue_il_long_mass_cntcumu %>%
filter(Q_il >= fl_teacher_increase_sub_number - 4
& Q_il <= fl_teacher_increase_sub_number + 4)
## # A tibble: 9 x 9
## id_i id_il D_max_i D_il Q_il D_Wbin_il V_star_Q_il mass_i mass_cumu_il
## <int> <int> <dbl> <int> <int> <dbl> <dbl> <dbl> <dbl>
## 1 339 47438 135 43 10836 1 294663. 0.00235 14.4
## 2 406 52898 132 37 10837 1 294663. 0.00212 14.4
## 3 282 40715 89 59 10838 1 294663. 0.00162 14.4
## 4 72 13654 141 50 10839 1 294663. 0.00271 14.4
## 5 412 53590 87 40 10840 1 294663. 0.00119 14.4
## 6 45 5664 221 7 10841 0 NA 0.00386 14.4
## 7 411 53444 139 33 10842 0 NA 0.00219 14.4
## 8 351 49277 127 49 10843 0 NA 0.00230 14.4
## 9 404 52726 112 42 10844 0 NA 0.00176 14.4
# Summarize
REconTools::ff_summ_percentiles(df_queue_il_long_mass_cntcumu, bl_statsasrows=FALSE)
## Warning: attributes are not identical across measure variables;
## they will be dropped
## # A tibble: 9 x 19
## var n unique NAobs ZEROobs mean sd cv min p01
## <chr> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
## 1 D.il 54204 1429 0 0 2.02e+2 2.25e+2 1.11 1 e+0 2 e+0
## 2 D.max.i 54204 203 0 0 4.02e+2 3.33e+2 0.829 5 e+0 1.2 e+1
## 3 D.Wbin~ 54204 2 0 43364 2.00e-1 4.00e-1 2.00 0 0
## 4 id.i 54204 420 0 0 1.83e+2 1.16e+2 0.635 1 e+0 1.6 e+1
## 5 id.il 54204 54204 0 0 2.71e+4 1.56e+4 0.577 1 e+0 5.43e+2
## 6 mass.c~ 54204 54204 0 0 1.22e+2 1.14e+2 0.931 1.17e-4 1.07e-1
## 7 mass.i 54204 391 0 0 7.51e-3 6.21e-3 0.827 7.34e-5 2.02e-4
## 8 Q.il 54204 54204 0 0 2.71e+4 1.56e+4 0.577 1 e+0 5.43e+2
## 9 V.star~ 54204 10841 43364 NA 2.94e+5 6.07e+2 0.00206 2.92e+5 2.92e+5
## # ... with 9 more variables: p05 <dbl>, p10 <dbl>, p25 <dbl>, p50 <dbl>,
## # p75 <dbl>, p90 <dbl>, p95 <dbl>, p99 <dbl>, max <dbl>
Same as before, but now include the mass column:
# Measure of Available Teachers
fl_measure_teacher_increase_number <- sum(df_opt_caschool_input_mass_ib$cumu_sum_group_i)
# Allocate Based on the Measure of Available Teachers
ls_df_queue <- PrjOptiAlloc::ffp_opt_anlyz_rhgin_dis(
ar_rho,
fl_measure_teacher_increase_number,
bl_df_alloc_il = TRUE,
df_input_il = df_opt_caschool_input_mass_il,
svr_rho = 'rho',
svr_id_i = 'id_i', svr_id_il = 'id_il',
svr_D_max_i = 'D_max_i', svr_D_il = 'D_il',
svr_D_star_i = 'D_star_i', svr_F_star_i = 'F_star_i',
svr_inpalc = 'Q_il', svr_D_Wbin_il = 'D_Wbin_il',
svr_A_il = 'A_il', svr_alpha_il = 'alpha_il',
svr_beta_i = 'beta_i',
svr_measure_i = 'mass_i',
svr_expout = 'opti_exp_outcome',
st_idcol_prefix = 'sid_')
## Joining, by = "id_il"
## Adding missing grouping variables: `id_i`
## Adding missing grouping variables: `id_i`
## Adding missing grouping variables: `id_i`
## Adding missing grouping variables: `id_i`
## Adding missing grouping variables: `id_i`
## Warning: `cols` is now required when using unnest().
## Please use `cols = c(rev)`
## Warning: `cols` is now required when using unnest().
## Please use `cols = c(alloc_i_upto_Q)`
df_queue_il_long_mass <- ls_df_queue$df_queue_il_long
df_queue_il_wide_mass <- ls_df_queue$df_queue_il_wide
df_alloc_i_long_mass <- ls_df_queue$df_alloc_i_long
df_rho_gini_mass <- ls_df_queue$df_rho_gini
df_alloc_il_long_mass <- ls_df_queue$df_alloc_il_long
Analyze the Allocation Results based on measure:
df_queue_il_long_mass %>% filter(rho_val == ar_rho[3]) %>%
select(-rho, -rho_val) %>% arrange(Q_il) %>%
filter(mass_cumu_il >= fl_measure_teacher_increase_number - 0.01
& mass_cumu_il <= fl_measure_teacher_increase_number + 0.01)
## # A tibble: 3 x 8
## id_i id_il D_max_i D_il Q_il D_Wbin_il mass_cumu_il V_star_Q_il
## <int> <int> <dbl> <int> <int> <dbl> <dbl> <dbl>
## 1 264 38122 550 33 25779 1 81.5 654.
## 2 220 32688 408 79 25780 0 81.5 NA
## 3 105 17069 228 154 25781 0 81.5 NA
# Summarize
REconTools::ff_summ_percentiles(df_queue_il_long_mass_cumu, bl_statsasrows=FALSE)
## Warning: attributes are not identical across measure variables;
## they will be dropped
## # A tibble: 9 x 19
## var n unique NAobs ZEROobs mean sd cv min p01
## <chr> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
## 1 D.il 54204 1429 0 0 2.02e+2 2.25e+2 1.11 1 e+0 2 e+0
## 2 D.max.i 54204 203 0 0 4.02e+2 3.33e+2 0.829 5 e+0 1.2 e+1
## 3 D.Wbin.~ 54204 2 0 43364 2.00e-1 4.00e-1 2.00 0 0
## 4 id.i 54204 420 0 0 1.83e+2 1.16e+2 0.635 1 e+0 1.6 e+1
## 5 id.il 54204 54204 0 0 2.71e+4 1.56e+4 0.577 1 e+0 5.43e+2
## 6 mass 54204 391 0 0 7.51e-3 6.21e-3 0.827 7.34e-5 2.02e-4
## 7 mass.cu~ 54204 54204 0 0 1.22e+2 1.14e+2 0.931 1.17e-4 1.07e-1
## 8 Q.il 54204 54204 0 0 2.71e+4 1.56e+4 0.577 1 e+0 5.43e+2
## 9 V.star.~ 54204 10841 43364 NA 2.94e+5 6.07e+2 0.00206 2.92e+5 2.92e+5
## # ... with 9 more variables: p05 <dbl>, p10 <dbl>, p25 <dbl>, p50 <dbl>,
## # p75 <dbl>, p90 <dbl>, p95 <dbl>, p99 <dbl>, max <dbl>
Under the Utilitarian planner, filter(rho_val == ar_rho[1]):
Based on count, there were 30 teacher spots to allocate, so we move up to queue position 30. Based on relative measure/weight, the schools assigned more teacher spots under the alternative allocation also had higher weight. When the 10 + 20 teachers assigned to each type of school are sorted by queue position and their weights are cumulatively summed along the queue, the available measure is enough to reach the 38th queue position. Ignoring the weights of the schools, we would therefore have allocated incorrectly: we would have thought there were only 30 slots to allocate, but given the weights there are in fact enough resources to reach the 38th spot on the queue.
Under filter(rho_val == ar_rho[2]), [3], and [4]:
Under alternative planners the queue itself is different, hence the cumulative weight along the queue is different, so the number of "spots" that can be allocated is different. There are 37, 31, and 24 spots allocated under increasingly Rawlsian preferences. This reflects the fact that under more Rawlsian preferences, the districts with more students are ranked higher on the queue, and they take up more mass. At the extreme, given N=2, all the mass from the smaller-mass type goes to the larger-mass type, so the queue goes up to however many spots the larger school districts were assigned uniformly, plus additional spots for the larger school when the smaller school's additional teachers are effectively reassigned to the larger school given the relative masses of the smaller and larger schools.
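A small numerical sketch of this count-versus-measure logic, using entirely hypothetical values (two school types, masses 0.4 and 0.6, and an arbitrary queue ordering), shows how cumulatively summing per-spot masses along the queue can admit more queue positions than the pure spot count suggests:
library(dplyr)
library(tibble)
# Hypothetical example: type A has mass 0.4, type B has mass 0.6.
# Alternative allocation: 10 spots to type A and 20 spots to type B,
# so the total measure available is 10*0.4 + 20*0.6 = 16.
fl_measure_available <- 10 * 0.4 + 20 * 0.6
# A hypothetical optimal queue that ranks many type-A spots ahead of type-B spots
df_queue_example <- tibble(
  Q_il = 1:40,
  type = c(rep("A", 15), rep("B", 25)),
  mass = ifelse(type == "A", 0.4, 0.6)
)
# Walk down the queue, cumulatively summing mass; count positions within the available measure
df_queue_example %>%
  mutate(mass_cumu = cumsum(mass)) %>%
  filter(mass_cumu <= fl_measure_available) %>%
  nrow()
## [1] 31
# 31 queue positions fit within measure 16, more than the 30 spots implied by a pure count,
# because the queue front-loads the lower-mass type-A spots.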
tb_rho_rev_count <-
PrjOptiAlloc::ffp_opt_anlyz_sodis_rev(ar_rho,
fl_teacher_increase_sub_number,
df_input_ib = df_opt_caschool_input_mass_ib,
df_queue_il_long_with_V = df_queue_il_long_count,
svr_beta_i = 'beta_i')
## Warning: `cols` is now required when using unnest().
## Please use `cols = c(rev)`
## Warning: `cols` is now required when using unnest().
## Please use `cols = c(fl_util_alter_alloc)`
# Display Results
print(tb_rho_rev_count)
## # A tibble: 4 x 4
## rho rho_val REV AlterOutcome
## <int> <dbl> <dbl> <dbl>
## 1 1 0.99 0.836 293297.
## 2 2 0.785 0.836 1449231.
## 3 3 -3.64 0.831 125.
## 4 4 -99 0.949 601.
tb_rho_rev_mass <-
PrjOptiAlloc::ffp_opt_anlyz_sodis_rev(ar_rho,
fl_measure_teacher_increase_number,
df_input_ib = df_opt_caschool_input_mass_ib,
df_queue_il_long_with_V = df_queue_il_long_mass,
svr_beta_i = 'beta_i', svr_measure_i = 'mass_i')
## Warning: `cols` is now required when using unnest().
## Please use `cols = c(rev)`
## Warning: `cols` is now required when using unnest().
## Please use `cols = c(fl_util_alter_alloc)`
# Display Results
print(tb_rho_rev_mass)
## # A tibble: 4 x 4
## rho rho_val REV AlterOutcome
## <int> <dbl> <dbl> <dbl>
## 1 1 0.99 0.607 653.
## 2 2 0.785 0.606 653.
## 3 3 -3.64 0.583 652.
## 4 4 -99 0.911 638.
Since we are comparing against the uniform allocation, the REV difference is smallest when we consider preferences close to Cobb-Douglas. The Utilitarian and the Rawlsian planner are both interested in allocating in an unequal way: the Utilitarian wants to allocate more to those with higher alpha, while the Rawlsian wants to allocate more to those with lower A. The Cobb-Douglas planner seeks a balance between the two alternatives.
Depending on the alternative allocation, the REV results might be decreasing, increasing, or follow other patterns.