Function Test Discrete Allocation with Linear Regression • Optimal Allocation

Test discrete optimal allocation solution line by line without function. Use the California student test score dataset. Regress student English and Math test scores on Student-Teacher-Ratio.

This function produces the same results as DISCRETE–Discrete Optimal Allocation California Teacher Student Ratio (Line by Line). The differences are:

the program here uses the dataframes created by the line by line file as Inputs
the program here uses functions that are identical to the line by line function, but are saved in the R folder.

This file, by invoking functions, is much shorter than the line by line program.

rm(list = ls(all.names = TRUE))
options(knitr.duplicate.label = 'allow')

library(dplyr)
library(tidyr)
library(tibble)
library(stringr)
library(broom)
library(ggplot2)
library(REconTools)

library(PrjOptiAlloc)

library(knitr)
library(kableExtra)

Hand Input Data Example

Set Preference Array:

ar_rho <- 1 - (10^(c(seq(-2,2, length.out=4))))
ar_rho <- unique(ar_rho)
it_w_agg <- 5

id_i    <- c(1,1,1,2,2,2,3,3,3)
id_il   <- c(1,2,3,4,5,6,7,8,9)
D_max_i <- c(3,3,3,3,3,3,3,3,3)
D_il    <- c(1,2,3,1,2,3,1,2,3)

A_il_i1 <- c(4,3,2,1)
A_il_i2 <- c(2.7,2.2,1.5,1.1)
A_il_i3 <- c(1.9,1.8,1.1,0.1)
A_il <- c(A_il_i1[2:4], A_il_i2[2:4], A_il_i3[2:4])
alpha_il <- c(diff(-A_il_i1), diff(-A_il_i2), diff(-A_il_i3))
beta_i <- c(1/3,1/3,1/3,1/3,1/3,1/3,1/3,1/3,1/3)

df_handinput_input_il <- as_tibble(cbind(id_i, id_il, D_max_i, D_il, A_il, alpha_il, beta_i))

print(df_handinput_input_il)

## # A tibble: 9 x 7
##    id_i id_il D_max_i  D_il  A_il alpha_il beta_i
##   <dbl> <dbl>   <dbl> <dbl> <dbl>    <dbl>  <dbl>
## 1     1     1       3     1   3      1      0.333
## 2     1     2       3     2   2      1      0.333
## 3     1     3       3     3   1      1      0.333
## 4     2     4       3     1   2.2    0.5    0.333
## 5     2     5       3     2   1.5    0.7    0.333
## 6     2     6       3     3   1.1    0.4    0.333
## 7     3     7       3     1   1.8    0.100  0.333
## 8     3     8       3     2   1.1    0.7    0.333
## 9     3     9       3     3   0.1    1      0.333

ls_df_queue_hand <- PrjOptiAlloc::ffp_opt_anlyz_rhgin_dis(ar_rho, it_w_agg,
                                            bl_df_alloc_il = TRUE,
                                            bl_return_V = TRUE,
                                            bl_return_allQ_V = TRUE,
                                            bl_return_inner_V = TRUE,
                                            df_input_il = df_handinput_input_il)

## Joining, by = "id_il"

## Adding missing grouping variables: `id_i`

## Warning in ff_dist_gini_vector_pos(.[[svr_D_star_i]]): Data vector has only n=3,
## max-inequality/min-gini=0.5

## Warning in ff_dist_gini_vector_pos(.[[svr_D_star_i]]): Data vector has only n=3,
## max-inequality/min-gini=0.5

## Warning in ff_dist_gini_vector_pos(.[[svr_D_star_i]]): Data vector has only n=3,
## max-inequality/min-gini=0.5

## Warning in ff_dist_gini_vector_pos(.[[svr_D_star_i]]): Data vector has only n=3,
## max-inequality/min-gini=0.5

## Warning in ff_dist_gini_vector_pos(.[[svr_D_star_i_demean]]): Data vector has
## only n=3, max-inequality/min-gini=0.5

## Warning in ff_dist_gini_vector_pos(.[[svr_D_star_i_demean]]): Data vector has
## only n=3, max-inequality/min-gini=0.5

## Warning in ff_dist_gini_vector_pos(.[[svr_D_star_i_demean]]): Data vector has
## only n=3, max-inequality/min-gini=0.5

## Warning in ff_dist_gini_vector_pos(.[[svr_D_star_i_demean]]): Data vector has
## only n=3, max-inequality/min-gini=0.5

## Adding missing grouping variables: `id_i`
## Adding missing grouping variables: `id_i`
## Adding missing grouping variables: `id_i`
## Adding missing grouping variables: `id_i`

## Warning: `cols` is now required when using unnest().
## Please use `cols = c(rev)`

## Warning: `cols` is now required when using unnest().
## Please use `cols = c(alloc_i_upto_Q)`

df_queue_il_long <- ls_df_queue_hand$df_queue_il_long
df_queue_il_wide <- ls_df_queue_hand$df_queue_il_wide
df_alloc_i_long  <- ls_df_queue_hand$df_alloc_i_long
df_rho_gini  <- ls_df_queue_hand$df_rho_gini
df_alloc_il_long <- ls_df_queue_hand$df_alloc_il_long

print(df_queue_il_long)

## # A tibble: 36 x 11
##      rho rho_val  id_i id_il D_max_i  D_il  Q_il D_Wbin_il  V_sum_l V_inner_Q_il
##    <dbl>   <dbl> <dbl> <dbl>   <dbl> <dbl> <int>     <dbl>    <dbl>        <dbl>
##  1     1   0.99      1     1       3     1     4         1 1.31e+ 0     2.41e+ 0
##  2     2   0.785     1     1       3     1     4         1 9.89e- 1     1.97e+ 0
##  3     3  -3.64      1     1       3     1     9         0 2.14e- 3     4.33e- 2
##  4     4 -99         1     1       3     1     9         0 8.30e-61     8.44e-29
##  5     1   0.99      1     2       3     2     3         1 9.89e- 1     2.08e+ 0
##  6     2   0.785     1     2       3     2     3         1 7.89e- 1     1.77e+ 0
##  7     3  -3.64      1     2       3     2     6         0 6.10e- 3     6.42e- 2
##  8     4 -99         1     2       3     2     7         0 1.94e-48     8.44e-29
##  9     1   0.99      1     3       3     3     2         1 6.62e- 1     1.76e+ 0
## 10     2   0.785     1     3       3     3     2         1 5.74e- 1     1.55e+ 0
## # ... with 26 more rows, and 1 more variable: V_star_Q_il <dbl>

print(df_queue_il_wide)

## # A tibble: 9 x 14
##   id_il  A_il alpha_il beta_i rank_min rank_max avg_rank  id_i D_max_i  D_il
##   <dbl> <dbl>    <dbl>  <dbl>    <int>    <int>    <dbl> <dbl>   <dbl> <dbl>
## 1     1   3      1      0.333        9        4     6.5      1       3     1
## 2     2   2      1      0.333        7        3     4.75     1       3     2
## 3     3   1      1      0.333        2        2     2        1       3     3
## 4     4   2.2    0.5    0.333        8        7     7.25     2       3     1
## 5     5   1.5    0.7    0.333        6        5     5.5      2       3     2
## 6     6   1.1    0.4    0.333        8        4     6        2       3     3
## 7     7   1.8    0.100  0.333        9        6     8        3       3     1
## 8     8   1.1    0.7    0.333        5        3     4        3       3     2
## 9     9   0.1    1      0.333        1        1     1        3       3     3
## # ... with 4 more variables: rho_c1_rk <int>, rho_c2_rk <int>, rho_c3_rk <int>,
## #   rho_c4_rk <int>

print(df_alloc_i_long)

## # A tibble: 12 x 7
## # Groups:   id_i [3]
##      rho rho_val  id_i D_max_i D_star_i F_star_i EH_star_i
##    <dbl>   <dbl> <dbl>   <dbl>    <dbl>    <dbl>     <dbl>
##  1     1   0.99      1       3        3    1           2  
##  2     2   0.785     1       3        3    1           2  
##  3     3  -3.64      1       3        1    0.333       4  
##  4     4 -99         1       3        1    0.333       4  
##  5     1   0.99      2       3        0    0           2.2
##  6     2   0.785     2       3        0    0           2.2
##  7     3  -3.64      2       3        2    0.667       2.2
##  8     4 -99         2       3        2    0.667       2.2
##  9     1   0.99      3       3        2    0.667       1.8
## 10     2   0.785     3       3        2    0.667       1.8
## 11     3  -3.64      3       3        2    0.667       1.8
## 12     4 -99         3       3        2    0.667       1.8

print(df_rho_gini)

## # A tibble: 12 x 9
##      rho rho_val gini_D_star gini_EH_star atkinson_EH_star sd_D_star sd_EH_star
##    <int>   <dbl>       <dbl>        <dbl>            <dbl>     <dbl>      <dbl>
##  1     1   0.99          0.3       0.0333        0.0000334     1.53        0.2 
##  2     2   0.785         0.3       0.0333        0.0000334     1.53        0.2 
##  3     3  -3.64          0.1       0.137         0.0000334     0.577       1.17
##  4     4 -99             0.1       0.137         0.000720      0.577       1.17
##  5     5   0.99          0.3       0.0333        0.000720      1.53        0.2 
##  6     6   0.785         0.3       0.0333        0.000720      1.53        0.2 
##  7     7  -3.64          0.1       0.137         0.189         0.577       1.17
##  8     8 -99             0.1       0.137         0.189         0.577       1.17
##  9     9   0.99          0.3       0.0333        0.189         1.53        0.2 
## 10    10   0.785         0.3       0.0333        0.317         1.53        0.2 
## 11    11  -3.64          0.1       0.137         0.317         0.577       1.17
## 12    12 -99             0.1       0.137         0.317         0.577       1.17
## # ... with 2 more variables: mean_EH_star <dbl>, min_EH_star <dbl>

print(df_alloc_il_long)

## # A tibble: 36 x 5
##      rho  Q_il sid_1 sid_2 sid_3
##    <dbl> <int> <dbl> <dbl> <dbl>
##  1     1     1     0     0     1
##  2     1     2     1     0     1
##  3     1     3     2     0     1
##  4     1     4     3     0     1
##  5     1     5     3     0     2
##  6     1     6     3     1     2
##  7     1     7     3     2     2
##  8     1     8     3     3     2
##  9     1     9     3     3     3
## 10     2     1     0     0     1
## # ... with 26 more rows

Load Pre-Generated Data

These are data thata were generated in the DISCRETE–Discrete Optimal Allocation California Teacher Student Ratio (Line by Line) function.

# Load Data
data(df_opt_caschool_prep_i)
data(df_opt_caschool_input_il)
data(df_opt_caschool_input_ib)

# Show Variables
str(df_opt_caschool_prep_i)

## 'data.frame':    420 obs. of  15 variables:
##  $ id_i    : int  1 2 3 4 5 6 7 8 9 10 ...
##  $ D_max_i : num  11 11 83 14 72 6 10 42 19 108 ...
##  $ D_o_i   : num  2 2 17 3 14 1 2 8 4 22 ...
##  $ Omega_i : num  706 673 657 656 656 ...
##  $ theta_i : num  -0.87 -0.87 -0.87 -0.87 -0.87 ...
##  $ beta_i  : num  0.00238 0.00238 0.00238 0.00238 0.00238 ...
##  $ enrltot : num  195 240 1550 243 1335 ...
##  $ teachers: num  11 11 83 14 72 6 10 42 19 108 ...
##  $ stravg  : num  17.7 21.8 18.7 17.4 18.5 ...
##  $ distcod : int  75119 61499 61549 61457 61523 62042 68536 63834 62331 67306 ...
##  $ county  : Factor w/ 45 levels "Alameda","Butte",..: 1 2 2 2 2 6 29 11 6 25 ...
##  $ district: Factor w/ 409 levels "Ackerman Elementary",..: 362 214 367 134 270 53 152 383 263 93 ...
##  $ grspan  : Factor w/ 2 levels "KK-06","KK-08": 2 2 2 2 2 2 2 2 2 1 ...
##  $ avginc  : num  22.69 9.82 8.98 8.98 9.08 ...
##  $ testscr : num  691 661 644 648 641 ...

str(df_opt_caschool_input_il)

## tibble [54,204 x 7] (S3: tbl_df/tbl/data.frame)
##  $ id_i    : int [1:54204] 1 1 1 1 1 1 1 1 1 1 ...
##  $ id_il   : int [1:54204] 1 2 3 4 5 6 7 8 9 10 ...
##  $ D_max_i : num [1:54204] 11 11 11 11 11 11 11 11 11 11 ...
##  $ D_il    : int [1:54204] 1 2 3 4 5 6 7 8 9 10 ...
##  $ A_il    : num [1:54204] 691 692 693 694 695 ...
##  $ alpha_il: num [1:54204] 1.285 1.087 0.932 0.808 0.707 ...
##  $ beta_i  : num [1:54204] 0.00238 0.00238 0.00238 0.00238 0.00238 ...

str(df_opt_caschool_input_ib)

## 'data.frame':    420 obs. of  4 variables:
##  $ id_i     : int  1 2 3 4 5 6 7 8 9 10 ...
##  $ A_i_l0   : num  691 654 641 641 639 ...
##  $ alpha_o_i: num  2.37 2.92 2.76 2.66 2.63 ...
##  $ beta_i   : num  0.00238 0.00238 0.00238 0.00238 0.00238 ...

# Modifying Beta Value
# When individuals have the same weights, beta values do not matter
df_opt_caschool_input_il <- df_opt_caschool_input_il %>% mutate(beta_i = 1)
df_opt_caschool_input_ib <- df_opt_caschool_input_ib %>% mutate(beta_i = 1)

Solve All Districts Discrete Optimal Allocation Across Planner Preferences

Specify Policy Parameters

Note that input_il function already is based on fl_fi_max.

# 100 percent teacher at most per school, discretize floor as needed
# This is not a parameter that matters here, already a part of the input_il function
fl_fi_max = 1.00
# 20 percent total additional of all teachers
fl_fa_max = 0.20
# What is the number of teachers we can increase by
fl_teacher_increase_number <- sum(df_opt_caschool_prep_i$teachers)*fl_fa_max
fl_teacher_increase_number <- floor(fl_teacher_increase_number)

# Rho values to consider
ar_rho <- 1 - (10^(c(seq(-2,2, length.out=4))))
ar_rho <- unique(ar_rho)

Optimal Queue and Allocations

Use the discrete allocation function across preference ffp_opt_anlyz_rhgin_dis.html function. This computes optimal allocation for multiple planner inequality aversion \(\lambda\) parameters at the same time. Note that in the function \(\lambda=\rho\).

# Optimal Allocation
ls_df_queue <- PrjOptiAlloc::ffp_opt_anlyz_rhgin_dis(ar_rho,
                                       fl_teacher_increase_number,
                                       bl_df_alloc_il = TRUE,
                                       df_input_il = df_opt_caschool_input_il,
                                       svr_rho = 'rho',
                                       svr_id_i = 'id_i', svr_id_il = 'id_il',
                                       svr_D_max_i = 'D_max_i', svr_D_il = 'D_il',
                                       svr_D_star_i = 'D_star_i', svr_F_star_i = 'F_star_i',
                                       svr_inpalc = 'Q_il', svr_D_Wbin_il = 'D_Wbin_il',
                                       svr_A_il = 'A_il', svr_alpha_il = 'alpha_il', svr_beta_i = 'beta_i',
                                       svr_expout = 'opti_exp_outcome',
                                       st_idcol_prefix = 'sid_')

## Joining, by = "id_il"

## Adding missing grouping variables: `id_i`
## Adding missing grouping variables: `id_i`
## Adding missing grouping variables: `id_i`
## Adding missing grouping variables: `id_i`
## Adding missing grouping variables: `id_i`

## Warning: `cols` is now required when using unnest().
## Please use `cols = c(rev)`

## Warning: `cols` is now required when using unnest().
## Please use `cols = c(alloc_i_upto_Q)`

df_queue_il_long <- ls_df_queue$df_queue_il_long
df_queue_il_wide <- ls_df_queue$df_queue_il_wide
df_alloc_i_long <- ls_df_queue$df_alloc_i_long
df_rho_gini  <- ls_df_queue$df_rho_gini
df_alloc_il_long <- ls_df_queue$df_alloc_il_long

# Display Results
print(head(df_queue_il_long, 10))

## # A tibble: 10 x 9
##      rho rho_val  id_i id_il D_max_i  D_il  Q_il D_Wbin_il V_star_Q_il
##    <dbl>   <dbl> <int> <int>   <dbl> <int> <int>     <dbl>       <dbl>
##  1     1   0.99      1     1      11     1   138         1     292290.
##  2     2   0.785     1     1      11     1   140         1    1444269.
##  3     3  -3.64      1     1      11     1   244         1        125.
##  4     4 -99         1     1      11     1 36657         0         NA 
##  5     1   0.99      1     2      11     2   209         1     292379.
##  6     2   0.785     1     2      11     2   212         1    1444712.
##  7     3  -3.64      1     2      11     2   372         1        125.
##  8     4 -99         1     2      11     2 39093         0         NA 
##  9     1   0.99      1     3      11     3   297         1     292474.
## 10     2   0.785     1     3      11     3   305         1    1445198.

# print(str(df_queue_il_long))
print(head(df_queue_il_wide, 10))

## # A tibble: 10 x 14
##    id_il  A_il alpha_il beta_i rank_min rank_max avg_rank  id_i D_max_i  D_il
##    <int> <dbl>    <dbl>  <dbl>    <int>    <int>    <dbl> <int>   <dbl> <int>
##  1     1  691.    1.29       1    36657      138    9295.     1      11     1
##  2     2  692.    1.09       1    39093      209    9972.     1      11     2
##  3     3  693.    0.932      1    41307      297   10604      1      11     3
##  4     4  694.    0.808      1    43280      404   11190.     1      11     4
##  5     5  695.    0.707      1    45090      522   11750.     1      11     5
##  6     6  696.    0.624      1    46600      658   12248.     1      11     6
##  7     7  696.    0.554      1    47875      813   12704.     1      11     7
##  8     8  697.    0.496      1    48958      978   13115      1      11     8
##  9     9  697.    0.446      1    49841     1165   13495      1      11     9
## 10    10  698.    0.404      1    50512     1357   13826.     1      11    10
## # ... with 4 more variables: rho_c1_rk <int>, rho_c2_rk <int>, rho_c3_rk <int>,
## #   rho_c4_rk <int>

# print(str(df_queue_il_wide))
print(head(df_alloc_i_long, 10))

## # A tibble: 10 x 7
## # Groups:   id_i [3]
##      rho rho_val  id_i D_max_i D_star_i F_star_i EH_star_i
##    <dbl>   <dbl> <int>   <dbl>    <dbl>    <dbl>     <dbl>
##  1     1   0.99      1      11       11    1          699.
##  2     2   0.785     1      11       11    1          699.
##  3     3  -3.64      1      11       11    1          699.
##  4     4 -99         1      11        0    0          691.
##  5     1   0.99      2      11       11    1          664.
##  6     2   0.785     2      11       11    1          664.
##  7     3  -3.64      2      11       11    1          664.
##  8     4 -99         2      11        6    0.545      661.
##  9     1   0.99      3      83       54    0.651      647.
## 10     2   0.785     3      83       54    0.651      647.

# print(str(df_queue_il_wide))
print(df_rho_gini)

## # A tibble: 1,680 x 9
##      rho rho_val gini_D_star gini_EH_star atkinson_EH_star sd_D_star sd_EH_star
##    <int>   <dbl>       <dbl>        <dbl>            <dbl>     <dbl>      <dbl>
##  1     1   0.99        0.445       0.0154       0.00000362      20.1       17.8
##  2     2   0.785       0.445       0.0154       0.00000362      20.2       17.8
##  3     3  -3.64        0.445       0.0151       0.00000362      20.3       17.5
##  4     4 -99           0.821       0.0125       0.00000362      60.8       14.4
##  5     5   0.99        0.445       0.0154       0.00000362      20.1       17.8
##  6     6   0.785       0.445       0.0154       0.00000362      20.2       17.8
##  7     7  -3.64        0.445       0.0151       0.00000362      20.3       17.5
##  8     8 -99           0.821       0.0125       0.00000362      60.8       14.4
##  9     9   0.99        0.445       0.0154       0.00000362      20.1       17.8
## 10    10   0.785       0.445       0.0154       0.00000362      20.2       17.8
## # ... with 1,670 more rows, and 2 more variables: mean_EH_star <dbl>,
## #   min_EH_star <dbl>

# print(str(df_alloc_i_long))
print(head(df_alloc_il_long, 10))

## # A tibble: 10 x 422
##      rho  Q_il sid_1 sid_2 sid_3 sid_4 sid_5 sid_6 sid_7 sid_8 sid_9 sid_10
##    <dbl> <int> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>  <dbl>
##  1     1     1     0     0     0     0     0     0     0     0     0      0
##  2     1     2     0     0     0     0     0     0     0     0     0      0
##  3     1     3     0     0     0     0     0     0     0     0     0      0
##  4     1     4     0     0     0     0     0     0     0     0     0      0
##  5     1     5     0     0     0     0     0     0     0     0     0      0
##  6     1     6     0     0     0     0     0     0     0     0     0      0
##  7     1     7     0     0     0     0     0     0     0     0     0      0
##  8     1     8     0     0     0     0     0     1     0     0     0      0
##  9     1     9     0     0     0     0     0     1     0     0     0      0
## 10     1    10     0     0     0     0     0     1     0     0     0      0
## # ... with 410 more variables: sid_11 <dbl>, sid_12 <dbl>, sid_13 <dbl>,
## #   sid_14 <dbl>, sid_15 <dbl>, sid_16 <dbl>, sid_17 <dbl>, sid_18 <dbl>,
## #   sid_19 <dbl>, sid_20 <dbl>, sid_21 <dbl>, sid_22 <dbl>, sid_23 <dbl>,
## #   sid_24 <dbl>, sid_25 <dbl>, sid_26 <dbl>, sid_27 <dbl>, sid_28 <dbl>,
## #   sid_29 <dbl>, sid_30 <dbl>, sid_31 <dbl>, sid_32 <dbl>, sid_33 <dbl>,
## #   sid_34 <dbl>, sid_35 <dbl>, sid_36 <dbl>, sid_37 <dbl>, sid_38 <dbl>,
## #   sid_39 <dbl>, sid_40 <dbl>, sid_41 <dbl>, sid_42 <dbl>, sid_43 <dbl>, ...

REV Compute

tb_rho_rev <-
  PrjOptiAlloc::ffp_opt_anlyz_sodis_rev(ar_rho,
                          fl_teacher_increase_number,
                          df_input_ib = df_opt_caschool_input_ib,
                          df_queue_il_long_with_V = df_queue_il_long)

## Warning: `cols` is now required when using unnest().
## Please use `cols = c(rev)`

## Warning: `cols` is now required when using unnest().
## Please use `cols = c(fl_util_alter_alloc)`

# Display Results
print(tb_rho_rev)

## # A tibble: 4 x 4
##     rho rho_val   REV AlterOutcome
##   <int>   <dbl> <dbl>        <dbl>
## 1     1   0.99  0.836      293297.
## 2     2   0.785 0.836     1449231.
## 3     3  -3.64  0.831         125.
## 4     4 -99     0.949         601.

Optimal Allocation Among Subset of Districts

Select a Small Subset of Districts

To generate some graphs and more easily readable results, rather than using all school districts, select some random subset of school districts from the existing data frames.

Select a random subset of district IDs
New input df that only includes these IDs.

# select four school districts
# cas = california schools
it_O <- 4
set.seed(1)
df_cas_prep_sub_i <- df_opt_caschool_prep_i %>% filter(teachers <= 100 & teachers >= 50)
df_cas_prep_sub_i <- df_cas_prep_sub_i[sample(dim(df_cas_prep_sub_i)[1], it_O, replace=FALSE),]
ar_cas_id_selected <- df_cas_prep_sub_i %>% pull(id_i)

# Select from il and ib only ids that are randomly selected
df_cas_input_sub_il <- df_opt_caschool_input_il %>% filter(id_i %in% ar_cas_id_selected)
df_cas_input_sub_ib <- df_opt_caschool_input_ib %>% filter(id_i %in% ar_cas_id_selected)

# Print
print(df_cas_prep_sub_i)

##    id_i D_max_i D_o_i  Omega_i    theta_i      beta_i enrltot teachers   stravg
## 4    32     100    20 636.0332 -0.8699505 0.002380952    2102      100 21.02000
## 39  327      51    10 686.9896 -0.8699505 0.002380952     900       51 17.64706
## 1     3      83    17 657.0958 -0.8699505 0.002380952    1550       83 18.67470
## 34  286      65    13 671.6432 -0.8699505 0.002380952    1252       65 19.26154
##    distcod    county                    district grspan   avginc testscr
## 4    75440  Monterey             Soledad Unified  KK-08 12.40900  624.95
## 39   68007 San Diego          Cardiff Elementary  KK-06 21.96700  668.65
## 1    61549     Butte Thermalito Union Elementary  KK-08  8.97800  643.60
## 34   71035    Sonoma           Wright Elementary  KK-06 12.58425  663.85

print(df_cas_input_sub_ib)

##   id_i   A_i_l0 alpha_o_i beta_i
## 1    3 640.8497  2.761831      1
## 2   32 617.7468  3.047727      1
## 3  286 654.8866  2.792764      1
## 4  327 671.6376  2.516732      1

print(df_cas_input_sub_il)

## # A tibble: 299 x 7
##     id_i id_il D_max_i  D_il  A_il alpha_il beta_i
##    <int> <int>   <dbl> <int> <dbl>    <dbl>  <dbl>
##  1     3    23      83     1  641.    0.193      1
##  2     3    24      83     2  641.    0.189      1
##  3     3    25      83     3  641.    0.184      1
##  4     3    26      83     4  641.    0.180      1
##  5     3    27      83     5  642.    0.176      1
##  6     3    28      83     6  642.    0.172      1
##  7     3    29      83     7  642.    0.168      1
##  8     3    30      83     8  642.    0.165      1
##  9     3    31      83     9  642.    0.161      1
## 10     3    32      83    10  642.    0.158      1
## # ... with 289 more rows

Now we also have a new total number of teacher increase, no longer based on all districts. The policy here is to potentially increase each school district by fl_fi_max, already coded into the input_il file, so that can not change. The total number of available new teachers is fl_fa_max times the existing number of teachers in the selected districts overall

fl_fa_max = 0.20
# What is the number of teachers we can increase by
fl_teacher_increase_sub_number <- sum(df_cas_prep_sub_i$teachers)*fl_fa_max
fl_teacher_increase_sub_number <- floor(fl_teacher_increase_sub_number)

Optimal Queue, Allocations and REV

# Rho values to consider
ar_rho <- 1 - (10^(c(seq(-2,2, length.out=4))))
ar_rho <- unique(ar_rho)

# Optimal Allocation
ls_df_sub_queue <- PrjOptiAlloc::ffp_opt_anlyz_rhgin_dis(ar_rho, fl_teacher_increase_sub_number,
                                           df_input_il = df_cas_input_sub_il,
                                           bl_df_alloc_il = TRUE)

## Joining, by = "id_il"

## Adding missing grouping variables: `id_i`

## Warning in ff_dist_gini_vector_pos(.[[svr_D_star_i]]): Data vector has only n=4,
## max-inequality/min-gini=0.6

## Warning in ff_dist_gini_vector_pos(.[[svr_D_star_i]]): Data vector has only n=4,
## max-inequality/min-gini=0.6

## Warning in ff_dist_gini_vector_pos(.[[svr_D_star_i]]): Data vector has only n=4,
## max-inequality/min-gini=0.6

## Warning in ff_dist_gini_vector_pos(.[[svr_D_star_i]]): Data vector has only n=4,
## max-inequality/min-gini=0.6

## Warning in ff_dist_gini_vector_pos(.[[svr_D_star_i_demean]]): Data vector has
## only n=4, max-inequality/min-gini=0.6

## Warning in ff_dist_gini_vector_pos(.[[svr_D_star_i_demean]]): Data vector has
## only n=4, max-inequality/min-gini=0.6

## Warning in ff_dist_gini_vector_pos(.[[svr_D_star_i_demean]]): Data vector has
## only n=4, max-inequality/min-gini=0.6

## Warning in ff_dist_gini_vector_pos(.[[svr_D_star_i_demean]]): Data vector has
## only n=4, max-inequality/min-gini=0.6

## Adding missing grouping variables: `id_i`
## Adding missing grouping variables: `id_i`
## Adding missing grouping variables: `id_i`
## Adding missing grouping variables: `id_i`

## Warning: `cols` is now required when using unnest().
## Please use `cols = c(rev)`

## Warning: `cols` is now required when using unnest().
## Please use `cols = c(alloc_i_upto_Q)`

df_queue_il_sub_long <- ls_df_sub_queue$df_queue_il_long
df_queue_il_sub_wide <- ls_df_sub_queue$df_queue_il_wide
df_alloc_i_long <- ls_df_sub_queue$df_alloc_i_long
df_rho_gini <- ls_df_sub_queue$df_rho_gini
df_alloc_il_long <- ls_df_sub_queue$df_alloc_il_long

# REV
ar_util_rev_loop_func <- ffp_opt_anlyz_sodis_rev(ar_rho,fl_teacher_increase_sub_number,
                                                 df_input_ib = df_cas_input_sub_ib,
                                                 df_queue_il_long_with_V = df_queue_il_sub_long)

## Warning in min(df_queue_il_with_V %>% filter(!!sym(svr_V_star_Q_il) >=
## fl_util_alter_alloc) %>% : no non-missing arguments to min; returning Inf

## Warning: `cols` is now required when using unnest().
## Please use `cols = c(rev)`

## Warning in min(df_queue_il_with_V %>% filter(!!sym(svr_V_star_Q_il) >=
## fl_util_alter_alloc) %>% : no non-missing arguments to min; returning Inf

## Warning: `cols` is now required when using unnest().
## Please use `cols = c(fl_util_alter_alloc)`

Show Results

# Display Results
print(df_queue_il_sub_long)

## # A tibble: 1,196 x 9
##      rho rho_val  id_i id_il D_max_i  D_il  Q_il D_Wbin_il V_star_Q_il
##    <dbl>   <dbl> <int> <int>   <dbl> <int> <int>     <dbl>       <dbl>
##  1     1   0.99      3    23      83     1    24         1       2627.
##  2     2   0.785     3    23      83     1    23         1       3790.
##  3     3  -3.64      3    23      83     1    19         1        441.
##  4     4 -99         3    23      83     1   101         0         NA 
##  5     1   0.99      3    24      83     2    26         1       2628.
##  6     2   0.785     3    24      83     2    26         1       3791.
##  7     3  -3.64      3    24      83     2    22         1        441.
##  8     4 -99         3    24      83     2   102         0         NA 
##  9     1   0.99      3    25      83     3    29         1       2628.
## 10     2   0.785     3    25      83     3    29         1       3792.
## # ... with 1,186 more rows

print(df_alloc_i_long)

## # A tibble: 16 x 7
## # Groups:   id_i [4]
##      rho rho_val  id_i D_max_i D_star_i F_star_i EH_star_i
##    <dbl>   <dbl> <int>   <dbl>    <dbl>    <dbl>     <dbl>
##  1     1   0.99      3      83       11    0.133      643.
##  2     2   0.785     3      83       11    0.133      643.
##  3     3  -3.64      3      83       12    0.145      643.
##  4     4 -99         3      83        0    0          641.
##  5     1   0.99     32     100        9    0.09       619.
##  6     2   0.785    32     100        9    0.09       619.
##  7     3  -3.64     32     100       19    0.19       621.
##  8     4 -99        32     100       59    0.59       625.
##  9     1   0.99    286      65       19    0.292      659.
## 10     2   0.785   286      65       19    0.292      659.
## 11     3  -3.64    286      65       15    0.231      658.
## 12     4 -99       286      65        0    0          655.
## 13     1   0.99    327      51       20    0.392      676.
## 14     2   0.785   327      51       20    0.392      676.
## 15     3  -3.64    327      51       13    0.255      675.
## 16     4 -99       327      51        0    0          672.

print(df_rho_gini)

## # A tibble: 16 x 9
##      rho rho_val gini_D_star gini_EH_star atkinson_EH_star sd_D_star sd_EH_star
##    <int>   <dbl>       <dbl>        <dbl>            <dbl>     <dbl>      <dbl>
##  1     1   0.99       0.139        0.0143       0.00000519      5.56       24.1
##  2     2   0.785      0.139        0.0143       0.00000519      5.56       24.1
##  3     3  -3.64       0.0780       0.0137       0.00000519      3.10       23.0
##  4     4 -99          0.6          0.0120       0.00000519     29.5        20.1
##  5     5   0.99       0.139        0.0143       0.000112        5.56       24.1
##  6     6   0.785      0.139        0.0143       0.000112        5.56       24.1
##  7     7  -3.64       0.0780       0.0137       0.000112        3.10       23.0
##  8     8 -99          0.6          0.0120       0.000112       29.5        20.1
##  9     9   0.99       0.139        0.0143       0.00220         5.56       24.1
## 10    10   0.785      0.139        0.0143       0.00220         5.56       24.1
## 11    11  -3.64       0.0780       0.0137       0.00220         3.10       23.0
## 12    12 -99          0.6          0.0120       0.00220        29.5        20.1
## 13    13   0.99       0.139        0.0143       0.0234          5.56       24.1
## 14    14   0.785      0.139        0.0143       0.0234          5.56       24.1
## 15    15  -3.64       0.0780       0.0137       0.0234          3.10       23.0
## 16    16 -99          0.6          0.0120       0.0234         29.5        20.1
## # ... with 2 more variables: mean_EH_star <dbl>, min_EH_star <dbl>

print(df_alloc_il_long)

## # A tibble: 1,196 x 6
##      rho  Q_il sid_3 sid_32 sid_286 sid_327
##    <dbl> <int> <dbl>  <dbl>   <dbl>   <dbl>
##  1     1     1     0      0       0       1
##  2     1     2     0      0       0       2
##  3     1     3     0      0       0       3
##  4     1     4     0      0       0       4
##  5     1     5     0      0       0       5
##  6     1     6     0      0       1       5
##  7     1     7     0      0       2       5
##  8     1     8     0      0       2       6
##  9     1     9     0      0       3       6
## 10     1    10     0      0       3       7
## # ... with 1,186 more rows

print(ar_util_rev_loop_func)

## # A tibble: 4 x 4
##     rho rho_val    REV AlterOutcome
##   <int>   <dbl>  <dbl>        <dbl>
## 1     1   0.99  0.0339        2633.
## 2     2   0.785 0.0339        3799.
## 3     3  -3.64  0              443.
## 4     4 -99     0.644          621.

Differential Mass Optimal Allocation

Consider the population of schools is measure one, there are \(N\) distinct types of schools, and the mass of each unique type is different, but they sum up to 1. If each school can have 5 additional teachers, than the mass of teachers is measure 5.

Use the number of students per school as its mass, and introduce a new vector for the allocation function that is this mass. This is to illustrate ideas. The mass should not be the number of students, but the number of schools of the same type.

Note that under discrete allocation, there were 10840 available teachers to be allocated. But in the problem below, the idea is:

suppose there is some system that currently allocates teachers by school attributes
we know the number of teachers allocated to each type of school
we know the proportion of each type of school in the school population.
given a population measure of 1 as the sum of all schools, if on average 4 teachers are allocated to each school, the total of teachers allocated is measure 4. In discrete terms, that just means, if there are in total 1000 different schools (not different unique type, just different schools), and on average each school gets 4 teachers, we are allocating 4000 teachers.

Several things to note:

Get the total measure of resources used by the alternative allocation, this is not summing up just the mass of all individuals, which sums to one, but sums the measure of resources allocated given individual mass, and how much is allocated to each individual
Allocation queue: sort by allocation queue, and cumulative sum over individual mass, repeatedly over each incremental allocation for each individual
Welfare calculation: the welfare/utility is not by cumulative mass, but by probability mass for each type, this does not vary depending on how much is allocated. Allocation changes alpha and A, not the mass in utility.

Analyze Previous Level Allocation with Measure Units

Analyze previous allocation not based on measure, do this only for one planner:

sort by queue
cumulative sum the mass variable: mass_cumu_queue, cumulative mass (resource allocated) by this queue point.

Note that this is to be done inside the allocation function, after queue has been found, cumulative sum mass along the queue, and set cut-off at the total measure of resources available.

# Cumulative Mass
df_queue_il_long_mass_cumu <- df_queue_il_long %>% filter(rho_val == ar_rho[1]) %>%
  select(-rho, -rho_val) %>%
  left_join(df_opt_caschool_prep_i %>% 
              mutate(mass = enrltot/sum(enrltot)) %>%
              select(id_i, mass), by='id_i') %>%
  arrange(Q_il) %>%
  mutate(mass_cumu_queue = cumsum(mass))
# View Around the Teacher Available Cut-off mass
df_queue_il_long_mass_cumu %>% 
  filter(Q_il >= fl_teacher_increase_number - 5 
          & Q_il <= fl_teacher_increase_number + 5)

## # A tibble: 11 x 9
##     id_i id_il D_max_i  D_il  Q_il D_Wbin_il V_star_Q_il    mass mass_cumu_queue
##    <int> <int>   <dbl> <int> <int>     <dbl>       <dbl>   <dbl>           <dbl>
##  1    97 16260      95    55 10835         1     294663. 0.00167            14.4
##  2   339 47438     135    43 10836         1     294663. 0.00235            14.4
##  3   406 52898     132    37 10837         1     294663. 0.00212            14.4
##  4   282 40715      89    59 10838         1     294663. 0.00162            14.4
##  5    72 13654     141    50 10839         1     294663. 0.00271            14.4
##  6   412 53590      87    40 10840         1     294663. 0.00119            14.4
##  7    45  5664     221     7 10841         0         NA  0.00386            14.4
##  8   411 53444     139    33 10842         0         NA  0.00219            14.4
##  9   351 49277     127    49 10843         0         NA  0.00230            14.4
## 10   404 52726     112    42 10844         0         NA  0.00176            14.4
## 11   373 50222      80    59 10845         0         NA  0.00143            14.4

# Summarize
REconTools::ff_summ_percentiles(df_queue_il_long_mass_cumu, bl_statsasrows=FALSE)

## Warning: `funs()` was deprecated in dplyr 0.8.0.
## Please use a list of either functions or lambdas: 
## 
##   # Simple named list: 
##   list(mean = mean, median = median)
## 
##   # Auto named with `tibble::lst()`: 
##   tibble::lst(mean, median)
## 
##   # Using lambdas
##   list(~ mean(., trim = .2), ~ median(., na.rm = TRUE))
## This warning is displayed once every 8 hours.
## Call `lifecycle::last_lifecycle_warnings()` to see where this warning was generated.

## Warning: `as.tibble()` was deprecated in tibble 2.0.0.
## Please use `as_tibble()` instead.
## The signature and semantics have changed, see `?as_tibble`.
## This warning is displayed once every 8 hours.
## Call `lifecycle::last_lifecycle_warnings()` to see where this warning was generated.

## Warning: attributes are not identical across measure variables;
## they will be dropped

## # A tibble: 9 x 19
##   var          n unique NAobs ZEROobs      mean      sd      cv     min      p01
##   <chr>    <dbl>  <dbl> <dbl>   <dbl>     <dbl>   <dbl>   <dbl>   <dbl>    <dbl>
## 1 D.il     54204   1429     0       0   2.02e+2 2.25e+2 1.11    1   e+0  2   e+0
## 2 D.max.i  54204    203     0       0   4.02e+2 3.33e+2 0.829   5   e+0  1.2 e+1
## 3 D.Wbin.~ 54204      2     0   43364   2.00e-1 4.00e-1 2.00    0        0      
## 4 id.i     54204    420     0       0   1.83e+2 1.16e+2 0.635   1   e+0  1.6 e+1
## 5 id.il    54204  54204     0       0   2.71e+4 1.56e+4 0.577   1   e+0  5.43e+2
## 6 mass     54204    391     0       0   7.51e-3 6.21e-3 0.827   7.34e-5  2.02e-4
## 7 mass.cu~ 54204  54204     0       0   1.22e+2 1.14e+2 0.931   1.17e-4  1.07e-1
## 8 Q.il     54204  54204     0       0   2.71e+4 1.56e+4 0.577   1   e+0  5.43e+2
## 9 V.star.~ 54204  10841 43364      NA   2.94e+5 6.07e+2 0.00206 2.92e+5  2.92e+5
## # ... with 9 more variables: p05 <dbl>, p10 <dbl>, p25 <dbl>, p50 <dbl>,
## #   p75 <dbl>, p90 <dbl>, p95 <dbl>, p99 <dbl>, max <dbl>

Generate The mass Variable

The mass variable is not contained in df_opt_caschool_prep_i, but is contained in df_opt_caschool_prep_i, merge together to get that number of students as mass. This variable does not need to sum to one. Reweighting to sum to one should not change results.

# Summarize
REconTools::ff_summ_percentiles(df_opt_caschool_input_il, bl_statsasrows=FALSE)

## Warning: attributes are not identical across measure variables;
## they will be dropped

## # A tibble: 7 x 19
##   var          n unique NAobs ZEROobs       mean       sd     cv     min     p01
##   <chr>    <dbl>  <dbl> <dbl>   <dbl>      <dbl>    <dbl>  <dbl>   <dbl>   <dbl>
## 1 A.il     54204  54204     0       0   656.      1.69e+1 0.0258 6.13e+2 6.24e+2
## 2 alpha.il 54204  53590     0       0     0.0662  1.44e-1 2.17   2.90e-3 4.14e-3
## 3 beta.i   54204      1     0       0     1       0       0      1   e+0 1   e+0
## 4 D.il     54204   1429     0       0   202.      2.25e+2 1.11   1   e+0 2   e+0
## 5 D.max.i  54204    203     0       0   402.      3.33e+2 0.829  5   e+0 1.2 e+1
## 6 id.i     54204    420     0       0   183.      1.16e+2 0.635  1   e+0 1.6 e+1
## 7 id.il    54204  54204     0       0 27102.      1.56e+4 0.577  1   e+0 5.43e+2
## # ... with 9 more variables: p05 <dbl>, p10 <dbl>, p25 <dbl>, p50 <dbl>,
## #   p75 <dbl>, p90 <dbl>, p95 <dbl>, p99 <dbl>, max <dbl>

REconTools::ff_summ_percentiles(df_opt_caschool_input_ib, bl_statsasrows=FALSE)

## Warning: attributes are not identical across measure variables;
## they will be dropped

## # A tibble: 4 x 19
##   var          n unique NAobs ZEROobs   mean      sd     cv    min    p01    p05
##   <chr>    <dbl>  <dbl> <dbl>   <dbl>  <dbl>   <dbl>  <dbl>  <dbl>  <dbl>  <dbl>
## 1 A.i.l0     420    420     0       0 654.    17.2   0.0264 613.   618.   624.  
## 2 alpha.o~   420    410     0       0   2.84   0.327 0.115    1.81   1.95   2.27
## 3 beta.i     420      1     0       0   1      0     0        1      1      1   
## 4 id.i       420    420     0       0 210.   121.    0.577    1      5.19  22.0 
## # ... with 8 more variables: p10 <dbl>, p25 <dbl>, p50 <dbl>, p75 <dbl>,
## #   p90 <dbl>, p95 <dbl>, p99 <dbl>, max <dbl>

# Mass of Students for IL file 
df_opt_caschool_input_mass_il <- df_opt_caschool_input_il %>% 
  left_join(df_opt_caschool_prep_i %>% 
              mutate(mass_i = enrltot/sum(enrltot)) %>%
              select(id_i, mass_i)
            , by='id_i') %>% ungroup()

# Mass of Students for IL file 
# IB file done with the Assumption that fl_fa_max additional teachers per school
# Since mass per teacher/school same, total mass per school for ib allocation 
# is determined by mass_i still. Do not nuum to multiply mass by the number of teachers
# assigned. 
df_opt_caschool_input_mass_ib <- df_opt_caschool_input_ib %>%
  left_join(df_opt_caschool_prep_i %>% 
              mutate(mass_i = enrltot/sum(enrltot)) %>%
              mutate(cumu_sum_group_i = mass_i*round(teachers*fl_fa_max)) %>%
              select(id_i, mass_i, cumu_sum_group_i, teachers)
            , by='id_i') %>% ungroup()

# Total mass
sum(df_opt_caschool_input_mass_il$mass_i)

## [1] 407.3127

sum(df_opt_caschool_input_mass_ib$cumu_sum_group_i)

## [1] 81.46857

# Alternative Allocation and Mass for Alternative Allocations
REconTools::ff_summ_percentiles(df_opt_caschool_input_mass_il, bl_statsasrows=FALSE)

## Warning: attributes are not identical across measure variables;
## they will be dropped

## # A tibble: 8 x 19
##   var          n unique NAobs ZEROobs        mean      sd     cv     min     p01
##   <chr>    <dbl>  <dbl> <dbl>   <dbl>       <dbl>   <dbl>  <dbl>   <dbl>   <dbl>
## 1 A.il     54204  54204     0       0   656.      1.69e+1 0.0258 6.13e+2 6.24e+2
## 2 alpha.il 54204  53590     0       0     0.0662  1.44e-1 2.17   2.90e-3 4.14e-3
## 3 beta.i   54204      1     0       0     1       0       0      1   e+0 1   e+0
## 4 D.il     54204   1429     0       0   202.      2.25e+2 1.11   1   e+0 2   e+0
## 5 D.max.i  54204    203     0       0   402.      3.33e+2 0.829  5   e+0 1.2 e+1
## 6 id.i     54204    420     0       0   183.      1.16e+2 0.635  1   e+0 1.6 e+1
## 7 id.il    54204  54204     0       0 27102.      1.56e+4 0.577  1   e+0 5.43e+2
## 8 mass.i   54204    391     0       0     0.00751 6.21e-3 0.827  7.34e-5 2.02e-4
## # ... with 9 more variables: p05 <dbl>, p10 <dbl>, p25 <dbl>, p50 <dbl>,
## #   p75 <dbl>, p90 <dbl>, p95 <dbl>, p99 <dbl>, max <dbl>

REconTools::ff_summ_percentiles(df_opt_caschool_input_mass_ib, bl_statsasrows=FALSE)

## Warning: attributes are not identical across measure variables;
## they will be dropped

## # A tibble: 7 x 19
##   var           n unique NAobs ZEROobs     mean      sd     cv      min      p01
##   <chr>     <dbl>  <dbl> <dbl>   <dbl>    <dbl>   <dbl>  <dbl>    <dbl>    <dbl>
## 1 A.i.l0      420    420     0       0  6.54e+2 1.72e+1 0.0264  6.13e+2  6.18e+2
## 2 alpha.o.i   420    410     0       0  2.84e+0 3.27e-1 0.115   1.81e+0  1.95e+0
## 3 beta.i      420      1     0       0  1   e+0 0       0       1   e+0  1   e+0
## 4 cumu.sum~   420    398     0       0  1.94e-1 6.28e-1 3.24    7.34e-5  9.33e-5
## 5 id.i        420    420     0       0  2.10e+2 1.21e+2 0.577   1   e+0  5.19e+0
## 6 mass.i      420    391     0       0  2.38e-3 3.54e-3 1.49    7.34e-5  9.33e-5
## 7 teachers    420    203     0       0  1.29e+2 1.88e+2 1.46    5   e+0  5   e+0
## # ... with 9 more variables: p05 <dbl>, p10 <dbl>, p25 <dbl>, p50 <dbl>,
## #   p75 <dbl>, p90 <dbl>, p95 <dbl>, p99 <dbl>, max <dbl>

Within Group Cumulative Mass Sum with Allocations:

# Within Group Cumulative Sum of Mass
# For the Value Calculation, the A and alpha are updated to be given allocation up to that point
# that means the mass calculation is not mass at a particular unit of allocation, but mass
# for that individual group/type, up to the point. This is a within group cumulative sum, 
# Note that so for welfare and for allocation queue, in the welfare case, we need cumulative within
# individual group mass, and in the allocation queue, need point mass

df_opt_caschool_input_mass_il %>%
  arrange(id_i, D_il) %>% 
  group_by(id_i) %>% 
  arrange(D_il) %>% 
  mutate(cumu_sum_group_i = cumsum(mass_i)) %>%
  arrange(id_i, D_il) %>%
  ungroup()

## # A tibble: 54,204 x 9
##     id_i id_il D_max_i  D_il  A_il alpha_il beta_i   mass_i cumu_sum_group_i
##    <int> <int>   <dbl> <int> <dbl>    <dbl>  <dbl>    <dbl>            <dbl>
##  1     1     1      11     1  691.    1.29       1 0.000177         0.000177
##  2     1     2      11     2  692.    1.09       1 0.000177         0.000353
##  3     1     3      11     3  693.    0.932      1 0.000177         0.000530
##  4     1     4      11     4  694.    0.808      1 0.000177         0.000706
##  5     1     5      11     5  695.    0.707      1 0.000177         0.000883
##  6     1     6      11     6  696.    0.624      1 0.000177         0.00106 
##  7     1     7      11     7  696.    0.554      1 0.000177         0.00124 
##  8     1     8      11     8  697.    0.496      1 0.000177         0.00141 
##  9     1     9      11     9  697.    0.446      1 0.000177         0.00159 
## 10     1    10      11    10  698.    0.404      1 0.000177         0.00177 
## # ... with 54,194 more rows

Select Subset of Individuals For Allocaitons

If want to analyze more closely what is happening in a subset of cases, use the subsetting option:

it_O <- 2
set.seed(1)
df_cas_prep_sub_i <- df_opt_caschool_prep_i %>% filter(teachers <= 100 & teachers >= 50)
df_cas_prep_sub_i <- df_cas_prep_sub_i[sample(dim(df_cas_prep_sub_i)[1], it_O, replace=FALSE),]
ar_cas_id_selected <- df_cas_prep_sub_i %>% pull(id_i)

# Select from il and ib only ids that are randomly selected
bl_subsetting <- FALSE
if (bl_subsetting) {
  df_opt_caschool_input_mass_il <- df_opt_caschool_input_mass_il %>% filter(id_i %in% ar_cas_id_selected)
  df_opt_caschool_input_mass_ib <- df_opt_caschool_input_mass_ib %>% filter(id_i %in% ar_cas_id_selected)
}

# Summarize
REconTools::ff_summ_percentiles(df_opt_caschool_input_mass_il, bl_statsasrows=FALSE)

## Warning: attributes are not identical across measure variables;
## they will be dropped

## # A tibble: 8 x 19
##   var          n unique NAobs ZEROobs        mean      sd     cv     min     p01
##   <chr>    <dbl>  <dbl> <dbl>   <dbl>       <dbl>   <dbl>  <dbl>   <dbl>   <dbl>
## 1 A.il     54204  54204     0       0   656.      1.69e+1 0.0258 6.13e+2 6.24e+2
## 2 alpha.il 54204  53590     0       0     0.0662  1.44e-1 2.17   2.90e-3 4.14e-3
## 3 beta.i   54204      1     0       0     1       0       0      1   e+0 1   e+0
## 4 D.il     54204   1429     0       0   202.      2.25e+2 1.11   1   e+0 2   e+0
## 5 D.max.i  54204    203     0       0   402.      3.33e+2 0.829  5   e+0 1.2 e+1
## 6 id.i     54204    420     0       0   183.      1.16e+2 0.635  1   e+0 1.6 e+1
## 7 id.il    54204  54204     0       0 27102.      1.56e+4 0.577  1   e+0 5.43e+2
## 8 mass.i   54204    391     0       0     0.00751 6.21e-3 0.827  7.34e-5 2.02e-4
## # ... with 9 more variables: p05 <dbl>, p10 <dbl>, p25 <dbl>, p50 <dbl>,
## #   p75 <dbl>, p90 <dbl>, p95 <dbl>, p99 <dbl>, max <dbl>

REconTools::ff_summ_percentiles(df_opt_caschool_input_mass_ib, bl_statsasrows=FALSE)

## Warning: attributes are not identical across measure variables;
## they will be dropped

## # A tibble: 7 x 19
##   var           n unique NAobs ZEROobs     mean      sd     cv      min      p01
##   <chr>     <dbl>  <dbl> <dbl>   <dbl>    <dbl>   <dbl>  <dbl>    <dbl>    <dbl>
## 1 A.i.l0      420    420     0       0  6.54e+2 1.72e+1 0.0264  6.13e+2  6.18e+2
## 2 alpha.o.i   420    410     0       0  2.84e+0 3.27e-1 0.115   1.81e+0  1.95e+0
## 3 beta.i      420      1     0       0  1   e+0 0       0       1   e+0  1   e+0
## 4 cumu.sum~   420    398     0       0  1.94e-1 6.28e-1 3.24    7.34e-5  9.33e-5
## 5 id.i        420    420     0       0  2.10e+2 1.21e+2 0.577   1   e+0  5.19e+0
## 6 mass.i      420    391     0       0  2.38e-3 3.54e-3 1.49    7.34e-5  9.33e-5
## 7 teachers    420    203     0       0  1.29e+2 1.88e+2 1.46    5   e+0  5   e+0
## # ... with 9 more variables: p05 <dbl>, p10 <dbl>, p25 <dbl>, p50 <dbl>,
## #   p75 <dbl>, p90 <dbl>, p95 <dbl>, p99 <dbl>, max <dbl>

Optimal Allocations

Optimal Allocation with Number of Teachers as before

Copying the code from prior, use fl_teacher_increase_sub_number and df_opt_caschool_input_mass_il without the mass column mass_i specified.

# Measure of Available Teachers
fl_teacher_increase_sub_number <- sum(df_opt_caschool_input_mass_ib$teachers)*fl_fa_max
fl_teacher_increase_sub_number <- floor(fl_teacher_increase_sub_number)
# Allocate Based on the Measure of Available Teachers
ls_df_queue <- PrjOptiAlloc::ffp_opt_anlyz_rhgin_dis(
  ar_rho,
  fl_teacher_increase_sub_number,
  bl_df_alloc_il = TRUE,
  df_input_il = df_opt_caschool_input_mass_il,
  svr_rho = 'rho',
  svr_id_i = 'id_i', svr_id_il = 'id_il',
  svr_D_max_i = 'D_max_i', svr_D_il = 'D_il',
  svr_D_star_i = 'D_star_i', svr_F_star_i = 'F_star_i',
  svr_inpalc = 'Q_il', svr_D_Wbin_il = 'D_Wbin_il',
  svr_A_il = 'A_il', svr_alpha_il = 'alpha_il', 
  svr_beta_i = 'beta_i',
  svr_expout = 'opti_exp_outcome',
  st_idcol_prefix = 'sid_')

## Joining, by = "id_il"

## Adding missing grouping variables: `id_i`
## Adding missing grouping variables: `id_i`
## Adding missing grouping variables: `id_i`
## Adding missing grouping variables: `id_i`
## Adding missing grouping variables: `id_i`

## Warning: `cols` is now required when using unnest().
## Please use `cols = c(rev)`

## Warning: `cols` is now required when using unnest().
## Please use `cols = c(alloc_i_upto_Q)`

df_queue_il_long_count <- ls_df_queue$df_queue_il_long
df_queue_il_wide_count <- ls_df_queue$df_queue_il_wide
df_alloc_i_long_count <- ls_df_queue$df_alloc_i_long
df_rho_gini_count  <- ls_df_queue$df_rho_gini
df_alloc_il_long_count <- ls_df_queue$df_alloc_il_long

Analyze Measure at Count Allocation

Analyze results using the total measure threshold from the measure allocation

# Cumulative Mass
df_queue_il_long_mass_cntcumu <- df_queue_il_long_count %>% filter(rho_val == ar_rho[1]) %>%
  select(-rho, -rho_val) %>%
  left_join(df_opt_caschool_prep_i %>% 
            mutate(mass_i = enrltot/sum(enrltot)) %>%
            select(id_i, mass_i), by='id_i') %>%  
  arrange(Q_il) %>%
  mutate(mass_cumu_il = cumsum(mass_i))
# View Around the Teacher Available Cut-off mass
df_queue_il_long_mass_cntcumu %>% 
  filter(Q_il >= fl_teacher_increase_sub_number - 4
          & Q_il <= fl_teacher_increase_sub_number + 4)

## # A tibble: 9 x 9
##    id_i id_il D_max_i  D_il  Q_il D_Wbin_il V_star_Q_il  mass_i mass_cumu_il
##   <int> <int>   <dbl> <int> <int>     <dbl>       <dbl>   <dbl>        <dbl>
## 1   339 47438     135    43 10836         1     294663. 0.00235         14.4
## 2   406 52898     132    37 10837         1     294663. 0.00212         14.4
## 3   282 40715      89    59 10838         1     294663. 0.00162         14.4
## 4    72 13654     141    50 10839         1     294663. 0.00271         14.4
## 5   412 53590      87    40 10840         1     294663. 0.00119         14.4
## 6    45  5664     221     7 10841         0         NA  0.00386         14.4
## 7   411 53444     139    33 10842         0         NA  0.00219         14.4
## 8   351 49277     127    49 10843         0         NA  0.00230         14.4
## 9   404 52726     112    42 10844         0         NA  0.00176         14.4

# Summarize
REconTools::ff_summ_percentiles(df_queue_il_long_mass_cntcumu, bl_statsasrows=FALSE)

## Warning: attributes are not identical across measure variables;
## they will be dropped

## # A tibble: 9 x 19
##   var         n unique NAobs ZEROobs      mean       sd      cv     min      p01
##   <chr>   <dbl>  <dbl> <dbl>   <dbl>     <dbl>    <dbl>   <dbl>   <dbl>    <dbl>
## 1 D.il    54204   1429     0       0   2.02e+2  2.25e+2 1.11    1   e+0  2   e+0
## 2 D.max.i 54204    203     0       0   4.02e+2  3.33e+2 0.829   5   e+0  1.2 e+1
## 3 D.Wbin~ 54204      2     0   43364   2.00e-1  4.00e-1 2.00    0        0      
## 4 id.i    54204    420     0       0   1.83e+2  1.16e+2 0.635   1   e+0  1.6 e+1
## 5 id.il   54204  54204     0       0   2.71e+4  1.56e+4 0.577   1   e+0  5.43e+2
## 6 mass.c~ 54204  54204     0       0   1.22e+2  1.14e+2 0.931   1.17e-4  1.07e-1
## 7 mass.i  54204    391     0       0   7.51e-3  6.21e-3 0.827   7.34e-5  2.02e-4
## 8 Q.il    54204  54204     0       0   2.71e+4  1.56e+4 0.577   1   e+0  5.43e+2
## 9 V.star~ 54204  10841 43364      NA   2.94e+5  6.07e+2 0.00206 2.92e+5  2.92e+5
## # ... with 9 more variables: p05 <dbl>, p10 <dbl>, p25 <dbl>, p50 <dbl>,
## #   p75 <dbl>, p90 <dbl>, p95 <dbl>, p99 <dbl>, max <dbl>

Optimal Allocation with Mass

Same as before, but now include the mass column:

# Measure of Available Teachers
fl_measure_teacher_increase_number <- sum(df_opt_caschool_input_mass_ib$cumu_sum_group_i)
# Allocate Based on the Measure of Available Teachers
ls_df_queue <- PrjOptiAlloc::ffp_opt_anlyz_rhgin_dis(
  ar_rho,
  fl_measure_teacher_increase_number,
  bl_df_alloc_il = TRUE,
  df_input_il = df_opt_caschool_input_mass_il,
  svr_rho = 'rho',
  svr_id_i = 'id_i', svr_id_il = 'id_il',
  svr_D_max_i = 'D_max_i', svr_D_il = 'D_il',
  svr_D_star_i = 'D_star_i', svr_F_star_i = 'F_star_i',
  svr_inpalc = 'Q_il', svr_D_Wbin_il = 'D_Wbin_il',
  svr_A_il = 'A_il', svr_alpha_il = 'alpha_il', 
  svr_beta_i = 'beta_i',
  svr_measure_i = 'mass_i',
  svr_expout = 'opti_exp_outcome',
  st_idcol_prefix = 'sid_')

## Joining, by = "id_il"

## Adding missing grouping variables: `id_i`
## Adding missing grouping variables: `id_i`
## Adding missing grouping variables: `id_i`
## Adding missing grouping variables: `id_i`
## Adding missing grouping variables: `id_i`

## Warning: `cols` is now required when using unnest().
## Please use `cols = c(rev)`

## Warning: `cols` is now required when using unnest().
## Please use `cols = c(alloc_i_upto_Q)`

df_queue_il_long_mass <- ls_df_queue$df_queue_il_long
df_queue_il_wide_mass <- ls_df_queue$df_queue_il_wide
df_alloc_i_long_mass <- ls_df_queue$df_alloc_i_long
df_rho_gini_mass  <- ls_df_queue$df_rho_gini
df_alloc_il_long_mass <- ls_df_queue$df_alloc_il_long

Analyze Measure at Mass Allocation

Analyze the Allocation Results based on measure:

df_queue_il_long_mass %>% filter(rho_val == ar_rho[3]) %>%
  select(-rho, -rho_val) %>% arrange(Q_il) %>%
  filter(mass_cumu_il >= fl_measure_teacher_increase_number - 0.01
          & mass_cumu_il <= fl_measure_teacher_increase_number + 0.01)

## # A tibble: 3 x 8
##    id_i id_il D_max_i  D_il  Q_il D_Wbin_il mass_cumu_il V_star_Q_il
##   <int> <int>   <dbl> <int> <int>     <dbl>        <dbl>       <dbl>
## 1   264 38122     550    33 25779         1         81.5        654.
## 2   220 32688     408    79 25780         0         81.5         NA 
## 3   105 17069     228   154 25781         0         81.5         NA

# Summarize
REconTools::ff_summ_percentiles(df_queue_il_long_mass_cumu, bl_statsasrows=FALSE)

## Warning: attributes are not identical across measure variables;
## they will be dropped

## # A tibble: 9 x 19
##   var          n unique NAobs ZEROobs      mean      sd      cv     min      p01
##   <chr>    <dbl>  <dbl> <dbl>   <dbl>     <dbl>   <dbl>   <dbl>   <dbl>    <dbl>
## 1 D.il     54204   1429     0       0   2.02e+2 2.25e+2 1.11    1   e+0  2   e+0
## 2 D.max.i  54204    203     0       0   4.02e+2 3.33e+2 0.829   5   e+0  1.2 e+1
## 3 D.Wbin.~ 54204      2     0   43364   2.00e-1 4.00e-1 2.00    0        0      
## 4 id.i     54204    420     0       0   1.83e+2 1.16e+2 0.635   1   e+0  1.6 e+1
## 5 id.il    54204  54204     0       0   2.71e+4 1.56e+4 0.577   1   e+0  5.43e+2
## 6 mass     54204    391     0       0   7.51e-3 6.21e-3 0.827   7.34e-5  2.02e-4
## 7 mass.cu~ 54204  54204     0       0   1.22e+2 1.14e+2 0.931   1.17e-4  1.07e-1
## 8 Q.il     54204  54204     0       0   2.71e+4 1.56e+4 0.577   1   e+0  5.43e+2
## 9 V.star.~ 54204  10841 43364      NA   2.94e+5 6.07e+2 0.00206 2.92e+5  2.92e+5
## # ... with 9 more variables: p05 <dbl>, p10 <dbl>, p25 <dbl>, p50 <dbl>,
## #   p75 <dbl>, p90 <dbl>, p95 <dbl>, p99 <dbl>, max <dbl>

Compare Measure and Count Allocation Results

Under the Utilitarian filter(rho_val == ar_rho[1]):

Based on count, there were 30 teacher spots to allocate, and we move up to queue position 30 for allocation. Based on relative measure/weight, because the schools with more teacher spots assigned under alternative allocation also had higher weight, so we end up with a measure such that the 10 + 20 teachers assigned to each type of school, when looked at on the queue, sorted by queue position, and cumulatively summed with the weight of school school type, ends up giving us enough to go to the 38th queue position. So ignoring the weight of the schools, we would have allocated incorrectly, because we thought there were only 30 slots to be allocated, but given weights, there are actually up to the 38th spot on the queue.

Under filter(rho_val == ar_rho[2], [3], [4]):

Under alternative planners, importantly, the queue is different, hence cumulative weight up the queue is different, so the number of “spots” that could be allocated is different. There are 37, 31, 24 spots that are allocated under increasing Rawlsian preferences. This reflects the fact that under more Rawlsian preferences, the districts more more students are ranked higher on the queue, and they take up more mass. At the extreme, given N=2, all the mass from the smaller mass type go to the larger mass type, so the queue goes up to how many spots the larger school districts were assigned uniformly, and additional spot for the larger school if the school smallers’ additional teachers are equally assigned to larger schools given relative mass of smaller and larger schools.

REV Compute

REV Compute with Count

tb_rho_rev_count <-
  PrjOptiAlloc::ffp_opt_anlyz_sodis_rev(ar_rho,
                          fl_teacher_increase_sub_number,
                          df_input_ib = df_opt_caschool_input_mass_ib,
                          df_queue_il_long_with_V = df_queue_il_long_count,
                          svr_beta_i = 'beta_i')

## Warning: `cols` is now required when using unnest().
## Please use `cols = c(rev)`

## Warning: `cols` is now required when using unnest().
## Please use `cols = c(fl_util_alter_alloc)`

# Display Results
print(tb_rho_rev_count)

## # A tibble: 4 x 4
##     rho rho_val   REV AlterOutcome
##   <int>   <dbl> <dbl>        <dbl>
## 1     1   0.99  0.836      293297.
## 2     2   0.785 0.836     1449231.
## 3     3  -3.64  0.831         125.
## 4     4 -99     0.949         601.

REV Compute with Mass

tb_rho_rev_mass <-
  PrjOptiAlloc::ffp_opt_anlyz_sodis_rev(ar_rho,
                          fl_measure_teacher_increase_number,
                          df_input_ib = df_opt_caschool_input_mass_ib,
                          df_queue_il_long_with_V = df_queue_il_long_mass,
                          svr_beta_i = 'beta_i', svr_measure_i = 'mass_i')

## Warning: `cols` is now required when using unnest().
## Please use `cols = c(rev)`

## Warning: `cols` is now required when using unnest().
## Please use `cols = c(fl_util_alter_alloc)`

# Display Results
print(tb_rho_rev_mass)

## # A tibble: 4 x 4
##     rho rho_val   REV AlterOutcome
##   <int>   <dbl> <dbl>        <dbl>
## 1     1   0.99  0.607         653.
## 2     2   0.785 0.606         653.
## 3     3  -3.64  0.583         652.
## 4     4 -99     0.911         638.

REV results Analysis

Since we are comparing against uniform allocation, the REV difference is the smallest when we consider close to cobb-douglas preferences. The Utilitarian as well as the Rawlsian planner are both interested in allocating in an unequal way. The Utilitarian wants to allocate more to for higher alpha, the Rawlsian wants to allocate more to lower A. The Cobb Douglas planner seeks a balance between the two alternatives.

Depending on the Alternative allocation, the REV results might be decreasing, increasing, or have different patterns.