Generates data from the Guatemala and Cebu dataset jitter/rand version, select cebu only between 0 and 24. Generates four categories based on initial height and mother education levels. Regress height on protein inputs allowing for heterogeneous effecst for each of the four categories.

Set Up

rm(list = ls(all.names = TRUE))
options(knitr.duplicate.label = 'allow')
# setwd('C:/Users/fan/PrjOptiAlloc')
library(dplyr)
library(tidyr)
library(broom)
library(stringr)
library(REconTools)

bl_save_rda = FALSE

Get Data

Generate four categories by initial height and mother’s education levels combinations.

# Load Library

# Select Cebu Only
df_hw_cebu_m24 <- REconTools::df_hgt_wgt %>%
  filter(S.country == 'Cebu' & svymthRound == 24 & prot > 0 & hgt > 0) %>% drop_na()

# Generate Discrete Version of momEdu
df_hw_cebu_m24 <- df_hw_cebu_m24 %>%
    mutate(momEduRound = cut(momEdu,
                             breaks=c(-Inf, 10, Inf),
                             labels=c("MEduLow","MEduHigh"))) %>%
    mutate(hgt0med = cut(hgt0,
                             breaks=c(-Inf, 50, Inf),
                             labels=c("h0low","h0high")))

df_hw_cebu_m24$momEduRound = as.factor(df_hw_cebu_m24$momEduRound)
df_hw_cebu_m24$hgt0med = as.factor(df_hw_cebu_m24$hgt0med)

# Attach
attach(df_hw_cebu_m24)

Regression with Data and Construct Input Arrays

Linear Regression

Estimation Production functions or any other function. Below, Regression height outcomes on input interacted with the categories created before. This will generate category specific marginal effects.

# Input Matrix
mt_lincv <- model.matrix(~ hgt0 + wgt0)
mt_linht <- model.matrix(~ sex:hgt0med - 1)

# Regress Height At Month 24 on Nutritional Inputs with controls
rs_hgt_prot_lin = lm(hgt ~ prot:mt_linht + mt_lincv - 1)
print(summary(rs_hgt_prot_lin))
## 
## Call:
## lm(formula = hgt ~ prot:mt_linht + mt_lincv - 1)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -15.212  -2.339   0.088   2.437   9.805 
## 
## Coefficients:
##                                       Estimate Std. Error t value Pr(>|t|)    
## mt_lincv(Intercept)                  5.638e+01  3.683e+00  15.307  < 2e-16 ***
## mt_lincvhgt0                         3.774e-01  8.723e-02   4.326 1.66e-05 ***
## mt_lincvwgt0                         1.154e-03  3.705e-04   3.115  0.00189 ** 
## prot:mt_linhtsexFemale:hgt0medh0low  1.014e-02  1.058e-02   0.959  0.33783    
## prot:mt_linhtsexMale:hgt0medh0low    6.018e-02  9.625e-03   6.252 5.91e-10 ***
## prot:mt_linhtsexFemale:hgt0medh0high 3.228e-02  1.332e-02   2.423  0.01558 *  
## prot:mt_linhtsexMale:hgt0medh0high   6.619e-02  1.001e-02   6.612 6.05e-11 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 3.473 on 1036 degrees of freedom
## Multiple R-squared:  0.9981, Adjusted R-squared:  0.9981 
## F-statistic: 7.79e+04 on 7 and 1036 DF,  p-value: < 2.2e-16
rs_hgt_prot_lin_tidy = broom::tidy(rs_hgt_prot_lin)

Log-Linear Regression

Now log-linear regressions, where inptut coefficient differs by input groups.

# Input Matrix Generation
mt_logcv <- model.matrix(~ hgt0 + wgt0)
mt_loght <- model.matrix(~ sex:hgt0med - 1)

# Log and log regression for month 24
rs_hgt_prot_log = lm(log(hgt) ~ log(prot):mt_loght + mt_logcv - 1)
print(summary(rs_hgt_prot_log))
## 
## Call:
## lm(formula = log(hgt) ~ log(prot):mt_loght + mt_logcv - 1)
## 
## Residuals:
##       Min        1Q    Median        3Q       Max 
## -0.223890 -0.030027  0.002461  0.031047  0.118097 
## 
## Coefficients:
##                                            Estimate Std. Error t value Pr(>|t|)
## mt_logcv(Intercept)                       4.087e+00  5.342e-02  76.512  < 2e-16
## mt_logcvhgt0                              4.305e-03  1.224e-03   3.516 0.000457
## mt_logcvwgt0                              1.465e-05  4.733e-06   3.096 0.002012
## log(prot):mt_loghtsexFemale:hgt0medh0low  6.687e-03  2.218e-03   3.014 0.002640
## log(prot):mt_loghtsexMale:hgt0medh0low    1.209e-02  2.176e-03   5.557 3.49e-08
## log(prot):mt_loghtsexFemale:hgt0medh0high 9.251e-03  2.441e-03   3.790 0.000159
## log(prot):mt_loghtsexMale:hgt0medh0high   1.394e-02  2.288e-03   6.094 1.55e-09
##                                              
## mt_logcv(Intercept)                       ***
## mt_logcvhgt0                              ***
## mt_logcvwgt0                              ** 
## log(prot):mt_loghtsexFemale:hgt0medh0low  ** 
## log(prot):mt_loghtsexMale:hgt0medh0low    ***
## log(prot):mt_loghtsexFemale:hgt0medh0high ***
## log(prot):mt_loghtsexMale:hgt0medh0high   ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.04436 on 1036 degrees of freedom
## Multiple R-squared:  0.9999, Adjusted R-squared:  0.9999 
## F-statistic: 1.448e+06 on 7 and 1036 DF,  p-value: < 2.2e-16
rs_hgt_prot_log_tidy = broom::tidy(rs_hgt_prot_log)

Construct Input Arrays \(A_i\) and \(\alpha_i\)

Multiply coefficient vector by covariate matrix to generate A vector that is child/individual specific.

# Generate A_i
ar_Ai_lin <- mt_lincv %*% as.matrix(rs_hgt_prot_lin_tidy %>% filter(!str_detect(term, 'prot')) %>% select(estimate))
ar_Ai_log <- mt_logcv %*% as.matrix(rs_hgt_prot_log_tidy %>% filter(!str_detect(term, 'prot')) %>% select(estimate))

# Generate alpha_i
ar_alphai_lin <- mt_linht %*% as.matrix(rs_hgt_prot_lin_tidy %>% filter(str_detect(term, 'prot')) %>% select(estimate))
ar_alphai_log <- mt_loght %*% as.matrix(rs_hgt_prot_log_tidy %>% filter(str_detect(term, 'prot')) %>% select(estimate))

# Child Weight
ar_beta <- rep(1/length(ar_Ai_lin), times=length(ar_Ai_lin))

# Estimation Results lin and log both store
mt_opti <- cbind(ar_alphai_lin, ar_alphai_log, ar_Ai_lin, ar_Ai_log, ar_beta)
ar_st_varnames <- c('alpha_lin', 'alpha_log', 'A_lin', 'A_log', 'beta')
df_esti_alpha_A_beta <- as_tibble(mt_opti) %>% rename_all(~c(ar_st_varnames))
## Warning: The `x` argument of `as_tibble.matrix()` must have unique column names if `.name_repair` is omitted as of tibble 2.0.0.
## Using compatibility `.name_repair`.
## This warning is displayed once every 8 hours.
## Call `lifecycle::last_lifecycle_warnings()` to see where this warning was generated.
# estimation results file to export (keeping data keys)
df_esti <- bind_cols(df_hw_cebu_m24, df_esti_alpha_A_beta) %>%
              select(one_of(c('S.country', 'vil.id', 'indi.id', 'svymthRound', ar_st_varnames)))

# Initate Dataframe that will store all estimates and optimal allocation relevant information
mt_opti <- cbind(ar_alphai_lin, ar_Ai_lin, ar_beta)
ar_st_varnames <- c('alpha', 'A', 'beta')
tb_opti <- as_tibble(mt_opti) %>% rename_all(~c(ar_st_varnames))

# Unique beta, A, and alpha groups
tb_opti_unique <- tb_opti %>% group_by(!!!syms(ar_st_varnames)) %>%
                    arrange(!!!syms(ar_st_varnames)) %>%
                    summarise(n_obs_group=n())
## `summarise()` has grouped output by 'alpha', 'A'. You can override using the `.groups` argument.

Export Data to /data Folder

I would like to be able to directly call the dataset generated here in various functions. Save the datafile we just created in the project folder.

if (bl_save_rda) {
  df_opt_dtgch_cbem4 <- df_esti
  usethis::use_data(df_opt_dtgch_cbem4, df_opt_dtgch_cbem4, overwrite = TRUE)
}