Chapter 3 Functions

3.1 Dataframe Mutate

3.1.1 Row Input Functions

Go back to fan’s REconTools research support package, R4Econ examples page, PkgTestR packaging guide, or Stat4Econ course page.

We want evaluate nonlinear function f(Q_i, y_i, ar_x, ar_y, c, d), where c and d are constants, and ar_x and ar_y are arrays, both fixed. x_i and y_i vary over each row of matrix. We would like to evaluate this nonlinear function concurrently across \(N\) individuals. The eventual goal is to find the \(i\) specific \(Q\) that solves the nonlinear equations.

This is a continuation of R use Apply, Sapply and dplyr Mutate to Evaluate one Function Across Rows of a Matrix

3.1.1.1 Set up Input Arrays

There is a function that takes \(M=Q+P\) inputs, we want to evaluate this function \(N\) times. Each time, there are \(M\) inputs, where all but \(Q\) of the \(M\) inputs, meaning \(P\) of the \(M\) inputs, are the same. In particular, \(P=Q*N\).

\[M = Q+P = Q + Q*N\]

# it_child_count = N, the number of children
it_N_child_cnt = 5
# it_heter_param = Q, number of parameters that are heterogeneous across children
it_Q_hetpa_cnt = 2

# P fixed parameters, nN is N dimensional, nP is P dimensional
ar_nN_A = seq(-2, 2, length.out = it_N_child_cnt)
ar_nN_alpha = seq(0.1, 0.9, length.out = it_N_child_cnt)
ar_nP_A_alpha = c(ar_nN_A, ar_nN_alpha)
ar_nN_N_choice = seq(1,it_N_child_cnt)/sum(seq(1,it_N_child_cnt))

# N by Q varying parameters
mt_nN_by_nQ_A_alpha = cbind(ar_nN_A, ar_nN_alpha, ar_nN_N_choice)

# Convert Matrix to Tibble
ar_st_col_names = c('fl_A', 'fl_alpha', 'fl_N')
tb_nN_by_nQ_A_alpha <- as_tibble(mt_nN_by_nQ_A_alpha) %>% rename_all(~c(ar_st_col_names))

# Show
kable(tb_nN_by_nQ_A_alpha) %>%
  kable_styling_fc()
fl_A fl_alpha fl_N
-2 0.1 0.0666667
-1 0.3 0.1333333
0 0.5 0.2000000
1 0.7 0.2666667
2 0.9 0.3333333

3.1.1.2 Mutate over Simple Function

For this example, use a very simple function with only one type of input, all inputs are scalars.

# Define Implicit Function
ffi_nonlinear <- function(fl_A, fl_alpha){

  fl_out <- (fl_A + fl_alpha*fl_A)/(fl_A)^2

  return(fl_out)
}

Apply the function over the dataframe, note five different ways below, the third way allows for parameters to be strings.

# variable names
svr_fl_A <- 'fl_A'
svr_fl_alpha <- 'fl_alpha'

# Evaluate
tb_nN_by_nQ_A_alpha_mutate_rows <- tb_nN_by_nQ_A_alpha %>%
  mutate(fl_out_m1 = ffi_nonlinear(fl_A=.$fl_A, fl_alpha=.$fl_alpha),
         fl_out_m2 = ffi_nonlinear(fl_A=`$`(., 'fl_A'), fl_alpha=`$`(., 'fl_alpha')),
         fl_out_m3 = ffi_nonlinear(fl_A=.[[svr_fl_A]], fl_alpha=.[[svr_fl_alpha]]),
         fl_out_m4 = ffi_nonlinear(fl_A=fl_A, fl_alpha=fl_alpha),
         fl_out_m5 = ffi_nonlinear(fl_A, fl_alpha))

# print
kable(tb_nN_by_nQ_A_alpha_mutate_rows) %>% kable_styling_fc()
fl_A fl_alpha fl_N fl_out_m1 fl_out_m2 fl_out_m3 fl_out_m4 fl_out_m5
-2 0.1 0.0666667 -0.55 -0.55 -0.55 -0.55 -0.55
-1 0.3 0.1333333 -1.30 -1.30 -1.30 -1.30 -1.30
0 0.5 0.2000000 NaN NaN NaN NaN NaN
1 0.7 0.2666667 1.70 1.70 1.70 1.70 1.70
2 0.9 0.3333333 0.95 0.95 0.95 0.95 0.95

3.1.1.3 Testing Function with Scalar and Arrays

Test non-linear Equation.

# Test Parameters
fl_N_agg = 100
fl_rho = -1
fl_N_q = ar_nN_N_choice[4]*fl_N_agg
ar_A_alpha = mt_nN_by_nQ_A_alpha[4,]
# Apply Function
ar_p1_s1 = exp((ar_A_alpha[1] - ar_nN_A)*fl_rho)
ar_p1_s2 = (ar_A_alpha[2]/ar_nN_alpha)
ar_p1_s3 = (1/(ar_nN_alpha*fl_rho - 1))
ar_p1 = (ar_p1_s1*ar_p1_s2)^ar_p1_s3
ar_p2 = fl_N_q^((ar_A_alpha[2]*fl_rho-1)/(ar_nN_alpha*fl_rho-1))
ar_overall = ar_p1*ar_p2
fl_overall = fl_N_agg - sum(ar_overall)
print(fl_overall)
## [1] -598.2559

Implement the non-linear problem’s evaluation using apply over all \(N\) individuals.

# Define Implicit Function
ffi_nonlin_dplyrdo <- function(fl_A, fl_alpha, fl_N, ar_A, ar_alpha, fl_N_agg, fl_rho){
  # ar_A_alpha[1] is A
  # ar_A_alpha[2] is alpha

  # # Test Parameters
  # fl_N = 100
  # fl_rho = -1
  # fl_N_q = 10

  # Apply Function
  ar_p1_s1 = exp((fl_A - ar_A)*fl_rho)
  ar_p1_s2 = (fl_alpha/ar_alpha)
  ar_p1_s3 = (1/(ar_alpha*fl_rho - 1))
  ar_p1 = (ar_p1_s1*ar_p1_s2)^ar_p1_s3
  ar_p2 = fl_N^((fl_alpha*fl_rho-1)/(ar_alpha*fl_rho-1))
  ar_overall = ar_p1*ar_p2
  fl_overall = fl_N_agg - sum(ar_overall)

  return(fl_overall)
}

# Parameters
fl_rho = -1

# Evaluate Function
print(ffi_nonlin_dplyrdo(mt_nN_by_nQ_A_alpha[1,1],
                         mt_nN_by_nQ_A_alpha[1,2],
                         mt_nN_by_nQ_A_alpha[1,3]*fl_N_agg,
                         ar_nN_A, ar_nN_alpha, fl_N_agg, fl_rho))
## [1] 81.86645
for (i in seq(1,dim(mt_nN_by_nQ_A_alpha)[1])){
  fl_eval = ffi_nonlin_dplyrdo(mt_nN_by_nQ_A_alpha[i,1],
                               mt_nN_by_nQ_A_alpha[i,2],
                               mt_nN_by_nQ_A_alpha[i,3]*fl_N_agg,
                               ar_nN_A, ar_nN_alpha, fl_N_agg, fl_rho)
  print(fl_eval)
}
## [1] 81.86645
## [1] 54.48885
## [1] -65.5619
## [1] -598.2559
## [1] -3154.072

3.1.1.4 Evaluate Nonlinear Function using dplyr mutate

# Define Implicit Function
ffi_nonlin_dplyrdo <- function(fl_A, fl_alpha, fl_N, ar_A, ar_alpha, fl_N_agg, fl_rho){

  # Test Parameters
  # ar_A = ar_nN_A
  # ar_alpha = ar_nN_alpha
  # fl_N = 100
  # fl_rho = -1
  # fl_N_q = 10

  # Apply Function
  ar_p1_s1 = exp((fl_A - ar_A)*fl_rho)
  ar_p1_s2 = (fl_alpha/ar_alpha)
  ar_p1_s3 = (1/(ar_alpha*fl_rho - 1))
  ar_p1 = (ar_p1_s1*ar_p1_s2)^ar_p1_s3
  ar_p2 = (fl_N*fl_N_agg)^((fl_alpha*fl_rho-1)/(ar_alpha*fl_rho-1))
  ar_overall = ar_p1*ar_p2
  fl_overall = fl_N_agg - sum(ar_overall)

  return(fl_overall)
}

# fl_A, fl_alpha are from columns of tb_nN_by_nQ_A_alpha
tb_nN_by_nQ_A_alpha = tb_nN_by_nQ_A_alpha %>% rowwise() %>%
                        mutate(dplyr_eval = ffi_nonlin_dplyrdo(fl_A, fl_alpha, fl_N,
                                                               ar_nN_A, ar_nN_alpha,
                                                               fl_N_agg, fl_rho))
# Show
kable(tb_nN_by_nQ_A_alpha) %>%
  kable_styling_fc()
fl_A fl_alpha fl_N dplyr_eval
-2 0.1 0.0666667 81.86645
-1 0.3 0.1333333 54.48885
0 0.5 0.2000000 -65.56190
1 0.7 0.2666667 -598.25595
2 0.9 0.3333333 -3154.07226

3.1.2 Evaluate Choices Across States

Go back to fan’s REconTools research support package, R4Econ examples page, PkgTestR packaging guide, or Stat4Econ course page.

See the ff_opti_bisect_pmap_multi function from Fan’s REconTools Package, which provides a resuable function based on the algorithm worked out here.

We want evaluate linear function \(0=f(z_{ij}, x_i, y_i, \textbf{X}, \textbf{Y}, c, d)\). There are \(i\) functions that have \(i\) specific \(x\) and \(y\). For each \(i\) function, we evaluate along a grid of feasible values for \(z\), over \(j\in J\) grid points, potentially looking for the \(j\) that is closest to the root. \(\textbf{X}\) and \(\textbf{Y}\) are arrays common across the \(i\) equations, and \(c\) and \(d\) are constants.

The evaluation strategy is the following, given min and max for \(z\) that are specific for each \(j\), and given common number of grid points, generate a matrix of \(z_{ij}\). Suppose there the number of \(i\) is \(I\), and the number of grid points for \(j\) is \(J\).

  1. Generate a \(J \cdot I\) by \(3\) matrix where the columns are \(z,x,y\) as tibble
  2. Follow this Mutate to evaluate the \(f(\cdot)\) function.
  3. Add two categorical columns for grid levels and wich \(i\), \(i\) and \(j\) index. Plot Mutate output evaluated column categorized by \(i\) as color and \(j\) as x-axis.

3.1.2.1 Set up Input Arrays

There is a function that takes \(M=Q+P\) inputs, we want to evaluate this function \(N\) times. Each time, there are \(M\) inputs, where all but \(Q\) of the \(M\) inputs, meaning \(P\) of the \(M\) inputs, are the same. In particular, \(P=Q*N\).

\[M = Q+P = Q + Q*N\]

Now we need to expand this by the number of choice grid. Each row, representing one equation, is expanded by the number of choice grids. We are graphically searching, or rather brute force searching, which means if we have 100 individuals, we want to plot out the nonlinear equation for each of these lines, and show graphically where each line crosses zero. We achieve this, by evaluating the equation for each of the 100 individuals along a grid of feasible choices.

In this problem here, the feasible choices are shared across individuals.

# Parameters
fl_rho = 0.20
svr_id_var = 'INDI_ID'

# it_child_count = N, the number of children
it_N_child_cnt = 4
# it_heter_param = Q, number of parameters that are heterogeneous across children
it_Q_hetpa_cnt = 2

# P fixed parameters, nN is N dimensional, nP is P dimensional
ar_nN_A = seq(-2, 2, length.out = it_N_child_cnt)
ar_nN_alpha = seq(0.1, 0.9, length.out = it_N_child_cnt)
ar_nP_A_alpha = c(ar_nN_A, ar_nN_alpha)

# N by Q varying parameters
mt_nN_by_nQ_A_alpha = cbind(ar_nN_A, ar_nN_alpha)

# Choice Grid for nutritional feasible choices for each
fl_N_agg = 100
fl_N_min = 0
it_N_choice_cnt_ttest = 3
it_N_choice_cnt_dense = 100
ar_N_choices_ttest = seq(fl_N_min, fl_N_agg, length.out = it_N_choice_cnt_ttest)
ar_N_choices_dense = seq(fl_N_min, fl_N_agg, length.out = it_N_choice_cnt_dense)

# Mesh Expand
tb_states_choices <- as_tibble(mt_nN_by_nQ_A_alpha) %>% rowid_to_column(var=svr_id_var)
tb_states_choices_ttest <- tb_states_choices %>% expand_grid(choices = ar_N_choices_ttest)
tb_states_choices_dense <- tb_states_choices %>% expand_grid(choices = ar_N_choices_dense)

# display
summary(tb_states_choices_dense)
##     INDI_ID        ar_nN_A    ar_nN_alpha     choices   
##  Min.   :1.00   Min.   :-2   Min.   :0.1   Min.   :  0  
##  1st Qu.:1.75   1st Qu.:-1   1st Qu.:0.3   1st Qu.: 25  
##  Median :2.50   Median : 0   Median :0.5   Median : 50  
##  Mean   :2.50   Mean   : 0   Mean   :0.5   Mean   : 50  
##  3rd Qu.:3.25   3rd Qu.: 1   3rd Qu.:0.7   3rd Qu.: 75  
##  Max.   :4.00   Max.   : 2   Max.   :0.9   Max.   :100
kable(tb_states_choices_ttest) %>%
  kable_styling_fc()
INDI_ID ar_nN_A ar_nN_alpha choices
1 -2.0000000 0.1000000 0
1 -2.0000000 0.1000000 50
1 -2.0000000 0.1000000 100
2 -0.6666667 0.3666667 0
2 -0.6666667 0.3666667 50
2 -0.6666667 0.3666667 100
3 0.6666667 0.6333333 0
3 0.6666667 0.6333333 50
3 0.6666667 0.6333333 100
4 2.0000000 0.9000000 0
4 2.0000000 0.9000000 50
4 2.0000000 0.9000000 100

3.1.2.2 Apply Same Function all Rows, Some Inputs Row-specific, other Shared

There are two types of inputs, row-specific inputs, and inputs that should be applied for each row. The Function just requires all of these inputs, it does not know what is row-specific and what is common for all row. Dplyr recognizes which parameter inputs already existing in the piped dataframe/tibble, given rowwise, those will be row-specific inputs. Additional function parameters that do not exist in dataframe as variable names, but that are pre-defined scalars or arrays will be applied to all rows.

  • (param?) string variable name of input where functions are evaluated, these are already contained in the dataframe, existing variable names, row specific, rowwise computation over these, each rowwise calculation using different rows: fl_A, fl_alpha, fl_N
  • (param?) scalar and array values that are applied to every rowwise calculation, all rowwise calculations using the same scalars and arrays:ar_A, ar_alpha, fl_N_agg, fl_rho
  • (param?) string output variable name

The function looks within group, finds min/max etc that are relevant.

3.1.2.2.1 3 Points and Denser Dataframs and Define Function
# Convert Matrix to Tibble
ar_st_col_names = c(svr_id_var,'fl_A', 'fl_alpha')
tb_states_choices <- tb_states_choices %>% rename_all(~c(ar_st_col_names))
ar_st_col_names = c(svr_id_var,'fl_A', 'fl_alpha', 'fl_N')
tb_states_choices_ttest <- tb_states_choices_ttest %>% rename_all(~c(ar_st_col_names))
tb_states_choices_dense <- tb_states_choices_dense %>% rename_all(~c(ar_st_col_names))

# Define Implicit Function
ffi_nonlin_dplyrdo <- function(fl_A, fl_alpha, fl_N, ar_A, ar_alpha, fl_N_agg, fl_rho){
  # scalar value that are row-specific, in dataframe already: *fl_A*, *fl_alpha*, *fl_N*
  # array and scalars not in dataframe, common all rows: *ar_A*, *ar_alpha*, *fl_N_agg*, *fl_rho*

  # Test Parameters
  # ar_A = ar_nN_A
  # ar_alpha = ar_nN_alpha
  # fl_N = 100
  # fl_rho = -1
  # fl_N_q = 10

  # Apply Function
  ar_p1_s1 = exp((fl_A - ar_A)*fl_rho)
  ar_p1_s2 = (fl_alpha/ar_alpha)
  ar_p1_s3 = (1/(ar_alpha*fl_rho - 1))
  ar_p1 = (ar_p1_s1*ar_p1_s2)^ar_p1_s3
  ar_p2 = fl_N^((fl_alpha*fl_rho-1)/(ar_alpha*fl_rho-1))
  ar_overall = ar_p1*ar_p2
  fl_overall = fl_N_agg - sum(ar_overall)

  return(fl_overall)
}
3.1.2.2.2 Evaluate at Three Choice Points and Show Table

In the example below, just show results evaluating over three choice points and show table.

# fl_A, fl_alpha are from columns of tb_nN_by_nQ_A_alpha
tb_states_choices_ttest_eval = tb_states_choices_ttest %>% rowwise() %>%
                        mutate(dplyr_eval = ffi_nonlin_dplyrdo(fl_A, fl_alpha, fl_N,
                                                               ar_nN_A, ar_nN_alpha,
                                                               fl_N_agg, fl_rho))
# Show
kable(tb_states_choices_ttest_eval) %>%
  kable_styling_fc()
INDI_ID fl_A fl_alpha fl_N dplyr_eval
1 -2.0000000 0.1000000 0 100.00000
1 -2.0000000 0.1000000 50 -5666.95576
1 -2.0000000 0.1000000 100 -12880.28392
2 -0.6666667 0.3666667 0 100.00000
2 -0.6666667 0.3666667 50 -595.73454
2 -0.6666667 0.3666667 100 -1394.70698
3 0.6666667 0.6333333 0 100.00000
3 0.6666667 0.6333333 50 -106.51058
3 0.6666667 0.6333333 100 -323.94216
4 2.0000000 0.9000000 0 100.00000
4 2.0000000 0.9000000 50 22.55577
4 2.0000000 0.9000000 100 -51.97161
3.1.2.2.3 Evaluate at Many Choice Points and Show Graphically

Same as above, but now we evaluate the function over the individuals at many choice points so that we can graph things out.

# fl_A, fl_alpha are from columns of tb_nN_by_nQ_A_alpha
tb_states_choices_dense_eval = tb_states_choices_dense %>% rowwise() %>%
                        mutate(dplyr_eval = ffi_nonlin_dplyrdo(fl_A, fl_alpha, fl_N,
                                                               ar_nN_A, ar_nN_alpha,
                                                               fl_N_agg, fl_rho))
# Labeling
st_title <- paste0('Evaluate Non-Linear Functions to Search for Roots')
st_subtitle <- paste0('https://fanwangecon.github.io/',
                      'R4Econ/function/mutatef/htmlpdfr/fs_func_choice_states.html')
st_caption <- paste0('Evaluating the function, ',
                     'https://fanwangecon.github.io/R4Econ/')
st_x_label <- 'x values'
st_y_label <- 'f(x)'

# Show
dim(tb_states_choices_dense_eval)
## [1] 400   5
summary(tb_states_choices_dense_eval)
##     INDI_ID          fl_A       fl_alpha        fl_N       dplyr_eval       
##  Min.   :1.00   Min.   :-2   Min.   :0.1   Min.   :  0   Min.   :-12880.28  
##  1st Qu.:1.75   1st Qu.:-1   1st Qu.:0.3   1st Qu.: 25   1st Qu.: -1167.29  
##  Median :2.50   Median : 0   Median :0.5   Median : 50   Median :  -202.42  
##  Mean   :2.50   Mean   : 0   Mean   :0.5   Mean   : 50   Mean   : -1645.65  
##  3rd Qu.:3.25   3rd Qu.: 1   3rd Qu.:0.7   3rd Qu.: 75   3rd Qu.:     0.96  
##  Max.   :4.00   Max.   : 2   Max.   :0.9   Max.   :100   Max.   :   100.00
lineplot <- tb_states_choices_dense_eval %>%
    ggplot(aes(x=fl_N, y=dplyr_eval)) +
        geom_line() +
        facet_wrap( . ~ INDI_ID, scales = "free") +
        geom_hline(yintercept=0, linetype="dashed",
                color = "red", size=1) +
        labs(title = st_title,
             subtitle = st_subtitle,
             x = st_x_label,
             y = st_y_label,
             caption = st_caption)
print(lineplot)

3.2 Dataframe Do Anything

3.2.1 (Mx1 by N) to (MxQ by N+1)

Go back to fan’s REconTools research support package, R4Econ examples page, PkgTestR packaging guide, or Stat4Econ course page.

Case One: There is a dataframe with \(M\) rows, based on these \(m\) specific information, generate dataframes for each \(m\). Stack these indivdiual dataframes together and merge original \(m\) specific information in as well. The number of rows for each \(m\) is \(Q_m\), each \(m\) could have different number of expansion rows.

Generate a panel with \(M\) individuals, each individual is observed for different spans of times (uncount). Before expanding, generate individual specific normal distribution standard deviation. All individuals share the same mean, but have increasing standard deviations.

3.2.1.1 Generate Dataframe with M Rows.

This is the first step, generate \(M\) rows of data, to be expanded. Each row contains the number of normal draws to make and the mean and the standard deviation for normal daraws that are \(m\) specific.

# Parameter Setups
it_M <- 3
it_Q_max <- 5
fl_rnorm_mu <- 1000
ar_rnorm_sd <- seq(0.01, 200, length.out=it_M)
ar_it_q <- sample.int(it_Q_max, it_M, replace=TRUE)

# N by Q varying parameters
mt_data = cbind(ar_it_q, ar_rnorm_sd)
tb_M <- as_tibble(mt_data) %>% rowid_to_column(var = "ID") %>%
                rename(sd = ar_rnorm_sd, Q = ar_it_q) %>%
                mutate(mean = fl_rnorm_mu)

# display
kable(tb_M) %>%
  kable_styling_fc()
ID Q sd mean
1 1 0.010 1000
2 3 100.005 1000
3 4 200.000 1000

3.2.1.2 Random Normal Draw Expansion

The steps are:

  1. do anything
  2. use “.$” sign to refer to variable names, or [[‘name’]]
  3. unnest
  4. left_join expanded and original

Note these all give the same results

Use dot dollar to get variables

# Generate $Q_m$ individual specific incomes, expanded different number of times for each m
tb_income <- tb_M %>% group_by(ID) %>%
  do(income = rnorm(.$Q, mean=.$mean, sd=.$sd)) %>%
  unnest(c(income))

# Merge back with tb_M
tb_income_full_dd <- tb_income %>%
  left_join(tb_M)

# display
kable(tb_income) %>%
  kable_styling_fc()
ID income
1 999.9803
2 1070.1391
2 952.7185
2 893.2123
3 956.4050
3 794.7991
3 854.2218
3 874.9921
kable(tb_income_full_dd) %>%
  kable_styling_fc()
ID income Q sd mean
1 999.9803 1 0.010 1000
2 1070.1391 3 100.005 1000
2 952.7185 3 100.005 1000
2 893.2123 3 100.005 1000
3 956.4050 4 200.000 1000
3 794.7991 4 200.000 1000
3 854.2218 4 200.000 1000
3 874.9921 4 200.000 1000

3.2.2 (MxP by N) to (Mx1 by 1)

Go back to fan’s REconTools research support package, R4Econ examples page, PkgTestR packaging guide, or Stat4Econ course page.

3.2.2.1 Wages from Many Countries and Country-specific GINI

There is a Panel with \(M\) individuals and each individual has \(Q\) records/rows. A function generate an individual specific outcome given the \(Q\) individual specific inputs, along with shared parameters/values stored as variables that contain common values for each of the \(M\) individuals.

For example, suppose we have a dataframe of individual wage information from different countries (the number of countries is \(M\)). Each row is an individual from one country, giving us \(Q \cdot M\) observations of wages.

We want to generate country specific gini based on the individual wage data for each country in the dataframe. Additionally, perhaps the gini formula requires not just individual wages but some additional parameters or shared dataframes as inputs. We will use the ff_dist_gini_vector_pos.html function from REconTools.

First, we simulate a dataframe with \(M\) countries, and up to \(Q\) people in each country. The countries share the same mean income, but have different standard deviations.

# Parameter Setups
it_M <- 10
it_Q_max <- 100
fl_rnorm_mu <- 1
ar_rnorm_sd <- seq(0.01, 0.2, length.out=it_M)
set.seed('789')
ar_it_q <- sample.int(it_Q_max, it_M, replace=TRUE)

# N by Q varying parameters
mt_data <- cbind(ar_it_q, ar_rnorm_sd)
tb_M <- as_tibble(mt_data) %>% rowid_to_column(var = "ID") %>%
                rename(sd = ar_rnorm_sd,
                       Q = ar_it_q) %>%
                mutate(mean = fl_rnorm_mu) %>%
                select(ID, Q,
                       mean, sd)

# Show table
kable(tb_M, caption = paste0("M=", it_M,
  " countries (ID is country ID), observation per country (Q)",
  ", mean and s.d. of wages each country")) %>%
  kable_styling_fc()
Table 3.1: M=10 countries (ID is country ID), observation per country (Q), mean and s.d. of wages each country
ID Q mean sd
1 45 1 0.0100000
2 12 1 0.0311111
3 42 1 0.0522222
4 26 1 0.0733333
5 99 1 0.0944444
6 37 1 0.1155556
7 100 1 0.1366667
8 43 1 0.1577778
9 67 1 0.1788889
10 70 1 0.2000000

Second, we now expand the dataframe so that each country has not just one row, but \(Q_i\) of observations (\(i\) is country), or randomly drawn income based on the country-specific income distribution. Note that there are three ways of referring to variable names with dot, which are all shown below:

  1. We can explicitly refer to names
  2. We can use the dollar dot structure to use string variable names in do anything.
  3. We can use dot bracket, this is the only option that works with string variable names
# A. Normal Draw Expansion, Explicitly Name
set.seed('123')
tb_income_norm_dot_dollar <- tb_M %>% group_by(ID) %>%
  do(income = rnorm(.$Q, mean=.$mean, sd=.$sd)) %>%
  unnest(c(income)) %>%
  left_join(tb_M, by="ID")

# Normal Draw Expansion again, dot dollar differently with string variable name
set.seed('123')
tb_income_norm_dollar_dot <- tb_M %>% group_by(ID) %>%
  do(income = rnorm(`$`(., 'Q'), mean = `$`(., 'mean'), sd = `$`(., 'sd'))) %>%
  unnest(c(income)) %>%
  left_join(tb_M, by="ID")

# Normal Draw Expansion again, dot double bracket
set.seed('123')
svr_mean <- 'mean'
svr_sd <- 'sd'
svr_Q <- 'Q'
tb_income_norm_dot_bracket_db <- tb_M %>% group_by(ID) %>%
  do(income = rnorm(.[[svr_Q]], mean = .[[svr_mean]], sd = .[[svr_sd]])) %>%
  unnest(c(income)) %>%
  left_join(tb_M, by="ID")

Third, we print the first set of rows of the dataframe, and also summarize income by country groups.

# Show dataframe dimension
print(dim(tb_income_norm_dot_bracket_db))
## [1] 541   5
# Show first 20 rows
kable(head(tb_income_norm_dot_bracket_db, 20),
  caption = "ID = country ID, wage draws"
  ) %>% kable_styling_fc()
Table 3.2: ID = country ID, wage draws
ID income Q mean sd
1 0.9943952 45 1 0.01
1 0.9976982 45 1 0.01
1 1.0155871 45 1 0.01
1 1.0007051 45 1 0.01
1 1.0012929 45 1 0.01
1 1.0171506 45 1 0.01
1 1.0046092 45 1 0.01
1 0.9873494 45 1 0.01
1 0.9931315 45 1 0.01
1 0.9955434 45 1 0.01
1 1.0122408 45 1 0.01
1 1.0035981 45 1 0.01
1 1.0040077 45 1 0.01
1 1.0011068 45 1 0.01
1 0.9944416 45 1 0.01
1 1.0178691 45 1 0.01
1 1.0049785 45 1 0.01
1 0.9803338 45 1 0.01
1 1.0070136 45 1 0.01
1 0.9952721 45 1 0.01
# Display country-specific summaries
REconTools::ff_summ_bygroup(tb_income_norm_dot_bracket_db, c("ID"), "income")$df_table_grp_stats

Fourth, there is only one input for the gini function ar_pos. Note that the gini are not very large even with large SD, because these are normal distributions. By Construction, most peple are in the middle. So with almost zero standard deviation, we have perfect equality, as standard deviation increases, inequality increases, but still pretty equal overall, there is no fat upper tail.

# Gini by Group
tb_gini_norm <- tb_income_norm_dot_bracket_db %>% group_by(ID) %>%
  do(inc_gini_norm = REconTools::ff_dist_gini_vector_pos(.$income)) %>%
  unnest(c(inc_gini_norm)) %>%
  left_join(tb_M, by="ID")

# display
kable(tb_gini_norm,
  caption = paste0(
    "Country-specific wage GINI based on income draws",
    ", ID=country-ID, Q=sample-size-per-country",
    ", mean=true-income-mean, sd=true-income-sd"
  )) %>%
  kable_styling_fc()
Table 3.3: Country-specific wage GINI based on income draws, ID=country-ID, Q=sample-size-per-country, mean=true-income-mean, sd=true-income-sd
ID inc_gini_norm Q mean sd
1 0.0052111 45 1 0.0100000
2 0.0137174 12 1 0.0311111
3 0.0245939 42 1 0.0522222
4 0.0303468 26 1 0.0733333
5 0.0527628 99 1 0.0944444
6 0.0544053 37 1 0.1155556
7 0.0786986 100 1 0.1366667
8 0.0818873 43 1 0.1577778
9 0.1014639 67 1 0.1788889
10 0.0903825 70 1 0.2000000

3.2.3 (MxP by N) to (MxQ by N+Z)

Go back to fan’s REconTools research support package, R4Econ examples page, PkgTestR packaging guide, or Stat4Econ course page.

There is a dataframe composed of M mini-dataframes. Group by a variable that identifies each unique sub-dataframe, and use the sub-dataframes with P rows as inputs to a function.

The function outputs Q by Z rows and columns of results, stack the results. The output file has MxQ rows and the Z columns of additional results should be appended.

3.2.3.1 Generate the MxP by N Dataframe

M Grouping characteristics, P rows for each group, and N Variables.

  1. M are individuals
  2. P are dates
  3. A wage variable for individual wage at each date. And a savings varaible as well.
# Define
it_M <- 3
it_P <- 5
svr_m <- 'group_m'
svr_mp <- 'info_mp'

# dataframe
set.seed(123)
df_panel_skeleton <- as_tibble(matrix(it_P, nrow=it_M, ncol=1)) %>%
  rowid_to_column(var = svr_m) %>%
  uncount(V1) %>%
  group_by(!!sym(svr_m)) %>% mutate(!!sym(svr_mp) := row_number()) %>%
  ungroup() %>%
  rowwise() %>% mutate(wage = rnorm(1, 100, 10), 
                       savings = rnorm(1, 200, 30)) %>%
  ungroup() %>% 
  rowid_to_column(var = "id_ji")

# Print
kable(df_panel_skeleton) %>% kable_styling_fc()
id_ji group_m info_mp wage savings
1 1 1 94.39524 253.6074
2 1 2 97.69823 214.9355
3 1 3 115.58708 141.0015
4 1 4 100.70508 221.0407
5 1 5 101.29288 185.8163
6 2 1 117.15065 167.9653
7 2 2 104.60916 193.4608
8 2 3 87.34939 169.2199
9 2 4 93.13147 178.1333
10 2 5 95.54338 181.2488
11 3 1 112.24082 149.3992
12 3 2 103.59814 225.1336
13 3 3 104.00771 204.6012
14 3 4 101.10683 165.8559
15 3 5 94.44159 237.6144

3.2.3.2 Subgroup Compute and Expand

Use the M sub-dataframes, generate Q by Z result for each of the M groups. Stack all results together.

Base on all the wages for each individual, generate individual specific mean and standard deviations. Do this for three things, the wage variable, the savings variable, and the sum of wage and savings:

  1. Z=2: 2 columns, mean and standard deviation
  2. Q=3: 3 rows, statistics based on wage, savings, and the sum of both

First, here is the processing function that takes the dataframe as input, with a parameter for rounding:

# define function
ffi_subset_mean_sd <- function(df_sub, it_round=1) {
  #' A function that generates mean and sd for several variables
  #'
  #' @description
  #' Assume there are two variables in df_sub wage and savings
  #'
  #' @param df_sub dataframe where each individual row is a different 
  #' data point, over which we compute mean and sd, Assum there are two 
  #' variables, savings and wage
  #' @param it_round integer rounding for resulting dataframe
  #' @return a dataframe where each row is aggregate for a different type
  #' of variablea and each column is a different statistics
  
  fl_wage_mn = mean(df_sub$wage)
  fl_wage_sd = sd(df_sub$wage)
  
  fl_save_mn = mean(df_sub$savings)
  fl_save_sd = sd(df_sub$savings)
  
  fl_wgsv_mn = mean(df_sub$wage + df_sub$savings)
  fl_wgsv_sd = sd(df_sub$wage + df_sub$savings)  
  
  ar_mn <- c(fl_wage_mn, fl_save_mn, fl_wgsv_mn)
  ar_sd <- c(fl_wage_sd, fl_save_sd, fl_wgsv_sd)
  ar_st_row_lab <- c('wage', 'savings', 'wage_and_savings')
  
  mt_stats <- cbind(ar_mn, ar_sd)
  mt_stats <- round(mt_stats, it_round)
  
  ar_st_varnames <- c('mean', 'sd', 'variables')
  df_combine <- as_tibble(mt_stats) %>% 
    add_column(ar_st_row_lab) %>%
    rename_all(~c(ar_st_varnames)) %>%
    select(variables, 'mean', 'sd') %>%
    rowid_to_column(var = "id_q")
    
  return(df_combine)
}
# testing function
ffi_subset_mean_sd(df_panel_skeleton %>% filter(!!sym(svr_m)==1))

Second, call ffi_subset_mean_sd function for each of the groups indexed by j and stack results together with j index:

  1. group by
  2. call function
  3. unnest
# run group stats and stack dataframes
df_outputs <- df_panel_skeleton %>% group_by(!!sym(svr_m)) %>%
  do(df_stats = ffi_subset_mean_sd(., it_round=2)) %>%
  unnest() %>%
  rowid_to_column(var = "id_mq")
# print
kable(df_outputs) %>% kable_styling_fc()
id_mq group_m id_q variables mean sd
1 1 1 wage 101.94 8.11
2 1 2 savings 203.28 42.33
3 1 3 wage_and_savings 305.22 34.83
4 2 1 wage 99.56 11.63
5 2 2 savings 178.01 10.34
6 2 3 wage_and_savings 277.56 15.48
7 3 1 wage 103.08 6.39
8 3 2 savings 196.52 37.86
9 3 3 wage_and_savings 299.60 33.50

In the resulting file, we went from a matrix with MxP rows to a matrix with MxQ Rows.

3.3 Apply and pmap

3.3.1 Apply and Sapply

Go back to fan’s REconTools research support package, R4Econ examples page, PkgTestR packaging guide, or Stat4Econ course page.

We want evaluate linear function f(x_i, y_i, ar_x, ar_y, c, d), where c and d are constants, and ar_x and ar_y are arrays, both fixed. x_i and y_i vary over each row of matrix. More specifically, we have a functions, this function takes inputs that are individual specific. We would like to evaluate this function concurrently across \(N\) individuals.

The function is such that across the \(N\) individuals, some of the function parameter inputs are the same, but others are different. If we are looking at demand for a particular product, the prices of all products enter the demand equation for each product, but the product’s own price enters also in a different way.

The objective is either to just evaluate this function across \(N\) individuals, or this is a part of a nonlinear solution system.

What is the relationship between apply, lapply and vectorization? see Is the “*apply” family really not vectorized?.

3.3.1.1 Set up Input Arrays

There is a function that takes \(M=Q+P\) inputs, we want to evaluate this function \(N\) times. Each time, there are \(M\) inputs, where all but \(Q\) of the \(M\) inputs, meaning \(P\) of the \(M\) inputs, are the same. In particular, \(P=Q*N\).

\[M = Q+P = Q + Q*N\]

# it_child_count = N, the number of children
it_N_child_cnt = 5
# it_heter_param = Q, number of parameters that are
# heterogeneous across children
it_Q_hetpa_cnt = 2

# P fixed parameters, nN is N dimensional, nP is P dimensional
ar_nN_A = seq(-2, 2, length.out = it_N_child_cnt)
ar_nN_alpha = seq(0.1, 0.9, length.out = it_N_child_cnt)
ar_nP_A_alpha = c(ar_nN_A, ar_nN_alpha)

# N by Q varying parameters
mt_nN_by_nQ_A_alpha = cbind(ar_nN_A, ar_nN_alpha)

# display
kable(mt_nN_by_nQ_A_alpha) %>%
  kable_styling_fc()
ar_nN_A ar_nN_alpha
-2 0.1
-1 0.3
0 0.5
1 0.7
2 0.9

3.3.1.2 Using apply

3.3.1.2.1 Named Function

First we use the apply function, we have to hard-code the arrays that are fixed for each of the \(N\) individuals. Then apply allows us to loop over the matrix that is \(N\) by \(Q\), each row one at a time, from \(1\) to \(N\).

# Define Implicit Function
ffi_linear_hardcode <- function(ar_A_alpha){
  # ar_A_alpha[1] is A
  # ar_A_alpha[2] is alpha

  fl_out = sum(ar_A_alpha[1]*ar_nN_A +
                 1/(ar_A_alpha[2] + 1/ar_nN_alpha))

  return(fl_out)
}

# Evaluate function row by row
ar_func_apply = apply(mt_nN_by_nQ_A_alpha, 1, ffi_linear_hardcode)
3.3.1.2.2 Anonymous Function
  • apply over matrix

Apply with anonymous function generating a list of arrays of different lengths. In the example below, we want to drawn \(N\) sets of random uniform numbers, but for each set the number of draws we want to have is \(Q_i\). Furthermore, we want to rescale the random uniform draws so that they all become proportions that sum u pto one for each \(i\), but then we multply each row’s values by the row specific aggregates.

The anonymous function has hard coded parameters. Using an anonymous function here allows for parameters to be provided inside the function that are shared across each looped evaluation. This is perhaps more convenient than sapply with additional parameters.

set.seed(1039)

# Define the number of draws each row and total amount
it_N <- 4
fl_unif_min <- 1
fl_unif_max <- 2
mt_draw_define <- cbind(sample(it_N, it_N, replace=TRUE),
                        runif(it_N, min=1, max=10))
tb_draw_define <- as_tibble(mt_draw_define) %>%
  rowid_to_column(var = "draw_group")
print(tb_draw_define)

# apply row by row, anonymous function has hard
# coded min and max
ls_ar_draws_shares_lvls =
  apply(tb_draw_define,
        1,
        function(row) {
          it_draw <- row[2]
          fl_sum <- row[3]
          ar_unif <- runif(it_draw,
                           min=fl_unif_min,
                           max=fl_unif_max)
          ar_share <- ar_unif/sum(ar_unif)
          ar_levels <- ar_share*fl_sum
          return(list(ar_share=ar_share,
                      ar_levels=ar_levels))
        })

# Show Results as list
print(ls_ar_draws_shares_lvls)
## [[1]]
## [[1]]$ar_share
## [1] 0.2783638 0.2224140 0.2797840 0.2194381
## 
## [[1]]$ar_levels
## [1] 1.492414 1.192446 1.500028 1.176491
## 
## 
## [[2]]
## [[2]]$ar_share
## [1] 0.5052919 0.4947081
## 
## [[2]]$ar_levels
## [1] 3.866528 3.785541
## 
## 
## [[3]]
## [[3]]$ar_share
## [1] 1
## 
## [[3]]$ar_levels
##       V2 
## 9.572211 
## 
## 
## [[4]]
## [[4]]$ar_share
## [1] 0.4211426 0.2909812 0.2878762
## 
## [[4]]$ar_levels
## [1] 4.051971 2.799640 2.769765

Above, our results is a list of lists. We can convert this to a table. If all results are scalar, would be regular table where each cell has a single scalar value.

# Show results as table
kable(as_tibble(do.call(rbind, ls_ar_draws_shares_lvls))) %>%
  kable_styling_fc()
ar_share ar_levels
0.2783638, 0.2224140, 0.2797840, 0.2194381 1.492414, 1.192446, 1.500028, 1.176491
0.5052919, 0.4947081 3.866528, 3.785541
1 9.572211
0.4211426, 0.2909812, 0.2878762 4.051971, 2.799640, 2.769765

We will try to do the same thing as above, but now the output will be a stacked dataframe. Note that within each element of the apply row by row loop, we are generating two variables ar_share and ar_levels. We will not generate a dataframe with multiple columns, storing ar_share, ar_levels as well as information on min, max, number of draws and rescale total sum.

set.seed(1039)
# apply row by row, anonymous function has hard coded min and max
ls_mt_draws_shares_lvls =
  apply(tb_draw_define, 1, function(row) {

    it_draw_group <- row[1]
    it_draw <- row[2]
    fl_sum <- row[3]

    ar_unif <- runif(it_draw,
                     min=fl_unif_min,
                     max=fl_unif_max)
    ar_share <- ar_unif/sum(ar_unif)
    ar_levels <- ar_share*fl_sum

    mt_all_res <- cbind(it_draw_group, it_draw, fl_sum,
                        ar_unif, ar_share, ar_levels)
    colnames(mt_all_res) <-
      c('draw_group', 'draw_count', 'sum',
        'unif_draw', 'share', 'rescale')
    rownames(mt_all_res) <- NULL

    return(mt_all_res)
  })
mt_draws_shares_lvls_all <- do.call(rbind, ls_mt_draws_shares_lvls)
# Show Results
kable(mt_draws_shares_lvls_all) %>% kable_styling_fc()
draw_group draw_count sum unif_draw share rescale
1 4 5.361378 1.125668 0.1988606 1.066167
1 4 5.361378 1.668536 0.2947638 1.580340
1 4 5.361378 1.419382 0.2507483 1.344356
1 4 5.361378 1.447001 0.2556274 1.370515
2 2 7.652069 1.484598 0.4605236 3.523959
2 2 7.652069 1.739119 0.5394764 4.128110
3 1 9.572211 1.952468 1.0000000 9.572211
4 3 9.621375 1.957931 0.3609352 3.472693
4 3 9.621375 1.926995 0.3552324 3.417824
4 3 9.621375 1.539678 0.2838324 2.730858

3.3.1.3 Using sapply

3.3.1.3.1 Named Function
  • r convert matrix to list
  • Convert a matrix to a list of vectors in R

Sapply allows us to not have tohard code in the A and alpha arrays. But Sapply works over List or Vector, not Matrix. So we have to convert the \(N\) by \(Q\) matrix to a N element list Now update the function with sapply.

ls_ar_nN_by_nQ_A_alpha = as.list(data.frame(t(mt_nN_by_nQ_A_alpha)))

# Define Implicit Function
ffi_linear_sapply <- function(ar_A_alpha, ar_A, ar_alpha){
  # ar_A_alpha[1] is A
  # ar_A_alpha[2] is alpha

  fl_out = sum(ar_A_alpha[1]*ar_nN_A +
                 1/(ar_A_alpha[2] + 1/ar_nN_alpha))

  return(fl_out)
}

# Evaluate function row by row
ar_func_sapply = sapply(ls_ar_nN_by_nQ_A_alpha, ffi_linear_sapply,
                        ar_A=ar_nN_A, ar_alpha=ar_nN_alpha)
3.3.1.3.2 Anonymous Function, list of arrays as output
  • sapply anonymous function
  • r anoymous function multiple lines

Sapply with anonymous function generating a list of arrays of different lengths. In the example below, we want to drawn \(N\) sets of random uniform numbers, but for each set the number of draws we want to have is \(Q_i\). Furthermore, we want to rescale the random uniform draws so that they all become proportions that sum u pto one for each \(i\).

it_N <- 4
fl_unif_min <- 1
fl_unif_max <- 2

# Generate using runif without anonymous function
set.seed(1039)
ls_ar_draws = sapply(seq(it_N),
                     runif,
                     min=fl_unif_min, max=fl_unif_max)
print(ls_ar_draws)
## [[1]]
## [1] 1.125668
## 
## [[2]]
## [1] 1.668536 1.419382
## 
## [[3]]
## [1] 1.447001 1.484598 1.739119
## 
## [[4]]
## [1] 1.952468 1.957931 1.926995 1.539678
# Generate Using Anonymous Function
set.seed(1039)
ls_ar_draws_shares = sapply(seq(it_N),
                            function(n, min, max) {
                              ar_unif <- runif(n,min,max)
                              ar_share <- ar_unif/sum(ar_unif)
                              return(ar_share)
                            },
                            min=fl_unif_min, max=fl_unif_max)
# Print Share
print(ls_ar_draws_shares)
## [[1]]
## [1] 1
## 
## [[2]]
## [1] 0.5403432 0.4596568
## 
## [[3]]
## [1] 0.3098027 0.3178522 0.3723451
## 
## [[4]]
## [1] 0.2646671 0.2654076 0.2612141 0.2087113
# Sapply with anonymous function to check sums
sapply(seq(it_N), function(x) {sum(ls_ar_draws[[x]])})
## [1] 1.125668 3.087918 4.670717 7.377071
sapply(seq(it_N), function(x) {sum(ls_ar_draws_shares[[x]])})
## [1] 1 1 1 1
3.3.1.3.3 Anonymous Function, matrix as output

Below, we provide another example with sapply, we generate probabilities for discrete random variables that follow the binomial distribution. We do this for twice, with “chance of success” set at different values.

The output in this case is a matrix, where each column stores the output from a different dbinom call.

# First, generate the results without sapply
ar_binomprob <- matrix(c(0.1, 0.9), nrow=2, ncol=1)
# Second, generate the results with sapply
# dbinom call: dbinom(x, size, prob, log = FALSE)
# The function requires x, size, and prob.
# we provide x and size, and each element of ar_binomprob
# will be a different prob.
mt_dbinom <- sapply(ar_binomprob, dbinom, x=seq(0,4), size=4)
# Third compare results
print(paste0('binomial p=', ar_binomprob[1]))
## [1] "binomial p=0.1"
print(dbinom(seq(0,4), 4, ar_binomprob[1]))
## [1] 0.6561 0.2916 0.0486 0.0036 0.0001
print(mt_dbinom[,1])
## [1] 0.6561 0.2916 0.0486 0.0036 0.0001
print(paste0('binomial p=', ar_binomprob[2]))
## [1] "binomial p=0.9"
print(dbinom(seq(0,4), 4, ar_binomprob[2]))
## [1] 0.0001 0.0036 0.0486 0.2916 0.6561
print(mt_dbinom[,2])
## [1] 0.0001 0.0036 0.0486 0.2916 0.6561

3.3.1.4 Compare Results

# Show overall Results
mt_results <- cbind(ar_func_apply, ar_func_sapply)
colnames(mt_results) <- c('eval_lin_apply', 'eval_lin_sapply')
kable(mt_results) %>% kable_styling_fc()
eval_lin_apply eval_lin_sapply
X1 2.346356 2.346356
X2 2.094273 2.094273
X3 1.895316 1.895316
X4 1.733708 1.733708
X5 1.599477 1.599477

3.3.2 Mutate Evaluate Functions

Go back to fan’s REconTools research support package, R4Econ examples page, PkgTestR packaging guide, or Stat4Econ course page.

Apply a function over rows of a matrix using mutate, rowwise, etc.

3.3.2.1 Set up Input Arrays

There is a function that takes \(M=Q+P\) inputs, we want to evaluate this function \(N\) times. Each time, there are \(M\) inputs, where all but \(Q\) of the \(M\) inputs, meaning \(P\) of the \(M\) inputs, are the same. In particular, \(P=Q*N\).

\[M = Q+P = Q + Q*N\]

# it_child_count = N, the number of children
it_N_child_cnt = 5
# it_heter_param = Q, number of parameters that are
# heterogeneous across children
it_Q_hetpa_cnt = 2

# P fixed parameters, nN is N dimensional, nP is P dimensional
ar_nN_A = seq(-2, 2, length.out = it_N_child_cnt)
ar_nN_alpha = seq(0.1, 0.9, length.out = it_N_child_cnt)
ar_nP_A_alpha = c(ar_nN_A, ar_nN_alpha)

# N by Q varying parameters
mt_nN_by_nQ_A_alpha = cbind(ar_nN_A, ar_nN_alpha)

# display
kable(mt_nN_by_nQ_A_alpha) %>%
  kable_styling_fc()
ar_nN_A ar_nN_alpha
-2 0.1
-1 0.3
0 0.5
1 0.7
2 0.9
# Convert Matrix to Tibble
ar_st_col_names = c('fl_A', 'fl_alpha')
tb_nN_by_nQ_A_alpha <- as_tibble(mt_nN_by_nQ_A_alpha) %>%
  rename_all(~c(ar_st_col_names))
# Show
kable(tb_nN_by_nQ_A_alpha) %>%
  kable_styling_fc()
fl_A fl_alpha
-2 0.1
-1 0.3
0 0.5
1 0.7
2 0.9

3.3.2.2 mutate rowwise

  • dplyr mutate own function
  • dplyr all row function
  • dplyr do function
  • apply function each row dplyr
  • applying a function to every row of a table using dplyr
  • dplyr rowwise
# Define Implicit Function
ffi_linear_dplyrdo <- function(fl_A, fl_alpha, ar_nN_A, ar_nN_alpha){
  # ar_A_alpha[1] is A
  # ar_A_alpha[2] is alpha

  print(paste0('cur row, fl_A=', fl_A, ', fl_alpha=', fl_alpha))
  fl_out = sum(fl_A*ar_nN_A + 1/(fl_alpha + 1/ar_nN_alpha))

  return(fl_out)
}

# Evaluate function row by row of tibble
# fl_A, fl_alpha are from columns of tb_nN_by_nQ_A_alpha
tb_nN_by_nQ_A_alpha_show <- tb_nN_by_nQ_A_alpha %>%
  rowwise() %>%
  mutate(dplyr_eval =
           ffi_linear_dplyrdo(fl_A, fl_alpha, ar_nN_A, ar_nN_alpha))
## [1] "cur row, fl_A=-2, fl_alpha=0.1"
## [1] "cur row, fl_A=-1, fl_alpha=0.3"
## [1] "cur row, fl_A=0, fl_alpha=0.5"
## [1] "cur row, fl_A=1, fl_alpha=0.7"
## [1] "cur row, fl_A=2, fl_alpha=0.9"
# Show
kable(tb_nN_by_nQ_A_alpha_show) %>%
  kable_styling_fc()
fl_A fl_alpha dplyr_eval
-2 0.1 2.346356
-1 0.3 2.094273
0 0.5 1.895316
1 0.7 1.733708
2 0.9 1.599477

same as before, still rowwise, but hard code some inputs:

# Define function, fixed inputs are not parameters, but
# defined earlier as a part of the function
# ar_nN_A, ar_nN_alpha are fixed, not parameters
ffi_linear_dplyrdo_func <- function(fl_A, fl_alpha){
  fl_out <- sum(fl_A*ar_nN_A + 1/(fl_alpha + 1/ar_nN_alpha))
  return(fl_out)
}

# Evaluate function row by row of tibble
tbfunc_A_nN_by_nQ_A_alpha_rowwise = tb_nN_by_nQ_A_alpha %>% rowwise() %>%
  mutate(dplyr_eval = ffi_linear_dplyrdo_func(fl_A, fl_alpha))
# Show
kable(tbfunc_A_nN_by_nQ_A_alpha_rowwise) %>%
  kable_styling_fc()
fl_A fl_alpha dplyr_eval
-2 0.1 2.346356
-1 0.3 2.094273
0 0.5 1.895316
1 0.7 1.733708
2 0.9 1.599477

3.3.2.3 mutate with pmap

Apparantly rowwise() is not a good idea, and pmap should be used, below is the pmap solution to the problem. Which does seem nicer. Crucially, don’t have to define input parameter names, automatically I think they are matching up to the names in the function

  • dplyr mutate pass function
  • r function quosure string multiple
  • r function multiple parameters as one string
  • dplyr mutate anonymous function
  • quosure style lambda
  • pmap tibble rows
  • dplyr pwalk
# Define function, fixed inputs are not parameters, but defined
# earlier as a part of the function Rorate fl_alpha and fl_A name
# compared to before to make sure pmap tracks by names
ffi_linear_dplyrdo_func <- function(fl_alpha, fl_A){
  fl_out <- sum(fl_A*ar_nN_A + 1/(fl_alpha + 1/ar_nN_alpha))
  return(fl_out)
}

# Evaluate a function row by row of dataframe, generate list,
# then to vector
tb_nN_by_nQ_A_alpha %>% pmap(ffi_linear_dplyrdo_func) %>% unlist()
## [1] 2.346356 2.094273 1.895316 1.733708 1.599477
# Same as above, but in line line and save output as new column
# in dataframe note this ONLY works if the tibble only has variables
# that are inputs for the function if tibble contains additional
#  variables, those should be droppd, or only the ones needed selected,
# inside the pmap call below.
tbfunc_A_nN_by_nQ_A_alpha_pmap <- tb_nN_by_nQ_A_alpha %>%
  mutate(dplyr_eval_pmap =
           unlist(
             pmap(tb_nN_by_nQ_A_alpha, ffi_linear_dplyrdo_func)
           )
  )

# Show
kable(tbfunc_A_nN_by_nQ_A_alpha_pmap) %>%
  kable_styling_fc()
fl_A fl_alpha dplyr_eval_pmap
-2 0.1 2.346356
-1 0.3 2.094273
0 0.5 1.895316
1 0.7 1.733708
2 0.9 1.599477

3.3.2.4 rowwise and do

Now, we have three types of parameters, for something like a bisection type calculation. We will supply the program with a function with some hard-coded value inside, and as parameters, we will have one parameter which is a row in the current matrix, and another parameter which is a sclar values. The three types of parameters are dealt with sparately:

  1. parameters that are fixed for all bisection iterations, but differ for each row
  • these are hard-coded into the function
  1. parameters that are fixed for all bisection iterations, but are shared across rows
  • these are the first parameter of the function, a list
  1. parameters that differ for each iteration, but differ acoss iterations
  • second scalar value parameter for the function

  • dplyr mutate function applow to each row dot notation

  • note rowwise might be bad according to Hadley, should use pmap?

ffi_linear_dplyrdo_fdot <- function(ls_row, fl_param){
  # Type 1 Param = ar_nN_A, ar_nN_alpha
  # Type 2 Param = ls_row$fl_A, ls_row$fl_alpha
  # Type 3 Param = fl_param

  fl_out <- (sum(ls_row$fl_A*ar_nN_A +
                   1/(ls_row$fl_alpha + 1/ar_nN_alpha))) + fl_param
  return(fl_out)
}

cur_func <- ffi_linear_dplyrdo_fdot
fl_param <- 0
dplyr_eval_flex <- tb_nN_by_nQ_A_alpha %>% rowwise() %>%
  do(dplyr_eval_flex = cur_func(., fl_param)) %>%
  unnest(dplyr_eval_flex)
tbfunc_B_nN_by_nQ_A_alpha <- tb_nN_by_nQ_A_alpha %>% add_column(dplyr_eval_flex)
# Show
kable(tbfunc_B_nN_by_nQ_A_alpha) %>%
  kable_styling_fc()
fl_A fl_alpha dplyr_eval_flex
-2 0.1 2.346356
-1 0.3 2.094273
0 0.5 1.895316
1 0.7 1.733708
2 0.9 1.599477

3.3.2.5 Compare Apply and Mutate Results

# Show overall Results
mt_results <- cbind(tb_nN_by_nQ_A_alpha_show['dplyr_eval'],
                    tbfunc_A_nN_by_nQ_A_alpha_rowwise['dplyr_eval'],
                    tbfunc_A_nN_by_nQ_A_alpha_pmap['dplyr_eval_pmap'],
                    tbfunc_B_nN_by_nQ_A_alpha['dplyr_eval_flex'],
                    mt_nN_by_nQ_A_alpha)
colnames(mt_results) <- c('eval_dplyr_mutate',
                          'eval_dplyr_mutate_hcode',
                          'eval_dplyr_mutate_pmap',
                          'eval_dplyr_mutate_flex',
                          'A_child', 'alpha_child')
kable(mt_results) %>%
  kable_styling_fc_wide()
eval_dplyr_mutate eval_dplyr_mutate_hcode eval_dplyr_mutate_pmap eval_dplyr_mutate_flex A_child alpha_child
2.346356 2.346356 2.346356 2.346356 -2 0.1
2.094273 2.094273 2.094273 2.094273 -1 0.3
1.895316 1.895316 1.895316 1.895316 0 0.5
1.733708 1.733708 1.733708 1.733708 1 0.7
1.599477 1.599477 1.599477 1.599477 2 0.9