Investment frequency and sizes

library(PrjThaiHFID)
library(dplyr)
#> 
#> Attaching package: 'dplyr'
#> The following objects are masked from 'package:stats':
#> 
#>     filter, lag
#> The following objects are masked from 'package:base':
#> 
#>     intersect, setdiff, setequal, union

library(tidyr)
library(stringr)
library(readr)
library(ggplot2)
library(kableExtra)
#> 
#> Attaching package: 'kableExtra'
#> The following object is masked from 'package:dplyr':
#> 
#>     group_rows

bs_style <- c("striped", "hover", "condensed", "responsive")
options(kable_styling_bootstrap_options = bs_style)

We implement PrjThaiHFID-#10 in this function.

Parameters

Here, we set controlling parameters.

# Choose latex for paper output
# st_kableformat <- "latex"
st_kableformat <- "html"
# What is the minimum number of month duration for household to be counted
it_hh_mth_nbr <- 161
# Use data from temp folder or data folder rda files
bl_use_data_rda <- TRUE
# Use the investment function rather than use stored investment files
bl_use_func <- TRUE
# Percentiles of interest
# ar_fl_percentiles <- seq(0.01, 0.99, length.out = 99)
# ar_fl_percentiles <- seq(0.01, 0.99, length.out = 50)
# ar_fl_percentiles <- seq(0.02, 0.98, length.out = 33)
# ar_fl_percentiles <- seq(0.05, 0.95, length.out = 19)
# ar_fl_percentiles <- seq(0.10, 0.90, length.out=9)
# ar_fl_percentiles <- c(0.1, 0.25, 0.50, 0.75, 0.9, 0.95, 0.99)
ar_fl_percentiles <- c(0.05, 0.10, 0.25, 0.50, 0.75, 0.9, 0.95)

Path and files

Get path.

# Load input file
# Path root and data input file
spt_root_prj <- file.path(
  "C:", "Users", "fan",
  # "Documents",
  "Dropbox (UH-ECON)", "PrjThaiForInf",
  fsep = .Platform$file.sep
)
# Output folder path
spt_datatemp_folder <- file.path(
  spt_root_prj, "PrjThaiHFID", "data-temp",
  fsep = .Platform$file.sep
)
spt_datatemp_folder <- file.path(
  "data-temp",
  fsep = .Platform$file.sep
)

Load files.

if (bl_use_data_rda) {
  # Load files
  # for table 1
  tstm_hh_mthspan <- tstm_hh_mthspan

  # for table 2, ff_hfid_invest_winstats()
  # tstm_asset_loan <- PrjThaiHFID::tstm_asset_loan
  tstm_asset_loan <- tstm_asset_loan

  # Generate files from functions
  df <- tstm_asset_loan
  fl_sd_ithres <- stats::qnorm(0.99)
  it_thres_invest_mth_gap <- 2
  ls_return <- PrjThaiHFID::ffp_hfid_invest_gateway(
      df, fl_sd_ithres = fl_sd_ithres, 
      it_thres_invest_mth_gap=it_thres_invest_mth_gap
    )
  tstm_invest_func <- ls_return$tstm_invest
  tstm_invdates_uniq_func <- ls_return$tstm_invdates_uniq

  if (bl_use_func) {

    bl_func_script_consistency_tstm_invest <- identical(
      tstm_invest_func, tstm_invest
      )
    bl_func_script_consistency_tstm_invdates_uniq <- identical(
      tstm_invdates_uniq_func, tstm_invdates_uniq
      )
    print(glue::glue(
      "bl_func_script_consistency_tstm_invest = {bl_func_script_consistency_tstm_invest}\n",
      "bl_func_script_consistency_tstm_invdates_uniq = {bl_func_script_consistency_tstm_invdates_uniq}\n"
    ))

    tstm_invest <- tstm_invest_func
    tstm_invdates_uniq <- tstm_invdates_uniq_func

  } else {
    # for table 2, ff_hfid_invest_window()
    tstm_invdates_uniq <- tstm_invdates_uniq
    # for table 1, and table 2 ff_hfid_invest_winstats()
    # tstm_invest <- PrjThaiHFID::tstm_invest
    tstm_invest <- tstm_invest

  }



} else {

  # Get input paths
  # asset month by month file from R-script/ffs_hfid_gen_b_data.R
  spn_tstm_asset_loan <- file.path(spt_datatemp_folder, "tstm_asset_loan_fullinfo.csv", fsep = .Platform$file.sep)
  # household apperance start end file from R-script/ffs_hfid_gen_b_data.R
  spn_tstm_hh_mthspan <- file.path(spt_datatemp_folder, "tstm_hh_mthspan.csv", fsep = .Platform$file.sep)

  # Load files
  # for table 1
  tstm_hh_mthspan <- read_csv(spn_tstm_hh_mthspan)
  # for table 2, ff_hfid_invest_winstats()
  tstm_asset_loan <- read_csv(spn_tstm_asset_loan)

  # from hfid_invest_b
  spn_tstm_invdates_uniq <- file.path(spt_datatemp_folder, "tstm_invdates_uniq.csv", fsep = .Platform$file.sep)
  # all investment files from R-script/ffs_hfid_invest_c_sandbox.R
  spn_tstm_invest <- file.path(spt_datatemp_folder, "tstm_invest.csv", fsep = .Platform$file.sep)

  # for table 2, ff_hfid_invest_window()
  tstm_invdates_uniq <- read_csv(spn_tstm_invdates_uniq)
  # for table 1, and table 2 ff_hfid_invest_winstats()
  tstm_invest <- read_csv(spn_tstm_invest) %>%
    arrange(id, ivars, hh_inv_asset_ctr)
}
#> bl_func_script_consistency_tstm_invest = FALSE
#> bl_func_script_consistency_tstm_invdates_uniq = TRUE

Implement table to show lumpiness of investments

This block generates Table 1 issue 9.

Generate stats group 1

First, load in, househould start and end months (uoo=hh), and investment file (uoo=hhxivarxinvest), already done.

# Check
ar_st_ivars <- unique(tstm_invest$ivars)

Second, from invest file, group by hh x ivar, and count the numver of investments.

# Load files, already loaded tstm_hh_mthspan for hh start and end months
# investmetn file
st_id_ivar_n <- "id_ivar_n"
tstm_invest_idivar_n <- tstm_invest %>%
  group_by(id, ivars) %>%
  summarize(!!sym(st_id_ivar_n) := n())
#> `summarise()` has grouped output by 'id'. You can override using the `.groups`
#> argument.

# Check
unique(tstm_invest_idivar_n$ivars)
#> [1] "agg_BS_1011" "agg_BS_3011" "agg_BS_1012" "agg_BS_1021" "agg_BS_2011"
#> [6] "agg_BS_2021" "agg_BS_3021"

Third, merge with household startend file, replace NA with zero for each invest count.

# Subsetting hh with min obs count
tstm_hh_mthspan_sel <- tstm_hh_mthspan %>%
  filter(month_n == it_hh_mth_nbr)
# Left join to investment counts
# For households without any investments, they will have ivars=NA
tstm_hh_mthspan_wrk <- tstm_hh_mthspan_sel %>%
  left_join(tstm_invest_idivar_n, by = "id")
# Reshape long to wide, to have all ivars as cols, replace NA by 0
# Follow https://fanwangecon.github.io/R4Econ/panel/widelong/htmlpdfr/fs_pivotwider.html
st_ivar_prefix <- "ivar"
st_ivar_prefix_d <- paste0(st_ivar_prefix, "_")
tstm_hh_mthspan_wrk_wide <- tstm_hh_mthspan_wrk %>%
  pivot_wider(
    id_cols = c("id"),
    names_from = ivars,
    names_prefix = st_ivar_prefix_d,
    values_from = st_id_ivar_n
  )
#> Warning: Using an external vector in selections was deprecated in tidyselect 1.1.0.
#> ℹ Please use `all_of()` or `any_of()` instead.
#>   # Was:
#>   data %>% select(st_id_ivar_n)
#> 
#>   # Now:
#>   data %>% select(all_of(st_id_ivar_n))
#> 
#> See <https://tidyselect.r-lib.org/reference/faq-external-vector.html>.
#> This warning is displayed once every 8 hours.
#> Call `lifecycle::last_lifecycle_warnings()` to see where this warning was
#> generated.

# Drop NA column, due to hh without any invest only ivar=NA
# creating an extra category, all 0 invest household still have
# all other ivars
tstm_hh_mthspan_wrk_wide <- tstm_hh_mthspan_wrk_wide %>%
  select(-ivar_NA)
# replace NA values by 0s
tstm_hh_mthspan_wrk_wide <- tstm_hh_mthspan_wrk_wide %>%
  mutate_at(
    vars(contains(st_ivar_prefix_d)),
    list(~ replace_na(., 0))
  )
# Reshape back to long
# Follow https://fanwangecon.github.io/R4Econ/panel/widelong/htmlpdfr/fs_pivotlonger.html
tstm_hh_mthspan_wrk_long <- tstm_hh_mthspan_wrk_wide %>%
  pivot_longer(
    cols = starts_with(st_ivar_prefix_d),
    names_to = c(st_ivar_prefix),
    names_pattern = paste0(st_ivar_prefix_d, "(.*)"),
    values_to = st_id_ivar_n
  )

Fourth, group by ivar and invest count, tally.

# tally
tb_ivar_n_tally <- tstm_hh_mthspan_wrk_long %>%
  group_by(ivar, id_ivar_n) %>%
  tally() %>%
  spread(ivar, n)
# NA to zero
tb_ivar_n_tally <- tb_ivar_n_tally %>%
  mutate_at(vars(contains("agg_")), list(~ replace_na(., 0)))
# print
print(tb_ivar_n_tally, n = Inf)
#> # A tibble: 9 × 8
#>   id_ivar_n agg_BS_1011 agg_BS_1012 agg_BS_1021 agg_BS_2011 agg_BS_2021
#>       <int>       <int>       <int>       <int>       <int>       <int>
#> 1         0          31          84         180          89         216
#> 2         1         127         183         224         188         209
#> 3         2         144         160         123         159         118
#> 4         3         151          89          53          85          47
#> 5         4          96          62          27          60          19
#> 6         5          43          25           5          21           6
#> 7         6          15          11           4          12           1
#> 8         7           8           1           0           1           0
#> 9         8           1           1           0           1           0
#> # ℹ 2 more variables: agg_BS_3011 <int>, agg_BS_3021 <int>

Fifth, proportional version

# Freq to shares
tb_ivar_n_tally_share <- tb_ivar_n_tally %>%
  mutate_at(
    vars(contains("agg_")),
    list(~ . / sum(.))
  )
# Check
colSums(tb_ivar_n_tally_share)
#>   id_ivar_n agg_BS_1011 agg_BS_1012 agg_BS_1021 agg_BS_2011 agg_BS_2021 
#>          36           1           1           1           1           1 
#> agg_BS_3011 agg_BS_3021 
#>           1           1

# print
print(round(tb_ivar_n_tally_share, 2), n = Inf)
#> # A tibble: 9 × 8
#>   id_ivar_n agg_BS_1011 agg_BS_1012 agg_BS_1021 agg_BS_2011 agg_BS_2021
#>       <dbl>       <dbl>       <dbl>       <dbl>       <dbl>       <dbl>
#> 1         0        0.05        0.14        0.29        0.14        0.35
#> 2         1        0.21        0.3         0.36        0.31        0.34
#> 3         2        0.23        0.26        0.2         0.26        0.19
#> 4         3        0.25        0.14        0.09        0.14        0.08
#> 5         4        0.16        0.1         0.04        0.1         0.03
#> 6         5        0.07        0.04        0.01        0.03        0.01
#> 7         6        0.02        0.02        0.01        0.02        0   
#> 8         7        0.01        0           0           0           0   
#> 9         8        0           0           0           0           0   
#> # ℹ 2 more variables: agg_BS_3011 <dbl>, agg_BS_3021 <dbl>

Generate stats group 2

Load in group 1 stats above, and compute weighted average overall for each ivar.

First, compute means including zeros.

# Prob * Val
tb_ivar_n_tally_mean <- tb_ivar_n_tally_share %>%
  mutate_at(
    vars(contains("agg_")),
    list(~ . * id_ivar_n)
  )
# Sum each column
tb_ivar_n_tally_mean <- tb_ivar_n_tally_mean %>%
  mutate_at(
    vars(contains("agg_")),
    list(~ sum(.))
  ) %>%
  slice_head(n = 1) %>%
  select(contains("agg_"))
# Divide by number of years
tb_ivar_n_tally_mean_peryear <- tb_ivar_n_tally_mean %>%
  mutate_at(
    vars(contains("agg_")),
    list(~ . / (it_hh_mth_nbr / 12))
  )

# Print
print(tb_ivar_n_tally_mean)
#> # A tibble: 1 × 7
#>   agg_BS_1011 agg_BS_1012 agg_BS_1021 agg_BS_2011 agg_BS_2021 agg_BS_3011
#>         <dbl>       <dbl>       <dbl>       <dbl>       <dbl>       <dbl>
#> 1        2.63        1.99        1.28        1.94        1.13        2.53
#> # ℹ 1 more variable: agg_BS_3021 <dbl>

print(tb_ivar_n_tally_mean_peryear)
#> # A tibble: 1 × 7
#>   agg_BS_1011 agg_BS_1012 agg_BS_1021 agg_BS_2011 agg_BS_2021 agg_BS_3011
#>         <dbl>       <dbl>       <dbl>       <dbl>       <dbl>       <dbl>
#> 1       0.196       0.148      0.0951       0.144      0.0845       0.189
#> # ℹ 1 more variable: agg_BS_3021 <dbl>

Second, compute means excluding zeros.

# Recompute shares, after excluding zero
tb_ivar_n_tally_share_nozr <- tb_ivar_n_tally %>%
  filter(id_ivar_n != 0) %>%
  mutate_at(
    vars(contains("agg_")),
    list(~ . / sum(.))
  )
# print
colSums(tb_ivar_n_tally_share_nozr)
#>   id_ivar_n agg_BS_1011 agg_BS_1012 agg_BS_1021 agg_BS_2011 agg_BS_2021 
#>          36           1           1           1           1           1 
#> agg_BS_3011 agg_BS_3021 
#>           1           1

# Prob * Val
tb_ivar_n_tally_mean_nozr <- tb_ivar_n_tally_share_nozr %>%
  filter(id_ivar_n != 0) %>%
  mutate_at(
    vars(contains("agg_")),
    list(~ . * id_ivar_n)
  )
# Sum each column
tb_ivar_n_tally_mean_nozr <- tb_ivar_n_tally_mean_nozr %>%
  mutate_at(
    vars(contains("agg_")),
    list(~ sum(.))
  ) %>%
  slice_head(n = 1) %>%
  select(contains("agg_"))
# Divide by number of years
tb_ivar_n_tally_mean_nozr_peryear <- tb_ivar_n_tally_mean_nozr %>%
  mutate_at(
    vars(contains("agg_")),
    list(~ . / (it_hh_mth_nbr / 12))
  )

# Print
print(tb_ivar_n_tally_mean_nozr)
#> # A tibble: 1 × 7
#>   agg_BS_1011 agg_BS_1012 agg_BS_1021 agg_BS_2011 agg_BS_2021 agg_BS_3011
#>         <dbl>       <dbl>       <dbl>       <dbl>       <dbl>       <dbl>
#> 1        2.77        2.30        1.80        2.26        1.74        2.59
#> # ℹ 1 more variable: agg_BS_3021 <dbl>

print(tb_ivar_n_tally_mean_nozr_peryear)
#> # A tibble: 1 × 7
#>   agg_BS_1011 agg_BS_1012 agg_BS_1021 agg_BS_2011 agg_BS_2021 agg_BS_3011
#>         <dbl>       <dbl>       <dbl>       <dbl>       <dbl>       <dbl>
#> 1       0.207       0.171       0.134       0.169       0.130       0.193
#> # ℹ 1 more variable: agg_BS_3021 <dbl>

Generate stats group 3

First, Load in household monthly asset file, already loaded tstm_asset_loan.

Second, convert if household x monthly elements are non-zero to 1, zero keep as 0.

tstm_asset_loan_bi <- tstm_asset_loan %>%
  select(id, month, one_of(ar_st_ivars)) %>%
  mutate(across(
    contains("agg_"),
    function(x) {
      ifelse(x > 0, 1, 0)
    }
  ))

Third group by, sum monthly binary if var exists, aggregate to household level, binary if sum > 1 or if sum = 0

# Aggregate
tstm_asset_loan_bi_hh <- tstm_asset_loan_bi %>%
  group_by(id) %>%
  summarize_at(
    vars(contains("agg_")),
    list(~ sum(., na.rm = TRUE))
  )
# hh-level binary
tstm_asset_loan_bi_hh_bi <- tstm_asset_loan_bi_hh %>%
  mutate(across(
    contains("agg_"),
    function(x) {
      ifelse(x > 0, 1, 0)
    }
  ))

Fourth, merge with household startend file, condition as needed.

tstm_asset_bi <- tstm_hh_mthspan_sel %>%
  left_join(tstm_asset_loan_bi_hh_bi, by = "id") %>%
  select(id, contains("agg_"))

Fifth, compute share of households with non-zero

tstm_asset_bi_agg <- tstm_asset_bi %>% ungroup() %>%
  summarize_at(
    vars(contains("agg_")),
    list(~ sum(.) / n())
  )

Table display

Table input processing

First, sorting instructions.

# # Variable sequence
# ar_st_vars_kbl <- c(
#   "id_ivar_n",
#   "agg_BS_10", "agg_BS_11", "agg_BS_12",
#   "agg_BS_23", "agg_BS_09"
# )
# Variable sequence
ar_st_vars_kbl <- c(
  "id_ivar_n",
  "agg_BS_2011", "agg_BS_2021",
  "agg_BS_3011", "agg_BS_3021",
  "agg_BS_1011", "agg_BS_1012", "agg_BS_1021"
)

Second, clean-up stats group 1.

tb_tally_cur <- tb_ivar_n_tally_share
# Round and add percentage signs
tb_tally_cur_wkr <- tb_tally_cur %>%
  mutate_at(
    vars(contains("agg_")),
    list(~ paste0(
      format(round(., 3) * 100, nsmall = 1),
      "%"
    ))
  ) %>%
  mutate(id_ivar_n = as.character(id_ivar_n))

Third, clean-up and combine auxliary stats

# Combine auxillary stats
tb_tally_aux1_cur <- bind_rows(
  tb_ivar_n_tally_mean_peryear %>% mutate(id_ivar_n = "Include 0s"),
  tb_ivar_n_tally_mean_nozr_peryear %>% mutate(id_ivar_n = "Exclude 0s")
) %>% mutate_at(
  vars(contains("agg_")),
  list(~ as.character(
    format(round(., 2), nsmall = 2)
  ))
)
# Auxillary share stasts
tb_tally_aux2_cur <- bind_rows(
  tstm_asset_bi_agg %>% mutate(id_ivar_n = "Share")
) %>% mutate_at(
  vars(contains("agg_")),
  list(~ paste0(
    format(round(., 3) * 100, nsmall = 1),
    "%"
  ))
)

Fourth, combine auxliary and main stats.

# Combine auxillary stats
tb_tally_jnt <- bind_rows(
  tb_tally_cur_wkr,
  tb_tally_aux1_cur,
  tb_tally_aux2_cur
) %>% select(one_of(ar_st_vars_kbl))

Table final display

First, we define column names, which correspond to previously defined variable selection list.

# Column names
ar_st_col_names <- c(
  "# of investments",
  "Land + livestock + agri assets",
  "Agri assets",
  "Business + household assets",
  "Business assets",
  "Land + livest. + agri + biz + household",
  "All (w/o household assets)",
  "Agri + business assets"
)
# "All (Land + livestock + agri + household + biz)",
# "Productive assets (Land + livestock + agri + biz)",
st_texsize <- "footnotesize"
ar_st_col_names <- paste0(
  "\\", st_texsize, "{", ar_st_col_names, "}"
)
# Define column groups, grouping the names above
# =1/3/2 are number of columns group title covers
ar_st_col_groups <- c(
  " " = 1,
  "Agricultural assets" = 2,
  "Business assets" = 2,
  "Agricultural + business assets" = 3
)

# Second, we construct main table, and add styling.
bk_asset_count <- kbl(
  tb_tally_jnt,
  format = st_kableformat,
  # escape = F,
  linesep = "",
  booktabs = T,
  align = "c",
  caption = "Percentage of households with number of investments, survey month 1 to 160",
  col.names = ar_st_col_names
) %>%
  # see https://cran.r-project.org/web/packages/kableExtra/vignettes/awesome_table_in_html.html#Bootstrap_table_classes
  kable_styling(
    bootstrap_options = c("striped", "hover", "condensed", "responsive"),
    full_width = F, position = "left"
  )

# Third, we add in column groups.
bk_asset_count <- bk_asset_count %>% add_header_above(ar_st_col_groups)

# Fourth, we add in row groups.
bk_asset_count <- bk_asset_count %>%
  pack_rows(
    "Share of households with different number of investments over 160 months",
    1, 9,
    latex_gap_space = "0.5em"
  ) %>%
  pack_rows(
    "Mean number of investments per year over 160 months",
    10, 11,
    latex_gap_space = "0.5em", hline_before = T
  ) %>%
  pack_rows(
    "Share of household having any month with non-zero assets over 160 months",
    12, 12,
    latex_gap_space = "0.5em", hline_before = T
  )

# Fifth, column formatting.
bk_asset_count <- bk_asset_count %>%
  column_spec(1, width = "2.5cm") %>%
  column_spec(2:8, width = "2cm")

# FInal adjustments
bk_asset_count <- gsub(bk_asset_count,
  pattern = paste0("\\textbackslash{}", st_texsize, "\\"),
  replacement = paste0("\\", st_texsize), fixed = TRUE
)
bk_asset_count <- gsub(bk_asset_count,
  pattern = "\\}",
  replacement = "}", fixed = TRUE
)
bk_asset_count <- gsub(bk_asset_count,
  pattern = "hline",
  replacement = "midrule", fixed = TRUE
)

# Sixth, display.
# pl_bk_asset_count <- bk_asset_count %>% as_image()
bk_asset_count

Percentage of households with number of investments, survey month 1 to 160
	Agricultural assets		Business assets		Agricultural + business assets
{# of investments}	{Land + livestock + agri assets}	{Agri assets}	{Business + household assets}	{Business assets}	{Land + livest. + agri + biz + household}	{All (w/o household assets)}	{Agri + business assets}
Share of households with different number of investments over 160 months
0	14.4%	35.1%	2.3%	79.7%	5.0%	13.6%	29.2%
1	30.5%	33.9%	22.4%	14.0%	20.6%	29.7%	36.4%
2	25.8%	19.2%	30.0%	4.5%	23.4%	26.0%	20.0%
3	13.8%	7.6%	23.1%	1.0%	24.5%	14.4%	8.6%
4	9.7%	3.1%	14.0%	0.5%	15.6%	10.1%	4.4%
5	3.4%	1.0%	4.7%	0.2%	7.0%	4.1%	0.8%
6	1.9%	0.2%	2.8%	0.2%	2.4%	1.8%	0.6%
7	0.2%	0.0%	0.6%	0.0%	1.3%	0.2%	0.0%
8	0.2%	0.0%	0.2%	0.0%	0.2%	0.2%	0.0%
Mean number of investments per year over 160 months
Include 0s	0.14	0.08	0.19	0.02	0.20	0.15	0.10
Exclude 0s	0.17	0.13	0.19	0.11	0.21	0.17	0.13
Share of household having any month with non-zero assets over 160 months
Share	97.2%	81.3%	98.4%	23.5%	98.5%	97.6%	85.1%

Investment table to show sizes of investments

Table 2 issue 9.

Prepare input data

Generate investment windows by unique investments

Generate in investment file (uoo=hhxivarxinvest), and load in windowed stats file (uoo=hhxivarxinvestxvarxwin) 12 month as 1 month window versions.

First, generate in investment file (uoo=hhxivarxinvest), and load in windowed stats file (uoo=hhxivarxinvestxvarxwin) 12 month as 1 month window versions.

# 1 month window file, changes in capital-assets themselves (as well as cash/debt)
df_invwin_uniq_fw1bw1 <- PrjThaiHFID::ff_hfid_invest_window(
  df_invdates_uniq = tstm_invdates_uniq,
  it_window_forward = 1,
  it_window_backward = 1,
  bl_forward_include_imth = FALSE,
  bl_backward_include_imth = FALSE
)
# 12 month window file
df_invwin_uniq_fw12bw12 <- PrjThaiHFID::ff_hfid_invest_window(
  df_invdates_uniq = tstm_invdates_uniq,
  it_window_forward = 12,
  it_window_backward = 12,
  bl_forward_include_imth = FALSE,
  bl_backward_include_imth = FALSE
)

Second, generate mean and sum stats, short and long, respectively.

# Either sum or mean, not both
# 1 month window file, mean
df_invest_winstats_fw1bw1_mean <- PrjThaiHFID::ff_hfid_invest_winstats(
  df_asset_loan = tstm_asset_loan,
  df_invest = tstm_invest,
  df_invwin_uniq = df_invwin_uniq_fw1bw1,
  bl_compute_sum = FALSE,
  bl_compute_mean = TRUE
)
# 12 month window file, sum
df_invest_winstats_fw12bw12_sum <- PrjThaiHFID::ff_hfid_invest_winstats(
  df_asset_loan = tstm_asset_loan,
  df_invest = tstm_invest,
  df_invwin_uniq = df_invwin_uniq_fw12bw12,
  bl_compute_sum = TRUE,
  bl_compute_mean = FALSE
)

Third, combine output to list.

# Sets of windowed files
ls_df_invest_winstats_fwbw <- list(
  # Compare against backward window capital assets
  fw1bw1mean_back = df_invest_winstats_fw1bw1_mean,
  # Compare against backward window revenue sums
  fw12bw12sum_back = df_invest_winstats_fw12bw12_sum
)
ar_svr_window_comp <- c("win_backward", "win_backward")
# Get length and names
it_df_count <- length(ls_df_invest_winstats_fwbw)
ar_st_df_names <- names(ls_df_invest_winstats_fwbw)

Generate stats step 1, run window functions

Compute percentages.

12 month file: For revenue stats, based on 12 month file compute ratios of investments relative to last 12 months sum.
1 month file: Ratios of invest size var versus asset var. Stack 12 month and 1month files together

# Storage collection
ls_invest_win_shr <- vector(mode = "list", length = it_df_count)
names(ls_invest_win_shr) <- ar_st_df_names

# Loop over list of dataframes
for (it_df in seq(1, it_df_count)) {
  # Load in file
  st_df_name <- ar_st_df_names[it_df]
  df_invest_winstats_fwbw <- ls_df_invest_winstats_fwbw[[st_df_name]]
  svr_window_comp <- ar_svr_window_comp[it_df]

  # Compute proportions
  df_invest_winstats_shr <- df_invest_winstats_fwbw %>%
    # filter(blnc_vars == "IS_07") %>%
    filter(!!sym(svr_window_comp) != 0) %>%
    mutate(shr_stat = capital_invest / !!sym(svr_window_comp)) %>%
    select(
      id,
      ivars, hh_inv_asset_ctr, hh_inv_ctr,
      blnc_vars, blnc_agg_stats,
      # capital_invest, win_backward,
      shr_stat
    )

  # Store results
  ls_invest_win_shr[[st_df_name]] <- df_invest_winstats_shr
}

# df_invest_winstats_fw12bw12_shr <- df_invest_winstats_fw12bw12 %>%
#   # filter(blnc_vars == "IS_07") %>%
#   filter(win_backward != 0) %>%
#   mutate(shr_stat = capital_invest / win_backward) %>%
#   select(
#     id,
#     ivars, hh_inv_asset_ctr, hh_inv_ctr,
#     blnc_vars, blnc_agg_stats,
#     capital_invest, win_backward, shr_stat
#   )

Generate stats step 2, ratios and within group percentiles

Compute within balance cheet and investment variable CDF.

# Storage collection
ls_invest_win_pct <- vector(mode = "list", length = it_df_count)
names(ls_invest_win_pct) <- ar_st_df_names

# Loop over list of dataframes
for (it_df in seq(1, it_df_count)) {
  # Load in file
  st_df_name <- ar_st_df_names[it_df]
  df_invest_win_shr <- ls_invest_win_shr[[st_df_name]]

  # Generate within-group CDF
  df_invest_win_pct <- df_invest_win_shr %>%
    select(blnc_vars, ivars, shr_stat) %>%
    arrange(blnc_vars, ivars, shr_stat) %>%
    group_by(blnc_vars, ivars) %>%
    mutate(cdf = row_number() / n())

  # Store results
  ls_invest_win_pct[[st_df_name]] <- df_invest_win_pct
}
#> Adding missing grouping variables: `id`, `hh_inv_ctr`
#> Adding missing grouping variables: `id`, `hh_inv_ctr`


# # Generate within-group CDF
# df_invest_winstats_fw12bw12_pct <- df_invest_winstats_fw12bw12_shr %>%
#   arrange(blnc_vars, ivars, shr_stat) %>%
#   group_by(blnc_vars, ivars) %>%
#   mutate(cdf = row_number() / n())

Compute a subset of percentiles of interests.

# Define strings
st_var_prefix <- "shr"
st_var_prefix_perc <- paste0(st_var_prefix, "_p")
svr_mean <- paste0(st_var_prefix, "_mean")

# Storage collection
ls_df_cross_pct <- vector(mode = "list", length = it_df_count)
names(ls_df_cross_pct) <- ar_st_df_names

# Loop over list of dataframes
for (it_df in seq(1, it_df_count)) {
  # Load in file
  st_df_name <- ar_st_df_names[it_df]
  df_invest_win_pct <- ls_invest_win_pct[[st_df_name]]

  # Generate within-group percentiles
  for (it_percentile_ctr in seq(1, length(ar_fl_percentiles))) {
    # Current within group percentile to compute
    fl_percentile <- ar_fl_percentiles[it_percentile_ctr]

    # Percentile and mean stats
    svr_percentile <- paste0(st_var_prefix_perc, round(fl_percentile * 100))

    # Frame with specific percentile
    df_within_percentiles_cur <- df_invest_win_pct %>%
      group_by(blnc_vars, ivars) %>%
      filter(cdf >= fl_percentile) %>%
      slice(1) %>%
      mutate(!!sym(svr_percentile) := shr_stat) %>%
      select(blnc_vars, ivars, one_of(svr_percentile))

    # Merge percentile frames together
    if (it_percentile_ctr > 1) {
      df_within_percentiles <- df_within_percentiles %>%
        left_join(df_within_percentiles_cur,
          by = c("blnc_vars" = "blnc_vars", "ivars" = "ivars")
        )
    } else {
      df_within_percentiles <- df_within_percentiles_cur
    }
  }

  # Add in within group mean
  df_within_percentiles_mean <- df_invest_win_pct %>%
    group_by(blnc_vars, ivars) %>%
    mutate(!!sym(svr_mean) := mean(shr_stat, na.rm = TRUE)) %>%
    slice(1)
  # Join to file
  df_within_percentiles <- df_within_percentiles %>%
    left_join(df_within_percentiles_mean,
      by = c("blnc_vars" = "blnc_vars", "ivars" = "ivars")
    ) %>%
    select(
      blnc_vars, ivars,
      one_of(svr_mean), contains(st_var_prefix_perc)
    )

  # Store results
  ls_df_cross_pct[[st_df_name]] <- df_within_percentiles
}

# display
print(ls_df_cross_pct)
#> $fw1bw1mean_back
#> # A tibble: 147 × 10
#> # Groups:   blnc_vars, ivars [147]
#>    blnc_vars ivars      shr_mean  shr_p5 shr_p10 shr_p25 shr_p50 shr_p75 shr_p90
#>    <chr>     <chr>         <dbl>   <dbl>   <dbl>   <dbl>   <dbl>   <dbl>   <dbl>
#>  1 BS_01     agg_BS_10…    1.06  0.0308  0.0515   0.122   0.286    0.658   1.40 
#>  2 BS_01     agg_BS_10…    0.861 0.00490 0.0116   0.0419  0.142    0.412   0.971
#>  3 BS_01     agg_BS_10…    0.394 0.00446 0.00892  0.0229  0.0886   0.307   0.772
#>  4 BS_01     agg_BS_20…    0.856 0.00517 0.0116   0.0388  0.135    0.386   0.947
#>  5 BS_01     agg_BS_20…    0.382 0.00475 0.00886  0.0210  0.0764   0.279   0.728
#>  6 BS_01     agg_BS_30…    0.712 0.0171  0.0292   0.0687  0.193    0.495   1.08 
#>  7 BS_01     agg_BS_30…    0.285 0.00146 0.00334  0.0108  0.0893   0.262   0.770
#>  8 BS_03     agg_BS_10…   84.4   0.210   0.396    1.21    5.72    29.7   113.   
#>  9 BS_03     agg_BS_10…   63.7   0.0543  0.150    0.598   2.70    14.9    57.1  
#> 10 BS_03     agg_BS_10…   43.8   0.0261  0.0641   0.251   1.50     8.82   41.0  
#> # ℹ 137 more rows
#> # ℹ 1 more variable: shr_p95 <dbl>
#> 
#> $fw12bw12sum_back
#> # A tibble: 147 × 10
#> # Groups:   blnc_vars, ivars [147]
#>    blnc_vars ivars      shr_mean  shr_p5 shr_p10 shr_p25 shr_p50 shr_p75 shr_p90
#>    <chr>     <chr>         <dbl>   <dbl>   <dbl>   <dbl>   <dbl>   <dbl>   <dbl>
#>  1 BS_01     agg_BS_10…   0.0636 2.68e-3 4.39e-3 0.0108  0.0257   0.0598  0.122 
#>  2 BS_01     agg_BS_10…   0.0437 4.11e-4 1.01e-3 0.00359 0.0126   0.0345  0.0836
#>  3 BS_01     agg_BS_10…   0.0237 3.36e-4 6.55e-4 0.00200 0.00718  0.0245  0.0609
#>  4 BS_01     agg_BS_20…   0.0428 4.13e-4 9.90e-4 0.00336 0.0116   0.0332  0.0794
#>  5 BS_01     agg_BS_20…   0.0219 3.80e-4 6.42e-4 0.00174 0.00593  0.0213  0.0543
#>  6 BS_01     agg_BS_30…   0.0441 1.53e-3 2.52e-3 0.00611 0.0167   0.0440  0.0935
#>  7 BS_01     agg_BS_30…   0.0233 1.54e-4 3.13e-4 0.00102 0.00662  0.0217  0.0648
#>  8 BS_03     agg_BS_10…   8.71   1.98e-2 3.76e-2 0.119   0.524    2.22    9.16  
#>  9 BS_03     agg_BS_10…   6.89   4.76e-3 1.34e-2 0.0529  0.247    1.21    4.50  
#> 10 BS_03     agg_BS_10…   2.64   2.55e-3 6.28e-3 0.0223  0.131    0.752   3.90  
#> # ℹ 137 more rows
#> # ℹ 1 more variable: shr_p95 <dbl>

Table display

Table input processing

Get stats tables.

# Compare against backward window capital assets
df_fw1bw1mean_back <- ls_df_cross_pct$fw1bw1mean_back %>% ungroup()
df_fw12bw12sum_back <- ls_df_cross_pct$fw12bw12sum_back %>% ungroup()

Most recent month, relationship between investment and investment. Potentially shows that not only are investments rare, they are sizable.

Learned:

very small and very large ratios are both possible
highlight results from: BS_1021 agg_BS_1021, median of 1.3, pretty large changes, largest among total asset.
- include also components: BS_2021 agg_BS_2021
- include also components: BS_3021 agg_BS_3021
- could mention 1021 relative to 1011 as stats not in table but mentioned, but maybe also include: BS_1012 agg_BS_1021

# All
df_all_m1mean <- df_fw1bw1mean_back %>%
  filter(str_detect(blnc_vars, "BS_10"), str_detect(ivars, "BS_10"))
(df_all_m1mean) %>%
  kable() %>%
  kable_styling()

blnc_vars	ivars	shr_mean	shr_p5	shr_p10	shr_p25	shr_p50	shr_p75	shr_p90	shr_p95
BS_1011	agg_BS_1011	0.7409393	0.0094756	0.0187255	0.0510925	0.1554751	0.4562367	1.4642133	2.7488136
BS_1011	agg_BS_1012	0.3843737	0.0029097	0.0057487	0.0182804	0.0696245	0.2166457	0.6888826	1.5406523
BS_1011	agg_BS_1021	0.1467474	0.0021958	0.0036269	0.0088512	0.0308129	0.1094246	0.3196975	0.6704482
BS_1012	agg_BS_1011	4464.7098174	0.0094223	0.0182549	0.0506443	0.1674679	0.6089942	3.1186109	10.9375637
BS_1012	agg_BS_1012	3416.2226959	0.0033846	0.0067887	0.0215576	0.0892095	0.3351785	1.5240729	4.3593331
BS_1012	agg_BS_1021	0.4798713	0.0023282	0.0039521	0.0096689	0.0396702	0.1505598	0.5786516	1.2743372
BS_1021	agg_BS_1011	322.6270777	0.6026806	0.9534828	2.0851890	7.6318499	35.1452387	160.8831250	447.3166838
BS_1021	agg_BS_1012	230.8062149	0.2691948	0.3965798	0.9524189	3.3718758	16.3585899	69.1739812	226.7477142
BS_1021	agg_BS_1021	244.3168505	0.1310442	0.1965548	0.4304811	1.2548145	5.8782877	35.9970876	128.6468977

# Agriculture
df_agri_m1mean <- df_fw1bw1mean_back %>%
  filter(str_detect(blnc_vars, "BS_20"), str_detect(ivars, "BS_20"))
(df_agri_m1mean) %>%
  kable() %>%
  kable_styling()

blnc_vars	ivars	shr_mean	shr_p5	shr_p10	shr_p25	shr_p50	shr_p75	shr_p90	shr_p95
BS_2011	agg_BS_2011	3511.1318345	0.0030608	0.0064415	0.0203075	0.0853109	0.3511469	1.6363636	4.702961
BS_2011	agg_BS_2021	0.5136162	0.0019996	0.0034109	0.0082789	0.0326087	0.1332474	0.5301598	1.121944
BS_2021	agg_BS_2011	311.5305419	0.3121809	0.4988032	1.1655180	4.3628114	21.3646568	93.9352683	290.990472
BS_2021	agg_BS_2021	276.1153017	0.1401637	0.2225300	0.4518415	1.2881209	6.4738158	45.0920958	136.131047

# Business
df_biz_m1mean <- df_fw1bw1mean_back %>%
  filter(str_detect(blnc_vars, "BS_30"), str_detect(ivars, "BS_30"))
(df_biz_m1mean) %>%
  kable() %>%
  kable_styling()

blnc_vars	ivars	shr_mean	shr_p5	shr_p10	shr_p25	shr_p50	shr_p75	shr_p90	shr_p95
BS_3011	agg_BS_3011	4.104832	0.1751265	0.2289867	0.4433280	1.0510476	2.8532376	6.952639	12.117628
BS_3011	agg_BS_3021	1.237421	0.0121182	0.0168242	0.0522185	0.1665355	0.8546702	2.767697	6.132434
BS_3021	agg_BS_3011	47.937796	0.3550340	0.6475215	1.5600846	7.3068073	23.3413532	84.160046	137.289053
BS_3021	agg_BS_3021	55.123023	0.0817616	0.1523057	0.3732541	0.8460228	3.3656165	18.695871	65.872575

Select subset of key stats to be displayed.

# Results 1
df_m1mean_agri_bs2021 <- df_fw1bw1mean_back %>%
  filter(str_detect(blnc_vars, "BS_2021"), str_detect(ivars, "BS_2021"))
# Results 2
df_m1mean_biz_bs3021 <- df_fw1bw1mean_back %>%
  filter(str_detect(blnc_vars, "BS_3021"), str_detect(ivars, "BS_3021"))
# Results 3a
df_m1mean_all_bs1021 <- df_fw1bw1mean_back %>%
  filter(str_detect(blnc_vars, "BS_1021"), str_detect(ivars, "BS_1021"))
# Results 3b
df_m1mean_all_bs1021v1012 <- df_fw1bw1mean_back %>%
  filter(str_detect(blnc_vars, "BS_1012"), str_detect(ivars, "BS_1021"))

Select revenue past 12 month sum vs investment shares. We look at overall, business, and agriculture in three groups. Are they “large” relative to revenue, if they are, they are presumably hard to pay off right away.

Findings:

main: IS_1021 agg_BS_1021, note these are shares of revenue, they might seem low possibly, but firms would need to be highly profitable to be able to pay off debt with one year profits.
details:
- IS_2021 agg_BS_2021
- IS_3021 agg_BS_3021
aux:
- IS_1012 agg_BS_1021, do not consider wage income, too confusing, profit vs revenue, all else is revenue, not profit.

# All
df_all_m12sum <- df_fw12bw12sum_back %>%
  rowwise() %>%
  filter(str_detect(blnc_vars, "IS_10"), str_detect(ivars, "BS_10"))
(df_all_m12sum) %>%
  kable() %>%
  kable_styling()

blnc_vars	ivars	shr_mean	shr_p5	shr_p10	shr_p25	shr_p50	shr_p75	shr_p90	shr_p95
IS_1011	agg_BS_1011	5.2395354	0.0518194	0.0808174	0.1753382	0.3924437	0.9940082	3.0305128	7.357891
IS_1011	agg_BS_1012	2.2263033	0.0086380	0.0184070	0.0548920	0.1819977	0.5098756	1.5267083	3.443287
IS_1011	agg_BS_1021	0.4175894	0.0080693	0.0131166	0.0329538	0.0931717	0.2968234	0.9537436	1.752554
IS_1012	agg_BS_1011	32.4064602	0.0756927	0.1284194	0.2922908	0.8011254	3.1696988	16.5692433	50.550778
IS_1012	agg_BS_1012	6.8696336	0.0248754	0.0425379	0.1061457	0.3281604	1.0111954	4.3725962	12.716746
IS_1012	agg_BS_1021	2.5088201	0.0116684	0.0205167	0.0510150	0.1594974	0.5274727	1.9627276	4.324575
IS_1013	agg_BS_1011	46.5882017	0.0756927	0.1266173	0.2922908	0.8370755	3.8906974	31.8184223	102.081767
IS_1013	agg_BS_1012	12.3744098	0.0253947	0.0454316	0.1117097	0.3612966	1.2039110	6.5200087	24.424456
IS_1013	agg_BS_1021	3.8458373	0.0119344	0.0200739	0.0518324	0.1671119	0.6765582	2.3504982	7.649960
IS_1021	agg_BS_1011	40.2759785	0.0920663	0.1549652	0.3591124	1.0098054	3.8972197	18.4348148	59.815292
IS_1021	agg_BS_1012	9.1150478	0.0318605	0.0546490	0.1379379	0.4683347	1.6426657	6.8713062	19.289870
IS_1021	agg_BS_1021	2.7808661	0.0162154	0.0264470	0.0806926	0.2639442	1.1820886	4.0540541	10.863832

# Agriculture
df_agri_m12sum <- df_fw12bw12sum_back %>%
  rowwise() %>%
  filter(str_detect(blnc_vars, "IS_20"), str_detect(ivars, "BS_20"))
(df_agri_m12sum) %>%
  kable() %>%
  kable_styling()

blnc_vars	ivars	shr_mean	shr_p5	shr_p10	shr_p25	shr_p50	shr_p75	shr_p90	shr_p95
IS_2011	agg_BS_2011	27.666354	0.0331972	0.0548407	0.1452395	0.5090307	1.819809	14.817191	54.17936
IS_2011	agg_BS_2021	6.207505	0.0162411	0.0247792	0.0606670	0.1931694	1.061360	3.409755	12.32706
IS_2021	agg_BS_2011	62.850706	0.0445204	0.0713936	0.1878591	0.6367193	2.132510	11.090050	40.47634
IS_2021	agg_BS_2021	5.688836	0.0244642	0.0399742	0.1070825	0.4185530	1.743829	6.053045	12.84006

# Business
df_biz_m12sum <- df_fw12bw12sum_back %>%
  rowwise() %>%
  filter(str_detect(blnc_vars, "IS_30"), str_detect(ivars, "BS_30"))
(df_biz_m12sum) %>%
  kable() %>%
  kable_styling()

blnc_vars	ivars	shr_mean	shr_p5	shr_p10	shr_p25	shr_p50	shr_p75	shr_p90	shr_p95
IS_3011	agg_BS_3011	11.106808	0.0414426	0.0762695	0.1923083	0.5952935	2.2182231	11.324571	33.258074
IS_3011	agg_BS_3021	12.312756	0.0026585	0.0071920	0.0184186	0.0900940	0.2832460	1.972595	4.501801
IS_3021	agg_BS_3011	23.428332	0.0483213	0.0890390	0.3143638	1.6317897	8.8452561	34.274682	85.173870
IS_3021	agg_BS_3021	3.707475	0.0071231	0.0139786	0.0388632	0.1392530	0.5942617	3.155637	14.595556

Select subset of key stats to be displayed.

# Results 1
df_m12sum_agri_bs2021 <- df_fw12bw12sum_back %>%
  filter(str_detect(blnc_vars, "IS_2021"), str_detect(ivars, "BS_2021"))
# Results 2
df_m12sum_biz_bs3021 <- df_fw12bw12sum_back %>%
  filter(str_detect(blnc_vars, "IS_3021"), str_detect(ivars, "BS_3021"))
# Results 3a
df_m12sum_all_bs1021 <- df_fw12bw12sum_back %>%
  filter(str_detect(blnc_vars, "IS_1021"), str_detect(ivars, "BS_1021"))
# Results 3b
df_m12sum_all_bs1021v1012 <- df_fw12bw12sum_back %>%
  filter(str_detect(blnc_vars, "IS_1012"), str_detect(ivars, "BS_1021"))
# Bind rows for select

Combine 1 and 12 month info, transpose, and prep

First, combine all rows together and combine blnc_vars and ivars. Do not resort, preserve existing orders. Drop the mean, not informative.

# Bind rows for selected key results:
df_m1mean_m12_sum_jnt <- bind_rows(
  df_m1mean_agri_bs2021,
  df_m12sum_agri_bs2021,
  df_m1mean_biz_bs3021,
  df_m12sum_biz_bs3021,
  df_m1mean_all_bs1021v1012,
  df_m12sum_all_bs1021v1012,
  df_m1mean_all_bs1021,
  df_m12sum_all_bs1021
) %>%
  mutate(
    blnc_vars_ivars = paste0(ivars, "_d_", blnc_vars)
  ) %>%
  select(
    blnc_vars_ivars, -blnc_vars, -ivars,
    contains(st_var_prefix_perc)
  )

Second, reshape wide to long then to wide again to transpose.

df_m1mean_m12_sum_jnt_trans <- df_m1mean_m12_sum_jnt %>%
  pivot_longer(
    cols = starts_with(st_var_prefix_perc),
    names_to = c("percentile"),
    names_pattern = paste0(st_var_prefix_perc, "(.*)"),
    values_to = "value"
  ) %>%
  pivot_wider(
    names_from = blnc_vars_ivars,
    values_from = value
  )

Third, percentage formatting.

df_m1mean_m12_sum_jnt_trans <- df_m1mean_m12_sum_jnt_trans %>%
  mutate_at(
    vars(contains("agg_")),
    list(~ paste0(
      format(round(., 3) * 100,
        nsmall = 1,
        big.mark = ","
      ),
      "%"
    ))
  )

Table final display

First, we define column names, which correspond to previously defined variable selection list.

# Column names
ar_st_col_names <- c(
  "Percentiles",
  "$\\frac{\\text{Invest size}}{\\text{Pre mth assets}}$",
  "$\\frac{\\text{Invest size}}{\\text{Pre 12 mth rev}}$",
  "$\\frac{\\text{Invest size}}{\\text{Pre mth assets}}$",
  "$\\frac{\\text{Invest size}}{\\text{Pre 12 mth rev}}$",
  "$\\frac{\\text{Invest size}}{\\text{Pre mth assets}}$",
  "$\\frac{\\text{Invest size}}{\\text{Pre 12 mth rev}}$",
  "$\\frac{\\text{Invest size}}{\\text{Pre mth assets}}$",
  "$\\frac{\\text{Invest size}}{\\text{Pre 12 mth rev}}$"
)
# Define column groups, grouping the names above
ar_st_col_groups <- c(
  " " = 1,
  "Invest/assets/rev" = 2,
  "Invest/assets/rev" = 2,
  "All prod assets/rev" = 2,
  "Agri + biz assets/rev" = 2
)
# Define column groups, grouping the names above
ar_st_col_groups_super <- c(
  " " = 1,
  "Agricultural" = 2,
  "Business" = 2,
  "Agricutural + business investments" = 4
)

# Second, we construct main table, and add styling.
bk_invest_prior_ratio <- kbl(
  df_m1mean_m12_sum_jnt_trans,
  format = st_kableformat,
  # escape = F,
  linesep = "",
  booktabs = T,
  align = "c",
  caption = "Ratio of investment to prior to investment month assets and year revenues.",
  col.names = ar_st_col_names
) %>%
  # see https://cran.r-project.org/web/packages/kableExtra/vignettes/awesome_table_in_html.html#Bootstrap_table_classes
  kable_styling(
    bootstrap_options = c("striped", "hover", "condensed", "responsive"),
    full_width = F, position = "left"
  )

# Third, we add in row groups
bk_invest_prior_ratio <- bk_invest_prior_ratio %>%
  add_header_above(ar_st_col_groups) %>%
  add_header_above(ar_st_col_groups_super) 
  # %>%
  # # collapse_rows(columns = 2:5, latex_hline = "major", valign = "middle")

# Fourth, we add in column groups. 
bk_invest_prior_ratio <- bk_invest_prior_ratio %>%
  pack_rows(
    "Bottom decile",
    1, 2,
    latex_gap_space = "0.5em"
  ) %>%
  pack_rows(
    "Quartiles",
    3, 5,
    latex_gap_space = "0.5em", hline_before = F
  ) %>%
  pack_rows(
    "Top decile",
    6, 7,
    latex_gap_space = "0.5em", hline_before = F
  )

# Fifth, column formatting.
bk_invest_prior_ratio <- bk_invest_prior_ratio %>%
  column_spec(1, width = "2.5cm") %>%
  column_spec(2:9, width = "2cm")

# Final adjustments
st_texcmd <- "frac"
bk_invest_prior_ratio <- gsub(bk_invest_prior_ratio,
  pattern = paste0("\\textbackslash{}", st_texcmd, "\\"),
  replacement = paste0("\\", st_texcmd), fixed = TRUE
)
st_texcmd <- "text"
bk_invest_prior_ratio <- gsub(bk_invest_prior_ratio,
  pattern = paste0("\\textbackslash{}", st_texcmd, "\\"),
  replacement = paste0("\\", st_texcmd), fixed = TRUE
)
bk_invest_prior_ratio <- gsub(bk_invest_prior_ratio,
  pattern = "\\}\\{",
  replacement = "}{", fixed = TRUE
)
bk_invest_prior_ratio <- gsub(bk_invest_prior_ratio,
  pattern = "\\}",
  replacement = "}", fixed = TRUE
)
bk_invest_prior_ratio <- gsub(bk_invest_prior_ratio,
  pattern = "\\$",
  replacement = "$", fixed = TRUE
)

# Sixth, display.
# pl_bk_asset_count <- bk_invest_prior_ratio %>% as_image()
bk_invest_prior_ratio

Ratio of investment to prior to investment month assets and year revenues.
	Agricultural		Business		Agricutural + business investments
	Invest/assets/rev		Invest/assets/rev		All prod assets/rev		Agri + biz assets/rev
Percentiles	\(\frac{\text{Invest size}}{\text{Pre mth assets}}\)	\(\frac{\text{Invest size}}{\text{Pre 12 mth rev}}\)	\(\frac{\text{Invest size}}{\text{Pre mth assets}}\)	\(\frac{\text{Invest size}}{\text{Pre 12 mth rev}}\)	\(\frac{\text{Invest size}}{\text{Pre mth assets}}\)	\(\frac{\text{Invest size}}{\text{Pre 12 mth rev}}\)	\(\frac{\text{Invest size}}{\text{Pre mth assets}}\)	\(\frac{\text{Invest size}}{\text{Pre 12 mth rev}}\)
Bottom decile
5	14.0%	2.4%	8.2%	0.7%	0.2%	1.2%	13.1%	1.6%
10	22.3%	4.0%	15.2%	1.4%	0.4%	2.1%	19.7%	2.6%
Quartiles
25	45.2%	10.7%	37.3%	3.9%	1.0%	5.1%	43.0%	8.1%
50	128.8%	41.9%	84.6%	13.9%	4.0%	15.9%	125.5%	26.4%
75	647.4%	174.4%	336.6%	59.4%	15.1%	52.7%	587.8%	118.2%
Top decile
90	4,509.2%	605.3%	1,869.6%	315.6%	57.9%	196.3%	3,599.7%	405.4%
95	13,613.1%	1,284.0%	6,587.3%	1,459.6%	127.4%	432.5%	12,864.7%	1,086.4%

Parameters

Path and files

Implement table to show lumpiness of investments

Generate stats group 1

Generate stats group 2

Generate stats group 3

Table display

Table input processing

Table final display

Investment table to show sizes of investments

Prepare input data

Generate stats step 1, run window functions

Generate stats step 2, ratios and within group percentiles

Table display

Table input processing

Review and select: Investment as share of prior month capital (level)

Review and select: Investment as a share of revenue past 12 months (level)

Combine 1 and 12 month info, transpose, and prep

Table final display