Skip to contents

We implement PrjThaiHFID-#9.

Data workflow (independent of the bridge gateway vignette). This vignette is not tied to ffv_invest_loan_bridge.qmd or to packaged data/tstm_invest.rda (which holds the MBF gateway’s three paper ivars only). We lazy-load tstm_hh_mthspan and tstm_asset_loan from data/ (both use anonymized tmid_hh in column id; see R/data.R and data-raw/id_anonymize/), then regenerate tstm_invest and tstm_invdates_uniq via ffp_hfid_invest_gateway() on all agg_BS_* columns, so unique(tstm_invest$ivars) is the full asset-definition set (~12 ivars), not the three-ivar gateway snapshot.

Unit of observation: household x asset-definition ivars for the frequency tables; investment spell x balance-sheet window aggregation for the size tables.

Outputs at a glance

This vignette produces summary tables only — it does not save any packaged dataset. Individual tables can be written to the package res/res_invest_freq_sizes folder by setting their entry in ls_save_res to TRUE (all default FALSE; the res/ artifacts are git-ignored and saved only for local convenience). Two table families are generated:

  • Investment lumpiness / frequency: distribution of the number of investments per household across asset definitions (ivars), with auxiliary shares.
  • Investment sizes: windowed balance-sheet changes around investments (forward/backward windows), reported as prior-ratio statistics by asset class.

Inputs are the packaged tstm_asset_loan and tstm_hh_mthspan; investments are regenerated in-file via ffp_hfid_invest_gateway() (full agg_BS_* set) rather than read from packaged tstm_invest.

Pipeline structure

flowchart TD
  subgraph inputs [Packaged data inputs]
    assetLoan[tstm_asset_loan]
    mthspan[tstm_hh_mthspan]
  end
  subgraph regen [Regenerate investments]
    gw[ffp_hfid_invest_gateway full agg_BS set]
    assetLoan --> gw
    gw --> invest[tstm_invest]
    gw --> invdates[tstm_invdates_uniq]
  end
  subgraph freq [Investment frequency]
    mthspan --> dur[filter min duration it_hh_mth_nbr]
    invest --> cnt[count investments per hh and ivars]
    dur --> cnt
    cnt --> freqtab[lumpiness frequency table]
  end
  subgraph sizes [Investment sizes]
    invdates --> win[window functions forward backward]
    assetLoan --> ws[winstats balance aggregation]
    invest --> ws
    win --> ws
    ws --> ratio[prior-ratio stats by asset class]
    ratio --> sizetab[investment sizes table]
  end

library(PrjThaiHFID)
library(dplyr)
#> 
#> Attaching package: 'dplyr'
#> The following objects are masked from 'package:stats':
#> 
#>     filter, lag
#> The following objects are masked from 'package:base':
#> 
#>     intersect, setdiff, setequal, union
library(tidyr)
library(stringr)
library(glue)
library(ggplot2)
library(kableExtra)
#> 
#> Attaching package: 'kableExtra'
#> The following object is masked from 'package:dplyr':
#> 
#>     group_rows
spn_pkg_root <- rprojroot::find_root(rprojroot::has_file("DESCRIPTION"))
spt_res <- file.path(spn_pkg_root, "res", "res_invest_freq_sizes")
ls_save_res <- list(
  bk_asset_count = FALSE,
  bk_invest_prior_ratio = FALSE,
  tb_all_m1mean = FALSE,
  tb_agri_m1mean = FALSE,
  tb_biz_m1mean = FALSE,
  tb_all_m12sum = FALSE,
  tb_agri_m12sum = FALSE,
  tb_biz_m12sum = FALSE
)
bs_style <- c("striped", "hover", "condensed", "responsive")
options(kable_styling_bootstrap_options = bs_style)

Parameters

# Choose latex for paper output
# st_kableformat <- "latex"
st_kableformat <- "html"
# Minimum month duration for a household to be counted
it_hh_mth_nbr <- 161
# Percentiles of interest
ar_fl_percentiles <- c(0.05, 0.10, 0.25, 0.50, 0.75, 0.9, 0.95)

Load packaged inputs and regenerate investments

# Packaged inputs (documented in R/data.R; anonymized tmid_hh in column id)
tstm_hh_mthspan <- tstm_hh_mthspan
tstm_asset_loan <- tstm_asset_loan

# Invest-only gateway on full asset file (not packaged data/tstm_invest)
ls_return <- PrjThaiHFID::ffp_hfid_invest_gateway(
  tstm_asset_loan,
  fl_sd_ithres = stats::qnorm(0.99),
  it_thres_invest_mth_gap = 2
)
tstm_invest <- ls_return$tstm_invest
tstm_invdates_uniq <- ls_return$tstm_invdates_uniq

Implement table to show lumpiness of investments

This block generates Table 1 issue 9.

Generate stats group 1

First, load in, househould start and end months (uoo=hh), and investment file (uoo=hhxivarxinvest), already done.

# Check
ar_st_ivars <- unique(tstm_invest$ivars)

Second, from invest file, group by hh x ivar, and count the numver of investments.

# Load files, already loaded tstm_hh_mthspan for hh start and end months
# investmetn file
st_id_ivar_n <- "id_ivar_n"
tstm_invest_idivar_n <- tstm_invest %>%
  group_by(id, ivars) %>%
  summarize(!!sym(st_id_ivar_n) := n())
#> `summarise()` has regrouped the output.
#> ℹ Summaries were computed grouped by id and ivars.
#> ℹ Output is grouped by id.
#> ℹ Use `summarise(.groups = "drop_last")` to silence this message.
#> ℹ Use `summarise(.by = c(id, ivars))` for per-operation grouping
#>   (`?dplyr::dplyr_by`) instead.
# Check
unique(tstm_invest_idivar_n$ivars)
#>  [1] "agg_BS_1011" "agg_BS_1012" "agg_BS_1021" "agg_BS_1022" "agg_BS_1023"
#>  [6] "agg_BS_1025" "agg_BS_2011" "agg_BS_2021" "agg_BS_3011" "agg_BS_3022"
#> [11] "agg_BS_1024" "agg_BS_3021"

Third, merge with household startend file, replace NA with zero for each invest count.

# Subsetting hh with min obs count
tstm_hh_mthspan_sel <- tstm_hh_mthspan %>%
  filter(month_n == it_hh_mth_nbr)
# Left join to investment counts
# For households without any investments, they will have ivars=NA
tstm_hh_mthspan_wrk <- tstm_hh_mthspan_sel %>%
  left_join(tstm_invest_idivar_n, by = "id")
# Reshape long to wide, to have all ivars as cols, replace NA by 0
# Follow https://fanwangecon.github.io/R4Econ/panel/widelong/htmlpdfr/fs_pivotwider.html
st_ivar_prefix <- "ivar"
st_ivar_prefix_d <- paste0(st_ivar_prefix, "_")
tstm_hh_mthspan_wrk_wide <- tstm_hh_mthspan_wrk %>%
  pivot_wider(
    id_cols = c("id"),
    names_from = ivars,
    names_prefix = st_ivar_prefix_d,
    values_from = st_id_ivar_n
  )
#> Warning: Using an external vector in selections was deprecated in tidyselect 1.1.0.
#> ℹ Please use `all_of()` or `any_of()` instead.
#>   # Was:
#>   data %>% select(st_id_ivar_n)
#> 
#>   # Now:
#>   data %>% select(all_of(st_id_ivar_n))
#> 
#> See <https://tidyselect.r-lib.org/reference/faq-external-vector.html>.
# Drop NA column, due to hh without any invest only ivar=NA
# creating an extra category, all 0 invest household still have
# all other ivars
tstm_hh_mthspan_wrk_wide <- tstm_hh_mthspan_wrk_wide %>%
  select(-dplyr::any_of("ivar_NA"))
# replace NA values by 0s
tstm_hh_mthspan_wrk_wide <- tstm_hh_mthspan_wrk_wide %>%
  mutate_at(
    vars(contains(st_ivar_prefix_d)),
    list(~ replace_na(., 0))
  )
# Reshape back to long
# Follow https://fanwangecon.github.io/R4Econ/panel/widelong/htmlpdfr/fs_pivotlonger.html
tstm_hh_mthspan_wrk_long <- tstm_hh_mthspan_wrk_wide %>%
  pivot_longer(
    cols = starts_with(st_ivar_prefix_d),
    names_to = c(st_ivar_prefix),
    names_pattern = paste0(st_ivar_prefix_d, "(.*)"),
    values_to = st_id_ivar_n
  )

Fourth, group by ivar and invest count, tally.

# tally
tb_ivar_n_tally <- tstm_hh_mthspan_wrk_long %>%
  group_by(ivar, id_ivar_n) %>%
  tally() %>%
  spread(ivar, n)
# NA to zero
tb_ivar_n_tally <- tb_ivar_n_tally %>%
  mutate_at(vars(contains("agg_")), list(~ replace_na(., 0)))
# print
print(tb_ivar_n_tally, n = Inf)
#> # A tibble: 12 × 13
#>    id_ivar_n agg_BS_1011 agg_BS_1012 agg_BS_1021 agg_BS_1022 agg_BS_1023
#>        <int>       <int>       <int>       <int>       <int>       <int>
#>  1         0          22          75          59         170         100
#>  2         1         126         182         186         224         213
#>  3         2         144         160         171         123         142
#>  4         3         151          89         113          53          67
#>  5         4          96          62          49          27          43
#>  6         5          43          25          20           5          18
#>  7         6          15          11           8           4          14
#>  8         7           8           1           0           0           6
#>  9         8           1           1           0           0           0
#> 10         9           0           0           0           0           2
#> 11        10           0           0           0           0           0
#> 12        11           0           0           0           0           1
#> # ℹ 7 more variables: agg_BS_1024 <int>, agg_BS_1025 <int>, agg_BS_2011 <int>,
#> #   agg_BS_2021 <int>, agg_BS_3011 <int>, agg_BS_3021 <int>, agg_BS_3022 <int>

Fifth, proportional version

# Freq to shares
tb_ivar_n_tally_share <- tb_ivar_n_tally %>%
  mutate_at(
    vars(contains("agg_")),
    list(~ . / sum(.))
  )
# Check
colSums(tb_ivar_n_tally_share)
#>   id_ivar_n agg_BS_1011 agg_BS_1012 agg_BS_1021 agg_BS_1022 agg_BS_1023 
#>          66           1           1           1           1           1 
#> agg_BS_1024 agg_BS_1025 agg_BS_2011 agg_BS_2021 agg_BS_3011 agg_BS_3021 
#>           1           1           1           1           1           1 
#> agg_BS_3022 
#>           1
# print
print(round(tb_ivar_n_tally_share, 2), n = Inf)
#> # A tibble: 12 × 13
#>    id_ivar_n agg_BS_1011 agg_BS_1012 agg_BS_1021 agg_BS_1022 agg_BS_1023
#>        <dbl>       <dbl>       <dbl>       <dbl>       <dbl>       <dbl>
#>  1         0        0.04        0.12        0.1         0.28        0.17
#>  2         1        0.21        0.3         0.31        0.37        0.35
#>  3         2        0.24        0.26        0.28        0.2         0.23
#>  4         3        0.25        0.15        0.19        0.09        0.11
#>  5         4        0.16        0.1         0.08        0.04        0.07
#>  6         5        0.07        0.04        0.03        0.01        0.03
#>  7         6        0.02        0.02        0.01        0.01        0.02
#>  8         7        0.01        0           0           0           0.01
#>  9         8        0           0           0           0           0   
#> 10         9        0           0           0           0           0   
#> 11        10        0           0           0           0           0   
#> 12        11        0           0           0           0           0   
#> # ℹ 7 more variables: agg_BS_1024 <dbl>, agg_BS_1025 <dbl>, agg_BS_2011 <dbl>,
#> #   agg_BS_2021 <dbl>, agg_BS_3011 <dbl>, agg_BS_3021 <dbl>, agg_BS_3022 <dbl>

Generate stats group 2

Load in group 1 stats above, and compute weighted average overall for each ivar.

First, compute means including zeros.

# Prob * Val
tb_ivar_n_tally_mean <- tb_ivar_n_tally_share %>%
  mutate_at(
    vars(contains("agg_")),
    list(~ . * id_ivar_n)
  )
# Sum each column
tb_ivar_n_tally_mean <- tb_ivar_n_tally_mean %>%
  mutate_at(
    vars(contains("agg_")),
    list(~ sum(.))
  ) %>%
  slice_head(n = 1) %>%
  select(contains("agg_"))
# Divide by number of years
tb_ivar_n_tally_mean_peryear <- tb_ivar_n_tally_mean %>%
  mutate_at(
    vars(contains("agg_")),
    list(~ . / (it_hh_mth_nbr / 12))
  )

# Print
print(tb_ivar_n_tally_mean)
#> # A tibble: 1 × 12
#>   agg_BS_1011 agg_BS_1012 agg_BS_1021 agg_BS_1022 agg_BS_1023 agg_BS_1024
#>         <dbl>       <dbl>       <dbl>       <dbl>       <dbl>       <dbl>
#> 1        2.67        2.02        2.00        1.30        1.84        1.85
#> # ℹ 6 more variables: agg_BS_1025 <dbl>, agg_BS_2011 <dbl>, agg_BS_2021 <dbl>,
#> #   agg_BS_3011 <dbl>, agg_BS_3021 <dbl>, agg_BS_3022 <dbl>
print(tb_ivar_n_tally_mean_peryear)
#> # A tibble: 1 × 12
#>   agg_BS_1011 agg_BS_1012 agg_BS_1021 agg_BS_1022 agg_BS_1023 agg_BS_1024
#>         <dbl>       <dbl>       <dbl>       <dbl>       <dbl>       <dbl>
#> 1       0.199       0.150       0.149      0.0967       0.137       0.138
#> # ℹ 6 more variables: agg_BS_1025 <dbl>, agg_BS_2011 <dbl>, agg_BS_2021 <dbl>,
#> #   agg_BS_3011 <dbl>, agg_BS_3021 <dbl>, agg_BS_3022 <dbl>

Second, compute means excluding zeros.

# Recompute shares, after excluding zero
tb_ivar_n_tally_share_nozr <- tb_ivar_n_tally %>%
  filter(id_ivar_n != 0) %>%
  mutate_at(
    vars(contains("agg_")),
    list(~ . / sum(.))
  )
# print
colSums(tb_ivar_n_tally_share_nozr)
#>   id_ivar_n agg_BS_1011 agg_BS_1012 agg_BS_1021 agg_BS_1022 agg_BS_1023 
#>          66           1           1           1           1           1 
#> agg_BS_1024 agg_BS_1025 agg_BS_2011 agg_BS_2021 agg_BS_3011 agg_BS_3021 
#>           1           1           1           1           1           1 
#> agg_BS_3022 
#>           1
# Prob * Val
tb_ivar_n_tally_mean_nozr <- tb_ivar_n_tally_share_nozr %>%
  filter(id_ivar_n != 0) %>%
  mutate_at(
    vars(contains("agg_")),
    list(~ . * id_ivar_n)
  )
# Sum each column
tb_ivar_n_tally_mean_nozr <- tb_ivar_n_tally_mean_nozr %>%
  mutate_at(
    vars(contains("agg_")),
    list(~ sum(.))
  ) %>%
  slice_head(n = 1) %>%
  select(contains("agg_"))
# Divide by number of years
tb_ivar_n_tally_mean_nozr_peryear <- tb_ivar_n_tally_mean_nozr %>%
  mutate_at(
    vars(contains("agg_")),
    list(~ . / (it_hh_mth_nbr / 12))
  )

# Print
print(tb_ivar_n_tally_mean_nozr)
#> # A tibble: 1 × 12
#>   agg_BS_1011 agg_BS_1012 agg_BS_1021 agg_BS_1022 agg_BS_1023 agg_BS_1024
#>         <dbl>       <dbl>       <dbl>       <dbl>       <dbl>       <dbl>
#> 1        2.77        2.30        2.21        1.80        2.20        2.41
#> # ℹ 6 more variables: agg_BS_1025 <dbl>, agg_BS_2011 <dbl>, agg_BS_2021 <dbl>,
#> #   agg_BS_3011 <dbl>, agg_BS_3021 <dbl>, agg_BS_3022 <dbl>
print(tb_ivar_n_tally_mean_nozr_peryear)
#> # A tibble: 1 × 12
#>   agg_BS_1011 agg_BS_1012 agg_BS_1021 agg_BS_1022 agg_BS_1023 agg_BS_1024
#>         <dbl>       <dbl>       <dbl>       <dbl>       <dbl>       <dbl>
#> 1       0.207       0.172       0.165       0.134       0.164       0.179
#> # ℹ 6 more variables: agg_BS_1025 <dbl>, agg_BS_2011 <dbl>, agg_BS_2021 <dbl>,
#> #   agg_BS_3011 <dbl>, agg_BS_3021 <dbl>, agg_BS_3022 <dbl>

Generate stats group 3

First, Load in household monthly asset file, already loaded tstm_asset_loan.

Second, convert if household x monthly elements are non-zero to 1, zero keep as 0.

tstm_asset_loan_bi <- tstm_asset_loan %>%
  select(id, month, one_of(ar_st_ivars)) %>%
  mutate(across(
    contains("agg_"),
    function(x) {
      ifelse(x > 0, 1, 0)
    }
  ))

Third group by, sum monthly binary if var exists, aggregate to household level, binary if sum > 1 or if sum = 0

# Aggregate
tstm_asset_loan_bi_hh <- tstm_asset_loan_bi %>%
  group_by(id) %>%
  summarize_at(
    vars(contains("agg_")),
    list(~ sum(., na.rm = TRUE))
  )
# hh-level binary
tstm_asset_loan_bi_hh_bi <- tstm_asset_loan_bi_hh %>%
  mutate(across(
    contains("agg_"),
    function(x) {
      ifelse(x > 0, 1, 0)
    }
  ))

Fourth, merge with household startend file, condition as needed.

tstm_asset_bi <- tstm_hh_mthspan_sel %>%
  left_join(tstm_asset_loan_bi_hh_bi, by = "id") %>%
  select(id, contains("agg_"))

Fifth, compute share of households with non-zero

tstm_asset_bi_agg <- tstm_asset_bi %>%
  ungroup() %>%
  summarize_at(
    vars(contains("agg_")),
    list(~ sum(.) / n())
  )

Table display

Table input processing

First, sorting instructions.

2025-05-24 15:50:50, select:

  1. Agricultural: agg_BS_1025
  2. Business: agg_BS_3021
  3. Land: agg_BS_1024
  4. Household assets: agg_BS_3022
  5. Agri + biz: agg_BS_1021
  6. agri + biz + land: agg_BS_1012
  7. Agri + biz + land + household: agg_BS_1011
# # Variable sequence
# ar_st_vars_kbl <- c(
#   "id_ivar_n",
#   "agg_BS_10", "agg_BS_11", "agg_BS_12",
#   "agg_BS_23", "agg_BS_09"
# )
# Variable sequence
ar_st_vars_kbl <- c(
  "id_ivar_n",
  "agg_BS_1025", "agg_BS_3021",
  "agg_BS_1024", "agg_BS_3022",
  "agg_BS_1021", "agg_BS_1012", "agg_BS_1011"
)

Second, clean-up stats group 1.

# Sum up rows where id_ivar_n >= 8 and create a single row for that, then remove individual rows >= 8
# Convert id_ivar_n to numeric for filtering
tb_ivar_n_tally_share <- tb_ivar_n_tally_share %>%
  mutate(id_ivar_n = as.numeric(id_ivar_n))

# Separate rows with id_ivar_n < 8 and >= 8
tb_ivar_n_tally_share_lt8 <- tb_ivar_n_tally_share %>%
  filter(id_ivar_n < 8) %>%
  mutate(id_ivar_n = as.character(id_ivar_n))

tb_ivar_n_tally_share_ge8 <- tb_ivar_n_tally_share %>%
  filter(id_ivar_n >= 8)

# Sum across columns for rows with id_ivar_n >= 8
if (nrow(tb_ivar_n_tally_share_ge8) > 0) {
  summed_row <- tb_ivar_n_tally_share_ge8 %>%
    summarize(across(where(is.numeric), sum, na.rm = TRUE)) %>%
    mutate(id_ivar_n = "8+")
  # Bind together
  tb_ivar_n_tally_share <- bind_rows(tb_ivar_n_tally_share_lt8, summed_row)
} else {
  tb_ivar_n_tally_share <- tb_ivar_n_tally_share_lt8
}
#> Warning: There was 1 warning in `summarize()`.
#> ℹ In argument: `across(where(is.numeric), sum, na.rm = TRUE)`.
#> Caused by warning:
#> ! The `...` argument of `across()` is deprecated as of dplyr 1.1.0.
#> Supply arguments directly to `.fns` through an anonymous function instead.
#> 
#>   # Previously
#>   across(a:b, mean, na.rm = TRUE)
#> 
#>   # Now
#>   across(a:b, \(x) mean(x, na.rm = TRUE))

# Restore id_ivar_n as character for downstream compatibility
tb_ivar_n_tally_share <- tb_ivar_n_tally_share %>%
  mutate(id_ivar_n = as.character(id_ivar_n))
tb_tally_cur <- tb_ivar_n_tally_share
# Round and add percentage signs
tb_tally_cur_wkr <- tb_tally_cur %>%
  mutate_at(
    vars(contains("agg_")),
    list(~ paste0(
      format(round(., 3) * 100, nsmall = 1),
      "%"
    ))
  ) %>%
  mutate(id_ivar_n = as.character(id_ivar_n))

Third, clean-up and combine auxliary stats

# Combine auxillary stats
tb_tally_aux1_cur <- bind_rows(
  tb_ivar_n_tally_mean_peryear %>% mutate(id_ivar_n = "Include 0s"),
  tb_ivar_n_tally_mean_nozr_peryear %>% mutate(id_ivar_n = "Exclude 0s")
) %>% mutate_at(
  vars(contains("agg_")),
  list(~ as.character(
    format(round(., 2), nsmall = 2)
  ))
)
# Auxillary share stasts
tb_tally_aux2_cur <- bind_rows(
  tstm_asset_bi_agg %>% mutate(id_ivar_n = "Share")
) %>% mutate_at(
  vars(contains("agg_")),
  list(~ paste0(
    format(round(., 3) * 100, nsmall = 1),
    "%"
  ))
)

Fourth, combine auxliary and main stats.

# Combine auxillary stats
tb_tally_jnt <- bind_rows(
  tb_tally_cur_wkr,
  tb_tally_aux1_cur,
  tb_tally_aux2_cur
) %>% select(one_of(ar_st_vars_kbl))

Table final display

First, we define column names, which correspond to previously defined variable selection list.

# Column names
# ar_st_col_names <- c(
#   "# of investments",
#   "Land + livestock + agri assets",
#   "Agri assets",
#   "Business + household assets",
#   "Business assets",
#   "Land + livest. + agri + biz + household",
#   "All (w/o household assets)",
#   "Agri + business assets"
# )
# 2025-05-24 15:53:38
ar_st_col_names <- c(
  "# of investments",
  "Agricultural",
  "Business",
  "Land",
  "Household",
  "Agricultural and business",
  "Agricultural, business, and land",
  "Agricultural, business, land, and household"
)
# "All (Land + livestock + agri + household + biz)",
# "Productive assets (Land + livestock + agri + biz)",
st_texsize <- "footnotesize"
ar_st_col_names <- paste0(
  "\\", st_texsize, "{", ar_st_col_names, "}"
)
# Define column groups, grouping the names above
# =1/3/2 are number of columns group title covers
ar_st_col_groups <- c(
  " " = 1,
  "Individual assets categories" = 4,
  "Joint assets definitions" = 3
)

# Second, we construct main table, and add styling.
f_bk_asset_count <- function(st_format) {
  bk_asset_count <- kbl(
    tb_tally_jnt,
    format = st_format,
    # escape = F,
    linesep = "",
    booktabs = T,
    align = "c",
    caption = "Percentage of households with number of investments, survey month 1 to 160",
    col.names = ar_st_col_names
  ) %>%
    # see https://cran.r-project.org/web/packages/kableExtra/vignettes/awesome_table_in_html.html#Bootstrap_table_classes
    kable_styling(
      bootstrap_options = c("striped", "hover", "condensed", "responsive"),
      full_width = F, position = "left"
    )

  # Third, we add in column groups.
  bk_asset_count <- bk_asset_count %>% add_header_above(ar_st_col_groups)

  # Fourth, we add in row groups.
  bk_asset_count <- bk_asset_count %>%
    pack_rows(
      "Share of households with different number of investments over 160 months",
      1, 9,
      latex_gap_space = "0.5em"
    ) %>%
    pack_rows(
      "Mean number of investments per year over 160 months",
      10, 11,
      latex_gap_space = "0.5em", hline_before = T
    ) %>%
    pack_rows(
      "Share of household having any month with non-zero assets over 160 months",
      12, 12,
      latex_gap_space = "0.5em", hline_before = T
    )

  # Fifth, column formatting.
  bk_asset_count <- bk_asset_count %>%
    column_spec(1, width = "2.5cm") %>%
    column_spec(2:8, width = "2cm")

  # FInal adjustments
  bk_asset_count <- gsub(bk_asset_count,
    pattern = paste0("\\textbackslash{}", st_texsize, "\\"),
    replacement = paste0("\\", st_texsize), fixed = TRUE
  )
  bk_asset_count <- gsub(bk_asset_count,
    pattern = "\\}",
    replacement = "}", fixed = TRUE
  )
  bk_asset_count <- gsub(bk_asset_count,
    pattern = "hline",
    replacement = "midrule", fixed = TRUE
  )

  return(bk_asset_count)
}

# Build for display and save a latex (+ csv) copy.
bk_asset_count <- f_bk_asset_count(st_kableformat)
ffp_save_res_table(
  f_bk_asset_count("latex"), "bk_asset_count", spt_res,
  df = tb_tally_jnt,
  bl_save = ls_save_res[["bk_asset_count"]]
)

# Sixth, display.
# pl_bk_asset_count <- bk_asset_count %>% as_image()
bk_asset_count
Percentage of households with number of investments, survey month 1 to 160
Individual assets categories
Joint assets definitions
\footnotesize{# of investments} \footnotesize{Agricultural} \footnotesize{Business} \footnotesize{Land} \footnotesize{Household} \footnotesize{Agricultural and business} \footnotesize{Agricultural, business, and land} \footnotesize{Agricultural, business, land, and household}
Share of households with different number of investments over 160 months
0 12.4% 79.4% 23.3% 0.7% 9.7% 12.4% 3.6%
1 27.9% 14.2% 33.0% 22.9% 30.7% 30.0% 20.8%
2 29.9% 4.6% 17.8% 29.9% 28.2% 26.4% 23.8%
3 17.5% 1.0% 10.4% 24.3% 18.6% 14.7% 24.9%
4 8.7% 0.5% 5.3% 13.9% 8.1% 10.2% 15.8%
5 2.5% 0.2% 3.8% 5.3% 3.3% 4.1% 7.1%
6 1.2% 0.2% 2.6% 2.3% 1.3% 1.8% 2.5%
7 0.0% 0.0% 2.6% 0.7% 0.0% 0.2% 1.3%
8+ 0.0% 0.0% 1.2% 0.2% 0.0% 0.2% 0.2%
Mean number of investments per year over 160 months
Include 0s 0.14 0.02 0.14 0.19 0.15 0.15 0.20
Exclude 0s 0.17 0.11 0.18 0.19 0.17 0.17 0.21
Share of household having any month with non-zero assets over 160 months
Share 91.9% 23.9% 94.6% 100.0% 94.6% 99.0% 100.0%

Investment table to show sizes of investments

Table 2 issue 9.

Prepare input data

Generate investment windows by unique investments

Generate in investment file (uoo=hhxivarxinvest), and load in windowed stats file (uoo=hhxivarxinvestxvarxwin) 12 month as 1 month window versions.

First, generate in investment file (uoo=hhxivarxinvest), and load in windowed stats file (uoo=hhxivarxinvestxvarxwin) 12 month as 1 month window versions.

# 1 month window file, changes in capital-assets themselves (as well as cash/debt)
df_invwin_uniq_fw1bw1 <- PrjThaiHFID::ff_hfid_invest_window(
  df_invdates_uniq = tstm_invdates_uniq,
  it_window_forward = 1,
  it_window_backward = 1,
  bl_forward_include_imth = FALSE,
  bl_backward_include_imth = FALSE
)
# 12 month window file
df_invwin_uniq_fw12bw12 <- PrjThaiHFID::ff_hfid_invest_window(
  df_invdates_uniq = tstm_invdates_uniq,
  it_window_forward = 12,
  it_window_backward = 12,
  bl_forward_include_imth = FALSE,
  bl_backward_include_imth = FALSE
)

Second, generate mean and sum stats, short and long, respectively.

# Either sum or mean, not both
# 1 month window file, mean
df_invest_winstats_fw1bw1_mean <- PrjThaiHFID::ff_hfid_invest_winstats(
  df_asset_loan = tstm_asset_loan,
  df_invest = tstm_invest,
  df_invwin_uniq = df_invwin_uniq_fw1bw1,
  bl_compute_sum = FALSE,
  bl_compute_mean = TRUE
)
# 12 month window file, sum
df_invest_winstats_fw12bw12_sum <- PrjThaiHFID::ff_hfid_invest_winstats(
  df_asset_loan = tstm_asset_loan,
  df_invest = tstm_invest,
  df_invwin_uniq = df_invwin_uniq_fw12bw12,
  bl_compute_sum = TRUE,
  bl_compute_mean = FALSE
)

Third, combine output to list.

# Sets of windowed files
ls_df_invest_winstats_fwbw <- list(
  # Compare against backward window capital assets
  fw1bw1mean_back = df_invest_winstats_fw1bw1_mean,
  # Compare against backward window revenue sums
  fw12bw12sum_back = df_invest_winstats_fw12bw12_sum
)
ar_svr_window_comp <- c("win_backward", "win_backward")
# Get length and names
it_df_count <- length(ls_df_invest_winstats_fwbw)
ar_st_df_names <- names(ls_df_invest_winstats_fwbw)

Generate stats step 1, run window functions

Compute percentages.

  • 12 month file: For revenue stats, based on 12 month file compute ratios of investments relative to last 12 months sum.
  • 1 month file: Ratios of invest size var versus asset var. Stack 12 month and 1month files together
# Storage collection
ls_invest_win_shr <- vector(mode = "list", length = it_df_count)
names(ls_invest_win_shr) <- ar_st_df_names

# Loop over list of dataframes
for (it_df in seq(1, it_df_count)) {
  # Load in file
  st_df_name <- ar_st_df_names[it_df]
  df_invest_winstats_fwbw <- ls_df_invest_winstats_fwbw[[st_df_name]]
  svr_window_comp <- ar_svr_window_comp[it_df]

  # Compute proportions
  df_invest_winstats_shr <- df_invest_winstats_fwbw %>%
    # filter(blnc_vars == "IS_07") %>%
    filter(!!sym(svr_window_comp) != 0) %>%
    mutate(shr_stat = capital_invest / !!sym(svr_window_comp)) %>%
    select(
      id,
      ivars, hh_inv_asset_ctr, hh_inv_ctr,
      blnc_vars, blnc_agg_stats,
      # capital_invest, win_backward,
      shr_stat
    )

  # Store results
  ls_invest_win_shr[[st_df_name]] <- df_invest_winstats_shr
}

# df_invest_winstats_fw12bw12_shr <- df_invest_winstats_fw12bw12 %>%
#   # filter(blnc_vars == "IS_07") %>%
#   filter(win_backward != 0) %>%
#   mutate(shr_stat = capital_invest / win_backward) %>%
#   select(
#     id,
#     ivars, hh_inv_asset_ctr, hh_inv_ctr,
#     blnc_vars, blnc_agg_stats,
#     capital_invest, win_backward, shr_stat
#   )

Generate stats step 2, ratios and within group percentiles

Compute within balance cheet and investment variable CDF.

# Storage collection
ls_invest_win_pct <- vector(mode = "list", length = it_df_count)
names(ls_invest_win_pct) <- ar_st_df_names

# Loop over list of dataframes
for (it_df in seq(1, it_df_count)) {
  # Load in file
  st_df_name <- ar_st_df_names[it_df]
  df_invest_win_shr <- ls_invest_win_shr[[st_df_name]]

  # Generate within-group CDF
  df_invest_win_pct <- df_invest_win_shr %>%
    select(blnc_vars, ivars, shr_stat) %>%
    arrange(blnc_vars, ivars, shr_stat) %>%
    group_by(blnc_vars, ivars) %>%
    mutate(cdf = row_number() / n())

  # Store results
  ls_invest_win_pct[[st_df_name]] <- df_invest_win_pct
}
#> Adding missing grouping variables: `id`, `hh_inv_ctr`
#> Adding missing grouping variables: `id`, `hh_inv_ctr`

# # Generate within-group CDF
# df_invest_winstats_fw12bw12_pct <- df_invest_winstats_fw12bw12_shr %>%
#   arrange(blnc_vars, ivars, shr_stat) %>%
#   group_by(blnc_vars, ivars) %>%
#   mutate(cdf = row_number() / n())

Compute a subset of percentiles of interests.

# Define strings
st_var_prefix <- "shr"
st_var_prefix_perc <- paste0(st_var_prefix, "_p")
svr_mean <- paste0(st_var_prefix, "_mean")

# Storage collection
ls_df_cross_pct <- vector(mode = "list", length = it_df_count)
names(ls_df_cross_pct) <- ar_st_df_names

# Loop over list of dataframes
for (it_df in seq(1, it_df_count)) {
  # Load in file
  st_df_name <- ar_st_df_names[it_df]
  df_invest_win_pct <- ls_invest_win_pct[[st_df_name]]

  # Generate within-group percentiles
  for (it_percentile_ctr in seq(1, length(ar_fl_percentiles))) {
    # Current within group percentile to compute
    fl_percentile <- ar_fl_percentiles[it_percentile_ctr]

    # Percentile and mean stats
    svr_percentile <- paste0(st_var_prefix_perc, round(fl_percentile * 100))

    # Frame with specific percentile
    df_within_percentiles_cur <- df_invest_win_pct %>%
      group_by(blnc_vars, ivars) %>%
      filter(cdf >= fl_percentile) %>%
      slice(1) %>%
      mutate(!!sym(svr_percentile) := shr_stat) %>%
      select(blnc_vars, ivars, one_of(svr_percentile))

    # Merge percentile frames together
    if (it_percentile_ctr > 1) {
      df_within_percentiles <- df_within_percentiles %>%
        left_join(df_within_percentiles_cur,
          by = c("blnc_vars" = "blnc_vars", "ivars" = "ivars")
        )
    } else {
      df_within_percentiles <- df_within_percentiles_cur
    }
  }

  # Add in within group mean
  df_within_percentiles_mean <- df_invest_win_pct %>%
    group_by(blnc_vars, ivars) %>%
    mutate(!!sym(svr_mean) := mean(shr_stat, na.rm = TRUE)) %>%
    slice(1)
  # Join to file
  df_within_percentiles <- df_within_percentiles %>%
    left_join(df_within_percentiles_mean,
      by = c("blnc_vars" = "blnc_vars", "ivars" = "ivars")
    ) %>%
    select(
      blnc_vars, ivars,
      one_of(svr_mean), contains(st_var_prefix_perc)
    )

  # Store results
  ls_df_cross_pct[[st_df_name]] <- df_within_percentiles
}

# display
print(ls_df_cross_pct)
#> $fw1bw1mean_back
#> # A tibble: 468 × 10
#> # Groups:   blnc_vars, ivars [468]
#>    blnc_vars ivars      shr_mean  shr_p5 shr_p10 shr_p25 shr_p50 shr_p75 shr_p90
#>    <chr>     <chr>         <dbl>   <dbl>   <dbl>   <dbl>   <dbl>   <dbl>   <dbl>
#>  1 BS_01     agg_BS_10…    1.06  0.0308  0.0515  0.122    0.286    0.658   1.40 
#>  2 BS_01     agg_BS_10…    0.861 0.00490 0.0116  0.0419   0.142    0.412   0.971
#>  3 BS_01     agg_BS_10…    0.472 0.00358 0.00845 0.0269   0.100    0.292   0.766
#>  4 BS_01     agg_BS_10…    0.394 0.00446 0.00892 0.0229   0.0886   0.307   0.772
#>  5 BS_01     agg_BS_10…    0.723 0.00348 0.00763 0.0242   0.0947   0.359   0.878
#>  6 BS_01     agg_BS_10…    0.519 0.00205 0.00367 0.00964  0.0310   0.134   0.520
#>  7 BS_01     agg_BS_10…    0.453 0.00292 0.00662 0.0230   0.0876   0.263   0.699
#>  8 BS_01     agg_BS_20…    0.856 0.00517 0.0116  0.0388   0.135    0.386   0.947
#>  9 BS_01     agg_BS_20…    0.382 0.00475 0.00886 0.0210   0.0764   0.279   0.728
#> 10 BS_01     agg_BS_30…    0.712 0.0171  0.0292  0.0687   0.193    0.495   1.08 
#> # ℹ 458 more rows
#> # ℹ 1 more variable: shr_p95 <dbl>
#> 
#> $fw12bw12sum_back
#> # A tibble: 468 × 10
#> # Groups:   blnc_vars, ivars [468]
#>    blnc_vars ivars      shr_mean  shr_p5 shr_p10 shr_p25 shr_p50 shr_p75 shr_p90
#>    <chr>     <chr>         <dbl>   <dbl>   <dbl>   <dbl>   <dbl>   <dbl>   <dbl>
#>  1 BS_01     agg_BS_10…   0.0636 2.68e-3 4.39e-3 1.08e-2 0.0257   0.0598  0.122 
#>  2 BS_01     agg_BS_10…   0.0437 4.11e-4 1.01e-3 3.59e-3 0.0126   0.0345  0.0836
#>  3 BS_01     agg_BS_10…   0.0228 3.02e-4 6.72e-4 2.33e-3 0.00851  0.0234  0.0607
#>  4 BS_01     agg_BS_10…   0.0237 3.36e-4 6.55e-4 2.00e-3 0.00718  0.0245  0.0609
#>  5 BS_01     agg_BS_10…   0.0420 3.10e-4 6.27e-4 2.21e-3 0.00825  0.0292  0.0779
#>  6 BS_01     agg_BS_10…   0.0291 1.75e-4 3.15e-4 8.30e-4 0.00276  0.0103  0.0413
#>  7 BS_01     agg_BS_10…   0.0210 2.38e-4 5.26e-4 2.04e-3 0.00738  0.0216  0.0542
#>  8 BS_01     agg_BS_20…   0.0428 4.13e-4 9.90e-4 3.36e-3 0.0116   0.0332  0.0794
#>  9 BS_01     agg_BS_20…   0.0219 3.80e-4 6.42e-4 1.74e-3 0.00593  0.0213  0.0543
#> 10 BS_01     agg_BS_30…   0.0441 1.53e-3 2.52e-3 6.11e-3 0.0167   0.0440  0.0935
#> # ℹ 458 more rows
#> # ℹ 1 more variable: shr_p95 <dbl>

Table display

Table input processing

Get stats tables.

# Compare against backward window capital assets
df_fw1bw1mean_back <- ls_df_cross_pct$fw1bw1mean_back %>% ungroup()
df_fw12bw12sum_back <- ls_df_cross_pct$fw12bw12sum_back %>% ungroup()

Review and select: Investment as share of prior month capital (level)

Most recent month, relationship between investment and investment. Potentially shows that not only are investments rare, they are sizable.

Learned:

  • very small and very large ratios are both possible
  • highlight results from: BS_1021 agg_BS_1021, median of 1.3, pretty large changes, largest among total asset.
    • include also components: BS_2021 agg_BS_2021
    • include also components: BS_3021 agg_BS_3021
    • could mention 1021 relative to 1011 as stats not in table but mentioned, but maybe also include: BS_1012 agg_BS_1021
# All
df_all_m1mean <- df_fw1bw1mean_back %>%
  filter(str_detect(blnc_vars, "BS_10"), str_detect(ivars, "BS_10"))
(df_all_m1mean) %>%
  kable() %>%
  kable_styling()
blnc_vars ivars shr_mean shr_p5 shr_p10 shr_p25 shr_p50 shr_p75 shr_p90 shr_p95
BS_1011 agg_BS_1011 0.7409393 0.0094756 0.0187255 0.0510925 0.1554751 0.4562367 1.4642133 2.7488136
BS_1011 agg_BS_1012 0.3843737 0.0029097 0.0057487 0.0182804 0.0696245 0.2166457 0.6888826 1.5406523
BS_1011 agg_BS_1021 0.1885822 0.0014941 0.0033931 0.0111579 0.0437246 0.1276403 0.3786298 0.7075963
BS_1011 agg_BS_1022 0.1467474 0.0021958 0.0036269 0.0088512 0.0308129 0.1094246 0.3196975 0.6704482
BS_1011 agg_BS_1023 0.3368104 0.0021817 0.0041579 0.0110017 0.0452081 0.1735644 0.5597619 1.3245048
BS_1011 agg_BS_1024 0.2556003 0.0008107 0.0014457 0.0044629 0.0122261 0.0714989 0.2875197 0.9054138
BS_1011 agg_BS_1025 0.1729666 0.0010837 0.0024574 0.0094515 0.0382867 0.1121256 0.3223563 0.6264688
BS_1012 agg_BS_1011 4464.7098174 0.0094223 0.0182549 0.0506443 0.1674679 0.6089942 3.1186109 10.9375637
BS_1012 agg_BS_1012 3416.2226959 0.0033846 0.0067887 0.0215576 0.0892095 0.3351785 1.5240729 4.3593331
BS_1012 agg_BS_1021 3456.0478044 0.0015478 0.0039521 0.0130890 0.0544887 0.1804410 0.8398873 2.2500000
BS_1012 agg_BS_1022 0.4798713 0.0023282 0.0039521 0.0096689 0.0396702 0.1505598 0.5786516 1.2743372
BS_1012 agg_BS_1023 1.0158854 0.0024145 0.0047720 0.0123914 0.0595946 0.2573797 1.0398978 2.6197992
BS_1012 agg_BS_1024 0.6970755 0.0008564 0.0017182 0.0051142 0.0149703 0.1113806 0.5178188 1.5585237
BS_1012 agg_BS_1025 3542.4196436 0.0011552 0.0027764 0.0100073 0.0497730 0.1647365 0.7000000 1.9700251
BS_1021 agg_BS_1011 5372.5448254 0.4603299 0.7054627 1.6395907 5.3772909 25.8912323 143.9644343 472.1979676
BS_1021 agg_BS_1012 3965.1289833 0.2472262 0.3366212 0.7231162 2.3536849 9.4441180 53.0088249 151.5792982
BS_1021 agg_BS_1021 3826.9472482 0.1705659 0.2554571 0.5440230 1.5332265 5.8821209 28.5230867 82.9447688
BS_1021 agg_BS_1022 52.8569734 0.0925525 0.1401637 0.2952586 0.9459944 3.9743116 22.8358869 82.9447688
BS_1021 agg_BS_1023 204.8073690 0.1027172 0.1773146 0.4590861 1.4591543 7.1004985 40.3456595 139.6101233
BS_1021 agg_BS_1024 164.9161726 0.0179356 0.0359888 0.1077672 0.3902669 2.5014213 14.6476417 54.4695665
BS_1021 agg_BS_1025 3900.1960006 0.1292174 0.2137537 0.4527832 1.3579275 4.9791078 25.1026889 71.2726389
BS_1022 agg_BS_1011 322.6270777 0.6026806 0.9534828 2.0851890 7.6318499 35.1452387 160.8831250 447.3166838
BS_1022 agg_BS_1012 230.8062149 0.2691948 0.3965798 0.9524189 3.3718758 16.3585899 69.1739812 226.7477142
BS_1022 agg_BS_1021 213.9462672 0.1807164 0.2842713 0.6745096 2.2972868 10.9307637 49.9502422 136.1310468
BS_1022 agg_BS_1022 244.3168505 0.1310442 0.1965548 0.4304811 1.2548145 5.8782877 35.9970876 128.6468977
BS_1022 agg_BS_1023 210.1156619 0.2216547 0.3121809 0.6947765 2.2286929 10.6492809 56.8153029 177.7134902
BS_1022 agg_BS_1024 40.3435419 0.0248228 0.0490516 0.1390711 0.5592155 3.2516608 24.2526266 66.0697449
BS_1022 agg_BS_1025 205.5867991 0.1401637 0.2277167 0.5818193 2.0315477 9.3602402 43.7005948 125.9988837
BS_1023 agg_BS_1011 5.9004119 0.0094128 0.0180764 0.0504210 0.1673132 0.6041616 3.2027449 12.6704347
BS_1023 agg_BS_1012 2.9200168 0.0033074 0.0067255 0.0209763 0.0894691 0.3295005 1.7865576 5.4705688
BS_1023 agg_BS_1021 1.9201113 0.0015432 0.0037064 0.0129135 0.0534755 0.1760733 0.9104802 2.9456718
BS_1023 agg_BS_1022 0.7776908 0.0023284 0.0039557 0.0097972 0.0412770 0.1538392 0.6383771 1.6810774
BS_1023 agg_BS_1023 1.7467336 0.0024145 0.0046942 0.0124273 0.0612103 0.2663786 1.2096323 3.0000000
BS_1023 agg_BS_1024 1.2647070 0.0008622 0.0017125 0.0051003 0.0151310 0.1161814 0.6125878 1.8083183
BS_1023 agg_BS_1025 1.9394852 0.0011552 0.0027565 0.0098712 0.0487120 0.1608886 0.7231347 2.7951427
BS_1024 agg_BS_1011 7.2629021 0.0092741 0.0174209 0.0481389 0.1568795 0.5273656 2.9361433 10.9475738
BS_1024 agg_BS_1012 3.7191041 0.0030656 0.0064703 0.0199025 0.0825611 0.2837974 1.7860843 5.3836417
BS_1024 agg_BS_1021 2.7546869 0.0014930 0.0035527 0.0119215 0.0491464 0.1602456 0.6420030 2.8406751
BS_1024 agg_BS_1022 2.1338530 0.0023404 0.0038278 0.0096657 0.0377177 0.1516240 0.5738230 2.9646044
BS_1024 agg_BS_1023 2.6611722 0.0023763 0.0045042 0.0122793 0.0579784 0.2505887 1.2709918 4.0282377
BS_1024 agg_BS_1024 1.4487369 0.0008595 0.0017144 0.0050997 0.0156429 0.1428643 0.7368667 2.0728088
BS_1024 agg_BS_1025 2.4095259 0.0011180 0.0025619 0.0096801 0.0446207 0.1387161 0.5817301 2.3300069
BS_1025 agg_BS_1011 5549.2036844 0.5403370 0.8235557 1.9111443 6.6326212 33.5732473 170.3991940 551.7568667
BS_1025 agg_BS_1012 4056.3185025 0.2707141 0.3897048 0.8524271 2.8388205 11.1715100 61.9131509 165.1864436
BS_1025 agg_BS_1021 3926.9527433 0.1986531 0.2871685 0.6249265 1.9454621 7.4409708 36.9481749 111.8913295
BS_1025 agg_BS_1022 76.7741759 0.0984555 0.1469442 0.3349635 1.1561695 5.6114062 32.7290955 118.6873652
BS_1025 agg_BS_1023 215.8134743 0.1032986 0.1920790 0.5064802 1.7502190 8.4191801 51.3187670 153.5552749
BS_1025 agg_BS_1024 168.1770101 0.0215421 0.0413641 0.1197309 0.4547106 3.2913341 17.9856641 59.7400031
BS_1025 agg_BS_1025 3983.3826665 0.1872337 0.2781652 0.5676795 1.6831382 6.3782364 30.8576225 101.6030770
ffp_save_res_table(
  kbl(df_all_m1mean,
    format = "latex", booktabs = TRUE,
    caption = "Investment as share of prior month capital (all, level)"
  ),
  "tb_all_m1mean", spt_res,
  df = df_all_m1mean, bl_save = ls_save_res[["tb_all_m1mean"]]
)
# Agriculture
df_agri_m1mean <- df_fw1bw1mean_back %>%
  filter(str_detect(blnc_vars, "BS_20"), str_detect(ivars, "BS_20"))
(df_agri_m1mean) %>%
  kable() %>%
  kable_styling()
blnc_vars ivars shr_mean shr_p5 shr_p10 shr_p25 shr_p50 shr_p75 shr_p90 shr_p95
BS_2011 agg_BS_2011 3511.1318345 0.0030608 0.0064415 0.0203075 0.0853109 0.3511469 1.6363636 4.702961
BS_2011 agg_BS_2021 0.5136162 0.0019996 0.0034109 0.0082789 0.0326087 0.1332474 0.5301598 1.121944
BS_2021 agg_BS_2011 311.5305419 0.3121809 0.4988032 1.1655180 4.3628114 21.3646568 93.9352683 290.990472
BS_2021 agg_BS_2021 276.1153017 0.1401637 0.2225300 0.4518415 1.2881209 6.4738158 45.0920958 136.131047
ffp_save_res_table(
  kbl(df_agri_m1mean,
    format = "latex", booktabs = TRUE,
    caption = "Investment as share of prior month capital (agriculture, level)"
  ),
  "tb_agri_m1mean", spt_res,
  df = df_agri_m1mean, bl_save = ls_save_res[["tb_agri_m1mean"]]
)
# Business
df_biz_m1mean <- df_fw1bw1mean_back %>%
  filter(str_detect(blnc_vars, "BS_30"), str_detect(ivars, "BS_30"))
(df_biz_m1mean) %>%
  kable() %>%
  kable_styling()
blnc_vars ivars shr_mean shr_p5 shr_p10 shr_p25 shr_p50 shr_p75 shr_p90 shr_p95
BS_3011 agg_BS_3011 4.104832 0.1751265 0.2289867 0.4433280 1.0510476 2.8532376 6.952639 12.117628
BS_3011 agg_BS_3021 1.237421 0.0121182 0.0168242 0.0522185 0.1665355 0.8546702 2.767697 6.132434
BS_3011 agg_BS_3022 3.927825 0.1649034 0.2133417 0.4081838 1.0032138 2.7359773 6.733992 11.892915
BS_3021 agg_BS_3011 47.937796 0.3550340 0.6475215 1.5600846 7.3068073 23.3413532 84.160046 137.289053
BS_3021 agg_BS_3021 55.123023 0.0817616 0.1523057 0.3732541 0.8460228 3.3656165 18.695871 65.872575
BS_3021 agg_BS_3022 30.124135 0.1637420 0.3123617 1.1242053 5.0923282 21.1817781 84.177903 130.565935
BS_3022 agg_BS_3011 4.363616 0.1817649 0.2419322 0.4676280 1.1230191 3.0285702 7.778116 13.128688
BS_3022 agg_BS_3021 1.672649 0.0123890 0.0168988 0.0624731 0.2186658 1.3631572 3.560229 7.019443
BS_3022 agg_BS_3022 4.180161 0.1816237 0.2341408 0.4504677 1.0766720 2.9745669 7.511008 12.622020
ffp_save_res_table(
  kbl(df_biz_m1mean,
    format = "latex", booktabs = TRUE,
    caption = "Investment as share of prior month capital (business, level)"
  ),
  "tb_biz_m1mean", spt_res,
  df = df_biz_m1mean, bl_save = ls_save_res[["tb_biz_m1mean"]]
)

Select subset of key stats to be displayed.

# Results 1
df_m1mean_agri_bs2021 <- df_fw1bw1mean_back %>%
  filter(str_detect(blnc_vars, "BS_2021"), str_detect(ivars, "BS_2021"))
# Results 2
df_m1mean_biz_bs3021 <- df_fw1bw1mean_back %>%
  filter(str_detect(blnc_vars, "BS_3021"), str_detect(ivars, "BS_3021"))
# Results 3a
df_m1mean_all_bs1021 <- df_fw1bw1mean_back %>%
  filter(str_detect(blnc_vars, "BS_1021"), str_detect(ivars, "BS_1021"))
# Results 3b
df_m1mean_all_bs1021v1012 <- df_fw1bw1mean_back %>%
  filter(str_detect(blnc_vars, "BS_1012"), str_detect(ivars, "BS_1021"))

Review and select: Investment as a share of revenue past 12 months (level)

Select revenue past 12 month sum vs investment shares. We look at overall, business, and agriculture in three groups. Are they “large” relative to revenue, if they are, they are presumably hard to pay off right away.

Findings:

  • main: IS_1021 agg_BS_1021, note these are shares of revenue, they might seem low possibly, but firms would need to be highly profitable to be able to pay off debt with one year profits.
  • details:
    • IS_2021 agg_BS_2021
    • IS_3021 agg_BS_3021
  • aux:
    • IS_1012 agg_BS_1021, do not consider wage income, too confusing, profit vs revenue, all else is revenue, not profit.
# All
df_all_m12sum <- df_fw12bw12sum_back %>%
  rowwise() %>%
  filter(str_detect(blnc_vars, "IS_10"), str_detect(ivars, "BS_10"))
(df_all_m12sum) %>%
  kable() %>%
  kable_styling()
blnc_vars ivars shr_mean shr_p5 shr_p10 shr_p25 shr_p50 shr_p75 shr_p90 shr_p95
IS_1011 agg_BS_1011 5.2395354 0.0518194 0.0808174 0.1753382 0.3924437 0.9940082 3.0305128 7.357891
IS_1011 agg_BS_1012 2.2263033 0.0086380 0.0184070 0.0548920 0.1819977 0.5098756 1.5267083 3.443287
IS_1011 agg_BS_1021 0.5031246 0.0065079 0.0145840 0.0456048 0.1339956 0.3539975 0.8775887 1.618410
IS_1011 agg_BS_1022 0.4175894 0.0080693 0.0131166 0.0329538 0.0931717 0.2968234 0.9537436 1.752554
IS_1011 agg_BS_1023 2.2916629 0.0072623 0.0130505 0.0325970 0.1085830 0.4273612 1.5220145 3.584285
IS_1011 agg_BS_1024 2.0008409 0.0032689 0.0052594 0.0136578 0.0388198 0.1473416 0.7783918 2.207854
IS_1011 agg_BS_1025 0.4615035 0.0042393 0.0110120 0.0403119 0.1232199 0.3314882 0.7838534 1.349235
IS_1012 agg_BS_1011 32.4064602 0.0756927 0.1284194 0.2922908 0.8011254 3.1696988 16.5692433 50.550778
IS_1012 agg_BS_1012 6.8696336 0.0248754 0.0425379 0.1061457 0.3281604 1.0111954 4.3725962 12.716746
IS_1012 agg_BS_1021 2.2373752 0.0179115 0.0308655 0.0876579 0.2379055 0.7080460 2.1670775 4.761905
IS_1012 agg_BS_1022 2.5088201 0.0116684 0.0205167 0.0510150 0.1594974 0.5274727 1.9627276 4.324575
IS_1012 agg_BS_1023 6.9969871 0.0138134 0.0237766 0.0546235 0.1870670 0.8082766 3.9523468 11.688158
IS_1012 agg_BS_1024 6.1619314 0.0044547 0.0081570 0.0215525 0.0633105 0.2985075 2.0833333 7.847534
IS_1012 agg_BS_1025 2.1578524 0.0128107 0.0257477 0.0749301 0.2226015 0.6770833 1.9801980 4.239820
IS_1013 agg_BS_1011 46.5882017 0.0756927 0.1266173 0.2922908 0.8370755 3.8906974 31.8184223 102.081767
IS_1013 agg_BS_1012 12.3744098 0.0253947 0.0454316 0.1117097 0.3612966 1.2039110 6.5200087 24.424456
IS_1013 agg_BS_1021 4.4369751 0.0187840 0.0325894 0.0958181 0.2868884 0.9243833 3.7842079 13.333333
IS_1013 agg_BS_1022 3.8458373 0.0119344 0.0200739 0.0518324 0.1671119 0.6765582 2.3504982 7.649960
IS_1013 agg_BS_1023 12.2650283 0.0158694 0.0248754 0.0590000 0.1916933 0.9276179 4.6201208 17.950937
IS_1013 agg_BS_1024 11.4022987 0.0049834 0.0089705 0.0230326 0.0687610 0.3052503 2.2642364 12.396694
IS_1013 agg_BS_1025 4.2921924 0.0130243 0.0260938 0.0822883 0.2612517 0.8705779 3.2444574 12.875537
IS_1021 agg_BS_1011 40.2759785 0.0920663 0.1549652 0.3591124 1.0098054 3.8972197 18.4348148 59.815292
IS_1021 agg_BS_1012 9.1150478 0.0318605 0.0546490 0.1379379 0.4683347 1.6426657 6.8713062 19.289870
IS_1021 agg_BS_1021 3.3122666 0.0215899 0.0412088 0.1207132 0.3986743 1.3449753 4.6547924 11.586239
IS_1021 agg_BS_1022 2.7808661 0.0162154 0.0264470 0.0806926 0.2639442 1.1820886 4.0540541 10.863832
IS_1021 agg_BS_1023 8.5037024 0.0196506 0.0328946 0.0748491 0.2572008 1.2359403 5.2308457 14.705882
IS_1021 agg_BS_1024 6.8326442 0.0058097 0.0111982 0.0267622 0.0787324 0.3624314 2.1739130 7.753352
IS_1021 agg_BS_1025 3.1690396 0.0151093 0.0303763 0.1082640 0.3737306 1.2601921 4.3766269 10.664687
ffp_save_res_table(
  kbl(df_all_m12sum,
    format = "latex", booktabs = TRUE,
    caption = "Investment as share of revenue past 12 months (all, level)"
  ),
  "tb_all_m12sum", spt_res,
  df = df_all_m12sum, bl_save = ls_save_res[["tb_all_m12sum"]]
)
# Agriculture
df_agri_m12sum <- df_fw12bw12sum_back %>%
  rowwise() %>%
  filter(str_detect(blnc_vars, "IS_20"), str_detect(ivars, "BS_20"))
(df_agri_m12sum) %>%
  kable() %>%
  kable_styling()
blnc_vars ivars shr_mean shr_p5 shr_p10 shr_p25 shr_p50 shr_p75 shr_p90 shr_p95
IS_2011 agg_BS_2011 27.666354 0.0331972 0.0548407 0.1452395 0.5090307 1.819809 14.817191 54.17936
IS_2011 agg_BS_2021 6.207505 0.0162411 0.0247792 0.0606670 0.1931694 1.061360 3.409755 12.32706
IS_2012 agg_BS_2011 -6102.032943 -34.6837979 -5.0109862 0.0986652 0.5055650 1.754311 7.876581 21.38594
IS_2012 agg_BS_2021 -12.834206 -4.3369302 0.0106034 0.0667535 0.2425136 1.234425 4.535870 11.87107
IS_2013 agg_BS_2011 26.338823 0.0441231 0.0727741 0.1817482 0.5896692 2.062262 16.723766 60.60606
IS_2013 agg_BS_2021 8.451405 0.0200296 0.0306632 0.0809061 0.2639406 1.459940 5.523906 15.86207
IS_2014 agg_BS_2011 -6178.299499 -40.5680845 -5.6014996 0.1189885 0.5760816 1.998542 9.090909 29.09978
IS_2014 agg_BS_2021 -8.934978 -7.6833243 0.0097571 0.0825770 0.3201358 1.682491 6.626069 17.34542
IS_2015 agg_BS_2011 84.904610 0.0336900 0.0535423 0.1512916 0.5698799 2.140090 14.817191 61.23807
IS_2015 agg_BS_2021 19.688696 0.0178263 0.0282123 0.0786899 0.2845931 1.499270 5.537795 15.88110
IS_2016 agg_BS_2011 85.050304 0.0232398 0.0608733 0.2116925 0.8304432 3.318593 18.930811 83.29324
IS_2016 agg_BS_2021 20.169753 0.0148897 0.0346153 0.1171678 0.4355601 2.107264 8.620690 27.02802
IS_2021 agg_BS_2011 62.850706 0.0445204 0.0713936 0.1878591 0.6367193 2.132510 11.090050 40.47634
IS_2021 agg_BS_2021 5.688836 0.0244642 0.0399742 0.1070825 0.4185530 1.743829 6.053045 12.84006
IS_2022 agg_BS_2011 59.768020 0.0246408 0.0710714 0.2562972 0.9324998 3.363046 14.483000 47.37205
IS_2022 agg_BS_2021 6.130690 0.0195575 0.0405807 0.1390665 0.5651988 2.354677 8.989975 19.04448
ffp_save_res_table(
  kbl(df_agri_m12sum,
    format = "latex", booktabs = TRUE,
    caption = "Investment as share of revenue past 12 months (agriculture, level)"
  ),
  "tb_agri_m12sum", spt_res,
  df = df_agri_m12sum, bl_save = ls_save_res[["tb_agri_m12sum"]]
)
# Business
df_biz_m12sum <- df_fw12bw12sum_back %>%
  rowwise() %>%
  filter(str_detect(blnc_vars, "IS_30"), str_detect(ivars, "BS_30"))
(df_biz_m12sum) %>%
  kable() %>%
  kable_styling()
blnc_vars ivars shr_mean shr_p5 shr_p10 shr_p25 shr_p50 shr_p75 shr_p90 shr_p95
IS_3011 agg_BS_3011 11.106808 0.0414426 0.0762695 0.1923083 0.5952935 2.2182231 11.324571 33.258074
IS_3011 agg_BS_3021 12.312756 0.0026585 0.0071920 0.0184186 0.0900940 0.2832460 1.972595 4.501801
IS_3011 agg_BS_3022 9.722507 0.0350741 0.0642672 0.1838533 0.5835434 2.1567316 11.324571 31.101592
IS_3021 agg_BS_3011 23.428332 0.0483213 0.0890390 0.3143638 1.6317897 8.8452561 34.274682 85.173870
IS_3021 agg_BS_3021 3.707475 0.0071231 0.0139786 0.0388632 0.1392530 0.5942617 3.155637 14.595556
IS_3021 agg_BS_3022 22.702538 0.0350403 0.0580712 0.2895929 1.5109778 8.0126246 33.911441 83.244354
IS_3022 agg_BS_3011 98.150125 0.0161570 0.2145536 0.7313960 2.7481983 11.6487200 38.494622 93.878400
IS_3022 agg_BS_3021 3.971525 -0.1539095 0.0066440 0.0785323 0.3008507 1.0622332 6.424242 30.000000
IS_3022 agg_BS_3022 96.542162 0.0161719 0.1193461 0.6812955 2.5219545 11.2359746 36.245194 89.790831
ffp_save_res_table(
  kbl(df_biz_m12sum,
    format = "latex", booktabs = TRUE,
    caption = "Investment as share of revenue past 12 months (business, level)"
  ),
  "tb_biz_m12sum", spt_res,
  df = df_biz_m12sum, bl_save = ls_save_res[["tb_biz_m12sum"]]
)

Select subset of key stats to be displayed.

# Results 1
df_m12sum_agri_bs2021 <- df_fw12bw12sum_back %>%
  filter(str_detect(blnc_vars, "IS_2021"), str_detect(ivars, "BS_2021"))
# Results 2
df_m12sum_biz_bs3021 <- df_fw12bw12sum_back %>%
  filter(str_detect(blnc_vars, "IS_3021"), str_detect(ivars, "BS_3021"))
# Results 3a
df_m12sum_all_bs1021 <- df_fw12bw12sum_back %>%
  filter(str_detect(blnc_vars, "IS_1021"), str_detect(ivars, "BS_1021"))
# Results 3b
df_m12sum_all_bs1021v1012 <- df_fw12bw12sum_back %>%
  filter(str_detect(blnc_vars, "IS_1012"), str_detect(ivars, "BS_1021"))
# Bind rows for select

Combine 1 and 12 month info, transpose, and prep

First, combine all rows together and combine blnc_vars and ivars. Do not resort, preserve existing orders. Drop the mean, not informative.

# Bind rows for selected key results:
df_m1mean_m12_sum_jnt <- bind_rows(
  df_m1mean_agri_bs2021,
  df_m12sum_agri_bs2021,
  df_m1mean_biz_bs3021,
  df_m12sum_biz_bs3021,
  df_m1mean_all_bs1021v1012,
  df_m12sum_all_bs1021v1012,
  df_m1mean_all_bs1021,
  df_m12sum_all_bs1021
) %>%
  mutate(
    blnc_vars_ivars = paste0(ivars, "_d_", blnc_vars)
  ) %>%
  select(
    blnc_vars_ivars, -blnc_vars, -ivars,
    contains(st_var_prefix_perc)
  )

Second, reshape wide to long then to wide again to transpose.

df_m1mean_m12_sum_jnt_trans <- df_m1mean_m12_sum_jnt %>%
  pivot_longer(
    cols = starts_with(st_var_prefix_perc),
    names_to = c("percentile"),
    names_pattern = paste0(st_var_prefix_perc, "(.*)"),
    values_to = "value"
  ) %>%
  pivot_wider(
    names_from = blnc_vars_ivars,
    values_from = value
  )

Third, percentage formatting.

df_m1mean_m12_sum_jnt_trans <- df_m1mean_m12_sum_jnt_trans %>%
  mutate_at(
    vars(contains("agg_")),
    list(~ paste0(
      format(round(., 3) * 100,
        nsmall = 1,
        big.mark = ","
      ),
      "%"
    ))
  )

Table final display

First, we define column names, which correspond to previously defined variable selection list.

# Column names
ar_st_col_names <- c(
  "Percentiles",
  "$\\frac{\\text{Invest size}}{\\text{Pre mth assets}}$",
  "$\\frac{\\text{Invest size}}{\\text{Pre 12 mth rev}}$",
  "$\\frac{\\text{Invest size}}{\\text{Pre mth assets}}$",
  "$\\frac{\\text{Invest size}}{\\text{Pre 12 mth rev}}$",
  "$\\frac{\\text{Invest size}}{\\text{Pre mth assets}}$",
  "$\\frac{\\text{Invest size}}{\\text{Pre 12 mth rev}}$",
  "$\\frac{\\text{Invest size}}{\\text{Pre mth assets}}$",
  "$\\frac{\\text{Invest size}}{\\text{Pre 12 mth rev}}$"
)
# Define column groups, grouping the names above
ar_st_col_groups <- c(
  " " = 1,
  "Invest/assets/rev" = 2,
  "Invest/assets/rev" = 2,
  "All prod assets/rev" = 2,
  "Agri + biz assets/rev" = 2
)
# Define column groups, grouping the names above
ar_st_col_groups_super <- c(
  " " = 1,
  "Agricultural" = 2,
  "Business" = 2,
  "Agricutural + business investments" = 4
)

# Second, we construct main table, and add styling.
f_bk_invest_prior_ratio <- function(st_format) {
  bk_invest_prior_ratio <- kbl(
    df_m1mean_m12_sum_jnt_trans,
    format = st_format,
    # escape = F,
    linesep = "",
    booktabs = T,
    align = "c",
    caption = "Ratio of investment to prior to investment month assets and year revenues.",
    col.names = ar_st_col_names
  ) %>%
    # see https://cran.r-project.org/web/packages/kableExtra/vignettes/awesome_table_in_html.html#Bootstrap_table_classes
    kable_styling(
      bootstrap_options = c("striped", "hover", "condensed", "responsive"),
      full_width = F, position = "left"
    )

  # Third, we add in row groups
  bk_invest_prior_ratio <- bk_invest_prior_ratio %>%
    add_header_above(ar_st_col_groups) %>%
    add_header_above(ar_st_col_groups_super)
  # %>%
  # # collapse_rows(columns = 2:5, latex_hline = "major", valign = "middle")

  # Fourth, we add in column groups.
  bk_invest_prior_ratio <- bk_invest_prior_ratio %>%
    pack_rows(
      "Bottom decile",
      1, 2,
      latex_gap_space = "0.5em"
    ) %>%
    pack_rows(
      "Quartiles",
      3, 5,
      latex_gap_space = "0.5em", hline_before = F
    ) %>%
    pack_rows(
      "Top decile",
      6, 7,
      latex_gap_space = "0.5em", hline_before = F
    )

  # Fifth, column formatting.
  bk_invest_prior_ratio <- bk_invest_prior_ratio %>%
    column_spec(1, width = "2.5cm") %>%
    column_spec(2:9, width = "2cm")

  # Final adjustments
  st_texcmd <- "frac"
  bk_invest_prior_ratio <- gsub(bk_invest_prior_ratio,
    pattern = paste0("\\textbackslash{}", st_texcmd, "\\"),
    replacement = paste0("\\", st_texcmd), fixed = TRUE
  )
  st_texcmd <- "text"
  bk_invest_prior_ratio <- gsub(bk_invest_prior_ratio,
    pattern = paste0("\\textbackslash{}", st_texcmd, "\\"),
    replacement = paste0("\\", st_texcmd), fixed = TRUE
  )
  bk_invest_prior_ratio <- gsub(bk_invest_prior_ratio,
    pattern = "\\}\\{",
    replacement = "}{", fixed = TRUE
  )
  bk_invest_prior_ratio <- gsub(bk_invest_prior_ratio,
    pattern = "\\}",
    replacement = "}", fixed = TRUE
  )
  bk_invest_prior_ratio <- gsub(bk_invest_prior_ratio,
    pattern = "\\$",
    replacement = "$", fixed = TRUE
  )

  return(bk_invest_prior_ratio)
}

# Build for display and save a latex (+ csv) copy.
bk_invest_prior_ratio <- f_bk_invest_prior_ratio(st_kableformat)
ffp_save_res_table(
  f_bk_invest_prior_ratio("latex"), "bk_invest_prior_ratio", spt_res,
  df = df_m1mean_m12_sum_jnt_trans,
  bl_save = ls_save_res[["bk_invest_prior_ratio"]]
)

# Sixth, display.
# pl_bk_asset_count <- bk_invest_prior_ratio %>% as_image()
bk_invest_prior_ratio
Ratio of investment to prior to investment month assets and year revenues.
Agricultural
Business
Agricutural + business investments
Invest/assets/rev
Invest/assets/rev
All prod assets/rev
Agri + biz assets/rev
Percentiles $\frac{\text{Invest size}}{\text{Pre mth assets}}$ $\frac{\text{Invest size}}{\text{Pre 12 mth rev}}$ $\frac{\text{Invest size}}{\text{Pre mth assets}}$ $\frac{\text{Invest size}}{\text{Pre 12 mth rev}}$ $\frac{\text{Invest size}}{\text{Pre mth assets}}$ $\frac{\text{Invest size}}{\text{Pre 12 mth rev}}$ $\frac{\text{Invest size}}{\text{Pre mth assets}}$ $\frac{\text{Invest size}}{\text{Pre 12 mth rev}}$
Bottom decile
5 14.0% 2.4% 8.2% 0.7% 0.2% 1.8% 17.1% 2.2%
10 22.3% 4.0% 15.2% 1.4% 0.4% 3.1% 25.5% 4.1%
Quartiles
25 45.2% 10.7% 37.3% 3.9% 1.3% 8.8% 54.4% 12.1%
50 128.8% 41.9% 84.6% 13.9% 5.4% 23.8% 153.3% 39.9%
75 647.4% 174.4% 336.6% 59.4% 18.0% 70.8% 588.2% 134.5%
Top decile
90 4,509.2% 605.3% 1,869.6% 315.6% 84.0% 216.7% 2,852.3% 465.5%
95 13,613.1% 1,284.0% 6,587.3% 1,459.6% 225.0% 476.2% 8,294.5% 1,158.6%