flowchart TD
subgraph inputs [Packaged data inputs]
assetLoan[tstm_asset_loan]
mthspan[tstm_hh_mthspan]
end
subgraph regen [Regenerate investments]
gw[ffp_hfid_invest_gateway full agg_BS set]
assetLoan --> gw
gw --> invest[tstm_invest]
gw --> invdates[tstm_invdates_uniq]
end
subgraph freq [Investment frequency]
mthspan --> dur[filter min duration it_hh_mth_nbr]
invest --> cnt[count investments per hh and ivars]
dur --> cnt
cnt --> freqtab[lumpiness frequency table]
end
subgraph sizes [Investment sizes]
invdates --> win[window functions forward backward]
assetLoan --> ws[winstats balance aggregation]
invest --> ws
win --> ws
ws --> ratio[prior-ratio stats by asset class]
ratio --> sizetab[investment sizes table]
end
We implement PrjThaiHFID-#9.
Data workflow (independent of the bridge gateway vignette). This vignette is not tied to ffv_invest_loan_bridge.qmd or to packaged data/tstm_invest.rda (which holds the MBF gateway’s three paper ivars only). We lazy-load tstm_hh_mthspan and tstm_asset_loan from data/ (both use anonymized tmid_hh in column id; see R/data.R and data-raw/id_anonymize/), then regenerate tstm_invest and tstm_invdates_uniq via ffp_hfid_invest_gateway() on all agg_BS_* columns, so unique(tstm_invest$ivars) is the full asset-definition set (~12 ivars), not the three-ivar gateway snapshot.
Unit of observation: household x asset-definition ivars for the frequency tables; investment spell x balance-sheet window aggregation for the size tables.
Outputs at a glance
This vignette produces summary tables only — it does not save any packaged dataset. Individual tables can be written to the package res/res_invest_freq_sizes folder by setting their entry in ls_save_res to TRUE (all default FALSE; the res/ artifacts are git-ignored and saved only for local convenience). Two table families are generated:
-
Investment lumpiness / frequency: distribution of the number of investments per household across asset definitions (
ivars), with auxiliary shares. - Investment sizes: windowed balance-sheet changes around investments (forward/backward windows), reported as prior-ratio statistics by asset class.
Inputs are the packaged tstm_asset_loan and tstm_hh_mthspan; investments are regenerated in-file via ffp_hfid_invest_gateway() (full agg_BS_* set) rather than read from packaged tstm_invest.
Pipeline structure
library(PrjThaiHFID)
library(dplyr)
#>
#> Attaching package: 'dplyr'
#> The following objects are masked from 'package:stats':
#>
#> filter, lag
#> The following objects are masked from 'package:base':
#>
#> intersect, setdiff, setequal, union
library(tidyr)
library(stringr)
library(glue)
library(ggplot2)
library(kableExtra)
#>
#> Attaching package: 'kableExtra'
#> The following object is masked from 'package:dplyr':
#>
#> group_rows
spn_pkg_root <- rprojroot::find_root(rprojroot::has_file("DESCRIPTION"))
spt_res <- file.path(spn_pkg_root, "res", "res_invest_freq_sizes")
ls_save_res <- list(
bk_asset_count = FALSE,
bk_invest_prior_ratio = FALSE,
tb_all_m1mean = FALSE,
tb_agri_m1mean = FALSE,
tb_biz_m1mean = FALSE,
tb_all_m12sum = FALSE,
tb_agri_m12sum = FALSE,
tb_biz_m12sum = FALSE
)
bs_style <- c("striped", "hover", "condensed", "responsive")
options(kable_styling_bootstrap_options = bs_style)Parameters
# Choose latex for paper output
# st_kableformat <- "latex"
st_kableformat <- "html"
# Minimum month duration for a household to be counted
it_hh_mth_nbr <- 161
# Percentiles of interest
ar_fl_percentiles <- c(0.05, 0.10, 0.25, 0.50, 0.75, 0.9, 0.95)Load packaged inputs and regenerate investments
# Packaged inputs (documented in R/data.R; anonymized tmid_hh in column id)
tstm_hh_mthspan <- tstm_hh_mthspan
tstm_asset_loan <- tstm_asset_loan
# Invest-only gateway on full asset file (not packaged data/tstm_invest)
ls_return <- PrjThaiHFID::ffp_hfid_invest_gateway(
tstm_asset_loan,
fl_sd_ithres = stats::qnorm(0.99),
it_thres_invest_mth_gap = 2
)
tstm_invest <- ls_return$tstm_invest
tstm_invdates_uniq <- ls_return$tstm_invdates_uniqImplement table to show lumpiness of investments
This block generates Table 1 issue 9.
Generate stats group 1
First, load in, househould start and end months (uoo=hh), and investment file (uoo=hhxivarxinvest), already done.
# Check
ar_st_ivars <- unique(tstm_invest$ivars)Second, from invest file, group by hh x ivar, and count the numver of investments.
# Load files, already loaded tstm_hh_mthspan for hh start and end months
# investmetn file
st_id_ivar_n <- "id_ivar_n"
tstm_invest_idivar_n <- tstm_invest %>%
group_by(id, ivars) %>%
summarize(!!sym(st_id_ivar_n) := n())
#> `summarise()` has regrouped the output.
#> ℹ Summaries were computed grouped by id and ivars.
#> ℹ Output is grouped by id.
#> ℹ Use `summarise(.groups = "drop_last")` to silence this message.
#> ℹ Use `summarise(.by = c(id, ivars))` for per-operation grouping
#> (`?dplyr::dplyr_by`) instead.
# Check
unique(tstm_invest_idivar_n$ivars)
#> [1] "agg_BS_1011" "agg_BS_1012" "agg_BS_1021" "agg_BS_1022" "agg_BS_1023"
#> [6] "agg_BS_1025" "agg_BS_2011" "agg_BS_2021" "agg_BS_3011" "agg_BS_3022"
#> [11] "agg_BS_1024" "agg_BS_3021"Third, merge with household startend file, replace NA with zero for each invest count.
# Subsetting hh with min obs count
tstm_hh_mthspan_sel <- tstm_hh_mthspan %>%
filter(month_n == it_hh_mth_nbr)
# Left join to investment counts
# For households without any investments, they will have ivars=NA
tstm_hh_mthspan_wrk <- tstm_hh_mthspan_sel %>%
left_join(tstm_invest_idivar_n, by = "id")
# Reshape long to wide, to have all ivars as cols, replace NA by 0
# Follow https://fanwangecon.github.io/R4Econ/panel/widelong/htmlpdfr/fs_pivotwider.html
st_ivar_prefix <- "ivar"
st_ivar_prefix_d <- paste0(st_ivar_prefix, "_")
tstm_hh_mthspan_wrk_wide <- tstm_hh_mthspan_wrk %>%
pivot_wider(
id_cols = c("id"),
names_from = ivars,
names_prefix = st_ivar_prefix_d,
values_from = st_id_ivar_n
)
#> Warning: Using an external vector in selections was deprecated in tidyselect 1.1.0.
#> ℹ Please use `all_of()` or `any_of()` instead.
#> # Was:
#> data %>% select(st_id_ivar_n)
#>
#> # Now:
#> data %>% select(all_of(st_id_ivar_n))
#>
#> See <https://tidyselect.r-lib.org/reference/faq-external-vector.html>.
# Drop NA column, due to hh without any invest only ivar=NA
# creating an extra category, all 0 invest household still have
# all other ivars
tstm_hh_mthspan_wrk_wide <- tstm_hh_mthspan_wrk_wide %>%
select(-dplyr::any_of("ivar_NA"))
# replace NA values by 0s
tstm_hh_mthspan_wrk_wide <- tstm_hh_mthspan_wrk_wide %>%
mutate_at(
vars(contains(st_ivar_prefix_d)),
list(~ replace_na(., 0))
)
# Reshape back to long
# Follow https://fanwangecon.github.io/R4Econ/panel/widelong/htmlpdfr/fs_pivotlonger.html
tstm_hh_mthspan_wrk_long <- tstm_hh_mthspan_wrk_wide %>%
pivot_longer(
cols = starts_with(st_ivar_prefix_d),
names_to = c(st_ivar_prefix),
names_pattern = paste0(st_ivar_prefix_d, "(.*)"),
values_to = st_id_ivar_n
)Fourth, group by ivar and invest count, tally.
# tally
tb_ivar_n_tally <- tstm_hh_mthspan_wrk_long %>%
group_by(ivar, id_ivar_n) %>%
tally() %>%
spread(ivar, n)
# NA to zero
tb_ivar_n_tally <- tb_ivar_n_tally %>%
mutate_at(vars(contains("agg_")), list(~ replace_na(., 0)))
# print
print(tb_ivar_n_tally, n = Inf)
#> # A tibble: 12 × 13
#> id_ivar_n agg_BS_1011 agg_BS_1012 agg_BS_1021 agg_BS_1022 agg_BS_1023
#> <int> <int> <int> <int> <int> <int>
#> 1 0 22 75 59 170 100
#> 2 1 126 182 186 224 213
#> 3 2 144 160 171 123 142
#> 4 3 151 89 113 53 67
#> 5 4 96 62 49 27 43
#> 6 5 43 25 20 5 18
#> 7 6 15 11 8 4 14
#> 8 7 8 1 0 0 6
#> 9 8 1 1 0 0 0
#> 10 9 0 0 0 0 2
#> 11 10 0 0 0 0 0
#> 12 11 0 0 0 0 1
#> # ℹ 7 more variables: agg_BS_1024 <int>, agg_BS_1025 <int>, agg_BS_2011 <int>,
#> # agg_BS_2021 <int>, agg_BS_3011 <int>, agg_BS_3021 <int>, agg_BS_3022 <int>Fifth, proportional version
# Freq to shares
tb_ivar_n_tally_share <- tb_ivar_n_tally %>%
mutate_at(
vars(contains("agg_")),
list(~ . / sum(.))
)
# Check
colSums(tb_ivar_n_tally_share)
#> id_ivar_n agg_BS_1011 agg_BS_1012 agg_BS_1021 agg_BS_1022 agg_BS_1023
#> 66 1 1 1 1 1
#> agg_BS_1024 agg_BS_1025 agg_BS_2011 agg_BS_2021 agg_BS_3011 agg_BS_3021
#> 1 1 1 1 1 1
#> agg_BS_3022
#> 1
# print
print(round(tb_ivar_n_tally_share, 2), n = Inf)
#> # A tibble: 12 × 13
#> id_ivar_n agg_BS_1011 agg_BS_1012 agg_BS_1021 agg_BS_1022 agg_BS_1023
#> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
#> 1 0 0.04 0.12 0.1 0.28 0.17
#> 2 1 0.21 0.3 0.31 0.37 0.35
#> 3 2 0.24 0.26 0.28 0.2 0.23
#> 4 3 0.25 0.15 0.19 0.09 0.11
#> 5 4 0.16 0.1 0.08 0.04 0.07
#> 6 5 0.07 0.04 0.03 0.01 0.03
#> 7 6 0.02 0.02 0.01 0.01 0.02
#> 8 7 0.01 0 0 0 0.01
#> 9 8 0 0 0 0 0
#> 10 9 0 0 0 0 0
#> 11 10 0 0 0 0 0
#> 12 11 0 0 0 0 0
#> # ℹ 7 more variables: agg_BS_1024 <dbl>, agg_BS_1025 <dbl>, agg_BS_2011 <dbl>,
#> # agg_BS_2021 <dbl>, agg_BS_3011 <dbl>, agg_BS_3021 <dbl>, agg_BS_3022 <dbl>Generate stats group 2
Load in group 1 stats above, and compute weighted average overall for each ivar.
First, compute means including zeros.
# Prob * Val
tb_ivar_n_tally_mean <- tb_ivar_n_tally_share %>%
mutate_at(
vars(contains("agg_")),
list(~ . * id_ivar_n)
)
# Sum each column
tb_ivar_n_tally_mean <- tb_ivar_n_tally_mean %>%
mutate_at(
vars(contains("agg_")),
list(~ sum(.))
) %>%
slice_head(n = 1) %>%
select(contains("agg_"))
# Divide by number of years
tb_ivar_n_tally_mean_peryear <- tb_ivar_n_tally_mean %>%
mutate_at(
vars(contains("agg_")),
list(~ . / (it_hh_mth_nbr / 12))
)
# Print
print(tb_ivar_n_tally_mean)
#> # A tibble: 1 × 12
#> agg_BS_1011 agg_BS_1012 agg_BS_1021 agg_BS_1022 agg_BS_1023 agg_BS_1024
#> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
#> 1 2.67 2.02 2.00 1.30 1.84 1.85
#> # ℹ 6 more variables: agg_BS_1025 <dbl>, agg_BS_2011 <dbl>, agg_BS_2021 <dbl>,
#> # agg_BS_3011 <dbl>, agg_BS_3021 <dbl>, agg_BS_3022 <dbl>
print(tb_ivar_n_tally_mean_peryear)
#> # A tibble: 1 × 12
#> agg_BS_1011 agg_BS_1012 agg_BS_1021 agg_BS_1022 agg_BS_1023 agg_BS_1024
#> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
#> 1 0.199 0.150 0.149 0.0967 0.137 0.138
#> # ℹ 6 more variables: agg_BS_1025 <dbl>, agg_BS_2011 <dbl>, agg_BS_2021 <dbl>,
#> # agg_BS_3011 <dbl>, agg_BS_3021 <dbl>, agg_BS_3022 <dbl>Second, compute means excluding zeros.
# Recompute shares, after excluding zero
tb_ivar_n_tally_share_nozr <- tb_ivar_n_tally %>%
filter(id_ivar_n != 0) %>%
mutate_at(
vars(contains("agg_")),
list(~ . / sum(.))
)
# print
colSums(tb_ivar_n_tally_share_nozr)
#> id_ivar_n agg_BS_1011 agg_BS_1012 agg_BS_1021 agg_BS_1022 agg_BS_1023
#> 66 1 1 1 1 1
#> agg_BS_1024 agg_BS_1025 agg_BS_2011 agg_BS_2021 agg_BS_3011 agg_BS_3021
#> 1 1 1 1 1 1
#> agg_BS_3022
#> 1
# Prob * Val
tb_ivar_n_tally_mean_nozr <- tb_ivar_n_tally_share_nozr %>%
filter(id_ivar_n != 0) %>%
mutate_at(
vars(contains("agg_")),
list(~ . * id_ivar_n)
)
# Sum each column
tb_ivar_n_tally_mean_nozr <- tb_ivar_n_tally_mean_nozr %>%
mutate_at(
vars(contains("agg_")),
list(~ sum(.))
) %>%
slice_head(n = 1) %>%
select(contains("agg_"))
# Divide by number of years
tb_ivar_n_tally_mean_nozr_peryear <- tb_ivar_n_tally_mean_nozr %>%
mutate_at(
vars(contains("agg_")),
list(~ . / (it_hh_mth_nbr / 12))
)
# Print
print(tb_ivar_n_tally_mean_nozr)
#> # A tibble: 1 × 12
#> agg_BS_1011 agg_BS_1012 agg_BS_1021 agg_BS_1022 agg_BS_1023 agg_BS_1024
#> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
#> 1 2.77 2.30 2.21 1.80 2.20 2.41
#> # ℹ 6 more variables: agg_BS_1025 <dbl>, agg_BS_2011 <dbl>, agg_BS_2021 <dbl>,
#> # agg_BS_3011 <dbl>, agg_BS_3021 <dbl>, agg_BS_3022 <dbl>
print(tb_ivar_n_tally_mean_nozr_peryear)
#> # A tibble: 1 × 12
#> agg_BS_1011 agg_BS_1012 agg_BS_1021 agg_BS_1022 agg_BS_1023 agg_BS_1024
#> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
#> 1 0.207 0.172 0.165 0.134 0.164 0.179
#> # ℹ 6 more variables: agg_BS_1025 <dbl>, agg_BS_2011 <dbl>, agg_BS_2021 <dbl>,
#> # agg_BS_3011 <dbl>, agg_BS_3021 <dbl>, agg_BS_3022 <dbl>Generate stats group 3
First, Load in household monthly asset file, already loaded tstm_asset_loan.
Second, convert if household x monthly elements are non-zero to 1, zero keep as 0.
Third group by, sum monthly binary if var exists, aggregate to household level, binary if sum > 1 or if sum = 0
Fourth, merge with household startend file, condition as needed.
Fifth, compute share of households with non-zero
Table display
Table input processing
First, sorting instructions.
2025-05-24 15:50:50, select:
- Agricultural: agg_BS_1025
- Business: agg_BS_3021
- Land: agg_BS_1024
- Household assets: agg_BS_3022
- Agri + biz: agg_BS_1021
- agri + biz + land: agg_BS_1012
- Agri + biz + land + household: agg_BS_1011
# # Variable sequence
# ar_st_vars_kbl <- c(
# "id_ivar_n",
# "agg_BS_10", "agg_BS_11", "agg_BS_12",
# "agg_BS_23", "agg_BS_09"
# )
# Variable sequence
ar_st_vars_kbl <- c(
"id_ivar_n",
"agg_BS_1025", "agg_BS_3021",
"agg_BS_1024", "agg_BS_3022",
"agg_BS_1021", "agg_BS_1012", "agg_BS_1011"
)Second, clean-up stats group 1.
# Sum up rows where id_ivar_n >= 8 and create a single row for that, then remove individual rows >= 8
# Convert id_ivar_n to numeric for filtering
tb_ivar_n_tally_share <- tb_ivar_n_tally_share %>%
mutate(id_ivar_n = as.numeric(id_ivar_n))
# Separate rows with id_ivar_n < 8 and >= 8
tb_ivar_n_tally_share_lt8 <- tb_ivar_n_tally_share %>%
filter(id_ivar_n < 8) %>%
mutate(id_ivar_n = as.character(id_ivar_n))
tb_ivar_n_tally_share_ge8 <- tb_ivar_n_tally_share %>%
filter(id_ivar_n >= 8)
# Sum across columns for rows with id_ivar_n >= 8
if (nrow(tb_ivar_n_tally_share_ge8) > 0) {
summed_row <- tb_ivar_n_tally_share_ge8 %>%
summarize(across(where(is.numeric), sum, na.rm = TRUE)) %>%
mutate(id_ivar_n = "8+")
# Bind together
tb_ivar_n_tally_share <- bind_rows(tb_ivar_n_tally_share_lt8, summed_row)
} else {
tb_ivar_n_tally_share <- tb_ivar_n_tally_share_lt8
}
#> Warning: There was 1 warning in `summarize()`.
#> ℹ In argument: `across(where(is.numeric), sum, na.rm = TRUE)`.
#> Caused by warning:
#> ! The `...` argument of `across()` is deprecated as of dplyr 1.1.0.
#> Supply arguments directly to `.fns` through an anonymous function instead.
#>
#> # Previously
#> across(a:b, mean, na.rm = TRUE)
#>
#> # Now
#> across(a:b, \(x) mean(x, na.rm = TRUE))
# Restore id_ivar_n as character for downstream compatibility
tb_ivar_n_tally_share <- tb_ivar_n_tally_share %>%
mutate(id_ivar_n = as.character(id_ivar_n))Third, clean-up and combine auxliary stats
# Combine auxillary stats
tb_tally_aux1_cur <- bind_rows(
tb_ivar_n_tally_mean_peryear %>% mutate(id_ivar_n = "Include 0s"),
tb_ivar_n_tally_mean_nozr_peryear %>% mutate(id_ivar_n = "Exclude 0s")
) %>% mutate_at(
vars(contains("agg_")),
list(~ as.character(
format(round(., 2), nsmall = 2)
))
)
# Auxillary share stasts
tb_tally_aux2_cur <- bind_rows(
tstm_asset_bi_agg %>% mutate(id_ivar_n = "Share")
) %>% mutate_at(
vars(contains("agg_")),
list(~ paste0(
format(round(., 3) * 100, nsmall = 1),
"%"
))
)Fourth, combine auxliary and main stats.
Table final display
First, we define column names, which correspond to previously defined variable selection list.
# Column names
# ar_st_col_names <- c(
# "# of investments",
# "Land + livestock + agri assets",
# "Agri assets",
# "Business + household assets",
# "Business assets",
# "Land + livest. + agri + biz + household",
# "All (w/o household assets)",
# "Agri + business assets"
# )
# 2025-05-24 15:53:38
ar_st_col_names <- c(
"# of investments",
"Agricultural",
"Business",
"Land",
"Household",
"Agricultural and business",
"Agricultural, business, and land",
"Agricultural, business, land, and household"
)
# "All (Land + livestock + agri + household + biz)",
# "Productive assets (Land + livestock + agri + biz)",
st_texsize <- "footnotesize"
ar_st_col_names <- paste0(
"\\", st_texsize, "{", ar_st_col_names, "}"
)
# Define column groups, grouping the names above
# =1/3/2 are number of columns group title covers
ar_st_col_groups <- c(
" " = 1,
"Individual assets categories" = 4,
"Joint assets definitions" = 3
)
# Second, we construct main table, and add styling.
f_bk_asset_count <- function(st_format) {
bk_asset_count <- kbl(
tb_tally_jnt,
format = st_format,
# escape = F,
linesep = "",
booktabs = T,
align = "c",
caption = "Percentage of households with number of investments, survey month 1 to 160",
col.names = ar_st_col_names
) %>%
# see https://cran.r-project.org/web/packages/kableExtra/vignettes/awesome_table_in_html.html#Bootstrap_table_classes
kable_styling(
bootstrap_options = c("striped", "hover", "condensed", "responsive"),
full_width = F, position = "left"
)
# Third, we add in column groups.
bk_asset_count <- bk_asset_count %>% add_header_above(ar_st_col_groups)
# Fourth, we add in row groups.
bk_asset_count <- bk_asset_count %>%
pack_rows(
"Share of households with different number of investments over 160 months",
1, 9,
latex_gap_space = "0.5em"
) %>%
pack_rows(
"Mean number of investments per year over 160 months",
10, 11,
latex_gap_space = "0.5em", hline_before = T
) %>%
pack_rows(
"Share of household having any month with non-zero assets over 160 months",
12, 12,
latex_gap_space = "0.5em", hline_before = T
)
# Fifth, column formatting.
bk_asset_count <- bk_asset_count %>%
column_spec(1, width = "2.5cm") %>%
column_spec(2:8, width = "2cm")
# FInal adjustments
bk_asset_count <- gsub(bk_asset_count,
pattern = paste0("\\textbackslash{}", st_texsize, "\\"),
replacement = paste0("\\", st_texsize), fixed = TRUE
)
bk_asset_count <- gsub(bk_asset_count,
pattern = "\\}",
replacement = "}", fixed = TRUE
)
bk_asset_count <- gsub(bk_asset_count,
pattern = "hline",
replacement = "midrule", fixed = TRUE
)
return(bk_asset_count)
}
# Build for display and save a latex (+ csv) copy.
bk_asset_count <- f_bk_asset_count(st_kableformat)
ffp_save_res_table(
f_bk_asset_count("latex"), "bk_asset_count", spt_res,
df = tb_tally_jnt,
bl_save = ls_save_res[["bk_asset_count"]]
)
# Sixth, display.
# pl_bk_asset_count <- bk_asset_count %>% as_image()
bk_asset_count| \footnotesize{# of investments} | \footnotesize{Agricultural} | \footnotesize{Business} | \footnotesize{Land} | \footnotesize{Household} | \footnotesize{Agricultural and business} | \footnotesize{Agricultural, business, and land} | \footnotesize{Agricultural, business, land, and household} |
|---|---|---|---|---|---|---|---|
| Share of households with different number of investments over 160 months | |||||||
| 0 | 12.4% | 79.4% | 23.3% | 0.7% | 9.7% | 12.4% | 3.6% |
| 1 | 27.9% | 14.2% | 33.0% | 22.9% | 30.7% | 30.0% | 20.8% |
| 2 | 29.9% | 4.6% | 17.8% | 29.9% | 28.2% | 26.4% | 23.8% |
| 3 | 17.5% | 1.0% | 10.4% | 24.3% | 18.6% | 14.7% | 24.9% |
| 4 | 8.7% | 0.5% | 5.3% | 13.9% | 8.1% | 10.2% | 15.8% |
| 5 | 2.5% | 0.2% | 3.8% | 5.3% | 3.3% | 4.1% | 7.1% |
| 6 | 1.2% | 0.2% | 2.6% | 2.3% | 1.3% | 1.8% | 2.5% |
| 7 | 0.0% | 0.0% | 2.6% | 0.7% | 0.0% | 0.2% | 1.3% |
| 8+ | 0.0% | 0.0% | 1.2% | 0.2% | 0.0% | 0.2% | 0.2% |
| Mean number of investments per year over 160 months | |||||||
| Include 0s | 0.14 | 0.02 | 0.14 | 0.19 | 0.15 | 0.15 | 0.20 |
| Exclude 0s | 0.17 | 0.11 | 0.18 | 0.19 | 0.17 | 0.17 | 0.21 |
| Share of household having any month with non-zero assets over 160 months | |||||||
| Share | 91.9% | 23.9% | 94.6% | 100.0% | 94.6% | 99.0% | 100.0% |
Investment table to show sizes of investments
Prepare input data
Generate investment windows by unique investments
Generate in investment file (uoo=hhxivarxinvest), and load in windowed stats file (uoo=hhxivarxinvestxvarxwin) 12 month as 1 month window versions.
First, generate in investment file (uoo=hhxivarxinvest), and load in windowed stats file (uoo=hhxivarxinvestxvarxwin) 12 month as 1 month window versions.
# 1 month window file, changes in capital-assets themselves (as well as cash/debt)
df_invwin_uniq_fw1bw1 <- PrjThaiHFID::ff_hfid_invest_window(
df_invdates_uniq = tstm_invdates_uniq,
it_window_forward = 1,
it_window_backward = 1,
bl_forward_include_imth = FALSE,
bl_backward_include_imth = FALSE
)
# 12 month window file
df_invwin_uniq_fw12bw12 <- PrjThaiHFID::ff_hfid_invest_window(
df_invdates_uniq = tstm_invdates_uniq,
it_window_forward = 12,
it_window_backward = 12,
bl_forward_include_imth = FALSE,
bl_backward_include_imth = FALSE
)Second, generate mean and sum stats, short and long, respectively.
# Either sum or mean, not both
# 1 month window file, mean
df_invest_winstats_fw1bw1_mean <- PrjThaiHFID::ff_hfid_invest_winstats(
df_asset_loan = tstm_asset_loan,
df_invest = tstm_invest,
df_invwin_uniq = df_invwin_uniq_fw1bw1,
bl_compute_sum = FALSE,
bl_compute_mean = TRUE
)
# 12 month window file, sum
df_invest_winstats_fw12bw12_sum <- PrjThaiHFID::ff_hfid_invest_winstats(
df_asset_loan = tstm_asset_loan,
df_invest = tstm_invest,
df_invwin_uniq = df_invwin_uniq_fw12bw12,
bl_compute_sum = TRUE,
bl_compute_mean = FALSE
)Third, combine output to list.
# Sets of windowed files
ls_df_invest_winstats_fwbw <- list(
# Compare against backward window capital assets
fw1bw1mean_back = df_invest_winstats_fw1bw1_mean,
# Compare against backward window revenue sums
fw12bw12sum_back = df_invest_winstats_fw12bw12_sum
)
ar_svr_window_comp <- c("win_backward", "win_backward")
# Get length and names
it_df_count <- length(ls_df_invest_winstats_fwbw)
ar_st_df_names <- names(ls_df_invest_winstats_fwbw)Generate stats step 1, run window functions
Compute percentages.
- 12 month file: For revenue stats, based on 12 month file compute ratios of investments relative to last 12 months sum.
- 1 month file: Ratios of invest size var versus asset var. Stack 12 month and 1month files together
# Storage collection
ls_invest_win_shr <- vector(mode = "list", length = it_df_count)
names(ls_invest_win_shr) <- ar_st_df_names
# Loop over list of dataframes
for (it_df in seq(1, it_df_count)) {
# Load in file
st_df_name <- ar_st_df_names[it_df]
df_invest_winstats_fwbw <- ls_df_invest_winstats_fwbw[[st_df_name]]
svr_window_comp <- ar_svr_window_comp[it_df]
# Compute proportions
df_invest_winstats_shr <- df_invest_winstats_fwbw %>%
# filter(blnc_vars == "IS_07") %>%
filter(!!sym(svr_window_comp) != 0) %>%
mutate(shr_stat = capital_invest / !!sym(svr_window_comp)) %>%
select(
id,
ivars, hh_inv_asset_ctr, hh_inv_ctr,
blnc_vars, blnc_agg_stats,
# capital_invest, win_backward,
shr_stat
)
# Store results
ls_invest_win_shr[[st_df_name]] <- df_invest_winstats_shr
}
# df_invest_winstats_fw12bw12_shr <- df_invest_winstats_fw12bw12 %>%
# # filter(blnc_vars == "IS_07") %>%
# filter(win_backward != 0) %>%
# mutate(shr_stat = capital_invest / win_backward) %>%
# select(
# id,
# ivars, hh_inv_asset_ctr, hh_inv_ctr,
# blnc_vars, blnc_agg_stats,
# capital_invest, win_backward, shr_stat
# )Generate stats step 2, ratios and within group percentiles
Compute within balance cheet and investment variable CDF.
# Storage collection
ls_invest_win_pct <- vector(mode = "list", length = it_df_count)
names(ls_invest_win_pct) <- ar_st_df_names
# Loop over list of dataframes
for (it_df in seq(1, it_df_count)) {
# Load in file
st_df_name <- ar_st_df_names[it_df]
df_invest_win_shr <- ls_invest_win_shr[[st_df_name]]
# Generate within-group CDF
df_invest_win_pct <- df_invest_win_shr %>%
select(blnc_vars, ivars, shr_stat) %>%
arrange(blnc_vars, ivars, shr_stat) %>%
group_by(blnc_vars, ivars) %>%
mutate(cdf = row_number() / n())
# Store results
ls_invest_win_pct[[st_df_name]] <- df_invest_win_pct
}
#> Adding missing grouping variables: `id`, `hh_inv_ctr`
#> Adding missing grouping variables: `id`, `hh_inv_ctr`
# # Generate within-group CDF
# df_invest_winstats_fw12bw12_pct <- df_invest_winstats_fw12bw12_shr %>%
# arrange(blnc_vars, ivars, shr_stat) %>%
# group_by(blnc_vars, ivars) %>%
# mutate(cdf = row_number() / n())Compute a subset of percentiles of interests.
# Define strings
st_var_prefix <- "shr"
st_var_prefix_perc <- paste0(st_var_prefix, "_p")
svr_mean <- paste0(st_var_prefix, "_mean")
# Storage collection
ls_df_cross_pct <- vector(mode = "list", length = it_df_count)
names(ls_df_cross_pct) <- ar_st_df_names
# Loop over list of dataframes
for (it_df in seq(1, it_df_count)) {
# Load in file
st_df_name <- ar_st_df_names[it_df]
df_invest_win_pct <- ls_invest_win_pct[[st_df_name]]
# Generate within-group percentiles
for (it_percentile_ctr in seq(1, length(ar_fl_percentiles))) {
# Current within group percentile to compute
fl_percentile <- ar_fl_percentiles[it_percentile_ctr]
# Percentile and mean stats
svr_percentile <- paste0(st_var_prefix_perc, round(fl_percentile * 100))
# Frame with specific percentile
df_within_percentiles_cur <- df_invest_win_pct %>%
group_by(blnc_vars, ivars) %>%
filter(cdf >= fl_percentile) %>%
slice(1) %>%
mutate(!!sym(svr_percentile) := shr_stat) %>%
select(blnc_vars, ivars, one_of(svr_percentile))
# Merge percentile frames together
if (it_percentile_ctr > 1) {
df_within_percentiles <- df_within_percentiles %>%
left_join(df_within_percentiles_cur,
by = c("blnc_vars" = "blnc_vars", "ivars" = "ivars")
)
} else {
df_within_percentiles <- df_within_percentiles_cur
}
}
# Add in within group mean
df_within_percentiles_mean <- df_invest_win_pct %>%
group_by(blnc_vars, ivars) %>%
mutate(!!sym(svr_mean) := mean(shr_stat, na.rm = TRUE)) %>%
slice(1)
# Join to file
df_within_percentiles <- df_within_percentiles %>%
left_join(df_within_percentiles_mean,
by = c("blnc_vars" = "blnc_vars", "ivars" = "ivars")
) %>%
select(
blnc_vars, ivars,
one_of(svr_mean), contains(st_var_prefix_perc)
)
# Store results
ls_df_cross_pct[[st_df_name]] <- df_within_percentiles
}
# display
print(ls_df_cross_pct)
#> $fw1bw1mean_back
#> # A tibble: 468 × 10
#> # Groups: blnc_vars, ivars [468]
#> blnc_vars ivars shr_mean shr_p5 shr_p10 shr_p25 shr_p50 shr_p75 shr_p90
#> <chr> <chr> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
#> 1 BS_01 agg_BS_10… 1.06 0.0308 0.0515 0.122 0.286 0.658 1.40
#> 2 BS_01 agg_BS_10… 0.861 0.00490 0.0116 0.0419 0.142 0.412 0.971
#> 3 BS_01 agg_BS_10… 0.472 0.00358 0.00845 0.0269 0.100 0.292 0.766
#> 4 BS_01 agg_BS_10… 0.394 0.00446 0.00892 0.0229 0.0886 0.307 0.772
#> 5 BS_01 agg_BS_10… 0.723 0.00348 0.00763 0.0242 0.0947 0.359 0.878
#> 6 BS_01 agg_BS_10… 0.519 0.00205 0.00367 0.00964 0.0310 0.134 0.520
#> 7 BS_01 agg_BS_10… 0.453 0.00292 0.00662 0.0230 0.0876 0.263 0.699
#> 8 BS_01 agg_BS_20… 0.856 0.00517 0.0116 0.0388 0.135 0.386 0.947
#> 9 BS_01 agg_BS_20… 0.382 0.00475 0.00886 0.0210 0.0764 0.279 0.728
#> 10 BS_01 agg_BS_30… 0.712 0.0171 0.0292 0.0687 0.193 0.495 1.08
#> # ℹ 458 more rows
#> # ℹ 1 more variable: shr_p95 <dbl>
#>
#> $fw12bw12sum_back
#> # A tibble: 468 × 10
#> # Groups: blnc_vars, ivars [468]
#> blnc_vars ivars shr_mean shr_p5 shr_p10 shr_p25 shr_p50 shr_p75 shr_p90
#> <chr> <chr> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
#> 1 BS_01 agg_BS_10… 0.0636 2.68e-3 4.39e-3 1.08e-2 0.0257 0.0598 0.122
#> 2 BS_01 agg_BS_10… 0.0437 4.11e-4 1.01e-3 3.59e-3 0.0126 0.0345 0.0836
#> 3 BS_01 agg_BS_10… 0.0228 3.02e-4 6.72e-4 2.33e-3 0.00851 0.0234 0.0607
#> 4 BS_01 agg_BS_10… 0.0237 3.36e-4 6.55e-4 2.00e-3 0.00718 0.0245 0.0609
#> 5 BS_01 agg_BS_10… 0.0420 3.10e-4 6.27e-4 2.21e-3 0.00825 0.0292 0.0779
#> 6 BS_01 agg_BS_10… 0.0291 1.75e-4 3.15e-4 8.30e-4 0.00276 0.0103 0.0413
#> 7 BS_01 agg_BS_10… 0.0210 2.38e-4 5.26e-4 2.04e-3 0.00738 0.0216 0.0542
#> 8 BS_01 agg_BS_20… 0.0428 4.13e-4 9.90e-4 3.36e-3 0.0116 0.0332 0.0794
#> 9 BS_01 agg_BS_20… 0.0219 3.80e-4 6.42e-4 1.74e-3 0.00593 0.0213 0.0543
#> 10 BS_01 agg_BS_30… 0.0441 1.53e-3 2.52e-3 6.11e-3 0.0167 0.0440 0.0935
#> # ℹ 458 more rows
#> # ℹ 1 more variable: shr_p95 <dbl>Table display
Table input processing
Get stats tables.
Review and select: Investment as share of prior month capital (level)
Most recent month, relationship between investment and investment. Potentially shows that not only are investments rare, they are sizable.
Learned:
- very small and very large ratios are both possible
- highlight results from: BS_1021 agg_BS_1021, median of 1.3, pretty large changes, largest among total asset.
- include also components: BS_2021 agg_BS_2021
- include also components: BS_3021 agg_BS_3021
- could mention 1021 relative to 1011 as stats not in table but mentioned, but maybe also include: BS_1012 agg_BS_1021
# All
df_all_m1mean <- df_fw1bw1mean_back %>%
filter(str_detect(blnc_vars, "BS_10"), str_detect(ivars, "BS_10"))
(df_all_m1mean) %>%
kable() %>%
kable_styling()| blnc_vars | ivars | shr_mean | shr_p5 | shr_p10 | shr_p25 | shr_p50 | shr_p75 | shr_p90 | shr_p95 |
|---|---|---|---|---|---|---|---|---|---|
| BS_1011 | agg_BS_1011 | 0.7409393 | 0.0094756 | 0.0187255 | 0.0510925 | 0.1554751 | 0.4562367 | 1.4642133 | 2.7488136 |
| BS_1011 | agg_BS_1012 | 0.3843737 | 0.0029097 | 0.0057487 | 0.0182804 | 0.0696245 | 0.2166457 | 0.6888826 | 1.5406523 |
| BS_1011 | agg_BS_1021 | 0.1885822 | 0.0014941 | 0.0033931 | 0.0111579 | 0.0437246 | 0.1276403 | 0.3786298 | 0.7075963 |
| BS_1011 | agg_BS_1022 | 0.1467474 | 0.0021958 | 0.0036269 | 0.0088512 | 0.0308129 | 0.1094246 | 0.3196975 | 0.6704482 |
| BS_1011 | agg_BS_1023 | 0.3368104 | 0.0021817 | 0.0041579 | 0.0110017 | 0.0452081 | 0.1735644 | 0.5597619 | 1.3245048 |
| BS_1011 | agg_BS_1024 | 0.2556003 | 0.0008107 | 0.0014457 | 0.0044629 | 0.0122261 | 0.0714989 | 0.2875197 | 0.9054138 |
| BS_1011 | agg_BS_1025 | 0.1729666 | 0.0010837 | 0.0024574 | 0.0094515 | 0.0382867 | 0.1121256 | 0.3223563 | 0.6264688 |
| BS_1012 | agg_BS_1011 | 4464.7098174 | 0.0094223 | 0.0182549 | 0.0506443 | 0.1674679 | 0.6089942 | 3.1186109 | 10.9375637 |
| BS_1012 | agg_BS_1012 | 3416.2226959 | 0.0033846 | 0.0067887 | 0.0215576 | 0.0892095 | 0.3351785 | 1.5240729 | 4.3593331 |
| BS_1012 | agg_BS_1021 | 3456.0478044 | 0.0015478 | 0.0039521 | 0.0130890 | 0.0544887 | 0.1804410 | 0.8398873 | 2.2500000 |
| BS_1012 | agg_BS_1022 | 0.4798713 | 0.0023282 | 0.0039521 | 0.0096689 | 0.0396702 | 0.1505598 | 0.5786516 | 1.2743372 |
| BS_1012 | agg_BS_1023 | 1.0158854 | 0.0024145 | 0.0047720 | 0.0123914 | 0.0595946 | 0.2573797 | 1.0398978 | 2.6197992 |
| BS_1012 | agg_BS_1024 | 0.6970755 | 0.0008564 | 0.0017182 | 0.0051142 | 0.0149703 | 0.1113806 | 0.5178188 | 1.5585237 |
| BS_1012 | agg_BS_1025 | 3542.4196436 | 0.0011552 | 0.0027764 | 0.0100073 | 0.0497730 | 0.1647365 | 0.7000000 | 1.9700251 |
| BS_1021 | agg_BS_1011 | 5372.5448254 | 0.4603299 | 0.7054627 | 1.6395907 | 5.3772909 | 25.8912323 | 143.9644343 | 472.1979676 |
| BS_1021 | agg_BS_1012 | 3965.1289833 | 0.2472262 | 0.3366212 | 0.7231162 | 2.3536849 | 9.4441180 | 53.0088249 | 151.5792982 |
| BS_1021 | agg_BS_1021 | 3826.9472482 | 0.1705659 | 0.2554571 | 0.5440230 | 1.5332265 | 5.8821209 | 28.5230867 | 82.9447688 |
| BS_1021 | agg_BS_1022 | 52.8569734 | 0.0925525 | 0.1401637 | 0.2952586 | 0.9459944 | 3.9743116 | 22.8358869 | 82.9447688 |
| BS_1021 | agg_BS_1023 | 204.8073690 | 0.1027172 | 0.1773146 | 0.4590861 | 1.4591543 | 7.1004985 | 40.3456595 | 139.6101233 |
| BS_1021 | agg_BS_1024 | 164.9161726 | 0.0179356 | 0.0359888 | 0.1077672 | 0.3902669 | 2.5014213 | 14.6476417 | 54.4695665 |
| BS_1021 | agg_BS_1025 | 3900.1960006 | 0.1292174 | 0.2137537 | 0.4527832 | 1.3579275 | 4.9791078 | 25.1026889 | 71.2726389 |
| BS_1022 | agg_BS_1011 | 322.6270777 | 0.6026806 | 0.9534828 | 2.0851890 | 7.6318499 | 35.1452387 | 160.8831250 | 447.3166838 |
| BS_1022 | agg_BS_1012 | 230.8062149 | 0.2691948 | 0.3965798 | 0.9524189 | 3.3718758 | 16.3585899 | 69.1739812 | 226.7477142 |
| BS_1022 | agg_BS_1021 | 213.9462672 | 0.1807164 | 0.2842713 | 0.6745096 | 2.2972868 | 10.9307637 | 49.9502422 | 136.1310468 |
| BS_1022 | agg_BS_1022 | 244.3168505 | 0.1310442 | 0.1965548 | 0.4304811 | 1.2548145 | 5.8782877 | 35.9970876 | 128.6468977 |
| BS_1022 | agg_BS_1023 | 210.1156619 | 0.2216547 | 0.3121809 | 0.6947765 | 2.2286929 | 10.6492809 | 56.8153029 | 177.7134902 |
| BS_1022 | agg_BS_1024 | 40.3435419 | 0.0248228 | 0.0490516 | 0.1390711 | 0.5592155 | 3.2516608 | 24.2526266 | 66.0697449 |
| BS_1022 | agg_BS_1025 | 205.5867991 | 0.1401637 | 0.2277167 | 0.5818193 | 2.0315477 | 9.3602402 | 43.7005948 | 125.9988837 |
| BS_1023 | agg_BS_1011 | 5.9004119 | 0.0094128 | 0.0180764 | 0.0504210 | 0.1673132 | 0.6041616 | 3.2027449 | 12.6704347 |
| BS_1023 | agg_BS_1012 | 2.9200168 | 0.0033074 | 0.0067255 | 0.0209763 | 0.0894691 | 0.3295005 | 1.7865576 | 5.4705688 |
| BS_1023 | agg_BS_1021 | 1.9201113 | 0.0015432 | 0.0037064 | 0.0129135 | 0.0534755 | 0.1760733 | 0.9104802 | 2.9456718 |
| BS_1023 | agg_BS_1022 | 0.7776908 | 0.0023284 | 0.0039557 | 0.0097972 | 0.0412770 | 0.1538392 | 0.6383771 | 1.6810774 |
| BS_1023 | agg_BS_1023 | 1.7467336 | 0.0024145 | 0.0046942 | 0.0124273 | 0.0612103 | 0.2663786 | 1.2096323 | 3.0000000 |
| BS_1023 | agg_BS_1024 | 1.2647070 | 0.0008622 | 0.0017125 | 0.0051003 | 0.0151310 | 0.1161814 | 0.6125878 | 1.8083183 |
| BS_1023 | agg_BS_1025 | 1.9394852 | 0.0011552 | 0.0027565 | 0.0098712 | 0.0487120 | 0.1608886 | 0.7231347 | 2.7951427 |
| BS_1024 | agg_BS_1011 | 7.2629021 | 0.0092741 | 0.0174209 | 0.0481389 | 0.1568795 | 0.5273656 | 2.9361433 | 10.9475738 |
| BS_1024 | agg_BS_1012 | 3.7191041 | 0.0030656 | 0.0064703 | 0.0199025 | 0.0825611 | 0.2837974 | 1.7860843 | 5.3836417 |
| BS_1024 | agg_BS_1021 | 2.7546869 | 0.0014930 | 0.0035527 | 0.0119215 | 0.0491464 | 0.1602456 | 0.6420030 | 2.8406751 |
| BS_1024 | agg_BS_1022 | 2.1338530 | 0.0023404 | 0.0038278 | 0.0096657 | 0.0377177 | 0.1516240 | 0.5738230 | 2.9646044 |
| BS_1024 | agg_BS_1023 | 2.6611722 | 0.0023763 | 0.0045042 | 0.0122793 | 0.0579784 | 0.2505887 | 1.2709918 | 4.0282377 |
| BS_1024 | agg_BS_1024 | 1.4487369 | 0.0008595 | 0.0017144 | 0.0050997 | 0.0156429 | 0.1428643 | 0.7368667 | 2.0728088 |
| BS_1024 | agg_BS_1025 | 2.4095259 | 0.0011180 | 0.0025619 | 0.0096801 | 0.0446207 | 0.1387161 | 0.5817301 | 2.3300069 |
| BS_1025 | agg_BS_1011 | 5549.2036844 | 0.5403370 | 0.8235557 | 1.9111443 | 6.6326212 | 33.5732473 | 170.3991940 | 551.7568667 |
| BS_1025 | agg_BS_1012 | 4056.3185025 | 0.2707141 | 0.3897048 | 0.8524271 | 2.8388205 | 11.1715100 | 61.9131509 | 165.1864436 |
| BS_1025 | agg_BS_1021 | 3926.9527433 | 0.1986531 | 0.2871685 | 0.6249265 | 1.9454621 | 7.4409708 | 36.9481749 | 111.8913295 |
| BS_1025 | agg_BS_1022 | 76.7741759 | 0.0984555 | 0.1469442 | 0.3349635 | 1.1561695 | 5.6114062 | 32.7290955 | 118.6873652 |
| BS_1025 | agg_BS_1023 | 215.8134743 | 0.1032986 | 0.1920790 | 0.5064802 | 1.7502190 | 8.4191801 | 51.3187670 | 153.5552749 |
| BS_1025 | agg_BS_1024 | 168.1770101 | 0.0215421 | 0.0413641 | 0.1197309 | 0.4547106 | 3.2913341 | 17.9856641 | 59.7400031 |
| BS_1025 | agg_BS_1025 | 3983.3826665 | 0.1872337 | 0.2781652 | 0.5676795 | 1.6831382 | 6.3782364 | 30.8576225 | 101.6030770 |
ffp_save_res_table(
kbl(df_all_m1mean,
format = "latex", booktabs = TRUE,
caption = "Investment as share of prior month capital (all, level)"
),
"tb_all_m1mean", spt_res,
df = df_all_m1mean, bl_save = ls_save_res[["tb_all_m1mean"]]
)
# Agriculture
df_agri_m1mean <- df_fw1bw1mean_back %>%
filter(str_detect(blnc_vars, "BS_20"), str_detect(ivars, "BS_20"))
(df_agri_m1mean) %>%
kable() %>%
kable_styling()| blnc_vars | ivars | shr_mean | shr_p5 | shr_p10 | shr_p25 | shr_p50 | shr_p75 | shr_p90 | shr_p95 |
|---|---|---|---|---|---|---|---|---|---|
| BS_2011 | agg_BS_2011 | 3511.1318345 | 0.0030608 | 0.0064415 | 0.0203075 | 0.0853109 | 0.3511469 | 1.6363636 | 4.702961 |
| BS_2011 | agg_BS_2021 | 0.5136162 | 0.0019996 | 0.0034109 | 0.0082789 | 0.0326087 | 0.1332474 | 0.5301598 | 1.121944 |
| BS_2021 | agg_BS_2011 | 311.5305419 | 0.3121809 | 0.4988032 | 1.1655180 | 4.3628114 | 21.3646568 | 93.9352683 | 290.990472 |
| BS_2021 | agg_BS_2021 | 276.1153017 | 0.1401637 | 0.2225300 | 0.4518415 | 1.2881209 | 6.4738158 | 45.0920958 | 136.131047 |
ffp_save_res_table(
kbl(df_agri_m1mean,
format = "latex", booktabs = TRUE,
caption = "Investment as share of prior month capital (agriculture, level)"
),
"tb_agri_m1mean", spt_res,
df = df_agri_m1mean, bl_save = ls_save_res[["tb_agri_m1mean"]]
)
# Business
df_biz_m1mean <- df_fw1bw1mean_back %>%
filter(str_detect(blnc_vars, "BS_30"), str_detect(ivars, "BS_30"))
(df_biz_m1mean) %>%
kable() %>%
kable_styling()| blnc_vars | ivars | shr_mean | shr_p5 | shr_p10 | shr_p25 | shr_p50 | shr_p75 | shr_p90 | shr_p95 |
|---|---|---|---|---|---|---|---|---|---|
| BS_3011 | agg_BS_3011 | 4.104832 | 0.1751265 | 0.2289867 | 0.4433280 | 1.0510476 | 2.8532376 | 6.952639 | 12.117628 |
| BS_3011 | agg_BS_3021 | 1.237421 | 0.0121182 | 0.0168242 | 0.0522185 | 0.1665355 | 0.8546702 | 2.767697 | 6.132434 |
| BS_3011 | agg_BS_3022 | 3.927825 | 0.1649034 | 0.2133417 | 0.4081838 | 1.0032138 | 2.7359773 | 6.733992 | 11.892915 |
| BS_3021 | agg_BS_3011 | 47.937796 | 0.3550340 | 0.6475215 | 1.5600846 | 7.3068073 | 23.3413532 | 84.160046 | 137.289053 |
| BS_3021 | agg_BS_3021 | 55.123023 | 0.0817616 | 0.1523057 | 0.3732541 | 0.8460228 | 3.3656165 | 18.695871 | 65.872575 |
| BS_3021 | agg_BS_3022 | 30.124135 | 0.1637420 | 0.3123617 | 1.1242053 | 5.0923282 | 21.1817781 | 84.177903 | 130.565935 |
| BS_3022 | agg_BS_3011 | 4.363616 | 0.1817649 | 0.2419322 | 0.4676280 | 1.1230191 | 3.0285702 | 7.778116 | 13.128688 |
| BS_3022 | agg_BS_3021 | 1.672649 | 0.0123890 | 0.0168988 | 0.0624731 | 0.2186658 | 1.3631572 | 3.560229 | 7.019443 |
| BS_3022 | agg_BS_3022 | 4.180161 | 0.1816237 | 0.2341408 | 0.4504677 | 1.0766720 | 2.9745669 | 7.511008 | 12.622020 |
ffp_save_res_table(
kbl(df_biz_m1mean,
format = "latex", booktabs = TRUE,
caption = "Investment as share of prior month capital (business, level)"
),
"tb_biz_m1mean", spt_res,
df = df_biz_m1mean, bl_save = ls_save_res[["tb_biz_m1mean"]]
)Select subset of key stats to be displayed.
# Results 1
df_m1mean_agri_bs2021 <- df_fw1bw1mean_back %>%
filter(str_detect(blnc_vars, "BS_2021"), str_detect(ivars, "BS_2021"))
# Results 2
df_m1mean_biz_bs3021 <- df_fw1bw1mean_back %>%
filter(str_detect(blnc_vars, "BS_3021"), str_detect(ivars, "BS_3021"))
# Results 3a
df_m1mean_all_bs1021 <- df_fw1bw1mean_back %>%
filter(str_detect(blnc_vars, "BS_1021"), str_detect(ivars, "BS_1021"))
# Results 3b
df_m1mean_all_bs1021v1012 <- df_fw1bw1mean_back %>%
filter(str_detect(blnc_vars, "BS_1012"), str_detect(ivars, "BS_1021"))Review and select: Investment as a share of revenue past 12 months (level)
Select revenue past 12 month sum vs investment shares. We look at overall, business, and agriculture in three groups. Are they “large” relative to revenue, if they are, they are presumably hard to pay off right away.
Findings:
- main: IS_1021 agg_BS_1021, note these are shares of revenue, they might seem low possibly, but firms would need to be highly profitable to be able to pay off debt with one year profits.
- details:
- IS_2021 agg_BS_2021
- IS_3021 agg_BS_3021
- aux:
- IS_1012 agg_BS_1021, do not consider wage income, too confusing, profit vs revenue, all else is revenue, not profit.
# All
df_all_m12sum <- df_fw12bw12sum_back %>%
rowwise() %>%
filter(str_detect(blnc_vars, "IS_10"), str_detect(ivars, "BS_10"))
(df_all_m12sum) %>%
kable() %>%
kable_styling()| blnc_vars | ivars | shr_mean | shr_p5 | shr_p10 | shr_p25 | shr_p50 | shr_p75 | shr_p90 | shr_p95 |
|---|---|---|---|---|---|---|---|---|---|
| IS_1011 | agg_BS_1011 | 5.2395354 | 0.0518194 | 0.0808174 | 0.1753382 | 0.3924437 | 0.9940082 | 3.0305128 | 7.357891 |
| IS_1011 | agg_BS_1012 | 2.2263033 | 0.0086380 | 0.0184070 | 0.0548920 | 0.1819977 | 0.5098756 | 1.5267083 | 3.443287 |
| IS_1011 | agg_BS_1021 | 0.5031246 | 0.0065079 | 0.0145840 | 0.0456048 | 0.1339956 | 0.3539975 | 0.8775887 | 1.618410 |
| IS_1011 | agg_BS_1022 | 0.4175894 | 0.0080693 | 0.0131166 | 0.0329538 | 0.0931717 | 0.2968234 | 0.9537436 | 1.752554 |
| IS_1011 | agg_BS_1023 | 2.2916629 | 0.0072623 | 0.0130505 | 0.0325970 | 0.1085830 | 0.4273612 | 1.5220145 | 3.584285 |
| IS_1011 | agg_BS_1024 | 2.0008409 | 0.0032689 | 0.0052594 | 0.0136578 | 0.0388198 | 0.1473416 | 0.7783918 | 2.207854 |
| IS_1011 | agg_BS_1025 | 0.4615035 | 0.0042393 | 0.0110120 | 0.0403119 | 0.1232199 | 0.3314882 | 0.7838534 | 1.349235 |
| IS_1012 | agg_BS_1011 | 32.4064602 | 0.0756927 | 0.1284194 | 0.2922908 | 0.8011254 | 3.1696988 | 16.5692433 | 50.550778 |
| IS_1012 | agg_BS_1012 | 6.8696336 | 0.0248754 | 0.0425379 | 0.1061457 | 0.3281604 | 1.0111954 | 4.3725962 | 12.716746 |
| IS_1012 | agg_BS_1021 | 2.2373752 | 0.0179115 | 0.0308655 | 0.0876579 | 0.2379055 | 0.7080460 | 2.1670775 | 4.761905 |
| IS_1012 | agg_BS_1022 | 2.5088201 | 0.0116684 | 0.0205167 | 0.0510150 | 0.1594974 | 0.5274727 | 1.9627276 | 4.324575 |
| IS_1012 | agg_BS_1023 | 6.9969871 | 0.0138134 | 0.0237766 | 0.0546235 | 0.1870670 | 0.8082766 | 3.9523468 | 11.688158 |
| IS_1012 | agg_BS_1024 | 6.1619314 | 0.0044547 | 0.0081570 | 0.0215525 | 0.0633105 | 0.2985075 | 2.0833333 | 7.847534 |
| IS_1012 | agg_BS_1025 | 2.1578524 | 0.0128107 | 0.0257477 | 0.0749301 | 0.2226015 | 0.6770833 | 1.9801980 | 4.239820 |
| IS_1013 | agg_BS_1011 | 46.5882017 | 0.0756927 | 0.1266173 | 0.2922908 | 0.8370755 | 3.8906974 | 31.8184223 | 102.081767 |
| IS_1013 | agg_BS_1012 | 12.3744098 | 0.0253947 | 0.0454316 | 0.1117097 | 0.3612966 | 1.2039110 | 6.5200087 | 24.424456 |
| IS_1013 | agg_BS_1021 | 4.4369751 | 0.0187840 | 0.0325894 | 0.0958181 | 0.2868884 | 0.9243833 | 3.7842079 | 13.333333 |
| IS_1013 | agg_BS_1022 | 3.8458373 | 0.0119344 | 0.0200739 | 0.0518324 | 0.1671119 | 0.6765582 | 2.3504982 | 7.649960 |
| IS_1013 | agg_BS_1023 | 12.2650283 | 0.0158694 | 0.0248754 | 0.0590000 | 0.1916933 | 0.9276179 | 4.6201208 | 17.950937 |
| IS_1013 | agg_BS_1024 | 11.4022987 | 0.0049834 | 0.0089705 | 0.0230326 | 0.0687610 | 0.3052503 | 2.2642364 | 12.396694 |
| IS_1013 | agg_BS_1025 | 4.2921924 | 0.0130243 | 0.0260938 | 0.0822883 | 0.2612517 | 0.8705779 | 3.2444574 | 12.875537 |
| IS_1021 | agg_BS_1011 | 40.2759785 | 0.0920663 | 0.1549652 | 0.3591124 | 1.0098054 | 3.8972197 | 18.4348148 | 59.815292 |
| IS_1021 | agg_BS_1012 | 9.1150478 | 0.0318605 | 0.0546490 | 0.1379379 | 0.4683347 | 1.6426657 | 6.8713062 | 19.289870 |
| IS_1021 | agg_BS_1021 | 3.3122666 | 0.0215899 | 0.0412088 | 0.1207132 | 0.3986743 | 1.3449753 | 4.6547924 | 11.586239 |
| IS_1021 | agg_BS_1022 | 2.7808661 | 0.0162154 | 0.0264470 | 0.0806926 | 0.2639442 | 1.1820886 | 4.0540541 | 10.863832 |
| IS_1021 | agg_BS_1023 | 8.5037024 | 0.0196506 | 0.0328946 | 0.0748491 | 0.2572008 | 1.2359403 | 5.2308457 | 14.705882 |
| IS_1021 | agg_BS_1024 | 6.8326442 | 0.0058097 | 0.0111982 | 0.0267622 | 0.0787324 | 0.3624314 | 2.1739130 | 7.753352 |
| IS_1021 | agg_BS_1025 | 3.1690396 | 0.0151093 | 0.0303763 | 0.1082640 | 0.3737306 | 1.2601921 | 4.3766269 | 10.664687 |
ffp_save_res_table(
kbl(df_all_m12sum,
format = "latex", booktabs = TRUE,
caption = "Investment as share of revenue past 12 months (all, level)"
),
"tb_all_m12sum", spt_res,
df = df_all_m12sum, bl_save = ls_save_res[["tb_all_m12sum"]]
)
# Agriculture
df_agri_m12sum <- df_fw12bw12sum_back %>%
rowwise() %>%
filter(str_detect(blnc_vars, "IS_20"), str_detect(ivars, "BS_20"))
(df_agri_m12sum) %>%
kable() %>%
kable_styling()| blnc_vars | ivars | shr_mean | shr_p5 | shr_p10 | shr_p25 | shr_p50 | shr_p75 | shr_p90 | shr_p95 |
|---|---|---|---|---|---|---|---|---|---|
| IS_2011 | agg_BS_2011 | 27.666354 | 0.0331972 | 0.0548407 | 0.1452395 | 0.5090307 | 1.819809 | 14.817191 | 54.17936 |
| IS_2011 | agg_BS_2021 | 6.207505 | 0.0162411 | 0.0247792 | 0.0606670 | 0.1931694 | 1.061360 | 3.409755 | 12.32706 |
| IS_2012 | agg_BS_2011 | -6102.032943 | -34.6837979 | -5.0109862 | 0.0986652 | 0.5055650 | 1.754311 | 7.876581 | 21.38594 |
| IS_2012 | agg_BS_2021 | -12.834206 | -4.3369302 | 0.0106034 | 0.0667535 | 0.2425136 | 1.234425 | 4.535870 | 11.87107 |
| IS_2013 | agg_BS_2011 | 26.338823 | 0.0441231 | 0.0727741 | 0.1817482 | 0.5896692 | 2.062262 | 16.723766 | 60.60606 |
| IS_2013 | agg_BS_2021 | 8.451405 | 0.0200296 | 0.0306632 | 0.0809061 | 0.2639406 | 1.459940 | 5.523906 | 15.86207 |
| IS_2014 | agg_BS_2011 | -6178.299499 | -40.5680845 | -5.6014996 | 0.1189885 | 0.5760816 | 1.998542 | 9.090909 | 29.09978 |
| IS_2014 | agg_BS_2021 | -8.934978 | -7.6833243 | 0.0097571 | 0.0825770 | 0.3201358 | 1.682491 | 6.626069 | 17.34542 |
| IS_2015 | agg_BS_2011 | 84.904610 | 0.0336900 | 0.0535423 | 0.1512916 | 0.5698799 | 2.140090 | 14.817191 | 61.23807 |
| IS_2015 | agg_BS_2021 | 19.688696 | 0.0178263 | 0.0282123 | 0.0786899 | 0.2845931 | 1.499270 | 5.537795 | 15.88110 |
| IS_2016 | agg_BS_2011 | 85.050304 | 0.0232398 | 0.0608733 | 0.2116925 | 0.8304432 | 3.318593 | 18.930811 | 83.29324 |
| IS_2016 | agg_BS_2021 | 20.169753 | 0.0148897 | 0.0346153 | 0.1171678 | 0.4355601 | 2.107264 | 8.620690 | 27.02802 |
| IS_2021 | agg_BS_2011 | 62.850706 | 0.0445204 | 0.0713936 | 0.1878591 | 0.6367193 | 2.132510 | 11.090050 | 40.47634 |
| IS_2021 | agg_BS_2021 | 5.688836 | 0.0244642 | 0.0399742 | 0.1070825 | 0.4185530 | 1.743829 | 6.053045 | 12.84006 |
| IS_2022 | agg_BS_2011 | 59.768020 | 0.0246408 | 0.0710714 | 0.2562972 | 0.9324998 | 3.363046 | 14.483000 | 47.37205 |
| IS_2022 | agg_BS_2021 | 6.130690 | 0.0195575 | 0.0405807 | 0.1390665 | 0.5651988 | 2.354677 | 8.989975 | 19.04448 |
ffp_save_res_table(
kbl(df_agri_m12sum,
format = "latex", booktabs = TRUE,
caption = "Investment as share of revenue past 12 months (agriculture, level)"
),
"tb_agri_m12sum", spt_res,
df = df_agri_m12sum, bl_save = ls_save_res[["tb_agri_m12sum"]]
)
# Business
df_biz_m12sum <- df_fw12bw12sum_back %>%
rowwise() %>%
filter(str_detect(blnc_vars, "IS_30"), str_detect(ivars, "BS_30"))
(df_biz_m12sum) %>%
kable() %>%
kable_styling()| blnc_vars | ivars | shr_mean | shr_p5 | shr_p10 | shr_p25 | shr_p50 | shr_p75 | shr_p90 | shr_p95 |
|---|---|---|---|---|---|---|---|---|---|
| IS_3011 | agg_BS_3011 | 11.106808 | 0.0414426 | 0.0762695 | 0.1923083 | 0.5952935 | 2.2182231 | 11.324571 | 33.258074 |
| IS_3011 | agg_BS_3021 | 12.312756 | 0.0026585 | 0.0071920 | 0.0184186 | 0.0900940 | 0.2832460 | 1.972595 | 4.501801 |
| IS_3011 | agg_BS_3022 | 9.722507 | 0.0350741 | 0.0642672 | 0.1838533 | 0.5835434 | 2.1567316 | 11.324571 | 31.101592 |
| IS_3021 | agg_BS_3011 | 23.428332 | 0.0483213 | 0.0890390 | 0.3143638 | 1.6317897 | 8.8452561 | 34.274682 | 85.173870 |
| IS_3021 | agg_BS_3021 | 3.707475 | 0.0071231 | 0.0139786 | 0.0388632 | 0.1392530 | 0.5942617 | 3.155637 | 14.595556 |
| IS_3021 | agg_BS_3022 | 22.702538 | 0.0350403 | 0.0580712 | 0.2895929 | 1.5109778 | 8.0126246 | 33.911441 | 83.244354 |
| IS_3022 | agg_BS_3011 | 98.150125 | 0.0161570 | 0.2145536 | 0.7313960 | 2.7481983 | 11.6487200 | 38.494622 | 93.878400 |
| IS_3022 | agg_BS_3021 | 3.971525 | -0.1539095 | 0.0066440 | 0.0785323 | 0.3008507 | 1.0622332 | 6.424242 | 30.000000 |
| IS_3022 | agg_BS_3022 | 96.542162 | 0.0161719 | 0.1193461 | 0.6812955 | 2.5219545 | 11.2359746 | 36.245194 | 89.790831 |
ffp_save_res_table(
kbl(df_biz_m12sum,
format = "latex", booktabs = TRUE,
caption = "Investment as share of revenue past 12 months (business, level)"
),
"tb_biz_m12sum", spt_res,
df = df_biz_m12sum, bl_save = ls_save_res[["tb_biz_m12sum"]]
)Select subset of key stats to be displayed.
# Results 1
df_m12sum_agri_bs2021 <- df_fw12bw12sum_back %>%
filter(str_detect(blnc_vars, "IS_2021"), str_detect(ivars, "BS_2021"))
# Results 2
df_m12sum_biz_bs3021 <- df_fw12bw12sum_back %>%
filter(str_detect(blnc_vars, "IS_3021"), str_detect(ivars, "BS_3021"))
# Results 3a
df_m12sum_all_bs1021 <- df_fw12bw12sum_back %>%
filter(str_detect(blnc_vars, "IS_1021"), str_detect(ivars, "BS_1021"))
# Results 3b
df_m12sum_all_bs1021v1012 <- df_fw12bw12sum_back %>%
filter(str_detect(blnc_vars, "IS_1012"), str_detect(ivars, "BS_1021"))
# Bind rows for selectCombine 1 and 12 month info, transpose, and prep
First, combine all rows together and combine blnc_vars and ivars. Do not resort, preserve existing orders. Drop the mean, not informative.
# Bind rows for selected key results:
df_m1mean_m12_sum_jnt <- bind_rows(
df_m1mean_agri_bs2021,
df_m12sum_agri_bs2021,
df_m1mean_biz_bs3021,
df_m12sum_biz_bs3021,
df_m1mean_all_bs1021v1012,
df_m12sum_all_bs1021v1012,
df_m1mean_all_bs1021,
df_m12sum_all_bs1021
) %>%
mutate(
blnc_vars_ivars = paste0(ivars, "_d_", blnc_vars)
) %>%
select(
blnc_vars_ivars, -blnc_vars, -ivars,
contains(st_var_prefix_perc)
)Second, reshape wide to long then to wide again to transpose.
df_m1mean_m12_sum_jnt_trans <- df_m1mean_m12_sum_jnt %>%
pivot_longer(
cols = starts_with(st_var_prefix_perc),
names_to = c("percentile"),
names_pattern = paste0(st_var_prefix_perc, "(.*)"),
values_to = "value"
) %>%
pivot_wider(
names_from = blnc_vars_ivars,
values_from = value
)Third, percentage formatting.
Table final display
First, we define column names, which correspond to previously defined variable selection list.
# Column names
ar_st_col_names <- c(
"Percentiles",
"$\\frac{\\text{Invest size}}{\\text{Pre mth assets}}$",
"$\\frac{\\text{Invest size}}{\\text{Pre 12 mth rev}}$",
"$\\frac{\\text{Invest size}}{\\text{Pre mth assets}}$",
"$\\frac{\\text{Invest size}}{\\text{Pre 12 mth rev}}$",
"$\\frac{\\text{Invest size}}{\\text{Pre mth assets}}$",
"$\\frac{\\text{Invest size}}{\\text{Pre 12 mth rev}}$",
"$\\frac{\\text{Invest size}}{\\text{Pre mth assets}}$",
"$\\frac{\\text{Invest size}}{\\text{Pre 12 mth rev}}$"
)
# Define column groups, grouping the names above
ar_st_col_groups <- c(
" " = 1,
"Invest/assets/rev" = 2,
"Invest/assets/rev" = 2,
"All prod assets/rev" = 2,
"Agri + biz assets/rev" = 2
)
# Define column groups, grouping the names above
ar_st_col_groups_super <- c(
" " = 1,
"Agricultural" = 2,
"Business" = 2,
"Agricutural + business investments" = 4
)
# Second, we construct main table, and add styling.
f_bk_invest_prior_ratio <- function(st_format) {
bk_invest_prior_ratio <- kbl(
df_m1mean_m12_sum_jnt_trans,
format = st_format,
# escape = F,
linesep = "",
booktabs = T,
align = "c",
caption = "Ratio of investment to prior to investment month assets and year revenues.",
col.names = ar_st_col_names
) %>%
# see https://cran.r-project.org/web/packages/kableExtra/vignettes/awesome_table_in_html.html#Bootstrap_table_classes
kable_styling(
bootstrap_options = c("striped", "hover", "condensed", "responsive"),
full_width = F, position = "left"
)
# Third, we add in row groups
bk_invest_prior_ratio <- bk_invest_prior_ratio %>%
add_header_above(ar_st_col_groups) %>%
add_header_above(ar_st_col_groups_super)
# %>%
# # collapse_rows(columns = 2:5, latex_hline = "major", valign = "middle")
# Fourth, we add in column groups.
bk_invest_prior_ratio <- bk_invest_prior_ratio %>%
pack_rows(
"Bottom decile",
1, 2,
latex_gap_space = "0.5em"
) %>%
pack_rows(
"Quartiles",
3, 5,
latex_gap_space = "0.5em", hline_before = F
) %>%
pack_rows(
"Top decile",
6, 7,
latex_gap_space = "0.5em", hline_before = F
)
# Fifth, column formatting.
bk_invest_prior_ratio <- bk_invest_prior_ratio %>%
column_spec(1, width = "2.5cm") %>%
column_spec(2:9, width = "2cm")
# Final adjustments
st_texcmd <- "frac"
bk_invest_prior_ratio <- gsub(bk_invest_prior_ratio,
pattern = paste0("\\textbackslash{}", st_texcmd, "\\"),
replacement = paste0("\\", st_texcmd), fixed = TRUE
)
st_texcmd <- "text"
bk_invest_prior_ratio <- gsub(bk_invest_prior_ratio,
pattern = paste0("\\textbackslash{}", st_texcmd, "\\"),
replacement = paste0("\\", st_texcmd), fixed = TRUE
)
bk_invest_prior_ratio <- gsub(bk_invest_prior_ratio,
pattern = "\\}\\{",
replacement = "}{", fixed = TRUE
)
bk_invest_prior_ratio <- gsub(bk_invest_prior_ratio,
pattern = "\\}",
replacement = "}", fixed = TRUE
)
bk_invest_prior_ratio <- gsub(bk_invest_prior_ratio,
pattern = "\\$",
replacement = "$", fixed = TRUE
)
return(bk_invest_prior_ratio)
}
# Build for display and save a latex (+ csv) copy.
bk_invest_prior_ratio <- f_bk_invest_prior_ratio(st_kableformat)
ffp_save_res_table(
f_bk_invest_prior_ratio("latex"), "bk_invest_prior_ratio", spt_res,
df = df_m1mean_m12_sum_jnt_trans,
bl_save = ls_save_res[["bk_invest_prior_ratio"]]
)
# Sixth, display.
# pl_bk_asset_count <- bk_invest_prior_ratio %>% as_image()
bk_invest_prior_ratio| Percentiles | $\frac{\text{Invest size}}{\text{Pre mth assets}}$ | $\frac{\text{Invest size}}{\text{Pre 12 mth rev}}$ | $\frac{\text{Invest size}}{\text{Pre mth assets}}$ | $\frac{\text{Invest size}}{\text{Pre 12 mth rev}}$ | $\frac{\text{Invest size}}{\text{Pre mth assets}}$ | $\frac{\text{Invest size}}{\text{Pre 12 mth rev}}$ | $\frac{\text{Invest size}}{\text{Pre mth assets}}$ | $\frac{\text{Invest size}}{\text{Pre 12 mth rev}}$ |
|---|---|---|---|---|---|---|---|---|
| Bottom decile | ||||||||
| 5 | 14.0% | 2.4% | 8.2% | 0.7% | 0.2% | 1.8% | 17.1% | 2.2% |
| 10 | 22.3% | 4.0% | 15.2% | 1.4% | 0.4% | 3.1% | 25.5% | 4.1% |
| Quartiles | ||||||||
| 25 | 45.2% | 10.7% | 37.3% | 3.9% | 1.3% | 8.8% | 54.4% | 12.1% |
| 50 | 128.8% | 41.9% | 84.6% | 13.9% | 5.4% | 23.8% | 153.3% | 39.9% |
| 75 | 647.4% | 174.4% | 336.6% | 59.4% | 18.0% | 70.8% | 588.2% | 134.5% |
| Top decile | ||||||||
| 90 | 4,509.2% | 605.3% | 1,869.6% | 315.6% | 84.0% | 216.7% | 2,852.3% | 465.5% |
| 95 | 13,613.1% | 1,284.0% | 6,587.3% | 1,459.6% | 225.0% | 476.2% | 8,294.5% | 1,158.6% |