Skip to contents

This vignette (PrjThaiHFID-#14) tabulates loan-term distributions for the two formal-bank lender types — Commercial Banks and BAAC — using the packaged loan-level file tstm_loans.

Unit of observation: loan (one row per loan record), restricted to G_LenderType in {Commercial Bank, BAAC}; tables report within-lender-type percentiles across loans.

Outputs at a glance

This vignette produces summary tables only — it does not save a packaged dataset. Individual tables can be written to the package res/res_loan_terms_dist_comm folder by setting their entry in ls_save_res to TRUE (all default FALSE; the res/ artifacts are git-ignored and saved only for local convenience).

  • Input: packaged tstm_loans filtered to Commercial Bank and BAAC loans (anonymized IDs; see R/data.R).
  • Tables: a full loan-terms percentile table by lender type and a partial/quartile summary table.

Pipeline structure

flowchart TD
  subgraph inputs [Packaged data input]
    loans[tstm_loans]
  end
  subgraph prep [Data prep]
    loans --> filt[filter Commercial Bank and BAAC]
    filt --> sel[select columns and drop NA loan length]
    sel --> pct[within-lender-type percentiles and means]
    pct --> trans[transpose percentiles into table layout]
  end
  subgraph display [Table display]
    trans --> full[full loan-terms percentile table]
    trans --> qrt[partial quartile table]
  end

library(PrjThaiHFID)
library(dplyr)
#> 
#> Attaching package: 'dplyr'
#> The following objects are masked from 'package:stats':
#> 
#>     filter, lag
#> The following objects are masked from 'package:base':
#> 
#>     intersect, setdiff, setequal, union
library(tidyr)
library(readr)
library(forcats)
library(ggplot2)
library(kableExtra)
#> 
#> Attaching package: 'kableExtra'
#> The following object is masked from 'package:dplyr':
#> 
#>     group_rows

spn_pkg_root <- rprojroot::find_root(rprojroot::has_file("DESCRIPTION"))
# res/ auto-save controls: TRUE writes that output to spt_res. Default all FALSE.
spt_res <- file.path(spn_pkg_root, "res", "res_loan_terms_dist_comm")
ls_save_res <- list(
  bk_loan_terms = FALSE,
  bk_loan_qrt_terms = FALSE
)

Categories and tabulations

# code to prepare `tstm_loans` dataset goes here
# 1000. Generate Loan IDs and MISC ----
# tstm_loans <- PrjThaiHFID::tstm_loans
# Unique ID for each loan
tstm_loans <- tstm_loans %>%
  filter(G_LenderType %in% c("Commercial Bank", "BAAC")) %>%
  select(
    S_region, provid_Num, vilid_Num, hhid_Num, surveymonth, G_LenderType,
    everything()
  ) %>%
  drop_na(G_Loan_Repaid_Length) %>%
  arrange(hhid_Num, surveymonth, G_Loan_Repaid_Length) %>%
  mutate(loan_id = row_number())

# Tally loan types and drop NA
tstm_loans %>%
  group_by(G_LenderType) %>%
  tally()
#> # A tibble: 2 × 2
#>   G_LenderType        n
#>   <chr>           <int>
#> 1 BAAC             3396
#> 2 Commercial Bank    64
tstm_loans %>%
  group_by(G_LenderType, G_Location) %>%
  tally() %>%
  spread(G_Location, n)
#> # A tibble: 2 × 5
#> # Groups:   G_LenderType [2]
#>   G_LenderType    `Changwat and Out` `Tambon or Amphoe` Village `<NA>`
#>   <chr>                        <int>              <int>   <int>  <int>
#> 1 BAAC                           757               2636       2      1
#> 2 Commercial Bank                 17                 46       1     NA
# Drop NA
tstm_loans <- tstm_loans %>%
  drop_na(G_LenderType)

Loan terms tables

Show in table across loan types loan terms percentiles. This implements PrjThaiHFID-#14.

# Choose latex for paper output
# st_kableformat <- "latex"
st_kableformat <- "html"
# Percentiles of interest
# ar_fl_percentiles <- seq(0.01, 0.99, length.out = 99)
# ar_fl_percentiles <- seq(0.01, 0.99, length.out = 50)
# ar_fl_percentiles <- seq(0.02, 0.98, length.out = 33)
# ar_fl_percentiles <- seq(0.05, 0.95, length.out = 19)
# ar_fl_percentiles <- c(0.1, 0.25, 0.50, 0.75, 0.9, 0.95, 0.99)
ar_fl_percentiles <- c(
  0.05, 0.10, 0.20, 0.30, 0.40,
  0.25, 0.50, 0.75,
  0.60, 0.70, 0.80, 0.9, 0.95
)
# ar_fl_percentiles <- seq(0.10, 0.90, length.out=9)

Data prep part 1: rename and reshape

We loaded in tstm_loans already.

First select loanID, G_LenderType, term, and do so separately for each term, separate files.

# Select files
tstm_loans_sel <- tstm_loans %>%
  select(
    loan_id, G_LenderType,
    G_Loan_Init_Length, S_Init_Amount, G_Loan_Init_IntMthlyRat
  ) %>%
  rename(
    id = loan_id,
    loan_1length = G_Loan_Init_Length,
    loan_2amount = S_Init_Amount,
    loan_3interest = G_Loan_Init_IntMthlyRat
  )

Second, reshape wide to long.

# Select files
tstm_loans_sel_long <- tstm_loans_sel %>%
  pivot_longer(
    cols = starts_with("loan"),
    names_to = c("terms"),
    names_pattern = paste0("loan_(.*)"),
    values_to = "value"
  )
# Drop NA
tstm_loans_sel_long <- tstm_loans_sel_long %>%
  drop_na(value)

Data prep part 2: percentiles

First, compute CDF.

# Generate within-group CDF
tstm_loans_sel_long_pct <- tstm_loans_sel_long %>%
  arrange(terms, G_LenderType, value) %>%
  group_by(terms, G_LenderType) %>%
  mutate(cdf = row_number() / n())

Second, compute percentiles and moments.

# Define strings
st_var_prefix <- "val"
st_var_prefix_perc <- paste0(st_var_prefix, "_p")
svr_mean <- paste0(st_var_prefix, "_mean")
svr_std <- paste0(st_var_prefix, "_std")

# Generate within-group percentiles
for (it_percentile_ctr in seq(1, length(ar_fl_percentiles))) {
  # Current within group percentile to compute
  fl_percentile <- ar_fl_percentiles[it_percentile_ctr]

  # Percentile and mean stats
  svr_percentile <- paste0(st_var_prefix_perc, round(fl_percentile * 100))

  # Frame with specific percentile
  df_within_percentiles_cur <- tstm_loans_sel_long_pct %>%
    group_by(terms, G_LenderType) %>%
    filter(cdf >= fl_percentile) %>%
    slice(1) %>%
    mutate(!!sym(svr_percentile) := value) %>%
    select(terms, G_LenderType, one_of(svr_percentile))

  # Merge percentile frames together
  if (it_percentile_ctr > 1) {
    df_within_percentiles <- df_within_percentiles %>%
      left_join(df_within_percentiles_cur,
        by = c("G_LenderType" = "G_LenderType", "terms" = "terms")
      )
  } else {
    df_within_percentiles <- df_within_percentiles_cur
  }
}

# Add in within group mean
df_within_percentiles_mean <- tstm_loans_sel_long_pct %>%
  group_by(terms, G_LenderType) %>%
  mutate(!!sym(svr_mean) := mean(value, na.rm = TRUE)) %>%
  mutate(!!sym(svr_std) := sqrt(mean((value - !!sym(svr_mean))^2))) %>%
  slice(1)

# Join to file
df_within_percentiles <- df_within_percentiles %>%
  left_join(df_within_percentiles_mean,
    by = c("G_LenderType" = "G_LenderType", "terms" = "terms")
  ) %>%
  select(
    terms, G_LenderType,
    contains(st_var_prefix_perc),
    one_of(svr_mean)
    # ,
    # one_of(svr_std)
  )

# display
print(df_within_percentiles)
#> # A tibble: 6 × 16
#> # Groups:   terms, G_LenderType [6]
#>   terms     G_LenderType  val_p5 val_p10 val_p20 val_p30 val_p40 val_p25 val_p50
#>   <chr>     <chr>          <dbl>   <dbl>   <dbl>   <dbl>   <dbl>   <dbl>   <dbl>
#> 1 1length   BAAC         4   e+0 6   e+0 1   e+1 1.2 e+1 1.2 e+1 1.2 e+1 1.2 e+1
#> 2 1length   Commercial … 1   e+0 4   e+0 1   e+1 1.2 e+1 1.3 e+1 1   e+1 1.3 e+1
#> 3 2amount   BAAC         1   e+4 1   e+4 2   e+4 2.10e+4 3   e+4 2   e+4 3.5 e+4
#> 4 2amount   Commercial … 1.95e+4 2   e+4 2.5 e+4 4   e+4 5.50e+4 3   e+4 1   e+5
#> 5 3interest BAAC         1.88e-3 3.18e-3 4.62e-3 5.38e-3 5.83e-3 5.00e-3 6.15e-3
#> 6 3interest Commercial … 0       2.56e-3 5.26e-3 5.38e-3 6.55e-3 5.36e-3 6.92e-3
#> # ℹ 7 more variables: val_p75 <dbl>, val_p60 <dbl>, val_p70 <dbl>,
#> #   val_p80 <dbl>, val_p90 <dbl>, val_p95 <dbl>, val_mean <dbl>

Data prep part 3: transpose percentiles

First, combine all rows together and combine blnc_vars and ivars. Do not resort, preserve existing orders. Drop the mean, not informative.

# Bind rows for selected key results:
df_within_percentiles <- df_within_percentiles %>%
  ungroup() %>%
  mutate(
    cate_jnt = paste0(terms, "_j_", G_LenderType)
  ) %>%
  select(
    cate_jnt, -terms, -G_LenderType,
    contains(st_var_prefix)
  )

Second, reshape wide to long then to wide again to transpose.

df_within_percentiles_trans <- df_within_percentiles %>%
  pivot_longer(
    cols = starts_with(st_var_prefix),
    names_to = c("percentile"),
    names_pattern = paste0(st_var_prefix, "_(.*)"),
    values_to = "value"
  ) %>%
  pivot_wider(
    names_from = cate_jnt,
    values_from = value
  )

Third, formatting. Length, levels, percentages. percentage formatting.

# format percentiles
df_within_percentiles_trans <- df_within_percentiles_trans %>%
  mutate(percentile = gsub(
    pattern = "p", replacement = "", x = percentile
  ))
# format length
df_within_percentiles_trans <- df_within_percentiles_trans %>%
  mutate_at(
    vars(contains("1length_")),
    list(~ paste0(
      format(round(., 1),
        nsmall = 1,
        big.mark = ","
      )
    ))
  )
# format length
df_within_percentiles_trans <- df_within_percentiles_trans %>%
  mutate_at(
    vars(contains("2amount_")),
    list(~ paste0(
      format(round(., 0),
        nsmall = 0,
        big.mark = ","
      )
    ))
  )
# format interest rates
df_within_percentiles_trans <- df_within_percentiles_trans %>%
  mutate_at(
    vars(contains("3interest_")),
    list(~ paste0(
      format(round(. * 100, 2),
        nsmall = 2,
        big.mark = ","
      ),
      "%"
    ))
  )

Table display (full table)

We present full information with lower and upper deciles, formal, informal, and quasiformal information jointly.

# First, we define column names, which correspond to previously defined variable selection list.
ar_st_col_names <- c(
  "Percentiles",
  "BAAC", "Commercial Bank",
  "BAAC", "Commercial Bank",
  "BAAC", "Commercial Bank"
)
# Define column groups, grouping the names above
ar_st_col_groups <- c(
  " " = 1,
  "Length (months)" = 2,
  "Amount (baht)" = 2,
  "Interest (monthly)" = 2
)
# Define column groups, grouping the names above
ar_st_col_groups_super <- c(
  " " = 1,
  "Loan terms" = 6
)
# Second, we construct main table, and add styling.
f_bk_loan_terms <- function(st_format) {
  kbl(
    df_within_percentiles_trans,
    format = st_format,
    # escape = F,
    linesep = "",
    booktabs = T,
    align = "c",
    caption = "Loan terms distributions.",
    col.names = ar_st_col_names
  ) %>%
    # see https://cran.r-project.org/web/packages/kableExtra/vignettes/awesome_table_in_html.html#Bootstrap_table_classes
    kable_styling(
      bootstrap_options = c("striped", "hover", "condensed", "responsive"),
      full_width = F, position = "left"
    ) %>%
    # Third, we add in row groups
    add_header_above(ar_st_col_groups) %>%
    # add_header_above(ar_st_col_groups_super)
    # Fourth, we add in column groups.
    pack_rows(
      "Below median deciles",
      1, 5,
      latex_gap_space = "0.5em"
    ) %>%
    pack_rows(
      "Quartiles",
      6, 8,
      latex_gap_space = "0.5em", hline_before = F
    ) %>%
    pack_rows(
      "Above median deciles",
      9, 13,
      latex_gap_space = "0.5em", hline_before = F
    ) %>%
    pack_rows(
      "Mean",
      14, 14,
      latex_gap_space = "0.5em", hline_before = F
    ) %>%
    # Fifth, column formatting.
    column_spec(1, width = "2.0cm") %>%
    column_spec(2:7, width = "1.6cm")
}
bk_loan_terms <- f_bk_loan_terms(st_kableformat)
# Note: the latex kable is built eagerly even when bl_save is FALSE. This is a
# cheap, deliberate pattern shared across all vignettes; ffp_save_res_table()
# centralizes the bl_save gate and only writes to disk when enabled.
ffp_save_res_table(f_bk_loan_terms("latex"), "bk_loan_terms", spt_res,
  df = df_within_percentiles_trans,
  bl_save = ls_save_res[["bk_loan_terms"]]
)

# Sixth, display.
# pl_bk_asset_count <- bk_loan_terms %>% as_image()
bk_loan_terms
Loan terms distributions.
Length (months)
Amount (baht)
Interest (monthly)
Percentiles BAAC Commercial Bank BAAC Commercial Bank BAAC Commercial Bank
Below median deciles
5 4.0 1.0 10,000 19,480 0.19% 0.00%
10 6.0 4.0 10,000 20,000 0.32% 0.26%
20 10.0 10.0 20,000 25,000 0.46% 0.53%
30 12.0 12.0 21,000 40,000 0.54% 0.54%
40 12.0 13.0 30,000 55,000 0.58% 0.65%
Quartiles
25 12.0 10.0 20,000 30,000 0.50% 0.54%
50 12.0 13.0 35,000 100,000 0.62% 0.69%
75 13.0 47.0 54,000 200,000 0.77% 2.81%
Above median deciles
60 13.0 24.0 44,125 150,000 0.67% 1.08%
70 13.0 37.0 50,000 200,000 0.75% 1.92%
80 13.0 60.0 70,000 300,000 0.88% 3.00%
90 36.0 66.0 100,000 500,000 1.14% 9.00%
95 61.0 136.0 200,000 850,000 1.71% 60.59%
Mean
mean 17.2 34.5 60,993 344,350 0.81% 28.79%

Table display (partial table)

We now present a subset of information contained in the prior table, we focus on just the quartiles and exclude quasi-formal.

First, we select a subset of the table to consider only quartiles and exclude quasi-formal.

df_within_percentiles_trans_sel <- df_within_percentiles_trans %>%
  filter(percentile %in% c("25", "50", "75", "mean")) %>%
  select(-contains("quasi"))

Second, we generate the plot.

# First, we define column names, which correspond to previously defined variable selection list.
ar_st_col_names <- c(
  "",
  "BAAC", "Commercial Bank",
  "BAAC", "Commercial Bank",
  "BAAC", "Commercial Bank"
)
# Define column groups, grouping the names above
ar_st_col_groups <- c(
  " " = 1,
  "Length (months)" = 2,
  "Amount (baht)" = 2,
  "Interest (monthly)" = 2
)
# Define column groups, grouping the names above
ar_st_col_groups_super <- c(
  " " = 1,
  "Loan terms" = 6
)
# Second, we construct main table, and add styling.
f_bk_loan_qrt_terms <- function(st_format) {
  kbl(
    df_within_percentiles_trans_sel,
    format = st_format,
    # escape = F,
    linesep = "",
    booktabs = T,
    align = "c",
    caption = "Loan terms distributions.",
    col.names = ar_st_col_names
  ) %>%
    # see https://cran.r-project.org/web/packages/kableExtra/vignettes/awesome_table_in_html.html#Bootstrap_table_classes
    kable_styling(
      bootstrap_options = c("striped", "hover", "condensed", "responsive"),
      full_width = F, position = "left"
    ) %>%
    # Third, we add in row groups
    add_header_above(ar_st_col_groups) %>%
    # add_header_above(ar_st_col_groups_super)
    # Fourth, we add in column groups.
    pack_rows(
      "Quartiles",
      1, 3,
      latex_gap_space = "0.5em", hline_before = F
    ) %>%
    pack_rows(
      "Mean",
      4, 4,
      latex_gap_space = "0.5em", hline_before = F
    ) %>%
    # Fifth, column formatting.
    column_spec(1, width = "2.0cm") %>%
    column_spec(2:7, width = "1.6cm")
}
bk_loan_qrt_terms <- f_bk_loan_qrt_terms(st_kableformat)
ffp_save_res_table(f_bk_loan_qrt_terms("latex"), "bk_loan_qrt_terms", spt_res,
  df = df_within_percentiles_trans_sel,
  bl_save = ls_save_res[["bk_loan_qrt_terms"]]
)

# Sixth, display.
# pl_bk_asset_count <- bk_loan_qrt_terms %>% as_image()
bk_loan_qrt_terms
Loan terms distributions.
Length (months)
Amount (baht)
Interest (monthly)
BAAC Commercial Bank BAAC Commercial Bank BAAC Commercial Bank
Quartiles
25 12.0 10.0 20,000 30,000 0.50% 0.54%
50 12.0 13.0 35,000 100,000 0.62% 0.69%
75 13.0 47.0 54,000 200,000 0.77% 2.81%
Mean
mean 17.2 34.5 60,993 344,350 0.81% 28.79%