Skip to contents
library(PrjThaiHFID)
library(dplyr)
#> 
#> Attaching package: 'dplyr'
#> The following objects are masked from 'package:stats':
#> 
#>     filter, lag
#> The following objects are masked from 'package:base':
#> 
#>     intersect, setdiff, setequal, union
library(tidyr)
library(readr)
library(forcats)
library(ggplot2)
library(kableExtra)
#> 
#> Attaching package: 'kableExtra'
#> The following object is masked from 'package:dplyr':
#> 
#>     group_rows

Categories and tabulations

# code to prepare `tstm_loans` dataset goes here
# 1000. Generate Loan IDs and MISC ----
# tstm_loans <- PrjThaiHFID::tstm_loans
# Unique ID for each loan
tstm_loans <- tstm_loans %>%
  filter(G_LenderType %in% c("Commercial Bank", "BAAC")) %>%
  select(
    S_region, provid_Num, vilid_Num, hhid_Num, surveymonth, G_LenderType,
    everything()
  ) %>%
  drop_na(G_Loan_Repaid_Length) %>%
  arrange(hhid_Num, surveymonth, G_Loan_Repaid_Length) %>%
  mutate(loan_id = row_number())

# Tally loan types and drop NA
tstm_loans %>%
  group_by(G_LenderType) %>%
  tally()
#> # A tibble: 2 × 2
#>   G_LenderType        n
#>   <chr>           <int>
#> 1 BAAC             3396
#> 2 Commercial Bank    64
tstm_loans %>%
  group_by(G_LenderType, G_Location) %>%
  tally() %>%
  spread(G_Location, n)
#> # A tibble: 2 × 5
#> # Groups:   G_LenderType [2]
#>   G_LenderType    `Changwat and Out` `Tambon or Amphoe` Village `<NA>`
#>   <chr>                        <int>              <int>   <int>  <int>
#> 1 BAAC                           757               2636       2      1
#> 2 Commercial Bank                 17                 46       1     NA
# Drop NA
tstm_loans <- tstm_loans %>%
  drop_na(G_LenderType)

Loan terms tables

Show in table across loan types loan terms percentiles. This implements PrjThaiHFID-#14.

# Choose latex for paper output
# st_kableformat <- "latex"
st_kableformat <- "html"
# Percentiles of interest
# ar_fl_percentiles <- seq(0.01, 0.99, length.out = 99)
# ar_fl_percentiles <- seq(0.01, 0.99, length.out = 50)
# ar_fl_percentiles <- seq(0.02, 0.98, length.out = 33)
# ar_fl_percentiles <- seq(0.05, 0.95, length.out = 19)
# ar_fl_percentiles <- c(0.1, 0.25, 0.50, 0.75, 0.9, 0.95, 0.99)
ar_fl_percentiles <- c(
  0.05, 0.10, 0.20, 0.30, 0.40, 
  0.25, 0.50, 0.75, 
  0.60, 0.70, 0.80, 0.9, 0.95)
# ar_fl_percentiles <- seq(0.10, 0.90, length.out=9)

Data prep part 1: rename and reshape

We loaded in tstm_loans already.

First select loanID, G_LenderType, term, and do so separately for each term, separate files.

# Select files
tstm_loans_sel <- tstm_loans %>%
  select(
    loan_id, G_LenderType,
    G_Loan_Init_Length, S_Init_Amount, G_Loan_Init_IntMthlyRat
  ) %>%
  rename(
    id = loan_id,
    loan_1length = G_Loan_Init_Length,
    loan_2amount = S_Init_Amount,
    loan_3interest = G_Loan_Init_IntMthlyRat
  )

Second, reshape wide to long.

# Select files
tstm_loans_sel_long <- tstm_loans_sel %>%
  pivot_longer(
    cols = starts_with("loan"),
    names_to = c("terms"),
    names_pattern = paste0("loan_(.*)"),
    values_to = "value"
  )
# Drop NA
tstm_loans_sel_long <- tstm_loans_sel_long %>%
  drop_na(value)

Data prep part 2: percentiles

First, compute CDF.

# Generate within-group CDF
tstm_loans_sel_long_pct <- tstm_loans_sel_long %>%
  arrange(terms, G_LenderType, value) %>%
  group_by(terms, G_LenderType) %>%
  mutate(cdf = row_number() / n())

Second, compute percentiles and moments.

# Define strings
st_var_prefix <- "val"
st_var_prefix_perc <- paste0(st_var_prefix, "_p")
svr_mean <- paste0(st_var_prefix, "_mean")
svr_std <- paste0(st_var_prefix, "_std")

# Generate within-group percentiles
for (it_percentile_ctr in seq(1, length(ar_fl_percentiles))) {
  # Current within group percentile to compute
  fl_percentile <- ar_fl_percentiles[it_percentile_ctr]

  # Percentile and mean stats
  svr_percentile <- paste0(st_var_prefix_perc, round(fl_percentile * 100))

  # Frame with specific percentile
  df_within_percentiles_cur <- tstm_loans_sel_long_pct %>%
    group_by(terms, G_LenderType) %>%
    filter(cdf >= fl_percentile) %>%
    slice(1) %>%
    mutate(!!sym(svr_percentile) := value) %>%
    select(terms, G_LenderType, one_of(svr_percentile))

  # Merge percentile frames together
  if (it_percentile_ctr > 1) {
    df_within_percentiles <- df_within_percentiles %>%
      left_join(df_within_percentiles_cur,
        by = c("G_LenderType" = "G_LenderType", "terms" = "terms")
      )
  } else {
    df_within_percentiles <- df_within_percentiles_cur
  }
}

# Add in within group mean
df_within_percentiles_mean <- tstm_loans_sel_long_pct %>%
  group_by(terms, G_LenderType) %>%
  mutate(!!sym(svr_mean) := mean(value, na.rm = TRUE)) %>%
  mutate(!!sym(svr_std) := sqrt(mean((value - !!sym(svr_mean))^2))) %>%
  slice(1)

# Join to file
df_within_percentiles <- df_within_percentiles %>%
  left_join(df_within_percentiles_mean,
    by = c("G_LenderType" = "G_LenderType", "terms" = "terms")
  ) %>%
  select(
    terms, G_LenderType,
    contains(st_var_prefix_perc),
    one_of(svr_mean)
    # ,
    # one_of(svr_std)
  )

# display
print(df_within_percentiles)
#> # A tibble: 6 × 16
#> # Groups:   terms, G_LenderType [6]
#>   terms     G_LenderType  val_p5 val_p10 val_p20 val_p30 val_p40 val_p25 val_p50
#>   <chr>     <chr>          <dbl>   <dbl>   <dbl>   <dbl>   <dbl>   <dbl>   <dbl>
#> 1 1length   BAAC         4   e+0 6   e+0 1   e+1 1.2 e+1 1.2 e+1 1.2 e+1 1.2 e+1
#> 2 1length   Commercial … 1   e+0 4   e+0 1   e+1 1.2 e+1 1.3 e+1 1   e+1 1.3 e+1
#> 3 2amount   BAAC         1   e+4 1   e+4 2   e+4 2.10e+4 3   e+4 2   e+4 3.5 e+4
#> 4 2amount   Commercial … 1.95e+4 2   e+4 2.5 e+4 4   e+4 5.50e+4 3   e+4 1   e+5
#> 5 3interest BAAC         1.88e-3 3.18e-3 4.62e-3 5.38e-3 5.83e-3 5.00e-3 6.15e-3
#> 6 3interest Commercial … 0       2.56e-3 5.26e-3 5.38e-3 6.55e-3 5.36e-3 6.92e-3
#> # ℹ 7 more variables: val_p75 <dbl>, val_p60 <dbl>, val_p70 <dbl>,
#> #   val_p80 <dbl>, val_p90 <dbl>, val_p95 <dbl>, val_mean <dbl>

Data prep part 3: transpose percentiles

First, combine all rows together and combine blnc_vars and ivars. Do not resort, preserve existing orders. Drop the mean, not informative.

# Bind rows for selected key results:
df_within_percentiles <- df_within_percentiles %>%
  ungroup() %>%
  mutate(
    cate_jnt = paste0(terms, "_j_", G_LenderType)
  ) %>%
  select(
    cate_jnt, -terms, -G_LenderType,
    contains(st_var_prefix)
  )

Second, reshape wide to long then to wide again to transpose.

df_within_percentiles_trans <- df_within_percentiles %>%
  pivot_longer(
    cols = starts_with(st_var_prefix),
    names_to = c("percentile"),
    names_pattern = paste0(st_var_prefix, "_(.*)"),
    values_to = "value"
  ) %>%
  pivot_wider(
    names_from = cate_jnt,
    values_from = value
  )

Third, formatting. Length, levels, percentages. percentage formatting.

# format percentiles
df_within_percentiles_trans <- df_within_percentiles_trans %>%
  mutate(percentile = gsub(
    percentile, pattern="p", replace=""))
# format length
df_within_percentiles_trans <- df_within_percentiles_trans %>%
  mutate_at(
    vars(contains("1length_")),
    list(~ paste0(
      format(round(., 1),
        nsmall = 1,
        big.mark = ","
      )
    ))
  )
# format length
df_within_percentiles_trans <- df_within_percentiles_trans %>%
  mutate_at(
    vars(contains("2amount_")),
    list(~ paste0(
      format(round(., 0),
        nsmall = 0,
        big.mark = ","
      )
    ))
  )
# format interest rates
df_within_percentiles_trans <- df_within_percentiles_trans %>%
  mutate_at(
    vars(contains("3interest_")),
    list(~ paste0(
      format(round(. *100, 2),
        nsmall = 2,
        big.mark = ","
      ), 
      "%"
    ))
  )

Table display (full table)

We present full information with lower and upper deciles, formal, informal, and quasiformal information jointly.

# First, we define column names, which correspond to previously defined variable selection list.
ar_st_col_names <- c(
  "Percentiles",
  "BAAC", "Commercial Bank",
  "BAAC", "Commercial Bank",
  "BAAC", "Commercial Bank"
)
# Define column groups, grouping the names above
ar_st_col_groups <- c(
  " " = 1,
  "Length (months)" = 2,
  "Amount (baht)" = 2,
  "Interest (monthly)" = 2
)
# Define column groups, grouping the names above
ar_st_col_groups_super <- c(
  " " = 1,
  "Loan terms" = 6
)
# Second, we construct main table, and add styling.
bk_loan_terms <- kbl(
  df_within_percentiles_trans,
  format = st_kableformat,
  # escape = F,
  linesep = "",
  booktabs = T,
  align = "c",
  caption = "Loan terms distributions.",
  col.names = ar_st_col_names
) %>%
  # see https://cran.r-project.org/web/packages/kableExtra/vignettes/awesome_table_in_html.html#Bootstrap_table_classes
  kable_styling(
    bootstrap_options = c("striped", "hover", "condensed", "responsive"),
    full_width = F, position = "left"
  )

# Third, we add in row groups
bk_loan_terms <- bk_loan_terms %>%
  add_header_above(ar_st_col_groups) 
  # add_header_above(ar_st_col_groups_super)

# Fourth, we add in column groups. 
bk_loan_terms <- bk_loan_terms %>%
  pack_rows(
    "Below median deciles",
    1, 5,
    latex_gap_space = "0.5em"
  ) %>%
  pack_rows(
    "Quartiles",
    6, 8,
    latex_gap_space = "0.5em", hline_before = F
  ) %>%
  pack_rows(
    "Above median deciles",
    9, 13,
    latex_gap_space = "0.5em", hline_before = F
  ) %>%
  pack_rows(
    "Mean",
    14, 14,
    latex_gap_space = "0.5em", hline_before = F
  )

# Fifth, column formatting.
bk_loan_terms <- bk_loan_terms %>%
  column_spec(1, width = "2.0cm") %>%
  column_spec(2:7, width = "1.6cm")

# Sixth, display.
# pl_bk_asset_count <- bk_loan_terms %>% as_image()
bk_loan_terms
Loan terms distributions.
Length (months)
Amount (baht)
Interest (monthly)
Percentiles BAAC Commercial Bank BAAC Commercial Bank BAAC Commercial Bank
Below median deciles
5 4.0 1.0 10,000 19,480 0.19% 0.00%
10 6.0 4.0 10,000 20,000 0.32% 0.26%
20 10.0 10.0 20,000 25,000 0.46% 0.53%
30 12.0 12.0 21,000 40,000 0.54% 0.54%
40 12.0 13.0 30,000 55,000 0.58% 0.65%
Quartiles
25 12.0 10.0 20,000 30,000 0.50% 0.54%
50 12.0 13.0 35,000 100,000 0.62% 0.69%
75 13.0 47.0 54,000 200,000 0.77% 2.81%
Above median deciles
60 13.0 24.0 44,125 150,000 0.67% 1.08%
70 13.0 37.0 50,000 200,000 0.75% 1.92%
80 13.0 60.0 70,000 300,000 0.88% 3.00%
90 36.0 66.0 100,000 500,000 1.14% 9.00%
95 61.0 136.0 200,000 850,000 1.71% 60.59%
Mean
mean 17.2 34.5 60,993 344,350 0.81% 28.79%

Table display (partial table)

We now present a subset of information contained in the prior table, we focus on just the quartiles and exclude quasi-formal.

First, we select a subset of the table to consider only quartiles and exclude quasi-formal.

df_within_percentiles_trans_sel <- df_within_percentiles_trans %>% 
  filter(percentile %in% c("25", "50", "75","mean")) %>% 
  select(-contains("quasi"))

Second, we generate the plot.

# First, we define column names, which correspond to previously defined variable selection list.
ar_st_col_names <- c(
  "",
  "BAAC", "Commercial Bank",
  "BAAC", "Commercial Bank",
  "BAAC", "Commercial Bank"
)
# Define column groups, grouping the names above
ar_st_col_groups <- c(
  " " = 1,
  "Length (months)" = 2,
  "Amount (baht)" = 2,
  "Interest (monthly)" = 2
)
# Define column groups, grouping the names above
ar_st_col_groups_super <- c(
  " " = 1,
  "Loan terms" = 6
)
# Second, we construct main table, and add styling.
bk_loan_qrt_terms <- kbl(
  df_within_percentiles_trans_sel,
  format = st_kableformat,
  # escape = F,
  linesep = "",
  booktabs = T,
  align = "c",
  caption = "Loan terms distributions.",
  col.names = ar_st_col_names
) %>%
  # see https://cran.r-project.org/web/packages/kableExtra/vignettes/awesome_table_in_html.html#Bootstrap_table_classes
  kable_styling(
    bootstrap_options = c("striped", "hover", "condensed", "responsive"),
    full_width = F, position = "left"
  )

# Third, we add in row groups
bk_loan_qrt_terms <- bk_loan_qrt_terms %>%
  add_header_above(ar_st_col_groups) 
  # add_header_above(ar_st_col_groups_super)

# Fourth, we add in column groups. 
bk_loan_qrt_terms <- bk_loan_qrt_terms %>%
  pack_rows(
    "Quartiles",
    1, 3,
    latex_gap_space = "0.5em", hline_before = F
  ) %>%
  pack_rows(
    "Mean",
    4, 4,
    latex_gap_space = "0.5em", hline_before = F
  )

# Fifth, column formatting.
bk_loan_qrt_terms <- bk_loan_qrt_terms %>%
  column_spec(1, width = "2.0cm") %>%
  column_spec(2:7, width = "1.6cm")

# Sixth, display.
# pl_bk_asset_count <- bk_loan_qrt_terms %>% as_image()
bk_loan_qrt_terms
Loan terms distributions.
Length (months)
Amount (baht)
Interest (monthly)
BAAC Commercial Bank BAAC Commercial Bank BAAC Commercial Bank
Quartiles
25 12.0 10.0 20,000 30,000 0.50% 0.54%
50 12.0 13.0 35,000 100,000 0.62% 0.69%
75 13.0 47.0 54,000 200,000 0.77% 2.81%
Mean
mean 17.2 34.5 60,993 344,350 0.81% 28.79%