Loan terms commerical banks and BAAC
Source:vignettes/ffv_loan_terms_dist_comm.Rmd
ffv_loan_terms_dist_comm.Rmd
library(PrjThaiHFID)
library(dplyr)
#>
#> Attaching package: 'dplyr'
#> The following objects are masked from 'package:stats':
#>
#> filter, lag
#> The following objects are masked from 'package:base':
#>
#> intersect, setdiff, setequal, union
library(tidyr)
library(readr)
library(forcats)
library(ggplot2)
library(kableExtra)
#>
#> Attaching package: 'kableExtra'
#> The following object is masked from 'package:dplyr':
#>
#> group_rows
Categories and tabulations
# code to prepare `tstm_loans` dataset goes here
# 1000. Generate Loan IDs and MISC ----
# tstm_loans <- PrjThaiHFID::tstm_loans
# Unique ID for each loan
tstm_loans <- tstm_loans %>%
filter(G_LenderType %in% c("Commercial Bank", "BAAC")) %>%
select(
S_region, provid_Num, vilid_Num, hhid_Num, surveymonth, G_LenderType,
everything()
) %>%
drop_na(G_Loan_Repaid_Length) %>%
arrange(hhid_Num, surveymonth, G_Loan_Repaid_Length) %>%
mutate(loan_id = row_number())
# Tally loan types and drop NA
tstm_loans %>%
group_by(G_LenderType) %>%
tally()
#> # A tibble: 2 × 2
#> G_LenderType n
#> <chr> <int>
#> 1 BAAC 3396
#> 2 Commercial Bank 64
Loan terms tables
Show in table across loan types loan terms percentiles. This implements PrjThaiHFID-#14.
# Choose latex for paper output
# st_kableformat <- "latex"
st_kableformat <- "html"
# Percentiles of interest
# ar_fl_percentiles <- seq(0.01, 0.99, length.out = 99)
# ar_fl_percentiles <- seq(0.01, 0.99, length.out = 50)
# ar_fl_percentiles <- seq(0.02, 0.98, length.out = 33)
# ar_fl_percentiles <- seq(0.05, 0.95, length.out = 19)
# ar_fl_percentiles <- c(0.1, 0.25, 0.50, 0.75, 0.9, 0.95, 0.99)
ar_fl_percentiles <- c(
0.05, 0.10, 0.20, 0.30, 0.40,
0.25, 0.50, 0.75,
0.60, 0.70, 0.80, 0.9, 0.95)
# ar_fl_percentiles <- seq(0.10, 0.90, length.out=9)
Data prep part 1: rename and reshape
We loaded in tstm_loans already.
First select loanID, G_LenderType, term, and do so separately for each term, separate files.
# Select files
tstm_loans_sel <- tstm_loans %>%
select(
loan_id, G_LenderType,
G_Loan_Init_Length, S_Init_Amount, G_Loan_Init_IntMthlyRat
) %>%
rename(
id = loan_id,
loan_1length = G_Loan_Init_Length,
loan_2amount = S_Init_Amount,
loan_3interest = G_Loan_Init_IntMthlyRat
)
Second, reshape wide to long.
# Select files
tstm_loans_sel_long <- tstm_loans_sel %>%
pivot_longer(
cols = starts_with("loan"),
names_to = c("terms"),
names_pattern = paste0("loan_(.*)"),
values_to = "value"
)
# Drop NA
tstm_loans_sel_long <- tstm_loans_sel_long %>%
drop_na(value)
Data prep part 2: percentiles
First, compute CDF.
# Generate within-group CDF
tstm_loans_sel_long_pct <- tstm_loans_sel_long %>%
arrange(terms, G_LenderType, value) %>%
group_by(terms, G_LenderType) %>%
mutate(cdf = row_number() / n())
Second, compute percentiles and moments.
# Define strings
st_var_prefix <- "val"
st_var_prefix_perc <- paste0(st_var_prefix, "_p")
svr_mean <- paste0(st_var_prefix, "_mean")
svr_std <- paste0(st_var_prefix, "_std")
# Generate within-group percentiles
for (it_percentile_ctr in seq(1, length(ar_fl_percentiles))) {
# Current within group percentile to compute
fl_percentile <- ar_fl_percentiles[it_percentile_ctr]
# Percentile and mean stats
svr_percentile <- paste0(st_var_prefix_perc, round(fl_percentile * 100))
# Frame with specific percentile
df_within_percentiles_cur <- tstm_loans_sel_long_pct %>%
group_by(terms, G_LenderType) %>%
filter(cdf >= fl_percentile) %>%
slice(1) %>%
mutate(!!sym(svr_percentile) := value) %>%
select(terms, G_LenderType, one_of(svr_percentile))
# Merge percentile frames together
if (it_percentile_ctr > 1) {
df_within_percentiles <- df_within_percentiles %>%
left_join(df_within_percentiles_cur,
by = c("G_LenderType" = "G_LenderType", "terms" = "terms")
)
} else {
df_within_percentiles <- df_within_percentiles_cur
}
}
# Add in within group mean
df_within_percentiles_mean <- tstm_loans_sel_long_pct %>%
group_by(terms, G_LenderType) %>%
mutate(!!sym(svr_mean) := mean(value, na.rm = TRUE)) %>%
mutate(!!sym(svr_std) := sqrt(mean((value - !!sym(svr_mean))^2))) %>%
slice(1)
# Join to file
df_within_percentiles <- df_within_percentiles %>%
left_join(df_within_percentiles_mean,
by = c("G_LenderType" = "G_LenderType", "terms" = "terms")
) %>%
select(
terms, G_LenderType,
contains(st_var_prefix_perc),
one_of(svr_mean)
# ,
# one_of(svr_std)
)
# display
print(df_within_percentiles)
#> # A tibble: 6 × 16
#> # Groups: terms, G_LenderType [6]
#> terms G_LenderType val_p5 val_p10 val_p20 val_p30 val_p40 val_p25 val_p50
#> <chr> <chr> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
#> 1 1length BAAC 4 e+0 6 e+0 1 e+1 1.2 e+1 1.2 e+1 1.2 e+1 1.2 e+1
#> 2 1length Commercial … 1 e+0 4 e+0 1 e+1 1.2 e+1 1.3 e+1 1 e+1 1.3 e+1
#> 3 2amount BAAC 1 e+4 1 e+4 2 e+4 2.10e+4 3 e+4 2 e+4 3.5 e+4
#> 4 2amount Commercial … 1.95e+4 2 e+4 2.5 e+4 4 e+4 5.50e+4 3 e+4 1 e+5
#> 5 3interest BAAC 1.88e-3 3.18e-3 4.62e-3 5.38e-3 5.83e-3 5.00e-3 6.15e-3
#> 6 3interest Commercial … 0 2.56e-3 5.26e-3 5.38e-3 6.55e-3 5.36e-3 6.92e-3
#> # ℹ 7 more variables: val_p75 <dbl>, val_p60 <dbl>, val_p70 <dbl>,
#> # val_p80 <dbl>, val_p90 <dbl>, val_p95 <dbl>, val_mean <dbl>
Data prep part 3: transpose percentiles
First, combine all rows together and combine blnc_vars
and ivars
. Do not resort, preserve existing orders. Drop
the mean, not informative.
# Bind rows for selected key results:
df_within_percentiles <- df_within_percentiles %>%
ungroup() %>%
mutate(
cate_jnt = paste0(terms, "_j_", G_LenderType)
) %>%
select(
cate_jnt, -terms, -G_LenderType,
contains(st_var_prefix)
)
Second, reshape wide to long then to wide again to transpose.
df_within_percentiles_trans <- df_within_percentiles %>%
pivot_longer(
cols = starts_with(st_var_prefix),
names_to = c("percentile"),
names_pattern = paste0(st_var_prefix, "_(.*)"),
values_to = "value"
) %>%
pivot_wider(
names_from = cate_jnt,
values_from = value
)
Third, formatting. Length, levels, percentages. percentage formatting.
# format percentiles
df_within_percentiles_trans <- df_within_percentiles_trans %>%
mutate(percentile = gsub(
percentile, pattern="p", replace=""))
# format length
df_within_percentiles_trans <- df_within_percentiles_trans %>%
mutate_at(
vars(contains("1length_")),
list(~ paste0(
format(round(., 1),
nsmall = 1,
big.mark = ","
)
))
)
# format length
df_within_percentiles_trans <- df_within_percentiles_trans %>%
mutate_at(
vars(contains("2amount_")),
list(~ paste0(
format(round(., 0),
nsmall = 0,
big.mark = ","
)
))
)
# format interest rates
df_within_percentiles_trans <- df_within_percentiles_trans %>%
mutate_at(
vars(contains("3interest_")),
list(~ paste0(
format(round(. *100, 2),
nsmall = 2,
big.mark = ","
),
"%"
))
)
Table display (full table)
We present full information with lower and upper deciles, formal, informal, and quasiformal information jointly.
# First, we define column names, which correspond to previously defined variable selection list.
ar_st_col_names <- c(
"Percentiles",
"BAAC", "Commercial Bank",
"BAAC", "Commercial Bank",
"BAAC", "Commercial Bank"
)
# Define column groups, grouping the names above
ar_st_col_groups <- c(
" " = 1,
"Length (months)" = 2,
"Amount (baht)" = 2,
"Interest (monthly)" = 2
)
# Define column groups, grouping the names above
ar_st_col_groups_super <- c(
" " = 1,
"Loan terms" = 6
)
# Second, we construct main table, and add styling.
bk_loan_terms <- kbl(
df_within_percentiles_trans,
format = st_kableformat,
# escape = F,
linesep = "",
booktabs = T,
align = "c",
caption = "Loan terms distributions.",
col.names = ar_st_col_names
) %>%
# see https://cran.r-project.org/web/packages/kableExtra/vignettes/awesome_table_in_html.html#Bootstrap_table_classes
kable_styling(
bootstrap_options = c("striped", "hover", "condensed", "responsive"),
full_width = F, position = "left"
)
# Third, we add in row groups
bk_loan_terms <- bk_loan_terms %>%
add_header_above(ar_st_col_groups)
# add_header_above(ar_st_col_groups_super)
# Fourth, we add in column groups.
bk_loan_terms <- bk_loan_terms %>%
pack_rows(
"Below median deciles",
1, 5,
latex_gap_space = "0.5em"
) %>%
pack_rows(
"Quartiles",
6, 8,
latex_gap_space = "0.5em", hline_before = F
) %>%
pack_rows(
"Above median deciles",
9, 13,
latex_gap_space = "0.5em", hline_before = F
) %>%
pack_rows(
"Mean",
14, 14,
latex_gap_space = "0.5em", hline_before = F
)
# Fifth, column formatting.
bk_loan_terms <- bk_loan_terms %>%
column_spec(1, width = "2.0cm") %>%
column_spec(2:7, width = "1.6cm")
# Sixth, display.
# pl_bk_asset_count <- bk_loan_terms %>% as_image()
bk_loan_terms
Percentiles | BAAC | Commercial Bank | BAAC | Commercial Bank | BAAC | Commercial Bank |
---|---|---|---|---|---|---|
Below median deciles | ||||||
5 | 4.0 | 1.0 | 10,000 | 19,480 | 0.19% | 0.00% |
10 | 6.0 | 4.0 | 10,000 | 20,000 | 0.32% | 0.26% |
20 | 10.0 | 10.0 | 20,000 | 25,000 | 0.46% | 0.53% |
30 | 12.0 | 12.0 | 21,000 | 40,000 | 0.54% | 0.54% |
40 | 12.0 | 13.0 | 30,000 | 55,000 | 0.58% | 0.65% |
Quartiles | ||||||
25 | 12.0 | 10.0 | 20,000 | 30,000 | 0.50% | 0.54% |
50 | 12.0 | 13.0 | 35,000 | 100,000 | 0.62% | 0.69% |
75 | 13.0 | 47.0 | 54,000 | 200,000 | 0.77% | 2.81% |
Above median deciles | ||||||
60 | 13.0 | 24.0 | 44,125 | 150,000 | 0.67% | 1.08% |
70 | 13.0 | 37.0 | 50,000 | 200,000 | 0.75% | 1.92% |
80 | 13.0 | 60.0 | 70,000 | 300,000 | 0.88% | 3.00% |
90 | 36.0 | 66.0 | 100,000 | 500,000 | 1.14% | 9.00% |
95 | 61.0 | 136.0 | 200,000 | 850,000 | 1.71% | 60.59% |
Mean | ||||||
mean | 17.2 | 34.5 | 60,993 | 344,350 | 0.81% | 28.79% |
Table display (partial table)
We now present a subset of information contained in the prior table, we focus on just the quartiles and exclude quasi-formal.
First, we select a subset of the table to consider only quartiles and exclude quasi-formal.
df_within_percentiles_trans_sel <- df_within_percentiles_trans %>%
filter(percentile %in% c("25", "50", "75","mean")) %>%
select(-contains("quasi"))
Second, we generate the plot.
# First, we define column names, which correspond to previously defined variable selection list.
ar_st_col_names <- c(
"",
"BAAC", "Commercial Bank",
"BAAC", "Commercial Bank",
"BAAC", "Commercial Bank"
)
# Define column groups, grouping the names above
ar_st_col_groups <- c(
" " = 1,
"Length (months)" = 2,
"Amount (baht)" = 2,
"Interest (monthly)" = 2
)
# Define column groups, grouping the names above
ar_st_col_groups_super <- c(
" " = 1,
"Loan terms" = 6
)
# Second, we construct main table, and add styling.
bk_loan_qrt_terms <- kbl(
df_within_percentiles_trans_sel,
format = st_kableformat,
# escape = F,
linesep = "",
booktabs = T,
align = "c",
caption = "Loan terms distributions.",
col.names = ar_st_col_names
) %>%
# see https://cran.r-project.org/web/packages/kableExtra/vignettes/awesome_table_in_html.html#Bootstrap_table_classes
kable_styling(
bootstrap_options = c("striped", "hover", "condensed", "responsive"),
full_width = F, position = "left"
)
# Third, we add in row groups
bk_loan_qrt_terms <- bk_loan_qrt_terms %>%
add_header_above(ar_st_col_groups)
# add_header_above(ar_st_col_groups_super)
# Fourth, we add in column groups.
bk_loan_qrt_terms <- bk_loan_qrt_terms %>%
pack_rows(
"Quartiles",
1, 3,
latex_gap_space = "0.5em", hline_before = F
) %>%
pack_rows(
"Mean",
4, 4,
latex_gap_space = "0.5em", hline_before = F
)
# Fifth, column formatting.
bk_loan_qrt_terms <- bk_loan_qrt_terms %>%
column_spec(1, width = "2.0cm") %>%
column_spec(2:7, width = "1.6cm")
# Sixth, display.
# pl_bk_asset_count <- bk_loan_qrt_terms %>% as_image()
bk_loan_qrt_terms
BAAC | Commercial Bank | BAAC | Commercial Bank | BAAC | Commercial Bank | |
---|---|---|---|---|---|---|
Quartiles | ||||||
25 | 12.0 | 10.0 | 20,000 | 30,000 | 0.50% | 0.54% |
50 | 12.0 | 13.0 | 35,000 | 100,000 | 0.62% | 0.69% |
75 | 13.0 | 47.0 | 54,000 | 200,000 | 0.77% | 2.81% |
Mean | ||||||
mean | 17.2 | 34.5 | 60,993 | 344,350 | 0.81% | 28.79% |