Skip to contents
library(tibble)
library(dplyr)
#> 
#> Attaching package: 'dplyr'
#> The following objects are masked from 'package:stats':
#> 
#>     filter, lag
#> The following objects are masked from 'package:base':
#> 
#>     intersect, setdiff, setequal, union
library(tidyr)
library(stringr)
library(readr)
library(kableExtra)
#> 
#> Attaching package: 'kableExtra'
#> The following object is masked from 'package:dplyr':
#> 
#>     group_rows

library(PrjCompPPTS)
# Development switches: the LaTeX table is written to disk only when
# bl_tex_save is TRUE; verbose controls the progress messages printed when
# the file is saved.
verbose <- TRUE
bl_tex_save <- FALSE

# Destination folder and file name of the LaTeX table
spt_path_res <- file.path("..", "res-tab", fsep = .Platform$file.sep)
spn_tex_out <- file.path(
  spt_path_res, "tab_korea_lrce.tex",
  fsep = .Platform$file.sep
)

Implement PrjCompPPTS-13, summaries for Korea.

Generate input file

Load the prepared files for all location levels

Load file with interpolated data.

# Location codes
ppts_code_wrk <- ppts_country_code
# Global population data (presumably datasets provided by PrjCompPPTS;
# verify against the package)
df_ppts <- ppts_easia_weuro_world_pchg
# Which location levels does the data contain?
distinct(df_ppts, location_level)
#> # A tibble: 4 × 1
#>   location_level
#>   <fct>         
#> 1 country       
#> 2 multicountry  
#> 3 multiprovince 
#> 4 province
# Keep only the province and multi-province rows
df_ppts_sel <- filter(
  df_ppts,
  location_level %in% c("province", "multiprovince")
)

# Show all Korean locations
distinct(df_ppts_sel, location_code)
#> # A tibble: 19 × 1
#>    location_code
#>    <fct>        
#>  1 KOR_Metro    
#>  2 KOR_NonMetro 
#>  3 KOR_Busan    
#>  4 KOR_Chungbuk 
#>  5 KOR_Chungnam 
#>  6 KOR_Daegu    
#>  7 KOR_Daejeon  
#>  8 KOR_Gangwon  
#>  9 KOR_Gwangju  
#> 10 KOR_Gyeong-gi
#> 11 KOR_Gyeongbuk
#> 12 KOR_Gyeongnam
#> 13 KOR_Incheon  
#> 14 KOR_Jeju     
#> 15 KOR_Jeonbuk  
#> 16 KOR_Jeonnam  
#> 17 KOR_Sejong   
#> 18 KOR_Seoul    
#> 19 KOR_Ulsan
# Variables available for the selected locations
distinct(df_ppts_sel, variable)
#> # A tibble: 3 × 1
#>   variable
#>   <fct>   
#> 1 school  
#> 2 student 
#> 3 teacher

# Display name and region group for every Korean location code.
# Each entry is c(<display name>, <region group>). The display name carries a
# letter-number prefix (A1, A2, ..., E3) that is used later to sort the
# locations and is stripped again before the table header is written.
ls_korea <- local({
  st_grp_all <- "Location groups"
  st_grp_capital <- "Capital"
  st_grp_metro <- "Metropolitan cities"
  st_grp_nonmetro <- "Non-metropolitan cities"
  list(
    # Aggregates
    "KOR_Metro" = c("A1-Metro", st_grp_all),
    "KOR_NonMetro" = c("C1-Non-metro", st_grp_all),
    # Capital area
    "KOR_Gyeong-gi" = c("A4-Gyeong-gi", st_grp_capital),
    "KOR_Incheon" = c("A3-Incheon", st_grp_capital),
    "KOR_Seoul" = c("A2-Seoul", st_grp_capital),
    # Metropolitan cities
    "KOR_Busan" = c("B1-Busan", st_grp_metro),
    "KOR_Daegu" = c("B2-Daegu", st_grp_metro),
    "KOR_Daejeon" = c("B3-Daejeon", st_grp_metro),
    "KOR_Gwangju" = c("B4-Gwangju", st_grp_metro),
    "KOR_Sejong" = c("B5-Sejong", st_grp_metro),
    "KOR_Ulsan" = c("B6-Ulsan", st_grp_metro),
    # Non-metropolitan locations
    "KOR_Chungbuk" = c("D1-Chungbuk", st_grp_nonmetro),
    "KOR_Chungnam" = c("D2-Chungnam", st_grp_nonmetro),
    "KOR_Gyeongbuk" = c("D3-Gyeongbuk", st_grp_nonmetro),
    "KOR_Gyeongnam" = c("D4-Gyeongnam", st_grp_nonmetro),
    "KOR_Jeonnam" = c("D5-Jeonnam", st_grp_nonmetro),
    "KOR_Gangwon" = c("E1-Gangwon", st_grp_nonmetro),
    "KOR_Jeju" = c("E2-Jeju", st_grp_nonmetro),
    "KOR_Jeonbuk" = c("E3-Jeonbuk", st_grp_nonmetro)
  )
})

Run PrjCompPPTS functions

Run functions to generate FLR, FPC, and FEL files. Run FLR.

# Generate FLR: student, teacher and school levels plus the
# student-to-teacher (s2t) and student-to-school (s2s) ratios, evaluated at
# the decade marks 1970, 1980, ..., 2020.
df_flr_korea <- df_ppts_sel %>%
  PrjCompPPTS::ff_ppts_lrce_flr(
    st_year_bins_type = "1940t2020i01",
    ar_it_years = seq(1970, 2020, by = 10),
    ar_st_vars = c("student", "teacher", "school"),
    ls_rat_vars = list(
      "s2t" = c("student", "teacher"),
      "s2s" = c("student", "school")
    ),
    verbose = TRUE
  )
#> F-713479, S1
#> [1] 1970 1980 1990 2000 2010 2020
#> F-713479, S2
#> dim of youth wide file: 114
#> dim of youth wide file: 6
#> F-713479, S2
#> [1] "var_rat_s2t"
#> [1] "var_lvl_student"
#> [1] "var_lvl_teacher"
#> dim FLR: 114
#> dim FLR: 7
#> F-713479, S2
#> [1] "var_rat_s2s"
#> [1] "var_lvl_student"
#> [1] "var_lvl_school"
#> dim FLR: 114
#> dim FLR: 8
#> F-713479, S3
#> [1] "location_code"   "location_level"  "year_bins"       "var_lvl_school" 
#> [5] "var_lvl_student" "var_lvl_teacher" "var_rat_s2t"     "var_rat_s2s"    
#> dim of youth wide ratio file: 114
#> dim of youth wide ratio file: 8
# Year bins present in the FLR output
distinct(df_flr_korea, year_bins)
#> # A tibble: 6 × 1
#>   year_bins
#>       <dbl>
#> 1      1970
#> 2      1980
#> 3      1990
#> 4      2000
#> 5      2010
#> 6      2020

Run FPC.

# Generate FPC: changes between each earlier decade mark and 2020.
# Only decade start years are requested; the half-decade comparisons
# (1975, 1985, 1995, 2005, 2015 vs 2020) are intentionally left out.
df_fpc_korea <- df_flr_korea %>%
  PrjCompPPTS::ff_ppts_lrce_fpc(
    ar_it_years_chg = seq(1970, 2020, by = 10),
    ls_chg_years = list(
      "chg_20v70" = c(1970, 2020),
      "chg_20v80" = c(1980, 2020),
      "chg_20v90" = c(1990, 2020),
      "chg_20v00" = c(2000, 2020),
      "chg_20v10" = c(2010, 2020)
    ),
    verbose = TRUE
  )
#> F-376864, S1
#> dim of youth wide ratio file: 570
#> dim of youth wide ratio file: 6
#> F-376864, S2
#> dim of youth wide ratio file: 570
#> dim of youth wide ratio file: 6
#> F-376864, S3
#> dim of youth wide ratio file: 190
#> dim of youth wide ratio file: 10
#> F-376864, SD3
#> [1] "chg_20v70"
#> [1] "year1970"
#> [1] "year2020"
#> dim FPC: 190
#> dim FPC: 11
#> F-376864, SD3
#> [1] "chg_20v80"
#> [1] "year1980"
#> [1] "year2020"
#> dim FPC: 190
#> dim FPC: 12
#> F-376864, SD3
#> [1] "chg_20v90"
#> [1] "year1990"
#> [1] "year2020"
#> dim FPC: 190
#> dim FPC: 13
#> F-376864, SD3
#> [1] "chg_20v00"
#> [1] "year2000"
#> [1] "year2020"
#> dim FPC: 190
#> dim FPC: 14
#> F-376864, SD3
#> [1] "chg_20v10"
#> [1] "year2010"
#> [1] "year2020"
#> dim FPC: 190
#> dim FPC: 15
#> F-376864, S4
#> dim FPC: 190
#> dim FPC: 15
# Preview the first 40 rows of the FPC output
df_fpc_korea %>% print(n = 40)
#> # A tibble: 190 × 15
#>    location_code location_level vartype variable  year1970  year1980  year1990
#>    <fct>         <fct>          <chr>   <chr>        <dbl>     <dbl>     <dbl>
#>  1 KOR_Metro     multiprovince  lvl     school      1309      1525      1881  
#>  2 KOR_Metro     multiprovince  lvl     student  2531380   3004561   3109104  
#>  3 KOR_Metro     multiprovince  lvl     teacher    37375     51517     72501  
#>  4 KOR_Metro     multiprovince  rat     s2t           67.7      58.3      42.9
#>  5 KOR_Metro     multiprovince  rat     s2s         1934.     1970.     1653. 
#>  6 KOR_NonMetro  multiprovince  lvl     school      4652      4962      4454  
#>  7 KOR_NonMetro  multiprovince  lvl     student  3217921   2653441   1759416  
#>  8 KOR_NonMetro  multiprovince  lvl     teacher    63720     67547     64299  
#>  9 KOR_NonMetro  multiprovince  rat     s2t           50.5      39.3      27.4
#> 10 KOR_NonMetro  multiprovince  rat     s2s          692.      535.      395. 
#> 11 KOR_Busan     province       lvl     school        99       137       221  
#> 12 KOR_Busan     province       lvl     student   287059    446162    457057  
#> 13 KOR_Busan     province       lvl     teacher     4120      7420     10426  
#> 14 KOR_Busan     province       rat     s2t           69.7      60.1      43.8
#> 15 KOR_Busan     province       rat     s2s         2900.     3257.     2068. 
#> 16 KOR_Chungbuk  province       lvl     school       372       397       337  
#> 17 KOR_Chungbuk  province       lvl     student   310348    222453    153273  
#> 18 KOR_Chungbuk  province       lvl     teacher     6043      6237      5353  
#> 19 KOR_Chungbuk  province       rat     s2t           51.4      35.7      28.6
#> 20 KOR_Chungbuk  province       rat     s2s          834.      560.      455. 
#> 21 KOR_Chungnam  province       lvl     school       510       546       536  
#> 22 KOR_Chungnam  province       lvl     student   349484    279723    204769  
#> 23 KOR_Chungnam  province       lvl     teacher     7088      7427      7470  
#> 24 KOR_Chungnam  province       rat     s2t           49.3      37.7      27.4
#> 25 KOR_Chungnam  province       rat     s2s          685.      512.      382. 
#> 26 KOR_Daegu     province       lvl     school        81        86       118  
#> 27 KOR_Daegu     province       lvl     student   282466    243379    255071  
#> 28 KOR_Daegu     province       lvl     teacher     3884      4196      5692  
#> 29 KOR_Daegu     province       rat     s2t           72.7      58.0      44.8
#> 30 KOR_Daegu     province       rat     s2s         3487.     2830.     2162. 
#> 31 KOR_Daejeon   province       lvl     school        74        79        79  
#> 32 KOR_Daejeon   province       lvl     student   191964    153645    120318  
#> 33 KOR_Daejeon   province       lvl     teacher     2630      2756      2937  
#> 34 KOR_Daejeon   province       rat     s2t           73.0      55.7      41.0
#> 35 KOR_Daejeon   province       rat     s2s         2594.     1945.     1523. 
#> 36 KOR_Gangwon   province       lvl     school       607       618       499  
#> 37 KOR_Gangwon   province       lvl     student   370844    292446    188114  
#> 38 KOR_Gangwon   province       lvl     teacher     7100      7514      7371  
#> 39 KOR_Gangwon   province       rat     s2t           52.2      38.9      25.5
#> 40 KOR_Gangwon   province       rat     s2s          611.      473.      377. 
#> # ℹ 150 more rows
#> # ℹ 8 more variables: year2000 <dbl>, year2010 <dbl>, year2020 <dbl>,
#> #   chg_20v70 <dbl>, chg_20v80 <dbl>, chg_20v90 <dbl>, chg_20v00 <dbl>,
#> #   chg_20v10 <dbl>

Run FEL.

# Generate FEL: elasticities for the student/teacher (s2t) and
# student/school (s2s) pairs. The youth-population pairs (y2t, y2s) are not
# requested here.
df_fel_korea <- df_fpc_korea %>%
  PrjCompPPTS::ff_ppts_lrce_fel(
    ls_rat_vars = list(
      "s2t" = c("student", "teacher"),
      "s2s" = c("student", "school")
    ),
    verbose = TRUE
  )
#> F-307302, S1
#> dim df_fpc_base: 57
#> dim df_fpc_base: 8
#> F-307302, S2
#> dim df_fel_long: 285
#> dim df_fel_long: 5
#> F-307302, S3
#> dim df_fel_wide: 95
#> dim df_fel_wide: 6
#> F-307302, S3
#> [1] "var_elas_s2t"
#> [1] "var_chg_student"
#> [1] "var_chg_teacher"
#> dim FEL: 95
#> dim FEL: 7
#> F-307302, S3
#> [1] "var_elas_s2s"
#> [1] "var_chg_student"
#> [1] "var_chg_school"
#> dim FEL: 95
#> dim FEL: 8
#> F-307302, S4
#> dim df_fel_elas: 95
#> dim df_fel_elas: 8
#> F-307302, S4
#> dim FEL: 38
#> dim FEL: 8
# Preview the first 40 rows of the FEL output
df_fel_korea %>% print(n = 40)
#> # A tibble: 38 × 8
#>    location_code location_level variable elas_20v70 elas_20v80 elas_20v90
#>    <fct>         <fct>          <chr>         <dbl>      <dbl>      <dbl>
#>  1 KOR_Metro     multiprovince  s2t         -0.114     -0.272     -0.573 
#>  2 KOR_Metro     multiprovince  s2s         -0.185     -0.352     -0.586 
#>  3 KOR_NonMetro  multiprovince  s2t        -19.0       34.8      -17.8   
#>  4 KOR_NonMetro  multiprovince  s2s          2.04       1.70       1.58  
#>  5 KOR_Busan     province       s2t         -0.318     -1.78      24.9   
#>  6 KOR_Busan     province       s2s         -0.225     -0.538     -1.77  
#>  7 KOR_Chungbuk  province       s2t         -6.41      -7.86      -1.73  
#>  8 KOR_Chungbuk  province       s2s          2.37       1.76       1.90  
#>  9 KOR_Chungnam  province       s2t         -2.14      -2.31      -1.72  
#> 10 KOR_Chungnam  province       s2s          3.35       2.29       1.76  
#> 11 KOR_Daegu     province       s2t         -0.438     -0.442     -0.920 
#> 12 KOR_Daegu     province       s2s         -0.308     -0.296     -0.547 
#> 13 KOR_Daejeon   province       s2t         -0.451     -0.404     -0.319 
#> 14 KOR_Daejeon   province       s2s         -0.584     -0.550     -0.385 
#> 15 KOR_Gangwon   province       s2t         12.3        6.42       6.13  
#> 16 KOR_Gangwon   province       s2s          1.87       1.71       2.00  
#> 17 KOR_Gwangju   province       s2t         -0.371     -0.367     -0.406 
#> 18 KOR_Gwangju   province       s2s         -0.260     -0.251     -0.412 
#> 19 KOR_Gyeong-gi province       s2t          0.137      0.145      0.0895
#> 20 KOR_Gyeong-gi province       s2s          0.557      0.536      0.180 
#> 21 KOR_Gyeongbuk province       s2t          7.31       4.28       4.19  
#> 22 KOR_Gyeongbuk province       s2s          1.65       1.47       1.27  
#> 23 KOR_Gyeongnam province       s2t         -0.977     -0.889     -1.09  
#> 24 KOR_Gyeongnam province       s2s          1.79       1.34       1.37  
#> 25 KOR_Incheon   province       s2t          0.0381     0.0203    -0.201 
#> 26 KOR_Incheon   province       s2s          0.0375     0.0167    -0.169 
#> 27 KOR_Jeju      province       s2t         -0.283     -0.563     -0.456 
#> 28 KOR_Jeju      province       s2s         -7.95      53.3       15.9   
#> 29 KOR_Jeonbuk   province       s2t          6.48       4.74      34.4   
#> 30 KOR_Jeonbuk   province       s2s          3.39       2.51       2.51  
#> 31 KOR_Jeonnam   province       s2t          2.56       2.15       2.36  
#> 32 KOR_Jeonnam   province       s2s          1.66       1.51       1.43  
#> 33 KOR_Sejong    province       s2t         -0.0306     0.0651     0.404 
#> 34 KOR_Sejong    province       s2s         -0.145      0.347      2.09  
#> 35 KOR_Seoul     province       s2t         -0.274     -1.09      -5.05  
#> 36 KOR_Seoul     province       s2s         -0.241     -0.598     -2.06  
#> 37 KOR_Ulsan     province       s2t         -1.03      -0.963     -1.22  
#> 38 KOR_Ulsan     province       s2s         -4.25      -8.69      -2.43  
#> # ℹ 2 more variables: elas_20v00 <dbl>, elas_20v10 <dbl>

Merge FPC and FEL, country and group

Now we combine the level and change information from FPC with the elasticity results from FEL.

We have the LRCE file, level, ratio, change, and elasticity.

# Attach the elasticities to the FPC rows. FEL rows are tagged as
# vartype "rat" so that they only match the ratio rows (s2t, s2s) of FPC;
# level rows get NA in the elasticity columns.
df_lrce_korea <- left_join(
  df_fpc_korea,
  mutate(df_fel_korea, vartype = "rat"),
  by = c("location_code", "location_level", "variable", "vartype")
)
# dim() has two elements, so this prints one line for rows and one for columns
print(glue::glue("dim df_lrce_korea: {dim(df_lrce_korea)}"))
#> dim df_lrce_korea: 190
#> dim df_lrce_korea: 20

Finally, merge the location display names and region groups into the LRCE file.

# Attach a display name and a region group to every row by looking up its
# location code in ls_korea. Codes that are not listed in ls_korea get the
# empty string in both columns.
#
# Lookup vectors named by location code: element 1 of each ls_korea entry is
# the display name, element 2 the region group.
ar_st_loc_name_disp <- vapply(
  ls_korea, function(ar_st) ar_st[[1]], character(1)
)
ar_st_loc_group <- vapply(
  ls_korea, function(ar_st) ar_st[[2]], character(1)
)
# One vectorized lookup replaces a separate mutate pass per location and
# also works when ls_korea is empty.
df_lrce_korea <- df_lrce_korea %>%
  mutate(
    # location_code is a factor: index the lookups by its labels, not by the
    # underlying integer codes. Unmatched codes yield NA, replaced by "".
    location_name = coalesce(
      unname(ar_st_loc_name_disp[as.character(location_code)]), ""
    ),
    location_region_group = coalesce(
      unname(ar_st_loc_group[as.character(location_code)]), ""
    )
  )
# Put the identifying columns first
df_lrce_korea <- df_lrce_korea %>%
  select(
    location_code, location_level, location_name, location_region_group,
    everything()
  )

Korea Schools, Teachers, and Students Table

Prep Table inputs

First, select levels and percentage changes year by year for the three variables (schools, teachers, and students). Also get ratios and elasticities.

# Identifier columns kept in each of the four table inputs
ar_st_id_cols <- c(
  "location_code", "location_level", "location_name",
  "location_region_group", "variable"
)
# Levels: "lvl" rows, one column per year
df_lvl_korea <- df_lrce_korea %>%
  filter(vartype == "lvl") %>%
  select(all_of(ar_st_id_cols), contains("year"))
# Changes: "lvl" rows, one column per year comparison
df_chg_korea <- df_lrce_korea %>%
  filter(vartype == "lvl") %>%
  select(all_of(ar_st_id_cols), contains("chg"))
# Ratios: "rat" rows, one column per year
df_rat_korea <- df_lrce_korea %>%
  filter(vartype == "rat") %>%
  select(all_of(ar_st_id_cols), contains("year"))
# Elasticities: "rat" rows, one column per year comparison
df_elas_korea <- df_lrce_korea %>%
  filter(vartype == "rat") %>%
  select(all_of(ar_st_id_cols), contains("elas"))

Second, reshape time variables from each table above from wide to long.

# Each wide file becomes long: the year (or year comparison) encoded in the
# column name moves into `yearset`, the value into `stats`, and `type`
# records which statistic the row holds. The numeric prefix of `type`
# (1lvl, 2chg, 3rat, 4elas) is what the rows are sorted on later.

# Levels: columns year1970, ..., year2020
df_lvl_korea_long <- df_lvl_korea %>%
  pivot_longer(
    cols = starts_with("year"),
    names_to = "yearset",
    names_pattern = "year(.*)",
    values_to = "stats"
  ) %>%
  mutate(type = "1lvl")
# Changes: columns chg_20v70, ..., chg_20v10
df_chg_korea_long <- df_chg_korea %>%
  pivot_longer(
    cols = starts_with("chg"),
    names_to = "yearset",
    names_pattern = "chg_(.*)",
    values_to = "stats"
  ) %>%
  mutate(type = "2chg")
# Ratios: columns year1970, ..., year2020
df_rat_korea_long <- df_rat_korea %>%
  pivot_longer(
    cols = starts_with("year"),
    names_to = "yearset",
    names_pattern = "year(.*)",
    values_to = "stats"
  ) %>%
  mutate(type = "3rat")
# Elasticities: columns elas_20v70, ..., elas_20v10
df_elas_korea_long <- df_elas_korea %>%
  pivot_longer(
    cols = starts_with("elas"),
    names_to = "yearset",
    names_pattern = "elas_(.*)",
    values_to = "stats"
  ) %>%
  mutate(type = "4elas")

Third, combine the long files and reshape to wide, so that each location becomes a column.

# Stack the four long files into one
df_korea_long_jnt <- bind_rows(list(
  df_lvl_korea_long,
  df_chg_korea_long,
  df_rat_korea_long,
  df_elas_korea_long
))
# One column per location (prefixed "j_"), one row per statistic type,
# variable and year set. Sorting by location_name first makes the new
# columns appear in display-name order.
df_korea_wide <- df_korea_long_jnt %>%
  arrange(variable, yearset, location_name) %>%
  pivot_wider(
    id_cols = c("type", "variable", "yearset"),
    names_from = location_name,
    values_from = stats,
    names_prefix = "j_"
  )

Fourth, organize columns, and generate four subsets.

# Row identifiers first, location columns after
tab3_korea <- select(df_korea_wide, type, variable, yearset, everything())
# One subset per statistic type
tab3_korea_lvl <- filter(tab3_korea, type == "1lvl")
tab3_korea_chg <- filter(tab3_korea, type == "2chg")
tab3_korea_rat <- filter(tab3_korea, type == "3rat")
tab3_korea_elas <- filter(tab3_korea, type == "4elas")

Fifth, format each type of variables separately.

# Convert every location column (prefix "j_") from numbers to display
# strings. format() is applied to a whole column at once, so the strings in
# a column are padded to a common width.

# Levels: number of schools, rounded to whole units
tab3_korea_lvl_school <- tab3_korea_lvl %>%
  filter(variable == "school") %>%
  mutate(across(
    contains("j_"),
    ~ format(round(.x, 0), nsmall = 0, big.mark = ",")
  ))
# Levels: teachers in thousands, one decimal
tab3_korea_lvl_teacher <- tab3_korea_lvl %>%
  filter(variable == "teacher") %>%
  mutate(across(
    contains("j_"),
    ~ format(round(.x / 1000, 1), nsmall = 0, big.mark = ",")
  ))
# Levels: students in thousands, no decimals
tab3_korea_lvl_student <- tab3_korea_lvl %>%
  filter(variable == "student") %>%
  mutate(across(
    contains("j_"),
    ~ format(round(.x / 1000, 0), nsmall = 0, big.mark = ",")
  ))
# Changes: shares converted to whole percentages with a "%" suffix
tab3_korea_chg <- tab3_korea_chg %>%
  mutate(across(
    contains("j_"),
    ~ paste0(
      format(round(.x, 2) * 100, nsmall = 0, big.mark = ","),
      "%"
    )
  ))
# Ratios: rounded to whole numbers
tab3_korea_rat <- tab3_korea_rat %>%
  mutate(across(
    contains("j_"),
    ~ format(round(.x, 0), nsmall = 0, big.mark = ",")
  ))
# Elasticities: two decimals
tab3_korea_elas <- tab3_korea_elas %>%
  mutate(across(
    contains("j_"),
    ~ format(round(.x, 2), nsmall = 0, big.mark = ",")
  ))

Sixth, combine the formatted statistics again (levels, changes, and ratios; elasticities are left out of this table), now that formatting is done.

# Stack the formatted pieces. The elasticity rows (tab3_korea_elas) are
# deliberately not included in this table.
tab3_korea_jnt <- bind_rows(
  tab3_korea_lvl_school,
  tab3_korea_lvl_teacher,
  tab3_korea_lvl_student,
  tab3_korea_chg,
  tab3_korea_rat
) %>%
  mutate(
    # Sort key: the numeric prefix fixes the order of variables within a type
    variable_sort = case_when(
      variable == "school" ~ "1school",
      variable == "teacher" ~ "2teacher",
      variable == "student" ~ "3student",
      variable == "s2s" ~ "1s2s",
      variable == "s2t" ~ "3s2t"
    ),
    # Spell out year comparisons; plain years are left unchanged
    yearset = case_when(
      yearset == "20v00" ~ "2020 vs 2000",
      yearset == "20v10" ~ "2020 vs 2010",
      yearset == "20v60" ~ "2020 vs 1960",
      yearset == "20v70" ~ "2020 vs 1970",
      yearset == "20v80" ~ "2020 vs 1980",
      yearset == "20v90" ~ "2020 vs 1990",
      TRUE ~ yearset
    )
  ) %>%
  # Blank out formatted missing values: "NA%" first, then any remaining "NA"
  mutate(across(
    starts_with("j_"),
    ~ str_replace(str_replace(.x, "NA%", ""), "NA", "")
  ))

print(tab3_korea_jnt, n = 200)
#> # A tibble: 45 × 23
#>    type  variable yearset      `j_A1-Metro` `j_A2-Seoul` `j_A3-Incheon`
#>    <chr> <chr>    <chr>        <chr>        <chr>        <chr>         
#>  1 1lvl  school   1970         "1,309"      "206"        " 50"         
#>  2 1lvl  school   1980         "1,525"      "291"        " 55"         
#>  3 1lvl  school   1990         "1,881"      "463"        "103"         
#>  4 1lvl  school   2000         "2,312"      "532"        "174"         
#>  5 1lvl  school   2010         "2,893"      "587"        "226"         
#>  6 1lvl  school   2020         "3,164"      "607"        "253"         
#>  7 1lvl  teacher  1970         " 37.4"      "10.6"       " 2.0"        
#>  8 1lvl  teacher  1980         " 51.5"      "17.9"       " 2.5"        
#>  9 1lvl  teacher  1990         " 72.5"      "25.4"       " 4.5"        
#> 10 1lvl  teacher  2000         " 86.7"      "25.3"       " 7.2"        
#> 11 1lvl  teacher  2010         "114.8"      "29.3"       " 9.5"        
#> 12 1lvl  teacher  2020         "123.1"      "28.6"       "10.1"        
#> 13 1lvl  student  1970         "2,531"      "  770"      "136"         
#> 14 1lvl  student  1980         "3,005"      "1,169"      "148"         
#> 15 1lvl  student  1990         "3,109"      "1,142"      "208"         
#> 16 1lvl  student  2000         "2,773"      "  759"      "246"         
#> 17 1lvl  student  2010         "2,277"      "  566"      "183"         
#> 18 1lvl  student  2020         "1,867"      "  410"      "157"         
#> 19 2chg  school   2020 vs 2000 " 37%"       " 14%"       " 45%"        
#> 20 2chg  school   2020 vs 2010 "  9%"       "  3%"       " 12%"        
#> 21 2chg  school   2020 vs 1970 "142%"       "195%"       "406%"        
#> 22 2chg  school   2020 vs 1980 "107%"       "109%"       "360%"        
#> 23 2chg  school   2020 vs 1990 " 68%"       " 31%"       "146%"        
#> 24 2chg  student  2020 vs 2000 "-33%"       "-46%"       "-36%"        
#> 25 2chg  student  2020 vs 2010 "-18%"       "-28%"       "-14%"        
#> 26 2chg  student  2020 vs 1970 "-26%"       "-47%"       " 15%"        
#> 27 2chg  student  2020 vs 1980 "-38%"       "-65%"       "  6%"        
#> 28 2chg  student  2020 vs 1990 "-40%"       "-64%"       "-25%"        
#> 29 2chg  teacher  2020 vs 2000 " 42%"       " 13%"       " 41%"        
#> 30 2chg  teacher  2020 vs 2010 "  7%"       " -2%"       "  6%"        
#> 31 2chg  teacher  2020 vs 1970 "229%"       "171%"       "400%"        
#> 32 2chg  teacher  2020 vs 1980 "139%"       " 60%"       "295%"        
#> 33 2chg  teacher  2020 vs 1990 " 70%"       " 13%"       "123%"        
#> 34 3rat  s2s      1970         "1,934"      "3,739"      "2,724"       
#> 35 3rat  s2s      1980         "1,970"      "4,016"      "2,692"       
#> 36 3rat  s2s      1990         "1,653"      "2,466"      "2,022"       
#> 37 3rat  s2s      2000         "1,199"      "1,428"      "1,412"       
#> 38 3rat  s2s      2010         "  787"      "  964"      "  811"       
#> 39 3rat  s2s      2020         "  590"      "  675"      "  620"       
#> 40 3rat  s2t      1970         "   68"      "   73"      "   68"       
#> 41 3rat  s2t      1980         "   58"      "   65"      "   58"       
#> 42 3rat  s2t      1990         "   43"      "   45"      "   46"       
#> 43 3rat  s2t      2000         "   32"      "   30"      "   34"       
#> 44 3rat  s2t      2010         "   20"      "   19"      "   19"       
#> 45 3rat  s2t      2020         "   15"      "   14"      "   16"       
#> # ℹ 17 more variables: `j_A4-Gyeong-gi` <chr>, `j_B1-Busan` <chr>,
#> #   `j_B2-Daegu` <chr>, `j_B3-Daejeon` <chr>, `j_B4-Gwangju` <chr>,
#> #   `j_B5-Sejong` <chr>, `j_B6-Ulsan` <chr>, `j_C1-Non-metro` <chr>,
#> #   `j_D1-Chungbuk` <chr>, `j_D2-Chungnam` <chr>, `j_D3-Gyeongbuk` <chr>,
#> #   `j_D4-Gyeongnam` <chr>, `j_D5-Jeonnam` <chr>, `j_E1-Gangwon` <chr>,
#> #   `j_E2-Jeju` <chr>, `j_E3-Jeonbuk` <chr>, variable_sort <chr>

Finally, count by group, generate labeling string

# Final row order of the table: statistic type, then variable, then year set
tab3_korea_jnt <- arrange(tab3_korea_jnt, type, variable_sort, yearset)
# Number of rows per (type, variable) group and the first/last table row
# each group occupies; these positions drive the row-group headers later.
df_typevar_counts <- tab3_korea_jnt %>%
  group_by(type, variable_sort, variable) %>%
  summarize(group_count = n()) %>%
  ungroup() %>%
  mutate(
    group_count_end = cumsum(group_count),
    group_count_start = group_count_end - group_count + 1
  ) %>%
  select(group_count_start, group_count_end, everything())
#> `summarise()` has grouped output by 'type', 'variable_sort'. You can override
#> using the `.groups` argument.
# Labels for the row groups:
#   type_lab_supra - heading shown once per statistic type
#   row_name       - "<type_lab> <variable_lab>", heading of each variable group
df_typevar_counts <- df_typevar_counts %>%
  mutate(
    type_lab_supra = case_when(
      type == "1lvl" ~ "Levels",
      type == "2chg" ~ "Percentage changes over time",
      type == "3rat" ~ "Ratios",
      type == "4elas" ~ "Elasticities"
    ),
    type_lab = case_when(
      type == "1lvl" ~ "Number of",
      type == "2chg" ~ "Percentage change in",
      type == "3rat" ~ "Ratio",
      type == "4elas" ~ "Elasticity"
    ),
    variable_lab = case_when(
      type == "1lvl" & variable == "school" ~ "schools",
      type == "1lvl" & variable == "teacher" ~ "teachers (1000s)",
      type == "1lvl" & variable == "student" ~ "students (1000s)",
      type == "2chg" & variable == "school" ~ "schools",
      type == "2chg" & variable == "teacher" ~ "teachers",
      type == "2chg" & variable == "student" ~ "students",
      type == "2chg" & variable == "youthpop" ~ "youth 0-14",
      type == "3rat" & variable == "s2s" ~ "Student/School",
      type == "3rat" & variable == "s2t" ~ "Student/Teacher",
      type == "4elas" & variable == "s2s" ~
        "$\\Delta$\\%Student/$\\Delta$\\%School",
      type == "4elas" & variable == "s2t" ~
        "$\\Delta$\\%Student/$\\Delta$\\%Teacher"
    )
  ) %>%
  # Merge the two label parts into row_name (both inputs are dropped)
  unite(row_name, type_lab:variable_lab, sep = " ")
# Show the full label table
print(df_typevar_counts, n = 100)
#> # A tibble: 8 × 8
#>   group_count_start group_count_end type  variable_sort variable group_count
#>               <dbl>           <int> <chr> <chr>         <chr>          <int>
#> 1                 1               6 1lvl  1school       school             6
#> 2                 7              12 1lvl  2teacher      teacher            6
#> 3                13              18 1lvl  3student      student            6
#> 4                19              23 2chg  1school       school             5
#> 5                24              28 2chg  2teacher      teacher            5
#> 6                29              33 2chg  3student      student            5
#> 7                34              39 3rat  1s2s          s2s                6
#> 8                40              45 3rat  3s2t          s2t                6
#> # ℹ 2 more variables: type_lab_supra <chr>, row_name <chr>

Generate table

Now we generate the table.

# Build the Korea table once per output format. The LaTeX version is written
# to spn_tex_out when bl_tex_save is TRUE; for the HTML version only dim()
# of the result is printed.

# ---- Inputs that do not depend on the output format ----
# Location columns ("j_" prefix), in the order they appear in the wide file
ar_st_colnames <- names(df_korea_wide)
ar_st_colnames_js <- ar_st_colnames[grepl("j_", ar_st_colnames)]
# Header labels: drop the "j_<sort key>-" prefix, hyphenate the -nam/-buk/-won
# endings, and label both aggregate columns (Metro, Non-metro) as "All"
ar_st_clean <- stringr::str_replace(ar_st_colnames_js, "j_[[:alnum:]]+-", "")
ar_st_clean <- stringr::str_replace(ar_st_clean, "nam", "-nam")
ar_st_clean <- stringr::str_replace(ar_st_clean, "buk", "-buk")
ar_st_clean <- stringr::str_replace(ar_st_clean, "won", "-won")
ar_st_clean <- stringr::str_replace(ar_st_clean, "Non-metro", "All")
ar_st_clean <- stringr::str_replace(ar_st_clean, "Metro", "All")
ar_st_col_names <- c(
  "Years",
  ar_st_clean
)
# Column groups; each number is how many columns the group title spans.
# Both header levels must add up to the 20 table columns (years + 19
# locations).
ar_st_col_groups_l2 <- c(
  " " = 1,
  "All" = 1,
  "Capital cities" = 3,
  "Metropolitan cities" = 6,
  "All" = 1,
  "Provinces" = 5,
  "Special provinces" = 3
)
ar_st_col_groups_l1 <- c(
  " " = 1,
  "Capital and metropolitan areas" = 10,
  "Non-metropolitan areas" = 9
)

# Table body: the year label followed by the location columns
tab3_korea_jnt_kbl <- tab3_korea_jnt %>%
  select(yearset, all_of(ar_st_colnames_js))

ar_st_kableformat <- c("latex", "html")
for (st_kableformat in ar_st_kableformat) {
  # Second, we construct main table, and add styling.
  bk_korea <- kbl(
    tab3_korea_jnt_kbl,
    format = st_kableformat,
    label = "main:tab:korea:pop:teachers:schools",
    # escape = FALSE,
    linesep = "",
    booktabs = TRUE,
    longtable = TRUE,
    align = "c",
    caption = "Korea: Schools, Teachers, and Students",
    col.names = ar_st_col_names
  ) %>%
    # see https://cran.r-project.org/web/packages/kableExtra/vignettes/awesome_table_in_html.html#Bootstrap_table_classes
    kable_styling(
      bootstrap_options = c("striped", "hover", "condensed", "responsive"),
      full_width = FALSE, position = "left"
    )

  # Third, we add in column groups.
  bk_korea <- bk_korea %>%
    add_header_above(ar_st_col_groups_l2) %>%
    add_header_above(ar_st_col_groups_l1)

  # Fourth, we add in row groups.
  st_supra_grp_cur <- ""
  it_supra_grp_ctr <- 0
  for (it_group in seq_len(nrow(df_typevar_counts))) {
    # supra group variable
    st_supra_grp <- as.character(
      df_typevar_counts[[it_group, "type_lab_supra"]]
    )
    # Row-group label (statistic and variable)
    st_loc <- as.character(df_typevar_counts[[it_group, "row_name"]])

    # groups start and end
    it_group_count_start <- df_typevar_counts[[it_group, "group_count_start"]]
    it_group_count_end <- df_typevar_counts[[it_group, "group_count_end"]]

    # A new statistic type starts: add its centered heading above the first
    # row of the group, with a rule before it from the second type onwards
    if (st_supra_grp != st_supra_grp_cur) {
      it_supra_grp_ctr <- it_supra_grp_ctr + 1
      st_supra_grp_cur <- st_supra_grp
      bl_hline_before <- it_supra_grp_ctr >= 2
      # Heading group stats type
      st_supra_text <- paste0(st_supra_grp_cur)
      bk_korea <- bk_korea %>%
        pack_rows(
          st_supra_text, it_group_count_start, it_group_count_start,
          latex_gap_space = "0.0em",
          indent = FALSE,
          latex_align = "c",
          hline_after = TRUE,
          hline_before = bl_hline_before
        )
    }
    # Add to table
    bk_korea <- bk_korea %>%
      pack_rows(
        st_loc, it_group_count_start, it_group_count_end,
        latex_gap_space = "0.75em",
        latex_align = "l",
        hline_after = FALSE
      )
  }

  # Fifth, column formatting.
  fl_width_country <- 1.9
  st_width_country <- paste0(fl_width_country, "cm")
  bk_korea <- bk_korea %>%
    column_spec(1, width = st_width_country) %>%
    column_spec(2:ncol(tab3_korea_jnt_kbl), width = "0.75cm")

  # Final adjustments
  # Headings on all pages, note use `sub` to replace first midrule
  st_headend <- paste0(
    "\\midrule\\endhead\n",
    "\\addlinespace[0.2em]\\hline\\addlinespace[0.2em]\n",
    "\\multicolumn{", ncol(tab3_korea_jnt_kbl),
    "}{r}{\\emph{Continued on next page}}\\\\\n",
    "\\endfoot\\endlastfoot"
  )
  bk_korea <- sub(
    pattern = "\\midrule", replacement = st_headend,
    x = bk_korea, fixed = TRUE
  )
  # First (years) column left-aligned instead of centered
  bk_korea <- gsub(
    pattern = paste0(
      "\\centering\\arraybackslash}p{", st_width_country, "}"
    ),
    replacement = paste0(
      "\\raggedright\\arraybackslash}p{", st_width_country, "}"
    ),
    x = bk_korea, fixed = TRUE
  )
  # midrule replacing hline (this also rewrites the \hline in st_headend)
  bk_korea <- gsub(
    pattern = "hline", replacement = "midrule",
    x = bk_korea, fixed = TRUE
  )

  # Sixth, display or save.
  if (st_kableformat == "html") {
    print(dim(bk_korea))
  } else {
    if (bl_tex_save) {
      # Passing the path lets writeLines() open the file and close it again,
      # even if writing fails
      writeLines(bk_korea, con = spn_tex_out)
      if (verbose) {
        print(glue::glue("F-945386, S1"))
        print(glue::glue("Latex saved: {spn_tex_out}"))
      }
    }
  }
}
#> Warning in pack_rows(., st_supra_text, it_group_count_start, it_group_count_start, : latex_align parameter is not used in HTML Mode,
#>                                     use label_row_css instead.
#> Warning in pack_rows(., st_loc, it_group_count_start, it_group_count_end, : latex_align parameter is not used in HTML Mode,
#>                                     use label_row_css instead.
#> Warning in pack_rows(., st_loc, it_group_count_start, it_group_count_end, : latex_align parameter is not used in HTML Mode,
#>                                     use label_row_css instead.
#> Warning in pack_rows(., st_loc, it_group_count_start, it_group_count_end, : latex_align parameter is not used in HTML Mode,
#>                                     use label_row_css instead.
#> Warning in pack_rows(., st_supra_text, it_group_count_start, it_group_count_start, : latex_align parameter is not used in HTML Mode,
#>                                     use label_row_css instead.
#> Warning in pack_rows(., st_loc, it_group_count_start, it_group_count_end, : latex_align parameter is not used in HTML Mode,
#>                                     use label_row_css instead.
#> Warning in pack_rows(., st_loc, it_group_count_start, it_group_count_end, : latex_align parameter is not used in HTML Mode,
#>                                     use label_row_css instead.
#> Warning in pack_rows(., st_loc, it_group_count_start, it_group_count_end, : latex_align parameter is not used in HTML Mode,
#>                                     use label_row_css instead.
#> Warning in pack_rows(., st_supra_text, it_group_count_start, it_group_count_start, : latex_align parameter is not used in HTML Mode,
#>                                     use label_row_css instead.
#> Warning in pack_rows(., st_loc, it_group_count_start, it_group_count_end, : latex_align parameter is not used in HTML Mode,
#>                                     use label_row_css instead.
#> Warning in pack_rows(., st_loc, it_group_count_start, it_group_count_end, : latex_align parameter is not used in HTML Mode,
#>                                     use label_row_css instead.
#> NULL
bk_korea
Korea: Schools, Teachers, and Students
Capital and metropolitan areas
Non-metropolitan areas
All
Capital cities
Metropolitan cities
All
Provinces
Special provinces
Years All Seoul Incheon Gyeong-gi Busan Daegu Daejeon Gwangju Sejong Ulsan All Chung-buk Chung-nam Gyeong-buk Gyeong-nam Jeon-nam Gang-won Jeju Jeon-buk
Levels
Number of schools
1970 1,309 206 50 613 99 81 74 50 32 105 4,652 372 510 890 731 881 607 108 553
1980 1,525 291 55 676 137 86 79 54 34 113 4,962 397 546 948 782 954 618 114 603
1990 1,881 463 103 684 221 118 79 83 29 101 4,454 337 536 890 701 821 499 115 555
2000 2,312 532 174 835 267 178 110 109 22 85 2,955 247 410 492 449 462 367 106 422
2010 2,893 587 226 1,145 298 214 138 145 22 118 2,961 259 408 494 495 433 353 106 413
2020 3,164 607 253 1,298 304 230 148 155 49 120 2,956 258 410 473 505 429 347 113 421
Number of teachers (1000s)
1970 37.4 10.6 2.0 8.4 4.1 3.9 2.6 2.4 0.6 2.7 63.7 6.0 7.1 11.5 8.8 12.4 7.1 1.2 9.4
1980 51.5 17.9 2.5 10.6 7.4 4.2 2.8 2.7 0.6 2.8 67.5 6.2 7.4 12.5 9.0 13.5 7.5 1.6 9.8
1990 72.5 25.4 4.5 16.7 10.4 5.7 2.9 3.2 0.5 3.1 64.3 5.4 7.5 12.0 10.2 11.6 7.4 1.8 8.4
2000 86.7 25.3 7.2 26.5 9.8 6.5 3.9 3.8 0.4 3.3 53.3 4.9 6.5 9.5 9.9 8.1 5.7 1.9 6.9
2010 114.8 29.3 9.5 40.4 11.0 8.7 5.6 5.6 0.5 4.2 62.0 5.9 8.0 10.4 12.6 8.3 6.5 2.4 7.9
2020 123.1 28.6 10.1 46.8 10.1 8.9 6.0 6.0 2.1 4.3 66.2 6.7 9.3 10.3 13.8 8.3 6.6 2.9 8.3
Number of students (1000s)
1970 2,531 770 136 469 287 282 192 191 32 172 3,218 310 349 570 428 630 371 64 494
1980 3,005 1,169 148 510 446 243 154 163 26 146 2,653 222 280 491 364 538 292 76 389
1990 3,109 1,142 208 656 457 255 120 134 12 124 1,759 153 205 318 309 290 188 56 240
2000 2,773 759 246 889 295 216 126 127 8 107 1,247 124 143 218 269 164 123 47 160
2010 2,277 566 183 848 197 168 109 116 8 82 1,022 105 131 164 228 124 100 44 127
2020 1,867 410 157 762 154 123 80 86 29 67 826 85 120 129 191 92 73 41 95
Percentage changes over time
Percentage change in schools
2020 vs 1970 142% 195% 406% 112% 207% 184% 100% 210% 53% 14% -36% -31% -20% -47% -31% -51% -43% 5% -24%
2020 vs 1980 107% 109% 360% 92% 122% 167% 87% 187% 44% 6% -40% -35% -25% -50% -35% -55% -44% -1% -30%
2020 vs 1990 68% 31% 146% 90% 38% 95% 87% 87% 69% 19% -34% -23% -24% -47% -28% -48% -30% -2% -24%
2020 vs 2000 37% 14% 45% 55% 14% 29% 35% 42% 123% 41% 0% 4% 0% -4% 12% -7% -5% 7% 0%
2020 vs 2010 9% 3% 12% 13% 2% 7% 7% 7% 123% 2% 0% 0% 0% -4% 2% -1% -2% 7% 2%
Percentage change in teachers
2020 vs 1970 229% 171% 400% 456% 146% 129% 129% 147% 251% 59% 4% 11% 31% -11% 57% -33% -7% 130% -12%
2020 vs 1980 139% 60% 295% 340% 37% 112% 119% 128% 235% 56% -2% 8% 25% -17% 54% -39% -12% 83% -16%
2020 vs 1990 70% 13% 123% 180% -3% 56% 105% 88% 357% 37% 3% 26% 24% -14% 35% -29% -10% 61% -2%
2020 vs 2000 42% 13% 41% 77% 4% 36% 55% 58% 426% 29% 24% 37% 43% 9% 40% 2% 16% 55% 20%
2020 vs 2010 7% -2% 6% 16% -7% 2% 9% 8% 328% 3% 7% 14% 16% 0% 10% 0% 2% 21% 4%
Percentage change in students
2020 vs 1970 -26% -47% 15% 62% -47% -57% -58% -55% -8% -61% -74% -73% -66% -77% -55% -85% -80% -37% -81%
2020 vs 1980 -38% -65% 6% 49% -66% -50% -48% -47% 15% -54% -69% -62% -57% -74% -48% -83% -75% -47% -76%
2020 vs 1990 -40% -64% -25% 16% -66% -52% -34% -36% 144% -46% -53% -44% -41% -59% -38% -68% -61% -28% -61%
2020 vs 2000 -33% -46% -36% -14% -48% -43% -37% -32% 249% -37% -34% -31% -16% -41% -29% -44% -40% -13% -41%
2020 vs 2010 -18% -28% -14% -10% -22% -27% -27% -25% 282% -17% -19% -19% -8% -21% -16% -25% -26% -8% -25%
Ratios
Ratio Student/School
1970 1,934 3,739 2,724 766 2,900 3,487 2,594 3,811 998 1,635 692 834 685 641 586 715 611 594 894
1980 1,970 4,016 2,692 755 3,257 2,830 1,945 3,016 752 1,292 535 560 512 518 466 564 473 668 644
1990 1,653 2,466 2,022 959 2,068 2,162 1,523 1,620 417 1,228 395 455 382 357 441 353 377 487 432
2000 1,199 1,428 1,412 1,065 1,105 1,216 1,143 1,163 384 1,254 422 503 349 442 599 354 334 441 378
2010 787 964 811 741 662 783 790 799 350 692 345 406 320 332 460 286 283 415 307
2020 590 675 620 587 505 533 539 558 602 562 280 330 293 273 378 215 212 359 225
Ratio Student/Teacher
1970 68 73 68 56 70 73 73 78 52 64 51 51 49 49 48 51 52 52 52
1980 58 65 58 48 60 58 56 61 40 53 39 36 38 39 40 40 39 49 40
1990 43 45 46 39 44 45 41 42 26 40 27 29 27 26 30 25 26 31 29
2000 32 30 34 34 30 33 32 33 21 32 23 25 22 23 27 20 22 25 23
2010 20 19 19 21 18 19 20 21 15 19 17 18 16 16 18 15 15 19 16
2020 15 14 16 16 15 14 13 14 14 16 12 13 13 13 14 11 11 14 11