Go to the RMD, R, PDF, or HTML version of this file. Go back to fan’s REconTools research support package, R4Econ examples page, PkgTestR packaging guide, or Stat4Econ course page.
# 5 by 3 matrix
mt_rnorm_a <- matrix(rnorm(4,mean=0,sd=1), nrow=5, ncol=3)
# Column Names
ar_st_varnames <- c('id','var1','varb','vartheta')
# Combine to tibble, add name col1, col2, etc.
tb_combine <- as_tibble(mt_rnorm_a) %>%
rowid_to_column(var = "id") %>%
rename_all(~c(ar_st_varnames))
# Display
kable(tb_combine) %>% kable_styling_fc()
id | var1 | varb | vartheta |
---|---|---|---|
1 | 0.6059296 | 0.0132510 | -0.2787887 |
2 | 0.0132510 | -0.2787887 | -0.1356745 |
3 | -0.2787887 | -0.1356745 | 0.6059296 |
4 | -0.1356745 | 0.6059296 | 0.0132510 |
5 | 0.6059296 | 0.0132510 | -0.2787887 |
First, we generate an empty matrix. Second, we compute values to fill in matrix cells.
# an NA matrix
it_nrow <- 5
it_ncol <- 3
mt_na <- matrix(NA, nrow=it_nrow, ncol=it_ncol)
# array of nrow values
ar_it_nrow <- seq(1, it_nrow)
ar_it_ncol <- seq(1, it_ncol)
# Generate values in matrix
for (it_row in ar_it_nrow) {
for (it_col in ar_it_ncol) {
print(glue::glue("row={it_row} and col={it_col}"))
mt_na[it_row, it_col] = it_row*it_col + it_row + it_col
}
}
## row=1 and col=1
## row=1 and col=2
## row=1 and col=3
## row=2 and col=1
## row=2 and col=2
## row=2 and col=3
## row=3 and col=1
## row=3 and col=2
## row=3 and col=3
## row=4 and col=1
## row=4 and col=2
## row=4 and col=3
## row=5 and col=1
## row=5 and col=2
## row=5 and col=3
# Display
kable(mt_na) %>% kable_styling_fc()
3 | 5 | 7 |
5 | 8 | 11 |
7 | 11 | 15 |
9 | 14 | 19 |
11 | 17 | 23 |
Third, we label the rows and the columns. Note that we will include the column names as column names, but the row names will be included as a variable.
# Column Names
ar_st_col_names <- paste0('colval=', ar_it_ncol)
ar_st_row_names <- paste0('rowval=', ar_it_nrow)
# Create tibble, and add in column and row names
tb_row_col_named <- as_tibble(mt_na) %>%
rename_all(~c(ar_st_col_names)) %>%
mutate(row_name = ar_st_row_names) %>%
select(row_name, everything())
# Display
kable(tb_row_col_named) %>% kable_styling_fc()
row_name | colval=1 | colval=2 | colval=3 |
---|---|---|---|
rowval=1 | 3 | 5 | 7 |
rowval=2 | 5 | 8 | 11 |
rowval=3 | 7 | 11 | 15 |
rowval=4 | 9 | 14 | 19 |
rowval=5 | 11 | 17 | 23 |
Finally, we generate a file name for exporting this tibble to a CSV file. We create a file name with a time stamp.
# Create a file name with date stamp
st_datetime <- base::format(Sys.time(), "%Y%m%d-%H%M%S")
# Copying a fixed date to avoid generating multiple testing files
# The date string below is generated by Sys.time()
st_snm_filename <- paste0("tibble_out_test_", st_datetime, '.csv')
# Create a file name with the time stamp.
spn_file_path = file.path(
"C:", "Users", "fan",
"R4Econ", "amto", "tibble", "_file",
st_snm_filename,
fsep = .Platform$file.sep)
# Save to file
write_csv(tb_row_col_named, spn_file_path)
Given Arrays and Matrixes, Generate Tibble and Name Variables/Columns
# Base Inputs
ar_col <- c(-1,+1)
mt_rnorm_a <- matrix(rnorm(4,mean=0,sd=1), nrow=2, ncol=2)
mt_rnorm_b <- matrix(rnorm(4,mean=0,sd=1), nrow=2, ncol=4)
# Combine Matrix
mt_combine <- cbind(ar_col, mt_rnorm_a, mt_rnorm_b)
colnames(mt_combine) <- c('ar_col',
paste0('matcolvar_grpa_', seq(1,dim(mt_rnorm_a)[2])),
paste0('matcolvar_grpb_', seq(1,dim(mt_rnorm_b)[2])))
# Variable Names
ar_st_varnames <- c('var_one',
paste0('tibcolvar_ga_', c(1,2)),
paste0('tibcolvar_gb_', c(1,2,3,4)))
# Combine to tibble, add name col1, col2, etc.
tb_combine <- as_tibble(mt_combine) %>% rename_all(~c(ar_st_varnames))
# Add an index column to the dataframe, ID column
tb_combine <- tb_combine %>% rowid_to_column(var = "ID")
# Change all gb variable names
tb_combine <- tb_combine %>%
rename_at(vars(starts_with("tibcolvar_gb_")),
funs(str_replace(., "_gb_", "_gbrenamed_")))
# Tibble back to matrix
mt_tb_combine_back <- data.matrix(tb_combine)
# Display
kable(mt_combine) %>% kable_styling_fc_wide()
ar_col | matcolvar_grpa_1 | matcolvar_grpa_2 | matcolvar_grpb_1 | matcolvar_grpb_2 | matcolvar_grpb_3 | matcolvar_grpb_4 |
---|---|---|---|---|---|---|
-1 | 0.8631453 | -0.7424531 | -0.277306 | 1.331686 | -0.277306 | 1.331686 |
1 | -0.0298100 | -0.8735947 | -0.610322 | 1.169677 | -0.610322 | 1.169677 |
kable(tb_combine) %>% kable_styling_fc_wide()
ID | var_one | tibcolvar_ga_1 | tibcolvar_ga_2 | tibcolvar_gbrenamed_1 | tibcolvar_gbrenamed_2 | tibcolvar_gbrenamed_3 | tibcolvar_gbrenamed_4 |
---|---|---|---|---|---|---|---|
1 | -1 | 0.8631453 | -0.7424531 | -0.277306 | 1.331686 | -0.277306 | 1.331686 |
2 | 1 | -0.0298100 | -0.8735947 | -0.610322 | 1.169677 | -0.610322 | 1.169677 |
kable(mt_tb_combine_back) %>% kable_styling_fc_wide()
ID | var_one | tibcolvar_ga_1 | tibcolvar_ga_2 | tibcolvar_gbrenamed_1 | tibcolvar_gbrenamed_2 | tibcolvar_gbrenamed_3 | tibcolvar_gbrenamed_4 |
---|---|---|---|---|---|---|---|
1 | -1 | 0.8631453 | -0.7424531 | -0.277306 | 1.331686 | -0.277306 | 1.331686 |
2 | 1 | -0.0298100 | -0.8735947 | -0.610322 | 1.169677 | -0.610322 | 1.169677 |
We run some function, whose outputs are named list, we store the values of the named list as additional rows into a dataframe whose column names are the names from named list.
First, we generate the function that returns named lists.
# Define a function
ffi_list_generator <- function(it_seed=123) {
set.seed(it_seed)
fl_abc <- rnorm(1)
ar_efg <- rnorm(3)
st_word <- sample(LETTERS, 5, replace = TRUE)
ls_return <- list("abc" = fl_abc, "efg" = ar_efg, "opq" = st_word)
return(ls_return)
}
# Run the function
it_seed=123
ls_return <- ffi_list_generator(it_seed)
print(ls_return)
## $abc
## [1] -0.5604756
##
## $efg
## [1] -0.23017749 1.55870831 0.07050839
##
## $opq
## [1] "K" "E" "T" "N" "V"
Second, we list of lists by running the function above with different starting seeds. We store results in a two-dimensional list.
# Run function once to get length
ls_return_test <- ffi_list_generator(it_seed=123)
it_list_len <- length(ls_return_test)
# list of list frame
it_list_of_list_len <- 5
ls_ls_return <- vector(mode = "list", length = it_list_of_list_len*it_list_len)
dim(ls_ls_return) <- c(it_list_of_list_len, it_list_len)
# Fill list of list
ar_seeds <- seq(123, 123 + it_list_of_list_len - 1)
it_ctr <- 0
for (it_seed in ar_seeds) {
print(it_seed)
it_ctr <- it_ctr + 1
ls_return <- ffi_list_generator(it_seed)
ls_ls_return[it_ctr,] <- ls_return
}
## [1] 123
## [1] 124
## [1] 125
## [1] 126
## [1] 127
# print 2d list
print(ls_ls_return)
## [,1] [,2] [,3]
## [1,] -0.5604756 numeric,3 character,5
## [2,] -1.385071 numeric,3 character,5
## [3,] 0.933327 numeric,3 character,5
## [4,] 0.366734 numeric,3 character,5
## [5,] -0.5677337 numeric,3 character,5
Third, we convert the list to a tibble dataframe. Prior to conversion we add names to the 1st and 2nd dimensions of the list. Then we print the results.
# get names from named list
ar_st_names <- names(ls_return_test)
dimnames(ls_ls_return)[[2]] <- ar_st_names
dimnames(ls_ls_return)[[1]] <- paste0('seed_', ar_seeds)
# Convert to dataframe
tb_ls_ls_return <- as_tibble(ls_ls_return)
# print
kable(tb_ls_ls_return) %>% kable_styling_fc()
abc | efg | opq |
---|---|---|
-0.5604756 | -0.23017749, 1.55870831, 0.07050839 | K, E, T, N, V |
-1.385071 | 0.03832318, -0.76303016, 0.21230614 | J, A, O, T, N |
0.933327 | -0.52503178, 1.81443979, 0.08304562 | C, T, M, S, K |
0.366734 | 0.3964520, -0.7318437, 0.9462364 | Z, L, J, Y, P |
-0.5677337 | -0.814760579, -0.493939596, 0.001818846 | Y, C, O, F, U |
Fourth, to export list to csv file, we need to unlist list contents. See also Create a tibble containing list columns
# Unlist
tb_unlisted <- tb_ls_ls_return %>%
rowwise() %>%
mutate_if(is.list,
funs(paste(unlist(.), sep='', collapse=', ')))
# print on screen, can see values
print(tb_unlisted)
After reshaping, often could end up with variable names that are all numeric, intgers for example, how to rename these variables to add a common prefix for example.
# Base Inputs
ar_col <- c(-1,+1)
mt_rnorm_c <- matrix(rnorm(4,mean=0,sd=1), nrow=5, ncol=10)
mt_combine <- cbind(ar_col, mt_rnorm_c)
# Variable Names
ar_it_cols_ctr <- seq(1, dim(mt_rnorm_c)[2])
ar_st_varnames <- c('var_one', ar_it_cols_ctr)
# Combine to tibble, add name col1, col2, etc.
tb_combine <- as_tibble(mt_combine) %>% rename_all(~c(ar_st_varnames))
# Add an index column to the dataframe, ID column
tb_combine_ori <- tb_combine %>% rowid_to_column(var = "ID")
# Change all gb variable names
tb_combine <- tb_combine_ori %>%
rename_at(
vars(num_range('',ar_it_cols_ctr)),
funs(paste0("rho", . , 'var'))
)
# Display
kable(tb_combine_ori) %>% kable_styling_fc_wide()
ID | var_one | 1 | 2 | 3 | 4 | 5 | 6 | 7 | 8 | 9 | 10 |
---|---|---|---|---|---|---|---|---|---|---|---|
1 | -1 | -0.1255472 | 0.5646199 | 0.1335086 | -0.1059632 | -0.1255472 | 0.5646199 | 0.1335086 | -0.1059632 | -0.1255472 | 0.5646199 |
2 | 1 | 0.5646199 | 0.1335086 | -0.1059632 | -0.1255472 | 0.5646199 | 0.1335086 | -0.1059632 | -0.1255472 | 0.5646199 | 0.1335086 |
3 | -1 | 0.1335086 | -0.1059632 | -0.1255472 | 0.5646199 | 0.1335086 | -0.1059632 | -0.1255472 | 0.5646199 | 0.1335086 | -0.1059632 |
4 | 1 | -0.1059632 | -0.1255472 | 0.5646199 | 0.1335086 | -0.1059632 | -0.1255472 | 0.5646199 | 0.1335086 | -0.1059632 | -0.1255472 |
5 | -1 | -0.1255472 | 0.5646199 | 0.1335086 | -0.1059632 | -0.1255472 | 0.5646199 | 0.1335086 | -0.1059632 | -0.1255472 | 0.5646199 |
kable(tb_combine) %>% kable_styling_fc_wide()
ID | var_one | rho1var | rho2var | rho3var | rho4var | rho5var | rho6var | rho7var | rho8var | rho9var | rho10var |
---|---|---|---|---|---|---|---|---|---|---|---|
1 | -1 | -0.1255472 | 0.5646199 | 0.1335086 | -0.1059632 | -0.1255472 | 0.5646199 | 0.1335086 | -0.1059632 | -0.1255472 | 0.5646199 |
2 | 1 | 0.5646199 | 0.1335086 | -0.1059632 | -0.1255472 | 0.5646199 | 0.1335086 | -0.1059632 | -0.1255472 | 0.5646199 | 0.1335086 |
3 | -1 | 0.1335086 | -0.1059632 | -0.1255472 | 0.5646199 | 0.1335086 | -0.1059632 | -0.1255472 | 0.5646199 | 0.1335086 | -0.1059632 |
4 | 1 | -0.1059632 | -0.1255472 | 0.5646199 | 0.1335086 | -0.1059632 | -0.1255472 | 0.5646199 | 0.1335086 | -0.1059632 | -0.1255472 |
5 | -1 | -0.1255472 | 0.5646199 | 0.1335086 | -0.1059632 | -0.1255472 | 0.5646199 | 0.1335086 | -0.1059632 | -0.1255472 | 0.5646199 |
Show what is in the table: 1, column and row names; 2, contents inside table.
tb_iris <- as_tibble(iris)
print(rownames(tb_iris))
## [1] "1" "2" "3" "4" "5" "6" "7" "8" "9" "10" "11" "12" "13"
## [14] "14" "15" "16" "17" "18" "19" "20" "21" "22" "23" "24" "25" "26"
## [27] "27" "28" "29" "30" "31" "32" "33" "34" "35" "36" "37" "38" "39"
## [40] "40" "41" "42" "43" "44" "45" "46" "47" "48" "49" "50" "51" "52"
## [53] "53" "54" "55" "56" "57" "58" "59" "60" "61" "62" "63" "64" "65"
## [66] "66" "67" "68" "69" "70" "71" "72" "73" "74" "75" "76" "77" "78"
## [79] "79" "80" "81" "82" "83" "84" "85" "86" "87" "88" "89" "90" "91"
## [92] "92" "93" "94" "95" "96" "97" "98" "99" "100" "101" "102" "103" "104"
## [105] "105" "106" "107" "108" "109" "110" "111" "112" "113" "114" "115" "116" "117"
## [118] "118" "119" "120" "121" "122" "123" "124" "125" "126" "127" "128" "129" "130"
## [131] "131" "132" "133" "134" "135" "136" "137" "138" "139" "140" "141" "142" "143"
## [144] "144" "145" "146" "147" "148" "149" "150"
colnames(tb_iris)
## [1] "Sepal.Length" "Sepal.Width" "Petal.Length" "Petal.Width" "Species"
colnames(tb_iris)
## [1] "Sepal.Length" "Sepal.Width" "Petal.Length" "Petal.Width" "Species"
summary(tb_iris)
## Sepal.Length Sepal.Width Petal.Length Petal.Width Species
## Min. :4.300 Min. :2.000 Min. :1.000 Min. :0.100 setosa :50
## 1st Qu.:5.100 1st Qu.:2.800 1st Qu.:1.600 1st Qu.:0.300 versicolor:50
## Median :5.800 Median :3.000 Median :4.350 Median :1.300 virginica :50
## Mean :5.843 Mean :3.057 Mean :3.758 Mean :1.199
## 3rd Qu.:6.400 3rd Qu.:3.300 3rd Qu.:5.100 3rd Qu.:1.800
## Max. :7.900 Max. :4.400 Max. :6.900 Max. :2.500
# Sort in Ascending Order
tb_iris %>% select(Species, Sepal.Length, everything()) %>%
arrange(Species, Sepal.Length) %>% head(10) %>%
kable() %>% kable_styling_fc()
Species | Sepal.Length | Sepal.Width | Petal.Length | Petal.Width |
---|---|---|---|---|
setosa | 4.3 | 3.0 | 1.1 | 0.1 |
setosa | 4.4 | 2.9 | 1.4 | 0.2 |
setosa | 4.4 | 3.0 | 1.3 | 0.2 |
setosa | 4.4 | 3.2 | 1.3 | 0.2 |
setosa | 4.5 | 2.3 | 1.3 | 0.3 |
setosa | 4.6 | 3.1 | 1.5 | 0.2 |
setosa | 4.6 | 3.4 | 1.4 | 0.3 |
setosa | 4.6 | 3.6 | 1.0 | 0.2 |
setosa | 4.6 | 3.2 | 1.4 | 0.2 |
setosa | 4.7 | 3.2 | 1.3 | 0.2 |
# Sort in Descending Order
tb_iris %>% select(Species, Sepal.Length, everything()) %>%
arrange(desc(Species), desc(Sepal.Length)) %>% head(10) %>%
kable() %>% kable_styling_fc()
Species | Sepal.Length | Sepal.Width | Petal.Length | Petal.Width |
---|---|---|---|---|
virginica | 7.9 | 3.8 | 6.4 | 2.0 |
virginica | 7.7 | 3.8 | 6.7 | 2.2 |
virginica | 7.7 | 2.6 | 6.9 | 2.3 |
virginica | 7.7 | 2.8 | 6.7 | 2.0 |
virginica | 7.7 | 3.0 | 6.1 | 2.3 |
virginica | 7.6 | 3.0 | 6.6 | 2.1 |
virginica | 7.4 | 2.8 | 6.1 | 1.9 |
virginica | 7.3 | 2.9 | 6.3 | 1.8 |
virginica | 7.2 | 3.6 | 6.1 | 2.5 |
virginica | 7.2 | 3.2 | 6.0 | 1.8 |
We use dplyr’s ranking functions to generate different types of ranking variables.
The example below demonstrates the differences between the functions row_number(), min_rank(), and dense_rank().
Note that we have “desc(var_name)” in order to generate the variable based on descending sort of the the “var_name” variable.
tb_iris %>%
select(Species, Sepal.Length) %>% head(10) %>%
mutate(row_number = row_number(desc(Sepal.Length)),
min_rank = min_rank(desc(Sepal.Length)),
dense_rank = dense_rank(desc(Sepal.Length))) %>%
kable(caption = "Ranking variable") %>% kable_styling_fc()
Species | Sepal.Length | row_number | min_rank | dense_rank |
---|---|---|---|---|
setosa | 5.1 | 2 | 2 | 2 |
setosa | 4.9 | 5 | 5 | 4 |
setosa | 4.7 | 7 | 7 | 5 |
setosa | 4.6 | 8 | 8 | 6 |
setosa | 5.0 | 3 | 3 | 3 |
setosa | 5.4 | 1 | 1 | 1 |
setosa | 4.6 | 9 | 8 | 6 |
setosa | 5.0 | 4 | 3 | 3 |
setosa | 4.4 | 10 | 10 | 7 |
setosa | 4.9 | 6 | 5 | 4 |
Use R4Econ’s summary tool.
df_summ_stats <- REconTools::ff_summ_percentiles(tb_iris)
kable(t(df_summ_stats)) %>% kable_styling_fc_wide()
stats | n | unique | NAobs | ZEROobs | mean | sd | cv | min | p01 | p05 | p10 | p25 | p50 | p75 | p90 | p95 | p99 | max |
Petal.Length | 150 | 43 | 0 | 0 | 3.758000 | 1.7652982 | 0.4697441 | 1.0 | 1.149 | 1.300 | 1.4 | 1.6 | 4.35 | 5.1 | 5.80 | 6.100 | 6.700 | 6.9 |
Petal.Width | 150 | 22 | 0 | 0 | 1.199333 | 0.7622377 | 0.6355511 | 0.1 | 0.100 | 0.200 | 0.2 | 0.3 | 1.30 | 1.8 | 2.20 | 2.300 | 2.500 | 2.5 |
Sepal.Length | 150 | 35 | 0 | 0 | 5.843333 | 0.8280661 | 0.1417113 | 4.3 | 4.400 | 4.600 | 4.8 | 5.1 | 5.80 | 6.4 | 6.90 | 7.255 | 7.700 | 7.9 |
Sepal.Width | 150 | 23 | 0 | 0 | 3.057333 | 0.4358663 | 0.1425642 | 2.0 | 2.200 | 2.345 | 2.5 | 2.8 | 3.00 | 3.3 | 3.61 | 3.800 | 4.151 | 4.4 |