1 Histogram

1.1 Generate Test Score Dataset

Go to the RMD, R, PDF, or HTML version of this file. Go back to fan’s REconTools Package, R Code Examples Repository (bookdown site), or Intro Stats with R Repository (bookdown site).

  • r generate text string as csv
  • r tibble matrix hand input

First, we will generate a test score dataset directly from a string. Below, we type a dataset line by line with four variables in comma-separated (csv) format, where the first row contains the variable names. This text could be stored in a separate file, or it could be included directly in the code and read in as csv.

1.1.1 A Dataset with only Two Continuous Variables

# Two hand-entered score vectors of equal length; the suffixes ec1 / ec3 match
# the column names assigned below (presumably two extra-credit variants of the
# course total -- confirm).
ar_test_scores_ec1 <- c(
  101.72, 101.28, 99.92, 103.31, 100.27, 104.27, 90.23615385, 77.8,
  103.4357143, 97.07, 93.13307692, 95.91, 92.49, 93.95307692, 95.38615385,
  97.17, 99.51, 100.3475, 95.83214286, 92.5, 94.92, 91.09, 90.4332967,
  93.52101648, 80.34, 59.74, 79.525, 77.67236264, 81.59252747, 82.3275,
  80.63, 76.98464286, 81.66, 79.76, 70.59214286, 82.46857143, 66.41, 74.06,
  40.475, 76.28, 44.18464286, 66.255, 65.59923077, 69.66857143, 60.51, 56.04
)
ar_test_scores_ec3 <- c(
  107.72, 101.28, 105.92, 109.31, 104.27, 110.27, 91.92846154, 81.8,
  109.0071429, 103.07, 98.97923077, 101.91, 96.49, 97.79923077, 99.07846154,
  99.17, 103.51, 112.2225, 101.2964286, 94.5, 98.92, 97.09, 93.83989011,
  97.36304945, 80.34, 65.74, 85.275, 82.19708791, 86.53758242, 86.2025,
  86.63, 82.57392857, 83.66, 79.76, 75.55642857, 86.32571429, 66.41, 76.06,
  44.225, 82.28, 47.77392857, 70.005, 69.13769231, 73.52571429, 60.51, 56.04
)

# Bind the vectors column-wise (ec1 first, ec3 second) into a numeric matrix.
mt_test_scores <- cbind(ar_test_scores_ec1, ar_test_scores_ec3)

# New column names, listed in the same order as the matrix columns.
ar_st_varnames <- c('course_total_ec1p', 'course_total_ec3p')

# Convert to a tibble, rename every column positionally, and print a summary.
tb_final_twovar <- rename_all(as_tibble(mt_test_scores), ~ ar_st_varnames)
summary(tb_final_twovar)
##  course_total_ec1p course_total_ec3p
##  Min.   : 40.48    Min.   : 44.23   
##  1st Qu.: 76.46    1st Qu.: 79.91   
##  Median : 86.35    Median : 89.28   
##  Mean   : 83.88    Mean   : 87.90   
##  3rd Qu.: 95.89    3rd Qu.:100.75   
##  Max.   :104.27    Max.   :112.22
# Summarise tb_final_twovar with ff_summ_percentiles (presumably the
# REconTools percentile-summary helper -- confirm), passing
# bl_statsasrows = TRUE and col2varname = FALSE.
ff_summ_percentiles(
  df = tb_final_twovar,
  bl_statsasrows = TRUE,
  col2varname = FALSE
)

1.1.2 A Dataset with one Continuous Variable and Histogram

# Hand-entered final course scores, one value per observation.
ar_final_scores <- c(
  94.28442509, 95.68817475, 97.25219512, 77.89268293, 95.08795497,
  93.27380863, 92.3, 84.25317073, 86.08642991, 84.69219512, 71.43634146,
  76.21365854, 71.68878049, 77.46142589, 79.29579268, 43.7285453,
  63.80634146, 67.92994774, 100.8980488, 100.0857143, 99.93073171,
  98.4102439, 97.93, 97.10359756, 96.97121951, 96.60292683, 96.23317073,
  93.92243902, 93.82243902, 92.75390244, 92.65775261, 92.20444653,
  91.73463415, 90.38321161, 89.37414634, 86.95932458, 79.58686411,
  78.70878049, 77.2497561, 76.88195122, 76.52987805, 74.72114313,
  74.27488676, 71.30268293, 63.70256098, 37.90426829, 2.292682927
)

# Pair each score with its position. seq_along() is used instead of
# seq(1, length(x)) because it stays correct (empty) for a zero-length vector,
# whereas seq(1, 0) would count down and yield c(1, 0).
mt_test_scores <- cbind(seq_along(ar_final_scores), ar_final_scores)

# Column names, in matrix column order: position index, then the score.
ar_st_varnames <- c('index', 'course_final')

# Convert to a tibble, rename every column positionally, and print a summary.
tb_onevar <- as_tibble(mt_test_scores) %>%
  rename_all(~ ar_st_varnames)
summary(tb_onevar)
##      index       course_final    
##  Min.   : 1.0   Min.   :  2.293  
##  1st Qu.:12.5   1st Qu.: 76.372  
##  Median :24.0   Median : 86.959  
##  Mean   :24.0   Mean   : 82.415  
##  3rd Qu.:35.5   3rd Qu.: 94.686  
##  Max.   :47.0   Max.   :100.898
# Summarise tb_onevar with ff_summ_percentiles (presumably the REconTools
# percentile-summary helper -- confirm), passing bl_statsasrows = TRUE and
# col2varname = FALSE.
ff_summ_percentiles(
  df = tb_onevar,
  bl_statsasrows = TRUE,
  col2varname = FALSE
)

1.1.3 A Dataset with Multiple Variables

# Hand-entered exam records, stored as a single CSV-formatted string that is
# parsed further below with read.csv(text = ...).
#   - The first line is the header: init_prof, later_prof, class_id, exam_score.
#   - Each following line is one record (157 records, class_id 1 through 4).
#   - String fields are wrapped in single quotes, and every line/field is
#     padded with a leading space; the parser has to account for both.
txt_test_data <- "init_prof, later_prof, class_id, exam_score
 'SW', 'SW', 1, 102
 'SW', 'SW', 1, 102
 'SW', 'SW', 1, 101
 'SW', 'SW', 1, 100
 'SW', 'SW', 1, 100
 'SW', 'SW', 1, 99
 'SW', 'SW', 1, 98.5
 'SW', 'SW', 1, 98.5
 'SW', 'SW', 1, 97
 'SW', 'SW', 1, 95
 'SW', 'SW', 1, 94
 'SW', 'SW', 1, 91
 'SW', 'SW', 1, 91
 'SW', 'SW', 1, 90
 'SW', 'SW', 1, 89
 'SW', 'SW', 1, 88.5
 'SW', 'SW', 1, 88
 'SW', 'SW', 1, 87
 'SW', 'SW', 1, 87
 'SW', 'SW', 1, 87
 'SW', 'SW', 1, 86
 'SW', 'SW', 1, 86
 'SW', 'SW', 1, 84
 'SW', 'SW', 1, 82
 'SW', 'SW', 1, 78.5
 'SW', 'SW', 1, 76
 'SW', 'SW', 1, 72
 'SW', 'SW', 1, 70.5
 'SW', 'SW', 1, 67.5
 'SW', 'SW', 1, 67.5
 'SW', 'SW', 1, 67
 'SW', 'SW', 1, 63.5
 'SW', 'SW', 1, 60
 'SW', 'SW', 1, 59
 'SW', 'SW', 1, 44.5
 'SW', 'SW', 1, 44
 'SW', 'SW', 1, 42.5
 'SW', 'SW', 1, 40.5
 'SW', 'SW', 1, 40.5
 'SW', 'SW', 1, 36.5
 'SW', 'SW', 1, 35.5
 'SW', 'SW', 1, 21.5
 'SW', 'SW', 1, 4
 'MP', 'MP', 2, 105
 'MP', 'MP', 2, 103
 'MP', 'MP', 2, 102
 'MP', 'MP', 2, 101
 'MP', 'MP', 2, 101
 'MP', 'MP', 2, 100.5
 'MP', 'MP', 2, 100
 'MP', 'MP', 2, 99
 'MP', 'MP', 2, 97
 'MP', 'MP', 2, 97
 'MP', 'MP', 2, 97
 'MP', 'MP', 2, 97
 'MP', 'MP', 2, 96
 'MP', 'MP', 2, 95
 'MP', 'MP', 2, 91
 'MP', 'MP', 2, 89
 'MP', 'MP', 2, 85
 'MP', 'MP', 2, 84
 'MP', 'MP', 2, 84
 'MP', 'MP', 2, 84
 'MP', 'MP', 2, 83.5
 'MP', 'MP', 2, 82.5
 'MP', 'MP', 2, 81.5
 'MP', 'MP', 2, 80.5
 'MP', 'MP', 2, 80
 'MP', 'MP', 2, 77
 'MP', 'MP', 2, 77
 'MP', 'MP', 2, 75
 'MP', 'MP', 2, 75
 'MP', 'MP', 2, 71
 'MP', 'MP', 2, 70
 'MP', 'MP', 2, 68
 'MP', 'MP', 2, 63
 'MP', 'MP', 2, 56
 'MP', 'MP', 2, 56
 'MP', 'MP', 2, 55.5
 'MP', 'MP', 2, 49.5
 'MP', 'MP', 2, 48.5
 'MP', 'MP', 2, 47.5
 'MP', 'MP', 2, 44.5
 'MP', 'MP', 2, 34.5
 'MP', 'MP', 2, 29.5
 'CA', 'MP', 3, 103
 'CA', 'MP', 3, 103
 'CA', 'MP', 3, 101
 'CA', 'MP', 3, 96.5
 'CA', 'MP', 3, 93.5
 'CA', 'MP', 3, 93
 'CA', 'MP', 3, 93
 'CA', 'MP', 3, 92
 'CA', 'MP', 3, 90
 'CA', 'MP', 3, 90
 'CA', 'MP', 3, 89
 'CA', 'MP', 3, 86.5
 'CA', 'MP', 3, 84.5
 'CA', 'MP', 3, 83
 'CA', 'MP', 3, 83
 'CA', 'MP', 3, 82
 'CA', 'MP', 3, 78
 'CA', 'MP', 3, 75
 'CA', 'MP', 3, 74.5
 'CA', 'MP', 3, 70
 'CA', 'MP', 3, 54.5
 'CA', 'MP', 3, 52
 'CA', 'MP', 3, 50
 'CA', 'MP', 3, 42
 'CA', 'MP', 3, 36.5
 'CA', 'MP', 3, 28
 'CA', 'MP', 3, 26
 'CA', 'MP', 3, 11
 'CA', 'SN', 4, 103
 'CA', 'SN', 4, 103
 'CA', 'SN', 4, 102
 'CA', 'SN', 4, 102
 'CA', 'SN', 4, 101
 'CA', 'SN', 4, 100
 'CA', 'SN', 4, 98
 'CA', 'SN', 4, 98
 'CA', 'SN', 4, 98
 'CA', 'SN', 4, 95
 'CA', 'SN', 4, 95
 'CA', 'SN', 4, 92.5
 'CA', 'SN', 4, 92
 'CA', 'SN', 4, 91
 'CA', 'SN', 4, 90
 'CA', 'SN', 4, 85.5
 'CA', 'SN', 4, 84
 'CA', 'SN', 4, 82.5
 'CA', 'SN', 4, 81
 'CA', 'SN', 4, 77.5
 'CA', 'SN', 4, 77
 'CA', 'SN', 4, 72
 'CA', 'SN', 4, 71.5
 'CA', 'SN', 4, 69
 'CA', 'SN', 4, 68.5
 'CA', 'SN', 4, 68
 'CA', 'SN', 4, 67
 'CA', 'SN', 4, 65.5
 'CA', 'SN', 4, 62.5
 'CA', 'SN', 4, 62
 'CA', 'SN', 4, 61.5
 'CA', 'SN', 4, 61
 'CA', 'SN', 4, 57.5
 'CA', 'SN', 4, 54
 'CA', 'SN', 4, 52.5
 'CA', 'SN', 4, 51
 'CA', 'SN', 4, 50.5
 'CA', 'SN', 4, 50
 'CA', 'SN', 4, 49
 'CA', 'SN', 4, 43
 'CA', 'SN', 4, 39.5
 'CA', 'SN', 4, 32.5
 'CA', 'SN', 4, 25.5
 'CA', 'SN', 4, 18"

# Parse the hand-typed CSV text into a data frame.
# The text wraps string fields in single quotes and pads every field with a
# leading space. read.csv() only treats the double quote as a quoting
# character by default and does not strip whitespace, so without
# quote = "'" and strip.white = TRUE the professor columns would hold values
# such as " 'SW'" instead of "SW". Numeric columns are unaffected either way.
csv_test_data <- read.csv(
  text = txt_test_data,
  header = TRUE,
  quote = "'",
  strip.white = TRUE
)

# Descriptive replacement names, applied positionally: one name per column,
# listed in the same order as the columns of the parsed text.
ar_st_varnames <- c('first_half_professor',
                    'second_half_professor',
                    'course_id', 'exam_score')

# Convert to a tibble, rename every column, and print a summary.
tb_test_data <- as_tibble(csv_test_data) %>%
  rename_all(~ ar_st_varnames)
summary(tb_test_data)
##  first_half_professor second_half_professor   course_id       exam_score    
##  Length:157           Length:157            Min.   :1.000   Min.   :  4.00  
##  Class :character     Class :character      1st Qu.:1.000   1st Qu.: 60.00  
##  Mode  :character     Mode  :character      Median :2.000   Median : 82.00  
##                                             Mean   :2.465   Mean   : 75.08  
##                                             3rd Qu.:4.000   3rd Qu.: 94.00  
##                                             Max.   :4.000   Max.   :105.00

1.2 Test Score Distributions

1.2.1 Histogram

# Histogram (25 bins) of the course total stored in column
# `course_total_ec3p` of tb_final_twovar.
# The aesthetic maps the tibble's own column rather than the free-standing
# vector `ar_test_scores_ec3`, so the plot always reflects the data frame
# handed to ggplot(), even if that vector is later modified or removed, and
# the default x-axis label is the column name.
# NOTE(review): the weights quoted in the caption (0.3 + 0.3 + 0.42 + 0.03)
# sum to 1.05 -- confirm the formula text with the course owner.
ggplot(tb_final_twovar, aes(x = course_total_ec3p)) +
  geom_histogram(bins = 25) +
  labs(title = 'Sandbox: Final Distribution (Econ 2370, FW)',
       caption = paste0('FW Section, formula:',
                        '0.3*exam1Perc + 0.3*exam2Perc + ',
                        '0.42*HWtotalPerc + 0.03*AttendancePerc \n',
                        '+ perfect attendance + 0.03 per Extra Credit')) +
  theme_bw()

# Histogram (16 bins) of the exam_score column of tb_test_data, drawn with
# the black-and-white theme and labelled as covering all sections.
ggplot(data = tb_test_data, mapping = aes(x = exam_score)) +
  geom_histogram(bins = 16) +
  labs(
    title = 'Exam Distribution',
    caption = 'All Sections'
  ) +
  theme_bw()