R/ff_panel_cumsum.R
ff_panel_cumsum_grouplast.Rd
svr_id are individuals, observed at multiple points; svr_x is date of observation; svr_y is some value observed at each time point. Imagine these are SAT scores for individuals. we want to calculate the sum/avg etc cumulatively at each date for all individuals, but only using their last score.
ff_panel_cumsum_grouplast(
df,
svr_id,
svr_x,
svr_y,
svr_cumsumtop = "y_movingavg_lastestscores",
stat = "mean",
quick = TRUE,
verbose = FALSE
)
dataframe
string name of individual id variable
string name of the ranking variable
string name of the value variable
string variable name that stores the cumulative summed variable
string type of statistics to compute, mean, sum, max, min, median
boolean faster algorithm without repeating calculation, quick should be used, slow was initial bad algorithm, results identical
boolean when quick is FALSE, could verbose which generates slower results
a dataframe with the cumulative summed/averaged etc column up to each row
https://fanwangecon.github.io/REconTools/reference/ff_panel_cumsum_grouplast.html https://fanwangecon.github.io/REconTools/articles/fv_panel_cumsum_grouplast.html https://github.com/FanWangEcon/REconTools/blob/master/R/ff_panel_cumsum.R
library(tidyr)
library(dplyr)
library(tibble)
set.seed(12345)
it_N <- 5
ar_it_M <- sample(1:10, it_N, replace = TRUE)
ar_it_M_ID <- sample(1:it_N, it_N, replace = FALSE)
tb_combine <- as_tibble(cbind(ar_it_M, ar_it_M_ID)) %>% rowid_to_column(var = "id")
tb_long <- tb_combine %>% uncount(ar_it_M)
tb_long <- tb_long %>% add_column(xrand = runif(dim(tb_long)[1])) %>% arrange(xrand) %>% mutate(x = row_number())
tb_long <- tb_long %>% arrange(id, x) %>% group_by(id) %>% mutate(rank_l = row_number())
df <- tb_long %>% select(id, x) %>% add_column(y = runif(dim(tb_long)[1])) %>% arrange(id,x) %>% group_by(id) %>% mutate(y = cumsum(y))
print(df)
#> # A tibble: 39 x 3
#> # Groups: id [5]
#> id x y
#> <int> <int> <dbl>
#> 1 1 6 0.499
#> 2 1 16 1.23
#> 3 1 18 1.31
#> 4 2 9 0.436
#> 5 2 13 0.672
#> 6 2 17 1.46
#> 7 2 20 1.72
#> 8 2 22 2.71
#> 9 2 25 3.47
#> 10 2 27 4.44
#> # ... with 29 more rows
quick <- TRUE
ff_panel_cumsum_grouplast(df, svr_id='id', svr_x='x', svr_y='y',
svr_cumsumtop = 'y_movingmean_lastestscore', stat='mean', quick=quick)
#> # A tibble: 39 x 4
#> # Groups: id [5]
#> id x y y_movingmean_lastestscore
#> <int> <int> <dbl> <dbl>
#> 1 3 1 0.600 0.600
#> 2 5 2 0.826 0.713
#> 3 5 3 1.33 0.964
#> 4 4 4 0.961 0.963
#> 5 4 5 1.62 1.18
#> 6 1 6 0.499 1.01
#> 7 3 7 1.55 1.25
#> 8 5 8 2.13 1.45
#> 9 2 9 0.436 1.25
#> 10 4 10 2.13 1.35
#> # ... with 29 more rows
quick <- FALSE
verbose <- TRUE
ff_panel_cumsum_grouplast(df, svr_id='id', svr_x='x', svr_y='y',
svr_cumsumtop = 'y_movingmean_lastestscore', stat='mean', quick=quick, verbose=verbose)
#> # A tibble: 39 x 4
#> # Groups: id [5]
#> id x y y_movingmean_lastestscore
#> <int> <int> <dbl> <dbl>
#> 1 3 1 0.600 0.600
#> 2 5 2 0.826 0.713
#> 3 5 3 1.33 0.964
#> 4 4 4 0.961 0.963
#> 5 4 5 1.62 1.18
#> 6 1 6 0.499 1.01
#> 7 3 7 1.55 1.25
#> 8 5 8 2.13 1.45
#> 9 2 9 0.436 1.25
#> 10 4 10 2.13 1.35
#> # ... with 29 more rows
verbose <- FALSE
ff_panel_cumsum_grouplast(df, svr_id='id', svr_x='x', svr_y='y',
svr_cumsumtop = 'y_movingmean_lastestscore', stat='mean', quick=quick, verbose=verbose)
#> # A tibble: 39 x 4
#> # Groups: id [5]
#> id x y y_movingmean_lastestscore
#> <int> <int> <dbl> <dbl>
#> 1 3 1 0.600 0.600
#> 2 5 2 0.826 0.713
#> 3 5 3 1.33 0.964
#> 4 4 4 0.961 0.963
#> 5 4 5 1.62 1.18
#> 6 1 6 0.499 1.01
#> 7 3 7 1.55 1.25
#> 8 5 8 2.13 1.45
#> 9 2 9 0.436 1.25
#> 10 4 10 2.13 1.35
#> # ... with 29 more rows
ff_panel_cumsum_grouplast(df, svr_id='id', svr_x='x', svr_y='y',
svr_cumsumtop = 'y_movingmedian_lastestscore', stat='median')
#> # A tibble: 39 x 4
#> # Groups: id [5]
#> id x y y_movingmedian_lastestscore
#> <int> <int> <dbl> <dbl>
#> 1 3 1 0.600 0.600
#> 2 5 2 0.826 0.713
#> 3 5 3 1.33 0.964
#> 4 4 4 0.961 0.961
#> 5 4 5 1.62 1.33
#> 6 1 6 0.499 0.964
#> 7 3 7 1.55 1.44
#> 8 5 8 2.13 1.58
#> 9 2 9 0.436 1.55
#> 10 4 10 2.13 1.55
#> # ... with 29 more rows
ff_panel_cumsum_grouplast(df, svr_id='id', svr_x='x', svr_y='y',
svr_cumsumtop = 'y_movingsum_lastestscore', stat='sum')
#> # A tibble: 39 x 4
#> # Groups: id [5]
#> id x y y_movingsum_lastestscore
#> <int> <int> <dbl> <dbl>
#> 1 3 1 0.600 0.600
#> 2 5 2 0.826 1.43
#> 3 5 3 1.33 1.93
#> 4 4 4 0.961 2.89
#> 5 4 5 1.62 3.54
#> 6 1 6 0.499 4.04
#> 7 3 7 1.55 4.99
#> 8 5 8 2.13 5.79
#> 9 2 9 0.436 6.23
#> 10 4 10 2.13 6.74
#> # ... with 29 more rows
ff_panel_cumsum_grouplast(df, svr_id='id', svr_x='x', svr_y='y',
svr_cumsumtop = 'y_movingmin_lastestscore', stat='min')
#> # A tibble: 39 x 4
#> # Groups: id [5]
#> id x y y_movingmin_lastestscore
#> <int> <int> <dbl> <dbl>
#> 1 3 1 0.600 0.600
#> 2 5 2 0.826 0.600
#> 3 5 3 1.33 0.600
#> 4 4 4 0.961 0.600
#> 5 4 5 1.62 0.600
#> 6 1 6 0.499 0.499
#> 7 3 7 1.55 0.499
#> 8 5 8 2.13 0.499
#> 9 2 9 0.436 0.436
#> 10 4 10 2.13 0.436
#> # ... with 29 more rows
ff_panel_cumsum_grouplast(df, svr_id='id', svr_x='x', svr_y='y',
svr_cumsumtop = 'y_movingmax_lastestscore', stat='max')
#> # A tibble: 39 x 4
#> # Groups: id [5]
#> id x y y_movingmax_lastestscore
#> <int> <int> <dbl> <dbl>
#> 1 3 1 0.600 0.600
#> 2 5 2 0.826 0.826
#> 3 5 3 1.33 1.33
#> 4 4 4 0.961 1.33
#> 5 4 5 1.62 1.62
#> 6 1 6 0.499 1.62
#> 7 3 7 1.55 1.62
#> 8 5 8 2.13 2.13
#> 9 2 9 0.436 2.13
#> 10 4 10 2.13 2.13
#> # ... with 29 more rows