svr_id are individuals, observed at multiple points; svr_x is date of observation; svr_y is some value observed at each time point. Imagine these are SAT scores for individuals. we want to calculate the sum/avg etc cumulatively at each date for all individuals, but only using their last score.

ff_panel_cumsum_grouplast(
  df,
  svr_id,
  svr_x,
  svr_y,
  svr_cumsumtop = "y_movingavg_lastestscores",
  stat = "mean",
  quick = TRUE,
  verbose = FALSE
)

Arguments

df

dataframe

svr_id

string name of individual id variable

svr_x

string name of the ranking variable

svr_y

string name of the value variable

svr_cumsumtop

string variable name that stores the cumulative summed variable

stat

string type of statistics to compute, mean, sum, max, min, median

quick

boolean faster algorithm without repeating calculation, quick should be used, slow was initial bad algorithm, results identical

verbose

boolean when quick is FALSE, could verbose which generates slower results

Value

a dataframe with the cumulative summed/averaged etc column up to each row

Author

Fan Wang, http://fanwangecon.github.io

Examples

library(tidyr)
library(dplyr)
library(tibble)
set.seed(12345)
it_N <- 5
ar_it_M <- sample(1:10, it_N, replace = TRUE)
ar_it_M_ID <- sample(1:it_N, it_N, replace = FALSE)
tb_combine <- as_tibble(cbind(ar_it_M, ar_it_M_ID)) %>% rowid_to_column(var = "id")
tb_long <- tb_combine %>% uncount(ar_it_M)
tb_long <- tb_long %>% add_column(xrand = runif(dim(tb_long)[1])) %>% arrange(xrand) %>% mutate(x = row_number())
tb_long <- tb_long %>% arrange(id, x) %>% group_by(id) %>% mutate(rank_l = row_number())
df <- tb_long %>% select(id, x) %>% add_column(y = runif(dim(tb_long)[1])) %>% arrange(id,x) %>% group_by(id) %>% mutate(y = cumsum(y))
print(df)
#> # A tibble: 39 x 3
#> # Groups:   id [5]
#>       id     x     y
#>    <int> <int> <dbl>
#>  1     1     6 0.499
#>  2     1    16 1.23 
#>  3     1    18 1.31 
#>  4     2     9 0.436
#>  5     2    13 0.672
#>  6     2    17 1.46 
#>  7     2    20 1.72 
#>  8     2    22 2.71 
#>  9     2    25 3.47 
#> 10     2    27 4.44 
#> # ... with 29 more rows
quick <- TRUE
ff_panel_cumsum_grouplast(df, svr_id='id', svr_x='x', svr_y='y',
                          svr_cumsumtop = 'y_movingmean_lastestscore', stat='mean', quick=quick)
#> # A tibble: 39 x 4
#> # Groups:   id [5]
#>       id     x     y y_movingmean_lastestscore
#>    <int> <int> <dbl>                     <dbl>
#>  1     3     1 0.600                     0.600
#>  2     5     2 0.826                     0.713
#>  3     5     3 1.33                      0.964
#>  4     4     4 0.961                     0.963
#>  5     4     5 1.62                      1.18 
#>  6     1     6 0.499                     1.01 
#>  7     3     7 1.55                      1.25 
#>  8     5     8 2.13                      1.45 
#>  9     2     9 0.436                     1.25 
#> 10     4    10 2.13                      1.35 
#> # ... with 29 more rows
quick <- FALSE
verbose <- TRUE
ff_panel_cumsum_grouplast(df, svr_id='id', svr_x='x', svr_y='y',
                          svr_cumsumtop = 'y_movingmean_lastestscore', stat='mean', quick=quick, verbose=verbose)
#> # A tibble: 39 x 4
#> # Groups:   id [5]
#>       id     x     y y_movingmean_lastestscore
#>    <int> <int> <dbl>                     <dbl>
#>  1     3     1 0.600                     0.600
#>  2     5     2 0.826                     0.713
#>  3     5     3 1.33                      0.964
#>  4     4     4 0.961                     0.963
#>  5     4     5 1.62                      1.18 
#>  6     1     6 0.499                     1.01 
#>  7     3     7 1.55                      1.25 
#>  8     5     8 2.13                      1.45 
#>  9     2     9 0.436                     1.25 
#> 10     4    10 2.13                      1.35 
#> # ... with 29 more rows
verbose <- FALSE
ff_panel_cumsum_grouplast(df, svr_id='id', svr_x='x', svr_y='y',
                          svr_cumsumtop = 'y_movingmean_lastestscore', stat='mean', quick=quick, verbose=verbose)
#> # A tibble: 39 x 4
#> # Groups:   id [5]
#>       id     x     y y_movingmean_lastestscore
#>    <int> <int> <dbl>                     <dbl>
#>  1     3     1 0.600                     0.600
#>  2     5     2 0.826                     0.713
#>  3     5     3 1.33                      0.964
#>  4     4     4 0.961                     0.963
#>  5     4     5 1.62                      1.18 
#>  6     1     6 0.499                     1.01 
#>  7     3     7 1.55                      1.25 
#>  8     5     8 2.13                      1.45 
#>  9     2     9 0.436                     1.25 
#> 10     4    10 2.13                      1.35 
#> # ... with 29 more rows
ff_panel_cumsum_grouplast(df, svr_id='id', svr_x='x', svr_y='y',
                          svr_cumsumtop = 'y_movingmedian_lastestscore', stat='median')
#> # A tibble: 39 x 4
#> # Groups:   id [5]
#>       id     x     y y_movingmedian_lastestscore
#>    <int> <int> <dbl>                       <dbl>
#>  1     3     1 0.600                       0.600
#>  2     5     2 0.826                       0.713
#>  3     5     3 1.33                        0.964
#>  4     4     4 0.961                       0.961
#>  5     4     5 1.62                        1.33 
#>  6     1     6 0.499                       0.964
#>  7     3     7 1.55                        1.44 
#>  8     5     8 2.13                        1.58 
#>  9     2     9 0.436                       1.55 
#> 10     4    10 2.13                        1.55 
#> # ... with 29 more rows
ff_panel_cumsum_grouplast(df, svr_id='id', svr_x='x', svr_y='y',
                          svr_cumsumtop = 'y_movingsum_lastestscore', stat='sum')
#> # A tibble: 39 x 4
#> # Groups:   id [5]
#>       id     x     y y_movingsum_lastestscore
#>    <int> <int> <dbl>                    <dbl>
#>  1     3     1 0.600                    0.600
#>  2     5     2 0.826                    1.43 
#>  3     5     3 1.33                     1.93 
#>  4     4     4 0.961                    2.89 
#>  5     4     5 1.62                     3.54 
#>  6     1     6 0.499                    4.04 
#>  7     3     7 1.55                     4.99 
#>  8     5     8 2.13                     5.79 
#>  9     2     9 0.436                    6.23 
#> 10     4    10 2.13                     6.74 
#> # ... with 29 more rows
ff_panel_cumsum_grouplast(df, svr_id='id', svr_x='x', svr_y='y',
                          svr_cumsumtop = 'y_movingmin_lastestscore', stat='min')
#> # A tibble: 39 x 4
#> # Groups:   id [5]
#>       id     x     y y_movingmin_lastestscore
#>    <int> <int> <dbl>                    <dbl>
#>  1     3     1 0.600                    0.600
#>  2     5     2 0.826                    0.600
#>  3     5     3 1.33                     0.600
#>  4     4     4 0.961                    0.600
#>  5     4     5 1.62                     0.600
#>  6     1     6 0.499                    0.499
#>  7     3     7 1.55                     0.499
#>  8     5     8 2.13                     0.499
#>  9     2     9 0.436                    0.436
#> 10     4    10 2.13                     0.436
#> # ... with 29 more rows
ff_panel_cumsum_grouplast(df, svr_id='id', svr_x='x', svr_y='y',
                          svr_cumsumtop = 'y_movingmax_lastestscore', stat='max')
#> # A tibble: 39 x 4
#> # Groups:   id [5]
#>       id     x     y y_movingmax_lastestscore
#>    <int> <int> <dbl>                    <dbl>
#>  1     3     1 0.600                    0.600
#>  2     5     2 0.826                    0.826
#>  3     5     3 1.33                     1.33 
#>  4     4     4 0.961                    1.33 
#>  5     4     5 1.62                     1.62 
#>  6     1     6 0.499                    1.62 
#>  7     3     7 1.55                     1.62 
#>  8     5     8 2.13                     2.13 
#>  9     2     9 0.436                    2.13 
#> 10     4    10 2.13                     2.13 
#> # ... with 29 more rows