Dataset tracks individuals over time. At each date, calculate the cumulative sum across all individuals based on each individual’s final date of observation. This file works out how the ff_panel_cumsum_grouplast function works.

There are \(N\) groups indexed by \(j\), each group has \(\left\{M_{i}\right\}_{i=1}^{N}\) individuals. Variable \(x_{ji}\) is some variable measured at the group/individual level. There is a scheme that selects only individuals whose \(x_{ji} < W\). But we will only take one individual from each group, could be the individual from the group with the highest \(x_{ji}\) value. One could construct a variable \(G\), where each row corresponds to a \(x_{ji}\) value. What is the sum of all \(y_{ji}\) conditional on \(x_{ji} \le W\), only counting the largest individual within group.

Suppose we have data from N years, but each year’s data is incomplete, so information from some months is unavailable. Generate a cumulative sum up to any calendar month, where we only sum up the value observed on the last available date of each year, and on the last available month of the current year up to the month of accounting.

Package and Data Loading and Parameter Setting

# Load Library
rm(list = ls(all.names = TRUE))
# library(tidyverse)
# library(tidymodels)
# library(rlang)

library(tibble)
library(dplyr)
#> 
#> Attaching package: 'dplyr'
#> The following objects are masked from 'package:stats':
#> 
#>     filter, lag
#> The following objects are masked from 'package:base':
#> 
#>     intersect, setdiff, setequal, union
library(tidyr)

library(REconTools)

library(knitr)
library(kableExtra)
#> 
#> Attaching package: 'kableExtra'
#> The following object is masked from 'package:dplyr':
#> 
#>     group_rows

Generate Data Structure

# Generate X vector
set.seed(12345)

# Number of N
it_N <- 5
# M values for each i
ar_it_M <- sample(1:10, it_N, replace = TRUE)
ar_it_M_ID <- sample(1:it_N, it_N, replace = FALSE)
# Generate dataframe
tb_combine <- as_tibble(cbind(ar_it_M, ar_it_M_ID)) %>% rowid_to_column(var = "id")

# Generate X Vector
tb_long <- tb_combine %>% uncount(ar_it_M)
tb_long <- tb_long %>% add_column(xrand = runif(dim(tb_long)[1])) %>% arrange(xrand) %>% mutate(x = row_number())

# Generate within Group Rank
tb_long <- tb_long %>% arrange(id, x) %>% group_by(id) %>% mutate(rank_l = row_number())

# Select Core
tb_data <- tb_long %>% select(id, x) %>% add_column(y = runif(dim(tb_long)[1])) %>%
              arrange(id,x) %>% group_by(id) %>% mutate(y = cumsum(y))

# Display
kable(tb_long) %>%
  kable_styling(bootstrap_options = c("striped", "hover", "condensed", "responsive"))
id ar_it_M_ID xrand x rank_l
1 2 0.1789636 6 1
1 2 0.3881440 16 2
1 2 0.4024851 18 3
2 5 0.2264672 9 1
2 5 0.3267524 13 2
2 5 0.3898285 17 3
2 5 0.4537281 20 4
2 5 0.5440579 22 5
2 5 0.6445426 25 6
2 5 0.6985436 27 7
2 5 0.7074819 28 8
2 5 0.9516588 37 9
2 5 0.9654153 39 10
3 3 0.0059876 1 1
3 3 0.1877124 7 2
3 3 0.3616256 14 3
3 3 0.3701041 15 4
3 3 0.4845578 21 5
3 3 0.6818336 26 6
3 3 0.7930072 32 7
3 3 0.8687949 34 8
4 4 0.0601952 4 1
4 4 0.1340316 5 2
4 4 0.2596812 10 3
4 4 0.3212247 12 4
4 4 0.4291988 19 5
4 4 0.6174246 23 6
4 4 0.7732432 30 7
4 4 0.7821933 31 8
4 4 0.9041547 35 9
4 4 0.9272740 36 10
5 1 0.0434565 2 1
5 1 0.0550538 3 2
5 1 0.2130255 8 3
5 1 0.3150282 11 4
5 1 0.6255428 24 5
5 1 0.7324961 29 6
5 1 0.8273029 33 7
5 1 0.9644703 38 8
kable(tb_data ) %>%
  kable_styling(bootstrap_options = c("striped", "hover", "condensed", "responsive"))
id x y
1 6 0.4992410
1 16 1.2290130
1 18 1.3093490
2 9 0.4355305
2 13 0.6721109
2 17 1.4636787
2 20 1.7223631
2 22 2.7083469
2 25 3.4652206
2 27 4.4449989
2 28 4.6639467
2 37 5.6126539
2 39 5.7621118
3 1 0.6003570
3 7 1.5467877
3 14 2.2351411
3 15 2.7406748
3 21 3.1136110
3 26 3.4494161
3 32 3.4976674
3 34 4.1166150
4 4 0.9614473
4 5 1.6164078
4 10 2.1266998
4 12 2.2767980
4 19 3.1472459
4 23 3.6616876
4 30 3.6703355
4 31 3.6895302
4 35 3.8340421
4 36 4.1390739
5 2 0.8256569
5 3 1.3280015
5 8 2.1315741
5 11 2.1922141
5 24 3.1201692
5 29 3.9283478
5 33 4.0071611
5 38 4.6080894

Cumulative Sum Within Group Max

The dataframe we constructed has three columns, id, x and y.

Algorithm 1: Iterative Compute Row by Row redo All

This is a slow algorithm, redo all computations with the addition of each row, this works, but is very slow. Recompute row by row fully.

  1. sort by x
  2. select frame up to current row
  3. slice last of group
  4. sum
  5. fill

tb_data <- tb_data %>% arrange(x)
tb_data_cum_sum_top <- tb_data %>% mutate(cum_sum_top = 0,
                                          cum_mean_top = 0,
                                          cum_median_top = 0)

for (row_ctr in seq(1, dim(tb_data)[1])) {

    # select up to current row sort and group
    tb_data_up2row <- tb_data[1:row_ctr,] %>% arrange(id, x) %>% group_by(id)

    # Obtain last element sorted by x for each group, and resort by x
    tb_data_up2row <- tb_data_up2row %>% slice(n()) %>% arrange(x)

    # cumulative sum of the highest element of each group below row_ctr
    fl_cum_sum_top_cur <- tb_data_up2row %>% ungroup() %>%
        summarize(y_sum_top_up2row = sum(y)) %>% pull(y_sum_top_up2row)
    fl_cum_mean_top_cur <- tb_data_up2row %>% ungroup() %>%
        summarize(y_mean_top_up2row = mean(y)) %>% pull(y_mean_top_up2row)
    fl_cum_median_top_cur <- tb_data_up2row %>% ungroup() %>%
        summarize(y_median_top_up2row = median(y)) %>% pull(y_median_top_up2row)

    # Store results
    tb_data_cum_sum_top[row_ctr, 'cum_sum_top'] <- fl_cum_sum_top_cur
    tb_data_cum_sum_top[row_ctr, 'cum_mean_top'] <- fl_cum_mean_top_cur
    tb_data_cum_sum_top[row_ctr, 'cum_median_top'] <- fl_cum_median_top_cur

    # Display
    if (row_ctr %% 10 == 0) {
      cat('row_ctr:', row_ctr, '\n')
      print(tb_data_up2row)
    }
}
#> row_ctr: 10 
#> # A tibble: 5 x 3
#> # Groups:   id [5]
#>      id     x     y
#>   <int> <int> <dbl>
#> 1     1     6 0.499
#> 2     3     7 1.55 
#> 3     5     8 2.13 
#> 4     2     9 0.436
#> 5     4    10 2.13 
#> row_ctr: 20 
#> # A tibble: 5 x 3
#> # Groups:   id [5]
#>      id     x     y
#>   <int> <int> <dbl>
#> 1     5    11  2.19
#> 2     3    15  2.74
#> 3     1    18  1.31
#> 4     4    19  3.15
#> 5     2    20  1.72
#> row_ctr: 30 
#> # A tibble: 5 x 3
#> # Groups:   id [5]
#>      id     x     y
#>   <int> <int> <dbl>
#> 1     1    18  1.31
#> 2     3    26  3.45
#> 3     2    28  4.66
#> 4     5    29  3.93
#> 5     4    30  3.67

# Display Final
kable(tb_data_cum_sum_top) %>%
    kable_styling(bootstrap_options = c("striped", "hover", "condensed", "responsive"))
id x y cum_sum_top cum_mean_top cum_median_top
3 1 0.6003570 0.600357 0.6003570 0.6003570
5 2 0.8256569 1.426014 0.7130069 0.7130069
5 3 1.3280015 1.928359 0.9641792 0.9641792
4 4 0.9614473 2.889806 0.9632686 0.9614473
4 5 1.6164078 3.544766 1.1815888 1.3280015
1 6 0.4992410 4.044007 1.0110018 0.9641792
3 7 1.5467877 4.990438 1.2476095 1.4373946
5 8 2.1315741 5.794011 1.4485027 1.5815978
2 9 0.4355305 6.229541 1.2459082 1.5467877
4 10 2.1266998 6.739833 1.3479666 1.5467877
5 11 2.1922141 6.800473 1.3600946 1.5467877
4 12 2.2767980 6.950571 1.3901143 1.5467877
2 13 0.6721109 7.187152 1.4374304 1.5467877
3 14 2.2351411 7.875505 1.5751010 2.1922141
3 15 2.7406748 8.381039 1.6762078 2.1922141
1 16 1.2290130 9.110811 1.8221622 2.1922141
2 17 1.4636787 9.902379 1.9804757 2.1922141
1 18 1.3093490 9.982715 1.9965429 2.1922141
4 19 3.1472459 10.853163 2.1706325 2.1922141
2 20 1.7223631 11.111847 2.2223694 2.1922141
3 21 3.1136110 11.484783 2.2969566 2.1922141
2 22 2.7083469 12.470767 2.4941534 2.7083469
4 23 3.6616876 12.985209 2.5970417 2.7083469
5 24 3.1201692 13.913164 2.7826328 3.1136110
2 25 3.4652206 14.670037 2.9340075 3.1201692
3 26 3.4494161 15.005843 3.0011685 3.4494161
2 27 4.4449989 15.985621 3.1971242 3.4494161
2 28 4.6639467 16.204569 3.2409137 3.4494161
5 29 3.9283478 17.012747 3.4025494 3.6616876
4 30 3.6703355 17.021395 3.4042790 3.6703355
4 31 3.6895302 17.040590 3.4081180 3.6895302
3 32 3.4976674 17.088841 3.4177682 3.6895302
5 33 4.0071611 17.167655 3.4335309 3.6895302
3 34 4.1166150 17.786602 3.5573204 4.0071611
4 35 3.8340421 17.931114 3.5862228 4.0071611
4 36 4.1390739 18.236146 3.6472292 4.1166150
2 37 5.6126539 19.184853 3.8369706 4.1166150
5 38 4.6080894 19.785781 3.9571562 4.1390739
2 39 5.7621118 19.935239 3.9870478 4.1390739

Algorithm 2: Faster Algorithm, Less Repeatitive Calculations

  1. Initialize a df_indi, each row is a different individual, N by 1, benefit is that this is small
  2. Loop over queue file, row by row, rank by rank
  3. With the addition of each row, update df_indi’s row corresponding to the individual, always update to the latest piece of data.
  4. Compute statistics of interest with df_indi

tb_data <- tb_data %>% arrange(x)
tb_data_cum_sum_top <- tb_data %>% mutate(cum_sum_top = 0,
                                          cum_mean_top = 0,
                                          cum_median_top = 0)
ar_latest_indi <- rep(NA, 1, it_N)

for (row_ctr in seq(1, dim(tb_data)[1])) {

    # current rank, what is the ID of the person at this rank
    it_id_row <- tb_data[['id']][row_ctr]

    # update the overall individual array with highest, latest value
    ar_latest_indi[it_id_row] <- tb_data[['y']][row_ctr]

    # Compute sum.
    fl_cum_sum_top_cur <- sum(ar_latest_indi, na.rm = TRUE)
    fl_cum_mean_top_cur <- mean(ar_latest_indi, na.rm = TRUE)
    fl_cum_median_top_cur <- median(ar_latest_indi, na.rm = TRUE)

    # Store results
    tb_data_cum_sum_top[row_ctr, 'cum_sum_top'] <- fl_cum_sum_top_cur
    tb_data_cum_sum_top[row_ctr, 'cum_mean_top'] <- fl_cum_mean_top_cur
    tb_data_cum_sum_top[row_ctr, 'cum_median_top'] <- fl_cum_median_top_cur

    # Display
    if (row_ctr %% 10 == 0) {
      cat('row_ctr:', row_ctr, '\n')
      print(tb_data_up2row)
    }
}
#> row_ctr: 10 
#> # A tibble: 5 x 3
#> # Groups:   id [5]
#>      id     x     y
#>   <int> <int> <dbl>
#> 1     1    18  1.31
#> 2     3    34  4.12
#> 3     4    36  4.14
#> 4     5    38  4.61
#> 5     2    39  5.76
#> row_ctr: 20 
#> # A tibble: 5 x 3
#> # Groups:   id [5]
#>      id     x     y
#>   <int> <int> <dbl>
#> 1     1    18  1.31
#> 2     3    34  4.12
#> 3     4    36  4.14
#> 4     5    38  4.61
#> 5     2    39  5.76
#> row_ctr: 30 
#> # A tibble: 5 x 3
#> # Groups:   id [5]
#>      id     x     y
#>   <int> <int> <dbl>
#> 1     1    18  1.31
#> 2     3    34  4.12
#> 3     4    36  4.14
#> 4     5    38  4.61
#> 5     2    39  5.76

# Display Final
kable(tb_data_cum_sum_top) %>%
    kable_styling(bootstrap_options = c("striped", "hover", "condensed", "responsive"))
id x y cum_sum_top cum_mean_top cum_median_top
3 1 0.6003570 0.600357 0.6003570 0.6003570
5 2 0.8256569 1.426014 0.7130069 0.7130069
5 3 1.3280015 1.928359 0.9641792 0.9641792
4 4 0.9614473 2.889806 0.9632686 0.9614473
4 5 1.6164078 3.544766 1.1815888 1.3280015
1 6 0.4992410 4.044007 1.0110018 0.9641792
3 7 1.5467877 4.990438 1.2476095 1.4373946
5 8 2.1315741 5.794011 1.4485027 1.5815978
2 9 0.4355305 6.229541 1.2459082 1.5467877
4 10 2.1266998 6.739833 1.3479666 1.5467877
5 11 2.1922141 6.800473 1.3600946 1.5467877
4 12 2.2767980 6.950571 1.3901143 1.5467877
2 13 0.6721109 7.187152 1.4374304 1.5467877
3 14 2.2351411 7.875505 1.5751010 2.1922141
3 15 2.7406748 8.381039 1.6762078 2.1922141
1 16 1.2290130 9.110811 1.8221622 2.1922141
2 17 1.4636787 9.902379 1.9804757 2.1922141
1 18 1.3093490 9.982715 1.9965429 2.1922141
4 19 3.1472459 10.853163 2.1706325 2.1922141
2 20 1.7223631 11.111847 2.2223694 2.1922141
3 21 3.1136110 11.484783 2.2969566 2.1922141
2 22 2.7083469 12.470767 2.4941534 2.7083469
4 23 3.6616876 12.985209 2.5970417 2.7083469
5 24 3.1201692 13.913164 2.7826328 3.1136110
2 25 3.4652206 14.670037 2.9340075 3.1201692
3 26 3.4494161 15.005843 3.0011685 3.4494161
2 27 4.4449989 15.985621 3.1971242 3.4494161
2 28 4.6639467 16.204569 3.2409137 3.4494161
5 29 3.9283478 17.012747 3.4025494 3.6616876
4 30 3.6703355 17.021395 3.4042790 3.6703355
4 31 3.6895302 17.040590 3.4081180 3.6895302
3 32 3.4976674 17.088841 3.4177682 3.6895302
5 33 4.0071611 17.167655 3.4335309 3.6895302
3 34 4.1166150 17.786602 3.5573204 4.0071611
4 35 3.8340421 17.931114 3.5862228 4.0071611
4 36 4.1390739 18.236146 3.6472292 4.1166150
2 37 5.6126539 19.184853 3.8369706 4.1166150
5 38 4.6080894 19.785781 3.9571562 4.1390739
2 39 5.7621118 19.935239 3.9870478 4.1390739

Test Function

Testing ff_panel_cumsum_grouplast.

Cumulative Sum the Last

Within group last occurance, cumulative sum of last occurance of individual by date. Think of date as x, y as individual outcomes, and id as individual ID. Each individual is observed at multiple x points. In the example here, at each x, only one id observed.

ff_panel_cumsum_grouplast(tb_data, svr_id='id', svr_x='x', svr_y='y', svr_cumsumtop = 'y_cumsum', stat='sum')
#> # A tibble: 39 x 4
#> # Groups:   id [5]
#>       id     x     y y_cumsum
#>    <int> <int> <dbl>    <dbl>
#>  1     3     1 0.600    0.600
#>  2     5     2 0.826    1.43 
#>  3     5     3 1.33     1.93 
#>  4     4     4 0.961    2.89 
#>  5     4     5 1.62     3.54 
#>  6     1     6 0.499    4.04 
#>  7     3     7 1.55     4.99 
#>  8     5     8 2.13     5.79 
#>  9     2     9 0.436    6.23 
#> 10     4    10 2.13     6.74 
#> # ... with 29 more rows

Cumulative Average Last

Thinking of taking Average SAT scores but, only of the last score people have. Calculate a moving average, where the moving window is all past information, and the average is across individuals, and for each individual, we only take the most recent score.

ff_panel_cumsum_grouplast(tb_data, svr_id='id', svr_x='x', svr_y='y', svr_cumsumtop = 'y_movingaverage_mean', stat='mean')
#> # A tibble: 39 x 4
#> # Groups:   id [5]
#>       id     x     y y_movingaverage_mean
#>    <int> <int> <dbl>                <dbl>
#>  1     3     1 0.600                0.600
#>  2     5     2 0.826                0.713
#>  3     5     3 1.33                 0.964
#>  4     4     4 0.961                0.963
#>  5     4     5 1.62                 1.18 
#>  6     1     6 0.499                1.01 
#>  7     3     7 1.55                 1.25 
#>  8     5     8 2.13                 1.45 
#>  9     2     9 0.436                1.25 
#> 10     4    10 2.13                 1.35 
#> # ... with 29 more rows

Large Dataframe Cumulative Sum, Quick and Slow

Suppose I have 100,000 records each at a unique sequencial date, records for over 500 individuals, each individual multiple times. Cumulative sum at each date, only summing the latest value for each individual. What is the speed of the algorithm here.

Slow and Fast Algorithm Comparison

First Explore algorithms, initial algorithm very slow, improved algorithm dramtically faster. Using option quick to control

# Initialize
set.seed(67890)

it_loop <- 3
it_N_min <- 5
it_N_max <- 100
it_N_max <- 30
it_max_M_min <- 10
it_max_M_max <- 100
it_max_M_max <- 30

ar_it_N <- floor(seq(it_N_min, it_N_max, length.out=it_loop))
ar_it_max_M <- floor(seq(it_max_M_min, it_max_M_max, length.out=it_loop))

for (it_ctr in seq(1, it_loop)) {

  # Set df size
  it_N <- ar_it_N[it_ctr]
  it_max_M <- ar_it_max_M[it_ctr]

  # Generate Panel Frame
  df_start_time <- Sys.time()
  ar_it_M <- sample(1:it_max_M, it_N, replace = TRUE)
  ar_it_M_ID <- sample(1:it_N, it_N, replace = FALSE)
  tb_combine <- as_tibble(cbind(ar_it_M, ar_it_M_ID)) %>% rowid_to_column(var = "id")
  tb_long <- tb_combine %>% uncount(ar_it_M)
  tb_long <- tb_long %>% add_column(xrand = runif(dim(tb_long)[1])) %>% arrange(xrand) %>% mutate(x = row_number())
  tb_long <- tb_long %>% arrange(id, x) %>% group_by(id) %>% mutate(rank_l = row_number())
  df <- tb_long %>% select(id, x) %>% add_column(y = runif(dim(tb_long)[1])) %>% arrange(id,x) %>% group_by(id) %>% mutate(y = cumsum(y))
  df_end_time <- Sys.time()

  # Timing Test FAST
  start_time_fast <- Sys.time()
  tb_data_cum_sum_top <- ff_panel_cumsum_grouplast(df, svr_id='id', svr_x='x', svr_y='y', svr_cumsumtop = 'y_movingsum_lastestscore', stat='sum', quick=TRUE)
  end_time_fast <- Sys.time()

  # Timing Test SLOW VERBOSE TRUE
  start_time_slow <- Sys.time()
  tb_data_cum_sum_top <- ff_panel_cumsum_grouplast(df, svr_id='id', svr_x='x', svr_y='y', svr_cumsumtop = 'y_movingsum_lastestscore', stat='sum', quick=FALSE, verbose = TRUE)
  end_time_slow <- Sys.time()

  # Timing Test SLOW VERBOSE FALSE
  start_time_slow_verbose <- Sys.time()
  tb_data_cum_sum_top <- ff_panel_cumsum_grouplast(df, svr_id='id', svr_x='x', svr_y='y', svr_cumsumtop = 'y_movingsum_lastestscore', stat='sum', quick=FALSE, verbose = FALSE)
  end_time_slow_verbose <- Sys.time()

  cat('it_ctr:', it_ctr, ', it_N:', it_N, ', it_max_N:', it_max_M, '\n')
  print(paste0('Df Generation Took:', df_end_time - df_start_time))
  print(paste0('Moving Stat Took QUICK True:', end_time_fast - start_time_fast))
  print(paste0('Moving Stat Took SLOW VERBOSE True:', end_time_slow - start_time_slow))
  print(paste0('Moving Stat Took SLOW VERBOSE False:', end_time_slow_verbose - start_time_slow_verbose))
}
#> it_ctr: 1 , it_N: 5 , it_max_N: 10 
#> [1] "Df Generation Took:0.0268862247467041"
#> [1] "Moving Stat Took QUICK True:0.0159308910369873"
#> [1] "Moving Stat Took SLOW VERBOSE True:0.121488094329834"
#> [1] "Moving Stat Took SLOW VERBOSE False:0.101566791534424"
#> it_ctr: 2 , it_N: 17 , it_max_N: 20 
#> [1] "Df Generation Took:0.0209119319915771"
#> [1] "Moving Stat Took QUICK True:0.191187143325806"
#> [1] "Moving Stat Took SLOW VERBOSE True:1.85213279724121"
#> [1] "Moving Stat Took SLOW VERBOSE False:1.43789315223694"
#> it_ctr: 3 , it_N: 30 , it_max_N: 30 
#> [1] "Df Generation Took:0.0209121704101562"
#> [1] "Moving Stat Took QUICK True:0.658204793930054"
#> [1] "Moving Stat Took SLOW VERBOSE True:6.17477202415466"
#> [1] "Moving Stat Took SLOW VERBOSE False:5.06150007247925"

Fast Algorithm Large Matrix Testing

Panel structure gets somewhat large, 1000 individuals, each observed on average 100 times. This would be a very substantial panel. About 33 seconds, not very fast, could be improved later.

The structure is iterative by construction, difficult to fully vectorize. Perhaps achievable as well and faster with some moving average function, rolling window functions.

# Initialize
set.seed(67890)

it_loop <- 5
it_N_min <- 5
it_N_max <- 50
it_max_M_min <- 10
it_max_M_max <- 20

ar_it_N <- floor(seq(it_N_min, it_N_max, length.out=it_loop))
ar_it_max_M <- floor(seq(it_max_M_min, it_max_M_max, length.out=it_loop))

for (it_ctr in seq(1, it_loop)) {

  # Set df size
  it_N <- ar_it_N[it_ctr]
  it_max_M <- ar_it_max_M[it_ctr]

  # Generate Panel Frame
  df_start_time <- Sys.time()
  ar_it_M <- sample(1:it_max_M, it_N, replace = TRUE)
  ar_it_M_ID <- sample(1:it_N, it_N, replace = FALSE)
  tb_combine <- as_tibble(cbind(ar_it_M, ar_it_M_ID)) %>% rowid_to_column(var = "id")
  tb_long <- tb_combine %>% uncount(ar_it_M)
  tb_long <- tb_long %>% add_column(xrand = runif(dim(tb_long)[1])) %>% arrange(xrand) %>% mutate(x = row_number())
  tb_long <- tb_long %>% arrange(id, x) %>% group_by(id) %>% mutate(rank_l = row_number())
  df <- tb_long %>% select(id, x) %>% add_column(y = runif(dim(tb_long)[1])) %>% arrange(id,x) %>% group_by(id) %>% mutate(y = cumsum(y))
  df_end_time <- Sys.time()

  # Timing Test FAST
  start_time_fast <- Sys.time()
  tb_data_cum_sum_top <- ff_panel_cumsum_grouplast(df, svr_id='id', svr_x='x', svr_y='y', svr_cumsumtop = 'y_movingsum_lastestscore', stat='sum', quick=TRUE)
  end_time_fast <- Sys.time()

  cat('it_ctr:', it_ctr, ', it_N:', it_N, ', it_max_N:', it_max_M, '\n')
  print(paste0('Df Generation Took:', df_end_time - df_start_time))
  print(paste0('Moving Stat Took QUICK True:', end_time_fast - start_time_fast))
}
#> it_ctr: 1 , it_N: 5 , it_max_N: 10 
#> [1] "Df Generation Took:0.0229029655456543"
#> [1] "Moving Stat Took QUICK True:0.0159330368041992"
#> it_ctr: 2 , it_N: 16 , it_max_N: 12 
#> [1] "Df Generation Took:0.0238981246948242"
#> [1] "Moving Stat Took QUICK True:0.164302825927734"
#> it_ctr: 3 , it_N: 27 , it_max_N: 15 
#> [1] "Df Generation Took:0.0209109783172607"
#> [1] "Moving Stat Took QUICK True:0.317649841308594"
#> it_ctr: 4 , it_N: 38 , it_max_N: 17 
#> [1] "Df Generation Took:0.022907018661499"
#> [1] "Moving Stat Took QUICK True:0.479958057403564"
#> it_ctr: 5 , it_N: 50 , it_max_N: 20 
#> [1] "Df Generation Took:0.0219070911407471"
#> [1] "Moving Stat Took QUICK True:0.615386009216309"