Testing Estimating Nonlinear Least Square for on Child¶

Testing nonlinear least square with the bridge model. What results do we get in R vs results from Stata?

# Library
# conda install -c conda-forge r-minpack.lm
library(tidyverse)
library(haven)
library(stats)
library(minpack.lm)

# Load Data
setwd('C:/Users/fan/HeightProfile/_data/test/')
df <- read_stata('data_test_small.dta', encoding = NULL)
head(df, 3)

Running non-linear least square for One Individual¶

We will test with one individual, so we will grab out from full dataframe values for one individual for testing run.

# Filter data, just one child ID = 5, data between month 0 and 24
indi_id <- 5
df_one <- df %>% filter(S_seq_guat %in% c(indi_id)) %>% 
             filter(between(svymthRound, 0, 24)) %>% 
             select(hgt_dfrom0, ageInDays)
as_tibble(df_one)

Warning message:
"between() called on numeric vector with S3 class"

NLS run and also NLSLM run¶

nls from stats as nlsLM from minpack.lm. Run both, do results differ, do iterations differ?

We will use the nlslm package from minpack.lm, and also nls. nlslm is suppose to be a better package than nls from stats. In the example below, we get the same results, and also we got the same results from Stata for this child.

nlslm uses Levenberg–Marquardt algorithm, which is a dampened NLS method.

Running the stata command nl is dramatically slower than running this here in R.

# Parameter Initialization Values
ASYM_0 <- log(75)
A_0 <- -5.26
B_0 <- 0.63
bridge.start.points <- list(bridge.ASYM = ASYM_0, bridge.A = A_0, bridge.B = B_0)

# Construct Formula
bridge <- formula(hgt_dfrom0 ~ (exp(bridge.ASYM)*(1-exp(-exp(bridge.A)*ageInDays^(bridge.B)))))

# Running nlslm
results.nlslm <- nlsLM(bridge, data = df_one, start = bridge.start.points, trace = TRUE)
summary(results.nlslm)

# Running nls
results.nls <- nls(bridge, data = df_one, start = bridge.start.points, trace = TRUE)
summary(results.nls)

It.    0, RSS =    165.776, Par. =    4.31749      -5.26       0.63
It.    1, RSS =    15.4386, Par. =    4.37029     -5.388   0.692583
It.    2, RSS =     11.548, Par. =     4.0681   -5.58145   0.777016
It.    3, RSS =    11.1484, Par. =    3.98764   -5.63993   0.805193
It.    4, RSS =    11.1126, Par. =    3.94838   -5.65555   0.816505
It.    5, RSS =    11.0862, Par. =    3.86555   -5.68024   0.838763
It.    6, RSS =     11.074, Par. =    3.82855   -5.70779   0.852045
It.    7, RSS =    11.0733, Par. =    3.83486   -5.70428   0.850098
It.    8, RSS =    11.0733, Par. =    3.83439   -5.70501   0.850329
It.    9, RSS =    11.0733, Par. =    3.83451    -5.7049   0.850284

Formula: hgt_dfrom0 ~ (exp(bridge.ASYM) * (1 - exp(-exp(bridge.A) * ageInDays^(bridge.B))))

Parameters:
            Estimate Std. Error t value Pr(>|t|)    
bridge.ASYM   3.8345     0.7327   5.234  0.00195 ** 
bridge.A     -5.7049     0.6017  -9.481 7.84e-05 ***
bridge.B      0.8503     0.2449   3.472  0.01327 *  
---
Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1

Residual standard error: 1.359 on 6 degrees of freedom

Number of iterations to convergence: 9 
Achieved convergence tolerance: 1.49e-08

165.7757 :   4.317488 -5.260000  0.630000
155.2626 :   3.634832 -4.951104  0.719458
44.06405 :   3.6616354 -5.4299226  0.8213946
11.20044 :   3.8050826 -5.7639188  0.8681323
11.07493 :   3.8407802 -5.6939408  0.8469292
11.07334 :   3.8325933 -5.7064573  0.8509764
11.07332 :   3.8347244 -5.7046385  0.8501906
11.07332 :   3.834447 -5.704958  0.850307
11.07332 :   3.8344983 -5.7049065  0.8502869

Formula: hgt_dfrom0 ~ (exp(bridge.ASYM) * (1 - exp(-exp(bridge.A) * ageInDays^(bridge.B))))

Parameters:
            Estimate Std. Error t value Pr(>|t|)    
bridge.ASYM   3.8345     0.7327   5.234  0.00195 ** 
bridge.A     -5.7049     0.6017  -9.481 7.84e-05 ***
bridge.B      0.8503     0.2449   3.472  0.01327 *  
---
Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1

Residual standard error: 1.359 on 6 degrees of freedom

Number of iterations to convergence: 8 
Achieved convergence tolerance: 5.805e-06

# Results nlsm and nls together
param.esti <- tibble(params=names(bridge.start.points),
                     nlslm.estimates=coef(results.nlslm),
                     nls.estimates=coef(results.nls))
param.esti

# Structure as Dataframe
data.frame(t(as.matrix(coef(results.nlslm))))

# param.esti
# t(param.esti)
# t(param.esti)[,2:(length(start.points)-1)]

Graphical Results¶

What is the actual data pattern and the predicted data pattern?

# Results nlsm and nls together
data.predict <- bind_cols(df_one, hgt_dfrom0.nlslm.pred=fitted(results.nlslm))
data.predict

# Control Figure Size
options(repr.plot.width = 6, repr.plot.height = 4)

# Title
param.estimates.str <- paste(paste0(param.esti[[1]], ': ', round(param.esti[[2]], 3)), collapse='; ')
param.estimates.str
graph.title <- paste0('Actual and NLS Height Prediction\n', param.estimates.str)

# Graphing
predict.graph <- data.predict %>%
    gather(variable, value, -ageInDays) %>%
    ggplot(aes(x=ageInDays, y=value,
               colour=variable, linetype=variable)) +
    geom_line() +
    geom_point() +
    labs(title = graph.title,
         x = 'Days of Age',
         y = 'Height Change',
         caption = paste0('Individual ',indi_id,' Guatemala')) +
    scale_x_continuous(labels = as.character(data.predict$ageInDays),
                       breaks = data.predict$ageInDays)    
# Print Graph
print(predict.graph)

Warning message:
"attributes are not identical across measure variables;
they will be dropped"

Additional Outputs From Regression¶

generics could be applied for nlslm results

## all generics are applicable
coef(results.nlslm)
# confint(results.nlslm)
# deviance(results.nlslm)
df.residual(results.nlslm)
fitted(results.nlslm)
formula(results.nlslm)
# logLik(results.nlslm)
# predict(results.nlslm)
# print(results.nlslm)
# profile(results.nlslm)
# residuals(results.nlslm)
# summary(results.nlslm)
# update(results.nlslm)
vcov(results.nlslm)
# weights(results.nlslm)

hgt_dfrom0 ~ (exp(bridge.ASYM) * (1 - exp(-exp(bridge.A) * ageInDays^(bridge.B))))

Run and Catch Exception¶

For some individuals, initial values might not work, need to catch exception. When using the code, might not work for some individuals, need to be able to handle exceptions.

# Estimation Function to test starting points
f.test.startpoints <- function(start.points){
    results.nlslm <- nlsLM(bridge, data = df_one, start = start.points, trace = TRUE)
    param.esti.dataframe <- data.frame(t(as.matrix(coef(results.nlslm))))
    return(param.esti.dataframe)
}
# Error Filler Function
f.failed <- function(start.points){
    null.mat <- as.matrix(c(0,0,0))
    rownames(null.mat) <- names(start.points)
    param.esti.dataframe <- data.frame(t(null.mat))
    return(param.esti.dataframe)
}

# Starting Points
start.test.1 <- list(bridge.ASYM = log(75), bridge.A = -5.26, bridge.B = 0.63)
start.test.2 <- list(bridge.ASYM = NA, bridge.A = -100000, bridge.B = 1010010101)
# Start Points
tryCatch(f.test.startpoints(start.test.1), error = function(err){return(f.failed(start.test.1))})
tryCatch(f.test.startpoints(start.test.2), error = function(err){return(f.failed(start.test.2))})

It.    0, RSS =    165.776, Par. =    4.31749      -5.26       0.63
It.    1, RSS =    15.4386, Par. =    4.37029     -5.388   0.692583
It.    2, RSS =     11.548, Par. =     4.0681   -5.58145   0.777016
It.    3, RSS =    11.1484, Par. =    3.98764   -5.63993   0.805193
It.    4, RSS =    11.1126, Par. =    3.94838   -5.65555   0.816505
It.    5, RSS =    11.0862, Par. =    3.86555   -5.68024   0.838763
It.    6, RSS =     11.074, Par. =    3.82855   -5.70779   0.852045
It.    7, RSS =    11.0733, Par. =    3.83486   -5.70428   0.850098
It.    8, RSS =    11.0733, Par. =    3.83439   -5.70501   0.850329
It.    9, RSS =    11.0733, Par. =    3.83451    -5.7049   0.850284

hgt_dfrom0	ageInDays
17.2	365
28.4	731
5.0	91
0.0	0
12.5	183
23.5	548
17.0	274
20.0	457
25.2	639

hgt_dfrom0	ageInDays	hgt_dfrom0.nlslm.pred
17.2	365	18.273991
28.4	731	27.586828
5.0	91	6.612755
0.0	0	0.000000
12.5	183	11.276633
23.5	548	23.517319
17.0	274	15.058702
20.0	457	21.085226
25.2	639	25.662972

	bridge.ASYM	bridge.A	bridge.B
bridge.ASYM	0.5368158	0.2845770	-0.17086373
bridge.A	0.2845770	0.3620359	-0.12484134
bridge.B	-0.1708637	-0.1248413	0.05997523

ID_months	ID	S_country	IDnum	IDnumTsset	ID_ori	months	svymthRound	survey_year	survey_day	...	wgt_mono_lg	hgt_brdg	wgt_brdg	hgt_brdg_lg	wgt_brdg_lg	hgt_brdg_lgn	wgt_brdg_lgn	hgt_brdg_lgn_lg	wgt_brdg_lgn_lg	S_seq_guatcebu
10010014__0012	10010014	1	10010014	10010014	01__0014	12	12	NA	NA	...	NA	NA	NA	NA	NA	NA	NA	NA	NA	2
10010014__0102	10010014	1	10010014	NA	01__0014	102	102	NA	NA	...	NA	NA	NA	NA	NA	NA	NA	NA	NA	2
10010014__0004	10010014	1	10010014	10010014	01__0014	4	4	NA	NA	...	NA	NA	NA	NA	NA	NA	NA	NA	NA	2

params	nlslm.estimates	nls.estimates
bridge.ASYM	3.8345060	3.8344983
bridge.A	-5.7048979	-5.7049065
bridge.B	0.8502837	0.8502869