Testing nonlinear least square with the bridge model. What results do we get in R vs results from Stata?
# Library
# conda install -c conda-forge r-minpack.lm
library(tidyverse)
library(haven)
library(stats)
library(minpack.lm)
# Load Data
setwd('C:/Users/fan/HeightProfile/_data/test/')
df <- read_stata('data_test_small.dta', encoding = NULL)
head(df, 3)
We will test with one individual, so we will grab out from full dataframe values for one individual for testing run.
# Filter data, just one child ID = 5, data between month 0 and 24
indi_id <- 5
df_one <- df %>% filter(S_seq_guat %in% c(indi_id)) %>%
filter(between(svymthRound, 0, 24)) %>%
select(hgt_dfrom0, ageInDays)
as_tibble(df_one)
nls from stats as nlsLM from minpack.lm. Run both, do results differ, do iterations differ?
We will use the nlslm package from minpack.lm, and also nls. nlslm is suppose to be a better package than nls from stats. In the example below, we get the same results, and also we got the same results from Stata for this child.
nlslm uses Levenberg–Marquardt algorithm, which is a dampened NLS method.
Running the stata command nl is dramatically slower than running this here in R.
# Parameter Initialization Values
ASYM_0 <- log(75)
A_0 <- -5.26
B_0 <- 0.63
bridge.start.points <- list(bridge.ASYM = ASYM_0, bridge.A = A_0, bridge.B = B_0)
# Construct Formula
bridge <- formula(hgt_dfrom0 ~ (exp(bridge.ASYM)*(1-exp(-exp(bridge.A)*ageInDays^(bridge.B)))))
# Running nlslm
results.nlslm <- nlsLM(bridge, data = df_one, start = bridge.start.points, trace = TRUE)
summary(results.nlslm)
# Running nls
results.nls <- nls(bridge, data = df_one, start = bridge.start.points, trace = TRUE)
summary(results.nls)
# Results nlsm and nls together
param.esti <- tibble(params=names(bridge.start.points),
nlslm.estimates=coef(results.nlslm),
nls.estimates=coef(results.nls))
param.esti
# Structure as Dataframe
data.frame(t(as.matrix(coef(results.nlslm))))
# param.esti
# t(param.esti)
# t(param.esti)[,2:(length(start.points)-1)]
What is the actual data pattern and the predicted data pattern?
# Results nlsm and nls together
data.predict <- bind_cols(df_one, hgt_dfrom0.nlslm.pred=fitted(results.nlslm))
data.predict
# Control Figure Size
options(repr.plot.width = 6, repr.plot.height = 4)
# Title
param.estimates.str <- paste(paste0(param.esti[[1]], ': ', round(param.esti[[2]], 3)), collapse='; ')
param.estimates.str
graph.title <- paste0('Actual and NLS Height Prediction\n', param.estimates.str)
# Graphing
predict.graph <- data.predict %>%
gather(variable, value, -ageInDays) %>%
ggplot(aes(x=ageInDays, y=value,
colour=variable, linetype=variable)) +
geom_line() +
geom_point() +
labs(title = graph.title,
x = 'Days of Age',
y = 'Height Change',
caption = paste0('Individual ',indi_id,' Guatemala')) +
scale_x_continuous(labels = as.character(data.predict$ageInDays),
breaks = data.predict$ageInDays)
# Print Graph
print(predict.graph)
generics could be applied for nlslm results
## all generics are applicable
coef(results.nlslm)
# confint(results.nlslm)
# deviance(results.nlslm)
df.residual(results.nlslm)
fitted(results.nlslm)
formula(results.nlslm)
# logLik(results.nlslm)
# predict(results.nlslm)
# print(results.nlslm)
# profile(results.nlslm)
# residuals(results.nlslm)
# summary(results.nlslm)
# update(results.nlslm)
vcov(results.nlslm)
# weights(results.nlslm)
For some individuals, initial values might not work, need to catch exception. When using the code, might not work for some individuals, need to be able to handle exceptions.
# Estimation Function to test starting points
f.test.startpoints <- function(start.points){
results.nlslm <- nlsLM(bridge, data = df_one, start = start.points, trace = TRUE)
param.esti.dataframe <- data.frame(t(as.matrix(coef(results.nlslm))))
return(param.esti.dataframe)
}
# Error Filler Function
f.failed <- function(start.points){
null.mat <- as.matrix(c(0,0,0))
rownames(null.mat) <- names(start.points)
param.esti.dataframe <- data.frame(t(null.mat))
return(param.esti.dataframe)
}
# Starting Points
start.test.1 <- list(bridge.ASYM = log(75), bridge.A = -5.26, bridge.B = 0.63)
start.test.2 <- list(bridge.ASYM = NA, bridge.A = -100000, bridge.B = 1010010101)
# Start Points
tryCatch(f.test.startpoints(start.test.1), error = function(err){return(f.failed(start.test.1))})
tryCatch(f.test.startpoints(start.test.2), error = function(err){return(f.failed(start.test.2))})