1 Interactive GAMLSS model building

  1. Load data, create model formula and fitting Gaussian GAM
library(mgcViz)
library(qgam) 
library(gridExtra)
data("UKload")
form <- NetDemand~s(wM,k=20,bs='cr') + s(wM_s95,k=20,bs='cr') + 
        s(Posan, k=30, bs = "cc") + Dow + s(Trend,k=4,bs='cr') + 
        NetDemand.48 + Holy

fit <- gam(form, data = UKload) 
fit0 <- getViz(fit, nsim = 50)
  1. Look for patterns in the residuals conditional mean
pl <- list()
pl[[1]] <- ( check1D(fit0, "wM") + l_gridCheck1D(gridFun = mean, stand = "sc") )$ggObj      # OK
pl[[2]] <- ( check1D(fit0, "wM_s95") + l_gridCheck1D(gridFun = mean, stand = "sc") )$ggObj  # OK
pl[[3]] <- ( check1D(fit0, "Posan") + l_gridCheck1D(gridFun = mean, stand = "sc") )$ggObj   # Not OK

grid.arrange(grobs = pl, ncol = 2)

Plots for wM and wM_s95 are kind of fine. But in plot for Posan, we have positive mean residuals in January and negative in December. Maybe using a cyclic effect for Posan was a mistake: the effect at the beginning of January does not match that at the end of December! This might require more careful handling of winter holidays.

  1. Remove cyclic smooth and use check.gam
form <- NetDemand ~ s(wM,k=20,bs='cr') + 
                    s(wM_s95,k=20,bs='cr') + 
                    s(Posan,bs='cr',k=30) +  # <- Changed `cc` to `cr`
                    Dow + s(Trend,k=4) + NetDemand.48 + Holy

fit <- gam(form, data = UKload) 
fit1 <- getViz(fit, nsim = 50)

# Pattern in residuals mean is gone!
check1D(fit1, "Posan") + l_gridCheck1D(gridFun = mean)

AIC( fit0 ) - AIC( fit1 ) # Some improvement
## [1] 88.3984

The fit seems better without the cyclic smooth. However:

print(plot(fit1), pages = 1)

check(fit1)
## 
## Method: GCV   Optimizer: magic
## Smoothing parameter selection converged after 7 iterations.
## The RMS GCV score gradient at convergence was 1.726525 .
## The Hessian was positive definite.
## Model rank =  79 / 79 
## 
## Basis dimension (k) checking results. Low p-value (k-index<1) may
## indicate that k is too low, especially if edf is close to k'.
## 
##              k'   edf k-index p-value  
## s(wM)     19.00  5.32    0.99   0.250  
## s(wM_s95) 19.00  2.90    1.01   0.660  
## s(Posan)  29.00 23.61    0.95   0.015 *
## s(Trend)   3.00  2.96    1.10   1.000  
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1

the p-value of Posan is quite low in check.gam and this effect also drops sharply in December. Maybe an adaptive smooth would be better.

  1. Fit model and plot smooths
form <- NetDemand ~ s(wM,k=20,bs='cr') + 
                    s(wM_s95,k=20,bs='cr') + 
                    s(Posan,bs='ad',k=30) +  # <- Changed `cr` to `ad`
                    Dow + s(Trend,k=4) + NetDemand.48 + Holy
fit <- gam(form, data = UKload) 
fit2 <- getViz(fit, nsim = 50)

AIC( fit1 ) - AIC( fit2 ) # Lower AIC!
## [1] 33.47693
plot(sm(fit2, 3), n = 400) + l_points() + l_fitLine() + l_ciLine()

check(fit2) # Posan is not significant any more
## 
## Method: GCV   Optimizer: magic
## Smoothing parameter selection converged after 30 iterations.
## The RMS GCV score gradient at convergence was 0.05940024 .
## The Hessian was positive definite.
## Model rank =  79 / 79 
## 
## Basis dimension (k) checking results. Low p-value (k-index<1) may
## indicate that k is too low, especially if edf is close to k'.
## 
##              k'   edf k-index p-value
## s(wM)     19.00  7.43    1.00    0.42
## s(wM_s95) 19.00  3.30    1.00    0.47
## s(Posan)  29.00 28.75    0.97    0.14
## s(Trend)   3.00  2.95    1.08    1.00

We now look at the conditional variance of the residuals. First serie of checks:

pl <- list()
pl[[1]] <- ( check1D(fit2, "wM") + l_densCheck(n = c(100, 100), tol = -1) )$ggObj
pl[[2]] <- ( check1D(fit2, "wM_s95") + l_densCheck(n = c(100, 100), tol = -1) )$ggObj
pl[[3]] <- ( check1D(fit2, "Posan") + l_densCheck(n = c(100, 100), tol = -1) )$ggObj

grid.arrange(grobs = pl, ncol = 2) # Some evidence of heteroscedasticity

The residuals seem over-dispersed for \(wM < 20\) and for \(Posan < 0.5\) or \(Posan > 0.9\). Second serie of checks:

pl <- list()
pl[[1]] <- ( check1D(fit2, "wM") + l_gridCheck1D(gridFun = sd, stand = "sc") )$ggObj
pl[[2]] <- ( check1D(fit2, "wM_s95") + l_gridCheck1D(gridFun = sd, stand = "sc") )$ggObj
pl[[3]] <- ( check1D(fit2, "Posan") + l_gridCheck1D(gridFun = sd, stand = "sc") )$ggObj

grid.arrange(grobs = pl, ncol = 2) # More evidence of heteroscedasticity

The variance changes a lot along all the three variables. We could address this by fitting a GAMLSS model with variable scale.

  1. Fitting gaulss GAMLSS model
form <- list(NetDemand ~ s(wM,k=20,bs='cr') + 
             s(wM_s95,k=20,bs='cr') + 
             s(Posan,bs='ad',k=30) + 
             Dow + s(Trend,k=4) + NetDemand.48 + Holy, 
             ~ s(wM_s95,k=10,bs='cr') + 
               s(Posan,bs='cr',k=20) + 
               Dow)

fit <- gam(form, family = gaulss, optimizer = "efs", data = UKload)

# Add function for simulating residuals
fit$family$rd <- function(mu,wt,scale){ rnorm(nrow(mu), mu[ , 1], sqrt(scale/wt)/mu[ , 2]) }

fit3 <- getViz(fit, nsim = 50)

AIC(fit2) - AIC(fit3) # Massive decrease!
## [1] 976.0566

The AIC has improved quite a lot! Now we repeat the variance checks:

pl <- list()
pl[[1]] <- ( check1D(fit3, "wM") + l_gridCheck1D(gridFun = sd, stand = "sc") )$ggObj
pl[[2]] <- ( check1D(fit3, "wM_s95") + l_gridCheck1D(gridFun = sd, stand = "sc") )$ggObj
pl[[3]] <- ( check1D(fit3, "Posan") + l_gridCheck1D(gridFun = sd, stand = "sc") )$ggObj

grid.arrange(grobs = pl, ncol = 2) 

There variance is much less variable, relative to the location-only Gaussian model.

  1. Looking at skewness Now we do a global residual check using qq.gam:
qq.gam(fit3, method = "simul1", rep = 50) 

There is evidence of fat tails, and possibly skewness to the left. Let’s look at how the skewness changes with the covariates:

library(e1071)
pl <- list()
pl[[1]] <- ( check1D(fit3, "wM_s95") + l_gridCheck1D(gridFun = skewness, stand = "sc") )$ggObj
pl[[2]] <- ( check1D(fit3, "Posan") + l_gridCheck1D(gridFun = skewness, stand = "sc") )$ggObj

grid.arrange(grobs = pl, ncol = 2) # Some skewness

Both plots show several departures from the model based estimates (the model is Gaussian, so the distribution of the response is symmetric). Residuals seem very strongly skewed to the left for \(\text{Posan} \approx 0.25\), which corresponds roughly to March.

  1. Shash GAMLSS model
library(mgcFam)
form <- list(NetDemand ~ s(wM,k=20,bs='cr') + 
               s(wM_s95,k=20,bs='cr') + 
               s(Posan,bs='ad',k=30) + 
               Dow + s(Trend,k=4) + NetDemand.48 + Holy, 
             ~ s(wM_s95,k=10,bs='cr') + 
               s(Posan,bs='cr',k=20) + 
               Dow, 
             ~ s(Posan, k = 10, bs='cr') + Dow, 
             ~ 1) # If convergence problems arise use
                  # ~ -1 + s(Holy, bs = "re", sp = 1e6)

fit <- gam(form, family = shash, optimizer = "efs", data = UKload)
fit4 <- getViz(fit, nsim = 50)

AIC(fit3) - AIC(fit4) # Decreased again by a lot
## [1] 262.3851
qq.gam(fit4, method = "simul1", rep = 50) # Better on left tail

There is a clear improvement, especially in the left tail of the QQ-plot. Now we re-check how the skewness varies along the covariates:

pl <- list()
pl[[1]] <- ( check1D(fit4, "wM_s95") + l_gridCheck1D(gridFun = skewness, stand = "sc") )$ggObj
pl[[2]] <- ( check1D(fit4, "Posan") + l_gridCheck1D(gridFun = skewness, stand = "sc") )$ggObj

grid.arrange(grobs = pl, ncol = 2) 

Along Posan there is a quite a lot of improvement, the points with very low skewness in March have disappeared. More limited improvement along wM_s95.

2 BAM modelling and looking for interactions

  1. Load data, create model formula and fitting Gaussian GAM
library( mgcViz )
data(UKload48)

fit <- bam(NetDemand ~ Dow + Holy +
             NetDemand.48 +
             s(Tendance, k = 6, bs = "cr") + 
             s(wM, bs = "cr") + 
             s(Instant, bs = "cr") +
             s(wM_s95, bs = 'cr') + 
             s(Posan,bs='ad',k=30),
           data = UKload48, discrete = TRUE, nthreads = 4)
  1. Load data, create model formula and fitting Gaussian GAM
fit0 <- getViz(fit, nsim = 50)

tmp <- check(fit0) 
## 
## Method: fREML   Optimizer: perf chol
## $grad
##  [1] -3.879942e-07 -2.020181e-06 -1.053579e-06 -8.212841e-07  2.847952e-04
##  [6]  4.402442e-04  1.815212e-04  3.041543e-04  4.740143e-04 -1.477468e-03
## 
## $hess
##            [,1]          [,2]          [,3]          [,4]          [,5]
##    1.854278e+00  2.573978e-03 -1.460908e-03 -2.289393e-04  3.765879e-07
##    2.573978e-03  1.966208e+00 -8.334463e-04 -5.295460e-02 -9.372088e-07
##   -1.460908e-03 -8.334463e-04  3.992713e+00 -1.188417e-03 -4.526128e-07
##   -2.289393e-04 -5.295460e-02 -1.188417e-03  3.243458e+00 -1.534647e-06
##    3.765879e-07 -9.372088e-07 -4.526128e-07 -1.534647e-06  2.848092e-04
##    2.596757e-06 -6.465393e-06 -3.095284e-06 -1.065496e-05  1.352626e-07
##   -6.514561e-03 -1.811104e-02 -9.398786e-03 -3.720404e-03  1.666693e-04
##   -3.255931e-07 -4.627052e-07 -2.569934e-07  2.962951e-07 -1.131803e-10
##   -4.388206e-07 -6.206715e-07 -3.441708e-07  4.163842e-07 -1.693495e-10
## d -1.971004e+00 -3.151595e+00 -3.997834e+00 -3.323624e+00 -5.247632e-04
##            [,6]          [,7]          [,8]          [,9]         [,10]
##    2.596757e-06 -6.514561e-03 -3.255931e-07 -4.388206e-07 -1.971004e+00
##   -6.465393e-06 -1.811104e-02 -4.627052e-07 -6.206715e-07 -3.151595e+00
##   -3.095284e-06 -9.398786e-03 -2.569934e-07 -3.441708e-07 -3.997834e+00
##   -1.065496e-05 -3.720404e-03  2.962951e-07  4.163842e-07 -3.323624e+00
##    1.352626e-07  1.666693e-04 -1.131803e-10 -1.693495e-10 -5.247632e-04
##    4.417913e-04  2.689172e-03  6.966998e-10 -1.034654e-09 -3.626508e-03
##    2.689172e-03  1.174069e+01  3.484087e-05 -1.971777e-05 -1.386381e+01
##    6.966998e-10  3.484087e-05  3.041539e-04 -1.395792e-09 -4.066988e-04
##   -1.034654e-09 -1.971777e-05 -1.395792e-09  4.740119e-04 -5.462274e-04
## d -3.626508e-03 -1.386381e+01 -4.066988e-04 -5.462274e-04  4.818400e+04
## 
## Model rank =  70 / 70 
## 
## Basis dimension (k) checking results. Low p-value (k-index<1) may
## indicate that k is too low, especially if edf is close to k'.
## 
##                k'   edf k-index p-value    
## s(Tendance)  5.00  4.94    0.81  <2e-16 ***
## s(wM)        9.00  7.30    1.01    0.82    
## s(Instant)   9.00  9.00    0.97    0.03 *  
## s(wM_s95)    9.00  7.65    0.99    0.35    
## s(Posan)    29.00 28.73    0.89  <2e-16 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1

The p-values for Tendance and Posan are very low, suggesting that we should increase the dimension of their spline bases (by increasing k). However, if we do this for Tendance (which is simply a time counter) we will ultimately end up interpolating the data! Increasing the basis dimension for Posan might lead to a similar problem.

The p-values of wM, wM_s95 and Instant are not low, but the Effective Degrees of Freedom (EDF) of the fitted smooths is very close to the bases dimension (k'). Hence, we might want to increase \(k\) for these effects. Before doing so, we look for residuals patterns along these covariates and NetDemand.48:

library(gridExtra)
pl <- lapply(c("NetDemand.48", "wM", "wM_s95", "Instant"), 
             function(.nam){
               ( check1D(fit0, x = .nam) + l_gridCheck1D(n = 40) )$ggObj
             })

grid.arrange(grobs = pl, ncol = 2)

There is clearly a systematic pattern along NetDemand.48, we should clearly use a smooth effect for this variable! Also, wM and wM_s95 show large but irregular deviations, while also Instant we see large deviations during peak times (roughly 8am and 6pm).

  1. Re-fit with larger bases, re-check and look for interactions
fit <- bam(NetDemand ~ Dow + Holy +
             s(NetDemand.48, bs = "cr", k = 20) +
             s(Tendance, k = 6, bs = "cr") + 
             s(wM, bs = "cr", k = 20) + 
             s(Instant, bs = "cr", k = 40) +
             s(wM_s95, bs = 'cr', k = 20) + 
             s(Posan,bs='ad',k=30),
           data = UKload48, discrete = TRUE, nthreads = 4)

fit1 <- getViz(fit, nsim = 50)

tmp <- check(fit1) # Looks better
## 
## Method: fREML   Optimizer: perf chol
## $grad
##  [1]  1.778520e-06 -2.151317e-06 -6.097171e-06 -6.424792e-06  3.046655e-06
##  [6]  4.463902e-04  2.877421e-04  8.817041e-05  1.122707e-03  8.967624e-04
## [11] -2.532862e-03
## 
## $hess
##            [,1]          [,2]          [,3]          [,4]          [,5]
##    6.688547e+00 -2.810530e-04 -6.533680e-03  2.659035e-02  6.299057e-03
##   -2.810530e-04  1.848478e+00  2.367964e-03 -6.447900e-03  8.917002e-04
##   -6.533680e-03  2.367964e-03  3.247601e+00  2.168832e-05 -3.401333e-01
##    2.659035e-02 -6.447900e-03  2.168832e-05  1.541956e+01 -1.937831e-03
##    6.299057e-03  8.917002e-04 -3.401333e-01 -1.937831e-03  4.372790e+00
##    4.629012e-07  6.037813e-07  2.108691e-06 -2.625497e-06 -1.758470e-06
##    1.502143e-06  1.891151e-06  6.628774e-06 -8.174515e-06 -5.445545e-06
##    9.757384e-03 -7.030660e-03 -1.812362e-02 -3.703178e-02  6.911958e-03
##    1.049361e-06 -1.271863e-06 -3.603214e-06 -3.809565e-06  1.798006e-06
##    7.291557e-07 -8.794792e-07 -2.494061e-06 -2.615293e-06  1.248686e-06
## d -7.538514e+00 -1.969779e+00 -6.333322e+00 -1.572614e+01 -7.706416e+00
##            [,6]          [,7]          [,8]          [,9]         [,10]
##    4.629012e-07  1.502143e-06  9.757384e-03  1.049361e-06  7.291557e-07
##    6.037813e-07  1.891151e-06 -7.030660e-03 -1.271863e-06 -8.794792e-07
##    2.108691e-06  6.628774e-06 -1.812362e-02 -3.603214e-06 -2.494061e-06
##   -2.625497e-06 -8.174515e-06 -3.703178e-02 -3.809565e-06 -2.615293e-06
##   -1.758470e-06 -5.445545e-06  6.911958e-03  1.798006e-06  1.248686e-06
##    4.464265e-04  1.591192e-07  2.682714e-04 -7.064602e-10 -5.389285e-10
##    1.591192e-07  2.885667e-04  1.962302e-03  1.747633e-09 -1.504871e-09
##    2.682714e-04  1.962302e-03  1.173283e+01  1.267158e-04 -3.818823e-05
##   -7.064602e-10  1.747633e-09  1.267158e-04  1.122701e-03 -9.812757e-09
##   -5.389285e-10 -1.504871e-09 -3.818823e-05 -9.812757e-09  8.967538e-04
## d -8.316128e-04 -2.609992e-03 -1.386359e+01 -1.499909e-03 -1.032984e-03
##           [,11]
##   -7.538514e+00
##   -1.969779e+00
##   -6.333322e+00
##   -1.572614e+01
##   -7.706416e+00
##   -8.316128e-04
##   -2.609992e-03
##   -1.386359e+01
##   -1.499909e-03
##   -1.032984e-03
## d  4.818400e+04
## 
## Model rank =  138 / 138 
## 
## Basis dimension (k) checking results. Low p-value (k-index<1) may
## indicate that k is too low, especially if edf is close to k'.
## 
##                    k'   edf k-index p-value    
## s(NetDemand.48) 19.00 16.08    1.03    0.97    
## s(Tendance)      5.00  4.94    0.79  <2e-16 ***
## s(wM)           19.00 13.67    0.98    0.05 *  
## s(Instant)      39.00 32.45    0.99    0.17    
## s(wM_s95)       19.00 16.41    0.98    0.14    
## s(Posan)        29.00 28.73    0.91  <2e-16 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
# 1D residuals checks
pl <- lapply(c("NetDemand.48", "wM", "wM_s95", "Instant"), 
       function(.nam){
         ( check1D(fit1, x = .nam) + l_gridCheck1D(n = 40) )$ggObj
       })

grid.arrange(grobs = pl, ncol = 2)

EDF now lower then k' for all variables, apart from Tendance and Posan (but this is ok, see above). Output of looks better than before, especially along NetDemand and Instant. However, notice that around 8am there are still be some problems along Instant.

We start by looking for interactions with Instant:

pl <- lapply(c("NetDemand.48", "wM", "wM_s95", "Posan"), 
             function(.nam){
               ( check2D(fit1, x1 = .nam, x2 = "Instant") + l_gridCheck2D() )$ggObj
             })

grid.arrange(grobs = pl, ncol = 2)

All of plots show systematic patterns: all interactions with Instant need to be included. Now we look for interactions with Posan:

pl <- lapply(c("NetDemand.48", "wM", "wM_s95"), 
             function(.nam){
               ( check2D(fit1, x1 = .nam, x2 = "Posan") + l_gridCheck2D() )$ggObj
             })

grid.arrange(grobs = pl, ncol = 2)

The effect of NetDemand might be changing with Posan, but let’s not add it for the moment.

  1. Fit model with interactions and re-check
fit <- bam(NetDemand ~ Dow + Holy +
             s(NetDemand.48, bs = "cr", k = 20) +
             s(Tendance, k = 6, bs = "cr") + 
             s(wM, bs = "cr", k = 20) + 
             s(Instant, bs = "cr", k = 40) +
             s(wM_s95, bs = 'cr', k = 20) + 
             s(Posan,bs='ad',k=30) +
             ti(NetDemand.48, Instant, k = c(15, 15), bs = c("cr", "cr")) +
             ti(wM, Instant, k = c(10, 10), bs = c("cr", "cr")) +
             ti(wM_s95, Instant, k = c(15, 15), bs = c("cr", "cr")) +
             ti(Posan, Instant, k = c(10, 15), bs = c("cr", "cr")),
           data = UKload48, discrete = TRUE, nthreads = 4)

fit2 <- getViz(fit, nsim = 50)
tmp <- check(fit2) 
## 
## Method: fREML   Optimizer: perf chol
## $grad
##  [1]  5.994542e-07 -8.873520e-07 -2.190457e-06 -2.240703e-06  5.892996e-07
##  [6]  6.062764e-04  6.356470e-04  1.633617e-04  5.076111e-04  4.797934e-04
## [11]  2.856495e-07 -9.974044e-07 -5.088769e-07 -4.623437e-07 -3.493327e-06
## [16]  1.106815e-07 -4.747773e-06 -4.930084e-06 -1.242561e-03
## 
## $hess
##            [,1]          [,2]          [,3]          [,4]          [,5]
##    2.331697e+00  2.121566e-03 -5.783857e-03 -1.467073e-01 -3.736578e-04
##    2.121566e-03  1.892281e+00  2.137840e-03 -4.264195e-03  3.608335e-03
##   -5.783857e-03  2.137840e-03  3.331134e+00  1.310081e-02 -2.410495e-01
##   -1.467073e-01 -4.264195e-03  1.310081e-02  1.500503e+01  7.511579e-03
##   -3.736578e-04  3.608335e-03 -2.410495e-01  7.511579e-03  3.547025e+00
##   -1.046872e-06  9.509650e-08  1.153534e-06 -8.889754e-07 -7.077417e-07
##   -2.982943e-06  2.892996e-07  3.431462e-06 -2.627676e-06 -2.087016e-06
##    2.186241e-03 -7.146985e-03 -1.434389e-02 -2.216978e-02  2.558532e-03
##    3.274242e-07 -4.898563e-07 -1.206836e-06 -1.238648e-06  3.190908e-07
##    2.721246e-07 -3.975628e-07 -9.837450e-07 -1.002154e-06  2.702423e-07
##   -2.059360e-01  3.852283e-03 -5.386387e-03  2.587725e-01  8.843384e-03
##   -5.569181e-01  3.410220e-04 -1.286204e-02 -7.477729e-01  1.612806e-02
##   -1.669055e-02  5.602470e-03  1.423993e-01  2.018703e-01 -2.944718e-01
##   -2.696096e-03  4.730007e-03 -1.151975e-01 -2.422195e-02  1.238242e-02
##    1.295752e-02  1.431499e-02  7.242019e-02 -3.552551e-03  1.930669e-01
##    1.012657e-02  8.339674e-03 -2.227012e-03  3.184345e-02  6.416552e-02
##   -6.762814e-03 -5.090689e-03  3.237245e-03 -4.387092e-02 -8.957144e-04
##   -1.800992e-03 -7.460591e-03 -4.607124e-03  5.122034e-02  3.120589e-04
## d -3.566755e+00 -1.978663e+00 -6.379217e+00 -1.652508e+01 -7.109560e+00
##            [,6]          [,7]          [,8]          [,9]         [,10]
##   -1.046872e-06 -2.982943e-06  2.186241e-03  3.274242e-07  2.721246e-07
##    9.509650e-08  2.892996e-07 -7.146985e-03 -4.898563e-07 -3.975628e-07
##    1.153534e-06  3.431462e-06 -1.434389e-02 -1.206836e-06 -9.837450e-07
##   -8.889754e-07 -2.627676e-06 -2.216978e-02 -1.238648e-06 -1.002154e-06
##   -7.077417e-07 -2.087016e-06  2.558532e-03  3.190908e-07  2.702423e-07
##    6.063400e-04  2.375115e-07  3.605151e-04 -2.193015e-10 -2.087189e-10
##    2.375115e-07  6.366868e-04  2.208675e-03  1.245560e-09 -5.171679e-10
##    3.605151e-04  2.208675e-03  1.299123e+01  1.282617e-04  3.521717e-05
##   -2.193015e-10  1.245560e-09  1.282617e-04  5.076144e-04  1.212132e-09
##   -2.087189e-10 -5.171679e-10  3.521717e-05  1.212132e-09  4.797939e-04
##    2.650065e-06  7.954783e-06  1.333553e-02  1.608636e-07  1.249695e-07
##    1.544318e-06  4.586789e-06 -2.530751e-03 -5.476111e-07 -4.500903e-07
##    4.683293e-07  1.167701e-06 -5.370863e-03 -2.860650e-07 -2.228311e-07
##   -7.292931e-07 -2.132041e-06 -5.887478e-03 -2.520000e-07 -2.103566e-07
##   -9.875900e-07 -3.138565e-06 -3.432522e-02 -1.916730e-06 -1.576889e-06
##    1.307135e-06  3.820910e-06  5.765322e-03  6.648229e-08  4.422255e-08
##   -4.510261e-06 -1.318060e-05 -5.433215e-02 -2.618424e-06 -2.129712e-06
##   -4.960603e-06 -1.451150e-05 -5.792058e-02 -2.724341e-06 -2.206118e-06
## d -1.031057e-03 -3.030899e-03 -1.394013e+01 -6.863413e-04 -5.563238e-04
##           [,11]         [,12]         [,13]         [,14]         [,15]
##   -2.059360e-01 -5.569181e-01 -1.669055e-02 -2.696096e-03  1.295752e-02
##    3.852283e-03  3.410220e-04  5.602470e-03  4.730007e-03  1.431499e-02
##   -5.386387e-03 -1.286204e-02  1.423993e-01 -1.151975e-01  7.242019e-02
##    2.587725e-01 -7.477729e-01  2.018703e-01 -2.422195e-02 -3.552551e-03
##    8.843384e-03  1.612806e-02 -2.944718e-01  1.238242e-02  1.930669e-01
##    2.650065e-06  1.544318e-06  4.683293e-07 -7.292931e-07 -9.875900e-07
##    7.954783e-06  4.586789e-06  1.167701e-06 -2.132041e-06 -3.138565e-06
##    1.333553e-02 -2.530751e-03 -5.370863e-03 -5.887478e-03 -3.432522e-02
##    1.608636e-07 -5.476111e-07 -2.860650e-07 -2.520000e-07 -1.916730e-06
##    1.249695e-07 -4.500903e-07 -2.228311e-07 -2.103566e-07 -1.576889e-06
##    1.949472e+01  4.198084e+00 -1.476574e-01 -3.097307e-03  8.022347e-02
##    4.198084e+00  5.960086e+00 -4.664029e-02 -1.878768e-03 -2.387247e-02
##   -1.476574e-01 -4.664029e-02  9.917619e+00  1.768287e+00 -2.324975e+00
##   -3.097307e-03 -1.878768e-03  1.768287e+00  4.377987e+00 -6.347992e-02
##    8.022347e-02 -2.387247e-02 -2.324975e+00 -6.347992e-02  1.183734e+01
##   -3.109459e-02  1.065725e-01 -3.187187e-01 -1.272595e+00  2.933715e+00
##   -7.944680e-02  1.140839e-03  2.896550e-02 -4.839059e-03 -6.706519e-02
##    4.035506e-02  1.581601e-01 -1.952115e-03 -8.747982e-03  3.593748e-02
## d -3.740783e+01 -1.574015e+01 -1.926310e+01 -6.382640e+00 -2.836106e+01
##           [,16]         [,17]         [,18]         [,19]
##    1.012657e-02 -6.762814e-03 -1.800992e-03 -3.566755e+00
##    8.339674e-03 -5.090689e-03 -7.460591e-03 -1.978663e+00
##   -2.227012e-03  3.237245e-03 -4.607124e-03 -6.379217e+00
##    3.184345e-02 -4.387092e-02  5.122034e-02 -1.652508e+01
##    6.416552e-02 -8.957144e-04  3.120589e-04 -7.109560e+00
##    1.307135e-06 -4.510261e-06 -4.960603e-06 -1.031057e-03
##    3.820910e-06 -1.318060e-05 -1.451150e-05 -3.030899e-03
##    5.765322e-03 -5.433215e-02 -5.792058e-02 -1.394013e+01
##    6.648229e-08 -2.618424e-06 -2.724341e-06 -6.863413e-04
##    4.422255e-08 -2.129712e-06 -2.206118e-06 -5.563238e-04
##   -3.109459e-02 -7.944680e-02  4.035506e-02 -3.740783e+01
##    1.065725e-01  1.140839e-03  1.581601e-01 -1.574015e+01
##   -3.187187e-01  2.896550e-02 -1.952115e-03 -1.926310e+01
##   -1.272595e+00 -4.839059e-03 -8.747982e-03 -6.382640e+00
##    2.933715e+00 -6.706519e-02  3.593748e-02 -2.836106e+01
##    6.104837e+00  5.654238e-02  2.558288e-01 -1.122483e+01
##    5.654238e-02  2.029319e+01  4.787180e+00 -2.662145e+01
##    2.558288e-01  4.787180e+00  2.219303e+01 -3.188978e+01
## d -1.122483e+01 -2.662145e+01 -3.188978e+01  4.818200e+04
## 
## Model rank =  737 / 737 
## 
## Basis dimension (k) checking results. Low p-value (k-index<1) may
## indicate that k is too low, especially if edf is close to k'.
## 
##                              k'    edf k-index p-value    
## s(NetDemand.48)           19.00   8.13    0.99   0.180    
## s(Tendance)                5.00   4.96    0.82  <2e-16 ***
## s(wM)                     19.00  13.76    0.99   0.275    
## s(Instant)                39.00  34.05    0.99   0.325    
## s(wM_s95)                 19.00  15.22    0.96   0.005 ** 
## s(Posan)                  29.00  28.89    0.92  <2e-16 ***
## ti(NetDemand.48,Instant) 196.00 107.30    0.96  <2e-16 ***
## ti(wM,Instant)            81.00  52.29    0.98   0.060 .  
## ti(wM_s95,Instant)       196.00  80.17    1.00   0.635    
## ti(Posan,Instant)        126.00 118.02    0.93  <2e-16 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1

Not bad, the gaps between EDF and k' seem sufficient. Now lets repeat the visual checks

pl <- lapply(c("NetDemand.48", "wM", "wM_s95", "Posan"), 
             function(.nam){
               ( check2D(fit2, x1 = .nam, x2 = "Instant") + l_gridCheck2D() )$ggObj
             })

grid.arrange(grobs = pl, ncol = 2)

Good, the residual patterns are much less visible. Let’s looks again at the interactions with Posan:

pl <- lapply(c("NetDemand.48", "wM", "wM_s95"), 
             function(.nam){
               ( check2D(fit2, x1 = .nam, x2 = "Posan") + l_gridCheck2D() )$ggObj
             })

grid.arrange(grobs = pl, ncol = 2)

The pattern along NetDemand.48-Posan seems gone now… maybe not adding it was a good idea?

  1. Visualizing the smooth effects

We start by plotting all the marginal effects on one page:

print(plot(fit2, n = 200, select = 1:6), pages = 1)

Looks good, but the effect of wM_s95 seems a little bit too wiggly. Also, one might want to have a closer look at what happens along Posan, maybe it is possible to do something better to taking into account the effect of the Christmas holidays. Now we do the same with the 2D interactions:

print(plot(fit2, n2 = 100, select = 7:10) + theme(legend.position="none"), 
      pages = 1)

Notice that we used theme to remove the legend. We now produce some customized plots:

pl <- lapply(7:10, 
             function(.ii){
               ( plot(sm(fit2, .ii), too.far = -1, maxpo = 500) + 
                 l_fitRaster(noiseup = T, pFun = zto1(0.05, 3, 0.1)) + 
                 l_fitContour() + l_points() )$ggObj
             })

grid.arrange(grobs = pl, ncol = 2)

Here we used noiseup = TRUE to add noise to the fitted surface, which gives an idea about the uncertainty in each effect. We also used pFun to make so that the opacity of the heatmap is proportional to the statistical significance of the effect at each locations. We also set too.far = -1 to avoid having grey areas far from design points, and we added a sub-sample of only 500 design points using maxpo = 500.

This code produces some RGL plots, which unfortunately will not appear in the html file:

plotRGL(sm(fit2, 7), residuals = T)
## Warning in par3d(userMatrix = structure(c(1, 0, 0, 0, 0,
## 0.342020143325668, : font family "sans" not found, using "bitmap"
clear3d()
plotRGL(sm(fit2, 8), residuals = T)
clear3d()
plotRGL(sm(fit2, 9), residuals = T)
clear3d()
plotRGL(sm(fit2, 10), residuals = T)
rgl.close()
  1. Further model development: looking at load variance and skewness

We can look at the variance as follows

pl <- lapply(c("NetDemand.48", "wM", "wM_s95", "Posan"), 
             function(.nam){
               ( check2D(fit2, x1 = .nam, x2 = "Instant") + 
                   l_gridCheck2D(gridFun = sd) )$ggObj
             })

grid.arrange(grobs = pl, ncol = 2)

The variance varies strongly along Instant, and it seems to peak around 8am (especially in the winter and at low temperatures). We do the same for skewness:

library(e1071)
pl <- lapply(c("NetDemand.48", "wM", "wM_s95", "Posan"), 
             function(.nam){
               ( check2D(fit2, x1 = .nam, x2 = "Instant") + 
                   l_gridCheck2D(gridFun = skewness) )$ggObj
             })

grid.arrange(grobs = pl, ncol = 2)

It seems that the load distribution is positively skewed at night. We might be able to take this (and the variance) pattern into account using a GAMLSS model (e.g. usign the gaulss, gevlss or shash families). But this would take longer to fit, because bam is unavailable for such models.