GWR and Mixed GWR with spatial autocorrelation

Estimation of GWR/MGWR with spatial dependance.

The MGWRSAR function requires a dataframe and a matrix of coordinates, and eventually a spatial weight matrix if model include spatiale dependence. The package includes a simulated data example which is used for this vignette.

library(mgwrsar)
#> Loading required package: Rcpp
#> Loading required package: sp
#> Loading required package: leaflet
#> Loading required package: Matrix
## loading data example
data(mydata)
coord=as.matrix(mydata[,c("x","y")])
head(coord)
#>             x       y
#> [1,] 872142.8 6234075
#> [2,] 872894.3 6233366
#> [3,] 878615.1 6229884
#> [4,] 869936.0 6228408
#> [5,] 866441.8 6231471
#> [6,] 866952.9 6222411

head(mydata)
#>   Y_mgwrsar_1_0_kv X0       X1        X2        X3       Beta0      Beta1
#> 1       0.44146014  1 2.396274 0.4763857 0.6854775  0.11599551 0.26922206
#> 2       0.02223952  1 1.684776 0.3303000 0.8994048  0.11533789 0.25784575
#> 3       3.41607993  1 2.369278 1.5117796 0.5808293  0.20973774 0.55723064
#> 4       0.28497090  1 4.208743 1.0916590 1.4058201 -0.08706293 0.06795084
#> 5      -0.79417336  1 3.109543 0.6487661 1.6977387 -0.08209832 0.21763086
#> 6       6.78538775  1 7.284831 0.8828887 1.5085908 -0.28330886 0.52468859
#>       Beta2      Beta3    lambda     Y_ols    Y_sar       Y_gwr      Y_mgwr
#> 1 0.6772792 -1.1106223 0.2408643 0.9890454 2.010387  0.32246490  0.39829402
#> 2 0.6751576 -1.0326101 0.2273072 0.2732833 1.539485 -0.15597959 -0.12664995
#> 3 0.5623073 -0.5412975 0.3384363 2.1155894 3.637831  2.06565538  1.79922754
#> 4 1.0796267 -0.9258913 0.3543732 1.7902102 3.201578  0.07587226 -0.02831127
#> 5 1.1019372 -1.2759568 0.3772657 0.5057989 1.460611 -0.85670748 -0.38820500
#> 6 1.5488474 -0.7649646 0.4933481 3.0167134 4.654380  3.75240014  3.39782791
#>   Y_mgwrsar_0_0_kv Y_mgwrsar_0_kc_kv Y_mgwrsar_1_kc_kv Y_mgwr_outlier X2dummy
#> 1        0.5673431         0.7137761        0.55150700      0.3982940   FALSE
#> 2        0.2031676         0.2611103        0.06388119     -0.1266500    TRUE
#> 3        3.8183989         3.1882294        2.86908705      1.7992275    TRUE
#> 4        0.3314998         0.1832260        0.14519215     -0.0188881    TRUE
#> 5       -0.7858941        -0.1114446       -0.13694311     -0.3882050   FALSE
#> 6        5.8227160         5.2859121        6.16333174      3.3978279    TRUE
#>   Y_mgwr_X2dummy        x       y
#> 1     0.07564794 872142.8 6234075
#> 2    -0.34965450 872894.3 6233366
#> 3     0.94914284 878615.1 6229884
#> 4    -1.20689547 869936.0 6228408
#> 5    -1.10310449 866441.8 6231471
#> 6     2.03036799 866952.9 6222411
## plot
ggplot(mydata,aes(x,y))+geom_point(aes(color=Beta0))+ ggtitle('Spatial Pattern of coefficient Beta0')+scale_colour_gradientn(colours = terrain.colors(10))

ggplot(mydata,aes(x,y))+geom_point(aes(color=Beta1))+ ggtitle('Spatial Pattern of coefficient Beta1')+scale_colour_gradientn(colours = terrain.colors(10))

ggplot(mydata,aes(x,y))+geom_point(aes(color=lambda))+ ggtitle('Spatial Pattern of spatial autocorrelation coefficient (lambda)')+scale_colour_gradientn(colours = terrain.colors(10))


W=kernel_matW(H=4,kernels='rectangle',coords=coord,NN=4,adaptive=TRUE,diagnull=TRUE)

Note that in this package there are two ways to express a bandwidth $H$ . It can be a distance (adaptive = F) or a number of neighbors (adaptive = T).

Estimation

Estimation of a mgwrsar(1,0,kv) model with all parameter spatially varying using an adapative gaussian kernel (based on 20 nearest neighbors):

mgwrsar_1_0_kv<-MGWRSAR(formula = 'Y_mgwrsar_1_0_kv~X1+X2+X3', data = mydata,coords=coord, fixed_vars=NULL,kernels=c('gauss'),H=20, Model = 'MGWRSAR_1_0_kv',control=list(SE=F,adaptive=T,W=W))
summary(mgwrsar_1_0_kv)
#> Call:
#> MGWRSAR(formula = "Y_mgwrsar_1_0_kv~X1+X2+X3", data = mydata, 
#>     coords = coord, fixed_vars = NULL, kernels = c("gauss"), 
#>     H = 20, Model = "MGWRSAR_1_0_kv", control = list(SE = F, 
#>         adaptive = T, W = W))
#> Model: MGWRSAR_1_0_kv 
#> Method for spatial autocorrelation: 2SLS 
#> Kernels function: gauss 
#> Kernels adaptive: YES 
#> Kernels type: GD 
#> Bandwidth: 20 
#> Computation time: 2.137 
#> Use of parallel computing: FALSE  ncore = 1 
#> Use of rough kernel: NO 
#> Use of Target Points: NO 
#> Number of data points: 1000 
#> Varying parameters: Intercept X1 X2 X3  
#>         Intercept       X1       X2       X3  lambda
#> Min.     -5.55249  0.11207 -1.20584 -1.86027 -0.2375
#> 1st Qu.  -1.47283  0.36956  0.64156 -1.24644  0.1831
#> Median   -0.75406  0.54369  1.72877 -0.99875  0.3845
#> Mean     -0.79631  0.50721  1.75803 -1.00349  0.3568
#> 3rd Qu.  -0.15439  0.63505  2.86983 -0.71468  0.5382
#> Max.      2.06110  0.95748  5.14466 -0.27205  0.8808
#> Effective degrees of freedom: 885.0928 
#> AICc: 2957.617 
#> Residual sum of squares: 868.1985 
#> RMSE: 0.9317717

Estimation of a mgwrsar(0,0,kv) model with stationnary spatial multiplier using an adapative gaussian kernel (based on 20 nearest neighbors):

mgwrsar_0_0_kv<-MGWRSAR(formula = 'Y_mgwrsar_0_kc_kv~X1+X2+X3', data = mydata,coords=coord, fixed_vars=NULL,kernels=c('gauss'),H=20, Model = 'MGWRSAR_0_0_kv',control=list(SE=F,adaptive=T,W=W))
summary(mgwrsar_0_0_kv)
#> Call:
#> MGWRSAR(formula = "Y_mgwrsar_0_kc_kv~X1+X2+X3", data = mydata, 
#>     coords = coord, fixed_vars = NULL, kernels = c("gauss"), 
#>     H = 20, Model = "MGWRSAR_0_0_kv", control = list(SE = F, 
#>         adaptive = T, W = W))
#> Model: MGWRSAR_0_0_kv 
#> Method for spatial autocorrelation: 2SLS 
#> Kernels function: gauss 
#> Kernels adaptive: YES 
#> Kernels type: GD 
#> Bandwidth: 20 
#> Computation time: 1.612 
#> Use of parallel computing: FALSE  ncore = 1 
#> Use of rough kernel: NO 
#> Use of Target Points: NO 
#> Number of data points: 1000 
#> Constant parameters: XX 
#> 0.3676871 
#> Varying parameters: Intercept X1 X2 X3 
#>         Intercept        X1        X2      X3
#> Min.    -3.763567  0.080428 -2.209824 -1.2308
#> 1st Qu. -1.219268  0.357217  0.706989 -1.0647
#> Median  -0.618269  0.536381  1.425772 -0.9997
#> Mean    -0.513883  0.499074  1.398492 -1.0014
#> 3rd Qu. -0.017472  0.627690  2.638912 -0.9437
#> Max.     2.108716  0.889871  4.369973 -0.6752
#> Effective degrees of freedom: 905.871 
#> AICc: 1161.224 
#> Residual sum of squares: 151.7064 
#> RMSE: 0.3894951

Estimation of a mgwrsar(0,kc,kv) model with stationary X2 and stationnary spatial multiplier using an adapative gaussian kernel (based on 20 nearest neighbors):

mgwrsar_0_kc_kv<-MGWRSAR(formula = 'Y_mgwrsar_0_kc_kv~X1+X2+X3', data = mydata,coords=coord, fixed_vars='X2',kernels=c('gauss'),H=20, Model = 'MGWRSAR_0_kc_kv',control=list(SE=F,adaptive=T,W=W))
summary(mgwrsar_0_kc_kv)
#> Call:
#> MGWRSAR(formula = "Y_mgwrsar_0_kc_kv~X1+X2+X3", data = mydata, 
#>     coords = coord, fixed_vars = "X2", kernels = c("gauss"), 
#>     H = 20, Model = "MGWRSAR_0_kc_kv", control = list(SE = F, 
#>         adaptive = T, W = W))
#> Model: MGWRSAR_0_kc_kv 
#> Method for spatial autocorrelation: 2SLS 
#> Kernels function: gauss 
#> Kernels adaptive: YES 
#> Kernels type: GD 
#> Bandwidth: 20 
#> Computation time: 1.122 
#> Use of parallel computing: FALSE  ncore = 1 
#> Use of rough kernel: NO 
#> Use of Target Points: NO 
#> Number of data points: 1000 
#> Constant parameters: XXX2 XXPhWy 
#> 0.870593 0.4120387 
#> Varying parameters: Intercept X1 X3 
#>         Intercept        X1      X3
#> Min.    -1.254418  0.029588 -1.2702
#> 1st Qu. -0.444425  0.366633 -1.0810
#> Median  -0.033372  0.540500 -1.0026
#> Mean     0.065257  0.498703 -1.0016
#> 3rd Qu.  0.445573  0.627798 -0.9243
#> Max.     2.099437  0.873585 -0.6744
#> Effective degrees of freedom: 914.8315 
#> AICc: 1044.642 
#> Residual sum of squares: 137.9687 
#> RMSE: 0.3714414

Estimation of a mgwrsar(1,kc,kv) model with stationary X2 using an adapative gaussian kernel (based on 20 nearest neighbors):

mgwrsar_1_kc_kv<-MGWRSAR(formula = 'Y_mgwrsar_1_kc_kv~X1+X2+X3', data = mydata,coords=coord, fixed_vars='X2',kernels=c('gauss'),H=20, Model = 'MGWRSAR_1_kc_kv',control=list(SE=F,adaptive=T,W=W))
summary(mgwrsar_1_kc_kv)
#> Call:
#> MGWRSAR(formula = "Y_mgwrsar_1_kc_kv~X1+X2+X3", data = mydata, 
#>     coords = coord, fixed_vars = "X2", kernels = c("gauss"), 
#>     H = 20, Model = "MGWRSAR_1_kc_kv", control = list(SE = F, 
#>         adaptive = T, W = W))
#> Model: MGWRSAR_1_kc_kv 
#> Method for spatial autocorrelation: 2SLS 
#> Kernels function: gauss 
#> Kernels adaptive: YES 
#> Kernels type: GD 
#> Bandwidth: 20 
#> Computation time: 2.006 
#> Use of parallel computing: FALSE  ncore = 1 
#> Use of rough kernel: NO 
#> Use of Target Points: NO 
#> Number of data points: 1000 
#> Constant parameters: XX 
#> 2.459628 
#> Varying parameters: Intercept X1 X3  
#>         Intercept        X1        X3  lambda
#> Min.    -2.785597  0.064134 -1.373242 -0.6412
#> 1st Qu. -0.783444  0.371202 -1.092831  0.0467
#> Median  -0.454995  0.534294 -1.006845  0.3538
#> Mean    -0.435127  0.501815 -1.009762  0.3553
#> 3rd Qu. -0.111683  0.633466 -0.933401  0.8027
#> Max.     2.392965  0.910200 -0.617579  0.9900
#> Effective degrees of freedom: 901.0112 
#> AICc: 4933.69 
#> Residual sum of squares: 6519.053 
#> RMSE: 2.553244

Estimation of a mgwrsar(1,kc,0) model with stationary X1 using an adapative gaussian kernel (based on 20 nearest neighbors):

mgwrsar_1_kc_0<-MGWRSAR(formula = 'Y_mgwrsar_1_kc_kv~X1+X2+X3', data = mydata,coords=coord, fixed_vars=c('Intercept','X1','X2','X3'),kernels=c('gauss'),H=20, Model = 'MGWRSAR_1_kc_0',control=list(SE=F,adaptive=T,W=W))
summary(mgwrsar_1_kc_0)
#> Call:
#> MGWRSAR(formula = "Y_mgwrsar_1_kc_kv~X1+X2+X3", data = mydata, 
#>     coords = coord, fixed_vars = c("Intercept", "X1", "X2", "X3"), 
#>     kernels = c("gauss"), H = 20, Model = "MGWRSAR_1_kc_0", control = list(SE = F, 
#>         adaptive = T, W = W))
#> Model: MGWRSAR_1_kc_0 
#> Method for spatial autocorrelation: 2SLS 
#> Kernels function: gauss 
#> Kernels adaptive: YES 
#> Kernels type: GD 
#> Bandwidth: 20 
#> Computation time: 1.279 
#> Use of parallel computing: FALSE  ncore = 1 
#> Use of rough kernel: NO 
#> Use of Target Points: NO 
#> Number of data points: 1000 
#> Constant parameters: XXIntercept XXX1 XXX2 XXX3 
#> -0.8205521 0.509378 1.69077 -1.035463 
#> Varying parameters: 
#>          lambda
#> Min.    -0.9137
#> 1st Qu.  0.2291
#> Median   0.6009
#> Mean     0.4822
#> 3rd Qu.  0.9900
#> Max.     0.9995
#> Effective degrees of freedom: 971.1444 
#> AICc: 3905.183 
#> Residual sum of squares: 2736.795 
#> RMSE: 1.654326

Plot and summary

Summary and plot of mgwrsar object using leaflet:


mgwrsar_1_0_kv<-MGWRSAR(formula = 'Y_mgwrsar_1_0_kv~X1+X2+X3', data = mydata,coords=coord,fixed_vars=NULL,kernels=c('gauss'),H=20, Model = 'MGWRSAR_1_0_kv',control=list(SE=T,adaptive=T,W=W))

summary(mgwrsar_1_0_kv)
#> Call:
#> MGWRSAR(formula = "Y_mgwrsar_1_0_kv~X1+X2+X3", data = mydata, 
#>     coords = coord, fixed_vars = NULL, kernels = c("gauss"), 
#>     H = 20, Model = "MGWRSAR_1_0_kv", control = list(SE = T, 
#>         adaptive = T, W = W))
#> Model: MGWRSAR_1_0_kv 
#> Method for spatial autocorrelation: 2SLS 
#> Kernels function: gauss 
#> Kernels adaptive: YES 
#> Kernels type: GD 
#> Bandwidth: 20 
#> Computation time: 1.362 
#> Use of parallel computing: FALSE  ncore = 1 
#> Use of rough kernel: NO 
#> Use of Target Points: NO 
#> Number of data points: 1000 
#> Varying parameters: Intercept X1 X2 X3  
#>         Intercept       X1       X2       X3  lambda
#> Min.     -5.55249  0.11207 -1.20584 -1.86027 -0.2375
#> 1st Qu.  -1.47283  0.36956  0.64156 -1.24644  0.1831
#> Median   -0.75406  0.54369  1.72877 -0.99875  0.3845
#> Mean     -0.79631  0.50721  1.75803 -1.00349  0.3568
#> 3rd Qu.  -0.15439  0.63505  2.86983 -0.71468  0.5382
#> Max.      2.06110  0.95748  5.14466 -0.27205  0.8808
#> Effective degrees of freedom: 885.0928 
#> AICc: 2957.617 
#> Residual sum of squares: 868.1985 
#> RMSE: 0.9317717
plot(mgwrsar_1_0_kv,type='B_coef',var='X2', crs=2154,radius=150,myzoom=10)

#plot(mgwrsar_1_0_kv,type='t_coef',var='X2', crs=2154,radius=150,myzoom=10)

Prediction

In this example, we utilize an in-sample of size 800 for model estimation and an out-sample of size 200 for prediction. It’s worth noting that for models with spatial autocorrelation, you must provide a global weight matrix of size 1000, ordered by in-sample and then out-sample locations.

For GWR and MGWR_1_0_kv models, where all coefficients vary in space, predictions are made using the corresponding model estimate with the out-sample locations as target points. Hence, the estimated coefficients are not utilized; only the call and the data of the estimated model are employed (method_pred = ‘TP’, default value). However, for mixed models that include some constant coefficients (such as MGWR, MGXWR_0_0_kv, MGXWR_1_kc_kv, MGXWR_1_kc_0), the estimated coefficients are extrapolated using a weighting matrix. By default, the weighting matrix for prediction is derived from the expected weights of out-sample data if they were added to in-sample data to estimate the corresponding MGWRSAR model: method_pred=‘tWtp_model’ (refer to Geniaux, 2024, for further details). Alternatively, the user can choose to extrapolate the model coefficients using a Shepard kernel with a custom number of neighbors (method_pred=‘shepard’,k_extra=12) or using the same kernel and bandwidth as the estimated model (method_pred=‘model’). In case of extrapolation, predition method without the best linear unbiased predictor (‘type = YTC’) and with the best linear unbiased predictor (‘type = BPN’) are available.

Prediction of MGWRSAR_1_0_kv model using shepard kernel with 4 neighbors and Best Linear Unbiased Predictor (BLUP) :


### Global Spatial Weight matrix W should be ordered by in_sample (S) then out_sample

# in_sample and out_sample folds
length_out=800
index_in=sample(1:1000,length_out)
index_out=(1:1000)[-index_in]

#W=kernel_matW(H=4,kernels='rectangle',coords=coord,NN=4,adaptive=TRUE,diagnull=TRUE)
W_in_out=kernel_matW(H=4,kernels='rectangle',coords=rbind(coord[index_in,],coord[index_out,]),NN=4,adaptive=TRUE,diagnull=TRUE)
W_in=W_in_out[1:length(index_in),1:length(index_in)]
W_in=mgwrsar::normW(W_in)
newdata=mydata[index_out,]
newdata_coords=coord[index_out,]


### SAR Model
model_SAR_insample<-MGWRSAR(formula = 'Y_sar~X1+X2+X3', data = mydata[index_in,],coords=coord[index_in,], fixed_vars=NULL,kernels=NULL,H=NULL, Model = 'SAR',control=list(W=W_in))
model_SAR_insample@RMSE
#> [1] 0.1303956
summary(model_SAR_insample)
#> Call:
#> MGWRSAR(formula = "Y_sar~X1+X2+X3", data = mydata[index_in, ], 
#>     coords = coord[index_in, ], fixed_vars = NULL, kernels = NULL, 
#>     H = NULL, Model = "SAR", control = list(W = W_in))
#> Model: SAR 
#> Method for spatial autocorrelation: 2SLS 
#> Kernels function:  
#> Kernels adaptive: NO 
#> Kernels type:  
#> Bandwidth: NA 
#> Computation time: 0.044 
#> Use of parallel computing: FALSE  ncore = 1 
#> Use of rough kernel:  
#> Use of Target Points: NO 
#> Number of data points: 800 
#> Constant parameters: Intercept X1 X2 X3 lambda 
#> 0.1515706 0.5040549 1.153973 -1.012715 0.3147484 
#> AICc:  
#> Residual sum of squares: 13.6024 
#> RMSE: 0.1303956

## SAR Model: predictions
## without Best Linear Unbiased Predictor
# prediction YTC
Y_pred10=predict(model_SAR_insample, newdata=newdata, newdata_coords=newdata_coords,W=W_in_out,type='YTC')
head(Y_pred10)
#> [1] 2.027912 3.731384 5.910859 2.949735 4.971490 5.277246
RMSE_YTC=sqrt(mean((mydata$Y_sar[index_out]-Y_pred10)^2))
RMSE_YTC #  0.08441952
#> [1] 0.08441952

## Using Best Linear Unbiased Predictor
Y_pred11=predict(model_SAR_insample, newdata=newdata, newdata_coords=newdata_coords,W=W_in_out,type='BPN')
head(Y_pred11)
#> [1] 2.015560 3.680499 5.923186 2.923057 4.940062 5.313581
RMSE_BPN=sqrt(mean((mydata$Y_sar[index_out]-Y_pred11)^2))
RMSE_BPN #   0.06039921
#> [1] 0.06039921


#### MGWRSAR_1_0_kv model
model_MGWRSAR_1_0_kv_insample<-MGWRSAR(formula = 'Y_mgwrsar_1_0_kv~X1+X2+X3', data = mydata[index_in,],coords=coord[index_in,], fixed_vars=NULL,kernels='gauss',H=20, Model = 'MGWRSAR_1_0_kv',control=list(W=W_in,adaptive=TRUE,isgcv=F))
model_MGWRSAR_1_0_kv_insample@RMSE
#> [1] 0.7005766
summary(model_MGWRSAR_1_0_kv_insample)
#> Call:
#> MGWRSAR(formula = "Y_mgwrsar_1_0_kv~X1+X2+X3", data = mydata[index_in, 
#>     ], coords = coord[index_in, ], fixed_vars = NULL, kernels = "gauss", 
#>     H = 20, Model = "MGWRSAR_1_0_kv", control = list(W = W_in, 
#>         adaptive = TRUE, isgcv = F))
#> Model: MGWRSAR_1_0_kv 
#> Method for spatial autocorrelation: 2SLS 
#> Kernels function: gauss 
#> Kernels adaptive: YES 
#> Kernels type: GD 
#> Bandwidth: 20 
#> Computation time: 0.971 
#> Use of parallel computing: FALSE  ncore = 1 
#> Use of rough kernel: NO 
#> Use of Target Points: NO 
#> Number of data points: 800 
#> Varying parameters: Intercept X1 X2 X3  
#>         Intercept       X1       X2       X3  lambda
#> Min.     -7.35091  0.11250 -1.03271 -1.92040 -0.2411
#> 1st Qu.  -1.74521  0.37676  1.07924 -1.28170  0.1253
#> Median   -0.91625  0.53936  2.01644 -1.01160  0.2901
#> Mean     -0.97835  0.50956  2.21153 -1.02210  0.2983
#> 3rd Qu.  -0.26127  0.64346  3.42518 -0.71382  0.4631
#> Max.      2.09208  0.93698  8.41085 -0.26197  0.8171
#> Effective degrees of freedom: 705.8388 
#> AICc: 1915.822 
#> Residual sum of squares: 392.6461 
#> RMSE: 0.7005766

#### MGWRSAR_1_0_kv model: predictions

## Using Best Linear Unbiased Predictor with method_pred='model' 
Y_pred12=predict(model_MGWRSAR_1_0_kv_insample, newdata=newdata, newdata_coords=newdata_coords,W=W_in_out,type='BPN',method_pred='model' )
head(Y_pred12)
#> [1] 0.6988705 3.7068233 5.6428853 2.6763307 6.5104905 4.6867043
RMSE_model_BPN=sqrt(mean((mydata$Y_mgwrsar_1_0_kv[index_out]-Y_pred12)^2))
RMSE_model_BPN #  0.435830
#> [1] 0.4358303

# prediction with method_pred='tWtp'
Y_pred13=predict(model_MGWRSAR_1_0_kv_insample, newdata=newdata, newdata_coords=newdata_coords,W=W_in_out,type='BPN',method_pred='tWtp_model')
head(Y_pred13)
#> [1] 0.7317686 3.6460268 5.6538075 2.6628929 6.5911347 4.8190344
RMSE_tWtp_BPN=sqrt(mean((mydata$Y_mgwrsar_1_0_kv[index_out]-Y_pred13)^2))
RMSE_tWtp_BPN #   0.4544316
#> [1] 0.4544316

## Using Best Linear Unbiased Predictor with method_pred='shepard' 
W_out=kernel_matW(H=4,kernels='rectangle',coords=coord[index_out,],NN=4,adaptive=TRUE,diagnull=TRUE)
Y_pred14=predict(model_MGWRSAR_1_0_kv_insample, newdata=newdata, newdata_coords=newdata_coords,W=W_in_out,type='BPN',method_pred='shepard',k_extra=8)
head(Y_pred14)
#> [1] 0.7715255 3.4637052 5.6434874 2.7065372 6.5409656 4.7106552
RMSE_shepard_BPN=sqrt(mean((mydata$Y_mgwrsar_1_0_kv[index_out]-Y_pred14)^2))
RMSE_shepard_BPN # 0.512511
#> [1] 0.512511

Prediction of MGWRSAR_1_kc_kv model using shepard kernel with 4 neighbors and Best Linear Unbiased Predictor (BLUP) :


### Global Spatial Weight matrix W should be ordered by in_ sample (S) then out_sample

# in_sample and out_sample folds
length_out=800
index_in=sample(1:1000,length_out)
index_out=(1:1000)[-index_in]

W=kernel_matW(H=4,kernels='rectangle',coords=rbind(coord[index_in,],coord[index_out,]),NN=4,adaptive=TRUE,diagnull=TRUE)
W_in=W[index_in,index_in]
W_in=mgwrsar::normW(W_in)

### model_MGWRSAR_1_kc_kv
model_MGWRSAR_1_kc_kv_insample<-MGWRSAR(formula = 'Y_mgwrsar_1_kc_kv~X1+X2+X3', data = mydata[index_in,],coords=coord[index_in,], fixed_vars='X3',kernels=c('gauss'),H=11, Model = 'MGWRSAR_1_kc_kv',control=list(W=W_in,adaptive=TRUE,isgcv=F))
model_MGWRSAR_1_kc_kv_insample@RMSE
#> [1] 0.3502106
summary(model_MGWRSAR_1_kc_kv_insample)
#> Call:
#> MGWRSAR(formula = "Y_mgwrsar_1_kc_kv~X1+X2+X3", data = mydata[index_in, 
#>     ], coords = coord[index_in, ], fixed_vars = "X3", kernels = c("gauss"), 
#>     H = 11, Model = "MGWRSAR_1_kc_kv", control = list(W = W_in, 
#>         adaptive = TRUE, isgcv = F))
#> Model: MGWRSAR_1_kc_kv 
#> Method for spatial autocorrelation: 2SLS 
#> Kernels function: gauss 
#> Kernels adaptive: YES 
#> Kernels type: GD 
#> Bandwidth: 11 
#> Computation time: 1.28 
#> Use of parallel computing: FALSE  ncore = 1 
#> Use of rough kernel: NO 
#> Use of Target Points: NO 
#> Number of data points: 800 
#> Constant parameters: XX 
#> -1.042863 
#> Varying parameters: Intercept X1 X2  
#>         Intercept        X1        X2  lambda
#> Min.    -10.72384   0.08488  -0.77331 -0.2800
#> 1st Qu.  -1.70402   0.35135   2.04683 -0.0410
#> Median   -0.78341   0.53193   3.01252 -0.0047
#> Mean     -0.99026   0.50042   3.42218 -0.0116
#> 3rd Qu.   0.06569   0.63682   5.18079  0.0268
#> Max.      1.62136   0.98688  12.43667  0.1318
#> Effective degrees of freedom: 666.3673 
#> AICc: 914.0958 
#> Residual sum of squares: 98.11797 
#> RMSE: 0.3502106

### model_MGWRSAR_1_kc_kv: predictions
## without Best Linear Unbiased Predictor
newdata=mydata[index_out,]
newdata_coords=coord[index_out,]
newdata$Y_mgwrsar_1_kc_kv=0

Y_pred15=predict(model_MGWRSAR_1_kc_kv_insample, newdata=newdata, newdata_coords=newdata_coords,W=W,type='YTC',method_pred='shepard')
head(Y_pred15)
#> [1] 0.5010164 3.6370096 5.8722300 2.5841532 7.9874040 4.0912315
RMSE_YTC=sqrt(mean((mydata$Y_mgwrsar_1_kc_kv[index_out]-Y_pred15)^2))
RMSE_YTC # 0.4274275
#> [1] 0.4274275

## Using Best Linear Unbiased Predictor
Y_pred16=predict(model_MGWRSAR_1_kc_kv_insample, newdata=newdata, newdata_coords=newdata_coords,W=W,type='BPN',method_pred='shepard')
head(Y_pred16)
#> [1] 0.5044271 3.8406314 5.8762162 2.5846512 7.9916212 4.0910314
RMSE_BPN=sqrt(mean((mydata$Y_mgwrsar_1_kc_kv[index_out]-Y_pred16)^2))
RMSE_BPN #  0.4416827
#> [1] 0.4416827

Optimal bandwidths search

The mgwrsar package offers a wrapper that facilitates finding the optimal bandwidth for a variety of model and kernel types, utilizing either AICc or CV criteria. It is specifically tailored for spatial kernels (Type=‘GD’) but can also be applied to space-time kernels (Type=‘GDT’) by specifying the bandwidth for the time kernel.

Examples for MGWRSAR_1_0_kv model using first an adaptive gaussian kernel and then a non-adaptive gaussian kernel.


### adaptive kernel : the bandwidth is expressed as a distance 
## AICc
res9=golden_search_bandwidth(formula='Y_mgwrsar_1_0_kv~X1+X2+X3',H2=NULL,data = mydata,coords=coord, fixed_vars=NULL,kernels=c('gauss'), Model = 'MGWRSAR_1_0_kv',control=list(verbose=F,NN=300,criterion='AICc',adaptive=F,W=W),lower.bound=0.001, upper.bound=1,tolerance=0.001)
res9$minimum # 0.05055378
summary(res9$model)

## CV
res11=golden_search_bandwidth(formula='Y_mgwrsar_1_0_kv~X1+X2+X3',H2=NULL,data = mydata,coords=coord, fixed_vars=NULL,kernels=c('gauss'), Model = 'MGWRSAR_1_0_kv',control=list(verbose=F,NN=300,criterion='CV',adaptive=F,W=W),lower.bound=0.001, upper.bound=1,tolerance=0.001)
res11$minimum # 0.03549386
summary(res11$model)

### non-adaptive kernel: the bandwidth is expressed as of number of neighbors
## AICc
res10=golden_search_bandwidth(formula='Y_mgwrsar_1_0_kv~X1+X2+X3',H2=NULL,data = mydata,coords=coord, fixed_vars=NULL,kernels=c('gauss'), Model = 'MGWRSAR_1_0_kv',control=list(verbose=F,NN=300,criterion='AICc',adaptive=T,W=W),lower.bound=2, upper.bound=300,tolerance=0.001)
res10$minimum # 10
summary(res10$model)

## CV
res12=golden_search_bandwidth(formula='Y_mgwrsar_1_0_kv~X1+X2+X3',H2=NULL,data = mydata,coords=coord, fixed_vars=NULL,kernels=c('gauss'), Model = 'MGWRSAR_1_0_kv',control=list(verbose=F,NN=300,criterion='CV',adaptive=T,W=W),lower.bound=2, upper.bound=300,tolerance=0.001)
res12$minimum #. 5
summary(res12$model)

GWR and Mixed GWR with spatial autocorrelation

Ghislain Geniaux

2025-02-18

Introduction

Estimation of GWR/MGWR with spatial dependance.

Estimation

Plot and summary

Prediction

Optimal bandwidths search

References