We present the performances of GREMLINS on a simulated multipartite
network. GREMLINS includes a function rMBM
to simulate
multipartite networks. Mathematical details can be found in Bar-Hen, Barbillon, and S. (2021).
We use the function rMBM
provided in the package to
simulate a multipartite network involving \(2\) functional groups (namely A and B) of
respective sizes \[n_A = 60, \quad, n_B =
50.\]
A and B are divided respectively into \(3\) and \(2\) blocks. The sizes of the blocks are generated randomly. For reproductibility, we fix the random seed to an arbitrarily chosen value.
<- c('A','B')
namesFG <- c(60,50) #size of each FG
v_NQ = list(c(0.16 ,0.40 ,0.44),c(0.3,0.7)) #proportion of each block in each FG
list_pi 1]]
list_pi[[#> [1] 0.16 0.40 0.44
We assume that we observe \(3\) interactions matrices
- A-B : continuous weighted interactions
- B-B : binary interactions
- A-A : counting directed interactions
<- rbind(c(1,2),c(2,2),c(1,1))
E <- c( "inc","diradj", "adj")
typeInter <- c('ZIgaussian','bernoulli','poisson') v_distrib
Note that the distributions may be Bernoulli
,
Poisson
, Gaussian
or Laplace
(with null mean). For the Gaussian distribution, a mean and a variance
must be given. We generate randomly the emission parameters \(\theta\).
<- list()
list_theta 1]] <- list()
list_theta[[1]]$mean <- matrix(c(6.1, 8.9, 6.6, 9.8, 2.6, 1.0), 3, 2)
list_theta[[1]]$var <- matrix(c(1.6, 1.6, 1.8, 1.7 ,2.3, 1.5),3, 2)
list_theta[[1]]$p0 <- matrix(c(0.4, 0.1, 0.6, 0.5 , 0.2, 0),3, 2)
list_theta[[2]] <- matrix(c(0.7,1.0, 0.4, 0.6),2, 2)
list_theta[[<- matrix(c(2.5, 2.6 ,2.2 ,2.2, 2.7 ,3.0 ,3.6, 3.5, 3.3),3,3 )
m3 3]] <- (m3 + t(m3))/2# for symetrisation list_theta[[
We are now ready to simulate the data
library(GREMLINS)
<- rMBM(v_NQ,E , typeInter, v_distrib, list_pi,
dataSim namesFG = namesFG, seed = 4,keepClassif = TRUE)
list_theta, <- dataSim$list_Net
list_Net length(list_Net)
#> [1] 3
names(list_Net[[1]])
#> [1] "mat" "typeInter" "rowFG" "colFG"
1]]$typeInter
list_Net[[#> [1] "inc"
1]]$rowFG
list_Net[[#> [1] "A"
1]]$colFG
list_Net[[#> [1] "B"
The model selection and the estimation are performed with the
function multipartiteBM
.
<- multipartiteBM(list_Net,
res_MBMsimu v_distrib = v_distrib,
namesFG = c('A','B'),
v_Kinit = c(2,2),
nbCores = 2,
initBM = FALSE,
keep = FALSE)
#> [1] "------------Nb of entities in each functional group--------------"
#> A B
#> 60 50
#> [1] "------------Probability distributions on each network--------------"
#> [1] "ZIgaussian" "bernoulli" "poisson"
#> [1] "-------------------------------------------------------------------"
#> [1] " ------ Searching the numbers of blocks starting from [ 2 2 ] blocks"
#> [1] "ICL : -7085.81 . Nb of blocks: [ 2 2 ]"
#> [1] "ICL : -5901.15 . Nb of blocks: [ 3 2 ]"
#> [1] "Best model------ ICL : -5901.15 . Nb of clusters: [ 3 2 ] for [ A , B ] respectively"
We can now get the estimated parameters.
$fittedModel[[1]]$paramEstim$list_theta$AB$mean
res_MBMsimu#> [,1] [,2]
#> [1,] 1.004152 6.572955
#> [2,] 2.582062 8.881842
#> [3,] 9.994673 6.139221
extractClustersMBM
produces the clusters in each
functional group.
<- extractClustersMBM(res_MBMsimu) Cl
One may also want to estimate the parameters for given numbers of
clusters. The function multipartiteBMFixedModel
is designed
for this task.
<- multipartiteBMFixedModel(list_Net, v_distrib = v_distrib, nbCores = 2,namesFG = namesFG, v_K = c(3,2))
res_MBMsimu_fixed #> [1] "====================== First Forward Step =================="
#> [1] "====================== First Backward Step =================="
#> [1] "====================== Last Forward Step =================="
#> [1] "====================== Last Backward Step =================="
$fittedModel[[1]]$paramEstim$v_K
res_MBMsimu_fixed#> [1] 3 2
extractClustersMBM(res_MBMsimu_fixed)$A
#> [[1]]
#> [1] 1 4 5 10 11 13 15 16 17 23 24 25 27 29 32 33 34 35 39 40 42 48 51 56 57
#> [26] 58 59 60
#>
#> [[2]]
#> [1] 2 6 7 8 12 14 19 22 26 31 36 37 38 41 43 44 46 47 49 50 52
#>
#> [[3]]
#> [1] 3 9 18 20 21 28 30 45 53 54 55
GREMLINS is also able to handle missing data. In the following experiment, we artificially set missing data in the previously simulated matrices.
############# NA data at random in any matrix
= 10/100
epsilon <- list_Net
list_Net_NA for (m in 1:nrow(E)){
<- sample(c(1,0),v_NQ[E[m,1]]*v_NQ[E[m,2]],replace=TRUE,prob = c(epsilon, 1-epsilon))
U <- matrix(U,v_NQ[E[m,1]],v_NQ[E[m,2]])
matNA $mat[matNA== 1] = NA
list_Net_NA[[m]]if (list_Net_NA[[m]]$typeInter == 'adj') {
<- list_Net_NA[[m]]$mat
M diag(M) <- NA
lower.tri(M)] = t(M)[lower.tri(M)]
M[$mat <- M
list_Net_NA[[m]]
} }
<- multipartiteBM(list_Net_NA,
res_MBMsimuNA v_distrib = v_distrib,
namesFG = c('A','B'),
v_Kinit = c(2,2),
nbCores = 2,
keep = FALSE)
#> [1] "------------Nb of entities in each functional group--------------"
#> A B
#> 60 50
#> [1] "------------Probability distributions on each network--------------"
#> [1] "ZIgaussian" "bernoulli" "poisson"
#> [1] "-------------------------------------------------------------------"
#> [1] " ------ Searching the numbers of blocks starting from [ 2 2 ] blocks"
#> [1] "ICL : -6521.28 . Nb of blocks: [ 2 2 ]"
#> [1] "ICL : -5446.97 . Nb of blocks: [ 3 2 ]"
#> [1] " ------ Searching the numbers of blocks starting from [ 3 2 ] blocks"
#> [1] "ICL : -5446.97 . Nb of blocks: [ 3 2 ]"
#> [1] " ------ Searching the numbers of blocks starting from [ 1 2 ] blocks"
#> [1] "ICL : -6977.56 . Nb of blocks: [ 1 2 ]"
#> [1] "ICL : -6521.28 . Nb of blocks: [ 2 2 ]"
#> [1] "Best model------ ICL : -5446.97 . Nb of clusters: [ 3 2 ] for [ A , B ] respectively"
We then have a function to predict the missing edges (probability if binary or intensity if weighted)
<- predictMBM(res_MBMsimuNA) pred