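outlierensembles provides a collection of outlier/anomaly detection ensembles. Given the anomaly scores of different anomaly detection methods, the following ensemble techniques can be used to construct an ensemble score: the IRT ensemble, the greedy ensemble, the ICWA ensemble, the maximum-score ensemble, the threshold-sum ensemble, and the average (mean) ensemble.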
You can install the released version of outlierensembles from CRAN with:
# Install the released version of the package from CRAN.
install.packages("outlierensembles")
And the development version from GitHub with:
# Install the development version from GitHub.
# Uncomment the next line first if devtools is not installed yet.
# install.packages("devtools")
devtools::install_github("sevvandi/outlierensembles")
We use methods from the dbscan R package to find anomalies. You can use any anomaly detection method you want to build the ensemble. First, we construct the IRT ensemble. The colors show the ensemble scores.
# Standardise the faithful data before computing anomaly scores.
faithfulu <- scale(faithful)

# Using different parameters of lof for anomaly detection
y1 <- dbscan::lof(faithfulu, minPts = 5)
y2 <- dbscan::lof(faithfulu, minPts = 10)
y3 <- dbscan::lof(faithfulu, minPts = 20)

# Using different KNN distances as anomaly scores
knnobj <- dbscan::kNN(faithfulu, k = 20)
y4 <- knnobj$dist[, 5]
y5 <- knnobj$dist[, 10]
y6 <- knnobj$dist[, 20]

# Dense points are less anomalous. Points in less dense areas are more
# anomalous. Hence 1 - pointdensity is used.
y7 <- 1 - dbscan::pointdensity(faithfulu, eps = 1, type = "gaussian")
y8 <- 1 - dbscan::pointdensity(faithfulu, eps = 2, type = "gaussian")
y9 <- 1 - dbscan::pointdensity(faithfulu, eps = 0.5, type = "gaussian")

# One column of anomaly scores per detection method.
Y <- cbind.data.frame(y1, y2, y3, y4, y5, y6, y7, y8, y9)

# Build the IRT ensemble from the score table.
ens1 <- irt_ensemble(Y)
#> Warning in sqrt(diag(solve(Hess))): NaNs produced

# Attach the ensemble scores as a third column and colour the points by them.
df <- cbind.data.frame(faithful, ens1$scores)
colnames(df)[3] <- "IRT"
ggplot(df, aes(eruptions, waiting)) +
  geom_point(aes(color = IRT)) +
  scale_color_gradient(low = "yellow", high = "red")
Then we do the greedy ensemble.
# Build the greedy ensemble from the score table Y.
ens2 <- greedy_ensemble(Y)

# Attach the ensemble scores as a third column and colour the points by them.
df <- cbind.data.frame(faithful, ens2$scores)
colnames(df)[3] <- "Greedy"
ggplot(df, aes(eruptions, waiting)) +
  geom_point(aes(color = Greedy)) +
  scale_color_gradient(low = "yellow", high = "red")
We do the ICWA ensemble next.
# Build the ICWA ensemble from the score table Y.
ens3 <- icwa_ensemble(Y)

# The result is bound directly as the third column, then used for colouring.
df <- cbind.data.frame(faithful, ens3)
colnames(df)[3] <- "ICWA"
ggplot(df, aes(eruptions, waiting)) +
  geom_point(aes(color = ICWA)) +
  scale_color_gradient(low = "yellow", high = "red")
Next, we use the maximum scores to build the ensemble.
# Build the maximum-score ensemble from the score table Y.
ens4 <- max_ensemble(Y)

# The result is bound directly as the third column, then used for colouring.
df <- cbind.data.frame(faithful, ens4)
colnames(df)[3] <- "Max"
ggplot(df, aes(eruptions, waiting)) +
  geom_point(aes(color = Max)) +
  scale_color_gradient(low = "yellow", high = "red")
Then, we use a threshold sum to construct the ensemble.
# Build the threshold-sum ensemble from the score table Y.
ens5 <- threshold_ensemble(Y)

# The result is bound directly as the third column, then used for colouring.
df <- cbind.data.frame(faithful, ens5)
colnames(df)[3] <- "Threshold"
ggplot(df, aes(eruptions, waiting)) +
  geom_point(aes(color = Threshold)) +
  scale_color_gradient(low = "yellow", high = "red")
Finally, we use the mean values as the ensemble score.
# Build the average (mean) ensemble from the score table Y.
ens6 <- average_ensemble(Y)

# The result is bound directly as the third column, then used for colouring.
df <- cbind.data.frame(faithful, ens6)
colnames(df)[3] <- "Average"
ggplot(df, aes(eruptions, waiting)) +
  geom_point(aes(color = Average)) +
  scale_color_gradient(low = "yellow", high = "red")