1 Exercice 1.
1.1 1.
<-1000
n<-c(1,2)/3
Proportions<-list()
parameters1]]<-list(mean=0,sd=1)
parameters[[2]]<-list(mean=4,sd=1/2) parameters[[
<-function(n=100,parameters,Proportions){
simu_mixture<-t(rmultinom(n,1,prob = Proportions))
z<-apply(z,1,which.max)
z<-matrix(0,n,1)
xfor (i in 1:length(z)){
1]<-rnorm(1,mean=parameters[[z[i]]]$mean,sd=parameters[[z[i]]]$sd)
x[i,
}return(list(x=x,z=z))
}
<-simu_mixture(n=1000, parameters=parameters,Proportions = Proportions) simulation
hist(simulation$x,breaks=50)
= 1000
N = c(1/3,2/3)
pi_k <- sample(1:2,size=N,replace=T,prob=pi_k)
z
<- c(0,4)
mu <- c(1,1/2)
sig <- rep(NA,N)
X
for(i in 1:N){
<- rnorm(1,mean = mu[z[i]], sd = sig[z[i]])
X[i] }
hist(X,breaks=50)
1.2 2.
<-kmeans(simulation$x,2,nstart = 30) res
table(res$cluster,simulation$z)
##
## 1 2
## 1 316 0
## 2 5 679
1.3 3.
= simulation$x[res$cluster==1,]
X_g1= simulation$x[res$cluster==2,] X_g2
Paramètre groupe 1 :
mean(X_g1);sd(X_g1)
## [1] -0.0318892
## [1] 0.9413205
Paramètre groupe 2 :
mean(X_g2);sd(X_g2)
## [1] 3.974728
## [1] 0.5206221
1.4 4.
#install.packages("mclust",dependencies = T)
library(mclust)
## Package 'mclust' version 5.4.10
## Type 'citation("mclust")' for citing this R package in publications.
?Mclust
1.4.1 Model E
<- Mclust(simulation$x,G=2, modelNames="E") #equal variance
model_Mclust_E model_Mclust_E
## 'Mclust' model object: (E,2)
##
## Available components:
## [1] "call" "data" "modelName" "n"
## [5] "d" "G" "BIC" "loglik"
## [9] "df" "bic" "icl" "hypvol"
## [13] "parameters" "z" "classification" "uncertainty"
table(model_Mclust_E$classification,simulation$z)
##
## 1 2
## 1 313 0
## 2 8 679
$parameters model_Mclust_E
## $pro
## [1] 0.3110587 0.6889413
##
## $mean
## 1 2
## -0.06026344 3.95880199
##
## $variance
## $variance$modelName
## [1] "E"
##
## $variance$d
## [1] 1
##
## $variance$G
## [1] 2
##
## $variance$sigmasq
## [1] 0.4724138
##
##
## $Vinv
## NULL
1.4.2 Model V
<- Mclust(simulation$x,G=2, modelNames="V") #unequal variance model_Mclust_V
table(model_Mclust_V$classification,simulation$z)
##
## 1 2
## 1 319 0
## 2 2 679
$parameters model_Mclust_V
## $pro
## [1] 0.3206809 0.6793191
##
## $mean
## 1 2
## 0.00356914 3.98559680
##
## $variance
## $variance$modelName
## [1] "V"
##
## $variance$d
## [1] 1
##
## $variance$G
## [1] 2
##
## $variance$sigmasq
## [1] 0.9573296 0.2542782
##
## $variance$scale
## [1] 0.9573296 0.2542782
##
##
## $Vinv
## NULL
<- Mclust(simulation$x, modelNames="V") #unequal variance model_Mclust
plot(model_Mclust$BIC)
1.5 5.
2 Exercice 2
2.1 1.
data(faithful)
2.2 2.
?faithful
summary(faithful)
## eruptions waiting
## Min. :1.600 Min. :43.0
## 1st Qu.:2.163 1st Qu.:58.0
## Median :4.000 Median :76.0
## Mean :3.488 Mean :70.9
## 3rd Qu.:4.454 3rd Qu.:82.0
## Max. :5.100 Max. :96.0
plot(faithful$waiting,faithful$eruptions)
2.3 3.
<- Mclust(faithful) model
$parameters model
## $pro
## [1] 0.1656784 0.3563696 0.4779520
##
## $mean
## [,1] [,2] [,3]
## eruptions 3.793066 2.037596 4.463245
## waiting 77.521051 54.491158 80.833439
##
## $variance
## $variance$modelName
## [1] "EEE"
##
## $variance$d
## [1] 2
##
## $variance$G
## [1] 3
##
## $variance$sigma
## , , 1
##
## eruptions waiting
## eruptions 0.07825448 0.4801979
## waiting 0.48019785 33.7671464
##
## , , 2
##
## eruptions waiting
## eruptions 0.07825448 0.4801979
## waiting 0.48019785 33.7671464
##
## , , 3
##
## eruptions waiting
## eruptions 0.07825448 0.4801979
## waiting 0.48019785 33.7671464
##
##
## $variance$Sigma
## eruptions waiting
## eruptions 0.07825448 0.4801979
## waiting 0.48019785 33.7671464
##
## $variance$cholSigma
## eruptions waiting
## eruptions -0.27974 -1.716586
## waiting 0.00000 5.551619
$parameters model
## $pro
## [1] 0.1656784 0.3563696 0.4779520
##
## $mean
## [,1] [,2] [,3]
## eruptions 3.793066 2.037596 4.463245
## waiting 77.521051 54.491158 80.833439
##
## $variance
## $variance$modelName
## [1] "EEE"
##
## $variance$d
## [1] 2
##
## $variance$G
## [1] 3
##
## $variance$sigma
## , , 1
##
## eruptions waiting
## eruptions 0.07825448 0.4801979
## waiting 0.48019785 33.7671464
##
## , , 2
##
## eruptions waiting
## eruptions 0.07825448 0.4801979
## waiting 0.48019785 33.7671464
##
## , , 3
##
## eruptions waiting
## eruptions 0.07825448 0.4801979
## waiting 0.48019785 33.7671464
##
##
## $variance$Sigma
## eruptions waiting
## eruptions 0.07825448 0.4801979
## waiting 0.48019785 33.7671464
##
## $variance$cholSigma
## eruptions waiting
## eruptions -0.27974 -1.716586
## waiting 0.00000 5.551619
2.4 4.
plot(model)
2.5 5.
= hclust(dist(faithful),method = "ward.D2")
hclust_ward.D2 plot(hclust_ward.D2)
<- cutree(hclust_ward.D2,k = 2)
cut_ward.D2_2 <- Mclust(faithful,G=2)
model table("Mixture model"= model$classification,"Hierarchical model" = cut_ward.D2_2)
## Hierarchical model
## Mixture model 1 2
## 1 171 4
## 2 1 96
2.6 6.
<- cutree(hclust_ward.D2,k = 3)
cut_ward.D2_3 <- Mclust(faithful,G=3)
model table("Mixture model"= model$classification,"Hierarchical model" = cut_ward.D2_3)
## Hierarchical model
## Mixture model 1 2 3
## 1 23 4 13
## 2 0 96 1
## 3 106 0 29
2.7 7.
plot(faithful$waiting,faithful$eruptions,col=c("Red","Blue","Orange")[model$classification])
plot(faithful$waiting,faithful$eruptions,col=c("Red","Blue")[cut_ward.D2_2])
plot(faithful$waiting,faithful$eruptions,col=c("Red","Blue","Orange")[cut_ward.D2_3])