1 Exercice 1.

1.1 1.

n<-1000
Proportions<-c(1,2)/3
parameters<-list()
parameters[[1]]<-list(mean=0,sd=1)
parameters[[2]]<-list(mean=4,sd=1/2)
simu_mixture<-function(n=100,parameters,Proportions){
  z<-t(rmultinom(n,1,prob = Proportions))
  z<-apply(z,1,which.max)
  x<-matrix(0,n,1)
  for (i in 1:length(z)){ 
    x[i,1]<-rnorm(1,mean=parameters[[z[i]]]$mean,sd=parameters[[z[i]]]$sd)
  }
  return(list(x=x,z=z))
}
simulation<-simu_mixture(n=1000, parameters=parameters,Proportions = Proportions)
hist(simulation$x,breaks=50)

N = 1000
pi_k = c(1/3,2/3)
z <- sample(1:2,size=N,replace=T,prob=pi_k)

mu <- c(0,4)
sig <- c(1,1/2)
X <- rep(NA,N)

for(i in 1:N){
  X[i] <- rnorm(1,mean = mu[z[i]], sd = sig[z[i]])
}
hist(X,breaks=50)

1.2 2.

res<-kmeans(simulation$x,2,nstart = 30)
table(res$cluster,simulation$z)
##    
##       1   2
##   1 316   0
##   2   5 679

1.3 3.

X_g1= simulation$x[res$cluster==1,]
X_g2= simulation$x[res$cluster==2,]

Paramètre groupe 1 :

mean(X_g1);sd(X_g1)
## [1] -0.0318892
## [1] 0.9413205

Paramètre groupe 2 :

mean(X_g2);sd(X_g2)
## [1] 3.974728
## [1] 0.5206221

1.4 4.

#install.packages("mclust",dependencies = T)
library(mclust)
## Package 'mclust' version 5.4.10
## Type 'citation("mclust")' for citing this R package in publications.
?Mclust 

1.4.1 Model E

model_Mclust_E <- Mclust(simulation$x,G=2, modelNames="E") #equal variance
model_Mclust_E
## 'Mclust' model object: (E,2) 
## 
## Available components: 
##  [1] "call"           "data"           "modelName"      "n"             
##  [5] "d"              "G"              "BIC"            "loglik"        
##  [9] "df"             "bic"            "icl"            "hypvol"        
## [13] "parameters"     "z"              "classification" "uncertainty"
table(model_Mclust_E$classification,simulation$z)
##    
##       1   2
##   1 313   0
##   2   8 679
model_Mclust_E$parameters
## $pro
## [1] 0.3110587 0.6889413
## 
## $mean
##           1           2 
## -0.06026344  3.95880199 
## 
## $variance
## $variance$modelName
## [1] "E"
## 
## $variance$d
## [1] 1
## 
## $variance$G
## [1] 2
## 
## $variance$sigmasq
## [1] 0.4724138
## 
## 
## $Vinv
## NULL

1.4.2 Model V

model_Mclust_V <- Mclust(simulation$x,G=2, modelNames="V") #unequal variance
table(model_Mclust_V$classification,simulation$z)
##    
##       1   2
##   1 319   0
##   2   2 679
model_Mclust_V$parameters
## $pro
## [1] 0.3206809 0.6793191
## 
## $mean
##          1          2 
## 0.00356914 3.98559680 
## 
## $variance
## $variance$modelName
## [1] "V"
## 
## $variance$d
## [1] 1
## 
## $variance$G
## [1] 2
## 
## $variance$sigmasq
## [1] 0.9573296 0.2542782
## 
## $variance$scale
## [1] 0.9573296 0.2542782
## 
## 
## $Vinv
## NULL
model_Mclust <- Mclust(simulation$x, modelNames="V") #unequal variance
plot(model_Mclust$BIC)

1.5 5.

2 Exercice 2

2.1 1.

data(faithful)

2.2 2.

?faithful
summary(faithful)
##    eruptions        waiting    
##  Min.   :1.600   Min.   :43.0  
##  1st Qu.:2.163   1st Qu.:58.0  
##  Median :4.000   Median :76.0  
##  Mean   :3.488   Mean   :70.9  
##  3rd Qu.:4.454   3rd Qu.:82.0  
##  Max.   :5.100   Max.   :96.0
plot(faithful$waiting,faithful$eruptions)

2.3 3.

model <- Mclust(faithful)
model$parameters
## $pro
## [1] 0.1656784 0.3563696 0.4779520
## 
## $mean
##                [,1]      [,2]      [,3]
## eruptions  3.793066  2.037596  4.463245
## waiting   77.521051 54.491158 80.833439
## 
## $variance
## $variance$modelName
## [1] "EEE"
## 
## $variance$d
## [1] 2
## 
## $variance$G
## [1] 3
## 
## $variance$sigma
## , , 1
## 
##            eruptions    waiting
## eruptions 0.07825448  0.4801979
## waiting   0.48019785 33.7671464
## 
## , , 2
## 
##            eruptions    waiting
## eruptions 0.07825448  0.4801979
## waiting   0.48019785 33.7671464
## 
## , , 3
## 
##            eruptions    waiting
## eruptions 0.07825448  0.4801979
## waiting   0.48019785 33.7671464
## 
## 
## $variance$Sigma
##            eruptions    waiting
## eruptions 0.07825448  0.4801979
## waiting   0.48019785 33.7671464
## 
## $variance$cholSigma
##           eruptions   waiting
## eruptions  -0.27974 -1.716586
## waiting     0.00000  5.551619
model$parameters
## $pro
## [1] 0.1656784 0.3563696 0.4779520
## 
## $mean
##                [,1]      [,2]      [,3]
## eruptions  3.793066  2.037596  4.463245
## waiting   77.521051 54.491158 80.833439
## 
## $variance
## $variance$modelName
## [1] "EEE"
## 
## $variance$d
## [1] 2
## 
## $variance$G
## [1] 3
## 
## $variance$sigma
## , , 1
## 
##            eruptions    waiting
## eruptions 0.07825448  0.4801979
## waiting   0.48019785 33.7671464
## 
## , , 2
## 
##            eruptions    waiting
## eruptions 0.07825448  0.4801979
## waiting   0.48019785 33.7671464
## 
## , , 3
## 
##            eruptions    waiting
## eruptions 0.07825448  0.4801979
## waiting   0.48019785 33.7671464
## 
## 
## $variance$Sigma
##            eruptions    waiting
## eruptions 0.07825448  0.4801979
## waiting   0.48019785 33.7671464
## 
## $variance$cholSigma
##           eruptions   waiting
## eruptions  -0.27974 -1.716586
## waiting     0.00000  5.551619

2.4 4.

plot(model)

2.5 5.

hclust_ward.D2 = hclust(dist(faithful),method = "ward.D2")
plot(hclust_ward.D2)

cut_ward.D2_2 <- cutree(hclust_ward.D2,k = 2) 
model <- Mclust(faithful,G=2)
table("Mixture model"= model$classification,"Hierarchical model" = cut_ward.D2_2)
##              Hierarchical model
## Mixture model   1   2
##             1 171   4
##             2   1  96

2.6 6.

cut_ward.D2_3 <- cutree(hclust_ward.D2,k = 3) 
model <- Mclust(faithful,G=3)
table("Mixture model"= model$classification,"Hierarchical model" = cut_ward.D2_3)
##              Hierarchical model
## Mixture model   1   2   3
##             1  23   4  13
##             2   0  96   1
##             3 106   0  29

2.7 7.

plot(faithful$waiting,faithful$eruptions,col=c("Red","Blue","Orange")[model$classification])

plot(faithful$waiting,faithful$eruptions,col=c("Red","Blue")[cut_ward.D2_2])

plot(faithful$waiting,faithful$eruptions,col=c("Red","Blue","Orange")[cut_ward.D2_3])