| Data analysis | TD 3 -- Mixture Models

# Exercice 1. ## 1. ```{r} n<-1000 Proportions<-c(1,2)/3 parameters<-list() parameters[[1]]<-list(mean=0,sd=1) parameters[[2]]<-list(mean=4,sd=1/2) ``` ```{r} simu_mixture<-function(n=100,parameters,Proportions){ z<-t(rmultinom(n,1,prob = Proportions)) z<-apply(z,1,which.max) x<-matrix(0,n,1) for (i in 1:length(z)){ x[i,1]<-rnorm(1,mean=parameters[[z[i]]]$mean,sd=parameters[[z[i]]]$sd) } return(list(x=x,z=z)) } ``` ```{r} simulation<-simu_mixture(n=1000, parameters=parameters,Proportions = Proportions) ``` ```{r} hist(simulation$x,breaks=50) ``` ```{r} N = 1000 pi_k = c(1/3,2/3) z <- sample(1:2,size=N,replace=T,prob=pi_k) mu <- c(0,4) sig <- c(1,1/2) X <- rep(NA,N) for(i in 1:N){ X[i] <- rnorm(1,mean = mu[z[i]], sd = sig[z[i]]) } ``` ```{r} hist(X,breaks=50) ``` ## 2. ```{r} res<-kmeans(simulation$x,2,nstart = 30) ``` ```{r} table(res$cluster,simulation$z) ``` ## 3. ```{r} X_g1= simulation$x[res$cluster==1,] X_g2= simulation$x[res$cluster==2,] ``` Paramètre groupe 1 : ```{r} mean(X_g1);sd(X_g1) ``` Paramètre groupe 2 : ```{r} mean(X_g2);sd(X_g2) ``` ## 4. ```{r} #install.packages("mclust",dependencies = T) library(mclust) ``` ```{r} ?Mclust ``` ### Model E ```{r} model_Mclust_E <- Mclust(simulation$x,G=2, modelNames="E") #equal variance model_Mclust_E ``` ```{r} table(model_Mclust_E$classification,simulation$z) ``` ```{r} model_Mclust_E$parameters ``` ### Model V ```{r} model_Mclust_V <- Mclust(simulation$x,G=2, modelNames="V") #unequal variance ``` ```{r} table(model_Mclust_V$classification,simulation$z) ``` ```{r} model_Mclust_V$parameters ``` ```{r} model_Mclust <- Mclust(simulation$x, modelNames="V") #unequal variance ``` ```{r} plot(model_Mclust$BIC) ``` ## 5. # Exercice 2 ## 1. ```{r} data(faithful) ``` ## 2. ```{r} ?faithful ``` ```{r} summary(faithful) ``` ```{r} plot(faithful$waiting,faithful$eruptions) ``` ## 3. ```{r} model <- Mclust(faithful) ``` ```{r} model$parameters ``` ```{r} model$parameters ``` ## 4. ```{r} plot(model) ``` ## 5. ```{r} hclust_ward.D2 = hclust(dist(faithful),method = "ward.D2") plot(hclust_ward.D2) ``` ```{r} cut_ward.D2_2 <- cutree(hclust_ward.D2,k = 2) model <- Mclust(faithful,G=2) table("Mixture model"= model$classification,"Hierarchical model" = cut_ward.D2_2) ``` ## 6. ```{r} cut_ward.D2_3 <- cutree(hclust_ward.D2,k = 3) model <- Mclust(faithful,G=3) table("Mixture model"= model$classification,"Hierarchical model" = cut_ward.D2_3) ``` ## 7. ```{r} plot(faithful$waiting,faithful$eruptions,col=c("Red","Blue","Orange")[model$classification]) plot(faithful$waiting,faithful$eruptions,col=c("Red","Blue")[cut_ward.D2_2]) plot(faithful$waiting,faithful$eruptions,col=c("Red","Blue","Orange")[cut_ward.D2_3]) ```