TD4 - ACP

```{r}
library(FactoMineR)
```

# Exercice 1

```{r}
library(MASS)
data(crabs)
# Keep columns 4 to 8 of `crabs` (the quantitative measurements).
crabsquant <- crabs[, 4:8]
```

## 1.

```{r}
# PCA without scaling to unit variance, keeping 5 components.
res.pca <- PCA(crabsquant, scale.unit = FALSE, ncp = 5)
```

```{r}
# Scree plot: percentage of variance carried by each component.
eigvalues <- data.frame(res.pca$eig)
barplot(eigvalues$percentage.of.variance, names.arg = row.names(eigvalues))
```

Représentation des individus

```{r}
plot(res.pca, choix = "ind")
```

```{r}
plot(res.pca, choix = "varcor")
```

```{r}
#res.pca$ind$cos2
```

```{r}
library(factoextra)
# Individuals map coloured by their cos2 (quality of representation).
fviz_pca_ind(res.pca, col.ind = "cos2") +
  scale_color_gradient2(
    low = "white", mid = "blue", high = "red", midpoint = 0.50
  ) +
  theme_minimal()
```

```{r}
res.pca$var$contrib
```

```{r}
fviz_contrib(res.pca, choice = "ind", axes = 1)
```

## 2.

```{r}
library(tidyverse)
#crabsquant_normalize <- scale(crabsquant)
# Divide every measurement by CL, then drop the CL column
# (it is identically 1 after the division).
crabsquant_normalize <- crabsquant / crabsquant[, "CL"]
crabsquant_normalize$CL <- NULL
colnames(crabsquant_normalize) <- c("FL / CL", "RW / CL", "CW / CL", "BD / CL")
```

```{r}
res.pca2 <- PCA(crabsquant_normalize, scale.unit = FALSE, ncp = 5, graph = FALSE)
```

```{r}
eigvalues <- data.frame(res.pca2$eig)
barplot(eigvalues$percentage.of.variance, names.arg = row.names(eigvalues))
```

Représentation des individus

```{r}
plot(res.pca2, choix = "ind")
```

```{r}
library(factoextra)
fviz_pca_ind(res.pca2, col.ind = "cos2") +
  scale_color_gradient2(
    low = "white", mid = "blue", high = "red", midpoint = 0.50
  ) +
  theme_minimal()
```

```{r}
plot(res.pca2, choix = "varcor")
```

```{r}
# First factorial plane: colour encodes sex, plotting symbol encodes species.
data_pca <- res.pca2$ind$coord
plot(
  data_pca[, 1], data_pca[, 2],
  col = c("blue", "red")[crabs$sex],
  pch = as.numeric(crabs$sp) + 16
)
```

# Exercice 2 :

1.

```{r}
d <- read.table("neighbor_globin.dat")
```

2.

```{r}
# Run the sanity checks on the numeric part only: the first column holds the
# globin labels, and comparing labels with 0 is meaningless.
d_num <- as.matrix(d[, -1])
d_num[d_num < 0]            # no element < 0 expected
sum(abs(d_num - t(d_num)))  # 0 when d = t(d)
diag(d_num)                 # diagonal expected to be zero
```

```{r}
# as.character() guards against V1 having been read as a factor.
colnames(d) <- c("globine", as.character(d$V1))
```

```{r}
image(as.matrix(d[, -1]), col = heat.colors(32))
```

Nous pouvons distinguer 3 groupes de globines évidents.

3.

```{r}
# Matrix of squared distances.
Delta <- as.matrix(d[, -1])^2
```

4.
```{r}
n <- nrow(Delta)
# Centering matrix J = I - (1/n) * 1 1'.
J <- diag(n) - 1 / n * matrix(1, n, n)
```

5.

```{r}
# Double-centred matrix B = -1/2 * J Delta J.
B <- -1 / 2 * J %*% Delta %*% J
image(B)
```

6.

```{r}
tmp <- eigen(B)
U <- tmp$vectors
A <- diag(tmp$values)
```

7.

```{r}
barplot(diag(A))
```

8.

```{r}
m <- 3
# Coordinates X = U_m %*% A_m^(1/2). The square root must be taken explicitly:
# `A**1/2` parses as (A^1)/2 and would merely halve the eigenvalues.
# A_m is diagonal, so the element-wise sqrt() is its matrix square root.
X <- U[, seq_len(m)] %*% sqrt(A[seq_len(m), seq_len(m)])
rownames(X) <- d[, 1]
X
```

```{r}
(tmp <- read.csv2("Globines_liste.txt", header = FALSE, sep = ""))
X_globine <- data.frame(Globine = tmp$V1, type = tmp$V2, species = NA)
```

```{r}
# Refine the type using the third field of the listing.
X_globine$type[grep("alpha", tmp$V3)] <- "hemoglobin_alpha"
X_globine$type[grep("beta", tmp$V3)] <- "hemoglobin_beta"

# The species is the field of the row that starts with "(";
# rows without such a field get an empty species.
for (i in seq_len(nrow(X_globine))) {
  idx <- pmatch("(", unlist(tmp[i, ]))
  if (is.na(idx)) {
    X_globine$species[i] <- ""
  } else {
    X_globine$species[i] <- tmp[i, idx]
  }
}
```

```{r}
# Sort both tables by globin name so that their rows line up.
X <- X[order(rownames(X)), ]
X_globine <- X_globine[order(X_globine$Globine), ]
X_globine$Globine <- as.factor(X_globine$Globine)
X_globine$type <- as.factor(X_globine$type)
X_globine$species <- as.factor(X_globine$species)
```

```{r}
# Pairwise plots of the three MDS axes:
# symbol encodes the globin type, colour encodes the species.
plot(
  X[, 1], X[, 2],
  pch = as.numeric(X_globine$type), col = as.numeric(X_globine$species)
)
#text(X[,1],X[,2],colnames(Delta),col=as.numeric(X_globine$species))
plot(
  X[, 1], X[, 3],
  pch = as.numeric(X_globine$type), col = as.numeric(X_globine$species)
)
#text(X[,1],X[,3],colnames(Delta),col=as.numeric(X_globine$species))
plot(
  X[, 2], X[, 3],
  pch = as.numeric(X_globine$type), col = as.numeric(X_globine$species)
)
#text(X[,2],X[,3],colnames(Delta),col=as.numeric(X_globine$species))
```

```{r}
library(MASS)
# cmdscale() squares its input itself, so it must receive the distances,
# not the already squared matrix Delta.
tmp <- cmdscale(sqrt(Delta), k = 3, eig = TRUE)
plot(tmp$points[, 1], tmp$points[, 2], type = "n")
text(tmp$points[, 1], tmp$points[, 2], colnames(Delta))
```

```{r}
# as.dist() reuses the table as distances; dist() would instead compute
# Euclidean distances between the rows of the distance matrix.
tmp.sh <- Shepard(as.dist(as.matrix(d[, -1])), tmp$points)
plot(tmp.sh, pch = ".", ylab = "distances dans l'espace réduit")
lines(tmp.sh$x, tmp.sh$yf, type = "S")
```