library(FactoMineR)

Exercice 1

 library(MASS)
 data(crabs)
 crabsquant<-crabs[,4:8]

1.

res.pca <- PCA(crabsquant, scale.unit=FALSE, ncp=5)

eigvalues<-data.frame(res.pca$eig)
barplot(eigvalues$percentage.of.variance, names.arg=row.names(eigvalues))

Représentation des individus

plot(res.pca,choix="ind")

plot(res.pca,choix="varcor")

#res.pca$ind$cos2
library(factoextra)
## Le chargement a nécessité le package : ggplot2
## Welcome! Want to learn more? See two factoextra-related books at https://goo.gl/ve3WBa
fviz_pca_ind(res.pca, col.ind="cos2") + scale_color_gradient2(low="white",mid="blue", high="red", midpoint=0.50) + theme_minimal()

res.pca$var$contrib
##        Dim.1     Dim.2     Dim.3     Dim.4     Dim.5
## FL  8.350999 10.449058 25.722120 53.918281  1.559541
## RW  3.892033 74.773353 17.150833  2.199562  1.984219
## CL 35.927868  3.929368  3.074058  2.061926 55.006780
## CW 43.778731  8.293189 24.144989  1.578244 22.204847
## BD  8.050368  2.555033 29.908000 40.241986 19.244612
fviz_contrib(res.pca, choice = "ind", axes = 1)

2.

library(tidyverse)
## ── Attaching packages ─────────────────────────────────────── tidyverse 1.3.1 ──
## ✔ tibble  3.1.7      ✔ dplyr   1.0.10
## ✔ tidyr   1.2.0      ✔ stringr 1.4.0 
## ✔ readr   2.1.2      ✔ forcats 0.5.1 
## ✔ purrr   0.3.4
## ── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
## ✖ dplyr::filter() masks stats::filter()
## ✖ dplyr::lag()    masks stats::lag()
## ✖ dplyr::select() masks MASS::select()
#crabsquant_normalize <- scale(crabsquant)
crabsquant_normalize <- (crabsquant / crabsquant[,"CL"])
crabsquant_normalize[,"CL"] <- NULL
colnames(crabsquant_normalize) <- c("FL / CL", "RW / CL", "CW / CL", "BD / CL")
res.pca2 <- PCA(crabsquant_normalize, scale.unit=FALSE, ncp=5, graph=F)
eigvalues<-data.frame(res.pca2$eig)
barplot(eigvalues$percentage.of.variance, names.arg=row.names(eigvalues))

Représentation des individus

plot(res.pca2,choix="ind")

library(factoextra)
fviz_pca_ind(res.pca2, col.ind="cos2") + scale_color_gradient2(low="white",mid="blue", high="red", midpoint=0.50) + theme_minimal()

plot(res.pca2,choix="varcor")

data_pca <- res.pca2$ind$coord
plot(data_pca[,1],data_pca[,2],col=c("blue","red")[crabs$sex],pch=as.numeric(crabs$sp)+16 )

Exercice 2 :

d <- read.table("neighbor_globin.dat")
d[d<0] # aucun élément < 0
## character(0)
sum(abs(as.matrix(d[,-1]) - t(as.matrix(d[,-1])))) # d = t(d)
## [1] 0
diag(as.matrix(d[,-1])) # Diag nulle
##  [1] 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
colnames(d) <- c("globine",d$V1)
image(as.matrix(d[,-1]),col=heat.colors(32))

Nous pouvons distinguer 3 groupes de globines évident.

Delta <- as.matrix(d[,-1])**2
n <- nrow(Delta)
J <- diag(n) - 1/n * matrix(1,n,n)
B <- -1/2 * J %*% Delta %*% J
image(B)

tmp <- eigen(B)
U <- tmp$vectors
A <- diag(tmp$values)
barplot(diag(A))

m=3
X <- U[,1:m] %*% (A[1:m,1:m]**1/2)
rownames(X) <- d[,1]
X
##                  [,1]        [,2]        [,3]
## MYG_PHYCA  -2.6383105  0.14035672 -0.12911825
## MYG_HUMAN  -2.7439247  0.11504221 -0.03891154
## MYG_MOUSE  -2.7227285 -0.04794574 -0.08942775
## MYG_CHICK  -2.5278351 -0.12768784 -0.06753978
## MYG_ALLMI  -2.4169207 -0.13417781  0.20260044
## MYG_CHEMY  -2.2606231 -0.10139523 -0.07483969
## HBB_CHICK   0.8800819 -0.55477654 -0.01786071
## HBB_CHRPI   1.1862854 -0.60615637  0.07362542
## HBB1_IGUIG  1.3020736 -0.58343456 -0.12684656
## HBB_PHYCA   0.9844366 -0.64578349 -0.04018854
## HBB_HUMAN   1.3153661 -0.57756475 -0.01895086
## HBB1_MOUSE  1.0460926 -0.49590819  0.06049615
## HBB_ALLMI   0.9006238 -0.55007345  0.09048363
## HBA_CHICK   1.0822615  0.50671248 -0.32715233
## HBA_CHRPI   0.8781429  0.49109110 -0.27374855
## HBA_ALLMI   1.0320072  0.51302419 -0.44219449
## HBA_PHYCA   1.2249861  0.61452540 -0.03205960
## HBA_HUMAN   1.0205599  0.56862015 -0.07777836
## HBA_MOUSE   0.9164063  0.53400158 -0.15129493
## HBA1_IGUIG  0.9427166  0.46714129 -0.30692743
## GLB3_MYXGL  0.5983022  0.47438884  1.78763372
(tmp <- read.csv2("Globines_liste.txt",header=F,sep = ""))
##            V1         V2       V3        V4               V5            V6
## 1   HBA_CHICK Hemoglobin  alpha-A     chain                -        poulet
## 2   HBB_CHICK Hemoglobin     beta     chain                -        poulet
## 3   MYG_CHICK  Myoglobin        -    poulet         (Oiseau)              
## 4   HBA_PHYCA Hemoglobin    alpha     chain                -      Physeter
## 5   HBB_PHYCA Hemoglobin beta-1/2     chain                -      Physeter
## 6   MYG_PHYCA  Myoglobin        -  Physeter          catodon    (cachalot)
## 7   HBA_HUMAN Hemoglobin    alpha     chain          humaine     (Primate)
## 8   HBB_HUMAN Hemoglobin     beta     chain          humaine     (Primate)
## 9   MYG_HUMAN  Myoglobin  humaine (Primate)                               
## 10 GLB3_MYXGL   Globin-3        -    Myxine                               
## 11  HBA_MOUSE Hemoglobin    alpha     chain                -        Souris
## 12 HBB1_MOUSE Hemoglobin   beta-1     chain                -        Souris
## 13  MYG_MOUSE  Myoglobin        -       Mus          muculus             -
## 14  HBA_ALLMI Hemoglobin    alpha     chain                -     Alligator
## 15  HBB_ALLMI Hemoglobin     beta     chain                -     Alligator
## 16  MYG_ALLMI  Myoglobin        - Alligator mississippiensis (Crocodilien)
## 17 HBA1_IGUIG Hemoglobin  alpha-1     chain                -        Iguana
## 18 HBB1_IGUIG Hemoglobin   beta-1         -           Iguana        iguana
## 19  HBA_CHRPI Hemoglobin  alpha-A     chain                -     Chrysemys
## 20  HBB_CHRPI Hemoglobin     beta     chain                -     Chrysemys
## 21  MYG_CHEMY  Myoglobin        -  Chelonia            mydas      (Tortue)
##                  V7              V8
## 1          (Oiseau)                
## 2          (Oiseau)                
## 3                                  
## 4           catodon      (cachalot)
## 5           catodon      (cachalot)
## 6                 - Cetartiodactyle
## 7                                  
## 8                                  
## 9                                  
## 10                                 
## 11        (Rongeur)                
## 12        (Rongeur)                
## 13           Souris       (Rongeur)
## 14 mississippiensis   (Crocodilien)
## 15 mississippiensis   (Crocodilien)
## 16                                 
## 17           iguana      (Squamate)
## 18       (Squamate)                
## 19            Picta        (Tortue)
## 20            Picta        (Tortue)
## 21
X_globine <- data.frame(Globine = tmp$V1,type=tmp$V2,species = NA)
X_globine$type[grep("alpha",tmp$V3)] <- "hemoglobin_alpha"
X_globine$type[grep("beta",tmp$V3)] <- "hemoglobin_alpha"

for(i in 1:nrow(X_globine)){
  idx <- pmatch("(",unlist(tmp[i,]))
  if(is.na(idx)) {
    X_globine$species[i] <- ""
  } else {
  X_globine$species[i] <- tmp[i,idx]
  }
}
X <- X[order(rownames(X)),]
X_globine <- X_globine[order(X_globine$Globine),]

X_globine$Globine<- as.factor(X_globine$Globine)
X_globine$type<- as.factor(X_globine$type)
X_globine$species<- as.factor(X_globine$species)
plot(X[,1],X[,2],pch=as.numeric(X_globine$type),col=as.numeric(X_globine$species))

#text(X[,1],X[,2],colnames(Delta),col=as.numeric(X_globine$species))

plot(X[,1],X[,3],pch=as.numeric(X_globine$type),col=as.numeric(X_globine$species))

#text(X[,1],X[,3],colnames(Delta),col=as.numeric(X_globine$species))

plot(X[,2],X[,3],pch=as.numeric(X_globine$type),col=as.numeric(X_globine$species))

#text(X[,2],X[,3],colnames(Delta),col=as.numeric(X_globine$species))
library(MASS)
tmp <- cmdscale(Delta,3,eig=T)
plot(tmp$points[,1],tmp$points[,2],type = "n")
text(tmp$points[,1],tmp$points[,2],colnames(Delta))

tmp.sh<- Shepard(dist(as.matrix(d[,-1])), tmp$points)
plot(tmp.sh, pch = ".",ylab="distances dans l'espace réduit")
lines(tmp.sh$x, tmp.sh$yf, type = "S")