library(FactoMineR)
library(MASS)
data(crabs)
crabsquant<-crabs[,4:8]
res.pca <- PCA(crabsquant, scale.unit=FALSE, ncp=5)
eigvalues<-data.frame(res.pca$eig)
barplot(eigvalues$percentage.of.variance, names.arg=row.names(eigvalues))
Représentation des individus
plot(res.pca,choix="ind")
plot(res.pca,choix="varcor")
#res.pca$ind$cos2
library(factoextra)
## Le chargement a nécessité le package : ggplot2
## Welcome! Want to learn more? See two factoextra-related books at https://goo.gl/ve3WBa
fviz_pca_ind(res.pca, col.ind="cos2") + scale_color_gradient2(low="white",mid="blue", high="red", midpoint=0.50) + theme_minimal()
res.pca$var$contrib
## Dim.1 Dim.2 Dim.3 Dim.4 Dim.5
## FL 8.350999 10.449058 25.722120 53.918281 1.559541
## RW 3.892033 74.773353 17.150833 2.199562 1.984219
## CL 35.927868 3.929368 3.074058 2.061926 55.006780
## CW 43.778731 8.293189 24.144989 1.578244 22.204847
## BD 8.050368 2.555033 29.908000 40.241986 19.244612
fviz_contrib(res.pca, choice = "ind", axes = 1)
library(tidyverse)
## ── Attaching packages ─────────────────────────────────────── tidyverse 1.3.1 ──
## ✔ tibble 3.1.7 ✔ dplyr 1.0.10
## ✔ tidyr 1.2.0 ✔ stringr 1.4.0
## ✔ readr 2.1.2 ✔ forcats 0.5.1
## ✔ purrr 0.3.4
## ── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
## ✖ dplyr::filter() masks stats::filter()
## ✖ dplyr::lag() masks stats::lag()
## ✖ dplyr::select() masks MASS::select()
#crabsquant_normalize <- scale(crabsquant)
crabsquant_normalize <- (crabsquant / crabsquant[,"CL"])
crabsquant_normalize[,"CL"] <- NULL
colnames(crabsquant_normalize) <- c("FL / CL", "RW / CL", "CW / CL", "BD / CL")
res.pca2 <- PCA(crabsquant_normalize, scale.unit=FALSE, ncp=5, graph=F)
eigvalues<-data.frame(res.pca2$eig)
barplot(eigvalues$percentage.of.variance, names.arg=row.names(eigvalues))
Représentation des individus
plot(res.pca2,choix="ind")
library(factoextra)
fviz_pca_ind(res.pca2, col.ind="cos2") + scale_color_gradient2(low="white",mid="blue", high="red", midpoint=0.50) + theme_minimal()
plot(res.pca2,choix="varcor")
data_pca <- res.pca2$ind$coord
plot(data_pca[,1],data_pca[,2],col=c("blue","red")[crabs$sex],pch=as.numeric(crabs$sp)+16 )
d <- read.table("neighbor_globin.dat")
d[d<0] # aucun élément < 0
## character(0)
sum(abs(as.matrix(d[,-1]) - t(as.matrix(d[,-1])))) # d = t(d)
## [1] 0
diag(as.matrix(d[,-1])) # Diag nulle
## [1] 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
colnames(d) <- c("globine",d$V1)
image(as.matrix(d[,-1]),col=heat.colors(32))
Nous pouvons distinguer 3 groupes de globines évident.
Delta <- as.matrix(d[,-1])**2
n <- nrow(Delta)
J <- diag(n) - 1/n * matrix(1,n,n)
B <- -1/2 * J %*% Delta %*% J
image(B)
tmp <- eigen(B)
U <- tmp$vectors
A <- diag(tmp$values)
barplot(diag(A))
m=3
X <- U[,1:m] %*% (A[1:m,1:m]**1/2)
rownames(X) <- d[,1]
X
## [,1] [,2] [,3]
## MYG_PHYCA -2.6383105 0.14035672 -0.12911825
## MYG_HUMAN -2.7439247 0.11504221 -0.03891154
## MYG_MOUSE -2.7227285 -0.04794574 -0.08942775
## MYG_CHICK -2.5278351 -0.12768784 -0.06753978
## MYG_ALLMI -2.4169207 -0.13417781 0.20260044
## MYG_CHEMY -2.2606231 -0.10139523 -0.07483969
## HBB_CHICK 0.8800819 -0.55477654 -0.01786071
## HBB_CHRPI 1.1862854 -0.60615637 0.07362542
## HBB1_IGUIG 1.3020736 -0.58343456 -0.12684656
## HBB_PHYCA 0.9844366 -0.64578349 -0.04018854
## HBB_HUMAN 1.3153661 -0.57756475 -0.01895086
## HBB1_MOUSE 1.0460926 -0.49590819 0.06049615
## HBB_ALLMI 0.9006238 -0.55007345 0.09048363
## HBA_CHICK 1.0822615 0.50671248 -0.32715233
## HBA_CHRPI 0.8781429 0.49109110 -0.27374855
## HBA_ALLMI 1.0320072 0.51302419 -0.44219449
## HBA_PHYCA 1.2249861 0.61452540 -0.03205960
## HBA_HUMAN 1.0205599 0.56862015 -0.07777836
## HBA_MOUSE 0.9164063 0.53400158 -0.15129493
## HBA1_IGUIG 0.9427166 0.46714129 -0.30692743
## GLB3_MYXGL 0.5983022 0.47438884 1.78763372
(tmp <- read.csv2("Globines_liste.txt",header=F,sep = ""))
## V1 V2 V3 V4 V5 V6
## 1 HBA_CHICK Hemoglobin alpha-A chain - poulet
## 2 HBB_CHICK Hemoglobin beta chain - poulet
## 3 MYG_CHICK Myoglobin - poulet (Oiseau)
## 4 HBA_PHYCA Hemoglobin alpha chain - Physeter
## 5 HBB_PHYCA Hemoglobin beta-1/2 chain - Physeter
## 6 MYG_PHYCA Myoglobin - Physeter catodon (cachalot)
## 7 HBA_HUMAN Hemoglobin alpha chain humaine (Primate)
## 8 HBB_HUMAN Hemoglobin beta chain humaine (Primate)
## 9 MYG_HUMAN Myoglobin humaine (Primate)
## 10 GLB3_MYXGL Globin-3 - Myxine
## 11 HBA_MOUSE Hemoglobin alpha chain - Souris
## 12 HBB1_MOUSE Hemoglobin beta-1 chain - Souris
## 13 MYG_MOUSE Myoglobin - Mus muculus -
## 14 HBA_ALLMI Hemoglobin alpha chain - Alligator
## 15 HBB_ALLMI Hemoglobin beta chain - Alligator
## 16 MYG_ALLMI Myoglobin - Alligator mississippiensis (Crocodilien)
## 17 HBA1_IGUIG Hemoglobin alpha-1 chain - Iguana
## 18 HBB1_IGUIG Hemoglobin beta-1 - Iguana iguana
## 19 HBA_CHRPI Hemoglobin alpha-A chain - Chrysemys
## 20 HBB_CHRPI Hemoglobin beta chain - Chrysemys
## 21 MYG_CHEMY Myoglobin - Chelonia mydas (Tortue)
## V7 V8
## 1 (Oiseau)
## 2 (Oiseau)
## 3
## 4 catodon (cachalot)
## 5 catodon (cachalot)
## 6 - Cetartiodactyle
## 7
## 8
## 9
## 10
## 11 (Rongeur)
## 12 (Rongeur)
## 13 Souris (Rongeur)
## 14 mississippiensis (Crocodilien)
## 15 mississippiensis (Crocodilien)
## 16
## 17 iguana (Squamate)
## 18 (Squamate)
## 19 Picta (Tortue)
## 20 Picta (Tortue)
## 21
X_globine <- data.frame(Globine = tmp$V1,type=tmp$V2,species = NA)
X_globine$type[grep("alpha",tmp$V3)] <- "hemoglobin_alpha"
X_globine$type[grep("beta",tmp$V3)] <- "hemoglobin_alpha"
for(i in 1:nrow(X_globine)){
idx <- pmatch("(",unlist(tmp[i,]))
if(is.na(idx)) {
X_globine$species[i] <- ""
} else {
X_globine$species[i] <- tmp[i,idx]
}
}
X <- X[order(rownames(X)),]
X_globine <- X_globine[order(X_globine$Globine),]
X_globine$Globine<- as.factor(X_globine$Globine)
X_globine$type<- as.factor(X_globine$type)
X_globine$species<- as.factor(X_globine$species)
plot(X[,1],X[,2],pch=as.numeric(X_globine$type),col=as.numeric(X_globine$species))
#text(X[,1],X[,2],colnames(Delta),col=as.numeric(X_globine$species))
plot(X[,1],X[,3],pch=as.numeric(X_globine$type),col=as.numeric(X_globine$species))
#text(X[,1],X[,3],colnames(Delta),col=as.numeric(X_globine$species))
plot(X[,2],X[,3],pch=as.numeric(X_globine$type),col=as.numeric(X_globine$species))
#text(X[,2],X[,3],colnames(Delta),col=as.numeric(X_globine$species))
library(MASS)
tmp <- cmdscale(Delta,3,eig=T)
plot(tmp$points[,1],tmp$points[,2],type = "n")
text(tmp$points[,1],tmp$points[,2],colnames(Delta))
tmp.sh<- Shepard(dist(as.matrix(d[,-1])), tmp$points)
plot(tmp.sh, pch = ".",ylab="distances dans l'espace réduit")
lines(tmp.sh$x, tmp.sh$yf, type = "S")