library(kernlab)
library(FactoMineR)
library(tidyverse)
## ── Attaching packages ─────────────────────────────────────── tidyverse 1.3.1 ──
## ✔ ggplot2 3.3.6 ✔ purrr 0.3.4
## ✔ tibble 3.1.7 ✔ dplyr 1.0.10
## ✔ tidyr 1.2.0 ✔ stringr 1.4.0
## ✔ readr 2.1.2 ✔ forcats 0.5.1
## ── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
## ✖ ggplot2::alpha() masks kernlab::alpha()
## ✖ purrr::cross() masks kernlab::cross()
## ✖ dplyr::filter() masks stats::filter()
## ✖ dplyr::lag() masks stats::lag()
data(spam)
spam_quant <- spam %>% select(-"type")
spam_quant_norm <- scale(spam_quant)
res.pca <- PCA(spam_quant_norm,graph = F)
eigvalues<-data.frame(res.pca$eig)
barplot(eigvalues$percentage.of.variance, names.arg=row.names(eigvalues))
X <- res.pca$ind$coord
plot(X[,1],X[,2],col = c("Blue","Red")[spam$type])
N = 100
spam_subset_idx <- sample(1:nrow(spam),N)
table(spam[spam_subset_idx,58])
##
## nonspam spam
## 58 42
kpc <- kpca(~.,data=as.data.frame(spam_quant_norm[spam_subset_idx,]),kernel="rbfdot",kpar=list(sigma=0.01))
#kpcv <- pcv(kpc)
plot(rotated(kpc)[,1:2],col=c("Blue","Red")[spam[spam_subset_idx,58]],xlab="1st Principal Comp.",ylab="2nd Principal Comp.")
barplot(eig(kpc)/sum(eig(kpc)))
kpc <- kpca(~.,data=as.data.frame(spam_quant_norm[spam_subset_idx,]),kernel="polydot",kpar=list(degree=2),features=2)
kpcv <- pcv(kpc)
plot(rotated(kpc),col=c("Blue","Red")[spam[spam_subset_idx,58]],xlab="1st Principal Comp.",ylab="2nd Principal Comp.")
barplot(eig(kpc)/sum(eig(kpc)))
kpc <- kpca(~.,data=as.data.frame(spam_quant_norm[spam_subset_idx,]),kernel="polydot",kpar=list(degree=5))
kpcv <- pcv(kpc)
plot(rotated(kpc)[,1:2],col=c("Blue","Red")[spam[spam_subset_idx,58]],xlab="1st Principal Comp.",ylab="2nd Principal Comp.")
barplot(eig(kpc)/sum(eig(kpc)))
myKPCA<-function(X,k=2,kernel="Gaussian",sigma=1){
X<-as.matrix(X)
if (kernel == "Gaussian")
{K<- exp(-1/sigma*(as.matrix(dist(X))^2))
} else {
K<-X%*%t(X)
}
# Centering
n<-nrow(X)
II<-matrix(1/n,n,n)
Ktilde<-K -2*II %*% K+ II %*%K%*%II
# Eigenvalue decomposition
res<-svd(Ktilde)
alpha<-res$u
lambda<-res$d^2
# Projection
Y<-K%*%alpha[,1:k]
return(list(Y=Y,lambda=lambda[1:k]))
}
k=2
my_kpca <- myKPCA(spam_quant_norm[spam_subset_idx,],k=k,sigma=1)
plot(my_kpca$Y,col=c("Blue","Red")[spam[spam_subset_idx,58]],xlab="1st Principal Comp.",ylab="2nd Principal Comp.")
library(mlbench)
set.seed(111)
obj <- mlbench.spirals(100,1,0.025)
my.data <- data.frame(4 * obj$x)
names(my.data)<-c("X1","X2")
plot(my.data)
my.data<-as.matrix(my.data)
Compute \(K\) the matrix of similarities for this dataset using the gaussian kernel
rbfkernel <- rbfdot(sigma = 1)
K <- as.matrix(kernelMatrix(rbfkernel,my.data))
image(K)
The next step consists in computing an affinity matrix \(A\) based on \(K\). \(A\) must be made of positive values and be symmetric.
diag(K) = 0
A <- matrix(0,N,N)
neighboor = 3
for(i in 1:N){
idx = order(K[i,],decreasing=T)[1:neighboor]
A[i,idx] = 1
}
image(A)
ATTENTIOn A n’est pas symétrique.
A <- K>0.5
diag(A) = 0
image(A)
library(igraph)
##
## Attachement du package : 'igraph'
## Les objets suivants sont masqués depuis 'package:dplyr':
##
## as_data_frame, groups, union
## Les objets suivants sont masqués depuis 'package:purrr':
##
## compose, simplify
## L'objet suivant est masqué depuis 'package:tidyr':
##
## crossing
## L'objet suivant est masqué depuis 'package:tibble':
##
## as_data_frame
## Les objets suivants sont masqués depuis 'package:stats':
##
## decompose, spectrum
## L'objet suivant est masqué depuis 'package:base':
##
## union
plot(graph_from_adjacency_matrix(A,mode="undirected"))
D<-diag(colSums(A))
L<-D-A
unnormalized graph Laplacian \(U=D-A\)
U <- D - A
k <- 2
evL <- eigen(U, symmetric=TRUE)
Z <- evL$vectors[,(ncol(evL$vectors)-k+1):ncol(evL$vectors)]
Z : matrice des k vecteurs propres associés aux plus petites valeurs propres.
plot(Z)
library(stats)
km <- kmeans(Z, centers=k, nstart=5)
plot(my.data, col=km$cluster)
spec_clust = specc(my.data,centers=k)
plot(my.data, col=spec_clust@.Data, pch=4) # estimated classes (x)
points(my.data, col=obj$classes, pch=5) # true classes (<>)