#############################################
#Khattree, R. and Naik, D. N.(2000)
#Correspondence analysis. 
#In Multivariate Data Reduction and Discrimination with SAS software (pp. 443--509)
#SAS institute and John Wiley
#############################################

#------------------------------------------#
#Step1: 7.2 Correspondence Analysis (pp.444--449)
#------------------------------------------#

##(a) 7.1式
a <- 4
b <- 3
P <- matrix(c(5, 10, 12, 10,
              5, 30,  0,  1,
              5, 10, 12,  0), nrow=a, ncol=b) * .01
P     #7.1式 


##(b) row and col sums of P
r <- matrix(rowSums(P))
c <- matrix(colSums(P))

Dr <- matrix(0, ncol=a, nrow=a)
Dc <- matrix(0, ncol=b, nrow=b)
diag(Dr) <- r
diag(Dc) <- c


#(c) rows profiles (7.2式)
R  <- solve(Dr) %*% P
R                       #7.3式 
rowSums(R)  #sum of the elements of any row profile is one.


##(d) column profiles (7.2式)
C   <- solve(Dc) %*% t(P)
C
rowSums(C)


##(e) average row and colum profiles
c
t(r) %*% R  #7.4式 

r
t(c) %*% C


##(f) quantity d^2
t(R[2,] - c) %*% solve(Dc) %*% (R[2,] - c)
t(R[2,] - c) %*% (R[2,] - c)


##(g) singular value decomposition
m <- min(c(a, b) - 1)       #the rank of P
k <- 2
T <- sqrt(solve(Dr)) %*% (P - r %*% t(c)) %*% sqrt(solve(Dc))

A <- sqrt(Dr) %*% svd(T)$u[, 1:k]
B <- sqrt(Dc) %*% svd(T)$v[, 1:k]
Lamda <- matrix(0, nrow=k, ncol=k)
diag(Lamda) <- svd(T)$d[1:k]


##(h) principal coordinates
F <- solve(Dr) %*% A %*% Lamda
G <- solve(Dc) %*% B %*% Lamda


##(i) standard cordinates
solve(Dc) %*% B


#------------------------------------------#
#Step2: 関数定義
#------------------------------------------#
my.corresp <- function(tab, k=2){
        a <- nrow(tab)
        b <- ncol(tab)
        P <- prop.table(tab)
        r <- matrix(rowSums(P))
        c <- matrix(colSums(P))
        
        Dr <- matrix(0, ncol=a, nrow=a)
        Dc <- matrix(0, ncol=b, nrow=b)
        diag(Dr) <- r
        diag(Dc) <- c
                
        #rows and colum profiles (7.2式)
        R  <- solve(Dr) %*% P
        C   <- solve(Dc) %*% t(P)     
                              
        #singular value decomposition
        m <- min(c(a, b) - 1)       #the rank of P
        k <- k
        T <- sqrt(solve(Dr)) %*% (P - r %*% t(c)) %*% sqrt(solve(Dc))
        
        A <- sqrt(Dr) %*% svd(T)$u[, 1:k]
        B <- sqrt(Dc) %*% svd(T)$v[, 1:k]
        Lamda <- matrix(0, nrow=k, ncol=k)
        lamda <- svd(T)$d[1:k]
        diag(Lamda) <- lamda
                        
        ##principal coordinates
        F <- solve(Dr) %*% A %*% Lamda
        G <- solve(Dc) %*% B %*% Lamda
        
        ## standard cordinates
        Ga <- solve(Dc) %*% B
        
        ##chisq        
        chisq <- sum(tab) * sum(lamda^2)
        df    <- (a - 1) * (b - 1)
        p.value <- pchisq(chisq, df, lower.tail=F)
        
        
        ##命名
        rowlab <- paste(names(attr(tab, "dimnames"))[1], rownames(tab), sep=":")
        collab <- paste(names(attr(tab, "dimnames"))[2], colnames(tab), sep=":")
        dimlab <- paste("Dim", 1:k, sep="") 
        colnames(F) <- colnames(G) <- colnames(Ga) <- dimlab
        rownames(F) <- rowlab
        rownames(G) <- rownames(Ga) <- collab
        
        
        ##asymmetric plot
        plot(rbind(F, Ga), type="n", main="Asymmetric plot")
        lines(F, type="p", pch=1)
        text(F, rowlab, pos=3)
        lines(Ga, type="p", pch=2)
        text(Ga, collab, pos=2)
        
        ##symmetric plot
        X11()
        plot(rbind(F, G), type="n", main="Symmetric plot")
        lines(F, type="p", pch=1)
        text(F, rowlab, pos=3)
        lines(G, type="p", pch=2)
        text(G, collab, pos=2)        
        
        output <- list(table=tab, row.profile=R, col.profile=C, row=F, col=G, cola = Ga, lamda=lamda, chisq=chisq, df=df, p.value=p.value)
        return(output)
}


#------------------------------------------#
#Step3: Example 1 Analysis of Headache Types by Age Data (pp.449--456)
#------------------------------------------#

tenkai <- function(f)
{
    nc <- ncol(f)
    nr <- nrow(f)
    list(x=rep(rep(1:nr, nc), f), y=rep(rep(1:nc, each=nr), f))
}

##(a) データセットの作成
ex1 <- as.table(matrix(c(77, 52, 20,
                         66, 82, 68,
                         31, 49, 37,
                         20, 44, 31,
                         27, 52, 25), ncol=3, byrow=T))
ex1 <- data.frame(tenkai(ex1))
ex1[, 1] <- factor(ex1[, 1], labels=c("0-19", "20-29", "30-39", "40-49", "50-59"))
ex1[, 2] <- factor(ex1[, 2], labels=c("Ndiagno", "Tension", "Migraine"))
names(ex1) <- c("age", "categories")


##(b) 分析 
my.corresp(table(ex1))
graphics.off()


##(c) ade4
library(ade4)
ex1 <- as.data.frame(table(ex1)[1:5, 1:3])
x <- dudi.coa(ex1, nf=2)
2
scatter.dudi(x)
x$co
x$li
graphics.off()


##(d) FactoMineR
library(FactoMineR)
x <- CA(ex1)
x$row
x$col
graphics.off()


#------------------------------------------#
#Step4: Example 2 Correspondence analysis of socioeconomic status by mental health of children data (pp.456--463)
#------------------------------------------#

##(a) データセットの作成
ex2 <- as.table(matrix(c(64,94,58,46,
                         57,94,54,40,
                         57,105,65,60,
                         72,141,77,94,
                         36,97,54,78,
                         21,71,54,71), ncol=4, byrow=T))
ex2 <- data.frame(tenkai(ex2))
ex2[, 1] <- factor(ex2[, 1], labels=c("1","2","3","4","5","6"))
ex2[, 2] <- factor(ex2[, 2], labels=c("Well", "Mild", "Moderate", "Impaired"))
names(ex2) <- c("status", "mental")


##(b) 分析 
my.corresp(table(ex2))


##(c) ade4
ex2 <- as.data.frame(table(ex2)[1:6, 1:4])
x <- dudi.coa(ex2, nf=2)
2
scatter.dudi(x)
x$co
x$li
graphics.off()


##(d) FactoMineR
x <- CA(ex2, ncp=2)
x$row
x$col
graphics.off()


#------------------------------------------#
#Step5: Example 3 Multiple correspondence analysis of rating of drug's efficacy: analegesic drugs data (pp.463--471)
#------------------------------------------#
##(a) データセットの作成
ex3 <- as.table(matrix(c(5,1,10,8,6,
                         5,3,3,8,12,
                         10,6,12,3,0,
                         7,12,8,1,1), ncol=5, byrow=T))
ex3 <- data.frame(tenkai(ex3))
ex3[, 1] <- factor(ex3[, 1], labels=c("z100","ec4","c60","c15"))
ex3[, 2] <- factor(ex3[, 2], labels=c("poor","fair","good","verygood","excellent"))
names(ex3) <- c("drug", "efficacy")
Z1 <- model.matrix(~drug - 1, data=ex3)
Z2 <- model.matrix(~efficacy - 1, data=ex3)
colnames(Z1) <- gsub("drug","", colnames(Z1))
colnames(Z2) <- gsub("efficacy","", colnames(Z2))
Z <- cbind(Z1, Z2)
burt <- t(Z) %*% Z


##(b) ade4
x <- dudi.acm(ex3, nf=2, scann = FALSE)
x$co
boxplot(x)
scatter(x)
graphics.off()


##(c) FactoMineR
x <- MCA(ex3, ncp=2)
x$var$coord
graphics.off()