############################################################
#Peng, C.-Y. J., & So, T.-S. H. (2002)
#Logistic regression analysis and reporting: A primer.
#Understanding Statistics, 1, 31-70.
############################################################
source("http://blue.zero.jp/yokumura/R/testtheory/ctt.txt")
#----------------------------------------------------------#
#Figure 1 (p.34)
#----------------------------------------------------------#

##(a) データを作成
alpha <- c(0,0)
beta  <- c(.2,.4)
x     <- seq(-30,30,by=.01)


##(b) (2)式を定義
form1 <- function(a,b,x){
	exp(a + b * x)/(1 + exp(a + b * x))
}


##(c) 描画

#(1) alpha = 0, beta=.2
plot(form1(a=alpha[1], b = beta[1], x= x)~x, type="l", ylab="P(Y=1)",  xlab="X")

#(2) alpha = 0, beta=.4
lines(form1(a=alpha[2], b = beta[2], x= x)~x, lty=2)


#(3) x = alpha/betaのときのyは0.5になる
form1(a=alpha[1], b = beta[1], x= alpha[1]/beta[1])
form1(a=alpha[2], b = beta[2], x= alpha[2]/beta[2])

graphics.off()


#----------------------------------------------------------#
#Table 1 (p.34)
#----------------------------------------------------------#

##(a) データを作成
tab1 <- matrix(c(73,15,23,11),ncol=2,byrow=T)
tab1 <- as.table(tab1)
rownames(tab1) <- c("Low","Normal")
colnames(tab1) <- c("Low","Normal")


##(b) test of independence
chisq.test(tab1,correct=F)


##(c) odds ratio
p1 <- prop.table(tab1, margin=1)[1,1] #低体重の母が低体重の子を産む比率
p0 <- prop.table(tab1, margin=1)[2,1] #通常体重の母が低体重の子を産む比率
(p1/(1-p1)) / (p0/(1-p0))   #odds ratio


#----------------------------------------------------------#
#Table 2 (p.37)
#----------------------------------------------------------#

##(a) データを呼び出し
library(car)		#carパッケージに，Mroz (1987) のデータが格納されている
data(Mroz)
Mroz			#論文中では，N=752と書いてるが，オリジナルはN=753
?Mroz			#変数名を確認
dat1 <- subset(Mroz, inc > 0, select=c(age, hc, inc, k5, k618, lfp, wc, lwg))	#変数名を並べ替え、論文データに標本を合わせる
names(dat1)[8] <- "wg"	          #lwgをwgに変更
dat1$wg <- exp(dat1$wg)						#対数変換を元に戻す


##(b)　Table 2 を算出
library(rpsychi)
summary(dat1)
groupSummary(dat1, group="lfp")   #層別の基礎統計量
                                  #標準偏差は、標本標準偏差なのでTable 2と若干異なる


#----------------------------------------------------------#
#Table 3 (p.39-46)
#----------------------------------------------------------#

##(a) 年齢を5段階に区切る
dat1$newage <-  cut(dat1$age, breaks=seq(30, 60, by =5), include.lowest=T, right=F)
dat1$newage <- relevel(dat1$newage, ref="[55,60]")


##(b) glmを使用
glm1 <- glm(lfp~ k5 + k618 + hc + newage*wc, family=binomial, data=dat1)
glm0 <- glm(lfp~1, data=dat1, family=binomial)


##(c) overall model evaluations
anova(glm0, glm1, test="Chisq") #Likelihood ratio test


##(d) statistical tests of individual predictors
summary(glm1)			            #Parameter Estimate, SE, AIC
logistic.tab(glm1, data=dat1) #オリジナル関数 (odds比と信頼区間)


##(e) goodness-of-fit statistics
hosmer.lemeshow.test(glm1)  #オリジナル関数 (Hosmer-Lemeshow goodness-of-fit)
library(pscl)
pR2(glm1)         #McFadden's Rho2,


#(f) lrmを使用
library(Design)
lrm1 <- lrm(lfp~ k5 + k618 + hc + newage*wc, data=dat1)
lrm1
round(lrm1$stats, 3)
        #Model L.R. 	= Likelihood ratio test
				#Dxy 		= Somer's Dxy
				#C  		= c statistic
				#Gamma		= Goodman-Kruskal Gamma
				#Tau-a		= Kendall's Tau-a
				#R2		= Nagelkerke R^2


#----------------------------------------------------------#
#Figure 2-3 (p.47-49)
#----------------------------------------------------------#

##関数使用
library(epicalc)
roc.glm <- lroc(glm1)
x.prob <- roc.glm$predicted.table[,1]
y.sen <- 1 - roc.glm$diagnostic.table[,1]
y.spe <- roc.glm$diagnostic.table[,2]
graphics.off()

##自作
cutoff <- seq(0, 1, by=.001)
output <- data.frame(matrix(nrow=length(cutoff), ncol=2))
names(output) <- c("sensitivity", "specificity")
for(i in 1:length(cutoff)){
  my.y <- factor(predict(glm1, type="response") >= cutoff[i], levels=c(FALSE, TRUE), labels=c(FALSE, TRUE))
  my.tab <- table(my.y, glm1$y)
  output[i, ] <- c(my.tab[2, 2]/sum(my.tab[,2]), my.tab[1, 1]/sum(my.tab[,1]))
}
output$one.spe <- 1-output$specificity
plot(sensitivity~one.spe, data=output)
graphics.off()

plot(sensitivity~cutoff, data=output, type="l")
lines(specificity~cutoff, data=output, lty=2)
cutoff[which.min(abs(output$sensitivity - output$specificity))]
graphics.off()


##AUCの計算
count <- 0
n <- 50000
for(i in 1:n){
  xy <- runif(2)
  cond1 <- output$sensitivity[which.min(abs(output$one.spe - xy[1]))]
  if(xy[2] <= cond1){
    count <- count + 1
  }
}
count/n
roc.glm$auc


#----------------------------------------------------------#
#Figure 4以降未完
#----------------------------------------------------------#