############################################################ #Peng, C.-Y. J., & So, T.-S. H. (2002) #Logistic regression analysis and reporting: A primer. #Understanding Statistics, 1, 31-70. ############################################################ source("http://blue.zero.jp/yokumura/R/testtheory/ctt.txt") #----------------------------------------------------------# #Figure 1 (p.34) #----------------------------------------------------------# ##(a) データを作成 alpha <- c(0,0) beta <- c(.2,.4) x <- seq(-30,30,by=.01) ##(b) (2)式を定義 form1 <- function(a,b,x){ exp(a + b * x)/(1 + exp(a + b * x)) } ##(c) 描画 #(1) alpha = 0, beta=.2 plot(form1(a=alpha[1], b = beta[1], x= x)~x, type="l", ylab="P(Y=1)", xlab="X") #(2) alpha = 0, beta=.4 lines(form1(a=alpha[2], b = beta[2], x= x)~x, lty=2) #(3) x = alpha/betaのときのyは0.5になる form1(a=alpha[1], b = beta[1], x= alpha[1]/beta[1]) form1(a=alpha[2], b = beta[2], x= alpha[2]/beta[2]) graphics.off() #----------------------------------------------------------# #Table 1 (p.34) #----------------------------------------------------------# ##(a) データを作成 tab1 <- matrix(c(73,15,23,11),ncol=2,byrow=T) tab1 <- as.table(tab1) rownames(tab1) <- c("Low","Normal") colnames(tab1) <- c("Low","Normal") ##(b) test of independence chisq.test(tab1,correct=F) ##(c) odds ratio p1 <- prop.table(tab1, margin=1)[1,1] #低体重の母が低体重の子を産む比率 p0 <- prop.table(tab1, margin=1)[2,1] #通常体重の母が低体重の子を産む比率 (p1/(1-p1)) / (p0/(1-p0)) #odds ratio #----------------------------------------------------------# #Table 2 (p.37) #----------------------------------------------------------# ##(a) データを呼び出し library(car) #carパッケージに,Mroz (1987) のデータが格納されている data(Mroz) Mroz #論文中では,N=752と書いてるが,オリジナルはN=753 ?Mroz #変数名を確認 dat1 <- subset(Mroz, inc > 0, select=c(age, hc, inc, k5, k618, lfp, wc, lwg)) #変数名を並べ替え、論文データに標本を合わせる names(dat1)[8] <- "wg" #lwgをwgに変更 dat1$wg <- exp(dat1$wg) #対数変換を元に戻す ##(b) Table 2 を算出 library(rpsychi) summary(dat1) groupSummary(dat1, group="lfp") #層別の基礎統計量 #標準偏差は、標本標準偏差なのでTable 2と若干異なる #----------------------------------------------------------# #Table 3 (p.39-46) #----------------------------------------------------------# ##(a) 年齢を5段階に区切る dat1$newage <- cut(dat1$age, breaks=seq(30, 60, by =5), include.lowest=T, right=F) dat1$newage <- relevel(dat1$newage, ref="[55,60]") ##(b) glmを使用 glm1 <- glm(lfp~ k5 + k618 + hc + newage*wc, family=binomial, data=dat1) glm0 <- glm(lfp~1, data=dat1, family=binomial) ##(c) overall model evaluations anova(glm0, glm1, test="Chisq") #Likelihood ratio test ##(d) statistical tests of individual predictors summary(glm1) #Parameter Estimate, SE, AIC logistic.tab(glm1, data=dat1) #オリジナル関数 (odds比と信頼区間) ##(e) goodness-of-fit statistics hosmer.lemeshow.test(glm1) #オリジナル関数 (Hosmer-Lemeshow goodness-of-fit) library(pscl) pR2(glm1) #McFadden's Rho2, #(f) lrmを使用 library(Design) lrm1 <- lrm(lfp~ k5 + k618 + hc + newage*wc, data=dat1) lrm1 round(lrm1$stats, 3) #Model L.R. = Likelihood ratio test #Dxy = Somer's Dxy #C = c statistic #Gamma = Goodman-Kruskal Gamma #Tau-a = Kendall's Tau-a #R2 = Nagelkerke R^2 #----------------------------------------------------------# #Figure 2-3 (p.47-49) #----------------------------------------------------------# ##関数使用 library(epicalc) roc.glm <- lroc(glm1) x.prob <- roc.glm$predicted.table[,1] y.sen <- 1 - roc.glm$diagnostic.table[,1] y.spe <- roc.glm$diagnostic.table[,2] graphics.off() ##自作 cutoff <- seq(0, 1, by=.001) output <- data.frame(matrix(nrow=length(cutoff), ncol=2)) names(output) <- c("sensitivity", "specificity") for(i in 1:length(cutoff)){ my.y <- factor(predict(glm1, type="response") >= cutoff[i], levels=c(FALSE, TRUE), labels=c(FALSE, TRUE)) my.tab <- table(my.y, glm1$y) output[i, ] <- c(my.tab[2, 2]/sum(my.tab[,2]), my.tab[1, 1]/sum(my.tab[,1])) } output$one.spe <- 1-output$specificity plot(sensitivity~one.spe, data=output) graphics.off() plot(sensitivity~cutoff, data=output, type="l") lines(specificity~cutoff, data=output, lty=2) cutoff[which.min(abs(output$sensitivity - output$specificity))] graphics.off() ##AUCの計算 count <- 0 n <- 50000 for(i in 1:n){ xy <- runif(2) cond1 <- output$sensitivity[which.min(abs(output$one.spe - xy[1]))] if(xy[2] <= cond1){ count <- count + 1 } } count/n roc.glm$auc #----------------------------------------------------------# #Figure 4以降未完 #----------------------------------------------------------#