########################################## #Long, J. S. (1997). #Binary outcomes: The linear probability, probit, and logit models. #In Regression models for categorical and limited dependent variables (pp.34--84). #Thousand Oaks, CA: Sage. ########################################### source("http://blue.zero.jp/yokumura/R/testtheory/ctt.txt") #library(VGAM) #vglm() #library(nnet) #multinom() library(car) #Mroz library(sampleSelection) #probit #library(MASS) #polr #-------------------------------------# #Step1: Example of the LPM (pp.36-38) #-------------------------------------# data(Mroz) Mroz2 <- Mroz Mroz <- data.frame(sapply(Mroz, as.numeric)) Mroz$lfp <- Mroz$lfp - 1 Mroz$wc <- Mroz$wc -1 Mroz$hc <- Mroz$hc -1 ##(a) Table 3.1 #-------------------------------# #summary2(data, digits=3, sig.level=.05, graph=T) #-------------------------------# #data = data.frame #digits = 表示桁数 #sig.level = 有意水準 #graph = T 平均値の95%信頼区間 summary2(Mroz,digits=2, graph=F) ##(b) Table 3.2 lm() reg1 <- lm(lfp~., data=Mroz) summary(reg1) #unstandardized coefficient; t.value Mroz.std <- Mroz #標準偏回帰係数は出ないので,標準化データセットを作る continuous <- c("k5","k618","age","lwg","inc") Mroz.std[,continuous] <- scale(Mroz.std[,continuous]) #連続変数を標準化 reg2 <- lm(lfp~., data=Mroz.std) #standardized coefficient tab3.2 <- data.frame(b=coef(reg1), bs=coef(reg2), t=summary(reg1)$coef[,"t value"]) round(tab3.2, 3) #-------------------------------------# #Step2: Example of Logit and Probit (pp.48-49) #-------------------------------------# ##(a) Table 3.3 glm() logit1 <- glm(lfp~.,data=Mroz, family=binomial(link=logit)) summary(logit1) #Logit probit1 <- glm(lfp~.,data=Mroz, family=binomial(link=probit)) summary(probit1) #Probit tab3.3 <- data.frame(b1=coef(logit1), z1=summary(logit1)$coef[,"z value"], b2=coef(probit1),z2=summary(probit1)$coef[,"z value"]) attach(tab3.3) tab3.3$b3 <- b1/b2 tab3.3$z3 <- z1/z2 detach(tab3.3) round(tab3.3, 3) logit1$deviance probit1$deviance ##(b) probit() probit2 <- probit(lfp~., data=Mroz) coef(probit2) coef(probit1) -2 * probit2$maximum probit1$deviance #-------------------------------------# #Step3: Interpretation (pp.61-79) #-------------------------------------# ##(a) Table3.4 attach(Mroz) x <- Mroz[,-1] #従属変数を除いたデータセット X1 <- X2 <- matrix(mean(x), ncol(x), ncol(x), byrow=T) diag(X1) <- sapply(x, min) diag(X2) <- sapply(x, max) X1 <- data.frame(X1); X2 <- data.frame(X2) names(X1) <- names(X2) <- names(x) rownames(X1) <- rownames(X2) <- names(x) tab3.4 <- cbind(predict(probit1, type="response",newdata=X2), predict(probit1, type="response",newdata=X1)) tab3.4 <- data.frame(tab3.4) colnames(tab3.4) <- c("max","min") tab3.4$range <- abs(apply(tab3.4, 1, diff)) round(tab3.4, 2) ##(b) Figure 3.10 xaxis <- seq(min(age), max(age)) yaxis <- seq(0,1, length=length(xaxis)) wc0 <- predict(probit1, type="response", newdata=data.frame(k5=mean(k5),k618=mean(k618),age=xaxis,wc=0,hc=mean(hc),lwg=mean(lwg),inc=mean(inc))) wc1 <- predict(probit1, type="response", newdata=data.frame(k5=mean(k5),k618=mean(k618),age=xaxis,wc=1,hc=mean(hc),lwg=mean(lwg),inc=mean(inc))) plot(yaxis~xaxis, type="n", yaxt="n") lines(wc0~xaxis, type="b",pch=1) lines(wc1~xaxis, type="b",pch=2) abline(h=c(.25,.5,.75), lty=2) axis(side=2,at=seq(0,1,by=.25)) graphics.off() ##(c) Figure 3.11 xaxis <- seq(min(inc), max(inc)) yaxis <- seq(0,1, length=length(xaxis)) age30 <- predict(probit1, type="response", newdata=data.frame(k5=mean(k5),k618=mean(k618),age=30,wc=mean(wc),hc=mean(hc),lwg=mean(lwg),inc=xaxis)) age40 <- predict(probit1, type="response", newdata=data.frame(k5=mean(k5),k618=mean(k618),age=40,wc=mean(wc),hc=mean(hc),lwg=mean(lwg),inc=xaxis)) age50 <- predict(probit1, type="response", newdata=data.frame(k5=mean(k5),k618=mean(k618),age=50,wc=mean(wc),hc=mean(hc),lwg=mean(lwg),inc=xaxis)) age60 <- predict(probit1, type="response", newdata=data.frame(k5=mean(k5),k618=mean(k618),age=60,wc=mean(wc),hc=mean(hc),lwg=mean(lwg),inc=xaxis)) plot(yaxis~xaxis, type="n", yaxt="n") lines(age30~xaxis, type="b",pch=1) lines(age40~xaxis, type="b",pch=2) lines(age50~xaxis, type="b",pch=3) lines(age60~xaxis, type="b",pch=4) abline(h=c(.25,.5,.75), lty=2) axis(side=2,at=seq(0,1,by=.25)) graphics.off() ##(d) Table 3.5 wc0 <- predict(probit1, type="response", newdata=data.frame(k5=c(0,1,2,3),k618=mean(k618),age=mean(age), wc=0,hc=mean(hc),lwg=mean(lwg),inc=mean(inc))) wc1 <- predict(probit1, type="response", newdata=data.frame(k5=c(0,1,2,3),k618=mean(k618),age=mean(age), wc=1,hc=mean(hc),lwg=mean(lwg),inc=mean(inc))) table3.5 <- cbind(wc0, wc1, diff=wc1-wc0) rownames(table3.5) <- c(0,1,2,3) round(table3.5,2) ##(e) Table 3.6 x <- Mroz[,-1] #従属変数を除いたデータセット b <- as.matrix(coef(probit1)[-1]) #切片を除いた偏回帰係数 vary <- as.numeric(t(b) %*% cov(x) %*% as.matrix(b) + 1) bsy <- b/sqrt(vary) sigmak <- sd(x) bs <- sigmak * bsy z.value <- summary(probit1)$coef[-1,"z value"] tab3.6 <- data.frame(b=b, bsy=bsy, bs=bs, z.value) round(tab3.6, 3) ##(f) Table 3.7 (未完) ##(g) Table 3.8 #centered unit change X1 <- X2 <- matrix(mean(x), ncol(x), ncol(x), byrow=T) diag(X1) <- diag(X1) - 1/2 diag(X2) <- diag(X2) + 1/2 X1 <- data.frame(X1); X2 <- data.frame(X2) names(X1) <- names(X2) <- names(x) rownames(X1) <- rownames(X2) <- names(x) Pr1 <- predict(probit1, type="response",newdata=X1) Pr2 <- predict(probit1, type="response",newdata=X2) cuc <- Pr2 - Pr1 cuc[c(4,5)] <- NA #centered standard deviation change X1 <- X2 <- matrix(mean(x), ncol(x), ncol(x), byrow=T) diag(X1) <- diag(X1) - sd(x)/2 diag(X2) <- diag(X2) + sd(x)/2 X1 <- data.frame(X1); X2 <- data.frame(X2) names(X1) <- names(X2) <- names(x) rownames(X1) <- rownames(X2) <- names(x) Pr1 <- predict(probit1, type="response",newdata=X1) Pr2 <- predict(probit1, type="response",newdata=X2) csdc <- Pr2 - Pr1 csdc[c(4,5)] <- NA #change from 0 to 1 X1 <- X2 <- matrix(mean(x), ncol(x), ncol(x), byrow=T) diag(X1) <- 0 diag(X2) <- 1 X1 <- data.frame(X1); X2 <- data.frame(X2) names(X1) <- names(X2) <- names(x) rownames(X1) <- rownames(X2) <- names(x) Pr1 <- predict(probit1, type="response",newdata=X1) Pr2 <- predict(probit1, type="response",newdata=X2) cf0to1 <- Pr2 - Pr1 cf0to1[-c(4,5)] <- NA tab3.8 <- data.frame(cuc, csdc, cf0to1) round(tab3.8, 2) ##(h) Table 3.9 lc <- coef(logit1) fc <- exp(lc * 1) sfc <- exp(lc * c(NA,sigmak)) z.value <- summary(logit1)$coef[,"z value"] tab3.9 <- data.frame(lc, fc, sfc, z.value) tab3.9$fc[1] <- tab3.9$sfc[5:6] <- NA round(tab3.9, 3) detach(Mroz)