############################################################ #Sofroniou, N. & Hutcheson, G. D. (2002) #Confidence intervals for the predictions of logistic regression #in the presence and absence of a variance-covariance matrix #Understanding Statistics, 1, 3-18. # #Dataset の所在 #StatLib---Datasets Archive #http://lib.stat.cmu.edu/datasets/hutsof99.ZIP ############################################################ ############################################################ #uncompleted job #(a) variance-covariance matrix の求め方 ############################################################ #----------------------------------------------------------# #Step 1: Table 1 p.8 を再現 #----------------------------------------------------------# ##(a) データの読み込み dat <- read.table("http://blue.zero.jp/yokumura/R/logistic/dat1.dat",header=T) ##(b) (7) 式を推定 log1 <- glm(TreatmentOutcome~InfectionSeverity, binomial, data=dat) coef(log1) ##(c) odds exp(coef(log1)[[2]]) ##(d) ASE ASE <- coef(summary(log1))[2,2] #ASE for Severity coef(summary(log1))[1,2] #ASE for Constant ##(e) 95% CI for Beta (upper.b <- coef(log1)[[2]] + 1.96 * ASE) (lower.b <- coef(log1)[[2]] - 1.96 * ASE) ##(f) 95% CI for odds exp(upper.b) exp(lower.b) ##(g) 関数作成 logistic <- function(model,data){ log1 <- glm(model, binomial, data=data) Coefficient <- coef(log1) #係数 ##(c) odds odds <- exp(Coefficient) ##(d) ASE ASE <- coef(summary(log1))[,2] #ASE for Severity ##(e) 95% CI for Beta (upper.b <- Coefficient + 1.96 * ASE) (lower.b <- Coefficient - 1.96 * ASE) ##(f) 95% CI for odds (upper.odds <- exp(upper.b)) (lower.odds <- exp(lower.b)) ##(g) 出力 output <- data.frame(Coefficient,ASE, lower.b, upper.b, odds, lower.odds, upper.odds) output[1,3:7] <- NA output <- rbind(output[-1,],output[1,]) return(output) } ##テスト model <- formula(TreatmentOutcome~InfectionSeverity) data <- dat logistic(model,data) #----------------------------------------------------------# #Step2: Table 2 (p.9) からASEを求める #----------------------------------------------------------# ##(a) Table2 を作成 table2 <- matrix(c(1.89769,-0.02326,-0.02326,0.00031),ncol=2,byrow=T) sqrt(table2[1,1] + 50 ^ 2 * table2[2,2] + 2 * 50 * table2[1,2]) # (6) 式 #----------------------------------------------------------# #Step3: Table 4 (p.9), Figure 1 (p.6), Figure 2 (p.7) を作成 #----------------------------------------------------------# ##(a) x=50の場合の信頼区間 x <- 50 rescaled <- dat$InfectionSeverity-50 log1 <- glm(TreatmentOutcome~InfectionSeverity, binomial, data=dat) logit.p <- coef(log1)[[1]] + coef(log1)[[2]] * x #xをモデル式に代入 (log1.re <- glm(TreatmentOutcome~rescaled, binomial, data=dat)) (ASE <- coef(summary(log1.re))[1,2]) (lower.logit.p <- logit.p + 1.96 * ASE) (upper.logit.p <- logit.p - 1.96 * ASE) #x=50 のときに死ぬ確率 prob <- exp(logit.p)/(1 + exp(logit.p)) #下側上側 (lower.prob <- exp(upper.logit.p)/(1 + exp(upper.logit.p))) (upper.prob <- exp(lower.logit.p)/(1 + exp(lower.logit.p))) ##(b) 関数作成 inf.log <- function(x){ ##出力データフレームの作成 output <- data.frame(matrix(NA,ncol=7,nrow=length(x)),row.names=x) names(output) <- c("ASE","logit.p", "lower.logit.p", "upper.logit.p", "prob","lower.prob","upper.prob") ##信頼区間の計算 for(i in 1:length(x)){ rescaled <- dat$InfectionSeverity-x[i] log1 <- glm(TreatmentOutcome~InfectionSeverity, binomial, data=dat) logit.p <- coef(log1)[[1]] + coef(log1)[[2]] * x[i] #x[i]をモデル式に代入 #ASE (log1.re <- glm(TreatmentOutcome~rescaled, binomial, data=dat)) (ASE <- coef(summary(log1.re))[1,2]) #下側上側 (upper.logit.p <- logit.p + 1.96 * ASE) (lower.logit.p <- logit.p - 1.96 * ASE) #x[i]=50 のときに死ぬ確率 prob <- exp(logit.p)/(1 + exp(logit.p)) #下側上側 (lower.prob <- exp(upper.logit.p)/(1 + exp(upper.logit.p))) (upper.prob <- exp(lower.logit.p)/(1 + exp(lower.logit.p))) #出力 output[i,] <- data.frame(ASE, logit.p, lower.logit.p, upper.logit.p, prob,lower.prob,upper.prob) }#end for return(output) }#end fnc ##(c) Table4 を作成 x <- seq(0,200, by=50) inf.log(x) ##(d) Figure 1 を作成 x <- seq(0,170,by=.5) fig1 <- inf.log(x) plot(fig1$logit.p~x,type="l",xlim=c(0,170),ylim=c(-8,8), xlab="Severity of infection", ylab="Logit(p)") lines(fig1$lower.logit.p~x, lty=2) lines(fig1$upper.logit.p~x, lty=2) graphics.off() ##(e) Figure 2 を作成 plot(fig1$prob~x,type="l",xlim=c(0,170),ylim=c(0,1), xlab="Severity of infection", ylab="p") lines(fig1$lower.prob~x, lty=2) lines(fig1$upper.prob~x, lty=2) graphics.off() #----------------------------------------------------------# #Step4: Table5 (p.12) を再現 #----------------------------------------------------------# ##(a) データの読み込み dat2 <- read.table("http://blue.zero.jp/yokumura/R/logistic/dat2.dat",header=T) dat2$Age <- factor(dat2$Age,levels=c(1,0)) ##(b) Table 5 を計算 model <- formula(Prosecute~ Quality + Age + Loc_home + Loc_scho + loc_spec) log4 <- glm(model ,data=dat2, binomial) summary(log4) logistic(model,dat2) #----------------------------------------------------------# #Step5: Table7 (p.14) を再現 #----------------------------------------------------------# ##(a) 5-6歳,自宅,質=50の場合の確率 logit.p <- coef(log4)[[1]] + coef(log4)[[2]] * 50 + coef(log4)[[3]] + coef(log4)[[4]] exp(logit.p)/(1+exp(logit.p)) ##(b) 変数を変換 attach(dat2) rQuality <- Quality - 50 rAge <- factor(Age,levels = c(0,1)) rLoc_home <- Loc_home - 1 ##(c)切片の標準誤差を算出 model2 <- formula(Prosecute~ rQuality + rAge + rLoc_home + Loc_scho + loc_spec) glm(model2,data=dat2,binomial) logistic(model2,dat2) ASE <- logistic(model2,dat2)[6,2] #標準誤差 ##(d) 信頼区間を算出 (lower.logit.p <- logit.p - 1.96 * ASE) (upper.logit.p <- logit.p + 1.96 * ASE) exp(lower.logit.p)/(1+exp(lower.logit.p)) exp(upper.logit.p)/(1+exp(upper.logit.p))