sink("c:/beta1-tau10.txt",append=TRUE) print(date()) library(Rlab) library(compositions) # rnorm library(survival) library(catspec) library(gtools) # combinations library(stats) # Beta distribution options("warn"=2) # warning message replace error message #---------------------------------------------------------------------------------- # Simulate data # beta: regression coefficient # N: number of subjects # tau: range of time period # isx2: whether x2 is in the true model, 1=yes, 0=no # 1. id, 2. date, 3. fallid, 4. primaryfallid, 5. event, 6. exposure. #---------------------------------------------------------------------------------- simu<-function(beta,N,tau,isx2,beta2) { data=NULL time=c(1:tau) n=0 while(n 0) { # n: sample size, number of subjects who had at least one event during the study period n=n+1 # location of the falls location=time[yi==1] # Mark the first fall (and its consecutive falls) if it/they occur on the first day flag=0 # indicates whether the first one or the first several falls need to be marked exclude=1 # mark the location, before which all the falls should be excluded from some analysis, i.e. mark 1:(exclude-1) falls while(location[exclude]==exclude) { flag=1 exclude=exclude+1 if(exclude>length(location)) break } if(flag==0|(flag==1&exclude-11) { datai=cbind(id=rep(n,exclude-1), date=location[1:(exclude-1)], fallid=1:(exclude-1), primaryfallid=rep(0,exclude-1), # they are not considered as primary falls, so this id is set to be 0 event=rep(1,exclude-1), # they are still considered as events exposure=xi[location[1:(exclude-1)]]) } if(exclude-1==1) { datai=c(n,location[1],1,0,1,xi[location[1]]) } } if (flag==0) { datai=NULL } # For the first primary fall index=1 # index the primary falls (and their matched controls) tempdata=cbind(id=rep(n,2), date=c(location[exclude]-1,location[exclude]), fallid=rep(exclude,2), primaryfallid=rep(index,2), event=c(0,1), exposure=c(xi[location[exclude]-1],xi[location[exclude]])) datai=rbind(datai,tempdata) if((ki-exclude+1)>=2) { for (k in (exclude+1):ki) # from the one following the 1st primary fall { if(location[k]-location[k-1]==1) # seconary/consecutive fall { datai=rbind(datai,c(n,location[k],k,0,1,xi[location[k]])) }else{ index=index+1 tempdata=cbind(id=rep(n,2), date=c(location[k]-1,location[k]), fallid=rep(k,2), primaryfallid=rep(index,2), event=c(0,1), exposure=c(xi[location[k]-1],xi[location[k]])) datai=rbind(datai,tempdata) } } } } if(flag ==1 & exclude-1==length(location)) # none of the falls can be considered as primary fall { if(exclude-1>1) { datai=cbind(id=rep(n,exclude-1), date=location[1:(exclude-1)], fallid=1:(exclude-1), primaryfallid=rep(0,exclude-1), # they are not considered as primary falls event=rep(1,exclude-1), # they are still considered as events exposure=xi[location[1:(exclude-1)]]) } if(exclude-1==1) { datai=c(n,1,1,0,1,xi[location[1]]) } } # Merge subject i's data to the main data data=rbind(data,datai) } } return(data) } #----------------------------------------------------------------------- # Within-subject pairwise resampling (WSPR) method # Q: number of resampling # data.primaryfall: data set with only primary falls # y: outcome variable # x: exposure variable # id: subject id # primaryfallid: none-zero id to index different falls within subject #----------------------------------------------------------------------- WSPR<-function(Q,data.primaryfall,y,x,id,primaryfallid) { n.x=length(x) # number of covariates beta=matrix(rep(NA,Q*n.x),ncol=n.x) sigma=array(NA,c(n.x,n.x,Q)) 
#-----------------------------------------------------------------------
# Within-subject pairwise resampling (WSPR) method
# Q: number of resampling draws
# data.primaryfall: data set with only primary falls
# y: outcome variable (column location)
# x: exposure variable (column location)
# id: subject id
# primaryfallid: non-zero id indexing the different falls within a subject
#-----------------------------------------------------------------------
WSPR<-function(Q,data.primaryfall,y,x,id,primaryfallid)
{
  n.x=length(x)  # number of covariates
  beta=matrix(rep(NA,Q*n.x),ncol=n.x)
  sigma=array(NA,c(n.x,n.x,Q))
  ids=unique(data.primaryfall[,id])  # unique subject ids
  n=length(ids)                      # number of subjects
  q=0
  while(q<Q)
  {
    # Draw one matched pair per subject, then fit clogit on the drawn pairs
    data.q=NULL
    for(i in 1:n)
    {
      idi=ids[i]
      datai=data.primaryfall[data.primaryfall[,id]==idi,]
      primaryfallids=unique(datai[,primaryfallid])
      mi=length(primaryfallids)
      temp=sample(1:mi,size=1,replace=FALSE)
      data.qi=datai[(2*temp-1):(2*temp),]
      data.q=rbind(data.q,data.qi)
    }
    strata=data.q[,id]
    temp1=try(clogit(data.q[,y]~data.q[,x]+strata(strata)),TRUE)
    if(!inherits(temp1,"try-error"))
    {
      q=q+1
      cat(q)
      beta[q,]=temp1$coef
      sigma[,,q]=temp1$var
    }
  }
  beta.estimate=apply(beta,2,mean)
  beta.var=0
  for(q in 1:Q)
  {
    # mean within-resample variance minus the between-resample outer product
    beta.var=beta.var+sigma[,,q]/Q-(beta[q,]-beta.estimate)%*%t(beta[q,]-beta.estimate)/Q
  }
  return(list(beta.estimate=beta.estimate,beta.variance=beta.var))
}

#-----------------------------------------------------------------------
# Weighted estimating equation (WEE) method: score function
# data.primaryfall: data with only primary falls
# y: location of the outcome variable
# x: location of the exposure variable
# id: subject id
# primaryfallid: matched-set id
# beta: regression coefficient
#-----------------------------------------------------------------------
Uij<-function(dataij,y,x,beta)
# Calculate Uij and the first derivative of Uij.
# The procedure is valid for matched sets having >=2 events.
{
  # sum of covariates over all event times
  y1=sum(dataij[dataij[,y]==1,x])
  # size of the ijth matched set, =2 for a 1:1 match
  size.ij=length(dataij[,1])
  # number of observed events in the ijth matched set, =1 for a 1:1 match
  size.event=length(dataij[dataij[,y]==1,1])
  # all combinations of size = number of observed events
  set.comb=combinations(size.ij,size.event,v=1:size.ij,set=TRUE,repeats.allowed=FALSE)
  s0=0
  s1=0
  s2=0
  if(size.event==1)
  {
    size.set.comb=length(set.comb)
  }else{
    size.set.comb=length(set.comb[,1])
  }
  for(k in 1:size.set.comb)
  {
    if(size.event==1)
    {
      y.temp=dataij[set.comb[k],x]
    }else{
      y.temp=sum(dataij[set.comb[k,],x])
    }
    s0=s0+exp(beta*y.temp)
    s1=s1+y.temp*exp(beta*y.temp)
    s2=s2+y.temp^2*exp(beta*y.temp)
  }
  uij=y1-s1/s0
  partial.uij=(s2*s0-s1^2)/s0^2
  return(list(Uij=uij,PartialUij=partial.uij))
}

score<-function(data,y,x,id,primaryfallid,beta)
{
  ids=unique(data[,id])
  n=length(ids)
  Swee=0
  for(i in 1:n)
  {
    datai=data[data[,id]==ids[i],]
    primaryfallids=unique(datai[,primaryfallid])
    mi=length(primaryfallids)
    for(j in 1:mi)
    {
      dataij=datai[datai[,primaryfallid]==primaryfallids[j],]
      temp=Uij(dataij,y,x,beta)
      Swee=Swee+temp$Uij/mi
      # The next line (unweighted) confirms that this program gives exactly
      # the same estimate as the clogit function:
      # Swee=Swee+temp$Uij
    }
  }
  return(Swee)
}
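#-----------------------------------------------------------------------
# Illustrative checks (a sketch; assumes demo.data from the simu()
# example above): evaluate the weighted score at a candidate beta and
# run WSPR with a small Q. Q=50 is for illustration only.
#-----------------------------------------------------------------------
# demo.primaryfall=demo.data[demo.data[,4]!=0,]
# score(demo.primaryfall,y=5,x=6,id=1,primaryfallid=4,beta=1)
# WSPR(Q=50,demo.primaryfall,y=5,x=6,id=1,primaryfallid=4)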
WEE<-function(data,y,x,id,primaryfallid,init)
{
  low=init-1
  up=init+1
  round=1
  # Widen the bracket until the score changes sign (at most 10 rounds)
  while(sign(score(data,y,x,id,primaryfallid,low)*score(data,y,x,id,primaryfallid,up))!=-1 && round<10)
  {
    up<-up+1
    low<-low-1
    round<-round+1
    cat("\n round=",round,"low=",low,"up=",up)
  }
  if(sign(score(data,y,x,id,primaryfallid,low)*score(data,y,x,id,primaryfallid,up))==-1)
  {
    # Estimate beta as the root of the weighted score
    temp<-uniroot(score,lower=low,upper=up,tol=1e-10,
                  data=data,y=y,x=x,id=id,primaryfallid=primaryfallid)
    beta.estimate<-as.numeric(temp$root)

    # Estimate the variance (sandwich form)
    ids=unique(data[,id])
    n=length(ids)
    Hmatrix=0
    Vmatrix=0
    for(i in 1:n)
    {
      datai=data[data[,id]==ids[i],]
      primaryfallids=unique(datai[,primaryfallid])
      mi=length(primaryfallids)
      Vi=0
      for(j in 1:mi)
      {
        dataij=datai[datai[,primaryfallid]==primaryfallids[j],]
        temp1=Uij(dataij,y,x,beta.estimate)  # plug in the estimated beta
        # H matrix in the sandwich form
        Hmatrix=Hmatrix+temp1$PartialUij/mi/n
        Vi=Vi+temp1$Uij/mi
      }
      Vmatrix=Vmatrix+(Vi^2)/n
    }
    beta.var=Vmatrix/Hmatrix/Hmatrix/n
  }else{
    # Bracketing failed within 10 rounds
    beta.estimate=NA
    beta.var=NA
  }
  return(list(beta.estimate=beta.estimate,beta.variance=beta.var))
}

#---------------------------------------------------------
# Mantel-Haenszel estimator: unweighted and weighted
# Note: the unweighted MH method is equivalent to the iid method,
# and the weighted MH method is equivalent to the WEE method,
# when only one binary covariate is present in the model.
#---------------------------------------------------------
WMH<-function(data,y,x,id,primaryfallid)
{
  ids=unique(data[,id])
  n=length(ids)
  n10=0   # falls with the case exposed and the control unexposed
  n01=0   # falls with the case unexposed and the control exposed
  wn10=0  # weighted count of case-exposed/control-unexposed falls
  wn01=0  # weighted count of case-unexposed/control-exposed falls
  for(i in 1:n)
  {
    datai=data[data[,id]==ids[i],]
    primaryfallids=unique(datai[,primaryfallid])
    mi=length(primaryfallids)
    for(j in 1:mi)
    {
      dataij=datai[datai[,primaryfallid]==primaryfallids[j],]
      if(dataij[1,y]==1)
      {
        case=1     # the row for the case
        control=2  # the row for the control
      }else{
        case=2
        control=1
      }
      if(dataij[case,x]==1&dataij[control,x]==0)
      {
        n10=n10+1
        wn10=wn10+1/mi
      }
      if(dataij[case,x]==0&dataij[control,x]==1)
      {
        n01=n01+1
        wn01=wn01+1/mi
      }
    }
  }
  MH.estimate=n10/n01
  WMH.estimate=wn10/wn01
  return(list(MH.estimate=MH.estimate,WMH.estimate=WMH.estimate))
}

#--------------------------------------------------
# Approximation methods in clustered data analysis:
# Efron's method and Breslow's method.
# myclogit is survival::clogit with the tie-handling
# method exposed as an argument.
#--------------------------------------------------
myclogit=function(formula,data,method=c("exact","breslow","efron"),
                  na.action=getOption("na.action"),subset=NULL,
                  control=coxph.control())
{
  mf <- match.call()
  mf[[1]] <- as.name("model.frame")
  mf$method <- mf$control <- NULL
  mfn <- mf
  mfn$na.action <- "I"
  mfn$subset <- NULL
  nrows <- NROW(eval(mfn, parent.frame()))
  mf <- eval(mf, parent.frame())
  coxcall <- match.call()
  coxcall[[1]] <- as.name("coxph")
  newformula <- formula
  newformula[[2]] <- substitute(Surv(rep(1,nn),case),
                                list(case=formula[[2]],nn=nrows))
  environment(newformula) <- environment(formula)
  coxcall$formula <- newformula
  coxcall$method <- switch(match.arg(method),exact="exact",breslow="breslow",efron="efron")
  coxcall <- eval(coxcall, sys.frame(sys.parent()))
  coxcall$userCall <- sys.call()
  class(coxcall) <- c("clogit","coxph")
  coxcall
}

#---------------------------------------------------------
# Main program
#---------------------------------------------------------
tau=10        # time interval
N=200         # sample size
MC.size=1000  # Monte Carlo sample size
Q=1000        # number of resampling draws
beta=1
isx2=0
beta2=1

# Average number of events
avg.num.event=rep(NA,MC.size)
# First-event method
estimate1=rep(NA,MC.size)
se1=rep(NA,MC.size)
# Pooled data analysis method: treat data as iid
estimate2=rep(NA,MC.size)
se2=rep(NA,MC.size)
# Clustered data method (Navidi's method)
estimate3=rep(NA,MC.size)
se3=rep(NA,MC.size)
# Efron's approximation
estimate3E=rep(NA,MC.size)
se3E=rep(NA,MC.size)
# Breslow's approximation
estimate3B=rep(NA,MC.size)
se3B=rep(NA,MC.size)
# WSPR method
estimate4=rep(NA,MC.size)
se4=rep(NA,MC.size)
# WEE method
estimate5=rep(NA,MC.size)
se5=rep(NA,MC.size)
# M-H method for a single binary covariate
#estimate6=rep(NA,MC.size)
# Weighted M-H estimate
#estimate7=rep(NA,MC.size)

seed=0
MC.index=1
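#---------------------------------------------------------
# Illustrative single-data-set fits (a sketch; assumes the demo
# objects from the examples above). Per the note above, with one
# binary covariate log(MH.estimate) tracks the iid method and
# log(WMH.estimate) tracks the WEE estimate.
#---------------------------------------------------------
# demo.strata=demo.data[,1]
# myclogit(demo.data[,5]~demo.data[,6]+strata(demo.strata),method="efron")
# WMH(demo.primaryfall,y=5,x=6,id=1,primaryfallid=4)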
while(MC.index <= MC.size)
{
  seed=seed+1
  set.seed(seed,kind=NULL)
  data=simu(beta,N,tau,isx2,beta2)

  # Average number of events per subject
  avg.num.event[MC.index]=sum(data[,5])/N

  # First-event method
  data.firstfall=data[data[,4]==1,]
  strata1=data.firstfall[,1]
  temp1=try(clogit(data.firstfall[,5]~data.firstfall[,6]+strata(strata1)),TRUE)
  if(!inherits(temp1,"try-error"))
  {
    estimate1[MC.index]=temp1$coef
    se1[MC.index]=sqrt(temp1$var)
  }

  # Pooled data analysis method: treat data as iid
  data.primaryfall=data[data[,4]!=0,]
  strata2=data.primaryfall[,1]*100+data.primaryfall[,4]  # one stratum per matched pair
  temp2=try(clogit(data.primaryfall[,5]~data.primaryfall[,6]+strata(strata2)),TRUE)
  if(!inherits(temp2,"try-error"))
  {
    estimate2[MC.index]=temp2$coef
    se2[MC.index]=sqrt(temp2$var)
  }

  # Clustered data method (Navidi's method)
  strata3=data[,1]
  temp3=try(clogit(data[,5]~data[,6]+strata(strata3)),TRUE)
  if(!inherits(temp3,"try-error"))
  {
    estimate3[MC.index]=temp3$coef
    se3[MC.index]=sqrt(temp3$var)
  }
  #myclogit(data[,5]~data[,6]+strata(strata3),method="exact")

  # Efron's approximation
  temp3E=try(myclogit(data[,5]~data[,6]+strata(strata3),method="efron"),TRUE)
  if(!inherits(temp3E,"try-error"))
  {
    estimate3E[MC.index]=temp3E$coefficients
    se3E[MC.index]=sqrt(temp3E$var)
  }

  # Breslow's approximation
  temp3B=try(myclogit(data[,5]~data[,6]+strata(strata3),method="breslow"),TRUE)
  if(!inherits(temp3B,"try-error"))
  {
    estimate3B[MC.index]=temp3B$coefficients
    se3B[MC.index]=sqrt(temp3B$var)
  }

  # Within-subject pairwise resampling method (uses data.primaryfall)
  temp4=WSPR(Q,data.primaryfall,5,6,1,4)
  estimate4[MC.index]=temp4$beta.estimate
  se4[MC.index]=sqrt(temp4$beta.variance)

  # Weighted estimating equation method
  temp5=WEE(data.primaryfall,5,6,1,4,beta)
  estimate5[MC.index]=temp5$beta.estimate
  se5[MC.index]=sqrt(temp5$beta.variance)

  # M-H method for a single binary covariate
  # temp=WMH(data.primaryfall,5,6,1,4)
  # estimate6[MC.index]=log(temp$MH.estimate)
  # Weighted M-H estimate
  # estimate7[MC.index]=log(temp$WMH.estimate)

  # cat("\n",MC.index,"seed=",seed,"beta=",estimate1[MC.index],estimate2[MC.index],estimate3[MC.index],estimate4[MC.index],estimate5[MC.index],estimate6[MC.index],estimate7[MC.index],avg.num.event[MC.index])
  cat("\n",MC.index,"seed=",seed,"beta=",estimate1[MC.index],estimate2[MC.index],estimate3[MC.index],estimate3E[MC.index],estimate3B[MC.index],estimate4[MC.index],estimate5[MC.index],avg.num.event[MC.index])
  cat("\n",MC.index,"seed=",seed,"se=",se1[MC.index],se2[MC.index],se3[MC.index],se3E[MC.index],se3B[MC.index],se4[MC.index],se5[MC.index],"\n")

  MC.index=MC.index+1
  save.image("C:/beta1-tau10.RData")
}

# Monte Carlo summaries: mean estimate, empirical SD, and mean model-based SE
mean(avg.num.event)

mean(estimate1[!is.na(estimate1)])
sd(estimate1[!is.na(estimate1)])
mean(se1[!is.na(estimate1)])

mean(estimate2)
sd(estimate2)
mean(se2)

mean(estimate3)
sd(estimate3)
mean(se3)

mean(estimate3E)
sd(estimate3E)
mean(se3E)

mean(estimate3B)
sd(estimate3B)
mean(se3B)

mean(estimate4)
sd(estimate4)
mean(se4)

mean(estimate5)
sd(estimate5)
mean(se5)

print(date())
sink()  # close the output file opened at the top
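#---------------------------------------------------------
# Optional: the same summaries collected in one table (a sketch;
# the helper name summarize.est is illustrative, not from the source):
#---------------------------------------------------------
# summarize.est=function(est,se)
#   c(mean=mean(est,na.rm=TRUE),sd=sd(est,na.rm=TRUE),mean.se=mean(se[!is.na(est)]))
# rbind(first=summarize.est(estimate1,se1),
#       iid=summarize.est(estimate2,se2),
#       navidi=summarize.est(estimate3,se3),
#       efron=summarize.est(estimate3E,se3E),
#       breslow=summarize.est(estimate3B,se3B),
#       WSPR=summarize.est(estimate4,se4),
#       WEE=summarize.est(estimate5,se5))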