my.boot.crq= function (cluster=NULL, x, y, cen, taus, method, R = 100, mboot, bmethod, ...) #------------------------------------------------------------------------------------ # Modify the bootstrap function so that it can handle clustered data in resampling. # Note: We use clusters, rather than individual observations, as the resampling unit. # # cluster: the variable for identifying clusters, NULL if iid data. # # bmethod: # For iid data: # "xy-pair": Bootstrap method (Efron & Tibshirani, 1993) for iid data # "Bose": Bose and Chatterjee's (2003) weighted # resampling method with exponential weights # For clustered data: # "perturb1": one-stage purturbation method for clustered data # "boot1": one-stage bootstrap method for clustered data #------------------------------------------------------------------------------------- { if (is.null(cluster)) { n <- length(y) p <- ncol(x) if (missing(mboot)) mboot <- n A <- array(0, dim = c(p, length(taus), R)) for (i in 1:R) { if (bmethod == "xy-pair") { w <- table(sample(1:n, mboot, replace = TRUE)) s <- as.numeric(names(w)) w <- as.numeric(w) #yb <- y[s] #xb <- x[s, ] #cenb <- cen[s] location=rep(s,times=w) yb <- y[location] xb <- x[location, ] cenb <- cen[location] } else if (bmethod == "Bose") { w <- rexp(n) yb <- y xb <- x cenb <- cen } else stop("invalid bmethod for boot.crq") if (method == "PengHuang") { # a <- crq.fit.pen(xb, yb, cenb, weights = w, ...) if(is.null(bmethod)||bmethod=="xy-pair") a <- crq.fit.pen(xb, yb, cenb, weights = NULL) if(bmethod=="Bose") a <- crq.fit.pen(xb, yb, cenb, weights = w) } else stop("Invalid method for boot.crq") A[, , i] <- coef(a, taus) } return(list(A = A, n = length(y), mboot = mboot)) }else{ clusters=unique(cluster) n.cluster=length(unique(cluster)) if (missing(mboot)) mboot <- n.cluster freq=table(cluster) index<- cumsum( c(0, freq[-n.cluster]) ) p <- ncol(x) if (missing(mboot)) mboot <- n.cluster A <- array(0, dim = c(p, length(taus), R)) for (i in 1:R) { if (bmethod == "boot1") { w=table(sample(clusters,mboot,replace=TRUE)) s <- as.numeric(names(w)) # because of this line, cluster id must be 1:n.cluster w <- as.numeric(w) location=NULL for(ss in 1:length(s)) { location=c(location, rep(index[s[ss]]+(1:freq[s[ss]]),times=w[ss])) } yb <- y[location] xb <- x[location, ] cenb <- cen[location] }else if (bmethod == "perturb1") { w <- rexp(n.cluster) w=rep(w, times=freq) yb <- y xb <- x cenb <- cen }else stop("invalid bmethod for boot.crq for clustered data") if (method == "PengHuang") { if(bmethod=="boot1") a <- crq.fit.pen(xb, yb, cenb, weights = NULL) if(bmethod=="perturb1") a <- crq.fit.pen(xb, yb, cenb, weights = w) }else stop("Invalid method for boot.crq") #if ((i%%floor(R/10)) == 0) #cat(paste("bootstrap/perturbation roughly ", 100 * (i/R), " percent complete\n")) A[, , i] <- coef(a, taus) } return(list(A = A, n = n.cluster, mboot = mboot)) } } clean.quantile=function(vector,probs) #------------------------------------------------------------------------------------------------- # Empirical quantile for only non-NA and non-Inf values (i.e. cleaned data) in a vector #------------------------------------------------------------------------------------------------- { clean.vector=vector[!is.na(vector)] clean.vector=clean.vector[is.finite(clean.vector)] quantile(clean.vector,probs=probs) } my.summary.crq=function (object, cluster, taus = 1:4/5, bmethod, alpha = 0.05, se = "boot", covariance = TRUE, ...) #-------------------------------------------------------------------------------------------------------- # Summary of the crq() fitting results, which may be from the analysis of clustered data or iid data. # cluster: the variable for identifying clusters, NULL if iid data. # The definitions of the other input variables are the same as in summary.crq(). # Note: There could be Inf values in the bootstrap estimates, these Inf values are excluded when empirial # 95% CI and SE are calculated. #-------------------------------------------------------------------------------------------------------- { mt <- terms(object) m <- model.frame(object) x <- model.matrix(mt, m, contrasts = object$contrasts) Y <- model.response(m) method <- object$method y <- Y[, 1] cen <- Y[, 2] # the following line is not working, i replace it with my own line #wt <- model.weights(object$model) wt=m$"(weights)" if (missing(alpha)) alpha <- 0.05 if (!is.null(wt)) { # the following line is useless for Portnoy or Peng-Huang method #resid <- resid * wt x <- x * wt y <- y * wt # the following line should not be used #cen <- cen * wt } if (method == "Portnoy" || method == "PengHuang") { coef <- coef(object, taus) coef <- coef[, apply(coef, 2, function(x) any(!is.na(x)))] taus <- taus[1:ncol(coef)] B <- my.boot.crq(cluster, x, y, cen, taus, method = method, bmethod=bmethod,...) sqmn <- sqrt(B$mboot/B$n) # sqmn is used to adjust for un-equal sized bootstrap # (i.e., resamples have a smaller sample size than the original sample), # which usually works well when sample size is large ### Portnoy's hybrid method is used for getting resampling-based SD and 95% CI. fact <- qnorm(1 - alpha/2)/qnorm(0.75) #B <- apply(B$A, 1:2, quantile, probs = 1:3/4, na.rm = TRUE) B <- apply(B$A, 1:2, clean.quantile, probs = 1:3/4) D <- 0.5 * fact * (B[3, , ] - B[1, , ]) * sqmn L <- coef - D U <- coef + D S <- (U - L)/(2 * qnorm(1 - alpha/2)) T <- coef/S P <- 2 * (1 - pnorm(abs(T))) G <- list() cnames <- c("Value", "Lower Bd", "Upper Bd", "Std Error", "T Value", "Pr(>|t|)") for (i in 1:length(taus)) { tab <- cbind(coef[, i], L[, i], U[, i], S[, i], T[, i], P[, i]) dimnames(tab)[[2]] <- cnames G[[i]] <- list(tau = taus[i], coefficients = tab) } class(G) <- "summary.crqs" return(G) } else stop("Invalid method for my.summary.crq") } # Example (id: subject id; t2event: time to event; time: gap time; cen: censoring time; # onsetage: the onset age of schizophrenia; gender: 1=female; # delta, m, and mstar are as defined in the paper.) library(quantreg) data=read.csv("C:/example.csv",header=T) taus=10:60/100 # Construct working data set based on the raw data data.clean=data[data$exclude == 0, ] fit.WRS=crq(Surv(log(time),delta)~onsetage+gender, method="PengHuang", weights=1/mstar, data=data.clean) # One can use the following line to set the same seed each time so that the results are repeatable. set.seed(3) Sfit.summary=my.summary.crq(fit.WRS,cluster=data.clean$id,taus=taus,bmethod="perturb1") Sfit.summary