## data: the classic multinomial EM example of Dempster, Laird and Rubin (1977),
## with cell probabilities ((2+tha)/4, (1-tha)/4, (1-tha)/4, tha/4)
yn = c(125,18,20,34)

## score equation
f1 = function(tha, yn){
  tha*(tha*yn[1]+(yn[2]+yn[3]+yn[4])*(2+tha)) - (tha*yn[1]+yn[4]*(2+tha))
}
uniroot(f1, 0:1, yn=yn)
f2 = function(tha, yn){
  -sum(yn)*tha^2 + (yn[1]-2*yn[2]-2*yn[3]-yn[4])*tha + 2*yn[4]
}
uniroot(f2, 0:1, yn=yn, tol=1e-9)
th0 = 0.6268215

## direct likelihood
lik = function(tha, yn){
  yn[1]*log(2+tha) + (yn[2]+yn[3])*log(1-tha) + yn[4]*log(tha)
}
optimize(lik, 0:1, maximum=TRUE, yn=yn, tol=1e-8)

## EM
f = function(tha, itMax=1e2, eps=1e-8, yn){
  it = 0; dif = 1
  while( (it<itMax)&(dif>eps) ){
    th1 = (tha*yn[1]+yn[4]*(2+tha))/(tha*yn[1]+(yn[2]+yn[3]+yn[4])*(2+tha))
    it = it+1
    dif = abs(tha-th1)
    cat(it, ": diff=", dif, ", th1=", th1, ", th1-th0=", th1-th0,
        ", cvr=", (th1-th0)/(tha-th0), "\n")
    tha = th1
  }
  return(tha)
}
f(0.1, yn=yn)

## observed information via EM (missing information principle)
yn[1]/th0/(2+th0) + yn[4]/th0^2 + (yn[2]+yn[3])/(1-th0)^2 - yn[1]*2/th0/(2+th0)^2
## 377.5169
## observed information directly from the log-likelihood
yn[1]/(2+th0)^2 + (yn[2]+yn[3])/(1-th0)^2 + yn[4]/th0^2
## 377.5169
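The printed "cvr" column illustrates a known identity: the EM convergence factor equals the fraction of missing information, 1 - I_obs/I_com (Dempster, Laird and Rubin 1977). A quick check, reusing th0 and the observed information 377.5169 computed above:

## complete-data information: the first cell splits into its 1/2 and tha/4 parts,
## contributing E(y12)/th0^2 with E(y12) = yn[1]*th0/(2+th0)
Icom = yn[1]/th0/(2+th0) + yn[4]/th0^2 + (yn[2]+yn[3])/(1-th0)^2
1 - 377.5169/Icom  ## about 0.133, the limit of the printed "cvr" values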
Read textbook Chapter 14 for technical details of LMM EM estimation, and Chapter 6/14 for technical details of ML/REML estimation.
EM/ECM and Newton-Raphson/Fisher-scoring for LMM
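In the notation of the code below ($\Sigma_i = Z_i V Z_i^T + \sigma^2 I_m$, residual $r_i = y_i - X_i\beta$), the ML ECM iteration alternates a GLS update of the fixed effects with EM updates of the variance components; these are the standard EM-for-LMM formulas:

$$
\beta \leftarrow \Big(\sum_i X_i^T\Sigma_i^{-1}X_i\Big)^{-1}\sum_i X_i^T\Sigma_i^{-1}y_i,
\qquad
\hat u_i = V Z_i^T \Sigma_i^{-1} r_i,
$$
$$
V \leftarrow \frac1n\sum_i\Big[\hat u_i\hat u_i^T + V - VZ_i^T\Sigma_i^{-1}Z_iV\Big],
\qquad
\sigma^2 \leftarrow \frac1{nm}\sum_i\Big[\hat e_i^T\hat e_i + \sigma^2\,\mathrm{tr}\big(\Sigma_i^{-1}Z_iVZ_i^T\big)\Big],
$$

with $\hat e_i = \sigma^2\Sigma_i^{-1} r_i$. The REML version (lmm.reml.ecm) adds the extra terms involving $W_1^{-1} = \mathrm{cov}(\hat\beta)$ that account for the estimation of $\beta$.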
## y: n,m
## x: n,m,p
## z: n,m,k
lmm.reml.ecm = function(y, x, z, itMax=1e3, eps=1e-5){
  n = dim(y)[1]; m = dim(y)[2]
  p = dim(x)[3]; k = dim(z)[3]
  ## randomized starting values from OLS
  y1 = as.vector(y)
  xm = matrix(x, m*n, p)
  obj = lm(y1 ~ xm-1)
  rba = matrix(obj$coef, ncol=1)*runif(1)
  s2 = var(y1)
  res = matrix(obj$res, n, m)
  u = matrix(0, n, k)
  for(i in 1:n) u[i,] = lm(res[i,] ~ z[i,,]-1)$coef
  V = cov(u)*runif(1) + diag(k)*0.5
  it = 0; dst = 1
  while( (it<itMax)&(dst>eps) ){
    res = y - matrix(xm%*%rba, ncol=m)
    ## rba: GLS update of fixed effects
    W1 = matrix(0, p,p)
    W0 = rep(0, p)
    for(i in 1:n){
      Vit = z[i,,]%*%V%*%t(z[i,,]) + s2*diag(m)
      Vii = solve(Vit)
      W1 = W1 + t(x[i,,])%*%Vii%*%x[i,,]
      W0 = W0 + t(x[i,,])%*%Vii%*%t(y[i,,drop=FALSE])
    }
    W1i = solve(W1)
    rbau = W1i%*%W0
    ## V, s2: EM updates with REML correction terms
    Vu = matrix(0, k,k)
    s2u = 0
    for(i in 1:n){
      Vit = z[i,,]%*%V%*%t(z[i,,]) + s2*diag(m)
      Vii = solve(Vit)
      ei = Vii%*%t(res[i,,drop=FALSE])
      uih = V%*%t(z[i,,])%*%ei
      a0 = V%*%t(z[i,,])%*%Vii%*%x[i,,]
      Vu = Vu + uih%*%t(uih)/n + (V - V%*%t(z[i,,])%*%Vii%*%z[i,,]%*%V)/n +
           a0%*%W1i%*%t(a0)/n
      a1 = Vii%*%x[i,,]%*%W1i%*%t(x[i,,])%*%Vii
      s2u = s2u + s2^2*sum(ei^2)/n/m +
            s2*sum(diag(Vii%*%z[i,,]%*%V%*%t(z[i,,])))/n/m +
            s2^2*sum(diag(a1))/n/m
    }
    ## diff
    dst = mean(abs(Vu-V))/3 + mean(abs(rbau-rba))/3 + mean(abs(s2u-s2))/3
    it = it+1
    ## update
    V = Vu; rba = rbau; s2 = s2u[1]
    cat(it, ":", dst, "\n")
  }
  return( list(V=V, rba=rba[,1], s2=s2, covb=W1i) )
}

lmm.mle.ecm = function(y, x, z, itMax=1e3, eps=1e-5){
  n = dim(y)[1]; m = dim(y)[2]
  p = dim(x)[3]; k = dim(z)[3]
  ## randomized starting values from OLS
  yv = as.vector(y)
  xm = matrix(x, m*n, p)
  obj = lm(yv ~ xm-1)
  rba = matrix(obj$coef, ncol=1)*runif(1)
  s2 = var(yv)
  res = matrix(obj$res, n, m)
  u = matrix(0, n, k)
  for(i in 1:n) u[i,] = lm(res[i,] ~ z[i,,]-1)$coef
  V = cov(u)*runif(1) + diag(k)*0.5
  it = 0; dst = 1
  while( (it<itMax)&(dst>eps) ){
    res = y - matrix(xm%*%rba, ncol=m)
    ## rba: GLS update of fixed effects
    W1 = matrix(0, p,p)
    W0 = rep(0, p)
    for(i in 1:n){
      Vit = z[i,,]%*%V%*%t(z[i,,]) + s2*diag(m)
      Vii = solve(Vit)
      W1 = W1 + t(x[i,,])%*%Vii%*%x[i,,]
      W0 = W0 + t(x[i,,])%*%Vii%*%t(y[i,,drop=FALSE])
    }
    W1i = solve(W1)
    rbau = W1i%*%W0
    ## V, s2: EM updates (no REML correction terms)
    Vu = matrix(0, k,k)
    s2u = 0
    for(i in 1:n){
      Vit = z[i,,]%*%V%*%t(z[i,,]) + s2*diag(m)
      Vii = solve(Vit)
      ei = Vii%*%t(res[i,,drop=FALSE])
      uih = V%*%t(z[i,,])%*%ei
      a0 = V%*%t(z[i,,])%*%Vii%*%x[i,,]   ## only needed for the REML correction; unused here
      Vu = Vu + uih%*%t(uih)/n + (V - V%*%t(z[i,,])%*%Vii%*%z[i,,]%*%V)/n
      s2u = s2u + s2^2*sum(ei^2)/n/m +
            s2*sum(diag(Vii%*%z[i,,]%*%V%*%t(z[i,,])))/n/m
    }
    ## diff
    dst = mean(abs(Vu-V))/3 + mean(abs(rbau-rba))/3 + mean(abs(s2u-s2))/3
    it = it+1
    ## update
    V = Vu; rba = rbau; s2 = s2u[1]
    cat(it, ":", dst, "\n")
  }
  return( list(V=V, rba=rba[,1], s2=s2, covb=W1i) )
}
Read textbook Chapter 8. Fisher scoring/NR for MLE of LMM.
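The score and expected (Fisher) information coded in gv.fs/hm.fs below are the standard ones for the covariance parameters $\gamma = (\sigma^2, \mathrm{vech}(D))$ of $\Sigma_i(\gamma) = Z_iDZ_i^T + \sigma^2 I$, with $r_i = y_i - X_i\beta$:

$$
\frac{\partial\ell}{\partial\gamma_j}
= -\frac12\sum_i \mathrm{tr}\Big[\big(\Sigma_i^{-1} - \Sigma_i^{-1}r_ir_i^T\Sigma_i^{-1}\big)\frac{\partial\Sigma_i}{\partial\gamma_j}\Big],
\qquad
\mathcal I_{jk}
= \frac12\sum_i \mathrm{tr}\Big[\Sigma_i^{-1}\frac{\partial\Sigma_i}{\partial\gamma_j}\Sigma_i^{-1}\frac{\partial\Sigma_i}{\partial\gamma_k}\Big],
$$

where $\partial\Sigma_i/\partial\sigma^2 = I$ and $\partial\Sigma_i/\partial D_{jk} = Z_i(e_je_k^T + e_ke_j^T)Z_i^T$ for $j<k$ (just $Z_ie_je_j^TZ_i^T$ for $j=k$); the index arrays id, r0, r1, r2 in the code implement this vech bookkeeping.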
## likelihood calculation
flik = function(y,x,z, rba,Dz,s2){
  m = dim(x)[1]; n = dim(x)[2]
  lik = 0
  for(i in 1:m){
    V = z[i,,]%*%Dz%*%t(z[i,,]) + s2*diag(n)
    Vinv = solve(V)
    res = t(y)[,i] - x[i,,]%*%rba
    lik = lik - n/2*log(2*pi) - log(det(V))/2 - t(res)%*%Vinv%*%res/2
  }
  return(lik)
}

gv.fs = function(y,x,z, rba,Dz,s2){
  m = dim(x)[1]; n = dim(x)[2]
  p = dim(x)[3]; k = dim(z)[3]
  a0 = diag(k)
  id = row(a0)<=col(a0)   ## upper triangle: vech indexing of D
  g1 = matrix(0, p,1)
  g2 = 0
  g3 = rep(0, k*(k+1)/2)
  for(i in 1:m){
    V = z[i,,]%*%Dz%*%t(z[i,,]) + s2*diag(n)
    Vinv = solve(V)
    res = t(y)[,i] - x[i,,]%*%rba
    ## rba
    g1 = g1 + t(x[i,,])%*%Vinv%*%res
    ## s2
    a1 = Vinv - Vinv%*%res%*%t(res)%*%Vinv
    g2 = g2 - sum(diag(a1))/2
    ## D
    a2 = t(z[i,,])%*%a1%*%z[i,,]
    diag(a2) = diag(a2)/2
    g3 = g3 - a2[id]
  }
  return( list(rba=g1, V=c(g2,g3)) )
}

hm.fs = function(y,x,z, rba,Dz,s2){
  m = dim(x)[1]; n = dim(x)[2]
  p = dim(x)[3]; k = dim(z)[3]
  kv = k*(k+1)/2
  a0 = diag(k)
  id = row(a0)<=col(a0)
  ri = row(a0)[id]; ci = col(a0)[id]
  rc = cbind(ri,ci)
  i1 = rep(1:kv, kv); i2 = rep(1:kv, each=kv)
  j13 = cbind(rc[i1,1], rc[i2,1])
  j24 = cbind(rc[i1,2], rc[i2,2])
  j14 = cbind(rc[i1,1], rc[i2,2])
  j23 = cbind(rc[i1,2], rc[i2,1])
  r0 = 1*(rc[,1] == rc[,2]) + 1   ## 2 on the diagonal of D, 1 off-diagonal
  r1 = r0[i1]; r2 = r0[i2]
  h1 = matrix(0, p,p)
  h2 = matrix(0, 1+kv, 1+kv)
  for(i in 1:m){
    V = z[i,,]%*%Dz%*%t(z[i,,]) + s2*diag(n)
    Vinv = solve(V)
    res = t(y)[,i] - x[i,,]%*%rba   ## n,1
    ## rba
    h1 = h1 - t(x[i,,])%*%Vinv%*%x[i,,]
    ## s2
    h2[1,1] = h2[1,1] - sum(Vinv^2)/2
    ## s2,D
    a1 = t(z[i,,])%*%Vinv%*%Vinv%*%z[i,,]
    h2[1,-1] = h2[1,-1] - 2*a1[id]/r0/2
    ## D,D
    Vz = t(z[i,,])%*%Vinv%*%z[i,,]
    a2 = 2*(Vz[j13]*Vz[j24] + Vz[j14]*Vz[j23])/r1/r2
    h2[-1,-1] = h2[-1,-1] - a2/2
  }
  h2[-1,1] = h2[1,-1]
  return( list(rba=h1, V=h2) )
}

lmm.mle.fs = function(y, x, z, itMax=1e3, eps=1e-5, verbose=FALSE){
  m = dim(y)[1]; n = dim(y)[2]
  p = dim(x)[3]; k = dim(z)[3]
  xm = matrix(x, m*n, p)
  obj = lm(as.vector(y) ~ xm-1)
  rba1 = matrix(obj$coef, ncol=1)
  s21 = var(obj$res)/2   ## start from part of the OLS residual variance
                         ## (the original s2=0 makes V singular)
  res = matrix(obj$res, m,n)
  u = matrix(0, m,k)
  for(i in 1:m) u[i,] = lm(res[i,] ~ z[i,,]-1)$coef
  D1 = cov(u)*0.9 + diag(k)*0.1
  id = row(D1)<=col(D1)
  it = 0; dst = 1
  while( (it<itMax)&(dst>eps) ){
    ## update
    Dz = D1; rba = rba1; s2 = s21
    ## gv, hm
    dr1 = gv.fs(y,x,z, rba,Dz,s2)
    dr2 = hm.fs(y,x,z, rba,Dz,s2)
    ## scoring step for rba, joint step for (s2, vech(D));
    ## the original updated only vech(D) and never moved s2
    rba1 = rba - solve(dr2$rba, dr1$rba)
    a1 = c(s2, Dz[id]) - solve(dr2$V, dr1$V)
    s21 = a1[1]
    D1[id] = a1[-1]
    D1[!id] = t(D1)[!id]
    ## diff
    dst = mean(abs(D1-Dz))/3 + mean(abs(rba1-rba))/3 + abs(s21-s2)/3
    it = it+1
    if(verbose) cat(it, ":", dst, det(dr2$V), det(dr2$rba), "\n")
  }
  return( list(D=D1, rba=rba1, s2=s21) )
}

## generate data and validate the procedures
library(mvtnorm)
library(lme4)
## conditional model generation
n = 10; m = 9; p = 2; k = 2
x = array(rnorm(n*m*p), c(n,m,p))
z = array(rnorm(n*m*k), c(n,m,k))
u = matrix(rnorm(n*k), n,k)
zu = matrix(0, n,m)
for(i in 1:n) zu[i,] = z[i,,]%*%u[i,]
y = apply(x, 1:2, sum) + zu + rnorm(n*m, 0, 1.5)

## REML/EM
a0 = lmm.reml.ecm(y,x,z, eps=1e-8)
lr = rep(0, dim(y)[1])
for(i in 1:dim(y)[1]){
  lr[i] = dmvnorm(y[i,], x[i,,]%*%a0$rba,
                  a0$s2*diag(dim(y)[2]) + z[i,,]%*%a0$V%*%t(z[i,,]), log=TRUE)
}
sum(lr)

y2 = as.vector(y)
x2 = matrix(x, ncol=dim(x)[3])
z2 = matrix(z, ncol=dim(z)[3])
id = rep(1:dim(y)[1], dim(y)[2])
b0 = lmer(y2 ~ 0 + x2 + (0+z2|id), REML=TRUE)
b0v = VarCorr(b0)
sig = attr(b0v, "sc")   ## residual sd (attributes(b0v,"sc")$sc is not valid R)
V = b0v$id
lr1 = rep(0, dim(y)[1])
for(i in 1:dim(y)[1]){
  lr1[i] = dmvnorm(y[i,], x[i,,]%*%fixef(b0),
                   sig^2*diag(dim(y)[2]) + z[i,,]%*%V%*%t(z[i,,]), log=TRUE)
}
sum(lr1)

## MLE/EM
a1 = lmm.mle.ecm(y,x,z, eps=1e-8)
lr = rep(0, dim(y)[1])
for(i in 1:dim(y)[1]){
  lr[i] = dmvnorm(y[i,], x[i,,]%*%a1$rba,
                  a1$s2*diag(dim(y)[2]) + z[i,,]%*%a1$V%*%t(z[i,,]), log=TRUE)
}
sum(lr)

b1 = lmer(y2 ~ 0 + x2 + (0+z2|id), REML=FALSE)
b1v = VarCorr(b1)
sig = attr(b1v, "sc")
V = b1v$id
lr1 = rep(0, dim(y)[1])
for(i in 1:dim(y)[1]){
  lr1[i] = dmvnorm(y[i,], x[i,,]%*%fixef(b1),
                   sig^2*diag(dim(y)[2]) + z[i,,]%*%V%*%t(z[i,,]), log=TRUE)
}
sum(lr1)

## column names chosen to match the SAS code below
write.table(data.frame(y=y2, x1=x2[,1], x2=x2[,2], z1=z2[,1], z2=z2[,2], id=id),
            file="a.txt", sep="\t", row.names=FALSE)

## SAS
proc import datafile="a.txt" out=aaa dbms=tab replace;
  getnames=yes;
  datarows=2;
run;
proc mixed data=aaa method=REML;
  class id;
  model y = x1 x2 / noint S chisq;
  random z1 z2 / type=un subject=id;
run;
proc mixed data=aaa method=ML;
  class id;
  model y = x1 x2 / noint S chisq;
  random z1 z2 / type=un subject=id;
run;
data tlc;
  infile "tlc1.dat";
  input id trt$ lead0 lead1 lead4 lead6;
  y=lead0; time=0; output;
  y=lead1; time=1; output;
  y=lead4; time=4; output;
  y=lead6; time=6; output;
  drop lead0 lead1 lead4 lead6;
run;

title Treatment of Lead Exposed Children (TLC) Trial;

/* sorting data */
proc sort data=tlc;
  by trt descending time;
run;

/* factor model */
proc mixed order=data data=tlc;
  class id trt time;
  model y = trt time trt*time / s chisq;
  repeated time / type=un subject=id r;
run;

/* create linear splines */
data tlc2;
  set tlc;
  if time<=1 then time0 = time-1; else time0 = 0;
  if time>1 then time1 = time-1; else time1 = 0;
run;

/* splines model */
proc mixed data=tlc2;
  class id time trt;
  model y = time0 time1 trt trt*time0 trt*time1 / s chisq;
  repeated time / type=un subject=id r;
run;
Read textbook Chapters 7 and 14 for approximate/conditional/MLE inference of GLMM.
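The MLE versions below all target the marginal likelihood obtained by integrating the random effects out of the conditional (GLMM) likelihood; for the random-intercept logistic model used in the simulations,

$$
L(\beta,\sigma) = \prod_{i=1}^n \int \prod_{j=1}^m
\Big(\frac{e^{\eta_{ij}+u}}{1+e^{\eta_{ij}+u}}\Big)^{y_{ij}}
\Big(\frac{1}{1+e^{\eta_{ij}+u}}\Big)^{1-y_{ij}}
\phi(u;0,\sigma^2)\,du,
\qquad \eta_{ij} = \beta_0 + \beta_1 x_{ij}.
$$

The one-dimensional integral has no closed form; the code below approximates it by numerical integration, Monte Carlo averaging, quadrature, or linearization.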
Read textbook Chapter 14 for MCMC inference of GLMM.
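As a minimal illustration of MCMC for a GLMM, here is a self-contained Metropolis-within-Gibbs sketch for a random-intercept logistic model like the one simulated below; the flat prior on the coefficients, the Exp(1) prior on sig, and the step sizes are arbitrary choices for this sketch, not from the textbook.

## logit Pr(y_ij=1) = b0 + b1*x_ij + u_i, u_i ~ N(0, sig^2)
set.seed(2008)
n = 50; m = 5
x = matrix(rnorm(n*m), n,m)
y = 1*(matrix(runif(n*m), n,m) < 1/(1+exp(-1-rnorm(n)-2*x)))
logpost = function(bet, u, sig){   ## log p(y,u|bet,sig) + log prior on sig
  eta = bet[1] + bet[2]*x + u      ## length-n u recycles so u[i] is added to row i
  sum(y*eta - log(1+exp(eta))) + sum(dnorm(u, 0, sig, log=TRUE)) - sig
}
B = 5000
bet = c(0,0); u = rep(0,n); sig = 1
out = matrix(0, B,3)
lp = logpost(bet, u, sig)
for(b in 1:B){
  betp = bet + rnorm(2, 0, 0.1)                       ## update fixed effects
  lpp = logpost(betp, u, sig)
  if(log(runif(1)) < lpp-lp){ bet = betp; lp = lpp }
  up = u + rnorm(n, 0, 0.1)                           ## update random effects
  lpp = logpost(bet, up, sig)
  if(log(runif(1)) < lpp-lp){ u = up; lp = lpp }
  sigp = sig*exp(rnorm(1, 0, 0.1))                    ## log-scale update (Jacobian below)
  lpp = logpost(bet, u, sigp)
  if(log(runif(1)) < lpp-lp+log(sigp/sig)){ sig = sigp; lp = lpp }
  out[b,] = c(bet, sig)
}
colMeans(out[-(1:1000),])   ## posterior means of (b0, b1, sig); truth (1, 2, 1)

In practice one would update each u_i separately (its conditional only involves row i) and tune the step sizes; packages such as MCMCglmm, or general samplers such as Stan, automate this.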
Read textbook Chapter 14 (pages 320-343) for a review of computation for mixed effects models.
### GLM: MLE estimation (simulation studies)
## logistic regression model
## logit Pr(Y=1|X) = 1 + 2*X, X ~ N(0,1)

## numerical optimization
set.seed(2008)
n = 1e2
x = rnorm(n)
pr = 1/(1+exp(-1-2*x))
y = 1*(runif(n)<pr)
( yfit = glm(y ~ x, family=binomial) )
ylik = function(rpar, x,y){
  pr = 1/(1+exp(-rpar[1]-rpar[2]*x))
  tmp = log(1-pr) + y*log(pr/(1-pr))
  return( -sum(tmp) )
}
( a0 = optim(0:1, ylik, x=x,y=y) )
library(numDeriv)
ymle = function(rpar){ ylik(rpar, x=x,y=y) }
( a1 = hessian(ymle, x=a0$par) )
solve(a1)
summary(yfit)$cov.un

## NR/Fisher scoring: Textbook pages 104-106
gv = function(rpar, x,y){
  pr = 1/(1+exp(-rpar[1]-rpar[2]*x))
  g1 = sum(y-pr)
  g2 = sum( x*(y-pr) )
  return( c(g1,g2) )
}
hm = function(rpar, x,y){
  pr = 1/(1+exp(-rpar[1]-rpar[2]*x))
  h11 = -sum(pr*(1-pr))
  h12 = -sum(x*pr*(1-pr))
  h22 = -sum(x^2*pr*(1-pr))
  return( matrix(c(h11,h12,h12,h22), 2,2) )
}
rpar0 = 0:1
it = 0; dif = 1
while( (it<1e2)&(dif>1e-6) ){
  rpar1 = rpar0 - solve(hm(rpar0,x=x,y=y), gv(rpar0,x=x,y=y))
  it = it+1
  dif = mean(abs(rpar1-rpar0))
  cat(it, ": diff=", dif, ", par=", rpar1, "\n")
  rpar0 = rpar1
}
(a2 = -hm(rpar0, x=x,y=y))
solve(a2)
summary(yfit)$cov.un

## GLMM:
## logistic regression model
## logit Pr(y_{ij}=1|u_i,x_{ij}) = 1 + u_i + 2*x_{ij}, u_i ~ N(0,1)
## simulate x_{ij} ~ N(0,1)
set.seed(2008)
n = 1e2; m = 5
x = matrix(rnorm(n*m), n,m)
u = rnorm(n)
pr = 1/(1+exp(-1-u-2*x))
y = matrix(runif(n*m), n,m)
y = 1*(y<pr)
xi = as.vector(x)
yi = as.vector(y)
id = rep(1:n, m)
write.table(data.frame(x=xi, y=yi, id=id), file="xy08.txt",
            quote=FALSE, row.names=FALSE, col.names=FALSE)

## approximate inference: marginal likelihood by numerical integration
mlik = function(rpar, x=x, y=y){
  n = dim(x)[1]
  ai = rep(0, n)
  for(i in 1:n){
    ## vectorized over mu, as required by integrate()
    fi = function(mu){
      sapply(mu, function(m1){
        pr = 1/(1+exp(-rpar[1]-m1-rpar[2]*x[i,]))
        prod(y[i,]*pr + (1-y[i,])*(1-pr))*dnorm(m1, 0, rpar[3])
      })
    }
    ## ai[i] = integrate(fi, -Inf,Inf, stop.on.error=FALSE)$value
    ## ai[i] = integrate(fi, -Inf,Inf)$value
    ## a0 = mean(rpar[1]+rpar[2]*x[i,])
    ## ai[i] = integrate(fi, a0-25, a0+25)$value
    a1 = range(rpar[1]+rpar[2]*x[i,])
    ai[i] = integrate(fi, a1[1]-25, a1[2]+25)$value
    ## x0 = seq(a0-15, a0+15, length=1e3)
    ## ai[i] = sum(fi(x0)*30/1e3)
  }
  return(-sum(log(ai)))
}
mlik(c(1,2,1), x=x, y=y)
( a0 = optim(c(0.8,1.8,0.9), mlik, x=x,y=y) )
mlik(c(0.8785, 1.9203, 0.73573), x=x,y=y)

## Monte Carlo integration over the random effects
## set.seed(2008)
mu0 = rnorm(1e3)
amlik = function(rpar, x=x, y=y){
  n = dim(x)[1]
  ai = rep(0, n)
  for(i in 1:n){
    lik = sapply(mu0, function(mu){
      pr = 1/(1+exp(-rpar[1]-rpar[2]*x[i,]-mu*rpar[3]))   ## x[i,]: was xi, a bug
      prod(y[i,]*pr + (1-y[i,])*(1-pr))
    })
    ai[i] = mean(lik)
  }
  return(-sum(log(ai)))
}
amlik(c(1,2,1), x=x, y=y)
( a0 = optim(c(0.8,1.8,0.9), amlik, x=x,y=y) )
amlik(c(0.8785, 1.9203, 0.73573), x=x,y=y)

## linearization: method similar to proc glimmix with syntax similar to proc nlmixed
## approximate inference of the Binomial likelihood with LS
library(nlme)
nlme(yi ~ 1/(1+exp(-beta.0-xi*beta.xi)),
     fixed = beta.0 + beta.xi ~ 1, random = beta.0 ~ 1,
     groups = ~ id, start=c(beta.0=0, beta.xi=1))

## PQL/Laplace
library(MASS)
glmmPQL(yi ~ xi, random = ~ 1|id, family = binomial)
## glmmPQL might be sensitive to the random effects structure.
## For a large number of random effects, or crossed random effects,
## the program may have numerical difficulties with convergence.
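The integrate() and Monte Carlo versions above can also be written with (non-adaptive) Gauss-Hermite quadrature, the fixed-grid analogue of the adaptive quadrature used by glmer/proc nlmixed below; a sketch assuming the statmod package for the nodes and weights:

library(statmod)
gh = gauss.quad(20, kind="hermite")   ## nodes/weights for weight function exp(-t^2)
ghlik = function(rpar, x, y){
  n = dim(x)[1]
  ai = rep(0, n)
  for(i in 1:n){
    ## int f(mu) dnorm(mu; 0, s) dmu = sum_j w_j f(sqrt(2)*s*t_j)/sqrt(pi)
    lik = sapply(sqrt(2)*rpar[3]*gh$nodes, function(mu){
      pr = 1/(1+exp(-rpar[1]-mu-rpar[2]*x[i,]))
      prod(y[i,]*pr + (1-y[i,])*(1-pr))
    })
    ai[i] = sum(gh$weights*lik)/sqrt(pi)
  }
  return(-sum(log(ai)))
}
ghlik(c(1,2,1), x=x, y=y)   ## should be close to mlik(c(1,2,1), x=x, y=y)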
## Laplace (nAGQ=1) and adaptive Gauss-Hermite quadrature (AGQ)
library(lme4)
glmer(yi ~ xi + (1|id), family=binomial, nAGQ = 1)
glmer(yi ~ xi + (1|id), family=binomial, nAGQ = 20)

## alternative nlme modeling approach: Binomial likelihood approximation with LS
f = function(x, beta.0, beta.x){
  a0 = exp(-beta.0-x*beta.x)
  res = 1/(1+a0)
  gr1 = a0/(1+a0)^2
  gr2 = x*a0/(1+a0)^2
  grv <- array(0, c(length(res), 2), list(NULL, c("beta.0", "beta.x")))
  grv[, "beta.0"] <- gr1
  grv[, "beta.x"] <- gr2
  attr(res, "gradient") <- grv
  res
}
xy = data.frame(x=xi, y=yi, id=id)
nlmer(y ~ f(x, beta.0, beta.x) ~ beta.0|id, data=xy,
      start=c(beta.0=1, beta.x=2), nAGQ=10)   ## nAGQ: was misspelled nAQG
## can only handle a random intercept.

library(glmmML)
xy.agq = glmmML(yi ~ xi, cluster=id, family=binomial, method="ghq", n.points=20)
summary(xy.agq)

## GEE1: assuming a constant correlation matrix
library(geepack)
library(gee)
xy.gee1 = gee(yi ~ xi, id=id, family="binomial", scale.fix=TRUE, corstr="unstructured")
summary(xy.gee1)

## GEE1 augmented: ALR, assuming a constant log OR
xy.alr = ordgee(ordered(yi) ~ xi, id=id, waves=rep(1:5,n),
                int.const=TRUE, corstr="unstructured")
summary(xy.alr)
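For reference, GEE1 (used above and in the SAS code below) solves the estimating equations

$$
U(\beta) = \sum_{i=1}^n D_i^T V_i^{-1}\,\big(y_i - \mu_i(\beta)\big) = 0,
\qquad
V_i = A_i^{1/2} R(\alpha) A_i^{1/2},
$$

where $D_i = \partial\mu_i/\partial\beta$, $A_i$ is the diagonal matrix of marginal variances, and $R(\alpha)$ is the working correlation matrix estimated from the residuals; ALR instead models the pairwise association through log odds ratios.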
data xy;
  infile "xy08.txt";
  input x y id;
run;

Title Marginal inference with GEE assuming constant correlation matrix;
/*
 1. Based on the idea of GEE1 (Zeger and Liang 1986).
 2. It is relatively easy to estimate the correlation matrix based on the
    residuals, with sample moments/averages.
 3. There are some implicit constraints on the correlation between discrete
    variables.
*/
proc genmod descending;
  class id;
  model y = x / dist=bin link=logit type3 wald;
  repeated subject=id / type=un CORRW;
run;

Title Marginal inference with GEE assuming constant OR to model the correlation matrix;
/*
 1. Based on the alternating logistic regression model (ALR, Carey et al. 1993),
    often referred to as GEE1 augmented.
 2. The intuition is that it is more natural to use the OR to model the
    dependence between discrete variables.
*/
proc genmod descending;
  class id;
  model y = x / dist=bin link=logit type3 wald;
  repeated subject=id / logor=fullclust;
run;

Title MLE by approximating the nonlinear GLM with a linear model;
/*
 1. Approximate the (generally nonlinear) GLM with a linear model, hence reduce
    the problem to an LMM, which can be solved very efficiently (with iterations).
 2. The approximation depends on the current parameter/random effects estimates,
    so the approximating LMM changes from iteration to iteration: a double
    iteration procedure. The whole procedure is often referred to as
    pseudo-likelihood (PL) or restricted PL (RPL), depending on whether we
    adjust for the degrees of freedom as in LMM.
 3. It may not converge, and may converge to the wrong model.
 4. It can in principle handle any number of random effects (like proc mixed),
    but a large number of random effects makes convergence harder/slower and
    convergence to the wrong model more likely.
 5. It is important to set the "maxopt" option, which controls the outer
    number of LMM approximations.
*/
proc glimmix maxopt=200;
  class id;
  model y = x / dist=bin S;
  random int / type=un subject=id;
run;

Title Conditional inference with GLMM: MLE by approximating the integration with finite sums;
/*
 1. Adaptive Gaussian quadrature: similar to the Laplace approximation (which
    uses only one point), but averaged over multiple points to improve the
    overall approximation accuracy.
 2. Can only handle a very small number of random effects.
 3. It is important to set the "qpoints" option, which controls the number of
    adaptive Gaussian quadrature points.
*/
proc nlmixed qpoints=20;
  parms B_int = 1 B_x = 2 sig = 1.0;
  eta = B_int + B_x*x;
  pr = 1/(1 + exp(- b0 - eta));
  model y ~ binomial(1, pr);
  random b0 ~ normal( 0, sig*sig ) subject=id;
run;
GEE1 improved