#Convert standard format to the table format logLinearEM <- function(marker,n,pedInfo){ # find child and his/her parents child <- pedInfo$patid>0 patid <- pedInfo$patid[child] matid <- pedInfo$matid[child] chtid <- pedInfo$id[child] subPedid <- pedInfo$pedid[child] # sort the data, and write the sorted data into a new dataset called newdata data <- data.frame(cbind(pedInfo,marker)) newdata <- data - 100 for (i in 1:n) { newdata[(3*(i-1)+1),] <- data[(data$id==matid[i] & data$pedid==subPedid[i]),] newdata[(3*(i-1)+2),] <- data[(data$id==patid[i] & data$pedid==subPedid[i]),] newdata[(i*3),] <- data[(data$id==chtid[i] & data$pedid==subPedid[i]),] } pedInfo <- newdata nCopiesmarker <- (pedInfo[,7]+pedInfo[,8])-2 # Use pedInfo to generate matrix of MFC assuming that data may be out of order MFC <- matrix(nCopiesmarker,n,3,byrow=T) #######define N(i,j,k) N=array(0,c(3,3,3)) # where subscript 3 means 0 in genotype for(i in 1:3){ for (j in 1:3){ for (k in 1:3){ N[i,j,k]=sum((MFC[,1]==(i %% 3))&(MFC[,2]==(j %% 3))&(MFC[, 3]==(k %% 3)),na.rm=T) }}} MFCdata <- c(N[3,3,3],N[3,1,3],N[3,1,1],N[1,3,3],N[1,3,1], N[1,1,3],N[1,1,1],N[1,1,2],N[3,2,1],N[2,3,1],N[1,2,1], N[1,2,2],N[2,1,1],N[2,1,2],N[2,2,2]) # Dummy variables for Poisson model nCopy1 <- c(0,0,1,0,1,0,1,0,1,1,1,0,1,0,0) nCopy2 <- c(0,0,0,0,0,0,0,1,0,0,0,1,0,1,1) hetOff <- c(0,0,0,0,0,0,1,0,0,0,0,0,0,0,0) intStrat <- c(6,5,5,5,5,4,4,4,3,3,2,2,2,2,1) if(sum(is.na(marker))>0){ ####### define observed complete families N(i,j,k) and observed incompelete families M(i,?,k) ######## ########define M(i,?,k) Mdad=array(0,c(3,3)) for(i in 1:3){ for (k in 1:3){ Mdad[i,k]=sum((MFC[,1]==(i %% 3))&is.na(MFC[,2])&(MFC[, 3]==(k %% 3)),na.rm=T) }} ########define M(?,j,k) Mmom=array(0,c(3,3)) for(j in 1:3){ for (k in 1:3){ Mmom[j,k]=sum(is.na(MFC[,1])&(MFC[,2]==(j %% 3))&(MFC[, 3]==(k %% 3)),na.rm=T) }} M=Mmom+Mdad missDat <- c(M[2,2],M[2,1],M[1,2],M[1,1],M[1,3],M[3,1],M[3,3]) ###(sum(M)+sum(N))==n ############################## New EM ########################## ####### EM algoriithm p = c(rep(1/15,15)) p0 = rep(0,15) newDat <- MFCdata while(max(abs(p-p0))>10^(-6)){ p0=p ### M-step out <- glm(newDat~C(factor(intStrat),treatment,base=6)+ nCopy1+nCopy2,family=poisson,na.action=na.omit, offset=log(2)*hetOff) p <- as.vector(out$fitted.values/sum(out$fitted.values)) ### E-step EN000 <- ifelse(p[1]+p[2]==0,N[3,3,3], N[3,3,3]+M[3,3]*(p[1]/(p[1]+p[2]))) EN010 <- ifelse(p[1]+p[2]==0,N[3,1,3], N[3,1,3]+M[3,3]*(p[2]/(p[1]+p[2]))) EN011 <- ifelse(p[3]+p[9]==0,N[3,1,1], N[3,1,1]+M[3,1]*(p[3]/(p[3]+p[9]))) EN021 <- ifelse(p[9]+p[3]==0,N[3,2,1], N[3,2,1]+M[3,1]*(p[9]/(p[9]+p[3]))) EN100 <- ifelse(p[4]+p[6]==0,N[1,3,3], N[1,3,3]+M[1,3]*(p[4]/(p[4]+p[6]))) EN110 <- ifelse(p[6]+p[4]==0,N[1,1,3], N[1,1,3]+M[1,3]*(p[6]/(p[6]+p[4]))) EN101 <- ifelse(p[5]+p[7]+p[11]==0,N[1,3,1], N[1,3,1]+M[1,1]*(p[5]/(p[5]+p[7]+p[11]))) EN111 <- ifelse(p[7]+p[5]+p[11]==0,N[1,1,1], N[1,1,1]+M[1,1]*(p[7]/(p[5]+p[7]+p[11]))) EN121 <- ifelse(p[5]+p[7]+p[11]==0,N[1,2,1], N[1,2,1]+M[1,1]*(p[11]/(p[5]+p[7]+p[11]))) EN112 <- ifelse(p[8]+p[12]==0,N[1,1,2], N[1,1,2]+M[1,2]*(p[8]/(p[8]+p[12]))) EN122 <- ifelse(p[8]+p[12]==0,N[1,2,2], N[1,2,2]+M[1,2]*(p[12]/(p[8]+p[12]))) EN201 <- ifelse(p[10]+p[13]==0,N[2,3,1], N[2,3,1]+M[2,1]*(p[10]/(p[10]+p[13]))) EN211 <- ifelse(p[10]+p[13]==0,N[2,1,1], N[2,1,1]+M[2,1]*(p[13]/(p[10]+p[13]))) EN212 <- ifelse(p[14]+p[15]==0,N[2,1,2], N[2,1,2]+M[2,2]*(p[14]/(p[14]+p[15]))) EN222 <- ifelse(p[14]+p[15]==0,N[2,2,2], N[2,2,2]+M[2,2]*(p[15]/(p[14]+p[15]))) ### expected counts layout Ecount <- c(EN000,EN010,EN011,EN100,EN101,EN110,EN111,EN112, EN021,EN201,EN121,EN122,EN211,EN212,EN222) newDat <- round(Ecount) } mp <- c(p[14]+p[15],p[10]+p[13],p[8]+p[12],p[5]+p[7]+p[11], p[9]+p[3],p[4]+p[6],p[1]+p[2]) logLikFull <- sum(MFCdata*log(p))+sum(missDat*log(mp)) #Reduced p0 = c(rep(1/15,15)) newDat <- MFCdata while(max(abs(p-p0))>10^(-6)){ p0=p ### M-step outRed <- glm(newDat~C(factor(intStrat),treatment,base=6), family=poisson,na.action=na.omit,offset=log(2)*hetOff) p <- as.vector(outRed$fitted.values/sum(outRed$fitted.values)) ### E-step EN000 <- ifelse(p[1]+p[2]==0,N[3,3,3], N[3,3,3]+M[3,3]*(p[1]/(p[1]+p[2]))) EN010 <- ifelse(p[1]+p[2]==0,N[3,1,3], N[3,1,3]+M[3,3]*(p[2]/(p[1]+p[2]))) EN011 <- ifelse(p[3]+p[9]==0,N[3,1,1], N[3,1,1]+M[3,1]*(p[3]/(p[3]+p[9]))) EN021 <- ifelse(p[9]+p[3]==0,N[3,2,1], N[3,2,1]+M[3,1]*(p[9]/(p[9]+p[3]))) EN100 <- ifelse(p[4]+p[6]==0,N[1,3,3], N[1,3,3]+M[1,3]*(p[4]/(p[4]+p[6]))) EN110 <- ifelse(p[6]+p[4]==0,N[1,1,3], N[1,1,3]+M[1,3]*(p[6]/(p[6]+p[4]))) EN101 <- ifelse(p[5]+p[7]+p[11]==0,N[1,3,1], N[1,3,1]+M[1,1]*(p[5]/(p[5]+p[7]+p[11]))) EN111 <- ifelse(p[7]+p[5]+p[11]==0,N[1,1,1], N[1,1,1]+M[1,1]*(p[7]/(p[5]+p[7]+p[11]))) EN121 <- ifelse(p[5]+p[7]+p[11]==0,N[1,2,1], N[1,2,1]+M[1,1]*(p[11]/(p[5]+p[7]+p[11]))) EN112 <- ifelse(p[8]+p[12]==0,N[1,1,2], N[1,1,2]+M[1,2]*(p[8]/(p[8]+p[12]))) EN122 <- ifelse(p[8]+p[12]==0,N[1,2,2], N[1,2,2]+M[1,2]*(p[12]/(p[8]+p[12]))) EN201 <- ifelse(p[10]+p[13]==0,N[2,3,1], N[2,3,1]+M[2,1]*(p[10]/(p[10]+p[13]))) EN211 <- ifelse(p[10]+p[13]==0,N[2,1,1], N[2,1,1]+M[2,1]*(p[13]/(p[10]+p[13]))) EN212 <- ifelse(p[14]+p[15]==0,N[2,1,2], N[2,1,2]+M[2,2]*(p[14]/(p[14]+p[15]))) EN222 <- ifelse(p[14]+p[15]==0,N[2,2,2], N[2,2,2]+M[2,2]*(p[15]/(p[14]+p[15]))) ### expected counts layout Ecount <- c(EN000,EN010,EN011,EN100,EN101,EN110,EN111,EN112, EN021,EN201,EN121,EN122,EN211,EN212,EN222) newDat <- round(Ecount) } mp <- c(p[14]+p[15],p[10]+p[13],p[8]+p[12],p[5]+p[7]+p[11], p[9]+p[3],p[4]+p[6],p[1]+p[2]) logLikRed <- sum(MFCdata*log(p))+sum(missDat*log(mp)) } list(coef=out$coef,lrt=-2*(logLikRed-logLikFull)) }