### This file contains 3 functions ## (1) clogist -perform conditional logistic regression ## by calling the 2 functions below, provides both ## normal and sandwich estimators of variance ## see Fay, Graubard, Freedman, and Midthune (1998) ## Biometrics, 54: 195-208 ## (2) loglike - finds the loglikelihood using recursive ## methods ## (3) find.info - calculates score vector and information ## matrix using numerical methods ## Both methods are described in ## Gail, Lubin and Rubinstein (1981) ## Biometrika, 703-707 ## see comments in clogist for instructions. clogist<-function(n,m,x,set,initb=NA, h=0.0001,maxitr=15,epsilon=1e-8,alpha=0.05){ ## ## Perform conditional logistic regression ## for clustered data ## ## INPUT: ## n = a vector of number at risk for each observation ## m = a vector of number of events for each obs ## x = a matrix of covariates, same number of rows as ## length of n and m ## set = a vector defining the clusters ## initb = an initial guess for beta, if initb=NA ## then the program tries its own initial guess ## if possible using unconditional logistic ## regression ## h = same as h in Gail, Lubin and Rubinstein, ## Biometrika, 1981, 703-707 ## maxitr = maximum number of iterations ## epsilon = convergence criteria ## converge if the relative change in both beta ## and the loglikelihood is small, see definition ## of error below ## alpha = for defining 100(1-alpha) percent ## confidence intervals ## ## OUTPUT: ## outputs a list with the following elements: ## beta = vector of parameter estimates ## loglik = loglikelihood of final model ## modelvar = matrix of model-based variance estimate ## sandvar0 = naive sandwich variance estimate ## sandvar1 = adjusted sandwich var estimate ## see Fay, Graubard, Freedman, and Midthune (1998) ## Biometrics, 54: 195-208 ## ci.class = c.i. based on normal approx using modelvar ## ci.sand = c.i. based on normal approx using ## naive sandwich ## ci.t = c.i. based on adjusted sandwich var using t ## distribution (see Fay, et al, 1998) ## out = matrix with 7 columns: ## (1) beta ## (2) s.e. beta, using model-based var ## (3) s.e. beta, using naive sandwich ## (4) s.e. beta, using adjusted sandwich ## (5) 2-sided p-values, using model-based var, normal ## (6) 2-sided p-values, using naive sandwich, normal ## (7) 2-sided p-values, using adjusted sandwich, F ## all p-values represent test Ho: beta_j=0 ## vs. H1: beta_j neq 0, using Wald-type tests ## (with no correction for multiple tests) ## ## DETAILS: ## The program calls 2 functions defined externally: ## loglike - finds the loglikelihood using recursive ## methods ## find.info - calculates score vector and information ## matrix using numerical methods ## Both methods are described in ## Gail, Lubin and Rubinstein (1981) ## ## This S program is by Michael Fay, modeled after a ## Fortran program written by Doug Midthune ## please let me know if you have any problems: ## faym@mail.nih.gov ### first find an initial estimate for beta using unconditional ### logistic regression (if you can) x<-as.matrix(x) nobs<-length(n) uset<-unique(set) nsets<- length(uset) nbeta<-dim(x)[2] if ( !any(is.na(initb)) ) beta<-initb else{ if (nsets>1 && (nsets+nbeta)N/2){ ## for efficiency, keep loop part of calculation to minimum ## by switching m and n-m, beta and -beta m<-n-m M<-N-M U<-1/U eta<- -eta } if (M==1) return(sum(eta*m) - log(sum(U*n)) ) B<-rep(1,N-M+1) u<-rep(NA,N) count<-1 for (a in 1:length(n)){ u[count:(count+n[a]-1)]<-U[a] count<-count+n[a] } ## The last 2 lines of this function, may be written more ## clearly (i.e., more like in Gail, et al) BUT LESS EFFICIENTLY as: # B<-matrix(0,M+1,N+1) # B[1,]<-1 # for (i in 1:M){ # for (j in i:(N-M+i)){ # B[i+1,j+1]<- B[i+1,j]+u[j]*B[i,j] # } # } # sum(eta*m) - log(B[M+1,N+1]) for (i in 1:(M-1)) B<- cumsum(B*u[i:(N-M+i)]) sum(eta*m) - log(sum(B*u[M:N])) } ## end of loglike find.info<-function(n,m,x,beta,h){ ## estimate score vector and information ## matrix for a single set nbeta<-length(beta) ## LOGLIKP[i] gives loglik ## with beta[i]<-beta[i]+h ## LOGLIKM[i] gives loglik ## with beta[i]<-beta[i]-h x<-as.matrix(x) LOGLIKP<-rep(0,nbeta) LOGLIKM<-rep(0,nbeta) LOGLIK<-loglike(n,m,x,beta) all.not.equal<-function(x) var(x)>0 ## if all the jth column of covariates within a set ## are equal (i.e. are fixed) then score and info ## equal zero for the ## corresponding row and/or column, so no need to ## calculate them notfixed<-(1:nbeta)[apply(x,2,all.not.equal)] rinfo<-matrix(0,nbeta,nbeta) for (i in notfixed){ betatemp<-beta betatemp[i]<-beta[i]+h LOGLIKP[i]<-loglike(n,m,x,betatemp) betatemp[i]<-beta[i]-h LOGLIKM[i]<-loglike(n,m,x,betatemp) rinfo[i,i]<- LOGLIKP[i] + LOGLIKM[i]- 2*LOGLIK } if (length(n[notfixed])==1) return(list(score=(LOGLIKP-LOGLIKM)/(2*h), info=rinfo/(h**2), loglik=LOGLIK ) ) nfc<- 0 for (i in notfixed[-1]){ nfc<-nfc+1 for (j in notfixed[1:nfc]){ betatemp<-beta betatemp[i]<-beta[i]+h betatemp[j]<-beta[j]-h rinfo[i,j]<-(LOGLIKP[i] + LOGLIKM[j]- loglike(n,m,x,betatemp) - LOGLIK) rinfo[j,i]<-rinfo[i,j] } } list(score=(LOGLIKP-LOGLIKM)/(2*h),info=rinfo/(h**2),loglik=LOGLIK) } ## end of find.info