void coxfit2(Sint *maxiter, Sint *nusedx, Sint *nvarx, double *time, Sint *status, double *covar2, double *offset, double *weights, Sint *strata, double *means, double *beta, double *u, double *imat2, double loglik[2], Sint *flag, double *work, double *eps, double *tol_chol, double *sctest) { int i,j,k, person; int iter; int nused, nvar; double **covar, **cmat, **imat; /*ragged array versions*/ double *mark, *wtave; double *a, *newbeta; double *a2, **cmat2; double denom=0, zbeta, risk; double temp, temp2; double ndead; double newlk=0; double d2, efron_wt; int halving; /*are we doing step halving at the moment? */ double method; nused = *nusedx; nvar = *nvarx; method= *sctest; /* ** Set up the ragged arrays */ covar= dmatrix(covar2, nused, nvar); imat = dmatrix(imat2, nvar, nvar); cmat = dmatrix(work, nvar, nvar); cmat2= dmatrix(work+nvar*nvar, nvar, nvar); a = work + 2*nvar*nvar; newbeta = a + nvar; a2 = newbeta + nvar; mark = a2 + nvar; wtave= mark + nused; /* ** Mark(i) contains the number of tied deaths at this point, ** for the first person of several tied times. It is zero for ** the second and etc of a group of tied times. ** Wtave contains the average weight for the deaths */ temp=0; j=0; for (i=nused-1; i>0; i--) { if ((time[i]==time[i-1]) & (strata[i-1] != 1)) { j += status[i]; temp += status[i]* weights[i]; mark[i]=0; } else { mark[i] = j + status[i]; if (mark[i] >0) wtave[i]= (temp+ status[i]*weights[i])/ mark[i]; temp=0; j=0; } } mark[0] = j + status[0]; if (mark[0]>0) wtave[0] = (temp +status[0]*weights[0])/ mark[0]; /* ** Subtract the mean from each covar, as this makes the regression ** much more stable */ for (i=0; i<nvar; i++) { temp=0; for (person=0; person<nused; person++) temp += covar[i][person]; temp /= nused; means[i] = temp; for (person=0; person<nused; person++) covar[i][person] -=temp; } /* ** do the initial iteration step */ strata[nused-1] =1; loglik[1] =0; for (i=0; i<nvar; i++) { u[i] =0; for (j=0; j<nvar; j++) imat[i][j] =0 ; } efron_wt =0; for (person=nused-1; person>=0; person--) { if (strata[person] == 1) { denom = 0; for (i=0; i<nvar; i++) { a[i] = 0; a2[i]=0 ; for (j=0; j<nvar; j++) { cmat[i][j] = 0; cmat2[i][j]= 0; } } } zbeta = offset[person]; /* form the term beta*z (vector mult) */ for (i=0; i<nvar; i++) zbeta += beta[i]*covar[i][person]; zbeta = coxsafe(zbeta); risk = exp(zbeta) * weights[person]; denom += risk; efron_wt += status[person] * risk; /*sum(denom) for tied deaths*/ for (i=0; i<nvar; i++) { a[i] += risk*covar[i][person]; for (j=0; j<=i; j++) cmat[i][j] += risk*covar[i][person]*covar[j][person]; } if (status[person]==1) { loglik[1] += weights[person]*zbeta; for (i=0; i<nvar; i++) { u[i] += weights[person]*covar[i][person]; a2[i] += risk*covar[i][person]; for (j=0; j<=i; j++) cmat2[i][j] += risk*covar[i][person]*covar[j][person]; } } if (mark[person] >0) { /* once per unique death time */ /* ** Trick: when 'method==0' then temp=0, giving Breslow's method */ ndead = mark[person]; for (k=0; k<ndead; k++) { temp = (double)k * method / ndead; d2= denom - temp*efron_wt; loglik[1] -= wtave[person] * log(d2); for (i=0; i<nvar; i++) { temp2 = (a[i] - temp*a2[i])/ d2; u[i] -= wtave[person] *temp2; for (j=0; j<=i; j++) imat[j][i] += wtave[person]*( (cmat[i][j] - temp*cmat2[i][j]) /d2 - temp2*(a[j]-temp*a2[j])/d2); } } efron_wt =0; for (i=0; i<nvar; i++) { a2[i]=0; for (j=0; j<nvar; j++) cmat2[i][j]=0; } } } /* end of accumulation loop */ loglik[0] = loglik[1]; /* save the loglik for iteration zero */ /* am I done? ** update the betas and test for convergence */ for (i=0; i<nvar; i++) /*use 'a' as a temp to save u0, for the score test*/ a[i] = u[i]; *flag= cholesky2(imat, nvar, *tol_chol); chsolve2(imat,nvar,a); /* a replaced by a *inverse(i) */ *sctest=0; for (i=0; i<nvar; i++) *sctest += u[i]*a[i]; /* ** Never, never complain about convergence on the first step. That way, ** if someone HAS to they can force one iter at a time. */ for (i=0; i<nvar; i++) { newbeta[i] = beta[i] + a[i]; } if (*maxiter==0) { chinv2(imat,nvar); for (i=1; i<nvar; i++) for (j=0; j<i; j++) imat[i][j] = imat[j][i]; return; /* and we leave the old beta in peace */ } /* ** here is the main loop */ halving =0 ; /* =1 when in the midst of "step halving" */ for (iter=1; iter<=*maxiter; iter++) { newlk =0; for (i=0; i<nvar; i++) { u[i] =0; for (j=0; j<nvar; j++) imat[i][j] =0; } /* ** The data is sorted from smallest time to largest ** Start at the largest time, accumulating the risk set 1 by 1 */ for (person=nused-1; person>=0; person--) { if (strata[person] == 1) { /* rezero temps for each strata */ efron_wt =0; denom = 0; for (i=0; i<nvar; i++) { a[i] = 0; a2[i]=0 ; for (j=0; j<nvar; j++) { cmat[i][j] = 0; cmat2[i][j]= 0; } } } zbeta = offset[person]; for (i=0; i<nvar; i++) zbeta += newbeta[i]*covar[i][person]; zbeta = coxsafe(zbeta); risk = exp(zbeta ) * weights[person]; denom += risk; efron_wt += status[person] * risk; /* sum(denom) for tied deaths*/ for (i=0; i<nvar; i++) { a[i] += risk*covar[i][person]; for (j=0; j<=i; j++) cmat[i][j] += risk*covar[i][person]*covar[j][person]; } if (status[person]==1) { newlk += weights[person] *zbeta; for (i=0; i<nvar; i++) { u[i] += weights[person] *covar[i][person]; a2[i] += risk*covar[i][person]; for (j=0; j<=i; j++) cmat2[i][j] += risk*covar[i][person]*covar[j][person]; } } if (mark[person] >0) { /* once per unique death time */ for (k=0; k<mark[person]; k++) { temp = (double)k* method /mark[person]; d2= denom - temp*efron_wt; newlk -= wtave[person] *log(d2); for (i=0; i<nvar; i++) { temp2 = (a[i] - temp*a2[i])/ d2; u[i] -= wtave[person] *temp2; for (j=0; j<=i; j++) imat[j][i] += wtave[person] *( (cmat[i][j] - temp*cmat2[i][j]) /d2 - temp2*(a[j]-temp*a2[j])/d2); } } efron_wt =0; for (i=0; i<nvar; i++) { a2[i]=0; for (j=0; j<nvar; j++) cmat2[i][j]=0; } } } /* end of accumulation loop */ /* am I done? ** update the betas and test for convergence */ *flag = cholesky2(imat, nvar, *tol_chol); if (fabs(1-(loglik[1]/newlk))<=*eps && halving==0) { /* all done */ loglik[1] = newlk; chinv2(imat, nvar); /* invert the information matrix */ for (i=1; i<nvar; i++) for (j=0; j<i; j++) imat[i][j] = imat[j][i]; for (i=0; i<nvar; i++) beta[i] = newbeta[i]; *maxiter = iter; return; } if (iter==*maxiter) break; /*skip the step halving calc*/ if (newlk < loglik[1]) { /*it is not converging ! */ halving =1; for (i=0; i<nvar; i++) newbeta[i] = (newbeta[i] + beta[i]) /2; /*half of old increment */ } else { halving=0; loglik[1] = newlk; chsolve2(imat,nvar,u); j=0; for (i=0; i<nvar; i++) { beta[i] = newbeta[i]; newbeta[i] = newbeta[i] + u[i]; } } } /* return for another iteration */ loglik[1] = newlk; chinv2(imat, nvar); for (i=1; i<nvar; i++) for (j=0; j<i; j++) imat[i][j] = imat[j][i]; for (i=0; i<nvar; i++) beta[i] = newbeta[i]; *flag= 1000; return; }
SEXP coxfit6(SEXP maxiter2, SEXP time2, SEXP status2, SEXP covar2, SEXP offset2, SEXP weights2, SEXP strata2, SEXP method2, SEXP eps2, SEXP toler2, SEXP ibeta, SEXP doscale2) { int i,j,k, person; double **covar, **cmat, **imat; /*ragged arrays */ double wtave; double *a, *newbeta; double *a2, **cmat2; double *scale; double denom=0, zbeta, risk; double temp, temp2; int ndead; /* actually, the sum of their weights */ double newlk=0; double dtime, d2; double deadwt; /*sum of case weights for the deaths*/ double efronwt; /* sum of weighted risk scores for the deaths*/ int halving; /*are we doing step halving at the moment? */ int nrisk; /* number of subjects in the current risk set */ /* copies of scalar input arguments */ int nused, nvar, maxiter; int method; double eps, toler; int doscale; /* vector inputs */ double *time, *weights, *offset; int *status, *strata; /* returned objects */ SEXP imat2, means2, beta2, u2, loglik2; double *beta, *u, *loglik, *means; SEXP sctest2, flag2, iter2; double *sctest; int *flag, *iter; SEXP rlist, rlistnames; int nprotect; /* number of protect calls I have issued */ /* get local copies of some input args */ nused = LENGTH(offset2); nvar = ncols(covar2); method = asInteger(method2); maxiter = asInteger(maxiter2); eps = asReal(eps2); /* convergence criteria */ toler = asReal(toler2); /* tolerance for cholesky */ doscale = asInteger(doscale2); time = REAL(time2); weights = REAL(weights2); offset= REAL(offset2); status = INTEGER(status2); strata = INTEGER(strata2); /* ** Set up the ragged arrays and scratch space ** Normally covar2 does not need to be duplicated, even though ** we are going to modify it, due to the way this routine was ** was called. In this case NAMED(covar2) will =0 */ nprotect =0; if (NAMED(covar2)>0) { PROTECT(covar2 = duplicate(covar2)); nprotect++; } covar= dmatrix(REAL(covar2), nused, nvar); PROTECT(imat2 = allocVector(REALSXP, nvar*nvar)); nprotect++; imat = dmatrix(REAL(imat2), nvar, nvar); a = (double *) R_alloc(2*nvar*nvar + 4*nvar, sizeof(double)); newbeta = a + nvar; a2 = newbeta + nvar; scale = a2 + nvar; cmat = dmatrix(scale + nvar, nvar, nvar); cmat2= dmatrix(scale + nvar +nvar*nvar, nvar, nvar); /* ** create output variables */ PROTECT(beta2 = duplicate(ibeta)); beta = REAL(beta2); PROTECT(means2 = allocVector(REALSXP, nvar)); means = REAL(means2); PROTECT(u2 = allocVector(REALSXP, nvar)); u = REAL(u2); PROTECT(loglik2 = allocVector(REALSXP, 2)); loglik = REAL(loglik2); PROTECT(sctest2 = allocVector(REALSXP, 1)); sctest = REAL(sctest2); PROTECT(flag2 = allocVector(INTSXP, 1)); flag = INTEGER(flag2); PROTECT(iter2 = allocVector(INTSXP, 1)); iter = INTEGER(iter2); nprotect += 7; /* ** Subtract the mean from each covar, as this makes the regression ** much more stable. */ for (i=0; i<nvar; i++) { temp=0; for (person=0; person<nused; person++) temp += covar[i][person]; temp /= nused; means[i] = temp; for (person=0; person<nused; person++) covar[i][person] -=temp; if (doscale==1) { /* and also scale it */ temp =0; for (person=0; person<nused; person++) { temp += fabs(covar[i][person]); } if (temp > 0) temp = nused/temp; /* scaling */ else temp=1.0; /* rare case of a constant covariate */ scale[i] = temp; for (person=0; person<nused; person++) covar[i][person] *= temp; } } if (doscale==1) { for (i=0; i<nvar; i++) beta[i] /= scale[i]; /*rescale initial betas */ } else { for (i=0; i<nvar; i++) scale[i] = 1.0; } /* ** do the initial iteration step */ strata[nused-1] =1; loglik[1] =0; for (i=0; i<nvar; i++) { u[i] =0; a2[i] =0; for (j=0; j<nvar; j++) { imat[i][j] =0 ; cmat2[i][j] =0; } } for (person=nused-1; person>=0; ) { if (strata[person] == 1) { nrisk =0 ; denom = 0; for (i=0; i<nvar; i++) { a[i] = 0; for (j=0; j<nvar; j++) cmat[i][j] = 0; } } dtime = time[person]; ndead =0; /*number of deaths at this time point */ deadwt =0; /* sum of weights for the deaths */ efronwt=0; /* sum of weighted risks for the deaths */ while(person >=0 &&time[person]==dtime) { /* walk through the this set of tied times */ nrisk++; zbeta = offset[person]; /* form the term beta*z (vector mult) */ for (i=0; i<nvar; i++) zbeta += beta[i]*covar[i][person]; zbeta = coxsafe(zbeta); risk = exp(zbeta) * weights[person]; denom += risk; /* a is the vector of weighted sums of x, cmat sums of squares */ for (i=0; i<nvar; i++) { a[i] += risk*covar[i][person]; for (j=0; j<=i; j++) cmat[i][j] += risk*covar[i][person]*covar[j][person]; } if (status[person]==1) { ndead++; deadwt += weights[person]; efronwt += risk; loglik[1] += weights[person]*zbeta; for (i=0; i<nvar; i++) u[i] += weights[person]*covar[i][person]; if (method==1) { /* Efron */ for (i=0; i<nvar; i++) { a2[i] += risk*covar[i][person]; for (j=0; j<=i; j++) cmat2[i][j] += risk*covar[i][person]*covar[j][person]; } } } person--; if (strata[person]==1) break; /*ties don't cross strata */ } if (ndead >0) { /* we need to add to the main terms */ if (method==0) { /* Breslow */ loglik[1] -= deadwt* log(denom); for (i=0; i<nvar; i++) { temp2= a[i]/ denom; /* mean */ u[i] -= deadwt* temp2; for (j=0; j<=i; j++) imat[j][i] += deadwt*(cmat[i][j] - temp2*a[j])/denom; } } else { /* Efron */ /* ** If there are 3 deaths we have 3 terms: in the first the ** three deaths are all in, in the second they are 2/3 ** in the sums, and in the last 1/3 in the sum. Let k go ** from 0 to (ndead -1), then we will sequentially use ** denom - (k/ndead)*efronwt as the denominator ** a - (k/ndead)*a2 as the "a" term ** cmat - (k/ndead)*cmat2 as the "cmat" term ** and reprise the equations just above. */ for (k=0; k<ndead; k++) { temp = (double)k/ ndead; wtave = deadwt/ndead; d2 = denom - temp*efronwt; loglik[1] -= wtave* log(d2); for (i=0; i<nvar; i++) { temp2 = (a[i] - temp*a2[i])/ d2; u[i] -= wtave *temp2; for (j=0; j<=i; j++) imat[j][i] += (wtave/d2) * ((cmat[i][j] - temp*cmat2[i][j]) - temp2*(a[j]-temp*a2[j])); } } for (i=0; i<nvar; i++) { a2[i]=0; for (j=0; j<nvar; j++) cmat2[i][j]=0; } } } } /* end of accumulation loop */ loglik[0] = loglik[1]; /* save the loglik for iter 0 */ /* am I done? ** update the betas and test for convergence */ for (i=0; i<nvar; i++) /*use 'a' as a temp to save u0, for the score test*/ a[i] = u[i]; *flag= cholesky2(imat, nvar, toler); chsolve2(imat,nvar,a); /* a replaced by a *inverse(i) */ temp=0; for (i=0; i<nvar; i++) temp += u[i]*a[i]; *sctest = temp; /* score test */ /* ** Never, never complain about convergence on the first step. That way, ** if someone HAS to they can force one iter at a time. */ for (i=0; i<nvar; i++) { newbeta[i] = beta[i] + a[i]; } if (maxiter==0) { chinv2(imat,nvar); for (i=0; i<nvar; i++) { beta[i] *= scale[i]; /*return to original scale */ u[i] /= scale[i]; imat[i][i] *= scale[i]*scale[i]; for (j=0; j<i; j++) { imat[j][i] *= scale[i]*scale[j]; imat[i][j] = imat[j][i]; } } goto finish; } /* ** here is the main loop */ halving =0 ; /* =1 when in the midst of "step halving" */ for (*iter=1; *iter<= maxiter; (*iter)++) { newlk =0; for (i=0; i<nvar; i++) { u[i] =0; for (j=0; j<nvar; j++) imat[i][j] =0; } /* ** The data is sorted from smallest time to largest ** Start at the largest time, accumulating the risk set 1 by 1 */ for (person=nused-1; person>=0; ) { if (strata[person] == 1) { /* rezero temps for each strata */ denom = 0; nrisk =0; for (i=0; i<nvar; i++) { a[i] = 0; for (j=0; j<nvar; j++) cmat[i][j] = 0; } } dtime = time[person]; deadwt =0; ndead =0; efronwt =0; while(person>=0 && time[person]==dtime) { nrisk++; zbeta = offset[person]; for (i=0; i<nvar; i++) zbeta += newbeta[i]*covar[i][person]; zbeta = coxsafe(zbeta); risk = exp(zbeta) * weights[person]; denom += risk; for (i=0; i<nvar; i++) { a[i] += risk*covar[i][person]; for (j=0; j<=i; j++) cmat[i][j] += risk*covar[i][person]*covar[j][person]; } if (status[person]==1) { ndead++; deadwt += weights[person]; newlk += weights[person] *zbeta; for (i=0; i<nvar; i++) u[i] += weights[person] *covar[i][person]; if (method==1) { /* Efron */ efronwt += risk; for (i=0; i<nvar; i++) { a2[i] += risk*covar[i][person]; for (j=0; j<=i; j++) cmat2[i][j] += risk*covar[i][person]*covar[j][person]; } } } person--; if (strata[person]==1) break; /*tied times don't cross strata*/ } if (ndead >0) { /* add up terms*/ if (method==0) { /* Breslow */ newlk -= deadwt* log(denom); for (i=0; i<nvar; i++) { temp2= a[i]/ denom; /* mean */ u[i] -= deadwt* temp2; for (j=0; j<=i; j++) imat[j][i] += (deadwt/denom)* (cmat[i][j] - temp2*a[j]); } } else { /* Efron */ for (k=0; k<ndead; k++) { temp = (double)k / ndead; wtave= deadwt/ ndead; d2= denom - temp* efronwt; newlk -= wtave* log(d2); for (i=0; i<nvar; i++) { temp2 = (a[i] - temp*a2[i])/ d2; u[i] -= wtave*temp2; for (j=0; j<=i; j++) imat[j][i] += (wtave/d2)* ((cmat[i][j] - temp*cmat2[i][j]) - temp2*(a[j]-temp*a2[j])); } } for (i=0; i<nvar; i++) { /*in anticipation */ a2[i] =0; for (j=0; j<nvar; j++) cmat2[i][j] =0; } } } } /* end of accumulation loop */ /* am I done? ** update the betas and test for convergence */ *flag = cholesky2(imat, nvar, toler); if (fabs(1-(loglik[1]/newlk))<= eps && halving==0) { /* all done */ loglik[1] = newlk; chinv2(imat, nvar); /* invert the information matrix */ for (i=0; i<nvar; i++) { beta[i] = newbeta[i]*scale[i]; u[i] /= scale[i]; imat[i][i] *= scale[i]*scale[i]; for (j=0; j<i; j++) { imat[j][i] *= scale[i]*scale[j]; imat[i][j] = imat[j][i]; } } goto finish; } if (*iter== maxiter) break; /*skip the step halving calc*/ if (newlk < loglik[1]) { /*it is not converging ! */ halving =1; for (i=0; i<nvar; i++) newbeta[i] = (newbeta[i] + beta[i]) /2; /*half of old increment */ } else { halving=0; loglik[1] = newlk; chsolve2(imat,nvar,u); j=0; for (i=0; i<nvar; i++) { beta[i] = newbeta[i]; newbeta[i] = newbeta[i] + u[i]; } } } /* return for another iteration */ /* ** We end up here only if we ran out of iterations */ loglik[1] = newlk; chinv2(imat, nvar); for (i=0; i<nvar; i++) { beta[i] = newbeta[i]*scale[i]; u[i] /= scale[i]; imat[i][i] *= scale[i]*scale[i]; for (j=0; j<i; j++) { imat[j][i] *= scale[i]*scale[j]; imat[i][j] = imat[j][i]; } } *flag = 1000; finish: /* ** create the output list */ PROTECT(rlist= allocVector(VECSXP, 8)); SET_VECTOR_ELT(rlist, 0, beta2); SET_VECTOR_ELT(rlist, 1, means2); SET_VECTOR_ELT(rlist, 2, u2); SET_VECTOR_ELT(rlist, 3, imat2); SET_VECTOR_ELT(rlist, 4, loglik2); SET_VECTOR_ELT(rlist, 5, sctest2); SET_VECTOR_ELT(rlist, 6, iter2); SET_VECTOR_ELT(rlist, 7, flag2); /* add names to the objects */ PROTECT(rlistnames = allocVector(STRSXP, 8)); SET_STRING_ELT(rlistnames, 0, mkChar("coef")); SET_STRING_ELT(rlistnames, 1, mkChar("means")); SET_STRING_ELT(rlistnames, 2, mkChar("u")); SET_STRING_ELT(rlistnames, 3, mkChar("imat")); SET_STRING_ELT(rlistnames, 4, mkChar("loglik")); SET_STRING_ELT(rlistnames, 5, mkChar("sctest")); SET_STRING_ELT(rlistnames, 6, mkChar("iter")); SET_STRING_ELT(rlistnames, 7, mkChar("flag")); setAttrib(rlist, R_NamesSymbol, rlistnames); unprotect(nprotect+2); return(rlist); }
void survreg3(Sint *maxiter, Sint *nx, Sint *nvarx, double *y, Sint *ny, double *covar2, double *wtx, double *offset2, double *beta, Sint *nstratx, Sint *stratax, double *ux, double *imatx, double *loglik, Sint *flag, double *eps, double *tol_chol, Sint *dist, Sint *ddebug) { int i,j; int n; double *newbeta, *savediag; double temp; int halving, iter; double newlk; n = *nx; nvar = *nvarx; debug = *ddebug; offset = offset2; nstrat = *nstratx; strat = stratax; wt = wtx; covar = dmatrix(covar2, n, nvar); /* ** nvar = # of "real" x variables, for iteration ** nvar2= # of parameters ** nstrat= # of strata, where 0== fixed sigma */ nstrat = *nstratx; nvar2 = nvar + nstrat; /* number of coefficients */ if (nstrat==0) scale = exp(beta[nvar]); imat = dmatrix(imatx, nvar2, nvar2); u = ux; newbeta = u+nvar2; savediag= newbeta + nvar2; JJ = dmatrix(savediag+nvar2, nvar2, nvar2); if (*ny==2) { time1=y; status = y+n; } else { time1=y; time2 = time1 + n; status = time2 +n; } /* count up the number of interval censored obs ** and allocate memory for the callback arrarys */ j =0; for (i=0; i<n; i++) if (status[i]==3) j++; j = j+n; funs = dmatrix((double *)ALLOC(j*5, sizeof(double)), j, 5); z = (double *)ALLOC(j, sizeof(double)); /* ** do the initial iteration step */ *loglik = dolik(n, beta, 0); if (debug >0) { fprintf(stderr, "nvar=%d, nvar2=%d, nstrat=%d\n", nvar, nvar2, nstrat); fprintf(stderr, "iter=0, loglik=%f\n", loglik[0]); } *flag= cholesky2(imat, nvar2, *tol_chol); if (*flag < 0) { i = cholesky2(JJ, nvar2, *tol_chol); chsolve2(JJ, nvar2, u); if (debug>0) fprintf(stderr, " Alternate step, flag=%d\n", i); } else chsolve2(imat,nvar2,u); /* a replaced by a *inverse(i) */ if (debug>0) { fprintf(stderr, " flag=%d, Increment:", *flag); for (i=0; i<nvar2; i++) fprintf(stderr, " %f", u[i]); fprintf(stderr, "\n"); } if (debug >2) { fprintf(stderr, "Imat after inverse\n"); for (i=0; i<nvar2; i++) { for (j=0; j<nvar2; j++) fprintf(stderr," %f", imat[i][j]); fprintf(stderr, "\n"); } } /* ** Never, never complain about convergence on the first step. That way, ** if someone HAS to they can force one iter at a time. */ for (i=0; i<nvar2; i++) { newbeta[i] = beta[i] + u[i]; } if (*maxiter==0) { chinv2(imat,nvar2); for (i=1; i<nvar2; i++) for (j=0; j<i; j++) imat[i][j] = imat[j][i]; return; /* and we leave the old beta in peace */ } /* ** here is the main loop */ halving =0 ; /* >0 when in the midst of "step halving" */ newlk = dolik(n, newbeta, 0); /* put in a call to simplex if in trouble */ for (iter=1; iter<=*maxiter; iter++) { if (debug>0) fprintf(stderr, "---\niter=%d, loglik=%f\n\n", iter, newlk); /* ** Am I done? Check for convergence, then update betas */ if (fabs(1-(*loglik/newlk))<=*eps ) { /* all done */ *loglik = newlk; *flag = cholesky2(imat, nvar2, *tol_chol); if (debug==0) { chinv2(imat, nvar2); /* invert the information matrix */ for (i=1; i<nvar2; i++) for (j=0; j<i; j++) imat[i][j] = imat[j][i]; } for (i=0; i<nvar2; i++) beta[i] = newbeta[i]; if (halving==1) *flag= 1000; /*didn't converge after all */ *maxiter = iter; return; } if (newlk < *loglik) { /*it is not converging ! */ for (j=0; j<5 && newlk < *loglik; j++) { halving++; for (i=0; i<nvar2; i++) newbeta[i] = (newbeta[i] + beta[i]) /2; /* ** Special code for sigmas. Often, they are the part ** that gets this routine in trouble. The prior NR step ** may have decreased one of them by a factor of >10, in which ** case step halving isn't quite enough. Make sure the new ** try differs from the last good one by no more than 1/3 ** approx log(3) = 1.1 ** Step halving isn't enough of a "back away" when a ** log(sigma) goes from 0.5 to -3, or has become singular. */ if (halving==1) { /* only the first time */ for (i=0; i<nstrat; i++) { if ((beta[nvar+i]-newbeta[nvar+i])> 1.1) newbeta[nvar+i] = beta[nvar+i] - 1.1; } } newlk = dolik(n, newbeta, 1); } if (debug>0) { fprintf(stderr," Step half -- %d steps, newlik=%f\n", halving, newlk); fflush(stderr); } } else { /* take a standard NR step */ halving=0; *loglik = newlk; *flag = cholesky2(imat, nvar2, *tol_chol); if (debug >2) { fprintf(stderr, "Imat after inverse\n"); for (i=0; i<nvar2; i++) { for (j=0; j<nvar2; j++) fprintf(stderr," %f", imat[i][j]); fprintf(stderr, "\n"); } } if (*flag < 0) { i = cholesky2(JJ, nvar2, *tol_chol); chsolve2(JJ, nvar2, u); if (debug>0) fprintf(stderr, " Alternate step, flag=%d\n", i); } else chsolve2(imat,nvar2,u); if (debug>1) { fprintf(stderr, " flag=%d, Increment:", *flag); for (i=0; i<nvar2; i++) fprintf(stderr, " %f", u[i]); fprintf(stderr, "\n"); } for (i=0; i<nvar2; i++) { beta[i] = newbeta[i]; newbeta[i] = newbeta[i] + u[i]; } } newlk = dolik(n, newbeta, 0); } /* return for another iteration */ *loglik = newlk; if (debug==0) { cholesky2(imat, nvar2, *tol_chol); chinv2(imat, nvar2); for (i=1; i<nvar2; i++) { for (j=0; j<i; j++) imat[i][j] = imat[j][i]; } } for (i=0; i<nvar2; i++) beta[i] = newbeta[i]; *flag= 1000; return; }
static void clogit_fit(SEXP X, SEXP y, SEXP offset, int m, double *beta, double *loglik, double *u, double *info, int *flag, int *maxiter, double const *eps, double const * tol_chol) { int i, iter = 0; Rboolean halving = FALSE; double *oldbeta = Calloc(m, double); double **imat = Calloc(m, double*); /* Set up ragged array representation of information matrix for use by cholesky2, chsolve2, and invert_info functions */ for (i = 0; i < m; ++i) { imat[i] = info + m*i; } /* Initial iteration */ cloglik(X, y, offset, m, beta, loglik, u, info); if (*maxiter > 0) { *flag = cholesky2(imat, m, *tol_chol); if (*flag > 0) { chsolve2(imat, m, u); for (i = 0; i < m; i++) { oldbeta[i] = beta[i]; beta[i] += u[i]; } } else { /* Bad information matrix. Don't go into the main loop */ *maxiter = 0; } } /* Main loop */ for (iter = 1; iter <= *maxiter; iter++) { double oldlik = *loglik; cloglik(X, y, offset, m, beta, loglik, u, info); if (fabs(1 - (oldlik / *loglik)) <= *eps && !halving) { /* Done */ break; } else if (iter == *maxiter) { /* Out of time */ *flag = 1000; break; } else if (*loglik < oldlik) { /* Not converging: halve step size */ halving = TRUE; for (i = 0; i < m; i++) { beta[i] = (beta[i] + oldbeta[i]) /2; } } else { /* Normal update */ halving = FALSE; oldlik = *loglik; *flag = cholesky2(imat, m, *tol_chol); if (*flag > 0) { chsolve2(imat, m, u); for (i = 0; i < m; i++) { oldbeta[i] = beta[i]; beta[i] += u[i]; } } else { break; /* Bad information matrix */ } } } *maxiter = iter; if (*flag > 0) { cholesky2(imat, m, *tol_chol); invert_info(imat, m); } Free(oldbeta); Free(imat); }
SEXP coxexact(SEXP maxiter2, SEXP y2, SEXP covar2, SEXP offset2, SEXP strata2, SEXP ibeta, SEXP eps2, SEXP toler2) { int i,j,k; int iter; double **covar, **imat; /*ragged arrays */ double *time, *status; /* input data */ double *offset; int *strata; int sstart; /* starting obs of current strata */ double *score; double *oldbeta; double zbeta; double newlk=0; double temp; int halving; /*are we doing step halving at the moment? */ int nrisk; /* number of subjects in the current risk set */ int dsize, /* memory needed for one coxc0, coxc1, or coxd2 array */ dmemtot, /* amount needed for all arrays */ maxdeath, /* max tied deaths within a strata */ ndeath; /* number of deaths at the current time point */ double dtime; /* time value under current examiniation */ double *dmem0, **dmem1, *dmem2; /* pointers to memory */ double *dtemp; /* used for zeroing the memory */ double *d1; /* current first derivatives from coxd1 */ double d0; /* global sum from coxc0 */ /* copies of scalar input arguments */ int nused, nvar, maxiter; double eps, toler; /* returned objects */ SEXP imat2, beta2, u2, loglik2; double *beta, *u, *loglik; SEXP rlist, rlistnames; int nprotect; /* number of protect calls I have issued */ nused = LENGTH(offset2); nvar = ncols(covar2); maxiter = asInteger(maxiter2); eps = asReal(eps2); /* convergence criteria */ toler = asReal(toler2); /* tolerance for cholesky */ /* ** Set up the ragged array pointer to the X matrix, ** and pointers to time and status */ covar= dmatrix(REAL(covar2), nused, nvar); time = REAL(y2); status = time +nused; strata = INTEGER(PROTECT(duplicate(strata2))); offset = REAL(offset2); /* temporary vectors */ score = (double *) R_alloc(nused+nvar, sizeof(double)); oldbeta = score + nused; /* ** create output variables */ PROTECT(beta2 = duplicate(ibeta)); beta = REAL(beta2); PROTECT(u2 = allocVector(REALSXP, nvar)); u = REAL(u2); PROTECT(imat2 = allocVector(REALSXP, nvar*nvar)); imat = dmatrix(REAL(imat2), nvar, nvar); PROTECT(loglik2 = allocVector(REALSXP, 5)); /* loglik, sctest, flag,maxiter*/ loglik = REAL(loglik2); nprotect = 5; strata[0] =1; /* in case the parent forgot */ dsize = 0; maxdeath =0; j=0; /* start of the strata */ for (i=0; i<nused;) { if (strata[i]==1) { /* first obs of a new strata */ if (i>0) { /* If maxdeath <2 leave the strata alone at it's current value of 1 */ if (maxdeath >1) strata[j] = maxdeath; j = i; if (maxdeath*nrisk >dsize) dsize = maxdeath*nrisk; } maxdeath =0; /* max tied deaths at any time in this strata */ nrisk=0; ndeath =0; } dtime = time[i]; ndeath =0; /*number tied here */ while (time[i] ==dtime) { nrisk++; ndeath += status[i]; i++; if (i>=nused || strata[i] >0) break; /*tied deaths don't cross strata */ } if (ndeath > maxdeath) maxdeath=ndeath; } if (maxdeath*nrisk >dsize) dsize = maxdeath*nrisk; if (maxdeath >1) strata[j] = maxdeath; /* Now allocate memory for the scratch arrays Each per-variable slice is of size dsize */ dmemtot = dsize * ((nvar*(nvar+1))/2 + nvar + 1); dmem0 = (double *) R_alloc(dmemtot, sizeof(double)); /*pointer to memory */ dmem1 = (double **) R_alloc(nvar, sizeof(double*)); dmem1[0] = dmem0 + dsize; /*points to the first derivative memory */ for (i=1; i<nvar; i++) dmem1[i] = dmem1[i-1] + dsize; d1 = (double *) R_alloc(nvar, sizeof(double)); /*first deriv results */ /* ** do the initial iteration step */ newlk =0; for (i=0; i<nvar; i++) { u[i] =0; for (j=0; j<nvar; j++) imat[i][j] =0 ; } for (i=0; i<nused; ) { if (strata[i] >0) { /* first obs of a new strata */ maxdeath= strata[i]; dtemp = dmem0; for (j=0; j<dmemtot; j++) *dtemp++ =0.0; sstart =i; nrisk =0; } dtime = time[i]; /*current unique time */ ndeath =0; while (time[i] == dtime) { zbeta= offset[i]; for (j=0; j<nvar; j++) zbeta += covar[j][i] * beta[j]; score[i] = exp(zbeta); if (status[i]==1) { newlk += zbeta; for (j=0; j<nvar; j++) u[j] += covar[j][i]; ndeath++; } nrisk++; i++; if (i>=nused || strata[i] >0) break; } /* We have added up over the death time, now process it */ if (ndeath >0) { /* Add to the loglik */ d0 = coxd0(ndeath, nrisk, score+sstart, dmem0, maxdeath); R_CheckUserInterrupt(); newlk -= log(d0); dmem2 = dmem0 + (nvar+1)*dsize; /*start for the second deriv memory */ for (j=0; j<nvar; j++) { /* for each covariate */ d1[j] = coxd1(ndeath, nrisk, score+sstart, dmem0, dmem1[j], covar[j]+sstart, maxdeath) / d0; if (ndeath > 3) R_CheckUserInterrupt(); u[j] -= d1[j]; for (k=0; k<= j; k++) { /* second derivative*/ temp = coxd2(ndeath, nrisk, score+sstart, dmem0, dmem1[j], dmem1[k], dmem2, covar[j] + sstart, covar[k] + sstart, maxdeath); if (ndeath > 5) R_CheckUserInterrupt(); imat[k][j] += temp/d0 - d1[j]*d1[k]; dmem2 += dsize; } } } } loglik[0] = newlk; /* save the loglik for iteration zero */ loglik[1] = newlk; /* and it is our current best guess */ /* ** update the betas and compute the score test */ for (i=0; i<nvar; i++) /*use 'd1' as a temp to save u0, for the score test*/ d1[i] = u[i]; loglik[3] = cholesky2(imat, nvar, toler); chsolve2(imat,nvar, u); /* u replaced by u *inverse(imat) */ loglik[2] =0; /* score test stored here */ for (i=0; i<nvar; i++) loglik[2] += u[i]*d1[i]; if (maxiter==0) { iter =0; /*number of iterations */ loglik[4] = iter; chinv2(imat, nvar); for (i=1; i<nvar; i++) for (j=0; j<i; j++) imat[i][j] = imat[j][i]; /* assemble the return objects as a list */ PROTECT(rlist= allocVector(VECSXP, 4)); SET_VECTOR_ELT(rlist, 0, beta2); SET_VECTOR_ELT(rlist, 1, u2); SET_VECTOR_ELT(rlist, 2, imat2); SET_VECTOR_ELT(rlist, 3, loglik2); /* add names to the list elements */ PROTECT(rlistnames = allocVector(STRSXP, 4)); SET_STRING_ELT(rlistnames, 0, mkChar("coef")); SET_STRING_ELT(rlistnames, 1, mkChar("u")); SET_STRING_ELT(rlistnames, 2, mkChar("imat")); SET_STRING_ELT(rlistnames, 3, mkChar("loglik")); setAttrib(rlist, R_NamesSymbol, rlistnames); unprotect(nprotect+2); return(rlist); } /* ** Never, never complain about convergence on the first step. That way, ** if someone has to they can force one iter at a time. */ for (i=0; i<nvar; i++) { oldbeta[i] = beta[i]; beta[i] = beta[i] + u[i]; } halving =0 ; /* =1 when in the midst of "step halving" */ for (iter=1; iter<=maxiter; iter++) { newlk =0; for (i=0; i<nvar; i++) { u[i] =0; for (j=0; j<nvar; j++) imat[i][j] =0; } for (i=0; i<nused; ) { if (strata[i] >0) { /* first obs of a new strata */ maxdeath= strata[i]; dtemp = dmem0; for (j=0; j<dmemtot; j++) *dtemp++ =0.0; sstart =i; nrisk =0; } dtime = time[i]; /*current unique time */ ndeath =0; while (time[i] == dtime) { zbeta= offset[i]; for (j=0; j<nvar; j++) zbeta += covar[j][i] * beta[j]; score[i] = exp(zbeta); if (status[i]==1) { newlk += zbeta; for (j=0; j<nvar; j++) u[j] += covar[j][i]; ndeath++; } nrisk++; i++; if (i>=nused || strata[i] >0) break; } /* We have added up over the death time, now process it */ if (ndeath >0) { /* Add to the loglik */ d0 = coxd0(ndeath, nrisk, score+sstart, dmem0, maxdeath); R_CheckUserInterrupt(); newlk -= log(d0); dmem2 = dmem0 + (nvar+1)*dsize; /*start for the second deriv memory */ for (j=0; j<nvar; j++) { /* for each covariate */ d1[j] = coxd1(ndeath, nrisk, score+sstart, dmem0, dmem1[j], covar[j]+sstart, maxdeath) / d0; if (ndeath > 3) R_CheckUserInterrupt(); u[j] -= d1[j]; for (k=0; k<= j; k++) { /* second derivative*/ temp = coxd2(ndeath, nrisk, score+sstart, dmem0, dmem1[j], dmem1[k], dmem2, covar[j] + sstart, covar[k] + sstart, maxdeath); if (ndeath > 5) R_CheckUserInterrupt(); imat[k][j] += temp/d0 - d1[j]*d1[k]; dmem2 += dsize; } } } } /* am I done? ** update the betas and test for convergence */ loglik[3] = cholesky2(imat, nvar, toler); if (fabs(1-(loglik[1]/newlk))<= eps && halving==0) { /* all done */ loglik[1] = newlk; loglik[4] = iter; chinv2(imat, nvar); for (i=1; i<nvar; i++) for (j=0; j<i; j++) imat[i][j] = imat[j][i]; /* assemble the return objects as a list */ PROTECT(rlist= allocVector(VECSXP, 4)); SET_VECTOR_ELT(rlist, 0, beta2); SET_VECTOR_ELT(rlist, 1, u2); SET_VECTOR_ELT(rlist, 2, imat2); SET_VECTOR_ELT(rlist, 3, loglik2); /* add names to the list elements */ PROTECT(rlistnames = allocVector(STRSXP, 4)); SET_STRING_ELT(rlistnames, 0, mkChar("coef")); SET_STRING_ELT(rlistnames, 1, mkChar("u")); SET_STRING_ELT(rlistnames, 2, mkChar("imat")); SET_STRING_ELT(rlistnames, 3, mkChar("loglik")); setAttrib(rlist, R_NamesSymbol, rlistnames); unprotect(nprotect+2); return(rlist); } if (iter==maxiter) break; /*skip the step halving and etc */ if (newlk < loglik[1]) { /*it is not converging ! */ halving =1; for (i=0; i<nvar; i++) beta[i] = (oldbeta[i] + beta[i]) /2; /*half of old increment */ } else { halving=0; loglik[1] = newlk; chsolve2(imat,nvar,u); for (i=0; i<nvar; i++) { oldbeta[i] = beta[i]; beta[i] = beta[i] + u[i]; } } } /* return for another iteration */ /* ** Ran out of iterations */ loglik[1] = newlk; loglik[3] = 1000; /* signal no convergence */ loglik[4] = iter; chinv2(imat, nvar); for (i=1; i<nvar; i++) for (j=0; j<i; j++) imat[i][j] = imat[j][i]; /* assemble the return objects as a list */ PROTECT(rlist= allocVector(VECSXP, 4)); SET_VECTOR_ELT(rlist, 0, beta2); SET_VECTOR_ELT(rlist, 1, u2); SET_VECTOR_ELT(rlist, 2, imat2); SET_VECTOR_ELT(rlist, 3, loglik2); /* add names to the list elements */ PROTECT(rlistnames = allocVector(STRSXP, 4)); SET_STRING_ELT(rlistnames, 0, mkChar("coef")); SET_STRING_ELT(rlistnames, 1, mkChar("u")); SET_STRING_ELT(rlistnames, 2, mkChar("imat")); SET_STRING_ELT(rlistnames, 3, mkChar("loglik")); setAttrib(rlist, R_NamesSymbol, rlistnames); unprotect(nprotect+2); return(rlist); }