// Unit test for ll_zinb(): tabulates a fixed sample and compares the value
// returned for four parameter sets against hard-coded references, with an
// absolute tolerance of 1e-6.
void
test_ll_zinb
(void)
{
   // 25 observations; value:frequency is 0:14, 1:5, 2:4, 3:1, 5:1.
   enum { N_OBS = 25 };
   int counts[N_OBS] = {
      0,0,0,0,0,0,0,0,0,0,0,0,0,0,
      1,1,1,1,1,
      2,2,2,2,
      3,
      5
   };

   tab_t *tab = tabulate(counts, N_OBS);
   test_assert_critical(tab != NULL);

   // Arguments are (a, p, pi, tab). The references are negative, hence
   // the '+' inside fabs().
   test_assert(fabs(ll_zinb(1, .5, 1, tab)+22.5329303) < 1e-6);
   test_assert(fabs(ll_zinb(1, .5, .7, tab)+22.7832550) < 1e-6);
   test_assert(fabs(ll_zinb(2, .5, .7, tab)+23.7608409) < 1e-6);
   test_assert(fabs(ll_zinb(2, .3, .7, tab)+31.6978553) < 1e-6);

   free(tab);
}
zinb_par_t * mle_zinb ( int *x, unsigned int nobs ) { tab_t *tab = tabulate(x, nobs); double sum = 0.0; unsigned int nona = 0; for (size_t i = 0 ; i < tab->size ; i++) { sum += tab->val[i]*tab->num[i]; nona += tab->num[i]; } // Extract the number of all-zero observaions. const unsigned int z0 = tab->val[0] == 0 ? tab->num[0] : 0; double deficit[11] = {0,.1,.2,.3,.4,.5,.6,.7,.8,.9,1}; double init_a[12] = {-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,1}; double init_p[12] = {0,0,0,0,0,0,0,0,0,0,0,.5}; // Deplete some 0s from the observations, compute alpha // and p0 with standard negative binomial estimation // and keep the values to be used as initial conditions. for (size_t i = 0 ; i < 11 ; i++) { if (tab->val[0] == 0) tab->num[0] = z0 * (1-deficit[i]); double newmean = sum / nona / (1.0 - z0*deficit[i]/nobs); double alpha = nb_est_alpha(tab); init_a[i] = alpha; init_p[i] = alpha / (alpha + newmean); } // Reset 'tab'. if (tab->val[0] == 0) tab->num[0] = z0; zinb_par_t *par = calloc(1, sizeof(zinb_par_t)); if (par == NULL) { fprintf(stderr, "memory error: %s:%d\n", __FILE__, __LINE__); return NULL; } // Try initial conditions. Number 12 is a safety in case // all the rest failed during the first phase. double max_loglik = -1.0/0.0; for (size_t i = 0 ; i < 12 ; i++) { if (init_a[i] < 0) continue; // Skip failures. double a = init_a[i]; double p = init_p[i]; double grad; unsigned int iter = 0; double f = eval_zinb_f(a, p, nobs-z0, sum); double g = eval_zinb_g(a, p, tab); // Newton-Raphson iterations. while ((grad = f*f+g*g) > sq(ZINM_TOL) && iter++ < ZINM_MAXITER) { double dfda, dfdp, dgda, dgdp; dfda = dgdp = eval_zinb_dfda(a, p, nobs-z0); dfdp = eval_zinb_dfdp(a, p, nobs-z0, sum); dgda = eval_zinb_dgda(a, p, tab); double denom = dfdp*dgda - dfda*dgdp; double da = (f*dgdp - g*dfdp) / denom; double dp = (g*dfda - f*dgda) / denom; // Maintain 'a' and 'p' in their domain of definition. 
while (a+da < 0 || p+dp < 0 || p+dp > 1) { da /= 2; dp /= 2; } f = eval_zinb_f(a+da, p+dp, nobs-z0, sum); g = eval_zinb_g(a+da, p+dp, tab); // Backtrack if necessary. for (int j = 0 ; j < ZINM_MAXITER && f*f+g*g > grad ; j++) { da /= 2; dp /= 2; f = eval_zinb_f(a+da, p+dp, nobs-z0, sum); g = eval_zinb_g(a+da, p+dp, tab); } a = a+da; p = p+dp; } double pi = (nobs-z0) / (1-pow(p,a)) / nobs; if (pi > 1) pi = 1.0; if (pi < 0) pi = 0.0; double loglik = ll_zinb(a, p, pi, tab); if (loglik > max_loglik) { max_loglik = loglik; par->a = a; par->pi = pi; par->p = p; } } free(tab); return par; }