double RPedigree::get_rij(int i, int j){ // Authors: Rohan L. Fernando // (2005) // Contributors: if (i==0||j==0){ return 0.0; } double x = SpCij.retrieve_cij(i,j); if(x != -1.0) { return x; } int old, young; if(i < j){ old = i; young = j; } else if(j < i){ old = j; young = i; } else{ double f = pedVector[i-1]->f; x = 0.5*(1 + f); SpCij.put_cij(i,j,x); return x; } int y_sire = pedVector[young-1]->sire; int y_dam = pedVector[young-1]->dam; x = (get_rij(old,y_sire)+get_rij(old,y_dam))/2.0; SpCij.put_cij(i,j,x); return x; }
void RPedigree::calc_inbreeding(void){ // Authors: Rohan L. Fernando // (2005) // Contributors: SafeSTLVector <PNode*>::iterator it; unsigned rec = 0, rec1 = 0, non_rec = 0; cout << "\n calculating inbreeding \n"; for (it=pedVector.begin();it!=pedVector.end();it++){ rec++; if(rec==1000){ cout<<rec+rec1<<"\r"; cout.flush(); rec1 += rec; rec = 0; }; (*it)->f = get_rij((*it)->sire,(*it)->dam); } }
// E-step of the EM algorithm: recompute the posterior probZ[j][k] that
// task j has true class k, given the current alpha/beta parameters and
// all observed labels. Works in log space, then exponentiates and
// normalizes per task. Aborts on an invalid mode or a NaN posterior.
void EStep (Dataset *data) {
    int i, j, k, lij, rij, idx;
    double p, denom;
    // count[r] is filled per (k, j) by get_num_diff_steps; sized num_steps+1.
    int *count = (int *) malloc(sizeof(int) * (data->num_steps + 1));
    // beta_idx[h+1] maps step h of class k to its beta slot; entry 0 fixed at 0.
    int *beta_idx = (int *) malloc(sizeof(int) * data->num_steps);
    beta_idx[0] = 0;
    if (data->debug) { std::cerr << "EStep" << std::endl; }
    // Initialize every posterior with the log prior over Z.
    for (j = 0; j < data->num_tasks; j++) {
        for (k = 0; k < data->num_leaves; k++) {
            idx = get_z_index(j, k, data);
            data->probZ[idx] = log(data->priorZ[idx]);
        }
    }
    for (k = 0; k < data->num_leaves; k++) { // true class
        // class-dependent beta: beta indices depend only on the class k,
        // so they can be computed once per k.
        if (data->mode == 2) {
            for (int h = 0; h < data->last_step[k] - 1; h++) {
                beta_idx[h+1] = get_beta_index(h, get_step(k, h, data), data);
            }
        }
        for (idx = 0; idx < data->num_labels; idx++) {
            i = data->labels[idx].labelerId;
            j = data->labels[idx].imageIdx;
            lij = data->labels[idx].label;
            // task-and-class dependent beta: indices also depend on the
            // task j, so they must be recomputed per label.
            if (data->mode == 3) {
                for (int h = 0; h < data->last_step[k] - 1; h++) {
                    beta_idx[h+1] = get_beta_index(j, h, get_step(k, h, data), data);
                }
            }
            // Find rij (the number of steps shared between label lij and class k)
            // and the per-step counts for this (k, j) pair.
            get_num_diff_steps(k, count, j, data);
            rij = get_rij(lij, k, data);
            // Compute logP(l); the probability of generating the label
            switch (data->mode) {
            case 1: // Steps GLAD (task-dependent)
                p = calc_log_ProbL_GLAD_t(lij, rij, data->last_step[k], data->alpha[i], data->beta[j], count[rij]);
                break;
            case 2: // Steps GLAD (class dependent)
            case 3: // Steps GLAD (task-and-class dependent)
                p = calc_log_ProbL_GLAD_ctc(rij, data->last_step[k], data->alpha[i], data->beta, beta_idx, count[rij]);
                break;
            case 4: // Steps Rasch model (task dependent)
                // This is presented as another example of extension
                p = calc_log_ProbL_rasch_t(lij, rij, data->last_step[k], data->alpha[i], data->beta[j], count[rij]);
                break;
            default:
                std::cerr << "Invalid mode " << data->mode << std::endl;
                abort();
            }
            // Accumulate the log-likelihood of this label into the posterior.
            data->probZ[get_z_index(j, k, data)] += p;
        }
    }
    // Exponentiate and Normalize (per task j, over all classes k).
    for (j = 0; j < data->num_tasks; j++) {
        denom = 0.0;
        for (k = 0; k < data->num_leaves; k++) {
            idx = get_z_index(j, k, data);
            // log(0) priors give -INFINITY: treat as exact zero probability
            // so exp() never produces a denormal/NaN path.
            if (data->probZ[idx] == -INFINITY) {
                data->probZ[idx] = 0;
                continue;
            }
            data->probZ[idx] = exp(data->probZ[idx]);
            denom += data->probZ[idx];
        }
        for (k = 0; k < data->num_leaves; k++) {
            idx = get_z_index(j, k, data);
            // NOTE(review): if every class was -INFINITY, denom is 0 and this
            // divide yields NaN, caught by the check below.
            data->probZ[idx] /= denom;
            if (isnan(data->probZ[idx])) {
                std::cerr << "ERROR: isnan(data->probZ[idx]) [EStep]" << std::endl;
                std::cerr << denom << std::endl;
                abort();
            }
        }
    }
    free(count);
    free(beta_idx);
}
//---------------------------------------------------------------------- // Gradients //---------------------------------------------------------------------- void compute_gradients(Dataset *data, double *dQdAlpha, double *dQdBeta) { int i, j, h, k, lij, rij = 0; int idx, upper; double sigma; int *beta_idx = (int *) malloc(sizeof(int) * data->num_steps); beta_idx[0] = 0; // This comes from the priors for (i = 0; i < data->num_labelers; i++) { dQdAlpha[i] = (data->ignore_priorAlpha) ? 0 : - (data->alpha[i] - data->priorAlpha[i]); // Regularization Penalty (default: lambda = 0) dQdAlpha[i] = - 2.0 * (data->alpha[i] - data->priorAlpha[i]); } for (idx = 0; idx < data->num_beta; idx++) { dQdBeta[idx] = (data->ignore_priorBeta) ? 0 : - (data->beta[idx] - data->priorBeta[idx]); // Regularization Penalty (default: lambda = 0) dQdBeta[idx] = - 2 * data->lambdaBeta * (data->beta[idx] - data->priorBeta[idx]); } i = j = 0; for (k = 0; k < data->num_leaves; k++) { // True class // class-dependent beta if (data->mode == 2) { for (h = 0; h < data->last_step[k] - 1; h++) { beta_idx[h+1] = get_beta_index(h, get_step(k, h, data), data); } } for (idx = 0; idx < data->num_labels; idx++) { i = data->labels[idx].labelerId; j = data->labels[idx].imageIdx; lij = data->labels[idx].label; // task-and-class dependent beta if (data->mode == 3) { for (h = 0; h < data->last_step[k] - 1; h++) { beta_idx[h+1] = get_beta_index(j, h, get_step(k, h, data), data); } } // Find rij rij = get_rij(lij, k, data); upper = (data->last_step[k] < (rij + 1)) ? 
data->last_step[k] : (rij + 1); // Calculate dQdAlpha and dQdBeta switch (data->mode) { case 1: sigma = sigmoid(exp(data->beta[j]) * data->alpha[i]); roundAlpha_GLAD_t(&dQdAlpha[i], rij, upper, data->probZ[j * data->num_leaves + k], sigma, data->beta[j]); roundBeta_GLAD_t(&dQdBeta[j], rij, upper, data->probZ[j * data->num_leaves + k], sigma, data->alpha[i], data->beta[j]); break; case 2: case 3: roundAlpha_GLAD_ctc(&dQdAlpha[i], rij, upper, data->probZ[get_z_index(j, k, data)], data->alpha[i], data->beta, beta_idx); roundBeta_GLAD_ctc(dQdBeta, rij, upper, data->probZ[get_z_index(j, k, data)], data->alpha[i], data->beta, beta_idx); break; case 4: sigma = sigmoid(data->alpha[i] - data->beta[j]); roundAlpha_rasch_t(&dQdAlpha[i], rij, upper, data->probZ[j * data->num_leaves + k], sigma); roundBeta_rasch_t(&dQdBeta[j], rij, upper, data->probZ[j * data->num_leaves + k], sigma, data->beta[j]); break; default: std::cerr << "Invalid mode flag " << data->mode << std::endl; abort(); } } } if (data->debug) { std::cerr << "da[7] = " << dQdAlpha[7] << " "; std::cerr << "da[52] = " << dQdAlpha[52] << " "; std::cerr << "da[74] = " << dQdAlpha[74] << std::endl; std::cerr << "db[0] = " << dQdBeta[0] << " "; std::cerr << "db[1] = " << dQdBeta[1] << std::endl; } free(beta_idx); }
// Assemble the EM objective Q = E_Z[log P(Z, labels)] under the current
// posterior probZ, plus Gaussian prior terms for alpha/beta and quadratic
// regularization penalties. Aborts if Q becomes NaN at any stage so the
// failing stage is identifiable from the message.
double compute_objective_function(Dataset *data) {
    int i, j, k, idx, lij, rij;
    // count[r] is filled per (k, j) by get_num_diff_steps; sized num_steps+1.
    int *count = (int *) malloc(sizeof(int) * (data->num_steps + 1));
    double p;
    double Q = 0;
    double *alpha = data->alpha, *beta = data->beta;
    // beta_idx[h+1] maps step h of class k to its beta slot; entry 0 fixed at 0.
    int *beta_idx = (int *) malloc(sizeof(int) * data->num_steps);
    beta_idx[0] = 0;
    // Start with the expectation of the sum of priors over all images
    for (j = 0; j < data->num_tasks; j++) {
        for (k = 0; k < data->num_leaves; k++) {
            idx = get_z_index(j, k, data);
            if (data->priorZ[idx] == 0) continue; // Skip ignored Z
            Q += data->probZ[idx] * log(data->priorZ[idx]);
        }
    }
    for (k = 0; k < data->num_leaves; k++) { // True class
        // class-dependent beta: indices depend only on k.
        if (data->mode == 2) {
            for (int h = 0; h < data->last_step[k] - 1; h++)
                beta_idx[h+1] = get_beta_index(h, get_step(k, h, data), data);
        }
        for (idx = 0; idx < data->num_labels; idx++) {
            i = data->labels[idx].labelerId;
            j = data->labels[idx].imageIdx;
            lij = data->labels[idx].label;
            // task-and-class dependent beta: indices also depend on j.
            if (data->mode == 3) {
                for (int h = 0; h < data->last_step[k] - 1; h++)
                    beta_idx[h+1] = get_beta_index(j, h, get_step(k, h, data), data);
            }
            // Find rij
            get_num_diff_steps(k, count, j, data);
            rij = get_rij(lij, k, data);
            // Compute logP(l)
            switch (data->mode) {
            case 1: // Steps GLAD (task-dependent)
                p = calc_log_ProbL_GLAD_t(lij, rij, data->last_step[k], data->alpha[i], data->beta[j], count[rij]);
                break;
            case 2: // Steps GLAD (class dependent)
            case 3: // Steps GLAD (task-and-class dependent)
                p = calc_log_ProbL_GLAD_ctc(rij, data->last_step[k], data->alpha[i], data->beta, beta_idx, count[rij]);
                break;
            case 4: // Steps Rasch model (task dependent)
                // This is presented as another example of extension
                p = calc_log_ProbL_rasch_t(lij, rij, data->last_step[k], data->alpha[i], data->beta[j], count[rij]);
                break;
            default:
                std::cerr << "Invalid mode " << data->mode << std::endl;
                abort();
            }
            // Weight the label log-likelihood by the posterior of class k.
            Q += p * data->probZ[get_z_index(j, k, data)];
        }
        if (isnan(Q)) {
            std::cerr << "isnan(Q) is True after computing Q from labels: Q = " << Q << std::endl;
            abort();
        }
    }
    // Regularization penalty (default: lambda = 0)
    for (i = 0; i < data->num_labelers; i++) {
        Q -= data->lambdaAlpha * (data->alpha[i] - data->priorAlpha[i]) * (data->alpha[i] - data->priorAlpha[i]);
    }
    for (idx = 0; idx < data->num_beta; idx++) {
        Q -= data->lambdaBeta * (data->beta[idx] - data->priorBeta[idx]) * (data->beta[idx] - data->priorBeta[idx]);
    }
    // Add Gaussian (standard normal) prior for alpha
    if (!data->ignore_priorAlpha) {
        for (i = 0; i < data->num_labelers; i++) {
            Q += log(gsl_sf_erf_Z(alpha[i] - data->priorAlpha[i]));
            if (isnan(Q)) {
                std::cerr << "isnan(Q) is True after adding Gaussian prior for alpha" << std::endl;
                abort();
            }
        }
    }
    // Add Gaussian (standard normal) prior for beta
    if (!data->ignore_priorBeta) {
        for (idx = 0; idx < data->num_beta; idx++) {
            Q += log(gsl_sf_erf_Z(beta[idx] - data->priorBeta[idx]));
            if (isnan(Q)) {
                // BUG FIX: message previously read "isnan(Q) is after True adding".
                std::cerr << "isnan(Q) is True after adding Gaussian prior for beta" << std::endl;
                abort();
            }
        }
    }
    if (data->debug) {
        std::cerr << "Q = " << Q << std::endl;
    }
    free(count);
    free(beta_idx);
    return Q;
}