コード例 #1
0
ファイル: data.c プロジェクト: KerstenDoering/CPI-Pipeline
/*
 * ap_sentence() - apply one pairwise weight update for sentence s.
 *
 * The parses of s are scored with sentence_scores().  If parse[0] scores
 * no higher than max_score, the weights are moved away from the features
 * of the parse whose index sentence_scores() returned (the "winner") and
 * towards the features of parse[0] (the "correct" parse).
 *
 *   s        sentence whose parses are compared
 *   score    array passed to sentence_scores(); score[0] is read afterwards
 *   w        weight vector, modified only through ap_update1()
 *   dw       base step size; scaled here by
 *            Px * |correct Pyx - winner Pyx| / correct Pyx
 *   sum_w, it, changed
 *            forwarded untouched to ap_update1()
 *
 * No update is made when the winner's Pyx is at least the correct
 * parse's Pyx.
 */
void ap_sentence(sentence_type *s, Float score[], Float w[], Float dw, 
		 Float sum_w[], size_t it, size_t changed[])
{
  Float max_winner_score, max_score;
  parse_type *correct, *winner;
  size_t winner_i, k;

  winner_i = sentence_scores(s, w, score, &max_winner_score, &max_score);

  /* written as a negated <= so an unordered comparison also leaves early */
  if (!(score[0] <= max_score)) {
    return;
  }

  correct = &s->parse[0];
  winner = &s->parse[winner_i];

  if (winner->Pyx >= correct->Pyx) {
    return;
  }

  /* weight the step by how much this pair matters */
  assert(correct->Pyx > 0);
  dw = dw * (s->Px * fabs(correct->Pyx - winner->Pyx) / correct->Pyx);

  /* winner: unit-count features first, then features carrying a count */
  for (k = 0; k < winner->nf; ++k) {
    ap_update1(winner->f[k], w, -dw, sum_w, it, changed);
  }
  for (k = 0; k < winner->nfc; ++k) {
    ap_update1(winner->fc[k].f, w, -dw * winner->fc[k].c, sum_w, it, changed);
  }

  /* correct parse: same two passes, with the opposite sign */
  for (k = 0; k < correct->nf; ++k) {
    ap_update1(correct->f[k], w, dw, sum_w, it, changed);
  }
  for (k = 0; k < correct->nfc; ++k) {
    ap_update1(correct->fc[k].f, w, dw * correct->fc[k].c, sum_w, it, changed);
  }
}  /* ap_sentence() */
コード例 #2
0
/*
 * sentence_stats() - accumulate the statistics contributed by sentence s.
 *
 *   s       sentence being processed
 *   w       weight vector, passed to sentence_scores()
 *   score   array passed to sentence_scores(); score[i] is read for each parse
 *   E_Ew    per-feature accumulator; for every parse i and each of its
 *           features, Px * (exp(score[i] - logZ) - Pyx_i) times the feature
 *           count is added (Pyx_i is only subtracted when it is > 0)
 *   sum_g   incremented by s->g (always, even when s has no parses)
 *   sum_p   incremented by the p field of parse[best_i]
 *   sum_w   incremented by the w field of parse[best_i]
 *
 * Returns -Px * (Ecorrect_score - logZ), where Ecorrect_score is the sum of
 * Pyx_i * score[i] over parses with Pyx_i > 0 and logZ is the log of the sum
 * of exp(score[i]) over all parses.  Returns 0 when s has no parses or when
 * Px == 0.
 */
Float sentence_stats(sentence_type *s, const Float w[], Float score[], Float E_Ew[],
		     Float *sum_g, Float *sum_p, Float *sum_w) {
  Float best_correct_score, best_score;
  int i, j, best_i, best_correct_i;
  Float Z = 0, logZ, Ecorrect_score = 0;

  *sum_g += s->g;

  if (s->nparses <= 0) 
    return 0;

  sentence_scores(s, w, score, &best_correct_score, &best_correct_i, &best_score, &best_i);
  *sum_p += s->parse[best_i].p;
  *sum_w += s->parse[best_i].w;
  
  if (s->Px == 0)  /* skip statistics calculation if Px == 0 */
    return 0;

  assert(best_correct_score <= best_score);

  /* compute Z and Ecorrect_score; Z is accumulated relative to best_score
     so that every exponent is <= 0 */
  for (i = 0; i < s->nparses; ++i) {
    assert(score[i] <= best_score);
    Z += exp(score[i] - best_score);
    /* isfinite() is the standard C99 macro; finite() is an obsolete
       BSD extension that strict ISO C modes do not declare */
    assert(isfinite(Z));
    if (s->parse[i].Pyx > 0) 
      Ecorrect_score += s->parse[i].Pyx * score[i];
  }

  logZ = log(Z) + best_score;

  /* calculate expectations */

  for (i = 0; i < s->nparses; ++i) {
    Float cp = exp(score[i] - logZ);  /* P_w(y|x) */
    assert(isfinite(cp));

    if (s->parse[i].Pyx > 0)  /* P_e(y|x)  */
      cp -= s->parse[i].Pyx;

    assert(cp >= -1.0);
    assert(cp <= 1.0);
    
    cp *= s->Px;

    /* add this parse's weighted feature counts to E_Ew */

    for (j = 0; j < s->parse[i].nf; ++j)  /* features with 1 count */
      E_Ew[s->parse[i].f[j]] += cp;

    for (j = 0; j < s->parse[i].nfc; ++j) /* features with arbitrary counts */
      E_Ew[s->parse[i].fc[j].f] += cp * s->parse[i].fc[j].c;
  }
  return - s->Px * (Ecorrect_score - logZ);
}  /* sentence_stats() */
コード例 #3
0
ファイル: data.c プロジェクト: KerstenDoering/CPI-Pipeline
/*
 * wap_sentence() - pairwise weight update with a per-feature-class step.
 *
 * The parses of s are scored with sentence_scores().  If parse[0] scores
 * no higher than max_score, the weights are moved away from the features
 * of the parse whose index sentence_scores() returned (the "winner") and
 * towards the features of parse[0] (the "correct" parse).  Each feature's
 * step is additionally multiplied by class_dw[feat_class[feature]].
 *
 *   s           sentence whose parses are compared
 *   score       array passed to sentence_scores(); score[0] is read afterwards
 *   w           weight vector, modified only through ap_update1()
 *   dw          base step size; scaled here by
 *               Px * |correct Pyx - winner Pyx|
 *   feat_class  maps a feature index to its class index
 *   class_dw    step multiplier for each class
 *   sum_w, it, changed
 *               forwarded untouched to ap_update1()
 */
void wap_sentence(sentence_type *s, Float score[], Float w[], 
		  Float dw, const size_t feat_class[], const Float class_dw[],
		  Float sum_w[], size_t it, size_t changed[])
{
  Float max_winner_score, max_score;
  parse_type *correct, *winner;
  size_t winner_i, k, feat;

  winner_i = sentence_scores(s, w, score, &max_winner_score, &max_score);

  /* written as a negated <= so an unordered comparison also leaves early */
  if (!(score[0] <= max_score)) {
    return;
  }

  correct = &s->parse[0];
  winner = &s->parse[winner_i];

  /* weight the step by how much this pair matters */
  dw = dw * (s->Px * fabs(correct->Pyx - winner->Pyx));

  /* winner: unit-count features first, then features carrying a count */
  for (k = 0; k < winner->nf; ++k) {
    feat = winner->f[k];
    ap_update1(feat, w, -dw * class_dw[feat_class[feat]], sum_w, it, changed);
  }
  for (k = 0; k < winner->nfc; ++k) {
    feat = winner->fc[k].f;
    ap_update1(feat, w, -dw * winner->fc[k].c * class_dw[feat_class[feat]],
               sum_w, it, changed);
  }

  /* correct parse: same two passes, with the opposite sign */
  for (k = 0; k < correct->nf; ++k) {
    feat = correct->f[k];
    ap_update1(feat, w, dw * class_dw[feat_class[feat]], sum_w, it, changed);
  }
  for (k = 0; k < correct->nfc; ++k) {
    feat = correct->fc[k].f;
    ap_update1(feat, w, dw * correct->fc[k].c * class_dw[feat_class[feat]],
               sum_w, it, changed);
  }
}  /* wap_sentence() */
コード例 #4
0
/*
 * pwlog_sentence_stats() - pairwise log-loss statistics for sentence s.
 *
 * Every parse i other than best_correct_i is paired with the best correct
 * parse.  For each pair a two-way log partition function logZ is computed
 * from the two scores, and the pair contributes
 * -Px * (best_correct_score - logZ) to the returned loss.
 *
 *   s       sentence being processed
 *   w       weight vector, passed to sentence_scores()
 *   score   array passed to sentence_scores(); score[i] is read for each parse
 *   dL_dw   per-feature accumulator; receives Px * Pyi times the feature
 *           counts of each other parse i, and
 *           Px * (sum_Pyc - (nparses-1)) times the feature counts of the
 *           best correct parse
 *   sum_g   incremented by s->g (always, even when s has no parses)
 *   sum_p   incremented by the p field of parse[best_i]
 *   sum_w   incremented by the w field of parse[best_i]
 *
 * Returns the summed pairwise loss, or 0 when s has no parses or Px <= 0.
 */
Float pwlog_sentence_stats(sentence_type *s, const Float w[], 
			   Float score[], Float dL_dw[],
			   Float *sum_g, Float *sum_p, Float *sum_w) 
{
  Float L = 0, best_correct_score, best_score, sum_Pyc = 0;
  int i, j, best_correct_i, best_i;

  *sum_g += s->g;

  if (s->nparses <= 0) 
    return 0;

  sentence_scores(s, w, score, &best_correct_score, &best_correct_i, 
		  &best_score, &best_i);
  *sum_p += s->parse[best_i].p;
  *sum_w += s->parse[best_i].w;
  
  if (s->Px <= 0)  /* skip statistics calculation if Px <= 0 */
    return 0;

  assert(best_correct_score <= best_score);
  assert(best_correct_i >= 0);
  assert(best_correct_i < s->nparses);

  for (i = 0; i < s->nparses; ++i) {
    if (i != best_correct_i) {
      /* subtract the larger of the two scores before exponentiating so
         that neither exponent is positive */
      Float max_score = 
	(score[i] > best_correct_score) ? score[i] : best_correct_score;
      Float logZ = 
	log(exp(best_correct_score-max_score) + exp(score[i]-max_score))
	+ max_score;
      /* isfinite() is the standard C99 macro; finite() is an obsolete
         BSD extension that strict ISO C modes do not declare */
      assert(isfinite(logZ));
      L -= s->Px * (best_correct_score - logZ);
      /* Pyc is conditional probability of correct parse */
      Float Pyc = exp(best_correct_score - logZ); 
      assert(Pyc >= 0);
      assert(Pyc <= 1);
      sum_Pyc += Pyc;
      /* Pyi is conditional probability of incorrect parse */
      Float Pyi = exp(score[i] - logZ);
      assert(Pyi >= 0);
      assert(Pyi <= 1);
      /* Ei is expected number of times incorrect parse occurs */
      Float Ei = s->Px * Pyi;  
      if (Ei == 0) 
	continue;   /* adding zero would change nothing */
      /* calculate contribution of incorrect parse to feature expectations */
      for (j = 0; j < s->parse[i].nf; ++j)  /* features with 1 count */
	dL_dw[s->parse[i].f[j]] += Ei;
      for (j = 0; j < s->parse[i].nfc; ++j) /* features with arbitrary count */
	dL_dw[s->parse[i].fc[j].f] += Ei * s->parse[i].fc[j].c;
    }
  }

  /* calculate contribution of correct parse to feature expectations */

  assert(sum_Pyc >= 0);
  assert(sum_Pyc <= s->nparses-1);

  /* Ec_C is difference between expected and actual number of times
     the correct parse occurs.  */
  Float Ec_C = s->Px * (sum_Pyc - (s->nparses-1));
  /* features with 1 count */
  for (j = 0; j < s->parse[best_correct_i].nf; ++j) 
    dL_dw[s->parse[best_correct_i].f[j]] += Ec_C;
  /* features with arbitrary counts */
  for (j = 0; j < s->parse[best_correct_i].nfc; ++j)
    dL_dw[s->parse[best_correct_i].fc[j].f] 
      += s->parse[best_correct_i].fc[j].c * Ec_C;
  return L;
}  /* pwlog_sentence_stats() */
コード例 #5
0
/*
 * emll_sentence_stats() - accumulate the statistics contributed by
 * sentence s, using one partition function over all parses (Z) and a
 * Pyx-weighted one over the parses with Pyx > 0 (Zc).
 *
 *   s       sentence being processed
 *   w       weight vector, passed to sentence_scores()
 *   score   array passed to sentence_scores(); score[i] is read for each parse
 *   dL_dw   per-feature accumulator; for every parse i and each of its
 *           features, Px * (exp(score[i] - logZ)
 *                           - Pyx_i * exp(score[i] - logZc))
 *           times the feature count is added (the second term only when
 *           Pyx_i > 0)
 *   sum_g   incremented by s->g (always, even when s has no parses)
 *   sum_p   incremented by the p field of parse[best_i]
 *   sum_w   incremented by the w field of parse[best_i]
 *
 * Returns -Px * (logZc - logZ), or 0 when s has no parses or Px == 0.
 */
Float emll_sentence_stats(sentence_type *s, const Float w[], 
			  Float score[], Float dL_dw[], 
			  Float *sum_g, Float *sum_p, Float *sum_w) {
  Float best_correct_score, best_score;
  int i, j, best_i, best_correct_i;
  Float Z = 0, logZ;    /*!< Z is the partition fn calculated over all parses */
  Float Zc = 0, logZc;  /*!< Zc is the partition fn calculated over correct parses */

  *sum_g += s->g;

  if (s->nparses <= 0) 
    return 0;

  sentence_scores(s, w, score, &best_correct_score, &best_correct_i, &best_score, &best_i);
  *sum_p += s->parse[best_i].p;
  *sum_w += s->parse[best_i].w;
  
  if (s->Px == 0)  /* skip statistics calculation if Px == 0 */
    return 0;

  assert(best_correct_score <= best_score);

  /* compute Z and Zc; each is accumulated relative to its own best score
     so that every exponent is <= 0 */
  for (i = 0; i < s->nparses; ++i) {
    assert(score[i] <= best_score);
    Z += exp(score[i] - best_score);
    /* isfinite() is the standard C99 macro; finite() is an obsolete
       BSD extension that strict ISO C modes do not declare */
    assert(isfinite(Z));
    if (s->parse[i].Pyx > 0) {
      assert(score[i] <= best_correct_score);
      Zc += s->parse[i].Pyx * exp(score[i] - best_correct_score);
      assert(isfinite(Zc));
    }
  }

  logZ = log(Z) + best_score;
  /* NOTE(review): if no parse has Pyx > 0, Zc is still 0 here and this
     takes log(0); presumably callers guarantee at least one such parse
     whenever Px != 0 -- confirm */
  logZc = log(Zc) + best_correct_score;

  /* calculate expectations */

  for (i = 0; i < s->nparses; ++i) {
    Float cp = exp(score[i] - logZ);  /* P_w(y|x) */
    assert(isfinite(cp));
    assert(cp <= 1.0+FLT_EPSILON);

    if (s->parse[i].Pyx > 0) {
      cp -= s->parse[i].Pyx * exp(score[i] - logZc);
      assert(isfinite(cp));
      /* print the offending values before the assertion below fires */
      if (cp < -(1.0+FLT_EPSILON)) 
	fprintf(stderr, "\n\n## cp = %f, score[%d] = %g, logZc = %g, s->parse[%d].Pyx = %g, s->parse[i].Pyx * exp(score[i] - logZc) = %g\n\n",
		cp, i, score[i], logZc, i, s->parse[i].Pyx, s->parse[i].Pyx * exp(score[i] - logZc));
      assert(cp >= -(1.0+FLT_EPSILON));
    }
    
    cp *= s->Px;

    /* add this parse's weighted feature counts to dL_dw */

    for (j = 0; j < s->parse[i].nf; ++j)  /* features with 1 count */
      dL_dw[s->parse[i].f[j]] += cp;

    for (j = 0; j < s->parse[i].nfc; ++j) /* features with arbitrary counts */
      dL_dw[s->parse[i].fc[j].f] += cp * s->parse[i].fc[j].c;
  }
  return - s->Px * (logZc - logZ);
}  /* emll_sentence_stats() */