示例#1
0
static cellpoint read_sexp(void)
{
    return read_parse(ibuffer, &scan);
}
示例#2
0
int read_sentence(corpusflags_type *flags, FILE *in, sentence_type *s, 
		  feature_type *fmax, int *maxnparses) {
  int i, nread, best_fscore_index = -1, nwinners = 0;
  DataFloat fscore, best_logprob = -DATAFLOAT_MAX, best_fscore = -1;

  nread = fscanf(in, " G = " DATAFLOAT_FORMAT " ", &s->g);
  if (nread == EOF)
    return EOF;
  assert(nread == 0 || nread == 1);
  if (nread == 0)
    s->g = 1;

  nread = fscanf(in, " N = %d", &s->nparses);
  if (nread == EOF)
    return EOF;
  assert(nread == 1);
  assert(s->nparses >= 0);
  if (s->nparses > *maxnparses)
    *maxnparses = s->nparses;

  if (s->nparses > 0) {
    s->parse = SMALLOC(s->nparses*sizeof(parse_type));
    assert(s->parse != NULL);
  }
  else
    s->parse = NULL;

  for (i = 0; i < s->nparses; ++i) {
    read_parse(in, &s->parse[i], fmax);
    // handle the admittedly strange case where a parse has no brackets
    if (s->parse[i].w == 0.0 || s->parse[i].p == 0.0) {
      fscore = 0.0;
    } else {
      fscore = 2 * s->parse[i].w/(s->parse[i].p+s->g);
    }
    if (fscore+DATAFLOAT_EPS >= best_fscore) {
      Float logprob = feature_value(&s->parse[i], 0); // logprob is feature 0
      if (fabs(fscore-best_fscore) < 2*DATAFLOAT_EPS) {
	++nwinners;                   // tied best f-scores    
	if (logprob > best_logprob) { // pick candidate with best logprob
	  best_fscore = fscore;
	  best_logprob = logprob;
	  best_fscore_index = i;
	}
      }
      else {
	best_fscore = fscore;
	best_logprob = logprob;
	best_fscore_index = i;
	nwinners = 1;
      }
    }
  }

  // skip sentences where the best f-score was 0 since there's nothing to learn from them
  if (best_fscore == 0.0) {
    best_fscore = -1;
    best_logprob = -DATAFLOAT_MAX;
    best_fscore_index = -1;
    nwinners = 0;
  }
  if (best_fscore_index >= 0) {  /* is there a winner? */
    Float sum_Pyx = 0;
    assert(nwinners > 0);
    assert(best_fscore_index < s->nparses);
    s->Px = 1.0;		 /* indicate that there is a winner */
    s->correct_index = best_fscore_index;
    if (flags && flags->Pyx_factor > 1) {
      Float Z = 0;
      for (i = 0; i < s->nparses; ++i) {
	fscore = 2 * s->parse[i].w/(s->parse[i].p+s->g);
	Z += pow(flags->Pyx_factor, fscore - best_fscore);
      }
      for (i = 0; i < s->nparses; ++i) {
	fscore = 2 * s->parse[i].w/(s->parse[i].p+s->g);
	sum_Pyx += s->parse[i].Pyx 
	  = pow(flags->Pyx_factor, fscore - best_fscore) / Z;
      }
    }
    else if (flags && flags->Pyx_factor > 0) { 
      /* Pyx_factor == 1; all winners get equal Pyx */
      for (i = 0; i < s->nparses; ++i) {
	Float fscore = 2 * s->parse[i].w/(s->parse[i].p+s->g);
	if (fabs(best_fscore-fscore) < 2*DATAFLOAT_EPS)
	  sum_Pyx += s->parse[i].Pyx = 1.0/nwinners;
	else
	  sum_Pyx += s->parse[i].Pyx = 0;
      }
    }	  
    else {  /* Pyx_factor == 0; f-score winner gets Pyx = 1, all others 0 */
      for (i = 0; i < s->nparses; ++i)  
	sum_Pyx += s->parse[i].Pyx = (i == best_fscore_index);
    }
    assert(fabs(sum_Pyx - 1) <= DATAFLOAT_EPS);
  }
  else {    /* no winner, set Px to 0.0 to indicate this */
    s->Px = 0.0;
    s->correct_index = s->nparses;
    for (i = 0; i < s->nparses; ++i)
      s->parse[i].Pyx = 0.0;
    /* We used to ensure that s->nparses == 0 here, but 
       instead do nothing since read_corpus now knows to
       skip these sentences.  These sentences show up
       when we have parse failures and the parser returns
       a flat bracketing. */
  }

  return s->nparses;
}  /* read_sentence() */
示例#3
0
int read_sentence(corpusflags_type *flags, FILE *in, sentence_type *s, 
		  feature_type *fmax, int *maxnparses) {
  int i, nread, best_fscore_index = -1, nwinners = 0;
  DataFloat fscore, best_logprob = -DATAFLOAT_MAX, best_fscore = -1;

  nread = fscanf(in, " G = " DATAFLOAT_FORMAT " ", &s->g);
  if (nread == EOF)
    return EOF;
  assert(nread == 0 || nread == 1);
  if (nread == 0)
    s->g = 1;

  nread = fscanf(in, " N = %u", &s->nparses);
  if (nread == EOF)
    return EOF;
  assert(nread == 1);
  assert(s->nparses >= 0);
  if (s->nparses > *maxnparses)
    *maxnparses = s->nparses;

  if (s->nparses > 0) {
    s->parse = SMALLOC(s->nparses*sizeof(parse_type));
    assert(s->parse != NULL);
  }
  else
    s->parse = NULL;

  for (i = 0; i < s->nparses; ++i) {
    read_parse(in, &s->parse[i], fmax);
    fscore = 2 * s->parse[i].w/(s->parse[i].p+s->g);
    if (fscore+DATAFLOAT_EPS >= best_fscore) {
      Float logprob = feature_value(&s->parse[i], 0); // logprob is feature 0
      if (fabs(fscore-best_fscore) < 2*DATAFLOAT_EPS) {
	++nwinners;                   // tied best f-scores    
	if (logprob > best_logprob) { // pick candidate with best logprob
	  best_fscore = fscore;
	  best_logprob = logprob;
	  best_fscore_index = i;
	}
      }
      else {
	best_fscore = fscore;
	best_logprob = logprob;
	best_fscore_index = i;
	nwinners = 1;
      }
    }
  }

  if (best_fscore_index >= 0) {  /* is there a winner? */
    Float sum_Pyx = 0;
    assert(nwinners > 0);
    s->Px = 1.0;		 /* indicate that there is a winner */
    if (flags->Pyx_factor > 1) {
      Float Z = 0;
      for (i = 0; i < s->nparses; ++i) {
	fscore = 2 * s->parse[i].w/(s->parse[i].p+s->g);
	Z += pow(flags->Pyx_factor, fscore - best_fscore);
      }
      for (i = 0; i < s->nparses; ++i) {
	fscore = 2 * s->parse[i].w/(s->parse[i].p+s->g);
	sum_Pyx += s->parse[i].Pyx = pow(flags->Pyx_factor, fscore - best_fscore) / Z;
      }
    }
    else if (flags->Pyx_factor > 0) { 
      /* Pyx_factor == 1; all winners get equal Pyx */
      for (i = 0; i < s->nparses; ++i) {
	Float fscore = 2 * s->parse[i].w/(s->parse[i].p+s->g);
	if (fabs(best_fscore-fscore) < 2*DATAFLOAT_EPS)
	  sum_Pyx += s->parse[i].Pyx = 1.0/nwinners;
	else
	  sum_Pyx += s->parse[i].Pyx = 0;
      }
    }	  
    else {  /* Pyx_factor == 0; f-score winner gets Pyx = 1, all others 0 */
      for (i = 0; i < s->nparses; ++i)  
	sum_Pyx += s->parse[i].Pyx = (i == best_fscore_index);
    }
    assert(fabs(sum_Pyx - 1) <= DATAFLOAT_EPS);
  }
  else {    /* no winner, set Px to 0.0 to indicate this */
    s->Px = 0.0;
    for (i = 0; i < s->nparses; ++i)
      s->parse[i].Pyx = 0.0;
  }

  return s->nparses;
}  /* read_sentence() */