bool LMInteriorLevelWordEntry::getProbWithBackoff( int order , int *prev_words , real *prob )
{
    // There should be 'order' entries in 'prev_words' and the ordering should
    //   be eg.  W3,W2,W1,W4 if order == 4.
    real temp ;
    
#ifdef DEBUG
    if ( order < 1 )
        error("LMInteriorLevelWordEntry::getProbWithBackoff - order out of range\n") ;
#endif

    if ( order == 1 )
    {
        if ( (*prob = getWordProb( *prev_words )) <= LOG_ZERO )
        {
            *prob = log_bo_weight ;
            return false ;
        }
        else
            return true ;
    }
    else
    {
        if ( next_level == NULL )
        {
            if ( (*prob = getWordProb( prev_words[order-1] )) <= LOG_ZERO )
            {
                *prob = log_bo_weight ;
                return false ;
            }
            else
                return true ;
        }
        else
        {
            if ( next_level->getProbWithBackoff( order , prev_words , prob ) == true )
                return true ;
            else
            {
                if ( (temp = getWordProb( prev_words[order-1] )) <= LOG_ZERO )
                {
                    *prob += log_bo_weight ;
                    return false ;
                }
                else
                {
                    *prob += temp ;
                    return true ;
                }
            }
        }
    }
}
Exemplo n.º 2
0
// Accumulate expected counts for every known phrase of the corpus.
//
// filename      - corpus file, loaded with load_File()
// db_name       - word-probability database, opened read/write
// db_count_name - expected-count database, opened read/write and updated
// SegLen        - phrases with more than SegLen tokens are skipped
//
// For each sentence the alpha and beta tables are computed, the sentence is
// split into unique phrases with creatPhrase(), and for every phrase that has
// at most SegLen tokens and already has a record in the word-probability
// database, the count from getWordExpectCount() is added to whatever the
// expected-count database already holds.  The log of the result is stored
// back as a "%f"-formatted string.
//
// NOTE(review): 'ft', each 'pt' and the alpha/beta tables are not released
// here because their deallocators are not visible from this function --
// confirm who owns them.
void EM (char *filename, char *db_name, char *db_count_name, int SegLen) {
    // Smallest count handed to log(); keeps zero/tiny counts from producing -inf.
    const double min_count = 1.7e-307;
    DBM *db_word_prob;
    DBM *db_expected_count;
    struct FileText *ft;
    struct PhraseTable *pt;
    struct ForwardBackward *fb;
    char buf[BLKSIZE];
    double expect_count;
    double current_count;
    int n_rows;
    int had_record;
    int is_old;

    // load corpus into memory
    // (presumably load_File() returns NULL on failure -- guard before use)
    ft = load_File (filename);
    if (ft == NULL) {
        fprintf (stderr, "EM: could not load corpus file '%s'\n", filename);
        return;
    }
    n_rows = ft->n_rows;

    fb = malloc (sizeof *fb);
    if (fb == NULL) {
        fprintf (stderr, "EM: out of memory\n");
        return;
    }

    // open word_prob and expected_count dbm.
    db_Open_ReadWrite (db_name, &db_word_prob);
    db_Open_ReadWrite (db_count_name, &db_expected_count);

    // start to compute expected count
    for (int sent = 0; sent < n_rows; ++sent) {
        char *sentence = ft->text[sent];

        fprintf (stdout, "Sentence: %s\n", sentence);
        fb->alpha = alpha (sentence, db_word_prob, SegLen);       // get alpha
        fb->beta = beta (sentence, db_word_prob, SegLen);         // get beta

        // phrase <=> word in this program.
        // create the unique phrases of this sentence
        pt = creatPhrase (sentence);

        for (int p = 0; p < pt->n_phrase; p++) {
            // Only phrases short enough and already known to the
            // word-probability database are counted.
            if (pt->phrases[p].n_token > SegLen
                || !db_Is_Old_Record (db_word_prob, pt->phrases[p].content)) {
                continue;
            }

            // The original notes this "returns P type" -- presumably a
            // probability-domain value rather than a log; confirm.
            current_count = getWordExpectCount (sentence, pt->phrases[p].content,
                                                db_word_prob, fb->alpha, fb->beta);

            had_record = db_Is_Old_Record (db_expected_count, pt->phrases[p].content) ? 1 : 0;
            if (had_record) {
                // Existing record: add this sentence's count to the stored one.
                expect_count = getWordProb (db_expected_count, pt->phrases[p].content);
                expect_count += current_count;
            } else {
                expect_count = current_count;
            }

            // Clamp so that log() below never sees a value that small (or zero).
            if (expect_count < min_count) {
                expect_count = min_count;
            }

            snprintf (buf, sizeof buf, "%f", log (expect_count));
            db_Update_String (db_expected_count, pt->phrases[p].content, buf, &is_old);
#ifdef DEBUG
            fprintf (stdout, had_record ? "Update: %f %s, %s\n" : "New: %f %s, %s\n",
                     expect_count, buf, pt->phrases[p].content);
#endif
        }
    }

    db_Close (db_word_prob);
    db_Close (db_expected_count);
    free (fb);
}