bool LMInteriorLevelWordEntry::getProbWithBackoff( int order , int *prev_words , real *prob ) { // There should be 'order' entries in 'prev_words' and the ordering should // be eg. W3,W2,W1,W4 if order == 4. real temp ; #ifdef DEBUG if ( order < 1 ) error("LMInteriorLevelWordEntry::getProbWithBackoff - order out of range\n") ; #endif if ( order == 1 ) { if ( (*prob = getWordProb( *prev_words )) <= LOG_ZERO ) { *prob = log_bo_weight ; return false ; } else return true ; } else { if ( next_level == NULL ) { if ( (*prob = getWordProb( prev_words[order-1] )) <= LOG_ZERO ) { *prob = log_bo_weight ; return false ; } else return true ; } else { if ( next_level->getProbWithBackoff( order , prev_words , prob ) == true ) return true ; else { if ( (temp = getWordProb( prev_words[order-1] )) <= LOG_ZERO ) { *prob += log_bo_weight ; return false ; } else { *prob += temp ; return true ; } } } } }
void EM (char *filename, char *db_name, char *db_count_name, int SegLen) { DBM *db_word_prob; DBM *db_expected_count; struct FileText *ft = load_File (filename); struct PhraseTable *pt; struct ForwardBackward *fb; char buf[BLKSIZE]; char *sentence; double expect_count; double current_count, total_count = 0.0; int n_rows = ft->n_rows; int is_old; // open word_prob and expected_count dbm. // load corpus into memory // db_Open_ReadWrite (db_name, &db_word_prob); db_Open_ReadWrite (db_count_name, &db_expected_count); fb = malloc (sizeof(struct ForwardBackward)); // start to compute expected count // // for (int sent=0; sent<n_rows; ++sent) { sentence = ft->text[sent]; fprintf (stdout, "Sentence: %s\n", sentence); fb->alpha = alpha (sentence, db_word_prob , SegLen); // get alpha fb->beta = beta (sentence, db_word_prob , SegLen); // get beta // phrase <=> word in this program. // creat phrases uniqueliy pt = creatPhrase (sentence); // scan for (int p = 0; p < pt->n_phrase; p++) { if (pt->phrases[p].n_token <= SegLen && db_Is_Old_Record (db_word_prob, pt->phrases[p].content)) { // return P type current_count = getWordExpectCount (sentence, pt->phrases[p].content, db_word_prob, fb->alpha, fb->beta); if (db_Is_Old_Record (db_expected_count, pt->phrases[p].content)) { // return P type expect_count = getWordProb (db_expected_count, pt->phrases[p].content); expect_count += current_count; // if expect_count => inf, given a small value if ( 1.7e-307 > expect_count) { expect_count = 1.7e-307; } sprintf (buf, "%f", log(expect_count)); db_Update_String (db_expected_count, pt->phrases[p].content, buf, &is_old); #ifdef DEBUG fprintf (stdout, "Update: %f %s, %s\n", expect_count, buf, pt->phrases[p].content); #endif } else { expect_count = current_count; // if expect_count => inf, given a small value if ( 1.7e-307 > expect_count) { expect_count = 1.7e-307; } sprintf (buf, "%f", log(expect_count)); db_Update_String (db_expected_count, pt->phrases[p].content, buf, &is_old); #ifdef DEBUG fprintf (stdout, "New: %f %s, %s\n", expect_count, buf, pt->phrases[p].content); #endif } total_count += current_count; } } } db_Close (db_word_prob); db_Close (db_expected_count); }