/* --------------------------------------------------------------------------- decode one entry ---------------------------------------------------------------------- */ void CTIesrDict::decode_entry( char *word, int idx, char *pron ) { char buf[ MAX_STR ]; char pron_def[ MAX_PRON ]; int idx2, i; for( i = 0; i < pron[0] + 1; i++ ) pron_def[i] = pron[i]; /* go up, may be more than one entry, find the first entry of this word */ idx2 = idx; idx2 = dec_entry( idx2 ); if( idx2 >= first ) { buf[0] = '\0'; expand_str( buf, idx2 ); } while( ( idx2 >= first ) && compare_str( word, buf ) == 0 ) { idx = idx2; idx2 = dec_entry( idx2 ); if( idx2 >= first ) { buf[0] = '\0'; expand_str( buf, idx2 ); } } /* go down, starting from first entry of this word */ idx2 = idx; buf[0] = '\0'; expand_str( buf, idx2 ); while( ( idx2 <= last ) && compare_str( word, buf ) == 0 ) { print_pron( word, idx2, pron_def, pron ); return; /* only the first pron, discard multiple pron */ idx2 = inc_entry( idx2 ); if( idx2 <= last ) { buf[0] = '\0'; expand_str( buf, idx2 ); } } }
/* --------------------------------------------------------------------------- dictionary lookup, binary search, it returns: index: first <= index <= last -1: not found ---------------------------------------------------------------------- */ int CTIesrDict::lookup( char *word ) { int idx, icompare, imax, imin; char buf[ MAX_STR ]; imax = last; imin = first; while( imax >= imin ) { idx = mid_entry( imin, imax ); buf[0] = '\0'; expand_str( buf, idx ); icompare = compare_str( word, buf ); if( icompare == 0 ) { return idx; } else if( icompare > 0 ) { imin = inc_entry( idx ); } else { imax = dec_entry( idx ); } } return -1; }
/*
 * Append the NUL-terminated string app to the buffer *str.
 *
 * *count is the index of the next free slot and is advanced by one for every
 * character written; *size is the current capacity.  Whenever the buffer is
 * full, expand_str(str, size) is called before the next character is stored.
 * The terminating NUL of app is not copied.
 */
void strapp(char** const str, const char * const app, uint32_t * const count, uint32_t * const size)
{
    const char *src;

    for (src = app; *src != '\0'; ++src) {
        if (*count >= *size) {
            expand_str(str, size);
        }
        (*str)[*count] = *src;
        ++(*count);
    }
}
/*
 * Count how many leading characters str has in common with the expanded
 * text of entry cur_entry - 1, and store that count in *cnt.
 */
void compare_entry(char *str, int *cnt)
{
    char prev[ MAX_STR ];
    int matched = 0;

    prev[0] = '\0';
    expand_str(prev, cur_entry - 1);

    /* Advance while both strings continue and agree. */
    while ( str[matched] != '\0' &&
            prev[matched] != '\0' &&
            str[matched] == prev[matched] )
    {
        matched++;
    }

    *cnt = matched;
}
/*
 * Rebuild the full text of entry idx in buf.
 *
 * An entry whose first byte is non-negative (as a signed char) is copied as
 * is.  Otherwise the first byte is the negated number of leading characters
 * kept from the expansion of the previous entry, and the remaining bytes are
 * copied in after them.
 */
void expand_str(char *buf, int idx)
{
    int base = idx;
    int k;

    /* Walk back to the nearest entry that is stored in full. */
    while ( (signed char) entry[ base ][ 0 ] < 0 )
    {
        base--;
    }

    copy_str(buf, entry[ base ]);

    /* Replay every later entry up to idx, oldest first. */
    for ( k = base + 1; k <= idx; k++ )
    {
        int keep = - (signed char) entry[ k ][ 0 ];

        copy_str(buf + keep, &entry[ k ][ 1 ]);
    }
}
/*
 * Build a textual form of n from the mag[] / denom[] tables.
 *
 * The tables are scanned from index mags - 1 down to 0.  For every magnitude
 * that n reaches, the text produced by getMagStr() for n / mag[i] is appended,
 * followed by denom[i]; the `and` string is inserted before every group except
 * the first one emitted.
 *
 * Returns a NUL-terminated buffer obtained from malloc() (and possibly regrown
 * through expand_str()), or NULL when the initial allocation fails.  No group
 * is emitted when n is below every magnitude, so the result is then "".
 */
char* toBritishStr(uint64_t n)
{
    uint32_t i, first = 1, size = 10, count = 0;
    char *out;

    out = (char*) malloc(size * sizeof(char));
    if (out == NULL) {
        /* Previously the NULL pointer was written through further down. */
        return NULL;
    }

    /* i is unsigned: decrementing past 0 wraps, which ends the loop. */
    for (i = mags - 1; i < mags; --i) {
        if (n >= mag[i]) {
            if (!first) {
                strapp(&out, and, &count, &size);
            }
            first = 0;
            getMagStr(n / mag[i], &out, &count, &size);
            strapp(&out, denom[i], &count, &size);
            n %= mag[i];
        }
    }

    /* Make room for the terminator if the buffer is exactly full. */
    if (count >= size) {
        expand_str(&out, &size);
    }
    out[count++] = '\0';

    return out;
}
/* ---------------------------------------------------------------------------
   Rebuild the full word stored at dictionary location idx into buf.

   Callers must do buf[0] = '\0' before calling expand_str().

   A non-negative first byte means the word is stored in full.  A negative
   first byte is the negated number of leading characters kept from the
   expansion of the preceding entry; the bytes after it are copied in behind
   them.
   ---------------------------------------------------------------------- */
void CTIesrDict::expand_str( char *buf, int idx )
{
   const signed char lead = static_cast<signed char>( dict_beg[ idx ] );

   if( lead >= 0 )
   {
      copy_str( buf, &dict_beg[ idx ] );
      return;
   }

   /* Expand the preceding entry first, then overwrite past the kept prefix */
   expand_str( buf, dec_entry( idx ) );

   const int keep = -lead;
   copy_str( buf + keep, &dict_beg[ idx + 1 ] );
}
/*
 * Read lines from inf until read_line() returns NULL or an empty string.
 *
 * Each line has everything from its last '\n' onward cut off, is passed
 * through expand_str(), and the result is added to a string block.  The
 * collected strings are finally joined with "\n" by catenate_sblock() and
 * that result is returned.
 */
static char * read_file(FILE * inf)
{
    sblock sb = sblock_nil;
    char * ln;

    while ((ln = read_line(inf)) != NULL && ln[0] != '\0') {
        /* strrchr is the standard equivalent of the legacy rindex(). */
        char * nl = strrchr(ln, '\n');
        char * eln;

        if (nl)
            *nl = '\0';

        eln = expand_str(ln);
        sb = add_sblock(sb, eln);
        free(ln);
        free(eln);
    }

    /* The loop also stops on a non-NULL empty string.  Lines are freed above,
       so read_line() presumably always returns heap memory; release the
       terminating one as well instead of leaking it.  free(NULL) is a no-op. */
    free(ln);

    ln = catenate_sblock(sb, "\n");
    free_sblock(sb);
    return ln;
}
/*---------------------------------------------------------------- LocatePron This function locates the one-based Nth pronunciation for a word in the dictionary. N is specified by the argument aEntryNumber. This function returns the byte entry location in the dictionary if the Nth pronunciation for the word exists, otherwise it returns 0. ----------------------------------------------------------------*/ int CTIesrDict::LocatePron( char* aWord, unsigned int aEntryNumber ) { int startLocation; int pronLocation; char dictWord[MAX_STR]; unsigned int entryNumber; int noMatch; char *ucWord; // User should not request the zeroth entry. if( aEntryNumber == 0 ) return 0; try { ucWord = new char[ strlen( aWord ) + 1]; } catch( std::bad_alloc &ex ) { return 0; } strcpy( ucWord, aWord ); chrtoupper( ucWord ); #ifdef _TIESRDICT_USE_TIESRDT if( doLetterMap ) { CTIesrDict::Errors error = map_dt_letters( ucWord ); if( error != ErrNone ) { delete [] ucWord; return 0; } } #endif // Check for existence of the word in the dictionary startLocation = lookup( ucWord ); if( startLocation == -1 ) { delete []ucWord; return 0; } // Found an entry for the word in the dictionary. // Search backward in dictionary to find the first entry of this word. noMatch = 0; while( !noMatch && startLocation > first ) { // Go to prior entry location in dictionary pronLocation = dec_entry( startLocation ); // Prior word at the entry location dictWord[0] = '\0'; expand_str( dictWord, pronLocation ); // If the dictionary word matches the word we want, then continue searching backward noMatch = compare_str( ucWord, dictWord ); if( !noMatch ) { startLocation = pronLocation; } } // If user wants the first entry, it is the present one if( aEntryNumber == 1 ) { delete [] ucWord; return startLocation; } // User wants an entry number > 1 for the word. 
// Try searching forward for the entry number the user wants entryNumber = 1; noMatch = 0; while( !noMatch && startLocation < last ) { // Go to next entry location in dictionary pronLocation = inc_entry( startLocation ); // Word at the present entry location dictWord[0] = '\0'; expand_str( dictWord, pronLocation ); // If the dictionary word matches the word wanted then increment count // and determine if it is the entry number wanted noMatch = compare_str( ucWord, dictWord ); if( !noMatch ) { entryNumber++; // Return this entry, which is the entry number wanted if( entryNumber == aEntryNumber ) { delete [] ucWord; return pronLocation; } // Continue searching, have not found desired entry number yet startLocation = pronLocation; } else { // No more words match, and the wanted entry has not been found delete [] ucWord; return 0; } } // Did not find the wanted entry number for the wanted word delete [] ucWord; return 0; }
/*----------------------------------------------------------------
 GetNextEntry

 This function should only be called after a call to GetPronEntry.
 It provides a means of obtaining the next pronunciation of the word
 specified in GetPronEntry, so that a dictionary search does not need
 to be done for each of multiple pronunciations of a word.

 On every failure aPron has zero in its first character and, if
 aPronString is not NULL, aPronString is set to "".

 Returns:
   ErrNone             the next pronunciation was written to aPron
                       (and to aPronString when it is not NULL)
   ErrFail             no word has been looked up yet
   ErrNotInDictionary  the word has no further dictionary entry
   other               the error reported by map_dt_letters(), when
                       letter mapping is compiled in and enabled
 ----------------------------------------------------------------*/
CTIesrDict::Errors CTIesrDict::GetNextEntry( char aPron[], char *aPronString )
{
   int pronLocation;

   // Check that a word has been looked up by GetPronEntry
   if( m_word == NULL || strcmp( m_word, "" ) == 0 )
   {
      aPron[0] = 0;
      if( aPronString )
         aPronString[0] = '\0';
      return ErrFail;
   }

   // Try to find a valid dictionary entry location for the next
   // word entry.

   // If no dictionary entry has been looked up yet,
   // try to find the first dictionary entry.
   if( m_entryLocation == 0 )
   {
      pronLocation = LocatePron( m_word, 1 );
   }

   // Otherwise, if not at end of dictionary try to find next entry.
   else if( m_entryLocation < last )
   {
      char dictWord[MAX_STR];
      char ucWord[MAX_STR];

      if( strlen( m_word ) >= sizeof( ucWord ) )
      {
         // The word does not fit the fixed-size buffer that every expanded
         // dictionary word is compared in, so it cannot equal the next
         // entry.  Treat it as "no further entry" instead of overrunning
         // ucWord with strcpy.
         pronLocation = 0;
      }
      else
      {
         strcpy( ucWord, m_word );
         chrtoupper( ucWord );

#ifdef _TIESRDICT_USE_TIESRDT
         // Convert word to indices if using TIesrDT and letter value >127
         if( doLetterMap )
         {
            CTIesrDict::Errors error = map_dt_letters( ucWord );
            if( error != ErrNone )
            {
               // Leave the outputs empty, as on every other failure path
               aPron[0] = 0;
               if( aPronString )
                  aPronString[0] = '\0';
               return error;
            }
         }
#endif

         pronLocation = inc_entry( m_entryLocation );

         // expand_str requires an empty buffer on entry
         dictWord[0] = '\0';
         expand_str( dictWord, pronLocation );

         // Next word entry does not match word
         if( strcmp( dictWord, ucWord ) != 0 )
         {
            pronLocation = 0;
         }
      }
   }

   // At the end of dictionary
   else
   {
      pronLocation = 0;
   }

   // If no valid location, then no more entries of this word exist
   // in the dictionary
   if( pronLocation < first || pronLocation > last )
   {
      aPron[0] = 0;
      if( aPronString )
         aPronString[0] = '\0';
      return ErrNotInDictionary;
   }

   // A valid word exists in the next dictionary location
   m_entryLocation = pronLocation;

   // Get the pronunciation of the word according to the dictionary
   print_pron( m_word, m_entryLocation, m_defaultPron, aPron );

   if( aPronString )
      PronToString( aPron, aPronString );

   m_entryNumber++;

   return ErrNone;
}
/*----------------------------------------------------------------
 GetNumberEntries

 Count the dictionary entries for the word aWord and store the count
 in *aNumberEntries.  The rule-based default pronunciation is NOT
 included, only pronunciations present in the dictionary, so the
 count can be zero.

 Returns ErrNone on success (including when the word is absent),
 ErrMemory when the upper-cased copy of the word cannot be allocated,
 or the error reported by map_dt_letters() when letter mapping is
 compiled in and enabled.
 ----------------------------------------------------------------*/
CTIesrDict::Errors CTIesrDict::GetNumberEntries( const char* aWord, unsigned int *aNumberEntries )
{
   *aNumberEntries = 0;

   // Work on an upper-cased private copy of the word.
   char *upperWord;
   try
   {
      upperWord = new char[ strlen( aWord ) + 1];
   }
   catch( const std::bad_alloc & )
   {
      return ErrMemory;
   }

   strcpy( upperWord, aWord );
   chrtoupper( upperWord );

#ifdef _TIESRDICT_USE_TIESRDT
   // Convert word to indices if using TIesrDT and letter value >127
   if( doLetterMap )
   {
      const CTIesrDict::Errors mapStatus = map_dt_letters( upperWord );
      if( mapStatus != ErrNone )
      {
         delete [] upperWord;
         return mapStatus;
      }
   }
#endif

   // Any entry of the word serves as the anchor for counting.
   const int anchor = lookup( upperWord );

   if( anchor != -1 )
   {
      char entryWord[MAX_STR];
      int total = 1;   // the anchor entry itself

      // Count matching entries before the anchor
      for( int at = anchor; at > first; )
      {
         at = dec_entry( at );

         entryWord[0] = '\0';
         expand_str( entryWord, at );

         if( compare_str( upperWord, entryWord ) != 0 )
            break;

         ++total;
      }

      // Count matching entries after the anchor
      for( int at = anchor; at < last; )
      {
         at = inc_entry( at );

         entryWord[0] = '\0';
         expand_str( entryWord, at );

         if( compare_str( upperWord, entryWord ) != 0 )
            break;

         ++total;
      }

      *aNumberEntries = total;
   }

   delete [] upperWord;
   return ErrNone;
}