int main(int argc, char * argv[]){ FILE * output = stdout; const char * bigram_filename = SYSTEM_BIGRAM; SystemTableInfo system_table_info; bool retval = system_table_info.load(SYSTEM_TABLE_INFO); if (!retval) { fprintf(stderr, "load table.conf failed.\n"); exit(ENOENT); } FacadePhraseIndex phrase_index; const pinyin_table_info_t * phrase_files = system_table_info.get_table_info(); if (!load_phrase_index(phrase_files, &phrase_index)) exit(ENOENT); Bigram bigram; bigram.attach(bigram_filename, ATTACH_READONLY); begin_data(output); gen_unigram(output, &phrase_index); gen_bigram(output, &phrase_index, &bigram); end_data(output); return 0; }
int main(int argc, char * argv[]){ FILE * output = stdout; const char * bigram_filename = "bigram.db"; FacadePhraseIndex phrase_index; //gb_char binary file MemoryChunk * chunk = new MemoryChunk; chunk->load("gb_char.bin"); phrase_index.load(1, chunk); //gbk_char binary file chunk = new MemoryChunk; chunk->load("gbk_char.bin"); phrase_index.load(2, chunk); Bigram bigram; bigram.attach(bigram_filename, ATTACH_READONLY); begin_data(output); gen_unigram(output, &phrase_index); gen_bigram(output, &phrase_index, &bigram); end_data(output); return 0; }
int main(int argc, char * argv[]){ int i = 1; const char * k_mixture_model_filename = NULL; FILE * output = stdout; while ( i < argc ){ if ( strcmp ("--help", argv[i]) == 0 ){ print_help(); exit(0); } else if ( strcmp ("--k-mixture-model-file", argv[i]) == 0 ){ if ( ++i > argc ){ print_help(); exit(EINVAL); } k_mixture_model_filename = argv[i]; } else { print_help(); exit(EINVAL); } ++i; } FacadePhraseIndex phrase_index; //gb_char binary file MemoryChunk * chunk = new MemoryChunk; chunk->load("gb_char.bin"); phrase_index.load(1, chunk); //gbk_char binary file chunk = new MemoryChunk; chunk->load("gbk_char.bin"); phrase_index.load(2, chunk); KMixtureModelBigram bigram(K_MIXTURE_MODEL_MAGIC_NUMBER); bigram.attach(k_mixture_model_filename, ATTACH_READONLY); print_k_mixture_model_magic_header(output, &bigram); print_k_mixture_model_array_headers(output, &bigram, &phrase_index); print_k_mixture_model_array_items(output, &bigram, &phrase_index); end_data(output); return 0; }
int main(int argc, char * argv[]){ FILE * output = stdout; const char * bigram_filename = "bigram.db"; MemoryChunk * chunk = NULL; FacadePhraseIndex phrase_index; if (!load_phrase_index(&phrase_index)) exit(ENOENT); Bigram bigram; bigram.attach(bigram_filename, ATTACH_READONLY); begin_data(output); gen_unigram(output, &phrase_index); gen_bigram(output, &phrase_index, &bigram); end_data(output); return 0; }
/// Look up the names stored for a taxonomy id.
///
/// Bisects the tax id index exposed by a function-local static
/// CTaxDBFileInfo, then cuts the matching record out of the data area
/// and splits it on tab characters into four fields.
///
/// @param tax_id  Taxonomy id to look up.
/// @param info    Receives taxid, scientific_name, common_name, blast_name
///                and s_kingdom. taxid is written as soon as the id is
///                found in the index; the four strings are written only
///                when the function returns true.
/// @return true when the id is present and its record yields three
///         tab-separated fields followed by a non-empty remainder;
///         false otherwise (including when the taxonomy info is missing).
bool CSeqDBTaxInfo::GetTaxNames(Int4 tax_id, SSeqDBTaxInfo & info)
{
    static CTaxDBFileInfo t;

    if (t.IsMissingTaxInfo()) {
        return false;
    }

    Int4 lo = 0;
    Int4 hi = t.GetTaxidCount() - 1;

    const char        * data  = t.GetDataPtr();
    const CSeqDBTaxId * index = t.GetIndexPtr();

    // Ids outside the range covered by the first and last entries cannot
    // be present.
    const Int4 first_taxid = index[lo].GetTaxId();
    const Int4 last_taxid  = index[hi].GetTaxId();
    if (tax_id < first_taxid || tax_id > last_taxid) {
        return false;
    }

    // Bisection. The bounds are moved onto the probe itself (not past it),
    // so the loop ends either on an exact hit or when the midpoint stops
    // moving.
    Int4 probe      = (lo + hi) / 2;
    Int4 prev_probe = probe;

    for (;;) {
        const Int4 probe_taxid = index[probe].GetTaxId();

        if (tax_id == probe_taxid) {
            break;                      // exact hit
        }
        if (tax_id < probe_taxid) {
            hi = probe;
        } else {
            lo = probe;
        }

        probe = (lo + hi) / 2;

        if (probe == prev_probe) {
            // Midpoint is stuck: the only candidate left is the entry
            // just above it.
            if (tax_id > probe_taxid) {
                ++probe;
            }
            break;
        }
        prev_probe = probe;
    }

    if (index[probe].GetTaxId() != tax_id) {
        return false;
    }

    info.taxid = tax_id;

    const Uint4 rec_begin = index[probe].GetOffset();
    Uint4       rec_end   = 0;

    if (probe == hi) {
        // Last index is special: there is no following entry, so the
        // record runs to the end of the data file.
        // (hi is only ever lowered onto entries whose id is greater than
        // tax_id, so -- assuming the index is sorted by ascending id -- a
        // hit at hi means hi is still the final entry.)
        rec_end = static_cast<Uint4>(t.GetDataFileSize());

        if (rec_end < rec_begin) {
            // Should not happen.
            ERR_POST( "Error: Offset error at end of taxdb file.");
            return false;
        }
    } else {
        // Otherwise the record ends where the next entry's data begins.
        rec_end = index[probe + 1].GetOffset();
    }

    const char * rec_ptr = &data[rec_begin];
    CSeqDB_Substring buffer(rec_ptr, rec_ptr + (rec_end - rec_begin));

    CSeqDB_Substring sci, com, blast, king;

    // All three splits are performed unconditionally, in this order.
    // Presumably each call moves the leading field out of buffer into its
    // second argument -- confirm against SeqDB_SplitString.
    const bool got_sci   = SeqDB_SplitString(buffer, sci,   '\t');
    const bool got_com   = SeqDB_SplitString(buffer, com,   '\t');
    const bool got_blast = SeqDB_SplitString(buffer, blast, '\t');

    // Whatever is left after the three splits is taken as the kingdom.
    king = buffer;

    if (!(got_sci && got_com && got_blast && buffer.Size())) {
        return false;
    }

    sci  .GetString(info.scientific_name);
    com  .GetString(info.common_name);
    blast.GetString(info.blast_name);
    king .GetString(info.s_kingdom);

    return true;
}