int main(int argc, char * argv[]){
    FILE * output = stdout;
    const char * bigram_filename = SYSTEM_BIGRAM;

    SystemTableInfo system_table_info;

    bool retval = system_table_info.load(SYSTEM_TABLE_INFO);
    if (!retval) {
        fprintf(stderr, "load table.conf failed.\n");
        exit(ENOENT);
    }

    FacadePhraseIndex phrase_index;

    const pinyin_table_info_t * phrase_files =
        system_table_info.get_table_info();

    if (!load_phrase_index(phrase_files, &phrase_index))
        exit(ENOENT);

    Bigram bigram;
    bigram.attach(bigram_filename, ATTACH_READONLY);

    begin_data(output);

    gen_unigram(output, &phrase_index);
    gen_bigram(output, &phrase_index, &bigram);

    end_data(output);
    return 0;
}
Esempio n. 2
0
int main(int argc, char * argv[]){
    FILE * output = stdout;
    const char * bigram_filename = "bigram.db";

    FacadePhraseIndex phrase_index;

    //gb_char binary file
    MemoryChunk * chunk = new MemoryChunk;
    chunk->load("gb_char.bin");
    phrase_index.load(1, chunk);

    //gbk_char binary file
    chunk = new MemoryChunk;
    chunk->load("gbk_char.bin");
    phrase_index.load(2, chunk);

    Bigram bigram;
    bigram.attach(bigram_filename, ATTACH_READONLY);

    begin_data(output);

    gen_unigram(output, &phrase_index);
    gen_bigram(output, &phrase_index, &bigram);

    end_data(output);
    return 0;
}
int main(int argc, char * argv[]){
    int i = 1;
    const char * k_mixture_model_filename = NULL;
    FILE * output = stdout;

    while ( i < argc ){
        if ( strcmp ("--help", argv[i]) == 0 ){
            print_help();
            exit(0);
        } else if ( strcmp ("--k-mixture-model-file", argv[i]) == 0 ){
            if ( ++i > argc ){
                print_help();
                exit(EINVAL);
            }
            k_mixture_model_filename = argv[i];
        } else {
            print_help();
            exit(EINVAL);
        }
        ++i;
    }

    FacadePhraseIndex phrase_index;

    //gb_char binary file
    MemoryChunk * chunk = new MemoryChunk;
    chunk->load("gb_char.bin");
    phrase_index.load(1, chunk);

    //gbk_char binary file
    chunk = new MemoryChunk;
    chunk->load("gbk_char.bin");
    phrase_index.load(2, chunk);

    KMixtureModelBigram bigram(K_MIXTURE_MODEL_MAGIC_NUMBER);
    bigram.attach(k_mixture_model_filename, ATTACH_READONLY);

    print_k_mixture_model_magic_header(output, &bigram);
    print_k_mixture_model_array_headers(output, &bigram, &phrase_index);
    print_k_mixture_model_array_items(output, &bigram, &phrase_index);

    end_data(output);

    return 0;
}
int main(int argc, char * argv[]){
    FILE * output = stdout;
    const char * bigram_filename = "bigram.db";
    MemoryChunk * chunk = NULL;

    FacadePhraseIndex phrase_index;
    if (!load_phrase_index(&phrase_index))
        exit(ENOENT);

    Bigram bigram;
    bigram.attach(bigram_filename, ATTACH_READONLY);

    begin_data(output);

    gen_unigram(output, &phrase_index);
    gen_bigram(output, &phrase_index, &bigram);

    end_data(output);
    return 0;
}
Esempio n. 5
0
bool CSeqDBTaxInfo::GetTaxNames(Int4             tax_id,
                                SSeqDBTaxInfo  & info )
{
	static CTaxDBFileInfo t;
    if (t.IsMissingTaxInfo()) return false;

    Int4 low_index  = 0;
    Int4 high_index = t.GetTaxidCount() - 1;
    
    const char * Data = t.GetDataPtr();
    const CSeqDBTaxId*  Index = t.GetIndexPtr();
    Int4 low_taxid  = Index[low_index ].GetTaxId();
    Int4 high_taxid = Index[high_index].GetTaxId();

    if((tax_id < low_taxid) || (tax_id > high_taxid))
        return false;
    
    Int4 new_index =  (low_index+high_index)/2;
    Int4 old_index = new_index;
    
    while(1) {
        Int4 curr_taxid = Index[new_index].GetTaxId();
        
        if (tax_id < curr_taxid) {
            high_index = new_index;
        } else if (tax_id > curr_taxid){
            low_index = new_index;
        } else { /* Got it ! */
            break;
        }
        
        new_index = (low_index+high_index)/2;
        if (new_index == old_index) {
            if (tax_id > curr_taxid) {
                new_index++;
            }
            break;
        }
        old_index = new_index;
    }
    
    if (tax_id == Index[new_index].GetTaxId()) {
        info.taxid = tax_id;
        
        Uint4 begin_data(Index[new_index].GetOffset());
        Uint4 end_data(0);
        
        if (new_index == high_index) {
            // Last index is special...
            end_data = Uint4(t.GetDataFileSize());
            
            if (end_data < begin_data) {
                // Should not happen.
                ERR_POST( "Error: Offset error at end of taxdb file.");
                return false;
            }
        } else {
            end_data = (Index[new_index+1].GetOffset());
        }
        
        const char * start_ptr = &Data[begin_data];
        
        CSeqDB_Substring buffer(start_ptr, start_ptr + (end_data - begin_data));
        CSeqDB_Substring sci, com, blast, king;
        bool rc1, rc2, rc3;
        
        rc1 = SeqDB_SplitString(buffer, sci, '\t');
        rc2 = SeqDB_SplitString(buffer, com, '\t');
        rc3 = SeqDB_SplitString(buffer, blast, '\t');
        king = buffer;
        
        if (rc1 && rc2 && rc3 && buffer.Size()) {
            sci   .GetString(info.scientific_name);
            com   .GetString(info.common_name);
            blast .GetString(info.blast_name);
            king  .GetString(info.s_kingdom);
            
            return true;
        }
    }
    
    return false;
}