Esempio n. 1
0
int main(int argc, char** argv) {

    // Create an instance of the Kytea program
    Kytea kytea;
    
    // Load a KyTea model from a model file
    //  this can be a binary or text model in any character encoding,
    //  it will be detected automatically
    kytea.readModel("../../data/model.bin");

    // Get the string utility class. This allows you to convert from
    //  the appropriate string encoding to Kytea's internal format
    StringUtil* util = kytea.getStringUtil(); 

    // Get the configuration class, this allows you to read or set the
    //  configuration for the analysis
    KyteaConfig* config = kytea.getConfig();

    // Map a plain text string to a KyteaString, and create a sentence object
    KyteaString surface_string = util->mapString("これはテストです。");
    KyteaSentence sentence(surface_string, util->normalize(surface_string));

    // Find the word boundaries
    kytea.calculateWS(sentence);
    // Find the pronunciations for each tag level
    for(int i = 0; i < config->getNumTags(); i++)
        kytea.calculateTags(sentence,i);

    // For each word in the sentence
    const KyteaSentence::Words & words =  sentence.words;
    for(int i = 0; i < (int)words.size(); i++) {
        // Print the word
        cout << util->showString(words[i].surface);
        // For each tag level
        for(int j = 0; j < (int)words[i].tags.size(); j++) {
            cout << "\t";
            // Print each of its tags
            for(int k = 0; k < (int)words[i].tags[j].size(); k++) {
                cout << " " << util->showString(words[i].tags[j][k].first) << 
                        "/" << words[i].tags[j][k].second;
            }
        }
        cout << endl;
    }
    cout << endl;

}
Esempio n. 2
0
// Reads the configuration section of a text-format model from str_ into
// `config`, then restores the character map of the config's string utility.
//
// Lines consumed, in order:
//   1. one header line (skipped without inspection)
//   2. option lines "<name> [<value>]", each passed to
//      config.parseTrainArg(); the run ends at an empty line or end of stream
//   3. the literal line "characters"
//   4. one line holding the serialized string utility
//   5. one trailing line (consumed, not validated)
//
// Side effect: stores config.getNumTags() in numTags_.
// Errors: THROW_ERROR when the "characters" marker line is not found.
void TextModelIO::readConfig(KyteaConfig & config) {
    string line;
    getline(*str_,line); // ignore the header
    while(getline(*str_, line) && line.length() != 0) {
        // The tokens are declared per line on purpose: when a line holds a
        // single token, the second extraction fails and leaves its target
        // unmodified, so strings shared across iterations would hand the
        // previous line's value to a flag-only option.
        string name, value;
        istringstream iss(line);
        iss >> name;
        iss >> value;
        // A null value pointer tells parseTrainArg the option has no argument
        config.parseTrainArg(name.c_str(), (value.empty() ? 0 : value.c_str()));
    }
    numTags_ = config.getNumTags();
    
    getline(*str_,line); // check the header
    if(line != "characters") THROW_ERROR("Badly formatted file, expected 'characters', got '" << line << "'");
    getline(*str_, line); // get the serialized string util
    config.getStringUtil()->unserialize(line);
    getline(*str_, line); // consume the last line of the section (not validated)
}
Esempio n. 3
0
// trains a KyTea model
int main(int argc, const char **argv) {

#ifndef KYTEA_SAFE
    try {
#endif
        KyteaConfig * config = new KyteaConfig;
        config->setDebug(1);
        config->setOnTraining(true);
        config->parseTrainCommandLine(argc, argv);

        Kytea kytea(config);
        kytea.trainAll();
        return 0;
#ifndef KYTEA_SAFE
    } catch (exception &e) {
        cerr << endl;
        cerr << " KyTea Error: " << e.what() << endl;
        return 1;
    }
#endif

}
Esempio n. 4
0
// Writes the configuration section of a text-format model to str_.
//
// Output, in order: a header line ("KyTea <version> T <encoding>"), one
// option line per setting, an empty line, the "characters" marker, the
// serialized string utility, and a closing empty line.
//
// Side effect: stores config.getNumTags() in numTags_.
void TextModelIO::writeConfig(const KyteaConfig & config) {

    // Header line
    *str_ << "KyTea " << MODEL_IO_VERSION << " T " << config.getEncodingString() << endl;

    numTags_ = static_cast<int>(config.getNumTags());

    // Flag-only options are written only when the feature is switched off
    if(!config.getDoWS()) {
        *str_ << "-nows" << endl;
    }
    if(!config.getDoTags()) {
        *str_ << "-notags" << endl;
    }
    *str_ << "-numtags " << numTags_ << endl;
    if(config.getBias() < 0) {
        *str_ << "-nobias" << endl;
    }

    // Valued options, one per line; the window and n-gram settings are
    // written as plain integers
    *str_ << "-charw " << static_cast<int>(config.getCharWindow()) << endl;
    *str_ << "-charn " << static_cast<int>(config.getCharN()) << endl;
    *str_ << "-typew " << static_cast<int>(config.getTypeWindow()) << endl;
    *str_ << "-typen " << static_cast<int>(config.getTypeN()) << endl;
    *str_ << "-dicn "  << static_cast<int>(config.getDictionaryN()) << endl;
    *str_ << "-eps " << config.getEpsilon() << endl;
    *str_ << "-solver " << config.getSolverType() << endl;

    // Empty line closing the option list
    *str_ << endl;

    // Character map: marker line followed by the serialized string utility
    *str_ << "characters" << endl;
    *str_ << config.getStringUtil()->serialize() << endl;

    // Empty line closing the section
    *str_ << endl;

}