Пример #1
0
//**********************************************************************
//
// M A I N
//
//**********************************************************************
int main(int argc, char *argv[])
{
  QCoreApplication a(argc, argv);
  QsLogging::initQsLog();
  if (argc<1) {
        cerr << USAGE;
        return EXIT_FAILURE;
    }
    QsLogging::initQsLog();
    readCommandLineArguments(argc,argv);
    if (param.help) {
        cerr << HELP;
        return EXIT_FAILURE;
    }


    string resourcesPath=getenv("LIMA_RESOURCES")==0?"/usr/share/apps/lima/resources":string(getenv("LIMA_RESOURCES"));
    string configDir=getenv("LIMA_CONF")==0?"/usr/share/config/lima":string(getenv("LIMA_CONF"));

    if ( (!param.language.size()) && (!param.codeFile.size()) ) {
        cerr << "no codefile nor language specified !" << endl;
        cerr << "Use e.g option '-l fre'." << endl;
        cerr << "Option '-h' gives full help" << endl;
        return EXIT_FAILURE;
    }
    else if ( param.language.size() ) {
        param.codeFile=resourcesPath+"/LinguisticProcessings/"+param.language+"/code-"+param.language+".xml";
    }

    cerr << "read proccodeManager from file " << param.codeFile << "..." << endl;
    PropertyCodeManager propcodemanager;
    propcodemanager.readFromXmlFile(param.codeFile);
    cerr << "get macroManager..." << endl;
    const PropertyManager& macroManager = propcodemanager.getPropertyManager("MACRO");
    const PropertyAccessor& propertyAccessor = macroManager.getPropertyAccessor();
    set<LinguisticCode> referenceProperties;
    for ( std::vector<string>::const_iterator macro = param.macro.begin() ;
            macro != param.macro.end() ; macro++ ) {
        cerr << "referenceProperties.insert(" << *macro << ")" << endl;
        LinguisticCode referenceProperty = macroManager.getPropertyValue(*macro);
        referenceProperties.insert(referenceProperty);
    }

    cerr << "referencePropertySet= ";
    set<LinguisticCode>::iterator propIt = referenceProperties.begin();
    if ( propIt != referenceProperties.end() ) {
        const std::string& symbol = macroManager.getPropertySymbolicValue(*propIt);
        cerr << symbol;
        propIt++;
    }
    for ( ; propIt != referenceProperties.end() ; propIt++ ) {
        const std::string& symbol = macroManager.getPropertySymbolicValue(*propIt);
        cerr << ", " << symbol;
    }
    cerr << endl;

    Lexicon lex;

    // read all files and count terms
    vector<string>::const_iterator
    file=param.inputFiles.begin(),
         file_end=param.inputFiles.end();
    for (;file!=file_end; file++) {

        ifstream fileIn((*file).c_str(), std::ifstream::binary);
        if (! fileIn) {
            cerr << "cannot open input file [" << *file << "]" << endl;
            continue;
        }
        BoWBinaryReader reader;
        try {
            reader.readHeader(fileIn);
        }
        catch (exception& e) {
            cerr << "Error: " << e.what() << endl;
            return EXIT_FAILURE;
        }

        switch (reader.getFileType()) {
        case BOWFILE_TEXT: {
            cerr << "Build lexicon from BoWText [" << *file << "]" << endl;
            try {
                readBowFileText(fileIn,reader, lex, propertyAccessor, referenceProperties);
            }
            catch (exception& e) {
                cerr << "Error: " << e.what() << endl;
            }
            break;
        }
        case BOWFILE_DOCUMENTST: {
            cerr << "ReadBoWFile: file contains a BoWDocumentST  -> not treated" << endl;
        }
        case BOWFILE_DOCUMENT: {
            cerr << "ReadBoWFile: build BoWdocument from  " << *file<< endl;
            BoWDocument* document=new BoWDocument();
            try {
                cerr << "ReadBoWFile: extract terms... " << endl;
                readDocuments(fileIn,document,reader, lex, macroManager, propertyAccessor, referenceProperties);
            }
            catch (exception& e) {
                cerr << "Error: " << e.what() << endl;
            }
            fileIn.close();
            delete document;
            break;
        }
        default: {
            cerr << "format of file " << reader.getFileTypeString() << " not managed"
                 << endl;
            return EXIT_FAILURE;
        }
        }
    }

    // output stream (default is 'cout')
    std::ostream *s_out;

    // Manage output
    if ( param.outputFilename.length() == 0) s_out=&std::cout;
    else s_out = new std::ofstream(param.outputFilename.c_str(), std::ios_base::out | std::ios_base::binary | std::ios_base::trunc);

    // output lexicon
    Lexicon::const_iterator
    w=lex.begin(),
      w_end=lex.end();
    for (;w!=w_end; w++) {
        (*s_out) << Common::Misc::limastring2utf8stdstring((*w).second.second) << "|"
        << Common::Misc::limastring2utf8stdstring((*w).first) << "|"
        << (*w).second.first << endl;
    }

    // Close output file (if any)
    if (  param.outputFilename.length() != 0)
        dynamic_cast<std::ofstream*>(s_out)->close();

    return EXIT_SUCCESS;
}
Пример #2
0
int run(int argc, char** argv)
{
  readCommandLineArguments(argc, argv);

  if (param->help)
  {
    usage(argc, argv);
    exit(0);
  }

  std::string resourcesPath = (getenv("LIMA_RESOURCES")!=0) ? string(getenv("LIMA_RESOURCES")) : string("/usr/share/apps/lima/resources");
  std::string configPath = (param->configDir.size()>0) ? param->configDir : string("");
  if (configPath.size() == 0)
    configPath = string(getenv("LIMA_CONF"));
  if (configPath.size() == 0)
    configPath = string("/usr/share/config/lima");

  if (QsLogging::initQsLog(QString::fromUtf8(configPath.c_str())) != 0)
  {
    LOGINIT("Common::Misc");
    LERROR << "Call to QsLogging::initQsLog(\"" << configPath << "\") failed.";
    return EXIT_FAILURE;
  }

  // Necessary to initialize factories
  Lima::AmosePluginsManager::single();

  setlocale(LC_ALL,"fr_FR.UTF-8");

  // check that input file exists
  {
    ifstream fin(param->input.c_str(), std::ifstream::binary);
    if (!fin.good())
    {
      cerr << "can't open input file " << param->input << endl;
      exit(-1);
    }
    fin.close();
  }

  // parse charchart
  if (param->charChart == "") {
    cerr << "please specify CharChart file with --charChart=<file> option" << endl;
    exit(0);
  }
  CharChart charChart;
  charChart.loadFromFile(param->charChart);

  try
  {
    cerr << "parse charChart file : " << param->charChart << endl;
//     cerr << "TODO: to implement at "<<__FILE__<<", line "<<__LINE__<<"!" <<std::endl;
//     exit(2);
//     charChart = 0;
/*    ParseCharClass parseCharClass;
    parseCharClass.parse(param->charChart);
    charChart = ParseChar::parse(param->charChart, parseCharClass);*/
  }
  catch (exception& e)
  {
    cerr << "Caught exception while parsing file " << param->charChart << endl;
    cerr << e.what() << endl;
    exit(-1);
  }

  if (param->extractKeys != "")
  {
    // just extract keys
    ofstream fout(param->extractKeys.c_str(), std::ofstream::binary);
    if (!fout.good())
    {
      cerr << "can't open file " << param->extractKeys << endl;
      exit(-1);
    }
    KeysLogger keysLogger(fout,&charChart,param->reverseKeys);

    cerr << "parse input file : " << param->input << endl;
    try
    {
      QXmlSimpleReader parser;
      //     parser->setValidationScheme(SAXParser::Val_Auto);
      //     parser->setDoNamespaces(false);
      //     parser->setDoSchema(false);
      //     parser->setValidationSchemaFullChecking(false);
      parser.setContentHandler(&keysLogger);
      parser.setErrorHandler(&keysLogger);
      QFile file(param->input.c_str());
      if (!file.open(QIODevice::ReadOnly))
      {
        std::cerr << "Error opening " << param->input << std::endl;
        return 1;
      }
      if (!parser.parse( QXmlInputSource(&file)))
      {
        std::cerr << "Error parsing " << param->input << " : " << parser.errorHandler()->errorString().toUtf8().constData() << std::endl;
        return 1;
      }
      else
      {
        std::cerr << std::endl;
      }
    }
    catch (const XMLException& toCatch)
    {
      std::cerr << "An error occurred  Error: " << toCatch.what() << endl;
      throw;
    }
    fout.close();
  } else {
    // compile dictionaries

    cerr << "parse property code file : " << param->propertyFile << endl;
    PropertyCodeManager propcodemanager;
    propcodemanager.readFromXmlFile(param->propertyFile);

    cerr << "parse symbolicCode file : " << param->symbolicCodes << endl;
    map<string,LinguisticCode> conversionMap;
    propcodemanager.convertSymbolicCodes(param->symbolicCodes,conversionMap);
    cerr << conversionMap.size() << " code read from symbolicCode file" << endl;
/*    for (map<string,LinguisticCode>::const_iterator it=conversionMap.begin();
         it!=conversionMap.end();
         it++)
    {
      cerr << it->first << " -> " << it->second << endl;
    }*/

    AbstractAccessByString* access(0);
    if (param->fsaKey!="") {
      cerr << "load fsa access method : " << param->fsaKey << endl;
      FsaAccessSpare16* fsaAccess=new FsaAccessSpare16();
      fsaAccess->read(param->fsaKey);
      access=fsaAccess;
    } else {
      cerr << "ERROR : no access Keys defined !" << endl;
      exit(-1);
    }
    cerr << access->getSize() << " keys loaded" << endl;

    cerr << "parse input file : " << param->input << endl;
    DictionaryCompiler handler(&charChart,access,conversionMap,param->reverseKeys);

    QXmlSimpleReader parser;
//     parser->setValidationScheme(SAXParser::Val_Auto);
//     parser->setDoNamespaces(false);
//     parser->setDoSchema(false);
//     parser->setValidationSchemaFullChecking(false);
    try
    {
      parser.setContentHandler(&handler);
      parser.setErrorHandler(&handler);
      QFile file(param->input.c_str());
      if (!file.open(QIODevice::ReadOnly))
      {
        std::cerr << "Error opening " << param->input << std::endl;
        return 1;
      }
      if (!parser.parse( QXmlInputSource(&file)))
      {
        std::cerr << "Error parsing " << param->input << " : " << parser.errorHandler()->errorString().toUtf8().constData() << std::endl;
        return 1;
      }
    }
    catch (const XMLException& toCatch)
    {
      cerr << "An error occurred  Error: " << toCatch.what() << endl;
      throw;
    }

    cerr << "write data to output file : " << param->output << endl;
    ofstream fout(param->output.c_str(),ios::out | ios::binary);
    if (!fout.good())
    {
      cerr << "can't open file " << param->output << endl;
      exit(-1);
    }
    handler.writeBinaryDictionary(fout);
    fout.close();
    delete access;
  }
  return EXIT_SUCCESS;
}