//********************************************************************** // // M A I N // //********************************************************************** int main(int argc, char *argv[]) { QCoreApplication a(argc, argv); QsLogging::initQsLog(); if (argc<1) { cerr << USAGE; return EXIT_FAILURE; } QsLogging::initQsLog(); readCommandLineArguments(argc,argv); if (param.help) { cerr << HELP; return EXIT_FAILURE; } string resourcesPath=getenv("LIMA_RESOURCES")==0?"/usr/share/apps/lima/resources":string(getenv("LIMA_RESOURCES")); string configDir=getenv("LIMA_CONF")==0?"/usr/share/config/lima":string(getenv("LIMA_CONF")); if ( (!param.language.size()) && (!param.codeFile.size()) ) { cerr << "no codefile nor language specified !" << endl; cerr << "Use e.g option '-l fre'." << endl; cerr << "Option '-h' gives full help" << endl; return EXIT_FAILURE; } else if ( param.language.size() ) { param.codeFile=resourcesPath+"/LinguisticProcessings/"+param.language+"/code-"+param.language+".xml"; } cerr << "read proccodeManager from file " << param.codeFile << "..." << endl; PropertyCodeManager propcodemanager; propcodemanager.readFromXmlFile(param.codeFile); cerr << "get macroManager..." << endl; const PropertyManager& macroManager = propcodemanager.getPropertyManager("MACRO"); const PropertyAccessor& propertyAccessor = macroManager.getPropertyAccessor(); set<LinguisticCode> referenceProperties; for ( std::vector<string>::const_iterator macro = param.macro.begin() ; macro != param.macro.end() ; macro++ ) { cerr << "referenceProperties.insert(" << *macro << ")" << endl; LinguisticCode referenceProperty = macroManager.getPropertyValue(*macro); referenceProperties.insert(referenceProperty); } cerr << "referencePropertySet= "; set<LinguisticCode>::iterator propIt = referenceProperties.begin(); if ( propIt != referenceProperties.end() ) { const std::string& symbol = macroManager.getPropertySymbolicValue(*propIt); cerr << symbol; propIt++; } for ( ; propIt != referenceProperties.end() ; propIt++ ) { const std::string& symbol = macroManager.getPropertySymbolicValue(*propIt); cerr << ", " << symbol; } cerr << endl; Lexicon lex; // read all files and count terms vector<string>::const_iterator file=param.inputFiles.begin(), file_end=param.inputFiles.end(); for (;file!=file_end; file++) { ifstream fileIn((*file).c_str(), std::ifstream::binary); if (! fileIn) { cerr << "cannot open input file [" << *file << "]" << endl; continue; } BoWBinaryReader reader; try { reader.readHeader(fileIn); } catch (exception& e) { cerr << "Error: " << e.what() << endl; return EXIT_FAILURE; } switch (reader.getFileType()) { case BOWFILE_TEXT: { cerr << "Build lexicon from BoWText [" << *file << "]" << endl; try { readBowFileText(fileIn,reader, lex, propertyAccessor, referenceProperties); } catch (exception& e) { cerr << "Error: " << e.what() << endl; } break; } case BOWFILE_DOCUMENTST: { cerr << "ReadBoWFile: file contains a BoWDocumentST -> not treated" << endl; } case BOWFILE_DOCUMENT: { cerr << "ReadBoWFile: build BoWdocument from " << *file<< endl; BoWDocument* document=new BoWDocument(); try { cerr << "ReadBoWFile: extract terms... " << endl; readDocuments(fileIn,document,reader, lex, macroManager, propertyAccessor, referenceProperties); } catch (exception& e) { cerr << "Error: " << e.what() << endl; } fileIn.close(); delete document; break; } default: { cerr << "format of file " << reader.getFileTypeString() << " not managed" << endl; return EXIT_FAILURE; } } } // output stream (default is 'cout') std::ostream *s_out; // Manage output if ( param.outputFilename.length() == 0) s_out=&std::cout; else s_out = new std::ofstream(param.outputFilename.c_str(), std::ios_base::out | std::ios_base::binary | std::ios_base::trunc); // output lexicon Lexicon::const_iterator w=lex.begin(), w_end=lex.end(); for (;w!=w_end; w++) { (*s_out) << Common::Misc::limastring2utf8stdstring((*w).second.second) << "|" << Common::Misc::limastring2utf8stdstring((*w).first) << "|" << (*w).second.first << endl; } // Close output file (if any) if ( param.outputFilename.length() != 0) dynamic_cast<std::ofstream*>(s_out)->close(); return EXIT_SUCCESS; }
int run(int argc, char** argv) { readCommandLineArguments(argc, argv); if (param->help) { usage(argc, argv); exit(0); } std::string resourcesPath = (getenv("LIMA_RESOURCES")!=0) ? string(getenv("LIMA_RESOURCES")) : string("/usr/share/apps/lima/resources"); std::string configPath = (param->configDir.size()>0) ? param->configDir : string(""); if (configPath.size() == 0) configPath = string(getenv("LIMA_CONF")); if (configPath.size() == 0) configPath = string("/usr/share/config/lima"); if (QsLogging::initQsLog(QString::fromUtf8(configPath.c_str())) != 0) { LOGINIT("Common::Misc"); LERROR << "Call to QsLogging::initQsLog(\"" << configPath << "\") failed."; return EXIT_FAILURE; } // Necessary to initialize factories Lima::AmosePluginsManager::single(); setlocale(LC_ALL,"fr_FR.UTF-8"); // check that input file exists { ifstream fin(param->input.c_str(), std::ifstream::binary); if (!fin.good()) { cerr << "can't open input file " << param->input << endl; exit(-1); } fin.close(); } // parse charchart if (param->charChart == "") { cerr << "please specify CharChart file with --charChart=<file> option" << endl; exit(0); } CharChart charChart; charChart.loadFromFile(param->charChart); try { cerr << "parse charChart file : " << param->charChart << endl; // cerr << "TODO: to implement at "<<__FILE__<<", line "<<__LINE__<<"!" <<std::endl; // exit(2); // charChart = 0; /* ParseCharClass parseCharClass; parseCharClass.parse(param->charChart); charChart = ParseChar::parse(param->charChart, parseCharClass);*/ } catch (exception& e) { cerr << "Caught exception while parsing file " << param->charChart << endl; cerr << e.what() << endl; exit(-1); } if (param->extractKeys != "") { // just extract keys ofstream fout(param->extractKeys.c_str(), std::ofstream::binary); if (!fout.good()) { cerr << "can't open file " << param->extractKeys << endl; exit(-1); } KeysLogger keysLogger(fout,&charChart,param->reverseKeys); cerr << "parse input file : " << param->input << endl; try { QXmlSimpleReader parser; // parser->setValidationScheme(SAXParser::Val_Auto); // parser->setDoNamespaces(false); // parser->setDoSchema(false); // parser->setValidationSchemaFullChecking(false); parser.setContentHandler(&keysLogger); parser.setErrorHandler(&keysLogger); QFile file(param->input.c_str()); if (!file.open(QIODevice::ReadOnly)) { std::cerr << "Error opening " << param->input << std::endl; return 1; } if (!parser.parse( QXmlInputSource(&file))) { std::cerr << "Error parsing " << param->input << " : " << parser.errorHandler()->errorString().toUtf8().constData() << std::endl; return 1; } else { std::cerr << std::endl; } } catch (const XMLException& toCatch) { std::cerr << "An error occurred Error: " << toCatch.what() << endl; throw; } fout.close(); } else { // compile dictionaries cerr << "parse property code file : " << param->propertyFile << endl; PropertyCodeManager propcodemanager; propcodemanager.readFromXmlFile(param->propertyFile); cerr << "parse symbolicCode file : " << param->symbolicCodes << endl; map<string,LinguisticCode> conversionMap; propcodemanager.convertSymbolicCodes(param->symbolicCodes,conversionMap); cerr << conversionMap.size() << " code read from symbolicCode file" << endl; /* for (map<string,LinguisticCode>::const_iterator it=conversionMap.begin(); it!=conversionMap.end(); it++) { cerr << it->first << " -> " << it->second << endl; }*/ AbstractAccessByString* access(0); if (param->fsaKey!="") { cerr << "load fsa access method : " << param->fsaKey << endl; FsaAccessSpare16* fsaAccess=new FsaAccessSpare16(); fsaAccess->read(param->fsaKey); access=fsaAccess; } else { cerr << "ERROR : no access Keys defined !" << endl; exit(-1); } cerr << access->getSize() << " keys loaded" << endl; cerr << "parse input file : " << param->input << endl; DictionaryCompiler handler(&charChart,access,conversionMap,param->reverseKeys); QXmlSimpleReader parser; // parser->setValidationScheme(SAXParser::Val_Auto); // parser->setDoNamespaces(false); // parser->setDoSchema(false); // parser->setValidationSchemaFullChecking(false); try { parser.setContentHandler(&handler); parser.setErrorHandler(&handler); QFile file(param->input.c_str()); if (!file.open(QIODevice::ReadOnly)) { std::cerr << "Error opening " << param->input << std::endl; return 1; } if (!parser.parse( QXmlInputSource(&file))) { std::cerr << "Error parsing " << param->input << " : " << parser.errorHandler()->errorString().toUtf8().constData() << std::endl; return 1; } } catch (const XMLException& toCatch) { cerr << "An error occurred Error: " << toCatch.what() << endl; throw; } cerr << "write data to output file : " << param->output << endl; ofstream fout(param->output.c_str(),ios::out | ios::binary); if (!fout.good()) { cerr << "can't open file " << param->output << endl; exit(-1); } handler.writeBinaryDictionary(fout); fout.close(); delete access; } return EXIT_SUCCESS; }