bool CChildView::OpenFile() { ClearHighlight(); CFileDialog fileDialog( TRUE, NULL, L"*.*" ); int result = fileDialog.DoModal(); if( result == IDOK ) { currentFilePath = fileDialog.GetPathName(); } else return 0; SetWindowText( fileDialog.GetFileName() ); TextHandler textHandler; if ( !textHandler.ReadFile( currentFilePath.GetString() ) ) { AfxMessageBox( Can_Not_Open_File ); return false; } if ( textHandler.GetText().c_str() == L"") return false; m_tvrichEdit.SetWindowTextW( textHandler.GetText().c_str() ); return true; }
int AppMain(int argc, char * argv[]) { if (argc < 3) { usage(argc, argv); return -1; } // Create the appropriate parser and acronyms list if needed Parser * parser = NULL; parser = TextHandlerManager::createParser(LocalParameter::docFormat, LocalParameter::acronyms); if (!parser) throw Exception("ParseToFile", "Unable to create parser for docFormat"); // Create the stopper if needed. Stopper * stopper = NULL; stopper = TextHandlerManager::createStopper(LocalParameter::stopwords); // Create the stemmer if needed. Stemmer * stemmer = NULL; stemmer = TextHandlerManager::createStemmer(LocalParameter::stemmer); // Create the document writer. if (LocalParameter::outFile.empty()) { throw Exception("ParseToFile", "outputFile must be specified"); } lemur::parse::WriterTextHandler writer(LocalParameter::outFile); // chain the parser/stopper/stemmer/indexer TextHandler * th = parser; if (stopper != NULL) { th->setTextHandler(stopper); th = stopper; } if (stemmer != NULL) { th->setTextHandler(stemmer); th = stemmer; } th->setTextHandler(&writer); // parse the data files for (int i = 2; i < argc; i++) { cerr << "Parsing " << argv[i] << endl; string filename(argv[i]); if (!indri::file::Path::exists(filename)) { throw Exception("ParseToFile", "datfile specified does not exist"); } parser->parse(filename); } // free memory delete(stopper); delete(stemmer); delete(parser); return 0; }
/**
 * JNI entry point that builds an index from a parameter file.
 *
 * The parameter file named by `paramfile` is pushed with ParamPushFile and
 * read through BIParam::get(). Depending on the parameters, a document
 * manager, parser, stopper, stemmer and an indexer ("indri" or "key") are
 * created and chained, then either the document manager drives the build or
 * every file listed in BIParam::dataFiles is parsed in turn.
 *
 * Progress and warnings are reported by calling the Java method
 * `displayMessage(String)` on `upclass`. Fatal configuration problems are
 * reported by raising a java.lang.Exception in the calling thread.
 *
 * @param env       JNI environment of the calling thread.
 * @param upclass   Java object that receives displayMessage callbacks.
 * @param paramfile path of the parameter file to load.
 */
JNIEXPORT void JNICALL Java_lemurproject_lemur_ui_JBuildIndex_buildIndex
(JNIEnv * env, jobject upclass, jstring paramfile) {
  // Holds every object allocated for one build. Its destructor deletes them
  // in the order the function has always used and pops the parameter file,
  // so early returns no longer leak or leave the parameter stack unbalanced.
  struct BuildResources {
    DocumentManager* docmgr;
    Parser* parser;
    Stopper* stopper;
    Stemmer* stemmer;
    TextHandler* indexer;
    lemur::index::KeyfileIncIndex* index;

    BuildResources()
      : docmgr(NULL), parser(NULL), stopper(NULL), stemmer(NULL),
        indexer(NULL), index(NULL) {}

    ~BuildResources() {
      delete(indexer);
      delete(stopper);
      delete(stemmer);
      delete(parser);
      delete(index);
      delete(docmgr);
      ParamPopFile();
    }
  };

  // Sends one text message to upclass.displayMessage and releases the local
  // reference right away so long file lists do not accumulate references.
  struct Messenger {
    JNIEnv* env;
    jobject target;
    jmethodID method;

    Messenger(JNIEnv* e, jobject t, jmethodID m)
      : env(e), target(t), method(m) {}

    void send(const char* text) const {
      jstring str = env->NewStringUTF(text);
      if (str == NULL)
        return; // string could not be created; nothing to deliver
      env->CallVoidMethod(target, method, str);
      env->DeleteLocalRef(str);
    }
  };

  // set up class for throwing exceptions
  jclass exception = env->FindClass("java/lang/Exception");
  if (exception == NULL)
    return; // lookup failed; the JVM reports the failure to the caller

  // set up method for sending messages
  jmethodID writeMesg = env->GetMethodID(env->GetObjectClass(upclass),
                                         "displayMessage",
                                         "(Ljava/lang/String;)V");
  if (writeMesg == NULL)
    return; // lookup failed; the JVM reports the failure to the caller
  const Messenger messenger(env, upclass, writeMesg);

  // convert string
  const char* file = env->GetStringUTFChars(paramfile, NULL);
  if (file == NULL)
    return;
  ParamPushFile(file);
  BuildResources res; // from here on every exit path pops the param file
  BIParam::get();
  // The characters must be released whether or not the JVM made a copy.
  env->ReleaseStringUTFChars(paramfile, file);

  // Cannot create anything without Index name
  if (BIParam::index.empty()) {
    env->ThrowNew(exception, "Could not find name of index to build. Did you try to modify the auto-generated param file?");
    return;
  }

  if (BIParam::indexType.empty()) {
    env->ThrowNew(exception, "Could not find type of index you want to build. Did you try to modify the auto-generated param file?");
    return;
  }

  // Only these two index types are handled below; anything else used to
  // leave the indexer pointer uninitialized.
  const bool wantIndri = (BIParam::indexType == "indri");
  const bool wantKey = (BIParam::indexType == "key");
  if (!wantIndri && !wantKey) {
    env->ThrowNew(exception, "Unknown type of index requested. Valid values are \"key\" or \"indri\".");
    return;
  }

  // Create DocumentManager with appropriate parse mode
  if (!BIParam::manager.empty()) {
    res.docmgr = DocMgrManager::createDocMgr(BIParam::mgrType,
                                             BIParam::manager,
                                             BIParam::docFormat,
                                             BIParam::dataFiles);
    if (!res.docmgr) {
      messenger.send("\n WARNING: CONTINUING TO BUILD INDEX WITHOUT DOCUMENT MANAGER. COULD NOT CREATE DOCUMENT MANAGER.\n");
    }
  }

  // Create the appropriate parser and acronyms list if needed
  res.parser = TextHandlerManager::createParser(BIParam::docFormat,
                                                BIParam::acronyms);
  // if failed to create parser, abort
  if (!res.parser) {
    env->ThrowNew(exception, "Error trying to create parser. Build index failed.");
    return;
  }

  // Create the stopper if needed.
  try {
    res.stopper = TextHandlerManager::createStopper(BIParam::stopwords);
  } catch (const Exception &) {
    messenger.send("\n WARNING: CONTINUING TO BUILD INDEX WITHOUT STOPWORDS FILE LOADED.\n");
  }

  // Create the stemmer if needed.
  try {
    res.stemmer = TextHandlerManager::createStemmer(BIParam::stemmer);
  } catch (const Exception &) {
    messenger.send("\n WARNING: CONTINUING TO BUILD INDEX WITHOUT STEMMING\n");
  }

  // Decide the head of the handler chain and, when no document manager is
  // in use, open the list of data files. Both steps can fail, so they run
  // before the indexer is allocated.
  TextHandler* th = res.parser;
  ifstream source;
  if (res.docmgr) {
    th = dynamic_cast<TextHandler*>(res.docmgr);
    if (th == NULL) {
      env->ThrowNew(exception, "Document manager cannot be used as a text handler. Build index failed.");
      return;
    }
    cerr << "TH set" << endl;
  } else {
    if (!indri::file::Path::exists(BIParam::dataFiles)) {
      env->ThrowNew(exception, "Error finding list of files to index. Did you delete the auto-generated .data file?");
      return;
    }
    source.open(BIParam::dataFiles.c_str());
    if (!source.is_open()) {
      env->ThrowNew(exception, "Could not open list of files to index. Did you edit the auto-generated .data file?");
      return;
    }
  }

  // Create the indexer for the requested index type.
  if (wantIndri) {
    res.indexer = new lemur::parse::IndriTextHandler(BIParam::index,
                                                     BIParam::memory,
                                                     res.parser);
  } else {
    res.index = new lemur::index::KeyfileIncIndex(BIParam::index,
                                                  BIParam::memory);
    lemur::parse::KeyfileTextHandler* keyHandler =
      new lemur::parse::KeyfileTextHandler(res.index, BIParam::countStopWords);
    res.indexer = keyHandler;
    if (res.docmgr)
      keyHandler->setDocManager(res.docmgr->getMyID());
  }

  // chain the parser/stopper/stemmer/indexer
  if (res.stopper != NULL) {
    th->setTextHandler(res.stopper);
    th = res.stopper;
  }
  if (res.stemmer != NULL) {
    th->setTextHandler(res.stemmer);
    th = res.stemmer;
  }
  th->setTextHandler(res.indexer);

  if (res.docmgr) {
    messenger.send("Parsing files");
    res.docmgr->buildMgr();
  } else {
    // parse the data files, one path per line
    string filename;
    while (getline(source, filename)) {
      const string msg = "Parsing file: " + filename;
      messenger.send(msg.c_str());
      try {
        res.parser->parse(filename);
      } catch (Exception &ex) {
        // A file that cannot be parsed is reported and skipped.
        const string skipped = "Skipping file. Could not parse. " + ex.what();
        messenger.send(skipped.c_str());
      }
    }
  }

  // res goes out of scope here: frees all handlers and pops the param file.
}
std::string SortCmd::execute(){ TextHandler *handler = TextBuddy::Instance().getTextHandler(); handler->sortContents(); ss << FEEDBACK_SORT(filename) << std::endl; return ss.str(); }
int AppMain(int argc, char * argv[]) { if ((argc < 3) && LocalParameter::dataFiles.empty()) { usage(argc, argv); return -1; } // Cannot create anything without Index name if (LocalParameter::index.empty()) { LEMUR_THROW(LEMUR_MISSING_PARAMETER_ERROR, "Please provide a name for the index you want to build. \nCheck the \"index\" parameter."); } if (LocalParameter::indexType.empty()) { LEMUR_THROW(LEMUR_MISSING_PARAMETER_ERROR, "Please provide a type for the index you want to build. \nCheck the \"indexType\" parameter. \nValid values are \"inv\",\"key\", or \"indri\" "); } // Create the appropriate parser and acronyms list if needed Parser * parser = NULL; parser = TextHandlerManager::createParser(LocalParameter::docFormat, LocalParameter::acronyms); // if failed to create parser, abort if (!parser) { LEMUR_THROW(LEMUR_MISSING_PARAMETER_ERROR, "Please use a valid value for the required parameter \"docFormat\". Valid values are \"trec\", \"web\", \"reuters\",\"chinese\", \"chinesechar\", and \"arabic\". See program usage or Lemur documentation for more information."); } // Create the stopper if needed. Stopper * stopper = NULL; try { stopper = TextHandlerManager::createStopper(LocalParameter::stopwords); } catch (Exception &ex) { ex.writeMessage(); cerr << "WARNING: BuildIndex continuing without stop words file loaded." << endl << "To omit stop words, check the \"stopwords\" parameter." << endl; } // Create the stemmer if needed. Stemmer * stemmer = NULL; try { stemmer = TextHandlerManager::createStemmer(LocalParameter::stemmer); } catch (Exception &ex) { ex.writeMessage(); cerr << "WARNING: BuildIndex continuing without stemmer." << endl << "To use a stemmer, check the \"stemmer\" and other supporting parameters." << endl << "See program usage or Lemur documentation for more information."; } // Create the indexer. 
(Note: this has an InvFPPushIndex that // it uses to do the indexing, but InvFPTextHandler implements the // TextHandler class, so that it is compatible with my parser // architecture. See the TextHandler and InvFPTextHandler classes // for more info.) TextHandler* indexer; lemur::index::KeyfileIncIndex* index = NULL; if (LocalParameter::indexType == "indri") { indexer = new lemur::parse::IndriTextHandler(LocalParameter::index, LocalParameter::memory, parser); } else if (LocalParameter::indexType == "inv") { indexer = new lemur::parse::InvFPTextHandler(LocalParameter::index, LocalParameter::memory, LocalParameter::countStopWords, LocalParameter::position); } else if (LocalParameter::indexType == "key") { index = new lemur::index::KeyfileIncIndex(LocalParameter::index, LocalParameter::memory); indexer = new lemur::parse::KeyfileTextHandler(index, LocalParameter::countStopWords); } else { LEMUR_THROW(LEMUR_BAD_PARAMETER_ERROR,"Please use a valid value for the required parameter \"IndexType\". 
\nValid values are \"inv\",\"key\", or \"indri\"See program usage or Lemur documentation for more information."); } // chain the parser/stopper/stemmer/indexer TextHandler * th = parser; if (stopper != NULL) { th->setTextHandler(stopper); th = stopper; } if (stemmer != NULL) { th->setTextHandler(stemmer); th = stemmer; } th->setTextHandler(indexer); // parse the data files if (!LocalParameter::dataFiles.empty()) { if (!indri::file::Path::exists(LocalParameter::dataFiles)) { LEMUR_THROW(LEMUR_IO_ERROR, "\"dataFiles\" specified does not exist"); } ifstream source(LocalParameter::dataFiles.c_str()); if (!source.is_open()) { LEMUR_THROW(LEMUR_IO_ERROR,"could not open \"dataFiles\" specified"); } else { string filename; while (getline(source, filename)) { cerr << "Parsing file: " << filename <<endl; try { parser->parse(filename); } catch (Exception &ex) { LEMUR_RETHROW(ex,"Could not parse file"); } } } } else { for (int i = 2; i < argc; i++) { cerr << "Parsing file: " << argv[i] << endl; string filename(argv[i]); try { parser->parse(filename); } catch (Exception &ex) { LEMUR_RETHROW(ex, "Could not parse file"); } } } // free memory delete(indexer); delete(stemmer); delete(stopper); delete(parser); if (index) delete(index); return 0; }