int main(int argc, char *argv[]){ if (argc != 2){ cout << "Usage: " << argv[0] << " <filename>" << endl; exit (1); } else { try { FileReader *reader = _CLNEW FileReader(argv[1],"UTF-8"); Analyzer *analyzer = new StandardAnalyzer(); TokenStream *tokenStream = analyzer->tokenStream(_T("iets"), reader); Token token; while (tokenStream->next(&token)){ char buffer[1000]; STRCPY_TtoA(buffer, token.termText(), 1000); cout << "\t token: " << buffer; STRCPY_TtoA(buffer, token.type(), 1000); cout << " type: " << buffer << endl; } tokenStream->close(); delete tokenStream; //reader->close(); delete reader; delete analyzer; } catch (CLuceneError &e){ cerr << e.what() << endl; } } }
void getAnalyzedString(const wchar_t* input, wchar_t* output) { CL_NS_USE(index) CL_NS_USE(util) CL_NS_USE(store) CL_NS_USE(search) CL_NS_USE(document) CL_NS_USE(queryParser) CL_NS_USE(analysis) CL_NS_USE2(analysis,standard) /* * StandardAnalyzer sAnalyser; * Used before but this but this includes stopwords filters */ CustomAnalyzer sAnalyser((const wchar_t*)L"stdtokens>stdfilter>lowercase"); Reader* reader = _CLNEW StringReader(input); TokenStream* ts = sAnalyser.tokenStream(_T("dummy"), reader ); Token t; while(ts->next(&t)) { wcscat(output,t.termText()); wcscat(output,L"* "); } size_t len = wcslen(output); if(len == 0) wcscpy(output,L"*"); else { if(output[len-1] == L' ') output[len-1] = L'\0'; } ts->close(); _CLDELETE(ts); _CLDELETE(reader); }