struct evhttp_request* EvHttpSyncClient::newReqest() { struct evhttp_request* pReq = evhttp_request_new(handleResponse, this); if (pReq == NULL) { FX_LOG(ERROR, "evhttp_request_new FAILED"); setError(ST_ERROR, "evhttp_request_new FAILED"); return NULL; } int rv = evhttp_add_header(evhttp_request_get_output_headers(pReq), "Host", m_sHost.c_str()); if (rv != 0) { FX_LOG(ERROR, "evhttp_add_header FAILED"); setError(ST_ERROR, "evhttp_add_header FAILED"); return NULL; } rv = evhttp_add_header(evhttp_request_get_output_headers(pReq), "Connection", "keep-alive"); if (rv != 0) { FX_LOG(ERROR, "evhttp_add_header FAILED"); setError(ST_ERROR, "evhttp_add_header FAILED"); return NULL; } return pReq; }
void EvHttpSyncClient::get(const std::string& sURI) { struct evhttp_request* pReq = newReqest(); if (!pReq) { setError(ST_ERROR, "new request FAILED"); return ; } int rv = evhttp_make_request(m_pConn, pReq, EVHTTP_REQ_GET, sURI.c_str()); if (rv != 0) { FX_LOG(ERROR, "evhttp_make_request FAILED"); setError(ST_ERROR, "evhttp_make_request FAILED"); return; } rv = event_base_dispatch(m_evbase); if (rv != 0) { setError(ST_ERROR, "event loop failed"); FX_LOG(ERROR, "event loop failed: [%d]", rv); } FX_TRACE("made request: [%s]", sURI.c_str()); return ; }
void StandardStopFilter::setParam(const tstring& sParam) { KeyValueParser parser; if (parser.parse(sParam, TokenFilter::PARAM_SEPERATOR, TokenFilter::EQUAL_MARK)) { size_t nWords = 0; string sValue; if (parser.getValue("words", sValue)) { nWords = loadWords(sValue); } else if (parser.getValue("file", sValue)) { nWords = loadFile(sValue); } else { return ; } if (nWords == 0) { FX_LOG(ERROR, "Load stop words: [%s] FAILED.", sValue.c_str()); } else { FX_LOG(INFO, "Loaded [%u] stop words.", (uint32_t)nWords); } } }
void IndexBarrelKeeper::housekeep() { ScopedRWLock lock(m_lock, true); CommitMap toReserve; CommitMap::reverse_iterator it = m_commitMap.rbegin(); // Always reserve the last element toReserve.insert(*it++); for (; it != m_commitMap.rend(); it++) { if (it->second.use_count() > 1) { FX_LOG(INFO, "Staled commit is in use: [%d]", it->first); toReserve.insert(*it); } else { FX_LOG(INFO, "Clean staled commit: [%d]", it->first); it->second.reset(); } } toReserve.swap(m_commitMap); }
void FormulaScorer::setParameter(const std::string& sParam) { KeyValueParser kvParser; if (!kvParser.parse(sParam, ",", "=")) { FX_LOG(ERROR, "Parse formula parameter : [%s] FAILED", sParam.c_str()); return; } if (!kvParser.getValue("formula", m_sFormula)) { FX_LOG(ERROR, "Extract formula from [%s] FAILED", sParam.c_str()); return; } }
void StandardAnalyzer::setParam(const tstring& sParam) { KeyValueParser kvParser; bool ret = kvParser.parse(sParam, ";", "="); if (!ret) { FX_LOG(ERROR, "Invalid analyzer parameter: [%s]", sParam.c_str()); } else { tstring sValue; if (kvParser.getValue(PARAM_ALGORITHM, sValue)) { if (!strCompareNoCase(sValue.c_str(), "max_forward")) { m_eSegAlgorithm = SA_MAX_FORWARD; } else if (!strCompareNoCase(sValue.c_str(), "unigram")) { m_eSegAlgorithm = SA_UNIGRAM; } else { FX_LOG(ERROR, "Invalid parameter: [algorithm=%s]", sValue.c_str()); m_eSegAlgorithm = SA_MAX_FORWARD; } } if (kvParser.getValue(PARAM_ENCODE, sValue)) { if ((!strCompareNoCase(sValue.c_str(), "utf-8")) || (!strCompareNoCase(sValue.c_str(), "gbk")) || (!strCompareNoCase(sValue.c_str(), "gb2312"))) { m_sEncodeName = toLower(sValue); } else { FX_LOG(ERROR, "Invalid parameter: [encode=%s], " "using default [encode=utf-8]", sValue.c_str()); m_sEncodeName = toLower(sValue); } } } }
void AsyncDocumentConsumer::start() { size_t nSizeInMB = (size_t)GLOBAL_CONF().Build.memory; FX_LOG(INFO, "Allocate memory for building, total size: [%u] MB, segment count: [%u]", (uint32_t)nSizeInMB, (uint32_t)m_threadPool.capacity()); m_pAllocator.reset(new SyncSegregatedAllocator((size_t)(nSizeInMB * 1024 * 1024), m_threadPool.capacity() * MAX_CHUNK_COUNT_PER_SEGMENT)); m_pUpdateTask.reset(new OnDiskUpdateTask(m_pKeeper, m_docQueue, m_docUpdateQueue)); m_updateThread.start(*m_pUpdateTask); taskid_t taskId = 0, nextTaskId = INVALID_TASKID; for (size_t i = 0; i < m_threadPool.capacity(); i++) { IndexBarrelWriterPtr pIndexBarrelWriter = m_pKeeper->createBarrelWriter(); if (!pIndexBarrelWriter->init(m_pAllocator)) { FIRTEX_THROW_AND_LOG(RuntimeException, "Init index writer FAILED."); break; } if (i + 1 != m_threadPool.capacity()) { nextTaskId = taskId + 1; } TaskPtr pTask(new Task(taskId, nextTaskId, pIndexBarrelWriter, m_docQueue, *this)); m_threadPool.start(*pTask); m_tasks.push_back(pTask); } }
void StandardStopFilter::init(const tstring& stopfile) { if (loadFile(stopfile) == 0) { FX_LOG(ERROR, "Load stop word file: [%s] FAILED.", stopfile.c_str()); } }
void EvHttpSyncClient::post(const std::string& sURI, const std::string& sData) { struct evhttp_request* pReq = newReqest(); if (!pReq) { return; } int rv = evbuffer_add(evhttp_request_get_output_buffer(pReq), sData.c_str(), sData.length()); if (rv != 0) { setError(ST_ERROR, "evhttp_add_buffer FAILED"); return; } rv = evhttp_make_request(m_pConn, pReq, EVHTTP_REQ_POST, sURI.c_str()); if (rv != 0) { setError(ST_ERROR, "evhttp_make_request FAILED"); return; } rv = event_base_dispatch(m_evbase); if (rv != 0) { FX_LOG(WARN, "event loop failed: [%d]", rv); } }
void StandardAnalyzer::init(const tstring& sDictPath) { TokenizerPtr pTok; if (m_sEncodeName == "gbk" || m_sEncodeName == "gb2312") { pTok = new GBKStandardTokenizer(); } else { pTok = new UTF8StandardTokenizer(); } if (m_eSegAlgorithm == SA_UNIGRAM) { m_pSegmenter = new UnigramSegmenter(pTok); } else { m_pSegmenter = new MaxForwardSegmenter(pTok); } try { m_pSegmenter->init(sDictPath); } catch(const FirteXException& e) { FX_LOG(ERROR, _T("Load dictionary [%s] FAILED: [%s]"), sDictPath.c_str(), e.what().c_str()); throw; } }
/* static */ void EvHttpSyncClient::handleResponse(struct evhttp_request* req, void* arg) { FX_DEBUG("handleResponse"); EvHttpSyncClient* pThis = (EvHttpSyncClient*)arg; try { pThis->done(req); } catch(std::exception& e) { FX_LOG(ERROR, "Response exception thrown: [%s]: ", e.what()); } catch(const FirteXException& e) { FX_LOG(ERROR, "Response exception thrown: [%s]: ", e.what().c_str()); } }
void IndexBarrelKeeper::waitCommit() { FX_LOG(INFO, "Waiting last commit..."); forceCommit(); //Wait until all documents committed to file system m_pCommitScheduler->waitCommit(); m_pInMemBarrelMerger.reset(); }
void HTMLParser::parse(const tstring& sHtmlFile) { BinaryFile bf; try { bf.open(sHtmlFile, BinaryFile::READ); m_nFileSize = (size_t)bf.getLength(); if(m_nFileSize > MAX_FILESIZE - 1) { m_nFileSize = MAX_FILESIZE - 1; } if(!m_pReadBuffer) { m_nReadBufferSize = DEFAULT_READBUFFER_SIZE; if(m_nReadBufferSize < m_nFileSize + 1) m_nReadBufferSize = m_nFileSize + 1; m_pReadBuffer = new char[m_nReadBufferSize]; } else if(m_nFileSize + 1 > m_nReadBufferSize) { m_nReadBufferSize = m_nFileSize + 1; delete[] m_pReadBuffer; m_pReadBuffer = new char[m_nReadBufferSize]; } size_t nRet = bf.read(m_pReadBuffer, m_nFileSize); if(nRet != m_nFileSize) { FX_LOG(WARN, "Read file [%s] error", sHtmlFile.c_str()); bf.close(); return; } bf.close(); parse(m_pReadBuffer, m_nFileSize); } catch(const FirteXException& e) { FX_LOG(ERROR, "Parse file: [%s] FAILED. Error message: [%s]", sHtmlFile.c_str(), e.what().c_str()); } }
/**
 * Hand out the index writer.
 *
 * @return m_pWriter, or an empty IndexWriterPtr (after logging) when the
 *         index was opened in READ mode
 */
IndexWriterPtr Index::acquireWriter()
{
    ScopedRWLock lock(m_lock, false);
    if (m_accessMode != READ)
    {
        return m_pWriter;
    }

    FX_LOG(INFO, "The index is in READ mode, "
           "can't aquire writer of the index");
    return IndexWriterPtr();
}
void StandardAnalyzer::init() { tstring dict = getCoreDictPath(GLOBAL_CONF().General.dictionaryPath); File f(dict); if (!f.exists()) { FX_LOG(ERROR, _T("Core dictionary: [%s] not found"), dict.c_str()); FIRTEX_THROW(FileIOException, _T("Load dictionary FAILED.")); return; } init(dict); }
//static void Index::remove(FileSystemPtr& pFileSys) { BarrelsInfoPtr barrelsInfoPtr(new BarrelsInfo()); try { barrelsInfoPtr->read(pFileSys); //read barrels Info barrelsInfoPtr->remove(pFileSys); } catch(const IndexCollapseException& e) { FX_LOG(ERROR, "Remove index FAILED: [%s]", e.what().c_str()); FIRTEX_RETHROW(e); } }
/**
 * Bind @p meta to the schema field it names by copying the field's id
 * into the meta.
 *
 * @param meta    meta whose field name is looked up and whose id is set
 * @param pSchema document schema to search
 * @throws InvalidConfigException when the schema has no field with the
 *         meta's field name
 */
void DocumentTemplate::makeSureMeta(Meta& meta, const DocumentSchema* pSchema)
{
    const FieldSchema* pField = pSchema->getSchema(meta.getFieldName().c_str());
    if (pField)
    {
        meta.setId(pField->getId());
        return;
    }

    FX_LOG(ERROR, "No field: [%s] in schema match the meta: [%s]",
           meta.getFieldName().c_str(), meta.getMetaName().c_str());
    FIRTEX_THROW(InvalidConfigException, "No field: [%s] in schema match "
                 "the meta: [%s]", meta.getFieldName().c_str(),
                 meta.getMetaName().c_str());
}
/**
 * Hand out the index reader, optionally refreshing it first.
 *
 * When @p bRefresh is set, the last commit id found on the file system is
 * compared with the commit id of the current reader:
 *  - newer on disk: the barrel keeper is refreshed, a reopened clone
 *    replaces m_pReader, and housekeeping runs once the keeper holds at
 *    least two commits;
 *  - older on disk: IndexCollapseException is thrown;
 *  - equal: the current reader is returned.
 *
 * NOTE(review): in the refresh path m_pReader is dereferenced before any
 * lock on m_lock is taken — confirm this is safe for concurrent callers.
 *
 * @param bRefresh whether to check for (and load) a newer commit
 * @return the reader, or an empty IndexReaderPtr (after logging) when the
 *         access mode is neither READ nor RDWR
 */
IndexReaderPtr Index::acquireReader(bool bRefresh)
{
    const bool bReadable = (m_accessMode == READ) || (m_accessMode == RDWR);
    if (!bReadable)
    {
        FX_LOG(INFO, _T("The access mode is WRITE or APPEND, "
                        "can't aquire reader of the index"));
        return IndexReaderPtr();
    }

    if (bRefresh)
    {
        CommitList commitList;
        commitList.load(m_pFileSys);
        commitid_t lastCommit = commitList.getLastCommit();
        commitid_t curCommit = m_pReader->getBarrelsInfo()->getCommitId();

        if (lastCommit < curCommit)
        {
            FIRTEX_THROW(IndexCollapseException, "Invalid commit id: "
                         "current commit: [%d], last comit: [%d]",
                         curCommit, lastCommit);
        }

        if (lastCommit > curCommit)
        {
            m_pIndexBarrelKeeper->refresh();

            // Prepare the replacement reader before taking the lock.
            IndexReaderPtr pReopened(m_pReader->clone());
            pReopened->reopen();

            ScopedRWLock lock(m_lock, true);
            m_pReader = pReopened;
            pReopened.reset();

            if (m_pIndexBarrelKeeper->getHeldCommitCount() >= 2)
            {
                m_pIndexBarrelKeeper->housekeep();
            }
            return m_pReader;
        }
    }

    ScopedRWLock lock(m_lock, false);
    return m_pReader;
}
void IndexWriter::createMerger() { if (m_pIndexMerger.isNull()) { m_pIndexMerger.assign(new IndexMerger(m_pKeeper.get())); std::string sIdent = GLOBAL_CONF().Merge.strategy; MergePolicyPtr pMergePolicy = MergePolicyFactory::instance()->createMergePolicy(sIdent); if (pMergePolicy.isNull()) { FX_LOG(WARN, "Invalid merge policy identifier: [%s]", sIdent.c_str()); } else { m_pIndexMerger->setMergePolicy(pMergePolicy); } } }
void IndexBarrelKeeper::forceCommit() { FX_LOG(INFO, "Waiting last commit..."); InMemIndexMergerPtr pInMemBarrelMerger; { ScopedRWLock lock(m_lock, true); if (m_pInMemBarrelMerger) { pInMemBarrelMerger = m_pInMemBarrelMerger; m_pInMemBarrelMerger.reset(); } } if (pInMemBarrelMerger) { CommittablePtr pCommitObj = std::dynamic_pointer_cast<Committable>(pInMemBarrelMerger); m_pCommitScheduler->commit(pCommitObj); } }
void IndexBarrelKeeper::init(const BarrelsInfoPtr& pBarrelsInfo, Mode mode) { FX_LOG(INFO, "Initialize index barrel keeper."); m_mode = mode; m_sEncoding = pBarrelsInfo->getEncoding(); switch(mode) { case READ: loadOnDiskBarrel(pBarrelsInfo); break; case RDWR: setupIndexCleaner(); loadOnDiskBarrel(pBarrelsInfo); break; case WRITE: loadOnDiskDataForUpdate(pBarrelsInfo); setupIndexCleaner(); break; } }
void IndexBarrelKeeper::setupIndexCleaner() { string sCleaner = GLOBAL_CONF().Build.IndexCleaner.strategy; string sParam = GLOBAL_CONF().Build.IndexCleaner.param; if (sCleaner.empty()) { sCleaner = "keep_by_commit"; } if (sParam.empty()) { sParam = "keep_count="; NumberFormatter::append(sParam, DEFAULT_COMMIT_KEEP_COUNT); } m_pIndexCleaner.reset(IndexCleanerFactory::instance()->createIndexCleaner(sCleaner)); if (!m_pIndexCleaner) { FX_LOG(ERROR, "Create index cleaner: [%s] FAILED.", sCleaner.c_str()); return; } m_pIndexCleaner->init(m_pFileSys, sParam); }
/**
 * Register @p pIndexBarrel under commit id @p commId, provided the id is
 * newer than the commit id of the last barrel currently in the commit map.
 *
 * No locking is done here.
 *
 * @param commId       commit id of the new barrel
 * @param pIndexBarrel barrel to register
 * @return true when the barrel was inserted (m_latestOnDiskCommit is
 *         updated as well); false when @p commId is not newer than the
 *         newest held commit
 */
bool IndexBarrelKeeper::doInsertCommit(
        commitid_t commId, const IndexBarrelPtr& pIndexBarrel)
{
    commitid_t newestHeld = INVALID_COMMIT;
    if (!m_commitMap.empty())
    {
        newestHeld = m_commitMap.rbegin()->second->getBarrelsInfo()->getCommitId();
    }

    if (commId <= newestHeld)
    {
        return false;
    }

    FX_LOG(INFO, "Insert new commit: [%d]", commId);
    m_commitMap.insert(make_pair(commId, pIndexBarrel));
    m_latestOnDiskCommit = commId;
    return true;
}
/**
 * Close every pooled stream and empty the pool.
 *
 * A FileIOException raised while closing a stream is logged and the
 * remaining streams are still closed.
 */
InputStreamPool::~InputStreamPool()
{
    StreamMap::iterator mapIt = m_streamMap.begin();
    while (mapIt != m_streamMap.end())
    {
        StreamVector& pooled = mapIt->second;
        size_t idx = 0;
        while (idx < pooled.size())
        {
            try
            {
                pooled[idx]->close();
            }
            catch (const FileIOException& e)
            {
                FX_LOG(ERROR, "Close file FAILED: [%s]", e.what().c_str());
            }
            ++idx;
        }
        pooled.clear();
        ++mapIt;
    }

    m_streamMap.clear();
    m_nOpenedStreamCount = 0;
}
/**
 * Reload the on-disk barrels info and, when it carries a commit this
 * keeper does not hold yet, build a barrel for it and register it.
 *
 * Outline:
 *  1. read the barrels info from the file system;
 *  2. if its commit id equals the newest held commit, nothing changes;
 *  3. if no commit is held yet, delegate to createOnDiskBarrelReader();
 *  4. otherwise clone + reopen the deleted-document filter (and the
 *     primary key index, if present), then obtain a reader: either a
 *     reopened clone of the previous reader or a freshly opened one,
 *     depending on the old and new barrel counts;
 *  5. insert the new barrel via doInsertCommit() while holding m_lock.
 *
 * @return the commit id now considered current: the new commit id when it
 *         was inserted (or whatever createOnDiskBarrelReader() returns in
 *         case 3), otherwise the previously held commit id
 */
commitid_t IndexBarrelKeeper::refreshDataForRead()
{
    BarrelsInfoPtr pBarrelsInfo(new BarrelsInfo());
    pBarrelsInfo->read(m_pFileSys);
    commitid_t latestCommit = pBarrelsInfo->getCommitId();

    // Look up the newest barrel currently held.
    commitid_t prevCommit = INVALID_COMMIT;
    IndexBarrelPtr pPrevBarrel;
    {
        ScopedRWLock lock(m_lock, false);
        if (!m_commitMap.empty())
        {
            pPrevBarrel = m_commitMap.rbegin()->second;
            prevCommit = pPrevBarrel->getBarrelsInfo()->getCommitId();
        }
    }

    if (latestCommit == prevCommit)
    {
        return prevCommit;
    }
    if (prevCommit == INVALID_COMMIT)
    {
        return createOnDiskBarrelReader(pBarrelsInfo);
    }

    FX_LOG(INFO, "Begin reopen index database, commitId: [%d]", latestCommit);

    IndexBarrelReaderPtr pReader;

    DeletedDocumentFilterPtr pDocFilter(pPrevBarrel->getDeletedDocFilter()->clone());
    pDocFilter->reopen(pBarrelsInfo);

    PrimaryKeyIndexPtr pPrimKey;
    if (m_pOnDiskPrimKeyIndex)
    {
        pPrimKey.reset(m_pOnDiskPrimKeyIndex->clone());
        pPrimKey->reopen(pBarrelsInfo);
    }

    BarrelsInfoPtr pPrevBarrelsInfo = pPrevBarrel->getBarrelsInfo();
    IndexBarrelReaderPtr pPrevReader = pPrevBarrel->getReader();

    size_t nBarrels = pBarrelsInfo->getBarrelCount();
    if (nBarrels == 1)
    {
        // A fresh single reader is opened when the previous state had
        // several barrels or more than one commit lies in between;
        // otherwise the previous reader is cloned and reopened.
        const bool bOpenFresh = pPrevBarrelsInfo->getBarrelCount() > 1
            || pBarrelsInfo->getCommitId() - pPrevBarrelsInfo->getCommitId() > 1;

        const BarrelInfo& lastBarrelInfo = pBarrelsInfo->getLastBarrel();
        const BitVector* pBitVector =
            pDocFilter->getDocFilter(lastBarrelInfo.getBaseDocId());

        if (bOpenFresh)
        {
            SingleIndexBarrelReader* pFreshReader = new SingleIndexBarrelReader(
                    m_pFileSys, m_pDocSchema, m_pComponentBuilder);
            pReader.reset(pFreshReader);
            pFreshReader->open(&lastBarrelInfo, pBitVector);
        }
        else
        {
            pReader.reset(pPrevReader->clone());
            SingleIndexBarrelReaderPtr pSingleReader =
                std::dynamic_pointer_cast<SingleIndexBarrelReader>(pReader);
            FIRTEX_ASSERT2(pSingleReader);
            pSingleReader->reopen(&lastBarrelInfo, pBitVector);
        }
    }
    else if (nBarrels > 1)
    {
        if (pPrevBarrelsInfo->getBarrelCount() > 1)
        {
            pReader.reset(pPrevReader->clone());
            MultiIndexBarrelReaderPtr pMultiReader =
                std::dynamic_pointer_cast<MultiIndexBarrelReader>(pReader);
            FIRTEX_ASSERT2(pMultiReader);
            pMultiReader->reopen(pBarrelsInfo, pDocFilter);
        }
        else
        {
            //TODO: optimize?
            MultiIndexBarrelReader* pFreshReader = new MultiIndexBarrelReader(
                    m_pFileSys, m_pDocSchema, m_pComponentBuilder);
            pReader.reset(pFreshReader);
            pFreshReader->open(pBarrelsInfo, pDocFilter);
        }
    }

    FX_LOG(INFO, "End reopen index database.");

    IndexBarrelPtr pBarrel(new IndexBarrel(pBarrelsInfo, pReader, pDocFilter));
    pBarrel->setEncoding(m_sEncoding);

    // Publish the new barrel; the shared on-disk filter and primary key
    // index are replaced only when the insert succeeds.
    ScopedRWLock lock(m_lock, true);
    if (!doInsertCommit(latestCommit, pBarrel))
    {
        return prevCommit;
    }
    m_pOnDiskDocFilter = pDocFilter;
    m_pOnDiskPrimKeyIndex = pPrimKey;
    return latestCommit;
}
/**
 * Create a writer for a new in-memory barrel.
 *
 * The barrel is numbered with the current value of m_barrelCounter, which
 * is then incremented; both happen while m_counterLock is held.
 *
 * @return the new barrel writer
 */
IndexBarrelWriterPtr IndexBarrelKeeper::createBarrelWriter()
{
    FastMutex::Guard guard(m_counterLock);

    FX_LOG(INFO, "Create new in-memory barrel: [%d]", m_barrelCounter);

    IndexBarrelWriterPtr pWriter(new IndexBarrelWriter(this, m_barrelCounter++));
    return pWriter;
}