void DocPostingMerger::doMerge(const MergingTerm* pTerm) { docid_t docBuffer[RECORD_SIZE]; docid_t tfBuffer[RECORD_SIZE]; PostingDecoderPtr pDecoder = pTerm->getPostingIterator()->getPostingDecoder(); const DocIdRecycling* pDocIdRecycle = pTerm->getDocIdRecycling(); if (pDocIdRecycle->hasDeletions()) { docid_t baseDocId = pTerm->getNewBaseDocId(); uint32_t nDecoded = 0; docid_t lastDocId = 0; while ((nDecoded = pDecoder->decodeDocRecord(docBuffer, lastDocId)) > 0) { if (pDecoder->decodeTfRecord(tfBuffer) != nDecoded) { FIRTEX_THROW(IndexCollapseException, "Doc and Tf record is inconsistant."); } lastDocId = docBuffer[nDecoded - 1] + 1; size_t i, j; for (i = 0, j = 0; j < nDecoded; ++j) { docid_t docId = docBuffer[j]; docid_t newDocId = pDocIdRecycle->remap(docId); if (newDocId != INVALID_DOCID) { docBuffer[i] = newDocId; tfBuffer[i] = tfBuffer[j]; ++i; } } if (i > 0) { commitDocuments(baseDocId, docBuffer, tfBuffer, (uint32_t)i); } } } else { docid_t baseDocId = pTerm->getNewBaseDocId(); uint32_t nDecoded = 0; docid_t lastDocId = 0; FX_TRACE("Merge doc list: base doc id: %d", baseDocId); while ((nDecoded = pDecoder->decodeDocRecord(docBuffer, lastDocId)) > 0) { if (pDecoder->decodeTfRecord(tfBuffer) != nDecoded) { FIRTEX_THROW(IndexCollapseException, "Doc and Tf record is inconsistant."); } commitDocuments(baseDocId, docBuffer, tfBuffer, nDecoded); lastDocId = docBuffer[nDecoded - 1] + 1; } } }
void Path::parseWindows(const std::string& sPath) { clear(); std::string::const_iterator it = sPath.begin(); std::string::const_iterator end = sPath.end(); if (it != end) { if (*it == '\\' || *it == '/') { m_bAbsolute = true; ++it; } if (m_bAbsolute && it != end && (*it == '\\' || *it == '/')) // UNC { ++it; while (it != end && *it != '\\' && *it != '/') m_sNode += *it++; if (it != end) ++it; } else if (it != end) { char d = *it++; if (it != end && *it == ':') // drive letter { if (m_bAbsolute || !((d >= 'a' && d <= 'z') || (d >= 'A' && d <= 'Z'))) { FIRTEX_THROW(BadParameterException, "Bad parameter: [%s]", sPath.c_str()); } m_bAbsolute = true; m_sDevice += d; ++it; if (it == end || (*it != '\\' && *it != '/')) { FIRTEX_THROW(BadParameterException, "Bad parameter: [%s]", sPath.c_str()); } ++it; } else --it; } while (it != end) { std::string name; while (it != end && *it != '\\' && *it != '/') name += *it++; if (it != end) pushDirectory(name); else m_sName = name; if (it != end) ++it; } } if (!m_sNode.empty() && m_vDirs.empty() && !m_sName.empty()) makeDirectory(); }
void Index::open(const std::string& sIndexPath, AccessMode am, const DocumentSchema* pDocSchema) { std::string sFs = GLOBAL_CONF().Storage.filesystem; FileSystemPtr pFileSys = FileSystemFactory::instance()->createFileSystem(sFs); if (pFileSys.isNull()) { FIRTEX_THROW(InvalidConfigException, "Create file system: [%s] FAILED", sFs.c_str()); } FileSystem::OpenMode om = FileSystem::READ; switch (am) { case READ: om = FileSystem::READ; break; case WRITE: om = FileSystem::CREATE; break; case APPEND: case RDWR: om = FileSystem::APPEND; break; } pFileSys->open(sIndexPath, om); if ((am == APPEND) && (!pFileSys->fileExists(SCHEMA_FILENAME))) { am = WRITE; } open(pFileSys, am, pDocSchema); }
/**
 * Move the read position to @p pos.
 *
 * The in-block offset is only carried over when the target lies in the
 * same block as the current position and the current in-block offset
 * agrees with the current position; in every other case the offset is
 * set to the block size ("no data in buffer").
 *
 * @param pos new absolute position, must be within [0, file size]
 * @throw FileIOException if @p pos is negative or beyond the file size
 */
void BlockFileInputStream::seek(offset_t pos)
{
    if (pos < 0 || pos > m_pFile->fileActualSize)
    {
        FIRTEX_THROW(FileIOException, "Seek past EOF: "
                     "[pos=%lld, file size=%lld], file=[%s]",
                     (long long int)pos,
                     (long long int)m_pFile->fileActualSize,
                     m_pFile->fileName.c_str());
    }

    const blockid_t curBlock = m_nCurPos / m_nBlockSize;
    const blockid_t targetBlock = pos / m_nBlockSize;

    // The buffer can be reused only if we stay in the current block and
    // the buffer offset is in sync with the current position.
    const bool bReuseBuffer = (curBlock == targetBlock)
        && ((m_nCurPos % m_nBlockSize) == m_nOffInBlk);

    if (bReuseBuffer)
    {
        m_nOffInBlk = (uint32_t)(pos % m_nBlockSize);
    }
    else
    {
        // No data in buffer
        m_nOffInBlk = m_nBlockSize;
    }
    m_nCurPos = pos;
}
/**
 * Move the read position to @p pos.
 *
 * @param pos new absolute position, must be within [0, file size]
 * @throw FileIOException if @p pos is negative or beyond the file size
 */
void MMapFileInputStream::seek(offset_t pos)
{
    const bool bOutOfRange = (pos < 0) || (pos > m_pFile->fileActualSize);
    if (bOutOfRange)
    {
        FIRTEX_THROW(FileIOException, "Seek out of range, pos: [%lld], "
                     "file size: [%lld]",
                     (long long int)pos,
                     (long long int)m_pFile->fileActualSize);
    }

    m_nCurPos = pos;
}
void Index::remove(const std::string& sIndexPath) { std::string sFs =GLOBAL_CONF().Storage.filesystem; FileSystemPtr pFileSys = FileSystemFactory::instance()->createFileSystem(sFs); if (pFileSys.isNull()) { FIRTEX_THROW(InvalidConfigException, "Create file system: [%s] FAILED", sFs.c_str()); } pFileSys->open(sIndexPath, FileSystem::CREATE); remove(pFileSys); }
void FieldDefinition::addFieldType(const FieldType& fieldType) { TypeMap::const_iterator it = m_name2TypeMap.find(fieldType.getName()); if (it != m_name2TypeMap.end()) { FIRTEX_THROW(IllegalArgumentException, _T("Field type duplicate: [%s]"), fieldType.getName().c_str()); } FieldType* pFieldType = new FieldType(fieldType); m_types.push_back(pFieldType); m_name2TypeMap.insert(make_pair(pFieldType->getName(), pFieldType)); }
void StandardAnalyzer::init() { tstring dict = getCoreDictPath(GLOBAL_CONF().General.dictionaryPath); File f(dict); if (!f.exists()) { FX_LOG(ERROR, _T("Core dictionary: [%s] not found"), dict.c_str()); FIRTEX_THROW(FileIOException, _T("Load dictionary FAILED.")); return; } init(dict); }
/**
 * Append the term iterator of one barrel.
 *
 * Iterators must be added in strictly increasing order of their
 * barrel's base doc id.
 *
 * @param pBarrelInfo barrel the iterator belongs to
 * @param iter        term iterator of that barrel
 * @throw OutOfOrderException if the base doc id of @p pBarrelInfo is
 *        not greater than that of the most recently added barrel
 */
void MultiTermIterator::addIterator(const BarrelInfo* pBarrelInfo,
                                    const TermIteratorPtr& iter)
{
    if (m_iterators.size() > 0
        && m_iterators.back()->barrelInfo->getBaseDocId()
           >= pBarrelInfo->getBaseDocId())
    {
        // Argument order matches the message: first the barrel being
        // added, then the last one already registered.
        FIRTEX_THROW(OutOfOrderException,
                     "Term iterator is out of order: adding: "
                     "[%d], last: [%d]",
                     pBarrelInfo->getBaseDocId(),
                     m_iterators.back()->barrelInfo->getBaseDocId());
    }

    m_iterators.push_back(new Entry(pBarrelInfo, iter));
}
/**
 * Construct a field holding a token view.
 *
 * The boost is initialized to 1.0.
 *
 * @param pFieldSchema schema of the field; asserted to be non-NULL
 * @param value        token view to store via setTokenView()
 * @throw IllegalArgumentException if @p value is null
 */
Field::Field(const FieldSchema* pFieldSchema,
             const Field::TokenViewPtr& value)
    : m_pFieldSchema(pFieldSchema)
    , m_fBoost(1.0)
{
    FIRTEX_ASSERT2(m_pFieldSchema != NULL);

    // A field without a value is rejected up front.
    if (!value)
    {
        FIRTEX_THROW(IllegalArgumentException,
                     _T("Value cannot be null."));
    }

    setTokenView(value);
}
LoggerNameComponent(const std::string& specifier) { if (specifier == "") { m_nPrecision = -1; } else { if (!NumberParser::tryParseInt32(specifier, m_nPrecision)) { FIRTEX_THROW(InvalidConfigException, "Bad logger pattern: [%s].", specifier.c_str()); } } }
/**
 * Bind a meta entry to the schema field it names.
 *
 * Looks up the field schema by the meta's field name and stores the
 * field id in the meta.
 *
 * @param meta    meta entry to resolve; its id is set on success
 * @param pSchema document schema to search
 * @throw InvalidConfigException if the schema has no field with the
 *        meta's field name (the problem is also logged)
 */
void DocumentTemplate::makeSureMeta(Meta& meta, const DocumentSchema* pSchema)
{
    const FieldSchema* pFieldSchema =
        pSchema->getSchema(meta.getFieldName().c_str());

    if (pFieldSchema)
    {
        meta.setId(pFieldSchema->getId());
        return;
    }

    // Unknown field: report it in the log and via exception.
    FX_LOG(ERROR, "No field: [%s] in schema match the meta: [%s]",
           meta.getFieldName().c_str(), meta.getMetaName().c_str());
    FIRTEX_THROW(InvalidConfigException, "No field: [%s] in schema match "
                 "the meta: [%s]",
                 meta.getFieldName().c_str(), meta.getMetaName().c_str());
}
/**
 * Return the daylight saving time offset currently in effect.
 *
 * On non-Windows builds the current local time is queried with
 * localtime_r() and 3600 is returned when its DST flag is set.
 * On Windows the offset is computed from the daylight bias reported by
 * GetTimeZoneInformation().
 *
 * @return DST offset in seconds, 0 if DST is not in effect
 * @throw SystemException if the local time cannot be determined
 *        (non-Windows only)
 */
int DateTime::dst()
{
#ifndef FX_WINDOWS
    std::time_t now = std::time(NULL);
    struct std::tm t;
    if (!localtime_r(&now, &t))
        FIRTEX_THROW(SystemException, "cannot get local time DST offset");
    // tm_isdst is specified as "positive" while DST is in effect, zero
    // when it is not and negative when unknown, so test for > 0 rather
    // than for the exact value 1.
    return t.tm_isdst > 0 ? 3600 : 0;
#else //FX_WINDOWS
    TIME_ZONE_INFORMATION tzInfo;
    DWORD dstFlag = GetTimeZoneInformation(&tzInfo);
    // DaylightBias is negated and multiplied by 60 to get seconds
    // (presumably it is expressed in minutes -- confirm against the
    // Win32 documentation).
    return dstFlag == TIME_ZONE_ID_DAYLIGHT ? -tzInfo.DaylightBias*60 : 0;
#endif
}
void MultiPostingDecoder::addDecoder(const BarrelInfo* pBarrelInfo, const PostingDecoderPtr& pPosting) { if (m_postingDecoders.size() > 0 && m_postingDecoders.back()->m_pBarrelInfo->getBaseDocId() >= pBarrelInfo->getBaseDocId()) { FIRTEX_THROW(OutOfOrderException, "Decoder is out of order."); } EntryPtr pTmp(new Entry(pBarrelInfo, pPosting)); m_postingDecoders.push_back(pTmp); const TermMeta& termMeta = pPosting->getTermMeta(); m_termMeta.getCTF() += termMeta.getCTF(); m_termMeta.getDocFreq() += termMeta.getDocFreq(); }
/**
 * Set an environment variable of the current process.
 *
 * The "name=value" string is stored in sm_map under @p name and the
 * stored string's buffer is what gets passed to putenv(). The whole
 * operation runs under sm_mutex.
 *
 * @param name  variable name
 * @param value variable value
 * @throw SystemException if putenv() reports failure
 */
void EnvironmentImpl::setImpl(const std::string& name, const std::string& value)
{
    FastMutex::Guard lock(sm_mutex);

    std::string assignment(name);
    assignment += "=";
    assignment += value;

    // Hand putenv() the copy kept in the map, not the local string.
    sm_map[name] = assignment;
    if (putenv(const_cast<char*>(sm_map[name].c_str())) != 0)
    {
        std::string msg("cannot set environment variable: ");
        msg += name;
        FIRTEX_THROW(SystemException, "%s", msg.c_str());
    }
}
/**
 * Get the reader of this index.
 *
 * Only available when the index was opened in READ or RDWR mode. With
 * @p bRefresh set, the commit list is loaded from the file system and
 * compared with the commit id of the current reader; if a newer commit
 * exists, the barrel keeper is refreshed, a clone of the current reader
 * is reopened and installed under the write lock, and the barrel keeper
 * is asked to housekeep once it holds two or more commits.
 *
 * @param bRefresh whether to check for (and switch to) a newer commit
 * @return the current reader, or a null reader if the access mode does
 *         not allow reading
 * @throw IndexCollapseException if the last commit on disk is older
 *        than the commit of the current reader
 */
IndexReaderPtr Index::acquireReader(bool bRefresh)
{
    const bool bReadable = (m_accessMode == READ) || (m_accessMode == RDWR);
    if (!bReadable)
    {
        FX_LOG(INFO, _T("The access mode is WRITE or APPEND, "
                        "can't aquire reader of the index"));
        return IndexReaderPtr();
    }

    if (bRefresh)
    {
        CommitList commitList;
        commitList.load(m_pFileSys);

        commitid_t newestCommit = commitList.getLastCommit();
        commitid_t readerCommit = m_pReader->getBarrelsInfo()->getCommitId();

        if (newestCommit < readerCommit)
        {
            FIRTEX_THROW(IndexCollapseException, "Invalid commit id: "
                         "current commit: [%d], last comit: [%d]",
                         readerCommit, newestCommit);
        }

        if (newestCommit > readerCommit)
        {
            m_pIndexBarrelKeeper->refresh();

            // Prepare the replacement reader before taking the lock.
            IndexReaderPtr pReopened(m_pReader->clone());
            pReopened->reopen();

            ScopedRWLock lock(m_lock, true);
            m_pReader = pReopened;
            pReopened.reset();

            if (m_pIndexBarrelKeeper->getHeldCommitCount() >= 2)
            {
                m_pIndexBarrelKeeper->housekeep();
            }
            return m_pReader;
        }
    }

    ScopedRWLock lock(m_lock, false);
    return m_pReader;
}
void Timestamp::update() { #if defined(FX_WINDOWS) FILETIME ft; GetSystemTimeAsFileTime(&ft); ULARGE_INTEGER epoch; // UNIX epoch (1970-01-01 00:00:00) expressed in Windows NT FILETIME epoch.LowPart = 0xD53E8000; epoch.HighPart = 0x019DB1DE; ULARGE_INTEGER ts; ts.LowPart = ft.dwLowDateTime; ts.HighPart = ft.dwHighDateTime; ts.QuadPart -= epoch.QuadPart; m_ts = ts.QuadPart/10; #else struct timeval tv; if (gettimeofday(&tv, NULL)) FIRTEX_THROW(RuntimeException, "cannot get time of day"); m_ts = TimeVal(tv.tv_sec)*resolution() + tv.tv_usec; #endif }
/**
 * Not supported by this reader; the call always fails.
 *
 * @param pStreamPool unused
 * @param barrel      unused
 * @throw UnsupportedException always
 */
void MultiStoredFieldsReader::open(const InputStreamPoolPtr& pStreamPool,
                                   const std::string& barrel)
{
    FIRTEX_THROW(UnsupportedException, "Unsupport open().");
}
/**
 * Not supported by this stream; the call always fails.
 *
 * @param pos unused
 * @throw UnsupportedException always
 */
void ByteSliceOutputStream::seek(offset_t pos)
{
    FIRTEX_THROW(UnsupportedException,
                 "Seek operation is unsupported.");
}
void Index::open(FileSystemPtr& pFileSys, AccessMode am, const DocumentSchema* pDocSchema) { FIRTEX_ASSERT2(m_pFileSys.isNull()); m_pFileSys = pFileSys; m_accessMode = am; BarrelsInfoPtr pBarrelsInfo(new BarrelsInfo()); pBarrelsInfo->read(pFileSys);//read barrels Info if (am == WRITE) { if (!pDocSchema) { FIRTEX_THROW(InvalidConfigException, "Schema is empty in write mode."); } m_pDocSchema = new DocumentSchema(*pDocSchema); writeSchema(m_pDocSchema, pFileSys); pBarrelsInfo->remove(m_pFileSys); m_pComponentBuilder = new ComponentBuilder(); m_pComponentBuilder->init(m_pDocSchema); initAnalyzerMapper(); m_pIndexBarrelKeeper = new IndexBarrelKeeper(m_pFileSys, m_pDocSchema.get(), m_pComponentBuilder.get(), m_pAnalyzerMapper.get()); m_pIndexBarrelKeeper->init(pBarrelsInfo, IndexBarrelKeeper::WRITE); openWriter(); } else // READ, APPEND or RDWR mode { if (pBarrelsInfo->getIndexVersion() != FX_INDEX_VERSION) { FIRTEX_THROW(VersionException, "Incompatible index version."); } if (pBarrelsInfo->getBarrelCount() > 0) { DocumentSchemaPtr pSchemaExist = readSchema(m_pFileSys); if (pSchemaExist.isNull()) { FIRTEX_THROW(IndexCollapseException, "Read schema FAILED."); } if (pDocSchema && !pSchemaExist->isEqual(*pDocSchema)) { FIRTEX_THROW(IllegalArgumentException, "The given document schema is not equal to the existing schema."); } m_pDocSchema = pSchemaExist; } else { if (!pDocSchema) { FIRTEX_THROW(IllegalArgumentException, "No document schema is specified."); } m_pDocSchema = new DocumentSchema(*pDocSchema); } m_pComponentBuilder = new ComponentBuilder(); m_pComponentBuilder->init(m_pDocSchema); initAnalyzerMapper(); m_pIndexBarrelKeeper = new IndexBarrelKeeper(m_pFileSys, m_pDocSchema.get(), m_pComponentBuilder.get(), m_pAnalyzerMapper.get()); if (am == READ || am == RDWR) { m_pIndexBarrelKeeper->init(pBarrelsInfo, (am == READ) ? 
IndexBarrelKeeper::READ : IndexBarrelKeeper::RDWR); openReader(); if (am == RDWR) { openWriter(); } } else if (am == APPEND) { m_pIndexBarrelKeeper->init(pBarrelsInfo, IndexBarrelKeeper::WRITE); openWriter(); } } }