コード例 #1
0
ファイル: DocPostingMerger.cpp プロジェクト: hxfxjun/firtex2
void DocPostingMerger::doMerge(const MergingTerm* pTerm)
{
    docid_t docBuffer[RECORD_SIZE];
    docid_t tfBuffer[RECORD_SIZE];

    PostingDecoderPtr pDecoder = pTerm->getPostingIterator()->getPostingDecoder();
    const DocIdRecycling* pDocIdRecycle = pTerm->getDocIdRecycling();
    if (pDocIdRecycle->hasDeletions())
    {
        docid_t baseDocId = pTerm->getNewBaseDocId();
        uint32_t nDecoded = 0;
        docid_t lastDocId = 0;
        while ((nDecoded = pDecoder->decodeDocRecord(docBuffer, lastDocId)) > 0)
        {
            if (pDecoder->decodeTfRecord(tfBuffer) != nDecoded)
            {
                FIRTEX_THROW(IndexCollapseException,
                             "Doc and Tf record is inconsistant.");
            }

            lastDocId = docBuffer[nDecoded - 1] + 1;

            size_t i, j;
            for (i = 0, j = 0; j < nDecoded; ++j)
            {
                docid_t docId = docBuffer[j];
                docid_t newDocId = pDocIdRecycle->remap(docId);
                if (newDocId != INVALID_DOCID)
                {
                    docBuffer[i] = newDocId;
                    tfBuffer[i] = tfBuffer[j];
                    ++i;
                }
            }
            if (i > 0)
            {
                commitDocuments(baseDocId, docBuffer, tfBuffer, (uint32_t)i);
            }
        }
    }
    else
    {
        docid_t baseDocId = pTerm->getNewBaseDocId();
        uint32_t nDecoded = 0;
        docid_t lastDocId = 0;
        FX_TRACE("Merge doc list: base doc id: %d", baseDocId);

        while ((nDecoded = pDecoder->decodeDocRecord(docBuffer, lastDocId)) > 0)
        {
            if (pDecoder->decodeTfRecord(tfBuffer) != nDecoded)
            {
                FIRTEX_THROW(IndexCollapseException,
                             "Doc and Tf record is inconsistant.");
            }
            commitDocuments(baseDocId, docBuffer, tfBuffer, nDecoded);
            lastDocId = docBuffer[nDecoded - 1] + 1;
        }
    }
}
コード例 #2
0
ファイル: Path.cpp プロジェクト: Web5design/firtex2
/**
 * Parse a Windows-style path string into this Path object.
 *
 * Both '\\' and '/' are accepted as separators. The parser recognises,
 * in this order:
 *   - a single leading separator (the path is marked absolute),
 *   - two leading separators followed by a node name (UNC form),
 *   - a drive letter followed by ':' and a separator,
 * and then splits the remainder into directory components and a final name.
 *
 * @param sPath the path string to parse
 * @throw BadParameterException if a drive specification follows a leading
 *        separator, the drive character is not an ASCII letter, or the
 *        "X:" prefix is not followed by a separator
 */
void Path::parseWindows(const std::string& sPath)
{
    clear();

    std::string::const_iterator it  = sPath.begin();
    std::string::const_iterator end = sPath.end();

    if (it != end)
    {
        // A leading separator makes the path absolute.
        if (*it == '\\' || *it == '/') { m_bAbsolute = true; ++it; }
        if (m_bAbsolute && it != end && (*it == '\\' || *it == '/')) // UNC
        {
            ++it;
            // Everything up to the next separator is the node name.
            while (it != end && *it != '\\' && *it != '/') m_sNode += *it++;
            if (it != end) ++it;
        }
        else if (it != end)
        {
            // Look one character ahead to detect "X:"; undone below if absent.
            char d = *it++;
            if (it != end && *it == ':') // drive letter
            {
                // Reject "\X:" and any non-letter drive character.
                if (m_bAbsolute || !((d >= 'a' && d <= 'z') || (d >= 'A' && d <= 'Z')))
                {
                    FIRTEX_THROW(BadParameterException, "Bad parameter: [%s]", sPath.c_str());
                }
                m_bAbsolute = true;
                m_sDevice += d;
                ++it;
                // The drive specification must be followed by a separator.
                if (it == end || (*it != '\\' && *it != '/'))
                {
                    FIRTEX_THROW(BadParameterException, "Bad parameter: [%s]", sPath.c_str());
                }
                ++it;
            }
            else --it;
        }
        // Split the rest: components followed by a separator are pushed as
        // directories, a trailing component without one becomes the name.
        while (it != end)
        {
            std::string name;
            while (it != end && *it != '\\' && *it != '/') name += *it++;
            if (it != end)
                pushDirectory(name);
            else
                m_sName = name;
            if (it != end) ++it;
        }
    }
    // Node set, no directories, but a name present (e.g. "\\\\node\\name").
    // NOTE(review): makeDirectory() presumably turns that name into a
    // directory component - confirm against its definition.
    if (!m_sNode.empty() && m_vDirs.empty() && !m_sName.empty())
        makeDirectory();
}
コード例 #3
0
ファイル: Index.cpp プロジェクト: Web5design/firtex2
void Index::open(const std::string& sIndexPath, AccessMode am,
                 const DocumentSchema* pDocSchema)
{
    std::string sFs = GLOBAL_CONF().Storage.filesystem;
    FileSystemPtr pFileSys = FileSystemFactory::instance()->createFileSystem(sFs);
    if (pFileSys.isNull())
    {
        FIRTEX_THROW(InvalidConfigException, "Create file system: [%s] FAILED", sFs.c_str());
    }

    FileSystem::OpenMode om = FileSystem::READ;
    switch (am)
    {
    case READ:
        om = FileSystem::READ;
        break;        
    case WRITE:
        om = FileSystem::CREATE;
        break;
    case APPEND:
    case RDWR:
        om = FileSystem::APPEND;
        break;
    }
    pFileSys->open(sIndexPath, om);

    if ((am == APPEND) && (!pFileSys->fileExists(SCHEMA_FILENAME)))
    {
        am = WRITE;
    }
    open(pFileSys, am, pDocSchema);
}
コード例 #4
0
/**
 * Move the read position of the stream.
 *
 * The in-block offset is kept meaningful only when the target lies in the
 * same block as the current position and the current in-block offset agrees
 * with the current position; in every other case the in-block offset is set
 * to the block size.
 * NOTE(review): an in-block offset equal to the block size presumably means
 * "no usable data in the buffer" - confirm against the read path.
 *
 * @param pos absolute position to seek to
 * @throw FileIOException if pos is negative or beyond the actual file size
 */
void BlockFileInputStream::seek(offset_t pos)
{
    if (pos < 0 || pos > m_pFile->fileActualSize)
    {
        FIRTEX_THROW(FileIOException, "Seek past EOF: "
                     "[pos=%lld, file size=%lld], file=[%s]", (long long int)pos, 
                     (long long int)m_pFile->fileActualSize, m_pFile->fileName.c_str());
    }

    const blockid_t curBlock = m_nCurPos / m_nBlockSize;
    const blockid_t targetBlock = pos / m_nBlockSize;

    // Buffer content can be reused only inside the same block and only if
    // the buffer offset is in step with the current position.
    const bool bReuseBuffer = (curBlock == targetBlock)
                              && ((m_nCurPos % m_nBlockSize) == m_nOffInBlk);
    if (bReuseBuffer)
    {
        m_nOffInBlk = (uint32_t)(pos % m_nBlockSize);
    }
    else
    {
        m_nOffInBlk = m_nBlockSize;
    }
    m_nCurPos = pos;
}
コード例 #5
0
/**
 * Set the current read position.
 *
 * @param pos absolute position; must lie in [0, actual file size]
 * @throw FileIOException if pos is outside that range
 */
void MMapFileInputStream::seek(offset_t pos)
{
    const bool bInRange = (pos >= 0) && (pos <= m_pFile->fileActualSize);
    if (!bInRange)
    {
        FIRTEX_THROW(FileIOException, "Seek out of range, pos: [%lld], "
                     "file size: [%lld]", (long long int)pos, 
                     (long long int)m_pFile->fileActualSize);
    }

    m_nCurPos = pos;
}
コード例 #6
0
ファイル: Index.cpp プロジェクト: Web5design/firtex2
void Index::remove(const std::string& sIndexPath)
{
    std::string sFs =GLOBAL_CONF().Storage.filesystem;
    FileSystemPtr pFileSys = FileSystemFactory::instance()->createFileSystem(sFs);
    if (pFileSys.isNull())
    {
        FIRTEX_THROW(InvalidConfigException, "Create file system: [%s] FAILED", sFs.c_str());
    }
    pFileSys->open(sIndexPath, FileSystem::CREATE);
    remove(pFileSys);
}
コード例 #7
0
void FieldDefinition::addFieldType(const FieldType& fieldType)
{
    TypeMap::const_iterator it = m_name2TypeMap.find(fieldType.getName());
    if (it != m_name2TypeMap.end())
    {
        FIRTEX_THROW(IllegalArgumentException, _T("Field type duplicate: [%s]"), 
                     fieldType.getName().c_str());
    }
    FieldType* pFieldType = new FieldType(fieldType);
    m_types.push_back(pFieldType);
    m_name2TypeMap.insert(make_pair(pFieldType->getName(), pFieldType));
}
コード例 #8
0
ファイル: StandardAnalyzer.cpp プロジェクト: cffyh/firtex2
/**
 * Initialise the analyzer from the core dictionary.
 *
 * The dictionary path is derived from the globally configured dictionary
 * directory via getCoreDictPath(); if that file exists, initialisation
 * continues in init(dict).
 *
 * @throw FileIOException if the core dictionary file does not exist
 */
void StandardAnalyzer::init()
{
    tstring dict = getCoreDictPath(GLOBAL_CONF().General.dictionaryPath);
    File f(dict);
    if (!f.exists())
    {
        // Log the missing path first; the exception message does not carry it.
        FX_LOG(ERROR, _T("Core dictionary: [%s] not found"), dict.c_str());
        FIRTEX_THROW(FileIOException, _T("Load dictionary FAILED."));
    }

    init(dict);
}
コード例 #9
0
/**
 * Append a term iterator for one barrel.
 *
 * Iterators must be added in strictly increasing order of their barrel's
 * base doc id.
 *
 * @param pBarrelInfo barrel the iterator belongs to
 * @param iter term iterator over that barrel
 * @throw OutOfOrderException if the barrel's base doc id is not greater
 *        than that of the most recently added iterator
 */
void MultiTermIterator::addIterator(const BarrelInfo* pBarrelInfo,
                                    const TermIteratorPtr& iter)
{
    if (m_iterators.size() > 0
        && m_iterators.back()->barrelInfo->getBaseDocId() >= pBarrelInfo->getBaseDocId())
    {
        // Argument order follows the message: the id being added first,
        // then the id of the last accepted iterator.
        FIRTEX_THROW(OutOfOrderException, "Term iterator is out of order: adding: "
                     "[%d], last: [%d]", pBarrelInfo->getBaseDocId(),
                     m_iterators.back()->barrelInfo->getBaseDocId());
    }

    m_iterators.push_back(new Entry(pBarrelInfo, iter));
}
コード例 #10
0
ファイル: Field.cpp プロジェクト: jaredguo/firtex2
/**
 * Construct a field that carries a token view as its value.
 *
 * The boost starts at 1.0.
 *
 * @param pFieldSchema schema of the field; asserted to be non-NULL
 * @param value token view to store via setTokenView()
 * @throw IllegalArgumentException if value is null
 */
Field::Field(const FieldSchema* pFieldSchema,
             const Field::TokenViewPtr& value)
    : m_pFieldSchema(pFieldSchema)
    , m_fBoost(1.0)
{
    FIRTEX_ASSERT2(m_pFieldSchema != NULL);

    const bool bValueMissing = !value;
    if (bValueMissing)
    {
        FIRTEX_THROW(IllegalArgumentException, _T("Value cannot be null."));
    }

    setTokenView(value);
}
コード例 #11
0
 LoggerNameComponent(const std::string& specifier) 
 {
     if (specifier == "") 
     {
         m_nPrecision = -1;
     }
     else 
     {
         if (!NumberParser::tryParseInt32(specifier, m_nPrecision))
         {
             FIRTEX_THROW(InvalidConfigException, "Bad logger pattern: [%s].",
                     specifier.c_str());
         }
     }
 }
コード例 #12
0
/**
 * Bind a meta to the schema field it names.
 *
 * Looks up the meta's field name in the schema and stores the id of the
 * matching field schema in the meta.
 *
 * @param meta meta to resolve; its id is set on success
 * @param pSchema document schema to search
 * @throw InvalidConfigException if the schema has no field with that name
 *        (the problem is also logged at ERROR level)
 */
void DocumentTemplate::makeSureMeta(Meta& meta, const DocumentSchema* pSchema)
{
    const FieldSchema* pMatched =
        pSchema->getSchema(meta.getFieldName().c_str());
    if (pMatched)
    {
        meta.setId(pMatched->getId());
        return;
    }

    FX_LOG(ERROR, "No field: [%s] in schema match the meta: [%s]", 
           meta.getFieldName().c_str(), meta.getMetaName().c_str());
    FIRTEX_THROW(InvalidConfigException, "No field: [%s] in schema match "
                 "the meta: [%s]", meta.getFieldName().c_str(),
                 meta.getMetaName().c_str());
}
コード例 #13
0
ファイル: DateTime.cpp プロジェクト: Web5design/firtex2
/**
 * Return the daylight saving time offset currently in effect, in seconds.
 *
 * Non-Windows: 3600 if the local time reports DST as active, otherwise 0.
 * Windows: the negated DaylightBias (given in minutes) converted to seconds
 * while the system is in the daylight range, otherwise 0.
 *
 * @return DST offset in seconds, or 0 when DST is not in effect
 * @throw SystemException if the local time cannot be obtained (non-Windows)
 */
int DateTime::dst()
{
#ifndef FX_WINDOWS
    std::time_t now = std::time(NULL);
    struct std::tm t;
    if (!localtime_r(&now, &t))
        FIRTEX_THROW(SystemException, "cannot get local time DST offset");
    // tm_isdst is specified as "positive" when DST is in effect, not as
    // exactly 1, so test for any positive value.
    return t.tm_isdst > 0 ? 3600 : 0;

#else //FX_WINDOWS
    TIME_ZONE_INFORMATION tzInfo;
    DWORD dstFlag = GetTimeZoneInformation(&tzInfo);
    return dstFlag == TIME_ZONE_ID_DAYLIGHT ? -tzInfo.DaylightBias*60 : 0;
#endif
}
コード例 #14
0
void MultiPostingDecoder::addDecoder(const BarrelInfo* pBarrelInfo,
                                     const PostingDecoderPtr& pPosting)
{
    if (m_postingDecoders.size() > 0
        && m_postingDecoders.back()->m_pBarrelInfo->getBaseDocId() >= pBarrelInfo->getBaseDocId())
    {
        FIRTEX_THROW(OutOfOrderException, "Decoder is out of order.");
    }

    EntryPtr pTmp(new Entry(pBarrelInfo, pPosting));
    m_postingDecoders.push_back(pTmp);
    const TermMeta& termMeta = pPosting->getTermMeta();
    m_termMeta.getCTF() += termMeta.getCTF();
    m_termMeta.getDocFreq() += termMeta.getDocFreq();
}
コード例 #15
0
/**
 * Set an environment variable of the current process.
 *
 * The "name=value" string is stored in sm_map under the variable name and
 * the stored copy is what gets handed to putenv(). The operation runs under
 * sm_mutex.
 * NOTE(review): the string is presumably kept in sm_map because putenv()
 * retains the pointer it is given - confirm against the platform's putenv().
 *
 * @param name variable name
 * @param value variable value
 * @throw SystemException if putenv() reports failure
 */
void EnvironmentImpl::setImpl(const std::string& name, const std::string& value)
{
    FastMutex::Guard lock(sm_mutex);

    std::string sAssignment(name);
    sAssignment += "=";
    sAssignment += value;

    std::string& sStored = sm_map[name];
    sStored = sAssignment;

    if (putenv((char*) sStored.c_str()) != 0)
    {
        std::string msg("cannot set environment variable: ");
        msg += name;
        FIRTEX_THROW(SystemException, "%s", msg.c_str());
    }
}
コード例 #16
0
ファイル: Index.cpp プロジェクト: Web5design/firtex2
/**
 * Return the index reader, optionally refreshing it to the latest commit.
 *
 * @param bRefresh if true, the commit list is loaded from the file system
 *        and, when it holds a commit newer than the one the current reader
 *        is on, a reopened clone of the reader replaces the current one
 * @return the (possibly replaced) reader, or an empty IndexReaderPtr when
 *         the access mode is neither READ nor RDWR
 * @throw IndexCollapseException if the last commit on disk is older than
 *        the commit of the current reader
 */
IndexReaderPtr Index::acquireReader(bool bRefresh)
{
    // Readers are only handed out in READ and RDWR mode.
    if ((m_accessMode != READ) && (m_accessMode != RDWR))
    {
        FX_LOG(INFO, _T("The access mode is WRITE or APPEND, "
                        "can't aquire reader of the index"));
        return IndexReaderPtr();
    }

    if (bRefresh)
    {
        // Compare the newest commit on disk with the reader's commit.
        CommitList commits;
        commits.load(m_pFileSys);
        commitid_t lastCommit = commits.getLastCommit();
        commitid_t curCommit = m_pReader->getBarrelsInfo()->getCommitId();
        if (lastCommit > curCommit)
        {
            m_pIndexBarrelKeeper->refresh();

            // Prepare the new reader before taking the lock.
            IndexReaderPtr pNewReader(m_pReader->clone());
            pNewReader->reopen();
            
            // NOTE(review): the 'true' argument presumably requests the
            // exclusive (write) side of the lock - confirm in ScopedRWLock.
            // The lock stays held until this function returns.
            ScopedRWLock lock(m_lock, true);
            m_pReader = pNewReader;
            pNewReader.reset();

            if (m_pIndexBarrelKeeper->getHeldCommitCount() >= 2)
            {
                m_pIndexBarrelKeeper->housekeep();
            }

            return m_pReader;
        }
        if (lastCommit < curCommit)
        {
            FIRTEX_THROW(IndexCollapseException, "Invalid commit id: "
                            "current commit: [%d], last comit: [%d]", 
                         curCommit, lastCommit);
        }
    }

    // No refresh requested, or the reader is already on the last commit.
    ScopedRWLock lock(m_lock, false);
    return m_pReader;
}
コード例 #17
0
ファイル: Timestamp.cpp プロジェクト: Web5design/firtex2
/**
 * Set the timestamp to the current time.
 *
 * Windows: the system FILETIME is read, the FILETIME value of the UNIX
 * epoch is subtracted and the result is divided by 10.
 * Elsewhere: gettimeofday() supplies seconds and microseconds, combined as
 * seconds * resolution() + microseconds.
 *
 * @throw RuntimeException if gettimeofday() fails (non-Windows)
 */
void Timestamp::update()
{
#ifdef FX_WINDOWS

    FILETIME now;
    GetSystemTimeAsFileTime(&now);

    // UNIX epoch (1970-01-01 00:00:00) expressed in Windows NT FILETIME
    ULARGE_INTEGER unixEpoch;
    unixEpoch.LowPart  = 0xD53E8000;
    unixEpoch.HighPart = 0x019DB1DE;

    ULARGE_INTEGER current;
    current.LowPart  = now.dwLowDateTime;
    current.HighPart = now.dwHighDateTime;

    m_ts = (current.QuadPart - unixEpoch.QuadPart)/10;

#else

    struct timeval now;
    if (gettimeofday(&now, NULL) != 0)
    {
        FIRTEX_THROW(RuntimeException, "cannot get time of day");
    }
    m_ts = TimeVal(now.tv_sec)*resolution() + now.tv_usec;
#endif
}
コード例 #18
0
/**
 * Not supported by this reader; every call throws.
 *
 * @param pStreamPool unused
 * @param barrel unused
 * @throw UnsupportedException always
 */
void MultiStoredFieldsReader::open(const InputStreamPoolPtr& pStreamPool,
                                   const std::string& barrel)
{
    FIRTEX_THROW(UnsupportedException, "Unsupport open().");
}
コード例 #19
0
/**
 * Not supported by this stream; every call throws.
 *
 * @param pos unused
 * @throw UnsupportedException always
 */
void ByteSliceOutputStream::seek(offset_t pos)
{
    FIRTEX_THROW(UnsupportedException, "Seek operation is unsupported.");
}
コード例 #20
0
ファイル: Index.cpp プロジェクト: Web5design/firtex2
/**
 * Open the index on the given file system.
 *
 * The index must not already hold a file system (asserted). The barrels
 * info is read first, then the index is set up according to the mode:
 *
 *  - WRITE: the given schema is copied and written out, the barrels info is
 *    removed from the file system, and a writer is opened.
 *  - READ / RDWR / APPEND: the index version is checked. If barrels exist,
 *    the stored schema is read and must equal the given one when one is
 *    given; if no barrels exist, the given schema is copied. READ opens a
 *    reader, RDWR a reader and a writer, APPEND a writer only.
 *
 * @param pFileSys file system holding the index
 * @param am access mode
 * @param pDocSchema document schema; required in WRITE mode and whenever
 *        the index holds no barrels
 * @throw InvalidConfigException WRITE mode without a schema
 * @throw VersionException stored index version differs from FX_INDEX_VERSION
 * @throw IndexCollapseException the stored schema cannot be read
 * @throw IllegalArgumentException the given schema differs from the stored
 *        one, or no schema is given for an index without barrels
 */
void Index::open(FileSystemPtr& pFileSys, AccessMode am,
                 const DocumentSchema* pDocSchema)
{
    FIRTEX_ASSERT2(m_pFileSys.isNull());
    m_pFileSys = pFileSys;
    m_accessMode = am;
	
    BarrelsInfoPtr pBarrelsInfo(new BarrelsInfo());
    pBarrelsInfo->read(pFileSys);//read barrels Info

    if (am == WRITE)
    {
        if (!pDocSchema)
        {
            FIRTEX_THROW(InvalidConfigException, "Schema is empty in write mode.");
        }

        // Keep a private copy of the schema and persist it.
        m_pDocSchema = new DocumentSchema(*pDocSchema);
        writeSchema(m_pDocSchema, pFileSys);

        pBarrelsInfo->remove(m_pFileSys);

        m_pComponentBuilder = new ComponentBuilder();
        m_pComponentBuilder->init(m_pDocSchema);
        initAnalyzerMapper();

        m_pIndexBarrelKeeper = new IndexBarrelKeeper(m_pFileSys, m_pDocSchema.get(),
                m_pComponentBuilder.get(), m_pAnalyzerMapper.get());
        m_pIndexBarrelKeeper->init(pBarrelsInfo, IndexBarrelKeeper::WRITE);

        openWriter();
    }
    else // READ, APPEND or RDWR mode
    {
        if (pBarrelsInfo->getIndexVersion() != FX_INDEX_VERSION)
        {
            FIRTEX_THROW(VersionException, "Incompatible index version.");
        }
        if (pBarrelsInfo->getBarrelCount() > 0)
        {
            // Existing data: the stored schema wins, but must match the
            // caller's schema if one was supplied.
            DocumentSchemaPtr pSchemaExist = readSchema(m_pFileSys);
            if (pSchemaExist.isNull())
            {
                FIRTEX_THROW(IndexCollapseException, "Read schema FAILED.");
            }
            if (pDocSchema && !pSchemaExist->isEqual(*pDocSchema))
            {
                FIRTEX_THROW(IllegalArgumentException,
                        "The given document schema is not equal to the existing schema.");
            }
            m_pDocSchema = pSchemaExist;
        }
        else
        {
            // No barrels yet: the caller has to provide the schema.
            if (!pDocSchema)
            {
                FIRTEX_THROW(IllegalArgumentException,
                        "No document schema is specified.");
            }
            m_pDocSchema = new DocumentSchema(*pDocSchema);
        }

        m_pComponentBuilder = new ComponentBuilder();
        m_pComponentBuilder->init(m_pDocSchema);
        initAnalyzerMapper();

        m_pIndexBarrelKeeper = new IndexBarrelKeeper(m_pFileSys, m_pDocSchema.get(),
                m_pComponentBuilder.get(), m_pAnalyzerMapper.get());

        if (am == READ || am == RDWR)
        {
            m_pIndexBarrelKeeper->init(pBarrelsInfo, (am == READ) ?
                    IndexBarrelKeeper::READ : IndexBarrelKeeper::RDWR);

            openReader();
            if (am == RDWR)
            {
                openWriter();
            }
        }
        else if (am == APPEND)
        {
            // APPEND initialises the barrel keeper in WRITE mode.
            m_pIndexBarrelKeeper->init(pBarrelsInfo, IndexBarrelKeeper::WRITE);
            openWriter();
        }
    }
}