コード例 #1
0
ファイル: CPdfFilter.cpp プロジェクト: sambhare/sumatrapdf
// Fills chunkValue with the next chunk for the PDF document and advances
// m_state, so that successive calls walk through the document's metadata
// and then its pages.
//
// Chunks are produced in this order:
//   1. PKEY_PerceivedType (always L"document")
//   2. PKEY_Author
//   3. PKEY_Title (Title property, or Subject if Title is missing/empty)
//   4. PKEY_ItemDate (ModificationDate, or CreationDate if missing/empty)
//   5. PKEY_Search_Contents, once per page whose extracted text is non-empty
// A property that is empty (or a date that cannot be converted) is skipped
// by falling through to the next state within the same call.
//
// Returns S_OK when chunkValue has been filled in, or FILTER_E_END_OF_CHUNKS
// once there is nothing left to emit.
HRESULT CPdfFilter::GetNextChunkValue(CChunkValue& chunkValue) {
    AutoFreeW str;

    switch (m_state) {
        case STATE_PDF_START:
            m_state = STATE_PDF_AUTHOR;
            chunkValue.SetTextValue(PKEY_PerceivedType, L"document");
            return S_OK;

        case STATE_PDF_AUTHOR:
            m_state = STATE_PDF_TITLE;
            str.Set(m_pdfEngine->GetProperty(DocumentProperty::Author));
            if (!str::IsEmpty(str.Get())) {
                chunkValue.SetTextValue(PKEY_Author, str);
                return S_OK;
            }
            // fall through

        case STATE_PDF_TITLE:
            m_state = STATE_PDF_DATE;
            str.Set(m_pdfEngine->GetProperty(DocumentProperty::Title));
            // an empty (not just missing) title must not hide a usable subject
            if (str::IsEmpty(str.Get()))
                str.Set(m_pdfEngine->GetProperty(DocumentProperty::Subject));
            if (!str::IsEmpty(str.Get())) {
                chunkValue.SetTextValue(PKEY_Title, str);
                return S_OK;
            }
            // fall through

        case STATE_PDF_DATE:
            m_state = STATE_PDF_CONTENT;
            str.Set(m_pdfEngine->GetProperty(DocumentProperty::ModificationDate));
            // same reasoning as for the title: treat an empty value as absent
            if (str::IsEmpty(str.Get()))
                str.Set(m_pdfEngine->GetProperty(DocumentProperty::CreationDate));
            if (!str::IsEmpty(str.Get())) {
                SYSTEMTIME systime;
                FILETIME filetime;
                // only emit the date if it both parses and converts successfully
                if (PdfDateParse(str, &systime) && SystemTimeToFileTime(&systime, &filetime)) {
                    chunkValue.SetFileTimeValue(PKEY_ItemDate, filetime);
                    return S_OK;
                }
            }
            // fall through

        case STATE_PDF_CONTENT:
            // m_iPageNo is pre-incremented, so it always holds the number of
            // the page most recently handed to ExtractPageText
            while (++m_iPageNo <= m_pdfEngine->PageCount()) {
                str.Set(m_pdfEngine->ExtractPageText(m_iPageNo, L"\r\n"));
                if (str::IsEmpty(str.Get()))
                    continue;
                chunkValue.SetTextValue(PKEY_Search_Contents, str, CHUNK_TEXT);
                return S_OK;
            }
            m_state = STATE_PDF_END;
            // fall through

        case STATE_PDF_END:
        default:
            return FILTER_E_END_OF_CHUNKS;
    }
}
コード例 #2
0
ファイル: CEpubFilter.cpp プロジェクト: jingyu9575/sumatrapdf
// Fills chunkValue with the next chunk for the EPUB document and advances
// m_state, so that successive calls walk through the document's metadata
// and then its text.
//
// Chunks are produced in this order:
//   1. PKEY_PerceivedType (always L"document")
//   2. PKEY_Author
//   3. PKEY_Title (Title property, or Subject if Title is missing/empty)
//   4. PKEY_ItemDate (ModificationDate, or CreationDate if missing/empty)
//   5. PKEY_Search_Contents (the result of ExtractHtmlText, as one chunk)
// A property that is empty (or a date that cannot be converted) is skipped
// by falling through to the next state within the same call.
//
// Returns S_OK when chunkValue has been filled in, or FILTER_E_END_OF_CHUNKS
// once there is nothing left to emit.
HRESULT CEpubFilter::GetNextChunkValue(CChunkValue& chunkValue) {
    AutoFreeW str;

    switch (m_state) {
        case STATE_EPUB_START:
            m_state = STATE_EPUB_AUTHOR;
            chunkValue.SetTextValue(PKEY_PerceivedType, L"document");
            return S_OK;

        case STATE_EPUB_AUTHOR:
            m_state = STATE_EPUB_TITLE;
            str.Set(m_epubDoc->GetProperty(DocumentProperty::Author));
            if (!str::IsEmpty(str.Get())) {
                chunkValue.SetTextValue(PKEY_Author, str);
                return S_OK;
            }
            // fall through

        case STATE_EPUB_TITLE:
            m_state = STATE_EPUB_DATE;
            str.Set(m_epubDoc->GetProperty(DocumentProperty::Title));
            // an empty (not just missing) title must not hide a usable subject
            if (str::IsEmpty(str.Get()))
                str.Set(m_epubDoc->GetProperty(DocumentProperty::Subject));
            if (!str::IsEmpty(str.Get())) {
                chunkValue.SetTextValue(PKEY_Title, str);
                return S_OK;
            }
            // fall through

        case STATE_EPUB_DATE:
            m_state = STATE_EPUB_CONTENT;
            str.Set(m_epubDoc->GetProperty(DocumentProperty::ModificationDate));
            // same reasoning as for the title: treat an empty value as absent
            if (str::IsEmpty(str.Get()))
                str.Set(m_epubDoc->GetProperty(DocumentProperty::CreationDate));
            if (!str::IsEmpty(str.Get())) {
                SYSTEMTIME systime;
                FILETIME filetime;
                // SystemTimeToFileTime leaves filetime unset when it fails, so
                // only emit the date if both parsing and conversion succeed
                if (IsoDateParse(str, &systime) && SystemTimeToFileTime(&systime, &filetime)) {
                    chunkValue.SetFileTimeValue(PKEY_ItemDate, filetime);
                    return S_OK;
                }
            }
            // fall through

        case STATE_EPUB_CONTENT:
            // the whole document text is emitted as a single chunk, so the
            // state moves straight on to the end
            m_state = STATE_EPUB_END;
            str.Set(ExtractHtmlText(m_epubDoc));
            if (!str::IsEmpty(str.Get())) {
                chunkValue.SetTextValue(PKEY_Search_Contents, str, CHUNK_TEXT);
                return S_OK;
            }
            // fall through

        case STATE_EPUB_END:
        default:
            return FILTER_E_END_OF_CHUNKS;
    }
}
コード例 #3
0
HRESULT CTeXFilter::GetNextChunkValue(CChunkValue &chunkValue)
{
    WCHAR *start, *end;

ContinueParsing:
    if (!*m_pPtr && m_state == STATE_TEX_PREAMBLE) {
        // if there was no preamble, treat the whole document as content
        m_pPtr = m_pData;
        m_iDepth = 0;
        m_state = STATE_TEX_CONTENT;
    }
    else if (!*m_pPtr) {
        m_state = STATE_TEX_END;
    }

    switch (m_state)
    {
    case STATE_TEX_START:
        m_state = STATE_TEX_PREAMBLE;
        chunkValue.SetTextValue(PKEY_PerceivedType, L"document");
        return S_OK;
    case STATE_TEX_PREAMBLE:
        // the preamble (i.e. everything before \begin{document}) may contain
        // \author{...} and \title{...} commands
        start = end = NULL;
        while (*m_pPtr && !start) {
            switch (*m_pPtr++){
            case '\\':
                if (iscmdchar(*m_pPtr)) {
                    start = m_pPtr;
                    for (end = start; iscmdchar(*m_pPtr); m_pPtr++, end++);
                    break;
                }
                if (*m_pPtr)
                    m_pPtr++;
                break;
            case '{':
                ExtractBracedBlock();
                break;
            case '%':
                skipcomment(m_pPtr);
                break;
            }
        }
        if (!start)
            goto ContinueParsing;
        skipspace(m_pPtr);
        if (*m_pPtr != '{')
            goto ContinueParsing;
        m_pPtr++;

        if (!wcsncmp(start, L"author", end - start) || !wcsncmp(start, L"title", end - start)) {
            chunkValue.SetTextValue(*start == 'a' ? PKEY_Author : PKEY_Title, ExtractBracedBlock());
            return S_OK;
        }

        if (!wcsncmp(start, L"begin", end - start) && str::Eq(ExtractBracedBlock(), L"document"))
            m_state = STATE_TEX_CONTENT;
        goto ContinueParsing;
    case STATE_TEX_CONTENT:
        chunkValue.SetTextValue(PKEY_Search_Contents, ExtractBracedBlock(), CHUNK_TEXT);
        return S_OK;
    default:
        return FILTER_E_END_OF_CHUNKS;
    }
}