// Produces the next property/content chunk for the PDF document.
//
// The chunks are reported in this order, driven by m_state: perceived type,
// author, title (falling back to the subject), item date (modification date,
// falling back to the creation date) and finally the text of each page.
// m_state is advanced before a value is looked up, so a state that has nothing
// to report simply falls through to the next one within the same call.
//
// Returns S_OK when chunkValue has been filled, or FILTER_E_END_OF_CHUNKS once
// there is nothing left to report.
HRESULT CPdfFilter::GetNextChunkValue(CChunkValue& chunkValue) {
    AutoFreeW str;

    switch (m_state) {
        case STATE_PDF_START:
            m_state = STATE_PDF_AUTHOR;
            chunkValue.SetTextValue(PKEY_PerceivedType, L"document");
            return S_OK;

        case STATE_PDF_AUTHOR:
            m_state = STATE_PDF_TITLE;
            str.Set(m_pdfEngine->GetProperty(DocumentProperty::Author));
            if (!str::IsEmpty(str.Get())) {
                chunkValue.SetTextValue(PKEY_Author, str);
                return S_OK;
            }
            // fall through

        case STATE_PDF_TITLE:
            m_state = STATE_PDF_DATE;
            str.Set(m_pdfEngine->GetProperty(DocumentProperty::Title));
            // use the same emptiness test as the emit check below, so that an
            // empty (but non-null) title doesn't suppress the subject fallback
            if (str::IsEmpty(str.Get())) {
                str.Set(m_pdfEngine->GetProperty(DocumentProperty::Subject));
            }
            if (!str::IsEmpty(str.Get())) {
                chunkValue.SetTextValue(PKEY_Title, str);
                return S_OK;
            }
            // fall through

        case STATE_PDF_DATE:
            m_state = STATE_PDF_CONTENT;
            str.Set(m_pdfEngine->GetProperty(DocumentProperty::ModificationDate));
            // an empty modification date must not hide the creation date either
            if (str::IsEmpty(str.Get())) {
                str.Set(m_pdfEngine->GetProperty(DocumentProperty::CreationDate));
            }
            if (!str::IsEmpty(str.Get())) {
                SYSTEMTIME systime;
                FILETIME filetime;
                // only report the date if it both parses and converts
                if (PdfDateParse(str, &systime) && SystemTimeToFileTime(&systime, &filetime)) {
                    chunkValue.SetFileTimeValue(PKEY_ItemDate, filetime);
                    return S_OK;
                }
            }
            // fall through

        case STATE_PDF_CONTENT:
            // one chunk per page; pages without any text are skipped
            while (++m_iPageNo <= m_pdfEngine->PageCount()) {
                str.Set(m_pdfEngine->ExtractPageText(m_iPageNo, L"\r\n"));
                if (str::IsEmpty(str.Get())) {
                    continue;
                }
                chunkValue.SetTextValue(PKEY_Search_Contents, str, CHUNK_TEXT);
                return S_OK;
            }
            m_state = STATE_PDF_END;
            // fall through

        case STATE_PDF_END:
        default:
            return FILTER_E_END_OF_CHUNKS;
    }
}
// Produces the next property/content chunk for the EPUB document.
//
// The chunks are reported in this order, driven by m_state: perceived type,
// author, title (falling back to the subject), item date (modification date,
// falling back to the creation date) and finally the document's text as a
// single chunk. m_state is advanced before a value is looked up, so a state
// that has nothing to report falls through to the next one within the same
// call.
//
// Returns S_OK when chunkValue has been filled, or FILTER_E_END_OF_CHUNKS once
// there is nothing left to report.
HRESULT CEpubFilter::GetNextChunkValue(CChunkValue& chunkValue) {
    AutoFreeW str;

    switch (m_state) {
        case STATE_EPUB_START:
            m_state = STATE_EPUB_AUTHOR;
            chunkValue.SetTextValue(PKEY_PerceivedType, L"document");
            return S_OK;

        case STATE_EPUB_AUTHOR:
            m_state = STATE_EPUB_TITLE;
            str.Set(m_epubDoc->GetProperty(DocumentProperty::Author));
            if (!str::IsEmpty(str.Get())) {
                chunkValue.SetTextValue(PKEY_Author, str);
                return S_OK;
            }
            // fall through

        case STATE_EPUB_TITLE:
            m_state = STATE_EPUB_DATE;
            str.Set(m_epubDoc->GetProperty(DocumentProperty::Title));
            // use the same emptiness test as the emit check below, so that an
            // empty (but non-null) title doesn't suppress the subject fallback
            if (str::IsEmpty(str.Get())) {
                str.Set(m_epubDoc->GetProperty(DocumentProperty::Subject));
            }
            if (!str::IsEmpty(str.Get())) {
                chunkValue.SetTextValue(PKEY_Title, str);
                return S_OK;
            }
            // fall through

        case STATE_EPUB_DATE:
            m_state = STATE_EPUB_CONTENT;
            str.Set(m_epubDoc->GetProperty(DocumentProperty::ModificationDate));
            // an empty modification date must not hide the creation date either
            if (str::IsEmpty(str.Get())) {
                str.Set(m_epubDoc->GetProperty(DocumentProperty::CreationDate));
            }
            if (!str::IsEmpty(str.Get())) {
                SYSTEMTIME systime;
                FILETIME filetime;
                // SystemTimeToFileTime can fail (e.g. for out-of-range fields),
                // in which case filetime is left unset and must not be reported
                if (IsoDateParse(str, &systime) && SystemTimeToFileTime(&systime, &filetime)) {
                    chunkValue.SetFileTimeValue(PKEY_ItemDate, filetime);
                    return S_OK;
                }
            }
            // fall through

        case STATE_EPUB_CONTENT:
            m_state = STATE_EPUB_END;
            str.Set(ExtractHtmlText(m_epubDoc));
            if (!str::IsEmpty(str.Get())) {
                chunkValue.SetTextValue(PKEY_Search_Contents, str, CHUNK_TEXT);
                return S_OK;
            }
            // fall through

        case STATE_EPUB_END:
        default:
            return FILTER_E_END_OF_CHUNKS;
    }
}
HRESULT CTeXFilter::GetNextChunkValue(CChunkValue &chunkValue) { WCHAR *start, *end; ContinueParsing: if (!*m_pPtr && m_state == STATE_TEX_PREAMBLE) { // if there was no preamble, treat the whole document as content m_pPtr = m_pData; m_iDepth = 0; m_state = STATE_TEX_CONTENT; } else if (!*m_pPtr) { m_state = STATE_TEX_END; } switch (m_state) { case STATE_TEX_START: m_state = STATE_TEX_PREAMBLE; chunkValue.SetTextValue(PKEY_PerceivedType, L"document"); return S_OK; case STATE_TEX_PREAMBLE: // the preamble (i.e. everything before \begin{document}) may contain // \author{...} and \title{...} commands start = end = NULL; while (*m_pPtr && !start) { switch (*m_pPtr++){ case '\\': if (iscmdchar(*m_pPtr)) { start = m_pPtr; for (end = start; iscmdchar(*m_pPtr); m_pPtr++, end++); break; } if (*m_pPtr) m_pPtr++; break; case '{': ExtractBracedBlock(); break; case '%': skipcomment(m_pPtr); break; } } if (!start) goto ContinueParsing; skipspace(m_pPtr); if (*m_pPtr != '{') goto ContinueParsing; m_pPtr++; if (!wcsncmp(start, L"author", end - start) || !wcsncmp(start, L"title", end - start)) { chunkValue.SetTextValue(*start == 'a' ? PKEY_Author : PKEY_Title, ExtractBracedBlock()); return S_OK; } if (!wcsncmp(start, L"begin", end - start) && str::Eq(ExtractBracedBlock(), L"document")) m_state = STATE_TEX_CONTENT; goto ContinueParsing; case STATE_TEX_CONTENT: chunkValue.SetTextValue(PKEY_Search_Contents, ExtractBracedBlock(), CHUNK_TEXT); return S_OK; default: return FILTER_E_END_OF_CHUNKS; } }