// Collects the bounding rectangles of the objects found on a page.
//
// The page described by |pDict| is loaded from |pDoc| (with the third Load()
// argument set to FALSE, as before) and its content is parsed. For every
// page object, a rectangle is built from m_Left / m_Right / m_Bottom / m_Top
// and appended to |pRectArray| when IsValiableRect() accepts it against the
// dictionary's "MediaBox" rectangle.
//
// Returns FALSE when |pDict| or |pRectArray| is null (both are dereferenced
// below), TRUE otherwise.
FX_BOOL GetContentsRect(CPDF_Document* pDoc,
                        CPDF_Dictionary* pDict,
                        CPDF_RectArray* pRectArray) {
  if (!pDict || !pRectArray)
    return FALSE;

  // Automatic storage: the page is destroyed on every exit path, so no
  // manual delete is needed.
  CPDF_Page page;
  page.Load(pDoc, pDict, FALSE);
  page.ParseContent();

  // The media box does not change while iterating; look it up once.
  CPDF_Rect rcMediaBox = pDict->GetRect("MediaBox");

  FX_POSITION pos = page.GetFirstObjectPosition();
  while (pos) {
    CPDF_PageObject* pPageObject = page.GetNextObject(pos);
    if (!pPageObject)
      continue;

    CPDF_Rect rc;
    rc.left = pPageObject->m_Left;
    rc.right = pPageObject->m_Right;
    rc.bottom = pPageObject->m_Bottom;
    rc.top = pPageObject->m_Top;

    if (IsValiableRect(rc, rcMediaBox))
      pRectArray->Add(rc);
  }
  return TRUE;
}
// Inserts a new page into |document| and returns it loaded and parsed.
//
// |page_index| is clamped to the range [0, page count] before being passed
// to CreateNewPage(). The new page dictionary receives:
//   - "MediaBox":  [0 0 width height]
//   - "Rotate":    0
//   - "Resources": an empty dictionary
//
// Returns nullptr when |document| does not map to a CPDF_Document or when
// CreateNewPage() yields no dictionary. On success the result is a
// CPDF_Page allocated with new; this function does not free it.
DLLEXPORT FPDF_PAGE STDCALL FPDFPage_New(FPDF_DOCUMENT document,
                                         int page_index,
                                         double width,
                                         double height) {
  CPDF_Document* pDoc = CPDFDocumentFromFPDFDocument(document);
  if (!pDoc)
    return nullptr;

  // Clamp the requested insertion position.
  if (page_index < 0)
    page_index = 0;
  const int nPageCount = pDoc->GetPageCount();
  if (page_index > nPageCount)
    page_index = nPageCount;

  CPDF_Dictionary* pNewPageDict = pDoc->CreateNewPage(page_index);
  if (!pNewPageDict)
    return nullptr;

  // Lower-left corner at the origin, upper-right at (width, height).
  CPDF_Array* pBox = new CPDF_Array;
  pBox->Add(new CPDF_Number(0));
  pBox->Add(new CPDF_Number(0));
  pBox->Add(new CPDF_Number(FX_FLOAT(width)));
  pBox->Add(new CPDF_Number(FX_FLOAT(height)));

  pNewPageDict->SetAt("MediaBox", pBox);
  pNewPageDict->SetAt("Rotate", new CPDF_Number(0));
  pNewPageDict->SetAt("Resources", new CPDF_Dictionary);

  CPDF_Page* pNewPage = new CPDF_Page;
  pNewPage->Load(pDoc, pNewPageDict);
  pNewPage->ParseContent();
  return pNewPage;
}
void PDF_GetPageText_Unicode(CFX_WideStringArray& lines, CPDF_Document* pDoc, CPDF_Dictionary* pPage, int iMinWidth, FX_DWORD flags) { lines.RemoveAll(); if (pPage == NULL) { return; } CPDF_Page page; page.Load(pDoc, pPage); CPDF_ParseOptions options; options.m_bTextOnly = TRUE; options.m_bSeparateForm = FALSE; page.ParseContent(&options); CFX_FloatRect page_bbox = page.GetPageBBox(); if (flags & PDF2TXT_AUTO_ROTATE) { CheckRotate(page, page_bbox); } CTextPage texts; texts.m_bAutoWidth = flags & PDF2TXT_AUTO_WIDTH; texts.m_bKeepColumn = flags & PDF2TXT_KEEP_COLUMN; texts.m_bBreakSpace = TRUE; FX_POSITION pos = page.GetFirstObjectPosition(); while (pos) { CPDF_PageObject* pObject = page.GetNextObject(pos); if (!(flags & PDF2TXT_INCLUDE_INVISIBLE)) { CFX_FloatRect rect(pObject->m_Left, pObject->m_Bottom, pObject->m_Right, pObject->m_Top); if (!page_bbox.Contains(rect)) { continue; } } texts.ProcessObject(pObject); } texts.WriteOutput(lines, iMinWidth); }
// Adds a page to |document| at |page_index| and returns the loaded, parsed
// CPDF_Page for it.
//
// An index below zero becomes 0 and an index above the current page count
// becomes the page count. The freshly created page dictionary is given a
// "MediaBox" of [0 0 width height], a "Rotate" of 0 and an empty
// "Resources" dictionary.
//
// Returns NULL when |document| is null or CreateNewPage() returns no
// dictionary. The returned page is allocated here with FX_NEW and is not
// released by this function.
DLLEXPORT FPDF_PAGE STDCALL FPDFPage_New(FPDF_DOCUMENT document,
                                         int page_index,
                                         double width,
                                         double height) {
  if (!document)
    return NULL;
  CPDF_Document* pDoc = (CPDF_Document*)document;

  // Bring the requested position into the valid insertion range.
  if (page_index < 0)
    page_index = 0;
  const int nTotalPages = pDoc->GetPageCount();
  if (nTotalPages < page_index)
    page_index = nTotalPages;

  CPDF_Dictionary* pDictForPage = pDoc->CreateNewPage(page_index);
  if (!pDictForPage)
    return NULL;

  // Media box: origin in the lower-left, extent given by the caller.
  CPDF_Array* pBoxValues = FX_NEW CPDF_Array;
  pBoxValues->Add(FX_NEW CPDF_Number(0));
  pBoxValues->Add(FX_NEW CPDF_Number(0));
  pBoxValues->Add(FX_NEW CPDF_Number(FX_FLOAT(width)));
  pBoxValues->Add(FX_NEW CPDF_Number(FX_FLOAT(height)));
  pDictForPage->SetAt("MediaBox", pBoxValues);

  pDictForPage->SetAt("Rotate", FX_NEW CPDF_Number(0));
  pDictForPage->SetAt("Resources", FX_NEW CPDF_Dictionary);

  CPDF_Page* pResult = FX_NEW CPDF_Page;
  pResult->Load(pDoc, pDictForPage);
  pResult->ParseContent();
  return pResult;
}
// Writes the text of a page into |buffer|.
//
// The page described by |pPage| is loaded from |pDoc| and parsed with
// m_bTextOnly set and m_bSeparateForm cleared; the parsed page is then
// handed to _PDF_GetTextStream_Unicode(buffer, &page, TRUE, NULL).
// |flags| is accepted but not read by this function.
void PDF_GetTextStream_Unicode(CFX_WideTextBuf& buffer,
                               CPDF_Document* pDoc,
                               CPDF_Dictionary* pPage,
                               FX_DWORD flags) {
  buffer.EstimateSize(0, 10240);

  // Parse text objects only, without separating forms.
  CPDF_ParseOptions textOnlyOptions;
  textOnlyOptions.m_bSeparateForm = FALSE;
  textOnlyOptions.m_bTextOnly = TRUE;

  CPDF_Page parsedPage;
  parsedPage.Load(pDoc, pPage);
  parsedPage.ParseContent(&textOnlyOptions);

  _PDF_GetTextStream_Unicode(buffer, &parsedPage, TRUE, NULL);
}
// Loads and parses the page at |page_index| of |document|.
//
// Returns NULL when |document| is null, when |page_index| is negative or
// not below FPDF_GetPageCount(document), or when the document has no
// dictionary for that page. Otherwise returns a CPDF_Page allocated with
// new, loaded from the page dictionary and fully parsed; this function
// does not free it.
DLLEXPORT FPDF_PAGE STDCALL FPDF_LoadPage(FPDF_DOCUMENT document,
                                          int page_index) {
  CPDF_Document* pDoc = (CPDF_Document*)document;
  if (!pDoc)
    return NULL;

  const bool bIndexInRange =
      page_index >= 0 && page_index < FPDF_GetPageCount(document);
  if (!bIndexInRange)
    return NULL;

  CPDF_Dictionary* pPageDict = pDoc->GetPage(page_index);
  if (!pPageDict)
    return NULL;

  CPDF_Page* pLoaded = new CPDF_Page;
  pLoaded->Load(pDoc, pPageDict);
  pLoaded->ParseContent();
  return pLoaded;
}
// Reports the width and height of the page at |page_index| of |document|.
//
// The page dictionary is loaded into a temporary CPDF_Page (its content is
// not parsed) and GetPageWidth() / GetPageHeight() are stored through
// |width| and |height|.
//
// Returns FALSE, leaving the outputs untouched, when |width| or |height| is
// null, when |document| is null, or when GetPage(page_index) returns no
// dictionary. Returns TRUE otherwise.
DLLEXPORT int STDCALL FPDF_GetPageSizeByIndex(FPDF_DOCUMENT document,
                                              int page_index,
                                              double* width,
                                              double* height) {
  // Both outputs are written unconditionally below, so both must be valid.
  if (!width || !height)
    return FALSE;

  CPDF_Document* pDoc = (CPDF_Document*)document;
  if (pDoc == NULL)
    return FALSE;

  CPDF_Dictionary* pDict = pDoc->GetPage(page_index);
  if (pDict == NULL)
    return FALSE;

  CPDF_Page page;
  page.Load(pDoc, pDict);
  *width = page.GetPageWidth();
  *height = page.GetPageHeight();
  return TRUE;
}
// JavaScript method: counts the words on one page of the document.
//
// params[0] (optional) is the page number; it defaults to 0 when absent.
// The page is loaded and parsed, and CountWords() is summed over every page
// object whose m_Type is PDFPAGE_TEXT. The total is stored in |vRet|.
//
// Returns FALSE when the document lacks FPDFPERM_EXTRACT_ACCESS, when the
// underlying CPDF_Document is unavailable, when the page number is out of
// range (in which case |sError| is set from IDS_STRING_JSVALUEERROR), or
// when the page has no dictionary. Returns TRUE otherwise.
FX_BOOL Document::getPageNumWords(IFXJS_Context* cc,
                                  const CJS_Parameters& params,
                                  CJS_Value& vRet,
                                  CFX_WideString& sError) {
  ASSERT(m_pDocument != NULL);

  if (!m_pDocument->GetPermissions(FPDFPERM_EXTRACT_ACCESS))
    return FALSE;

  int nPageNo = params.GetSize() > 0 ? params[0].ToInt() : 0;

  // Checked at runtime rather than asserted, so the dereferences below stay
  // safe in builds where ASSERT expands to nothing; this matches
  // getPageNthWord().
  CPDF_Document* pDocument = m_pDocument->GetDocument();
  if (!pDocument)
    return FALSE;

  CJS_Context* pContext = static_cast<CJS_Context*>(cc);
  if (nPageNo < 0 || nPageNo >= pDocument->GetPageCount()) {
    sError = JSGetStringFromID(pContext, IDS_STRING_JSVALUEERROR);
    return FALSE;
  }

  CPDF_Dictionary* pPageDict = pDocument->GetPage(nPageNo);
  if (!pPageDict)
    return FALSE;

  CPDF_Page page;
  page.Load(pDocument, pPageDict);
  page.StartParse();
  page.ParseContent();

  int nWords = 0;
  FX_POSITION pos = page.GetFirstObjectPosition();
  while (pos) {
    CPDF_PageObject* pPageObj = page.GetNextObject(pos);
    if (!pPageObj || pPageObj->m_Type != PDFPAGE_TEXT)
      continue;
    nWords += CountWords(static_cast<CPDF_TextObject*>(pPageObj));
  }

  vRet = nWords;
  return TRUE;
}
// Loads the page at |page_index| of |document| and parses its content.
//
// Returns NULL when |document| is null, when |page_index| lies outside
// [0, FPDF_GetPageCount(document)), when the document has no dictionary for
// the page, or when ParseContent() throws (the partially built page is
// deleted in that case). Otherwise returns the CPDF_Page allocated here with
// FX_NEW; this function does not free it.
DLLEXPORT FPDF_PAGE STDCALL FPDF_LoadPage(FPDF_DOCUMENT document,
                                          int page_index) {
  CPDF_Document* pDoc = (CPDF_Document*)document;
  if (!pDoc)
    return NULL;

  if (page_index < 0)
    return NULL;
  if (page_index >= FPDF_GetPageCount(document))
    return NULL;

  CPDF_Dictionary* pPageDict = pDoc->GetPage(page_index);
  if (!pPageDict)
    return NULL;

  CPDF_Page* pResult = FX_NEW CPDF_Page;
  pResult->Load(pDoc, pPageDict);

  // Any exception raised while parsing is contained here: the page is
  // discarded and the caller simply sees a failed load.
  try {
    pResult->ParseContent();
  } catch (...) {
    delete pResult;
    pResult = NULL;
  }
  return pResult;
}
// Returns text taken from the beginning of a page.
//
// The page described by |pPage| is loaded from |pDoc| and parsed with
// m_bTextOnly set and m_bSeparateForm cleared. Page objects of type
// PDFPAGE_TEXT are passed, in order, to CPDF_TextStream::ProcessObject()
// until one call returns a non-zero value; whatever has been written to the
// buffer by then is returned as a wide string.
// NOTE(review): the non-zero return presumably means the first line is
// complete - confirm against CPDF_TextStream.
CFX_WideString PDF_GetFirstTextLine_Unicode(CPDF_Document* pDoc,
                                            CPDF_Dictionary* pPage) {
  CFX_WideTextBuf buffer;
  buffer.EstimateSize(0, 1024);

  CPDF_Page page;
  page.Load(pDoc, pPage);

  CPDF_ParseOptions options;
  options.m_bTextOnly = TRUE;
  options.m_bSeparateForm = FALSE;
  page.ParseContent(&options);

  CPDF_TextStream textstream(buffer, FALSE, NULL);
  FX_POSITION pos = page.GetFirstObjectPosition();
  while (pos) {
    CPDF_PageObject* pObject = page.GetNextObject(pos);
    // Ignore empty slots and anything that is not a text object.
    if (!pObject || pObject->m_Type != PDFPAGE_TEXT) {
      continue;
    }
    if (textstream.ProcessObject(static_cast<CPDF_TextObject*>(pObject),
                                 TRUE)) {
      break;
    }
  }
  return buffer.GetWideString();
}
// JavaScript method: returns the n-th word of a page.
//
// params[0] (optional) page number, defaults to 0.
// params[1] (optional) word index within the page, defaults to 0.
// params[2] (optional) strip flag, defaults to true; when set, TrimLeft()
//           and TrimRight() are applied to the result.
//
// Text objects are visited in page order while a running word count is
// kept. The first object whose words extend past the requested index
// supplies the word through GetObjWordStr(). If no object qualifies, the
// result is an empty string. The result is stored in |vRet|.
//
// Returns FALSE when the document lacks FPDFPERM_EXTRACT_ACCESS, when the
// underlying CPDF_Document is unavailable, when the page number is out of
// range (|sError| is then set from IDS_STRING_JSVALUEERROR), or when the
// page has no dictionary. Returns TRUE otherwise.
FX_BOOL Document::getPageNthWord(IFXJS_Context* cc,
                                 const CJS_Parameters& params,
                                 CJS_Value& vRet,
                                 CFX_WideString& sError) {
  ASSERT(m_pDocument != NULL);

  if (!m_pDocument->GetPermissions(FPDFPERM_EXTRACT_ACCESS))
    return FALSE;

  int nPageNo = params.GetSize() > 0 ? params[0].ToInt() : 0;
  int nWordNo = params.GetSize() > 1 ? params[1].ToInt() : 0;
  bool bStrip = params.GetSize() > 2 ? params[2].ToBool() : true;

  CPDF_Document* pDocument = m_pDocument->GetDocument();
  if (!pDocument)
    return FALSE;

  CJS_Context* pContext = static_cast<CJS_Context*>(cc);
  if (nPageNo < 0 || nPageNo >= pDocument->GetPageCount()) {
    sError = JSGetStringFromID(pContext, IDS_STRING_JSVALUEERROR);
    return FALSE;
  }

  CPDF_Dictionary* pPageDict = pDocument->GetPage(nPageNo);
  if (!pPageDict)
    return FALSE;

  CPDF_Page page;
  page.Load(pDocument, pPageDict);
  page.StartParse();
  page.ParseContent();

  int nWords = 0;  // Words contained in the text objects visited so far.
  CFX_WideString swRet;
  FX_POSITION pos = page.GetFirstObjectPosition();
  while (pos) {
    CPDF_PageObject* pPageObj = page.GetNextObject(pos);
    if (!pPageObj || pPageObj->m_Type != PDFPAGE_TEXT)
      continue;

    CPDF_TextObject* pTextObj = static_cast<CPDF_TextObject*>(pPageObj);
    int nObjWords = CountWords(pTextObj);

    // This object covers page-wide indices [nWords, nWords + nObjWords).
    // The comparison must be strict: with ">=" an object ending exactly at
    // nWordNo was selected and asked for local index nObjWords, one past
    // its last word.
    // NOTE(review): assumes GetObjWordStr() takes a zero-based index within
    // the object - confirm against its definition.
    if (nWords + nObjWords > nWordNo) {
      swRet = GetObjWordStr(pTextObj, nWordNo - nWords);
      break;
    }
    nWords += nObjWords;
  }

  if (bStrip) {
    swRet.TrimLeft();
    swRet.TrimRight();
  }

  vRet = swRet.c_str();
  return TRUE;
}