Beispiel #1
0
FX_BOOL GetContentsRect( CPDF_Document * pDoc, CPDF_Dictionary* pDict, CPDF_RectArray * pRectArray )
{
	CPDF_Page* pPDFPage = FX_NEW CPDF_Page;
	pPDFPage->Load( pDoc, pDict, FALSE );
	pPDFPage->ParseContent();

	FX_POSITION pos = pPDFPage->GetFirstObjectPosition();
	
	while (pos)
	{
		CPDF_PageObject* pPageObject = pPDFPage->GetNextObject(pos);
		if (!pPageObject)continue;
		
		CPDF_Rect rc;
		rc.left = pPageObject->m_Left;
		rc.right = pPageObject->m_Right;
		rc.bottom = pPageObject->m_Bottom;
		rc.top = pPageObject->m_Top;
		
		if (IsValiableRect(rc, pDict->GetRect("MediaBox")))
		{
			pRectArray->Add(rc);
		}
	}
	
	delete pPDFPage;
	return TRUE;
}
Beispiel #2
0
DLLEXPORT FPDF_PAGE STDCALL FPDFPage_New(FPDF_DOCUMENT document,
                                         int page_index,
                                         double width,
                                         double height) {
  CPDF_Document* pDoc = CPDFDocumentFromFPDFDocument(document);
  if (!pDoc)
    return nullptr;

  if (page_index < 0)
    page_index = 0;
  if (pDoc->GetPageCount() < page_index)
    page_index = pDoc->GetPageCount();

  CPDF_Dictionary* pPageDict = pDoc->CreateNewPage(page_index);
  if (!pPageDict)
    return NULL;
  CPDF_Array* pMediaBoxArray = new CPDF_Array;
  pMediaBoxArray->Add(new CPDF_Number(0));
  pMediaBoxArray->Add(new CPDF_Number(0));
  pMediaBoxArray->Add(new CPDF_Number(FX_FLOAT(width)));
  pMediaBoxArray->Add(new CPDF_Number(FX_FLOAT(height)));

  pPageDict->SetAt("MediaBox", pMediaBoxArray);
  pPageDict->SetAt("Rotate", new CPDF_Number(0));
  pPageDict->SetAt("Resources", new CPDF_Dictionary);

  CPDF_Page* pPage = new CPDF_Page;
  pPage->Load(pDoc, pPageDict);
  pPage->ParseContent();

  return pPage;
}
Beispiel #3
0
void PDF_GetPageText_Unicode(CFX_WideStringArray& lines, CPDF_Document* pDoc, CPDF_Dictionary* pPage,
                             int iMinWidth, FX_DWORD flags)
{
    lines.RemoveAll();
    if (pPage == NULL) {
        return;
    }
    CPDF_Page page;
    page.Load(pDoc, pPage);
    CPDF_ParseOptions options;
    options.m_bTextOnly = TRUE;
    options.m_bSeparateForm = FALSE;
    page.ParseContent(&options);
    CFX_FloatRect page_bbox = page.GetPageBBox();
    if (flags & PDF2TXT_AUTO_ROTATE) {
        CheckRotate(page, page_bbox);
    }
    CTextPage texts;
    texts.m_bAutoWidth = flags & PDF2TXT_AUTO_WIDTH;
    texts.m_bKeepColumn = flags & PDF2TXT_KEEP_COLUMN;
    texts.m_bBreakSpace = TRUE;
    FX_POSITION pos = page.GetFirstObjectPosition();
    while (pos) {
        CPDF_PageObject* pObject = page.GetNextObject(pos);
        if (!(flags & PDF2TXT_INCLUDE_INVISIBLE)) {
            CFX_FloatRect rect(pObject->m_Left, pObject->m_Bottom, pObject->m_Right, pObject->m_Top);
            if (!page_bbox.Contains(rect)) {
                continue;
            }
        }
        texts.ProcessObject(pObject);
    }
    texts.WriteOutput(lines, iMinWidth);
}
Beispiel #4
0
DLLEXPORT FPDF_PAGE STDCALL FPDFPage_New(FPDF_DOCUMENT document, int page_index, double width, double height)
{
	if (!document)
		return NULL;

//	CPDF_Parser* pParser = (CPDF_Parser*)document;
	CPDF_Document* pDoc = (CPDF_Document*)document;
	if(page_index < 0)
		page_index = 0;
	if(pDoc->GetPageCount()<page_index)
		page_index = pDoc->GetPageCount();
//	if (page_index < 0 || page_index >= pDoc->GetPageCount()) 
//		return NULL;

	CPDF_Dictionary* pPageDict = pDoc->CreateNewPage(page_index);
	if(!pPageDict)
		return NULL;
	CPDF_Array* pMediaBoxArray = FX_NEW CPDF_Array;
	pMediaBoxArray->Add(FX_NEW CPDF_Number(0));
	pMediaBoxArray->Add(FX_NEW CPDF_Number(0));
	pMediaBoxArray->Add(FX_NEW CPDF_Number(FX_FLOAT(width)));
	pMediaBoxArray->Add(FX_NEW CPDF_Number(FX_FLOAT(height)));

	pPageDict->SetAt("MediaBox", pMediaBoxArray);
	pPageDict->SetAt("Rotate", FX_NEW CPDF_Number(0));
	pPageDict->SetAt("Resources", FX_NEW CPDF_Dictionary);

	CPDF_Page* pPage = FX_NEW CPDF_Page;
	pPage->Load(pDoc,pPageDict);
	pPage->ParseContent();

	return pPage;
}
Beispiel #5
0
void PDF_GetTextStream_Unicode(CFX_WideTextBuf& buffer, CPDF_Document* pDoc, CPDF_Dictionary* pPage, FX_DWORD flags)
{
    buffer.EstimateSize(0, 10240);
    CPDF_Page page;
    page.Load(pDoc, pPage);
    CPDF_ParseOptions options;
    options.m_bTextOnly = TRUE;
    options.m_bSeparateForm = FALSE;
    page.ParseContent(&options);
    _PDF_GetTextStream_Unicode(buffer, &page, TRUE, NULL);
}
Beispiel #6
0
DLLEXPORT FPDF_PAGE STDCALL FPDF_LoadPage(FPDF_DOCUMENT document, int page_index)
{
	if (document == NULL) return NULL;
	if (page_index < 0 || page_index >= FPDF_GetPageCount(document)) return NULL;

	CPDF_Document* pDoc = (CPDF_Document*)document;
	if (pDoc == NULL) return NULL;
	CPDF_Dictionary* pDict = pDoc->GetPage(page_index);
	if (pDict == NULL) return NULL;
	CPDF_Page* pPage = new CPDF_Page;
	pPage->Load(pDoc, pDict);
	pPage->ParseContent();
	return pPage;
}
Beispiel #7
0
DLLEXPORT int STDCALL FPDF_GetPageSizeByIndex(FPDF_DOCUMENT document, int page_index, double* width, double* height)
{
    CPDF_Document* pDoc = (CPDF_Document*)document;
    if(pDoc == NULL)
        return FALSE;

    CPDF_Dictionary* pDict = pDoc->GetPage(page_index);
    if (pDict == NULL) return FALSE;

    CPDF_Page page;
    page.Load(pDoc, pDict);
    *width = page.GetPageWidth();
    *height = page.GetPageHeight();

    return TRUE;
}
Beispiel #8
0
FX_BOOL Document::getPageNumWords(IFXJS_Context* cc, const CJS_Parameters& params, CJS_Value& vRet, CFX_WideString& sError)
{
	ASSERT(m_pDocument != NULL);

	if (!m_pDocument->GetPermissions(FPDFPERM_EXTRACT_ACCESS)) return FALSE;

	int nPageNo = params.GetSize() > 0 ? params[0].ToInt() : 0;

	CPDF_Document* pDocument = m_pDocument->GetDocument();
	ASSERT(pDocument != NULL);

	CJS_Context* pContext = static_cast<CJS_Context*>(cc);
	if (nPageNo < 0 || nPageNo >= pDocument->GetPageCount())
	{
		sError = JSGetStringFromID(pContext, IDS_STRING_JSVALUEERROR);
		return FALSE;
	}

	CPDF_Dictionary* pPageDict = pDocument->GetPage(nPageNo);
	if (!pPageDict) return FALSE;

	CPDF_Page page;
	page.Load(pDocument, pPageDict);
	page.StartParse();
	page.ParseContent();

	FX_POSITION pos = page.GetFirstObjectPosition();

	int nWords = 0;

	while (pos)
	{
		if (CPDF_PageObject* pPageObj = page.GetNextObject(pos))
		{
			if (pPageObj->m_Type == PDFPAGE_TEXT)
			{
				CPDF_TextObject* pTextObj = (CPDF_TextObject*)pPageObj;
				nWords += CountWords(pTextObj);
			}
		}
	}

	vRet = nWords;

	return TRUE;
}
Beispiel #9
0
DLLEXPORT FPDF_PAGE STDCALL FPDF_LoadPage(FPDF_DOCUMENT document, int page_index)
{
    if (document == NULL) return NULL;
    if (page_index < 0 || page_index >= FPDF_GetPageCount(document)) return NULL;
//	CPDF_Parser* pParser = (CPDF_Parser*)document;
    CPDF_Document* pDoc = (CPDF_Document*)document;
    if (pDoc == NULL) return NULL;
    CPDF_Dictionary* pDict = pDoc->GetPage(page_index);
    if (pDict == NULL) return NULL;
    CPDF_Page* pPage = FX_NEW CPDF_Page;
    pPage->Load(pDoc, pDict);
    try {
        pPage->ParseContent();
    }
    catch (...) {
        delete pPage;
        return NULL;
    }

//	CheckUnSupportError(pDoc, 0);

    return pPage;
}
Beispiel #10
0
CFX_WideString PDF_GetFirstTextLine_Unicode(CPDF_Document* pDoc, CPDF_Dictionary* pPage)
{
    CFX_WideTextBuf buffer;
    buffer.EstimateSize(0, 1024);
    CPDF_Page page;
    page.Load(pDoc, pPage);
    CPDF_ParseOptions options;
    options.m_bTextOnly = TRUE;
    options.m_bSeparateForm = FALSE;
    page.ParseContent(&options);
    CPDF_TextStream textstream(buffer, FALSE, NULL);
    FX_POSITION pos = page.GetFirstObjectPosition();
    while (pos) {
        CPDF_PageObject* pObject = page.GetNextObject(pos);
        if (pObject->m_Type != PDFPAGE_TEXT) {
            continue;
        }
        if (textstream.ProcessObject((CPDF_TextObject*)pObject, TRUE)) {
            break;
        }
    }
    return buffer.GetWideString();
}
Beispiel #11
0
FX_BOOL Document::getPageNthWord(IFXJS_Context* cc, const CJS_Parameters& params, CJS_Value& vRet, CFX_WideString& sError)
{
	ASSERT(m_pDocument != NULL);

	if (!m_pDocument->GetPermissions(FPDFPERM_EXTRACT_ACCESS)) return FALSE;

	int nPageNo = params.GetSize() > 0 ? params[0].ToInt() : 0;
	int nWordNo = params.GetSize() > 1 ? params[1].ToInt() : 0;
	bool bStrip = params.GetSize() > 2 ? params[2].ToBool() : true;

	CPDF_Document* pDocument = m_pDocument->GetDocument();
	if (!pDocument) return FALSE;

	CJS_Context* pContext = static_cast<CJS_Context*>(cc);
	if (nPageNo < 0 || nPageNo >= pDocument->GetPageCount())
	{
		sError = JSGetStringFromID(pContext, IDS_STRING_JSVALUEERROR);
		return FALSE;
	}

	CPDF_Dictionary* pPageDict = pDocument->GetPage(nPageNo);
	if (!pPageDict) return FALSE;

	CPDF_Page page;
	page.Load(pDocument, pPageDict);
	page.StartParse();
	page.ParseContent();

	FX_POSITION pos = page.GetFirstObjectPosition();

	int nWords = 0;

	CFX_WideString swRet;

	while (pos)
	{
		if (CPDF_PageObject* pPageObj = page.GetNextObject(pos))
		{
			if (pPageObj->m_Type == PDFPAGE_TEXT)
			{
				int nObjWords = CountWords((CPDF_TextObject*)pPageObj);

				if (nWords + nObjWords >= nWordNo)
				{
					swRet = GetObjWordStr((CPDF_TextObject*)pPageObj, nWordNo - nWords);
					break;
				}

				nWords += nObjWords;
			}
		}
	}

	if (bStrip)
	{
		swRet.TrimLeft();
		swRet.TrimRight();
	}

	vRet = swRet.c_str();
	return TRUE;
}