Exemplo n.º 1
** Called to perform WordFinder creation and text extraction on a PDF document
** @param pdDoc IN The PDDoc object on which to perform text extraction.
** @param startPg IN The page to start text extraction.
** @param endPg IN The page to end text extraction (inclusive).
** @param toUnicode IN Whether to extract text to Unicode encoding.
** @param pConfig IN Pointer to a WordFinder Configuration Record.
** @param pOutput IN/OUT Pointer to an output FILE stream to which the extracted 
**	text will be written.
** @return true to indicate text extraction operation a success, false otherwise.
bool ExtractText(PDDoc pdDoc, ASInt32 startPg, ASInt32 endPg, 
				 ASBool toUnicode, PDWordFinderConfig pConfig, FILE* pOutput)
	if (startPg < 0 || endPg <0 || startPg > endPg || endPg > PDDocGetNumPages(pdDoc) - 1)
		AVAlertNote("Exceeding starting or ending page number limit of current document."); 
		return false;

	PDWordFinder pdWordFinder = NULL;

	pdWordFinder = PDDocCreateWordFinderEx(pdDoc, WF_LATEST_VERSION, toUnicode, pConfig);
	if (toUnicode) fprintf(pOutput, "%c%c", 0xfe, 0xff);

	for (int i = startPg; i <= endPg; i++)
		PDWordFinderEnumWords(pdWordFinder, i, ASCallbackCreateProto(PDWordProc, &WordEnumProc), pOutput);

	char buf[256], errmsg[256];
	sprintf(buf, "[ExtractText()]Error %d: %s",  ErrGetCode(ERRORCODE), ASGetErrorString(ERRORCODE, errmsg, sizeof(errmsg)));
	if (pdWordFinder) PDWordFinderDestroy(pdWordFinder);
	if( pOutput) fclose(pOutput);
	return false;

return true;
Exemplo n.º 2
// Plug-in command handler. In the visible listing it:
//   1. fetches the active AVDoc, its PDDoc and the page count;
//   2. enumerates the from-PDF conversion handlers and converts the document to
//      RTF_FILE, then repeats the enumeration for what appears to be a TXT_FILE
//      conversion;
//   3. fills in a PDWordFinderConfigRec and calls ExtractText() to write the
//      document text to "output.txt".
// clientData is not referenced in the visible listing.
// NOTE(review): this listing contains no braces, no comment terminators and no
// preprocessor lines, so it looks like structural lines were lost when the code
// was copied. Branch and scope boundaries below cannot be determined from the
// text alone - confirm against the original file before relying on it.
ACCB1 void ACCB2 MyPluginCommand(void *clientData)
	// try to get front PDF document 
	AVDoc avDoc = AVAppGetActiveDoc();
	PDDoc pdDoc = NULL;
	int numPages = 0;
	if(avDoc==NULL) {
		// if no doc is loaded, make a message.
		//strcat(str,"There is no PDF document loaded in Acrobat.");
	else {
		// if a PDF is open, get its number of pages
		pdDoc = AVDocGetPDDoc (avDoc);
		numPages = PDDocGetNumPages (pdDoc);

	// Without a document there is nothing to convert or extract.
	if(pdDoc == NULL)	return;
	// get a PDF open in front.
	// NOTE(review): the comment opened on the next line is never closed in the
	// visible listing - confirm where it was meant to end.
	/*CDocument document;
	PDDoc pdDoc = (PDDoc)document;

	if(pdDoc == NULL){

	// enumerate from PDF conversion handlers to find the "rtf" handler.
	AVConversionEnumFromPDFConverters(myAVConversionFromPDFEnumProc, NULL);

	// NOTE(review): szPath is not referenced anywhere else in the visible listing.
	string szPath = "C:\\";
	// save the rtf file to the snippetRunner's output files folder. 
	// NOTE(review): OutPath is assigned twice in a row; presumably these were
	// alternative branches whose selecting lines are missing - confirm.
	ASPathName OutPath = SnippetRunnerUtils::getOutputPath(RTF_FILE);
		OutPath = ASFileSysCreatePathName (NULL, ASAtomFromString("Cstring"), RTF_FILE, 0);
	if(OutPath==NULL) {
		AVAlertNote("Cannot open an output file.");
	// do conversion
	AVConversionStatus status=AVConversionConvertFromPDFWithHandler(RightHandler, 
					NULL,kAVConversionNoFlags, pdDoc, OutPath, ASGetDefaultFileSys(),NULL);

		// save the rtf file to the snippetRunner's output files folder. 
	// enumerate from PDF conversion handlers to find the "rtf" handler.
	// NOTE(review): the two comments above repeat the RTF wording, but the code
	// that follows uses myAVConversionFromPDFEnumProc2 and TXT_FILE.
	AVConversionEnumFromPDFConverters(myAVConversionFromPDFEnumProc2, NULL);

	OutPath = SnippetRunnerUtils::getOutputPath(TXT_FILE);
		OutPath = ASFileSysCreatePathName (NULL, ASAtomFromString("Cstring"), TXT_FILE, 0);
	if(OutPath==NULL) {
		AVAlertNote("Cannot open an output file.");
	// do conversion
	// NOTE(review): only the argument tail of the second conversion call is
	// present; the line naming the function and handler is missing.
					NULL,kAVConversionNoFlags, pdDoc, OutPath, ASGetDefaultFileSys(),NULL);

	// check the returned status and show message 
/*	if(status == kAVConversionSuccess)
		AVAlertNote("The Rtf file was saved in SnippetRunner output folder.");
	else if(status == kAVConversionFailed)
		AVAlertNote("The Rtf file conversion failed.");
	else if(status == kAVConversionSuccessAsync)
		AVAlertNote("The conversion will continue asynchronously.");
	else if(status == kAVConversionCancelled)
		AVAlertNote("The conversion was cancelled.");

	// Page numbers here are one-based: ExtractText() is called below with
	// nStartPage - 1 and nEndPage - 1, so the whole document is extracted.
	ASInt32 nStartPage, nEndPage, nToUnicode;
	ASBool bToUnicode;
	nStartPage = 1;
	nEndPage = numPages;
	nToUnicode = 0;
	bToUnicode = false;

	//bToUnicode = (nToUnicode==0)?(false):(true);

	// Set up WordFinder creation options record
	// The record is zeroed first and recSize is set to the structure size
	// before the individual options are filled in.
	PDWordFinderConfigRec wfConfig;
	memset(&wfConfig, 0, sizeof(PDWordFinderConfigRec));
	wfConfig.recSize = sizeof(PDWordFinderConfigRec);
	wfConfig.ignoreCharGaps = true;
	wfConfig.ignoreLineGaps = false;
	wfConfig.noAnnots = true;
	wfConfig.noEncodingGuess = true;		// leave non-Roman single-byte font alone

	// Std Roman treatment for custom encoding; overrides the noEncodingGuess option
	wfConfig.unknownToStdEnc = false;		
	wfConfig.disableTaggedPDF = false;		// legacy mode WordFinder creation
	wfConfig.noXYSort = false;
	wfConfig.preserveSpaces = false;
	wfConfig.noLigatureExp = false;
	wfConfig.noHyphenDetection = false;
	wfConfig.trustNBSpace = false;
	wfConfig.noExtCharOffset = false;		// text extraction efficiency
	wfConfig.noStyleInfo = false;			// text extraction efficiency
	wfConfig.decomposeTbl = NULL;			// Unicode character replacement
	wfConfig.decomposeTblSize = 0;
	wfConfig.charTypeTbl = NULL;			// Custom char type table
	wfConfig.charTypeTblSize = 0;
	ASPathName pathName;
	pathName = SnippetRunnerUtils::getOutputPath("output.txt");

	// NOTE(review): pOutput is not declared in the visible listing and is
	// assigned by two fopen calls in a row (the first handle would be lost);
	// presumably these were per-platform alternatives whose preprocessor lines
	// are missing - confirm.
	//on the Mac ASFileSysDisplayStringFromPath returns an Mac OS 9 style path.
	pOutput = fopen(ASFileSysDisplayStringFromPath (NULL, pathName), "w+b");

	ASPlatformPath platformPath;
	ASFileSysAcquirePlatformPath (NULL, pathName, ASAtomFromString("POSIXPath"), &platformPath);
	POSIXPath_Ptr path = ASPlatformPathGetPOSIXPathPtr (platformPath);
	pOutput = fopen(path, "w+b");
	ASFileSysReleasePlatformPath(NULL, platformPath);
	// NOTE(review): as written, the "aborted" alert below is not guarded by an
	// else - presumably an else line is missing; confirm.
	if (pOutput)
		if(ExtractText(pdDoc, nStartPage - 1, nEndPage - 1, 
			bToUnicode, &wfConfig, pOutput))
				Console::displayString("Text extraction completed."); 
		AVAlertNote("Text extraction aborted.");

	// Release the output path and close the output stream.
	if (pathName) ASFileSysReleasePath(NULL, pathName);

	if (pOutput) fclose(pOutput);

Exemplo n.º 3
	// Определяем количество страниц в активном докменте
	m_PagesInDocument = PDDocGetNumPages(AVDocGetPDDoc(AVAppGetActiveDoc()));
	// Создаем набор непротиворечивых данных, используемых по умолчанию
	// Единицы измерения - дюймы
	m_UnitType = INCH;
	m_UnitK = 1.0;
	// Режим работы - по умолчанию фиксированный размер страницы, фиксированная схема, подбор размера пластины
	m_OPMode = C;
	// По умолчанию используем печать STANDARD, SADDLE STITCHED, GERMAN FOLD, 8-UP Imposition scheme
	m_FLDType	= GERMAN;
	m_NRows		= 2;
	m_NCols		= 4;
	m_Places    = m_NCols * m_NRows;
	// Сборка тетрадями по 16 листов (одна двустороняя пластина для заданого типа печати)
	m_NPagesPerSection = 16;
	m_PlatesPerSection = 1;
	// Количество тетрадей на документ и количество чистых страниц
	m_WhitePages = m_NPagesPerSection - (m_PagesInDocument % m_NPagesPerSection);
	m_Sections   = (m_PagesInDocument + m_WhitePages) / m_NPagesPerSection;
	// Полное число пластин в задании
	m_PlatesTotal = m_Sections * m_PlatesPerSection;
	// Технологические параметры
	// Устанавливаем принимаемые по умолчанию технологические параметры генератора
	m_TechAllowence_Y_width = m_TechAllowence_X_width = m_TechAllowence_M_width = ASFloatToFixed(72 * 10 / 25.4);
	// Отступ между страницами. По умолчанию 2мм = 0.079 дюйма
	m_PlaceholdersGap = ASFloatToFixed(72 * 0.079);
	// Размер страницы документа
	ASFixedRect m_PageCropBox;
	PDPageGetCropBox(PDDocAquirePage(AVDocGetPDDoc(AVAppGetActiveDoc()), 0), &m_PageCropBox);
	// Вычисляем размер области печати
	m_PrintingArea.left		= m_TechAllowence_Y_width;
	m_PrintingArea.right	= m_PrintingArea.left + 
		ASFixedMul((m_PageCropBox.right - m_PageCropBox.left), ASInt16ToFixed(m_NCols)) + 
		ASFixedMul(m_PlaceholdersGap, ASInt16ToFixed(m_NCols - 1));
	m_PrintingArea.bottom	= m_TechAllowence_X_width;
	m_PrintingArea.top		= m_PrintingArea.bottom +
		ASFixedMul((m_PageCropBox.top - m_PageCropBox.bottom), ASInt16ToFixed(m_NRows)) +
		ASFixedMul(m_TechAllowence_M_width, ASInt16ToFixed(m_NRows - 1));
	// Размер листа
	m_OutputMediaBox.bottom		= fixedZero;
	m_OutputMediaBox.top		= m_OutputMediaBox.bottom +
		ASFixedMul(m_TechAllowence_X_width, fixedTwo) + (m_PrintingArea.top - m_PrintingArea.bottom);
	m_OutputMediaBox.left		= fixedZero;
	m_OutputMediaBox.right		= m_OutputMediaBox.left +
		ASFixedMul(m_TechAllowence_Y_width, fixedTwo) + (m_PrintingArea.right - m_PrintingArea.left);
	// Формат печати - вычисляемый, ориентация - ЛАНДШАФТ
	m_OutputMediaSize.H = ASFloatToFixed(m_OutputMediaBox.top - m_OutputMediaBox.bottom) / 72.0;
	m_OutputMediaSize.W = ASFloatToFixed(m_OutputMediaBox.right - m_OutputMediaBox.left) / 72.0;
	m_MediaOrient		= LANDSCAPE;
	// Отмена зеркальной печати
	m_InversePrinting = false;
	// Устанавливаем общие к-ты масштабирования-перемещения-поворота для страницы
	m_X_Scale_Total = fixedOne;
	m_Y_Scale_Total = fixedOne;
	m_X_Translation_Total = fixedZero;
	m_Y_Translation_Total = fixedZero;
	m_Rotation_Angle_Total = 0.0;
	m_ALFA_Skew_Total = 0.0;
	m_BETA_Skew_Total = 0.0;
	// По умолчанию печатаем все технологические метки
	m_PrintRegMarks				= true;
	m_PrintCMYKColorBar			= true;
	m_PrintPANTONEcolorBar		= true;
	m_PrintProgressiveGray		= true;
	m_PrintCFMarks				= true;
	// Создаем массив технологических меток
	// Текущий номер страницы предварительного просмотра
	m_CurrentPreviewPN = 0;
	m_CurrentPreviewPDPage = NULL;
	// Толщина бумаги
	m_PaperThickness = 0.002;
	// Режим предварительного просмотра - включен
	m_PreviewMode = TRUE;