EStatusCode AbstractContentContext::WriteTextCommandWithDirectGlyphSelection(const GlyphUnicodeMappingList& inText,ITextCommand* inTextCommand) { PDFUsedFont* currentFont = mGraphicStack.GetCurrentState().mFont; if(!currentFont) { TRACE_LOG("AbstractContentContext::WriteTextCommandWithDirectGlyphSelection, Cannot write text, no current font is defined"); return PDFHummus::eFailure; } ObjectIDType fontObjectID; UShortList encodedCharactersList; bool writeAsCID; if(currentFont->EncodeStringForShowing(inText,fontObjectID,encodedCharactersList,writeAsCID) != PDFHummus::eSuccess) { TRACE_LOG("AbstractcontextContext::WriteTextCommandWithDirectGlyphSelection, Unexepcted failure, Cannot encode characters"); return PDFHummus::eFailure; } // Write the font reference (only if required) std::string fontName = GetResourcesDictionary()->AddFontMapping(fontObjectID); if(mGraphicStack.GetCurrentState().mPlacedFontName != fontName || mGraphicStack.GetCurrentState().mPlacedFontSize != mGraphicStack.GetCurrentState().mFontSize) TfLow(fontName,mGraphicStack.GetCurrentState().mFontSize); // Now write the string using the text command OutputStringBufferStream stringStream; char formattingBuffer[5]; UShortList::iterator it = encodedCharactersList.begin(); if(writeAsCID) { for(;it!= encodedCharactersList.end();++it) { SAFE_SPRINTF_2(formattingBuffer,5,"%02x%02x",((*it)>>8) & 0x00ff,(*it) & 0x00ff); stringStream.Write((const Byte*)formattingBuffer,4); } inTextCommand->WriteHexStringCommand(stringStream.ToString()); } else { for(;it!= encodedCharactersList.end();++it)
/*
    Copies inString into a 7-bit ASCII string, one byte at a time.

    Encoding stops at the first byte whose value is above 127.

    Returns a BoolAndString:
        first  - true if every byte of inString was <= 127, false otherwise
        second - the bytes collected up to (not including) the first offending byte;
                 the whole input when first is true
*/
BoolAndString Ascii7Encoding::Encode(const string& inString)
{
    OutputStringBufferStream encodedStream;
    bool allBytesAreAscii = true;

    for(string::size_type index = 0; index < inString.size(); ++index)
    {
        IOBasicTypes::Byte currentByte = (IOBasicTypes::Byte)inString[index];

        if(currentByte > 127)
        {
            // not representable in 7 bits - give up, keeping what was collected so far
            allBytesAreAscii = false;
            break;
        }

        encodedStream.Write(&currentByte,1);
    }

    return BoolAndString(allBytesAreAscii,encodedStream.ToString());
}
/*
    Reads the next token from the stream.

    Returns a BoolAndString:
        first  - true when a token was read, false when there is nothing left to read
                 or a byte read failed
        second - the token text

    Token forms, selected by the first byte of the token:
        %           comment - everything up to (not including) the next CR or LF
        (           literal string - up to the balancing right parenthesis. a backslash followed
                    by a line end is dropped, any other backslash pair is kept as is
        < or <<     hex string (whitespace inside it is dropped, closing > is included)
                    or dictionary start marker
        > or >>     loose hex string end or dictionary end marker
        [ ] { }     single character tokens
        otherwise   regular token - read until whitespace or an entity breaker

    NOTE(review): mHasTokenBuffer is taken to mean "a byte that was pushed back with SaveTokenBuffer
    is pending, and GetNextByteForToken will return it first". neither helper is visible here, but the
    entry guard below already relies on this meaning - confirm against their implementation.
*/
BoolAndString PDFParserTokenizer::GetNextToken()
{
    BoolAndString result;
    Byte buffer;
    OutputStringBufferStream tokenBuffer;

    // nothing to read: no stream at all, or the stream ended and no pushed back byte is pending
    if(!mStream || (!mStream->NotEnded() && !mHasTokenBuffer))
    {
        result.first = false;
        return result;
    }

    do
    {
        SkipTillToken();

        // same condition as the entry guard. checking only mStream->NotEnded() here would drop a token
        // whose first byte is the last byte of the stream, if that byte is now pending in the token buffer
        if(!mStream->NotEnded() && !mHasTokenBuffer)
        {
            result.first = false;
            break;
        }

        // before reading the first byte save the token position, for external queries
        mRecentTokenPosition = mStreamPositionTracker;

        // get the first byte of the token
        if(GetNextByteForToken(buffer) != PDFHummus::eSuccess)
        {
            result.first = false;
            break;
        }
        tokenBuffer.Write(&buffer,1);

        result.first = true; // will only be changed to false in case of read error

        // now determine how to continue based on the first byte of the token (there are some special cases)
        switch(buffer)
        {
            case '%':
            {
                // for a comment, the token goes on till the end of line marker [not including]
                while(mStream->NotEnded())
                {
                    if(GetNextByteForToken(buffer) != PDFHummus::eSuccess)
                    {
                        result.first = false;
                        break;
                    }
                    if(0xD == buffer|| 0xA == buffer)
                        break;
                    tokenBuffer.Write(&buffer,1);
                }
                result.second = tokenBuffer.ToString();
                break;
            }

            case '(':
            {
                // for a literal string, the token goes on until the balanced-closing right parenthesis
                int balanceLevel = 1;
                bool backSlashEncountered = false;

                // a byte may be pushed back inside this loop (see the CR handling below), so a pending
                // byte counts as available input even when the stream itself has ended
                while(balanceLevel > 0 && (mHasTokenBuffer || mStream->NotEnded()))
                {
                    if(GetNextByteForToken(buffer) != PDFHummus::eSuccess)
                    {
                        result.first = false;
                        break;
                    }

                    if(backSlashEncountered)
                    {
                        backSlashEncountered = false;
                        if(0xA == buffer || 0xD == buffer)
                        {
                            // ignore backslash and newline. might also need to read extra
                            // for cr-ln
                            if(0xD == buffer && mStream->NotEnded())
                            {
                                if(GetNextByteForToken(buffer) != PDFHummus::eSuccess)
                                {
                                    result.first = false;
                                    break;
                                }
                                // CR alone - the byte that followed it belongs to the string, push it back
                                if(buffer != 0xA)
                                    SaveTokenBuffer(buffer);
                            }
                        }
                        else
                        {
                            // any other escape is kept as is: the backslash and the byte after it
                            tokenBuffer.Write(scBackSlash,1);
                            tokenBuffer.Write(&buffer,1);
                        }
                    }
                    else
                    {
                        if('\\' == buffer)
                        {
                            backSlashEncountered = true;
                            continue;
                        }
                        else if('(' == buffer)
                            ++balanceLevel;
                        else if(')' == buffer)
                            --balanceLevel;
                        tokenBuffer.Write(&buffer,1);
                    }
                }
                // on a read error the token text is left empty
                if(result.first)
                    result.second = tokenBuffer.ToString();
                break;
            }

            case '<':
            {
                // k. this might be a dictionary start marker or a hex string start. depending on whether
                // it has a < following it or not

                // a lone < at the end of the stream is returned as is
                if(!mStream->NotEnded())
                {
                    result.second = tokenBuffer.ToString();
                    break;
                }

                if(GetNextByteForToken(buffer) != PDFHummus::eSuccess)
                {
                    result.first = false;
                    break;
                }

                if('<' == buffer)
                {
                    // Dictionary start marker
                    tokenBuffer.Write(&buffer,1);
                    result.second = tokenBuffer.ToString();
                    break;
                }
                else
                {
                    // Hex string, read till end of hex string marker [including], dropping whitespace
                    tokenBuffer.Write(&buffer,1);

                    while(mStream->NotEnded())
                    {
                        if(GetNextByteForToken(buffer) != PDFHummus::eSuccess)
                        {
                            result.first = false;
                            break;
                        }

                        if(!IsPDFWhiteSpace(buffer))
                            tokenBuffer.Write(&buffer,1);
                        if('>' == buffer)
                            break;
                    }
                }
                result.second = tokenBuffer.ToString();
                break;
            }

            case '[': // for all array or executable tokens, the token is just the mark
            case ']':
            case '{':
            case '}':
                result.second = tokenBuffer.ToString();
                break;

            case '>': // parse end dictionary marker as a single entity or a hex string end marker
            {
                if(!mStream->NotEnded()) // this means a loose end string marker...weird
                {
                    result.second = tokenBuffer.ToString();
                    break;
                }

                if(GetNextByteForToken(buffer) != PDFHummus::eSuccess)
                {
                    result.first = false;
                    break;
                }

                if('>' == buffer)
                {
                    // dictionary end marker
                    tokenBuffer.Write(&buffer,1);
                }
                else
                {
                    // hex string loose end. the byte read belongs to the next token
                    SaveTokenBuffer(buffer);
                }
                result.second = tokenBuffer.ToString();
                break;
            }

            default: // regular token. read till next breaker or whitespace
            {
                while(mStream->NotEnded())
                {
                    if(GetNextByteForToken(buffer) != PDFHummus::eSuccess)
                    {
                        result.first = false;
                        break;
                    }
                    if(IsPDFWhiteSpace(buffer))
                    {
                        break;
                    }
                    else if(IsPDFEntityBreaker(buffer))
                    {
                        SaveTokenBuffer(buffer); // for a non-space breaker, save the token for next token read
                        break;
                    }
                    else
                        tokenBuffer.Write(&buffer,1);
                }
                result.second = tokenBuffer.ToString();

                if(result.first && mStream->NotEnded() && scStream == result.second)
                {
                    // k. a bit of a special case here for streams. the reading changes after the keyword "stream",
                    // essentially forcing the next content to start after either CR, CR-LF or LF. so there might be a little
                    // skip to do here.
                    // if indeed there's a "stream", so the last buffer read should have been either CR or LF, which means (respectively)
                    // that we should either skip one more "LF" or do nothing (based on what was parsed)

                    // verify that buffer is either CR or LF, and behave accordingly
                    if(scCR == buffer) // CR. should be CR-LF or CR alone
                    {
                        if(GetNextByteForToken(buffer) == PDFHummus::eSuccess)
                        {
                            // if CR-LF treat as a single line, otherwise put back token nicely cause CR is alone
                            if(buffer != scLF)
                                SaveTokenBuffer(buffer);
                        }
                        result.first = true;
                    }
                    else
                        result.first = (scLF == buffer); // otherwise must be LF
                }
                break;
            }
        }

    }while(false);

    return result;
}