// Inspects the content marks attached to the text object in |Obj| and decides
// how an "ActualText" entry (if any) affects extraction of that object.
//
// Returns:
//   Pass  - there are no marks, no mark carries a direct "ActualText" string,
//           the text is empty, or the object's font cannot encode any of its
//           characters.
//   Done  - the previous text object has the same number of marks and the
//           same last mark dictionary, or the text contains no character
//           accepted by the printable test below.
//   Delay - the text contains at least one such character.
FPDFText_MarkedContent CPDF_TextPage::PreMarkedContent(PDFTEXT_Obj Obj) {
  CPDF_TextObject* pTextObj = Obj.m_pTextObj.Get();
  const size_t nMarkCount = pTextObj->m_ContentMarks.CountItems();
  if (nMarkCount == 0)
    return FPDFText_MarkedContent::Pass;

  // Walk every mark; the last direct "ActualText" string wins. |pLastDict|
  // ends up holding the parameter dictionary of the final mark (may be null).
  WideString wsActualText;
  bool bHasActualText = false;
  const CPDF_Dictionary* pLastDict = nullptr;
  for (size_t iMark = 0; iMark < nMarkCount; ++iMark) {
    const CPDF_ContentMarkItem* pMark = pTextObj->m_ContentMarks.GetItem(iMark);
    pLastDict = pMark->GetParam();
    if (!pLastDict)
      continue;

    const CPDF_String* pActual = ToString(pLastDict->GetObjectFor("ActualText"));
    if (!pActual)
      continue;

    bHasActualText = true;
    wsActualText = pActual->GetUnicodeText();
  }
  if (!bHasActualText)
    return FPDFText_MarkedContent::Pass;

  // Same mark count and same last dictionary as the previous text object.
  if (m_pPreTextObj) {
    const CPDF_ContentMarks& prevMarks = m_pPreTextObj->m_ContentMarks;
    const bool bSameCount = prevMarks.CountItems() == nMarkCount;
    if (bSameCount && prevMarks.GetItem(nMarkCount - 1)->GetParam() == pLastDict)
      return FPDFText_MarkedContent::Done;
  }

  if (wsActualText.IsEmpty())
    return FPDFText_MarkedContent::Pass;

  // At least one character must map to a char code in the object's font.
  CPDF_Font* pFont = pTextObj->GetFont();
  bool bFontCanEncode = false;
  for (size_t iChar = 0; iChar < wsActualText.GetLength(); ++iChar) {
    if (pFont->CharCodeFromUnicode(wsActualText[iChar]) ==
        CPDF_Font::kInvalidCharCode) {
      continue;
    }
    bFontCanEncode = true;
    break;
  }
  if (!bFontCanEncode)
    return FPDFText_MarkedContent::Pass;

  // At least one character must be either in (0x80, 0xFFFD) or, when
  // <= 0x80, accepted by isprint().
  bool bHasPrintable = false;
  for (size_t iChar = 0; iChar < wsActualText.GetLength(); ++iChar) {
    const wchar_t wch = wsActualText[iChar];
    const bool bPrintable = wch > 0x80 ? wch < 0xFFFD : isprint(wch) != 0;
    if (bPrintable) {
      bHasPrintable = true;
      break;
    }
  }
  return bHasPrintable ? FPDFText_MarkedContent::Delay
                       : FPDFText_MarkedContent::Done;
}
// Converts a PDF file specification path into a path for the build platform.
//
// Paths of length 0 or 1 yield an empty string. On Apple platforms a leading
// "/Mac" prefix loses its first slash. On Windows:
//   - a path not starting with '/' is only slash-converted,
//   - "//..." drops the first slash,
//   - "/X/..." becomes "X:" followed by the converted remainder,
//   - any other "/..." path gets a leading backslash.
// On every other platform the input is returned unchanged.
// Slash conversion itself is delegated to ChangeSlashToPlatform().
WideString CPDF_FileSpec::DecodeFileName(const WideString& filepath) {
  if (filepath.GetLength() <= 1)
    return WideString();

#if _FX_PLATFORM_ == _FX_PLATFORM_APPLE_
  if (filepath.Left(sizeof("/Mac") - 1) == WideStringView(L"/Mac"))
    return ChangeSlashToPlatform(filepath.c_str() + 1);
  return ChangeSlashToPlatform(filepath.c_str());
#elif _FX_PLATFORM_ == _FX_PLATFORM_WINDOWS_
  if (filepath[0] != L'/')
    return ChangeSlashToPlatform(filepath.c_str());
  if (filepath[1] == L'/')
    return ChangeSlashToPlatform(filepath.c_str() + 1);

  // Only length >= 2 is guaranteed here, so index 2 must be bounds-checked
  // before it is read (e.g. for "/a").
  if (filepath.GetLength() > 2 && filepath[2] == L'/') {
    WideString result;
    result += filepath[1];
    result += L':';
    result += ChangeSlashToPlatform(filepath.c_str() + 2);
    return result;
  }

  WideString result;
  result += L'\\';
  result += ChangeSlashToPlatform(filepath.c_str());
  return result;
#else
  return WideString(filepath);
#endif
}
// Converts a platform path into a PDF file specification path.
//
// Paths of length 0 or 1 yield an empty string. On Windows:
//   - "X:..." becomes "/X" plus the converted remainder, with a '/' inserted
//     when the drive spec is not followed by a backslash,
//   - "\\\\..." (two leading backslashes) drops the first one,
//   - "\\..." gets an extra leading '/',
//   - anything else is only slash-converted.
// On Apple platforms a path starting with "Mac" gets a leading '/'.
// On every other platform the input is returned unchanged.
// Slash conversion itself is delegated to ChangeSlashToPDF().
WideString CPDF_FileSpec::EncodeFileName(const WideString& filepath) {
  if (filepath.GetLength() <= 1)
    return WideString();

#if _FX_PLATFORM_ == _FX_PLATFORM_WINDOWS_
  if (filepath[1] == L':') {
    WideString result(L'/');
    result += filepath[0];
    // A bare drive spec such as "C:" has no character at index 2; treat it
    // like "not followed by a backslash" instead of indexing out of range.
    if (filepath.GetLength() == 2 || filepath[2] != L'\\')
      result += L'/';

    result += ChangeSlashToPDF(filepath.c_str() + 2);
    return result;
  }
  if (filepath[0] == L'\\' && filepath[1] == L'\\')
    return ChangeSlashToPDF(filepath.c_str() + 1);

  if (filepath[0] == L'\\')
    return L'/' + ChangeSlashToPDF(filepath.c_str());
  return ChangeSlashToPDF(filepath.c_str());
#elif _FX_PLATFORM_ == _FX_PLATFORM_APPLE_
  if (filepath.Left(sizeof("Mac") - 1).EqualsASCII("Mac"))
    return L'/' + ChangeSlashToPDF(filepath.c_str());
  return ChangeSlashToPDF(filepath.c_str());
#else
  return WideString(filepath);
#endif
}
bool CPWL_ListBox::OnNotifySelectionChanged(bool bKeyDown, uint32_t nFlag) { if (!m_pFillerNotify) return false; CPWL_Wnd::ObservedPtr thisObserved(this); WideString swChange = GetText(); WideString strChangeEx; int nSelStart = 0; int nSelEnd = swChange.GetLength(); bool bRC; bool bExit; std::tie(bRC, bExit) = m_pFillerNotify->OnBeforeKeyStroke( GetAttachedData(), swChange, strChangeEx, nSelStart, nSelEnd, bKeyDown, nFlag); if (!thisObserved) return false; return bExit; }
// Emits the "ActualText" replacement text of the text object in |Obj| into
// the temporary line buffers (m_TempTextBuf / m_TempCharList).
//
// Every emitted character shares the text object's position, bounding rect
// and combined text/form matrix, and is flagged FPDFTEXT_CHAR_PIECE.
// Characters <= 0x80 that fail isprint() are replaced by a space; characters
// >= 0xFFFD are skipped. Nothing is emitted when the object has no marks or
// the resolved text is empty.
void CPDF_TextPage::ProcessMarkedContent(PDFTEXT_Obj Obj) {
  CPDF_TextObject* pTextObj = Obj.m_pTextObj.Get();
  const size_t nContentMarks = pTextObj->m_ContentMarks.CountItems();
  if (nContentMarks == 0)
    return;

  // Resolve the text exactly the way PreMarkedContent() does: the last mark
  // that carries a direct "ActualText" string wins. A later mark whose
  // dictionary has no such entry must not reset the text to empty, otherwise
  // PreMarkedContent() answers Delay while nothing gets emitted here.
  WideString actText;
  for (size_t n = 0; n < nContentMarks; ++n) {
    const CPDF_ContentMarkItem* item = pTextObj->m_ContentMarks.GetItem(n);
    const CPDF_Dictionary* pDict = item->GetParam();
    if (!pDict)
      continue;

    const CPDF_String* pActual = ToString(pDict->GetObjectFor("ActualText"));
    if (pActual)
      actText = pActual->GetUnicodeText();
  }
  if (actText.IsEmpty())
    return;

  CPDF_Font* pFont = pTextObj->GetFont();
  CFX_Matrix matrix = pTextObj->GetTextMatrix() * Obj.m_formMatrix;

  for (size_t k = 0; k < actText.GetLength(); ++k) {
    wchar_t wChar = actText[k];
    if (wChar <= 0x80 && !isprint(wChar))
      wChar = 0x20;
    if (wChar >= 0xFFFD)
      continue;

    PAGECHAR_INFO charinfo;
    charinfo.m_Origin = pTextObj->GetPos();
    charinfo.m_Index = m_TextBuf.GetLength();
    charinfo.m_Unicode = wChar;
    charinfo.m_CharCode = pFont->CharCodeFromUnicode(wChar);
    charinfo.m_Flag = FPDFTEXT_CHAR_PIECE;
    charinfo.m_pTextObj = pTextObj;
    charinfo.m_CharBox = pTextObj->GetRect();
    charinfo.m_Matrix = matrix;
    m_TempTextBuf.AppendChar(wChar);
    m_TempCharList.push_back(charinfo);
  }
}
void CPDF_TextPage::CloseTempLine() { if (m_TempCharList.empty()) return; WideString str = m_TempTextBuf.MakeString(); bool bPrevSpace = false; for (size_t i = 0; i < str.GetLength(); ++i) { if (str[i] != ' ') { bPrevSpace = false; continue; } if (bPrevSpace) { m_TempTextBuf.Delete(i, 1); m_TempCharList.erase(m_TempCharList.begin() + i); str.Delete(i); --i; } bPrevSpace = true; } CFX_BidiString bidi(str); if (m_parserflag == FPDFText_Direction::Right) bidi.SetOverallDirectionRight(); CFX_BidiChar::Direction eCurrentDirection = bidi.OverallDirection(); for (const auto& segment : bidi) { if (segment.direction == CFX_BidiChar::RIGHT || (segment.direction == CFX_BidiChar::NEUTRAL && eCurrentDirection == CFX_BidiChar::RIGHT)) { eCurrentDirection = CFX_BidiChar::RIGHT; for (int m = segment.start + segment.count; m > segment.start; --m) AddCharInfoByRLDirection(str[m - 1], m_TempCharList[m - 1]); } else { eCurrentDirection = CFX_BidiChar::LEFT; for (int m = segment.start; m < segment.start + segment.count; ++m) AddCharInfoByLRDirection(str[m], m_TempCharList[m]); } } m_TempCharList.clear(); m_TempTextBuf.Delete(0, m_TempTextBuf.GetLength()); }
// Converts one text object into entries of the temporary line buffers
// (m_TempTextBuf / m_TempCharList).
//
// Overview of the steps performed below:
//   1. Skip objects whose rect width is below kSizeEpsilon.
//   2. Consult PreMarkedContent(): Done means the object is only recorded as
//      the previous object; Delay means ProcessMarkedContent() emits its text
//      instead of the per-item loop.
//   3. If there is a previous text object, ProcessInsertObject() decides
//      whether a space, a line break or a hyphen marker goes between the two.
//   4. Walk the object's items, generating spaces from accumulated spacing,
//      computing each character's box and skipping near-duplicate characters.
//   5. For right-to-left text under a mirroring matrix, SwapTempTextBuf() is
//      called on the range appended by this object.
void CPDF_TextPage::ProcessTextObject(PDFTEXT_Obj Obj) {
  CPDF_TextObject* pTextObj = Obj.m_pTextObj.Get();
  // Objects with (near) zero width are ignored entirely.
  if (fabs(pTextObj->GetRect().Width()) < kSizeEpsilon)
    return;
  CFX_Matrix formMatrix = Obj.m_formMatrix;
  CPDF_Font* pFont = pTextObj->GetFont();
  // Combined text matrix and form matrix, used for all transforms below.
  CFX_Matrix matrix = pTextObj->GetTextMatrix() * formMatrix;
  FPDFText_MarkedContent ePreMKC = PreMarkedContent(Obj);
  if (ePreMKC == FPDFText_MarkedContent::Done) {
    // Nothing to emit; just remember this object as the previous one.
    m_pPreTextObj = pTextObj;
    m_perMatrix = formMatrix;
    return;
  }
  GenerateCharacter result = GenerateCharacter::None;
  if (m_pPreTextObj) {
    result = ProcessInsertObject(pTextObj, formMatrix);
    // A line break starts a new current-line rect; otherwise grow it.
    if (result == GenerateCharacter::LineBreak)
      m_CurlineRect = Obj.m_pTextObj->GetRect();
    else
      m_CurlineRect.Union(Obj.m_pTextObj->GetRect());
    switch (result) {
      case GenerateCharacter::None:
        break;
      case GenerateCharacter::Space: {
        // Append a generated space to the temporary buffers, if one could be
        // produced.
        Optional<PAGECHAR_INFO> pGenerateChar = GenerateCharInfo(TEXT_SPACE_CHAR);
        if (pGenerateChar) {
          if (!formMatrix.IsIdentity())
            pGenerateChar->m_Matrix = formMatrix;
          m_TempTextBuf.AppendChar(TEXT_SPACE_CHAR);
          m_TempCharList.push_back(*pGenerateChar);
        }
        break;
      }
      case GenerateCharacter::LineBreak:
        // Flush the current line, then add CR/LF unless no text has been
        // committed yet.
        CloseTempLine();
        if (m_TextBuf.GetSize()) {
          AppendGeneratedCharacter(TEXT_RETURN_CHAR, formMatrix);
          AppendGeneratedCharacter(TEXT_LINEFEED_CHAR, formMatrix);
        }
        break;
      case GenerateCharacter::Hyphen:
        // If this object is a single character that is itself a hyphen code,
        // return without emitting anything. Note that m_pPreTextObj is not
        // updated on this path.
        if (pTextObj->CountChars() == 1) {
          CPDF_TextObjectItem item;
          pTextObj->GetCharInfo(0, &item);
          WideString wstrItem = pTextObj->GetFont()->UnicodeFromCharCode(item.m_CharCode);
          // No Unicode mapping: fall back to the raw char code.
          if (wstrItem.IsEmpty())
            wstrItem += (wchar_t)item.m_CharCode;
          wchar_t curChar = wstrItem[0];
          if (IsHyphenCode(curChar))
            return;
        }
        // Strip trailing spaces from the temporary line.
        while (m_TempTextBuf.GetSize() > 0 && m_TempTextBuf.AsStringView()[m_TempTextBuf.GetLength() - 1] == 0x20) {
          m_TempTextBuf.Delete(m_TempTextBuf.GetLength() - 1, 1);
          m_TempCharList.pop_back();
        }
        // Turn the last remaining character into a hyphen marker: unicode 0x2
        // in the char list and 0xfffe in the text buffer.
        // NOTE(review): back() and the Delete() below assume the temporary
        // buffers are non-empty here; presumably guaranteed by
        // ProcessInsertObject() returning Hyphen - confirm.
        PAGECHAR_INFO* charinfo = &m_TempCharList.back();
        m_TempTextBuf.Delete(m_TempTextBuf.GetLength() - 1, 1);
        charinfo->m_Unicode = 0x2;
        charinfo->m_Flag = FPDFTEXT_CHAR_HYPHEN;
        m_TempTextBuf.AppendChar(0xfffe);
        break;
    }
  } else {
    // First text object seen: it defines the current-line rect.
    m_CurlineRect = Obj.m_pTextObj->GetRect();
  }
  if (ePreMKC == FPDFText_MarkedContent::Delay) {
    // The marked-content replacement text is emitted instead of the items.
    ProcessMarkedContent(Obj);
    m_pPreTextObj = pTextObj;
    m_perMatrix = formMatrix;
    return;
  }
  m_pPreTextObj = pTextObj;
  m_perMatrix = formMatrix;
  float baseSpace = CalculateBaseSpace(pTextObj, matrix);
  const bool bR2L = IsRightToLeft(*pTextObj, *pFont);
  // Right-to-left text drawn with a negative-determinant matrix.
  const bool bIsBidiAndMirrorInverse = bR2L && (matrix.a * matrix.d - matrix.b * matrix.c) < 0;
  // Remember where this object's output starts in both temporary buffers so
  // that range can be handed to SwapTempTextBuf() at the end.
  int32_t iBufStartAppend = m_TempTextBuf.GetLength();
  int32_t iCharListStartAppend = pdfium::CollectionSize<int32_t>(m_TempCharList);
  float spacing = 0;
  const size_t nItems = pTextObj->CountItems();
  for (size_t i = 0; i < nItems; ++i) {
    CPDF_TextObjectItem item;
    PAGECHAR_INFO charinfo;
    pTextObj->GetItemInfo(i, &item);
    // A char code of (uint32_t)-1 marks an item that carries no character;
    // its origin x is turned into a pending spacing value (presumably a
    // positioning adjustment - confirm against CPDF_TextObject).
    if (item.m_CharCode == static_cast<uint32_t>(-1)) {
      WideString str = m_TempTextBuf.MakeString();
      if (str.IsEmpty())
        str = m_TextBuf.AsStringView();
      // No spacing is recorded when there is no preceding text or it already
      // ends with a space.
      if (str.IsEmpty() || str[str.GetLength() - 1] == TEXT_SPACE_CHAR)
        continue;
      float fontsize_h = pTextObj->m_TextState.GetFontSizeH();
      spacing = -fontsize_h * item.m_Origin.x / 1000;
      continue;
    }
    // Fold the character spacing (beyond +/-0.001) into the pending spacing,
    // then subtract the base spacing.
    float charSpace = pTextObj->m_TextState.GetCharSpace();
    if (charSpace > 0.001)
      spacing += matrix.TransformDistance(charSpace);
    else if (charSpace < -0.001)
      spacing -= matrix.TransformDistance(fabs(charSpace));
    spacing -= baseSpace;
    if (spacing && i > 0) {
      // Derive the gap threshold: half the width of the font's space glyph,
      // unless that glyph is missing or wider than a third of the font size,
      // in which case a normalized width of the current character is used.
      float fontsize_h = pTextObj->m_TextState.GetFontSizeH();
      uint32_t space_charcode = pFont->CharCodeFromUnicode(' ');
      float threshold = 0;
      if (space_charcode != CPDF_Font::kInvalidCharCode)
        threshold = fontsize_h * pFont->GetCharWidthF(space_charcode) / 1000;
      if (threshold > fontsize_h / 3)
        threshold = 0;
      else
        threshold /= 2;
      if (threshold == 0) {
        threshold = static_cast<float>(GetCharWidth(item.m_CharCode, pFont));
        threshold = NormalizeThreshold(threshold, 300, 500, 700);
        threshold = fontsize_h * threshold / 1000;
      }
      // The gap is wide enough: emit a generated space with a zero-size box
      // at the item's transformed origin.
      if (threshold && (spacing && spacing >= threshold)) {
        charinfo.m_Unicode = TEXT_SPACE_CHAR;
        charinfo.m_Flag = FPDFTEXT_CHAR_GENERATED;
        charinfo.m_pTextObj = pTextObj;
        charinfo.m_Index = m_TextBuf.GetLength();
        m_TempTextBuf.AppendChar(TEXT_SPACE_CHAR);
        charinfo.m_CharCode = CPDF_Font::kInvalidCharCode;
        charinfo.m_Matrix = formMatrix;
        charinfo.m_Origin = matrix.Transform(item.m_Origin);
        charinfo.m_CharBox = CFX_FloatRect(charinfo.m_Origin.x, charinfo.m_Origin.y, charinfo.m_Origin.x, charinfo.m_Origin.y);
        m_TempCharList.push_back(charinfo);
      }
      if (item.m_CharCode == CPDF_Font::kInvalidCharCode)
        continue;
    }
    spacing = 0;
    WideString wstrItem = pFont->UnicodeFromCharCode(item.m_CharCode);
    bool bNoUnicode = false;
    // No Unicode mapping for a non-zero char code: use the raw code and flag
    // the character accordingly.
    if (wstrItem.IsEmpty() && item.m_CharCode) {
      wstrItem += static_cast<wchar_t>(item.m_CharCode);
      bNoUnicode = true;
    }
    charinfo.m_Index = -1;
    charinfo.m_CharCode = item.m_CharCode;
    charinfo.m_Flag = bNoUnicode ? FPDFTEXT_CHAR_UNUNICODE : FPDFTEXT_CHAR_NORMAL;
    charinfo.m_pTextObj = pTextObj;
    charinfo.m_Origin = matrix.Transform(item.m_Origin);
    // Build the character box from the font's glyph bbox scaled by
    // font size / 1000 and offset by the item origin.
    const FX_RECT rect = charinfo.m_pTextObj->GetFont()->GetCharBBox(charinfo.m_CharCode);
    const float fFontSize = pTextObj->GetFontSize() / 1000;
    charinfo.m_CharBox.top = rect.top * fFontSize + item.m_Origin.y;
    charinfo.m_CharBox.left = rect.left * fFontSize + item.m_Origin.x;
    charinfo.m_CharBox.right = rect.right * fFontSize + item.m_Origin.x;
    charinfo.m_CharBox.bottom = rect.bottom * fFontSize + item.m_Origin.y;
    // Degenerate height: fall back to the font size.
    if (fabsf(charinfo.m_CharBox.top - charinfo.m_CharBox.bottom) < kSizeEpsilon) {
      charinfo.m_CharBox.top = charinfo.m_CharBox.bottom + pTextObj->GetFontSize();
    }
    // Degenerate width: fall back to the character's width.
    if (fabsf(charinfo.m_CharBox.right - charinfo.m_CharBox.left) < kSizeEpsilon) {
      charinfo.m_CharBox.right = charinfo.m_CharBox.left + pTextObj->GetCharWidth(charinfo.m_CharCode);
    }
    charinfo.m_CharBox = matrix.TransformRect(charinfo.m_CharBox);
    charinfo.m_Matrix = matrix;
    // Still no text for this item (char code 0 without a mapping): record a
    // placeholder with unicode 0 and 0xfffe in the text buffer.
    if (wstrItem.IsEmpty()) {
      charinfo.m_Unicode = 0;
      m_TempCharList.push_back(charinfo);
      m_TempTextBuf.AppendChar(0xfffe);
      continue;
    }
    int nTotal = wstrItem.GetLength();
    // Duplicate detection: look at up to the last 7 temporary characters for
    // one with the same char code and font whose origin lies within
    // |threshold| of this one in both x and y.
    bool bDel = false;
    const int count = std::min(pdfium::CollectionSize<int>(m_TempCharList), 7);
    float threshold = charinfo.m_Matrix.TransformXDistance( static_cast<float>(TEXT_CHARRATIO_GAPDELTA) * pTextObj->GetFontSize());
    for (int n = pdfium::CollectionSize<int>(m_TempCharList); n > pdfium::CollectionSize<int>(m_TempCharList) - count; --n) {
      const PAGECHAR_INFO& charinfo1 = m_TempCharList[n - 1];
      CFX_PointF diff = charinfo1.m_Origin - charinfo.m_Origin;
      if (charinfo1.m_CharCode == charinfo.m_CharCode && charinfo1.m_pTextObj->GetFont() == charinfo.m_pTextObj->GetFont() && fabs(diff.x) < threshold && fabs(diff.y) < threshold) {
        bDel = true;
        break;
      }
    }
    if (!bDel) {
      // Emit one entry per Unicode character of the item; a zero character
      // is stored as 0xfffe in the text buffer and keeps m_Index unchanged.
      for (int nIndex = 0; nIndex < nTotal; ++nIndex) {
        charinfo.m_Unicode = wstrItem[nIndex];
        if (charinfo.m_Unicode) {
          charinfo.m_Index = m_TextBuf.GetLength();
          m_TempTextBuf.AppendChar(charinfo.m_Unicode);
        } else {
          m_TempTextBuf.AppendChar(0xfffe);
        }
        m_TempCharList.push_back(charinfo);
      }
    } else if (i == 0) {
      // The object's first item is a duplicate: also drop a trailing space
      // from the temporary line, if there is one.
      WideString str = m_TempTextBuf.MakeString();
      if (!str.IsEmpty() && str[str.GetLength() - 1] == TEXT_SPACE_CHAR) {
        m_TempTextBuf.Delete(m_TempTextBuf.GetLength() - 1, 1);
        m_TempCharList.pop_back();
      }
    }
  }
  // Mirrored right-to-left text: hand the range appended by this object to
  // SwapTempTextBuf().
  if (bIsBidiAndMirrorInverse)
    SwapTempTextBuf(iCharListStartAppend, iBufStartAppend);
}