// Exercises the FPDFText_Find* search API on the first page of the
// hello_world.pdf test document: a missing term, forward and backward
// iteration over the occurrences of "world", and the effect of the
// FPDF_MATCHCASE and FPDF_MATCHWHOLEWORD flags.
TEST_F(FPDFTextEmbeddertest, TextSearch) {
  // Nothing below is meaningful without a document and a page, so these
  // preconditions are fatal rather than merely recorded.
  ASSERT_TRUE(OpenDocument("testing/resources/hello_world.pdf"));
  FPDF_PAGE page = LoadPage(0);
  ASSERT_NE(nullptr, page);

  FPDF_TEXTPAGE textpage = FPDFText_LoadPage(page);
  EXPECT_NE(nullptr, textpage);
  if (!textpage) {
    // Release the page that was already loaded before giving up.
    UnloadPage(page);
    return;
  }

  // Avoid issues with system wchar_t width vs. FPDF_WideString.
  const unsigned short nope[] = {'n', 'o', 'p', 'e', '\0'};
  const unsigned short world[] = {'w', 'o', 'r', 'l', 'd', '\0'};
  const unsigned short world_caps[] = {'W', 'O', 'R', 'L', 'D', '\0'};
  const unsigned short world_substr[] = {'o', 'r', 'l', 'd', '\0'};

  // No occurrences of "nope" in test page.
  FPDF_SCHHANDLE search = FPDFText_FindStart(textpage, nope, 0, 0);
  EXPECT_NE(nullptr, search);
  EXPECT_EQ(0, FPDFText_GetSchResultIndex(search));
  EXPECT_EQ(0, FPDFText_GetSchCount(search));

  // Advancing finds nothing.
  EXPECT_FALSE(FPDFText_FindNext(search));
  EXPECT_EQ(0, FPDFText_GetSchResultIndex(search));
  EXPECT_EQ(0, FPDFText_GetSchCount(search));

  // Retreating finds nothing.
  EXPECT_FALSE(FPDFText_FindPrev(search));
  EXPECT_EQ(0, FPDFText_GetSchResultIndex(search));
  EXPECT_EQ(0, FPDFText_GetSchCount(search));
  FPDFText_FindClose(search);

  // Two occurrences of "world" in test page.
  search = FPDFText_FindStart(textpage, world, 0, 2);
  EXPECT_NE(nullptr, search);

  // Remains not found until advanced.
  EXPECT_EQ(0, FPDFText_GetSchResultIndex(search));
  EXPECT_EQ(0, FPDFText_GetSchCount(search));

  // First occurrence of "world" in this test page.
  EXPECT_TRUE(FPDFText_FindNext(search));
  EXPECT_EQ(7, FPDFText_GetSchResultIndex(search));
  EXPECT_EQ(5, FPDFText_GetSchCount(search));

  // Last occurrence of "world" in this test page.
  EXPECT_TRUE(FPDFText_FindNext(search));
  EXPECT_EQ(24, FPDFText_GetSchResultIndex(search));
  EXPECT_EQ(5, FPDFText_GetSchCount(search));

  // Found position unchanged when fails to advance.
  EXPECT_FALSE(FPDFText_FindNext(search));
  EXPECT_EQ(24, FPDFText_GetSchResultIndex(search));
  EXPECT_EQ(5, FPDFText_GetSchCount(search));

  // Back to first occurrence.
  EXPECT_TRUE(FPDFText_FindPrev(search));
  EXPECT_EQ(7, FPDFText_GetSchResultIndex(search));
  EXPECT_EQ(5, FPDFText_GetSchCount(search));

  // Found position unchanged when fails to retreat.
  EXPECT_FALSE(FPDFText_FindPrev(search));
  EXPECT_EQ(7, FPDFText_GetSchResultIndex(search));
  EXPECT_EQ(5, FPDFText_GetSchCount(search));
  FPDFText_FindClose(search);

  // Exact search unaffected by case sensitivity and whole word flags.
  search = FPDFText_FindStart(textpage, world,
                              FPDF_MATCHCASE | FPDF_MATCHWHOLEWORD, 0);
  EXPECT_NE(nullptr, search);
  EXPECT_TRUE(FPDFText_FindNext(search));
  EXPECT_EQ(7, FPDFText_GetSchResultIndex(search));
  EXPECT_EQ(5, FPDFText_GetSchCount(search));
  FPDFText_FindClose(search);

  // Default is case-insensitive, so matching against caps works.
  search = FPDFText_FindStart(textpage, world_caps, 0, 0);
  EXPECT_NE(nullptr, search);
  EXPECT_TRUE(FPDFText_FindNext(search));
  EXPECT_EQ(7, FPDFText_GetSchResultIndex(search));
  EXPECT_EQ(5, FPDFText_GetSchCount(search));
  FPDFText_FindClose(search);

  // But can be made case sensitive, in which case this fails.
  search = FPDFText_FindStart(textpage, world_caps, FPDF_MATCHCASE, 0);
  EXPECT_NE(nullptr, search);
  EXPECT_FALSE(FPDFText_FindNext(search));
  EXPECT_EQ(0, FPDFText_GetSchResultIndex(search));
  EXPECT_EQ(0, FPDFText_GetSchCount(search));
  FPDFText_FindClose(search);

  // Default is match anywhere within word, so matching substring works.
  search = FPDFText_FindStart(textpage, world_substr, 0, 0);
  EXPECT_NE(nullptr, search);
  EXPECT_TRUE(FPDFText_FindNext(search));
  EXPECT_EQ(8, FPDFText_GetSchResultIndex(search));
  EXPECT_EQ(4, FPDFText_GetSchCount(search));
  FPDFText_FindClose(search);

  // But can be made to match word boundaries, in which case this fails.
  search = FPDFText_FindStart(textpage, world_substr, FPDF_MATCHWHOLEWORD, 0);
  EXPECT_NE(nullptr, search);
  EXPECT_FALSE(FPDFText_FindNext(search));
  // TODO(tsepez): investigate strange index/count values in this state.
  FPDFText_FindClose(search);

  FPDFText_ClosePage(textpage);
  UnloadPage(page);
}
// Exercises the FPDFText_Find* search API on the first page of the
// hello_world.pdf test document: a missing term, forward and backward
// iteration over the occurrences of "world", and the effect of the
// FPDF_MATCHCASE and FPDF_MATCHWHOLEWORD flags.
TEST_F(FPDFTextEmbeddertest, TextSearch) {
  // Nothing below is meaningful without a document and a page, so these
  // preconditions are fatal rather than merely recorded.
  ASSERT_TRUE(OpenDocument("hello_world.pdf"));
  FPDF_PAGE page = LoadPage(0);
  ASSERT_TRUE(page);

  FPDF_TEXTPAGE textpage = FPDFText_LoadPage(page);
  EXPECT_TRUE(textpage);
  if (!textpage) {
    // Release the page that was already loaded before giving up.
    UnloadPage(page);
    return;
  }

  // Search terms converted to the API's wide-string form; the buffers are
  // released automatically via pdfium::FreeDeleter.
  std::unique_ptr<unsigned short, pdfium::FreeDeleter> nope =
      GetFPDFWideString(L"nope");
  std::unique_ptr<unsigned short, pdfium::FreeDeleter> world =
      GetFPDFWideString(L"world");
  std::unique_ptr<unsigned short, pdfium::FreeDeleter> world_caps =
      GetFPDFWideString(L"WORLD");
  std::unique_ptr<unsigned short, pdfium::FreeDeleter> world_substr =
      GetFPDFWideString(L"orld");

  // No occurrences of "nope" in test page.
  FPDF_SCHHANDLE search = FPDFText_FindStart(textpage, nope.get(), 0, 0);
  EXPECT_TRUE(search);
  EXPECT_EQ(0, FPDFText_GetSchResultIndex(search));
  EXPECT_EQ(0, FPDFText_GetSchCount(search));

  // Advancing finds nothing.
  EXPECT_FALSE(FPDFText_FindNext(search));
  EXPECT_EQ(0, FPDFText_GetSchResultIndex(search));
  EXPECT_EQ(0, FPDFText_GetSchCount(search));

  // Retreating finds nothing.
  EXPECT_FALSE(FPDFText_FindPrev(search));
  EXPECT_EQ(0, FPDFText_GetSchResultIndex(search));
  EXPECT_EQ(0, FPDFText_GetSchCount(search));
  FPDFText_FindClose(search);

  // Two occurrences of "world" in test page.
  search = FPDFText_FindStart(textpage, world.get(), 0, 2);
  EXPECT_TRUE(search);

  // Remains not found until advanced.
  EXPECT_EQ(0, FPDFText_GetSchResultIndex(search));
  EXPECT_EQ(0, FPDFText_GetSchCount(search));

  // First occurrence of "world" in this test page.
  EXPECT_TRUE(FPDFText_FindNext(search));
  EXPECT_EQ(7, FPDFText_GetSchResultIndex(search));
  EXPECT_EQ(5, FPDFText_GetSchCount(search));

  // Last occurrence of "world" in this test page.
  EXPECT_TRUE(FPDFText_FindNext(search));
  EXPECT_EQ(24, FPDFText_GetSchResultIndex(search));
  EXPECT_EQ(5, FPDFText_GetSchCount(search));

  // Found position unchanged when fails to advance.
  EXPECT_FALSE(FPDFText_FindNext(search));
  EXPECT_EQ(24, FPDFText_GetSchResultIndex(search));
  EXPECT_EQ(5, FPDFText_GetSchCount(search));

  // Back to first occurrence.
  EXPECT_TRUE(FPDFText_FindPrev(search));
  EXPECT_EQ(7, FPDFText_GetSchResultIndex(search));
  EXPECT_EQ(5, FPDFText_GetSchCount(search));

  // Found position unchanged when fails to retreat.
  EXPECT_FALSE(FPDFText_FindPrev(search));
  EXPECT_EQ(7, FPDFText_GetSchResultIndex(search));
  EXPECT_EQ(5, FPDFText_GetSchCount(search));
  FPDFText_FindClose(search);

  // Exact search unaffected by case sensitivity and whole word flags.
  search = FPDFText_FindStart(textpage, world.get(),
                              FPDF_MATCHCASE | FPDF_MATCHWHOLEWORD, 0);
  EXPECT_TRUE(search);
  EXPECT_TRUE(FPDFText_FindNext(search));
  EXPECT_EQ(7, FPDFText_GetSchResultIndex(search));
  EXPECT_EQ(5, FPDFText_GetSchCount(search));
  FPDFText_FindClose(search);

  // Default is case-insensitive, so matching against caps works.
  search = FPDFText_FindStart(textpage, world_caps.get(), 0, 0);
  EXPECT_TRUE(search);
  EXPECT_TRUE(FPDFText_FindNext(search));
  EXPECT_EQ(7, FPDFText_GetSchResultIndex(search));
  EXPECT_EQ(5, FPDFText_GetSchCount(search));
  FPDFText_FindClose(search);

  // But can be made case sensitive, in which case this fails.
  search = FPDFText_FindStart(textpage, world_caps.get(), FPDF_MATCHCASE, 0);
  EXPECT_TRUE(search);
  EXPECT_FALSE(FPDFText_FindNext(search));
  EXPECT_EQ(0, FPDFText_GetSchResultIndex(search));
  EXPECT_EQ(0, FPDFText_GetSchCount(search));
  FPDFText_FindClose(search);

  // Default is match anywhere within word, so matching substring works.
  search = FPDFText_FindStart(textpage, world_substr.get(), 0, 0);
  EXPECT_TRUE(search);
  EXPECT_TRUE(FPDFText_FindNext(search));
  EXPECT_EQ(8, FPDFText_GetSchResultIndex(search));
  EXPECT_EQ(4, FPDFText_GetSchCount(search));
  FPDFText_FindClose(search);

  // But can be made to match word boundaries, in which case this fails.
  search = FPDFText_FindStart(textpage, world_substr.get(),
                              FPDF_MATCHWHOLEWORD, 0);
  EXPECT_TRUE(search);
  EXPECT_FALSE(FPDFText_FindNext(search));
  // TODO(tsepez): investigate strange index/count values in this state.
  FPDFText_FindClose(search);

  FPDFText_ClosePage(textpage);
  UnloadPage(page);
}