/// Converts a TCHAR-based string of `length` characters to a UTF-8 std::string.
///
/// The conversion path is chosen at compile time by `_UNICODE`:
///  - defined:     `arg` is handed straight to w2utf8 (i.e. it is treated as
///                 wide-character text in this configuration);
///  - not defined: `arg` is first widened with a2w, then encoded with w2utf8.
///
/// @param arg     Source characters (presumably not required to be
///                NUL-terminated since a length is passed — confirm against
///                w2utf8/a2w).
/// @param length  Number of characters of `arg` to convert.
/// @return The result of w2utf8 for the (possibly widened) input.
std::string t2utf8(LPCTSTR arg, size_t length) {
#ifdef _UNICODE
  return w2utf8(arg, length);
#else
  // Narrow build: go through an intermediate wide string first.
  std::wstring widened = a2w(arg, length);
  return w2utf8(widened.c_str(), widened.length());
#endif
}
void process_pde_text(PdeTextP text, ptree& node) { PdfTextState ts; switch (text->GetType()) { case kPdeText: { node.put("type", "text_paragraph"); std::wstring s; s.resize(text->GetText(nullptr, 0)); text->GetText((wchar_t*)s.c_str(), s.size()); node.put("text", w2utf8(s.c_str())); text->GetTextState(&ts); auto num_lines = text->GetNumTextLines(); for (auto i = 0; i < num_lines; i++) { ptree line_node; PdeTextLineP text_line = text->GetTextLine(i); process_pde_element((PdeElementP)text_line, line_node); node.add_child("element", line_node); } } break; case kPdeTextLine: { PdeTextLineP text_line = (PdeTextLine*)text; node.put("type", "text_line"); std::wstring s; s.resize(text_line->GetText(nullptr, 0)); text_line->GetText((wchar_t*)s.c_str(), s.size()); node.put("text", w2utf8(s.c_str())); text_line->GetTextState(&ts); auto num_word = text_line->GetNumWords(); for (auto i = 0; i < num_word; i++) { ptree word_node; PdeWordP text_word = text_line->GetWord(i); process_pde_element((PdeElementP)text_word, word_node); node.add_child("element", word_node); } } break; case kPdeWord: { PdeWordP word = (PdeWord*)text; node.put("type", "text_word"); std::wstring s; s.resize(word->GetText(nullptr, 0)); word->GetText((wchar_t*)s.c_str(), s.size()); node.put("text", w2utf8(s.c_str())); word->GetTextState(&ts); } break; } process_pdf_text_state(ts, node); }
/// Writes the properties of `font` into `node`.
///
/// Keys written: "face_name", "font_name", "system_font_name" (each converted
/// with w2utf8), plus "system_font_bold", "system_font_italic" and
/// "system_font_charset" taken directly from the corresponding getters.
///
/// Each name is fetched with the two-call pattern: the value returned by the
/// getter when given (nullptr, 0) is used as the buffer size, then a second
/// call fills the buffer.
///
/// @param font  Font to describe; dereferenced unconditionally, so it must not
///              be null.
/// @param node  Property-tree node that receives the output.
void process_pdf_font(PdfFontP font, ptree& node) {
  // The buffers are filled through &buf[0] (mutable storage). The previous
  // code wrote through a const-cast of c_str(), which must not be modified.
  std::wstring face_name;
  face_name.resize(font->GetFaceName(nullptr, 0));
  font->GetFaceName(&face_name[0], face_name.size());
  node.put("face_name", w2utf8(face_name.c_str()));

  std::wstring font_name;
  font_name.resize(font->GetFontName(nullptr, 0));
  font->GetFontName(&font_name[0], font_name.size());
  node.put("font_name", w2utf8(font_name.c_str()));

  std::wstring system_font_name;
  system_font_name.resize(font->GetSystemFontName(nullptr, 0));
  font->GetSystemFontName(&system_font_name[0], system_font_name.size());
  node.put("system_font_name", w2utf8(system_font_name.c_str()));

  node.put("system_font_bold", font->GetSystemFontBold());
  node.put("system_font_italic", font->GetSystemFontItalic());
  node.put("system_font_charset", font->GetSystemFontCharset());
}
/// Converts `str` to UTF-8 by way of an intermediate wide string:
/// a2w(str) produces the wide form, which w2utf8 then encodes.
///
/// @param str  Narrow input string (presumably in the active ANSI code page —
///             confirm against a2w).
/// @return The value returned by w2utf8 for the widened input.
std::string strconv::a2utf8(std::string str) {
  auto widened = a2w(str);
  return w2utf8(widened);
}