int TextHandler::convert_pdf_to_text(const string & filepath, const TextID & tid, string & text) { //读取原文件 ByteBuffer data; if(false == read_file(filepath, data) ) { return ERR_READ_PDF_INPUT_FILE_FAILED; } //生成source文件 string source_file_path = m_default_file_path + tid + PDF_SOURCE_POSTFIX; if(false == write_file(source_file_path, data.GetPtr(), data.GetLength()) ) { return ERR_WRITE_PDF_SOURCE_FILE; } string pdf_ext_filepath = m_default_file_path + tid + PDF_EXT_TXT_POSTFIX; //call xpdf string cmd = "./mdata/pdf/pdftotext -cfg ./mdata/pdf/xpdf-chinese-simplified/xpdfrc -enc GBK " + source_file_path + " " + pdf_ext_filepath; system(cmd.c_str()); //读取抽取的文件 ByteBuffer tmp_data; if(false == read_file(pdf_ext_filepath, tmp_data)) { return ERR_READ_PDF_EXT_FILE_FAILED; } tmp_data.String(text); return SUCCESS; }
static void TestMD5FromString(const char* value, const char* expectedBase64Hash) { Aws::String source(value); ByteBuffer digest = HashingUtils::CalculateMD5(source); ASSERT_EQ(16uL, digest.GetLength()); Aws::String base64Hash = HashingUtils::Base64Encode(digest); ASSERT_STREQ(expectedBase64Hash, base64Hash.c_str()); }
// Computes the HMAC-SHA256 of `toSign` keyed with `secret` through CCHmac and
// returns it wrapped in a HashResult.
HashResult Sha256HMACCommonCryptoImpl::Calculate(const ByteBuffer& toSign, const ByteBuffer& secret)
{
    unsigned int macSize = CC_SHA256_DIGEST_LENGTH;

    // Output buffer, zeroed before CCHmac writes into it.
    ByteBuffer mac(macSize);
    std::memset(mac.GetUnderlyingData(), 0, macSize);

    CCHmac(kCCHmacAlgSHA256,
           secret.GetUnderlyingData(), secret.GetLength(),
           toSign.GetUnderlyingData(), toSign.GetLength(),
           mac.GetUnderlyingData());

    return HashResult(std::move(mac));
}
// SHA-256 of a fixed string: the digest must be 32 bytes long and its Base64
// form must match the known value.
TEST(HashingUtilsTest, TestSHA256FromString)
{
    AWS_BEGIN_MEMORY_TEST(16, 10)

    Aws::String input = "TestToHash";

    ByteBuffer sha256Digest = HashingUtils::CalculateSHA256(input);
    ASSERT_EQ(32uL, sha256Digest.GetLength());

    Aws::String encodedDigest = HashingUtils::Base64Encode(sha256Digest);
    EXPECT_STREQ("No9GqyFhBA5QWj9+YUchjN83IByaCH5Lqji0McSOKyg=", encodedDigest.c_str());

    AWS_END_MEMORY_TEST
}
// HMAC-SHA256 of a fixed message/key pair: the digest must be 32 bytes long
// and its hex encoding must match the known value.
TEST(HashingUtilsTest, TestSHA256HMAC)
{
    const char* toHash = "TestHash";
    const char* secret = "TestSecret";

    AWS_BEGIN_MEMORY_TEST(16, 10)

    // The lengths are the character counts of the two literals above
    // (terminating NUL excluded).
    ByteBuffer mac = HashingUtils::CalculateSHA256HMAC(
        ByteBuffer(reinterpret_cast<unsigned char*>(const_cast<char*>(toHash)), 8),
        ByteBuffer(reinterpret_cast<unsigned char*>(const_cast<char*>(secret)), 10));

    Aws::String macAsHex = HashingUtils::HexEncode(mac);

    ASSERT_EQ(32uL, mac.GetLength());
    EXPECT_STREQ("43cf04fa24b873a456670d34ef9af2cb7870483327b5767509336fa66fb7986c", macAsHex.c_str());

    AWS_END_MEMORY_TEST
}
// Generates 1000 random UUIDs and checks, for each one: the 36-character
// string form, the 16-byte raw form, that bit 0x40 of byte 6 and bit 0x80 of
// byte 8 are set, and that the string has not been produced before.
TEST(UUIDTest, TestPlatformGeneratesUUID)
{
    Aws::Set<Aws::String> seen;

    for(size_t round = 0u; round < 1000u; ++round)
    {
        UUID candidate = UUID::RandomUUID();

        Aws::String text = candidate;
        ASSERT_EQ(36u, text.length());

        ByteBuffer raw = candidate;
        ASSERT_EQ(16u, raw.GetLength());
        ASSERT_EQ(0x40u, 0x40u & raw[6]);
        ASSERT_EQ(0x80u, 0x80u & raw[8]);

        // Must not collide with any UUID generated earlier in this run.
        ASSERT_EQ(0u, seen.count(text));
        seen.insert(text);
    }
}
int TextHandler::convert_html_to_text(const string & filepath, const TextID & tid, string & text) { //读取原文件 ByteBuffer data; if(false == read_file(filepath, data) ) { return ERR_READ_HTML_INPUT_FILE_FAILED; } //生成source文件 string source_file_path = m_default_file_path + tid + HTML_SOURCE_POSTFIX; if(false == write_file(source_file_path, data.GetPtr(), data.GetLength()) ) { return ERR_WRITE_HTML_SOURCE_FILE; } string html_ext_filepath = m_default_file_path + tid + HTML_EXT_TXT_POSTFIX; string html_info_filepath = m_default_file_path + tid + HTML_INFO_POSTFIX; //call html parser string cmd = "java -jar ./mdata/html/htmlAnalysis.jar -g \"" + source_file_path + "\" \"" + html_ext_filepath + "\" \"" + html_info_filepath + "\""; system(cmd.c_str()); //读取抽取的文件 ByteBuffer tmp_data; if(false == read_file(html_ext_filepath, tmp_data)) { return ERR_READ_HTML_EXT_FILE_FAILED; } tmp_data.String(text); return SUCCESS; }
// Serialises the header into a fresh ByteBuffer, in this order:
//   1. a 4-byte reserved slot, filled in last with the buffer length reached
//      once both constant tables have been written;
//   2. the string constant count followed by every string constant;
//   3. the numeric constant count followed by the first 32 bits of the
//      storage of every numeric constant.
ByteBuffer BytecodeHeader::GetByteBuffer()
{
    ByteBuffer out;
    auto bodyOffsetSlot = out.Reserve(4);

    // String constants.
    auto &stringConstants = constStringTable->constants;
    out.WriteU32(stringConstants.size());
    for (auto &entry : stringConstants)
    {
        out.WriteString(entry.val);
    }

    // Numeric constants: the value's storage is reinterpreted as a uint32_t.
    auto &numberConstants = constNumberTable->constants;
    out.WriteU32(numberConstants.size());
    for (auto &entry : numberConstants)
    {
        uint32_t *rawBits = reinterpret_cast<uint32_t *>(&entry.val);
        out.WriteU32(*rawBits);
    }

    // Back-patch the reserved slot with the current length of the buffer.
    out.WriteU32(out.GetLength(), bodyOffsetSlot);

    // Function names and offsets: nothing is written for them here.
    return out;
}
// Hashes `toHash` with a hash context created from this object's algorithm
// handle and `secret`. Returns an empty HashResult when this object is not
// valid or when the context could not be created.
HashResult BCryptHashImpl::Calculate(const ByteBuffer& toHash, const ByteBuffer& secret)
{
    if (!IsValid())
    {
        return HashResult();
    }

    // m_algorithmMutex is held for the remainder of the call.
    std::lock_guard<std::mutex> guard(m_algorithmMutex);

    BCryptHashContext hashContext(m_algorithmHandle, m_hashObject, m_hashObjectLength, secret);
    if (hashContext.IsValid())
    {
        return HashData(hashContext,
                        static_cast<PBYTE>(toHash.GetUnderlyingData()),
                        static_cast<ULONG>(toHash.GetLength()));
    }

    AWS_LOG_ERROR(logTag, "Error creating hash handle.");
    return HashResult();
}
// tool functions for build result int TextHandler::build_base_result_file(const TextID & tid, const string & build_id, const vector<boost::tuple<string, string, string, size_t>> & result_vec, const bool is_blank_between_sent, const bool is_bilingual, string & base_result_file_path, string & result_text) { //读取base source文件 ByteBuffer source; string source_file_path = m_default_file_path + tid + BASE_TEXT_SOURCE_POSTFIX; if(false == read_file(source_file_path, source)) { return ERR_READ_BASE_SOURCE_FILE; } //读取base pos文件 ByteBuffer pos_data; string pos_file_path = m_default_file_path + tid + BASE_TEXT_POS_POSTFIX; if(false == read_file(pos_file_path, pos_data) ) { return ERR_READ_BASE_POS_FILE; } //生成结果文件 TextFormat format; if(false == format.UnSerialization(pos_data.String()) ) { return ERR_PARSE_RESULT_FORMAT; } //为每个段落生成tgt string curr_para_tgt; vector<string> para_tgt_vec; para_tgt_vec.resize(format.Size(), ""); size_t i = 0; for(i=0; i<result_vec.size(); ++i) { string final_tgt; if(result_vec[i].get<2>().size() > 0) final_tgt += result_vec[i].get<2>(); else final_tgt += result_vec[i].get<1>(); if(result_vec[i].get<3>() >= format.Size()) { lerr << "formate restor failed. 
para_size = " << format.Size() << " result_vec[" << i << "].para_idx = " << result_vec[i].get<2>() << endl; continue; } if(i+1 < result_vec.size() && is_blank_between_sent) para_tgt_vec[result_vec[i].get<3>()] += final_tgt + " "; else para_tgt_vec[result_vec[i].get<3>()] += final_tgt; } //重新生成原文 result_text.clear(); size_t para_idx = 0; size_t offset = 0; while(offset < source.GetLength()) { if(para_idx < format.Size()) { size_t len = format[para_idx]._offset - offset; //输出非翻译段落 if(len > 0) { result_text += source.String().substr(offset, len); } //输出翻译段落 if(is_bilingual) result_text += source.String().substr(format[para_idx]._offset, format[para_idx]._len) + " ["; result_text += para_tgt_vec[para_idx]; if(is_bilingual) result_text += "] "; //更新偏移量 offset = format[para_idx]._offset + format[para_idx]._len; ++para_idx; } else { size_t len = source.GetLength() - offset; //输出非翻译段落 if(len > 0) { result_text += source.String().substr(offset, len); } break; } } //输出结果到文件 base_result_file_path = m_default_file_path + tid + "." + build_id + BASE_TEXT_TARGET_POSTFIX; if(false == write_file(base_result_file_path, result_text.c_str(), result_text.size()) ) { return ERR_WRITE_BASE_RESULT_FILE; } return SUCCESS; }
BCryptHashContext(void* algorithmHandle, PBYTE hashObject, DWORD hashObjectLength, const ByteBuffer& secret) : m_hashHandle(nullptr), m_isValid(false) { NTSTATUS status = BCryptCreateHash(algorithmHandle, &m_hashHandle, hashObject, hashObjectLength, secret.GetUnderlyingData(), (ULONG)secret.GetLength(), 0); m_isValid = NT_SUCCESS(status); }