/**
 * Extracts the text of a PDF file by running the bundled xpdf `pdftotext` tool.
 *
 * The input file is read and copied to a working "source" file under
 * m_default_file_path, pdftotext is run on that copy (GBK output is requested
 * with `-enc GBK`), and the resulting text file is read back into `text`.
 *
 * @param filepath  Path of the PDF file to convert.
 * @param tid       Text identifier used to build the working file names.
 * @param text      Filled from the extracted-text file via ByteBuffer::String(text).
 * @return SUCCESS, or ERR_READ_PDF_INPUT_FILE_FAILED, ERR_WRITE_PDF_SOURCE_FILE,
 *         ERR_READ_PDF_EXT_FILE_FAILED depending on which step failed.
 */
int TextHandler::convert_pdf_to_text(const string & filepath, const TextID & tid, string & text)
{
    // Read the original file.
    ByteBuffer data;

    if(false == read_file(filepath, data) )
    {
        return ERR_READ_PDF_INPUT_FILE_FAILED;
    }

    // Write the working copy (the "source" file).
    string source_file_path = m_default_file_path + tid + PDF_SOURCE_POSTFIX;

    if(false == write_file(source_file_path, data.GetPtr(), data.GetLength()) )
    {
        return ERR_WRITE_PDF_SOURCE_FILE;
    }

    const string pdf_ext_filepath = m_default_file_path + tid + PDF_EXT_TXT_POSTFIX;

    // Call xpdf. Both paths are wrapped in double quotes (as convert_html_to_text
    // does) so that a path containing spaces is passed as a single argument
    // instead of being split by the shell.
    const string cmd = "./mdata/pdf/pdftotext -cfg ./mdata/pdf/xpdf-chinese-simplified/xpdfrc -enc GBK \""
                       + source_file_path + "\" \"" + pdf_ext_filepath + "\"";

    // The exit status is intentionally not used as the success criterion:
    // success is decided by whether the extracted file can be read below.
    system(cmd.c_str());

    // Read the extracted text file.
    ByteBuffer tmp_data;
    if(false == read_file(pdf_ext_filepath, tmp_data))
    {
        return ERR_READ_PDF_EXT_FILE_FAILED;
    }

    tmp_data.String(text);

    return SUCCESS;
}
static void TestMD5FromString(const char* value, const char* expectedBase64Hash)
{
    Aws::String source(value);

    ByteBuffer digest = HashingUtils::CalculateMD5(source);
    ASSERT_EQ(16uL, digest.GetLength());

    Aws::String base64Hash = HashingUtils::Base64Encode(digest);
    ASSERT_STREQ(expectedBase64Hash, base64Hash.c_str());
}
            /// Computes the HMAC-SHA256 of `toSign` keyed with `secret` through
            /// CommonCrypto's CCHmac and returns the digest wrapped in a HashResult.
            HashResult Sha256HMACCommonCryptoImpl::Calculate(const ByteBuffer& toSign, const ByteBuffer& secret)
            {
                const unsigned int digestSize = CC_SHA256_DIGEST_LENGTH;

                // Output buffer sized for one SHA-256 digest, zeroed before use.
                ByteBuffer hmac(digestSize);
                std::memset(hmac.GetUnderlyingData(), 0, digestSize);

                CCHmac(kCCHmacAlgSHA256,
                       secret.GetUnderlyingData(), secret.GetLength(),
                       toSign.GetUnderlyingData(), toSign.GetLength(),
                       hmac.GetUnderlyingData());

                return HashResult(std::move(hmac));
            }
// Verifies that the SHA-256 of a fixed string is 32 bytes long and matches a
// known Base64-encoded digest, inside the AWS memory-test bracket.
TEST(HashingUtilsTest, TestSHA256FromString)
{
    AWS_BEGIN_MEMORY_TEST(16, 10)

    const Aws::String input = "TestToHash";

    const ByteBuffer sha256Digest = HashingUtils::CalculateSHA256(input);
    ASSERT_EQ(32uL, sha256Digest.GetLength());

    const Aws::String encodedDigest = HashingUtils::Base64Encode(sha256Digest);
    EXPECT_STREQ("No9GqyFhBA5QWj9+YUchjN83IByaCH5Lqji0McSOKyg=", encodedDigest.c_str());

    AWS_END_MEMORY_TEST
}
// Verifies that HMAC-SHA256 of a fixed message/key pair yields a 32-byte digest
// whose hex encoding matches a known value.
TEST(HashingUtilsTest, TestSHA256HMAC)
{
    const char* message = "TestHash";
    const char* key = "TestSecret";

    AWS_BEGIN_MEMORY_TEST(16, 10)

    // 8 and 10 are the byte counts of the two literals above, excluding the
    // terminating NUL.
    ByteBuffer hmac = HashingUtils::CalculateSHA256HMAC(
            ByteBuffer(reinterpret_cast<unsigned char*>(const_cast<char*>(message)), 8),
            ByteBuffer(reinterpret_cast<unsigned char*>(const_cast<char*>(key)), 10));

    Aws::String hmacHex = HashingUtils::HexEncode(hmac);

    ASSERT_EQ(32uL, hmac.GetLength());
    EXPECT_STREQ("43cf04fa24b873a456670d34ef9af2cb7870483327b5767509336fa66fb7986c", hmacHex.c_str());

    AWS_END_MEMORY_TEST
}
// Generates 1000 random UUIDs and checks, for each one, the string length (36),
// the raw length (16), the 0x40 bit of byte 6, the 0x80 bit of byte 8, and that
// the string form has not been produced earlier in this run.
TEST(UUIDTest, TestPlatformGeneratesUUID)
{
    Aws::Set<Aws::String> seen;

    const size_t iterations = 1000u;
    for(size_t round = 0u; round < iterations; ++round)
    {
        UUID candidate = UUID::RandomUUID();

        // String form.
        Aws::String text = candidate;
        ASSERT_EQ(36u, text.length());

        // Raw byte form.
        ByteBuffer raw = candidate;
        ASSERT_EQ(16u, raw.GetLength());
        ASSERT_EQ(0x40u, 0x40u & raw[6]);
        ASSERT_EQ(0x80u, 0x80u & raw[8]);

        // Must be new; only then is it recorded.
        ASSERT_EQ(seen.end(), seen.find(text));
        seen.insert(text);
    }
}
int TextHandler::convert_html_to_text(const string & filepath, const TextID & tid, string & text)
{
    //读取原文件
    ByteBuffer data;

    if(false == read_file(filepath, data) )
    {
        return ERR_READ_HTML_INPUT_FILE_FAILED;
    }

    //生成source文件
    string source_file_path = m_default_file_path + tid + HTML_SOURCE_POSTFIX;

    if(false == write_file(source_file_path, data.GetPtr(), data.GetLength()) )
    {
        return ERR_WRITE_HTML_SOURCE_FILE;
    }

    string html_ext_filepath = m_default_file_path + tid + HTML_EXT_TXT_POSTFIX;
    string html_info_filepath = m_default_file_path + tid + HTML_INFO_POSTFIX;

    //call html parser
    string cmd = "java -jar ./mdata/html/htmlAnalysis.jar -g \"" + source_file_path + "\" \"" + html_ext_filepath + "\" \"" + html_info_filepath + "\"";
    system(cmd.c_str());

    //读取抽取的文件
    ByteBuffer tmp_data;
    if(false == read_file(html_ext_filepath, tmp_data))
    {
        return ERR_READ_HTML_EXT_FILE_FAILED;
    }

    tmp_data.String(text);

    return SUCCESS;
}
// Serializes the header into a fresh ByteBuffer: a reserved 4-byte slot, the
// string constants (count, then each string), the numeric constants (count,
// then each value as a U32), and finally the buffer length written back into
// the reserved slot.
ByteBuffer BytecodeHeader::GetByteBuffer()
{
  ByteBuffer out;

  // Placeholder that is filled in once both constant tables are written.
  auto bodyOffsetSlot = out.Reserve(4);

  // String constants.
  auto &stringConstants = constStringTable->constants;
  out.WriteU32(stringConstants.size());
  for (auto &entry : stringConstants)
  {
    out.WriteString(entry.val);
  }

  // Numeric constants.
  // NOTE(review): each value is reinterpreted in place as a uint32_t; this
  // presumably assumes `val` is a 4-byte type - confirm against its declaration.
  auto &numberConstants = constNumberTable->constants;
  out.WriteU32(numberConstants.size());
  for (auto &entry : numberConstants)
  {
    out.WriteU32(*reinterpret_cast<uint32_t *>(&entry.val));
  }

  // Patch the reserved slot with the length written so far.
  out.WriteU32(out.GetLength(), bodyOffsetSlot);

  // Function names and offsets are not written here yet.

  return out;
}
            /// Hashes `toHash` (keyed with `secret`) using a BCrypt hash context.
            /// Returns an empty HashResult when this object is not valid or the
            /// hash context cannot be created.
            HashResult BCryptHashImpl::Calculate(const ByteBuffer& toHash, const ByteBuffer& secret)
            {
                if (!IsValid())
                {
                    return HashResult();
                }

                // Held from here until the function returns.
                std::lock_guard<std::mutex> algorithmLock(m_algorithmMutex);

                BCryptHashContext hashContext(m_algorithmHandle, m_hashObject, m_hashObjectLength, secret);
                if (hashContext.IsValid())
                {
                    const PBYTE input = static_cast<PBYTE>(toHash.GetUnderlyingData());
                    const ULONG inputLength = static_cast<ULONG>(toHash.GetLength());
                    return HashData(hashContext, input, inputLength);
                }

                AWS_LOG_ERROR(logTag, "Error creating hash handle.");
                return HashResult();
            }
// Helper functions for building result files
int TextHandler::build_base_result_file(const TextID & tid,
                                        const string & build_id,
                                        const vector<boost::tuple<string, string, string, size_t>> & result_vec,
                                        const bool is_blank_between_sent,
                                        const bool is_bilingual,
                                        string & base_result_file_path,
                                        string & result_text)
{
    //读取base source文件
    ByteBuffer source;
    string source_file_path = m_default_file_path + tid + BASE_TEXT_SOURCE_POSTFIX;

    if(false == read_file(source_file_path, source))
    {
        return ERR_READ_BASE_SOURCE_FILE;
    }

    //读取base pos文件
    ByteBuffer pos_data;
    string pos_file_path = m_default_file_path + tid + BASE_TEXT_POS_POSTFIX;

    if(false == read_file(pos_file_path, pos_data) )
    {
        return ERR_READ_BASE_POS_FILE;
    }

    //生成结果文件
    TextFormat format;
    if(false == format.UnSerialization(pos_data.String()) )
    {
        return ERR_PARSE_RESULT_FORMAT;
    }

    //为每个段落生成tgt
    string curr_para_tgt;
    vector<string> para_tgt_vec;
    para_tgt_vec.resize(format.Size(), "");

    size_t i = 0;

    for(i=0; i<result_vec.size(); ++i)
    {

        string final_tgt;

        if(result_vec[i].get<2>().size() > 0)
            final_tgt += result_vec[i].get<2>();
        else
            final_tgt += result_vec[i].get<1>();

        if(result_vec[i].get<3>() >= format.Size())
        {
            lerr << "formate restor failed. para_size = " << format.Size() << " result_vec[" << i << "].para_idx = " << result_vec[i].get<2>() << endl;
            continue;
        }

        if(i+1 < result_vec.size() && is_blank_between_sent)
            para_tgt_vec[result_vec[i].get<3>()] += final_tgt + " ";
        else
            para_tgt_vec[result_vec[i].get<3>()] += final_tgt;
    }

    //重新生成原文
    result_text.clear();
    size_t para_idx = 0;
    size_t offset = 0;

    while(offset < source.GetLength())
    {
        if(para_idx < format.Size())
        {
            size_t len = format[para_idx]._offset - offset;

            //输出非翻译段落
            if(len > 0)
            {
                result_text += source.String().substr(offset, len);
            }

            //输出翻译段落
            if(is_bilingual)
                result_text += source.String().substr(format[para_idx]._offset, format[para_idx]._len) + " [";

            result_text += para_tgt_vec[para_idx];

            if(is_bilingual)
                result_text += "] ";

            //更新偏移量
            offset = format[para_idx]._offset + format[para_idx]._len;
            ++para_idx;
        }
        else
        {
            size_t len = source.GetLength() - offset;

            //输出非翻译段落
            if(len > 0)
            {
                result_text += source.String().substr(offset, len);
            }

            break;
        }
    }

    //输出结果到文件
    base_result_file_path =  m_default_file_path + tid + "." + build_id + BASE_TEXT_TARGET_POSTFIX;

    if(false == write_file(base_result_file_path, result_text.c_str(), result_text.size()) )
    {
        return ERR_WRITE_BASE_RESULT_FILE;
    }

    return SUCCESS;
}
 BCryptHashContext(void* algorithmHandle, PBYTE hashObject, DWORD hashObjectLength, const ByteBuffer& secret) :
     m_hashHandle(nullptr),
     m_isValid(false)
 {
     NTSTATUS status = BCryptCreateHash(algorithmHandle, &m_hashHandle, hashObject, hashObjectLength, secret.GetUnderlyingData(), (ULONG)secret.GetLength(), 0);
     m_isValid = NT_SUCCESS(status);
 }