int test_main( int, char*[] ) { { // basic_filebuf runtime results are ignored; as long as they don't crash // or throw we are satisfied fs::basic_filebuf<char> bfb; fs::filebuf cfb; bfb.open( "fstream_test_bffoo", std::ios_base::in ); cfb.open( "fstream_test_bffoo", std::ios_base::in ); # ifndef BOOST_NO_STD_WSTRING fs::wfilebuf wfb; wfb.open( "fstream_test_bffoo", std::ios_base::in ); # endif } std::remove( "fstream_test_bfoo" ); std::remove( "fstream_test_cfoo" ); # ifndef BOOST_NO_STD_WSTRING std::remove( "fstream_test_wfoo" ); # endif { fs::basic_ofstream<char> bofs( "fstream_test_bfoo" ); fs::ofstream cofs( "fstream_test_cfoo" ); BOOST_CHECK( bofs.is_open() ); BOOST_CHECK( cofs.is_open() ); bofs << "fstream_test_bfoo"; cofs << "fstream_test_cfoo"; // these will fail, but they still test the interface bofs.open( "fstream_test_bfoo" ); cofs.open( "fstream_test_cfoo" ); # ifndef BOOST_NO_STD_WSTRING fs::wofstream wofs( "fstream_test_wfoo" ); BOOST_CHECK( wofs.is_open() ); wofs << L"fstream_test_wfoo"; wofs.open( "fstream_test_wfoo" ); // expected to fail # endif } { fs::basic_ifstream<char> bifs( "fstream_test_bfoo" ); fs::ifstream cifs( "fstream_test_cfoo" ); BOOST_CHECK( bifs.is_open() ); BOOST_CHECK( cifs.is_open() ); std::string b; std::string c; bifs >> b; cifs >> c; BOOST_CHECK( b == "fstream_test_bfoo" ); BOOST_CHECK( c == "fstream_test_cfoo" ); // these will fail, but they still test the interface bifs.open( "fstream_test_bfoo" ); cifs.open( "fstream_test_cfoo" ); # ifndef BOOST_NO_STD_WSTRING fs::wifstream wifs( "fstream_test_wfoo" ); BOOST_CHECK( wifs.is_open() ); std::wstring w; wifs >> w; BOOST_CHECK( w == L"fstream_test_wfoo" ); wifs.open( "fstream_test_wfoo" ); // expected to fail # endif } { fs::basic_fstream<char> bfs( "fstream_test_bfoo" ); fs::fstream cfs( "fstream_test_cfoo" ); BOOST_CHECK( bfs.is_open() ); BOOST_CHECK( cfs.is_open() ); std::string b; std::string c; bfs >> b; cfs >> c; BOOST_CHECK( b == "fstream_test_bfoo" ); BOOST_CHECK( c == 
"fstream_test_cfoo" ); // these will fail, but they still test the interface bfs.open( "fstream_test_bfoo" ); cfs.open( "fstream_test_cfoo" ); # ifndef BOOST_NO_STD_WSTRING fs::wfstream wfs( "fstream_test_wfoo" ); BOOST_CHECK( wfs.is_open() ); std::wstring w; wfs >> w; BOOST_CHECK( w == L"fstream_test_wfoo" ); wfs.open( "fstream_test_wfoo" ); // expected to fail # endif } // std::remove( "fstream_test_bfoo" ); // std::remove( "fstream_test_cfoo" ); // # ifndef BOOST_NO_STD_WSTRING // std::remove( "fstream_test_wfoo" ); // # endif return 0; }
// Matrix * vector product over `fl48` elements using SSE, with the row loop
// unrolled 8x.
//
// For every group of 8 rows the function accumulates 8 dot products of
// mat[row][0..SIZE) with vec[0..SIZE) in double precision, converts the 8 sums
// back with convert_double_to_f48_SSE(), packs them and writes them to a
// freshly allocated `result` array. On return `vec` is rebound to `result`.
//
// Parameters:
//   mat - array of row pointers; rows i..i+7 and columns 0..SIZE-1 are read.
//   vec - in: the vector to multiply; out: pointer to the new result array.
//
// Requirements visible from the loops: SIZE must be a multiple of 8 (rows are
// consumed 8 at a time) and even (columns are consumed 2 at a time).
//
// NOTE(review): the buffer `vec` pointed to on entry is not freed here, and
// `result` is allocated with new[]; ownership of both is the caller's concern
// - confirm against callers.
// NOTE(review): the shuffles below treat each element as 6 bytes that form the
// upper 48 bits of an IEEE double - presumably that is what `fl48` is; confirm.
// If so, each 16-byte load covers 12 bytes of data plus 4 bytes beyond it, so
// the last load of a row / of `vec` reads past the final element unless the
// buffers are padded - confirm.
void matrix_vector_mul_SSE_f48_loop_unrolled (fl48** mat, fl48* &vec) { // TESTING change SIZE to min 8 - but multiple of 8
	fl48* result = new fl48[SIZE];
	// Expands two packed 6-byte elements into two 8-byte lanes: source bytes
	// 0-5 go to bytes 2-7, source bytes 6-11 go to bytes 10-15. Index 255 has
	// its high bit set, which makes _mm_shuffle_epi8 write a zero byte, so the
	// low two bytes of each lane become 0.
	__m128i load_mask = _mm_set_epi8(11, 10, 9, 8, 7, 6, 255, 255, 5, 4, 3, 2, 1, 0, 255, 255);
	for(unsigned i=0;i<SIZE;i+=8) { // row // requiring 8 at a time - because loop un-roll
		// One two-lane accumulator per row i..i+7.
		__m128d running_sum1 = _mm_set1_pd(0.0); // running sum initially 0
		__m128d running_sum2 = _mm_set1_pd(0.0); // running sum initially 0
		__m128d running_sum3 = _mm_set1_pd(0.0); // running sum initially 0
		__m128d running_sum4 = _mm_set1_pd(0.0); // running sum initially 0
		__m128d running_sum5 = _mm_set1_pd(0.0); // running sum initially 0
		__m128d running_sum6 = _mm_set1_pd(0.0); // running sum initially 0
		__m128d running_sum7 = _mm_set1_pd(0.0); // running sum initially 0
		__m128d running_sum8 = _mm_set1_pd(0.0); // running sum initially 0
		for(unsigned j=0;j<SIZE;j+=2) { // col - requires skipping on 2 at a time
			// Row i: load two matrix elements and two vector elements, widen
			// both with load_mask, multiply lane-wise and accumulate.
			__m128i mat_vect = _mm_loadu_si128((__m128i*) &mat[i][j]); // hoping that addresses are as expected - seems like this is the way it's stored
			mat_vect = _mm_shuffle_epi8(mat_vect, load_mask);
			__m128i vec_elem = _mm_loadu_si128((__m128i*) &vec[j]);
			vec_elem = _mm_shuffle_epi8(vec_elem, load_mask);
			__m128d mult = _mm_mul_pd((__m128d)mat_vect,(__m128d)vec_elem);
			running_sum1 = _mm_add_pd(mult,running_sum1);
			// Rows i+1 .. i+7 reuse the already widened vec_elem.
			mat_vect = _mm_loadu_si128((__m128i*) &mat[i+1][j]);
			mat_vect = _mm_shuffle_epi8(mat_vect, load_mask);
			mult = _mm_mul_pd((__m128d)mat_vect,(__m128d)vec_elem);
			running_sum2 = _mm_add_pd(mult,running_sum2);
			mat_vect = _mm_loadu_si128((__m128i*) &mat[i+2][j]);
			mat_vect = _mm_shuffle_epi8(mat_vect, load_mask);
			mult = _mm_mul_pd((__m128d)mat_vect,(__m128d)vec_elem);
			running_sum3 = _mm_add_pd(mult,running_sum3);
			mat_vect = _mm_loadu_si128((__m128i*) &mat[i+3][j]);
			mat_vect = _mm_shuffle_epi8(mat_vect, load_mask);
			mult = _mm_mul_pd((__m128d)mat_vect,(__m128d)vec_elem);
			running_sum4 = _mm_add_pd(mult,running_sum4);
			mat_vect = _mm_loadu_si128((__m128i*) &mat[i+4][j]);
			mat_vect = _mm_shuffle_epi8(mat_vect, load_mask);
			mult = _mm_mul_pd((__m128d)mat_vect,(__m128d)vec_elem);
			running_sum5 = _mm_add_pd(mult,running_sum5);
			mat_vect = _mm_loadu_si128((__m128i*) &mat[i+5][j]);
			mat_vect = _mm_shuffle_epi8(mat_vect, load_mask);
			mult = _mm_mul_pd((__m128d)mat_vect,(__m128d)vec_elem);
			running_sum6 = _mm_add_pd(mult,running_sum6);
			mat_vect = _mm_loadu_si128((__m128i*) &mat[i+6][j]);
			mat_vect = _mm_shuffle_epi8(mat_vect, load_mask);
			mult = _mm_mul_pd((__m128d)mat_vect,(__m128d)vec_elem);
			running_sum7 = _mm_add_pd(mult,running_sum7);
			mat_vect = _mm_loadu_si128((__m128i*) &mat[i+7][j]);
			mat_vect = _mm_shuffle_epi8(mat_vect, load_mask);
			mult = _mm_mul_pd((__m128d)mat_vect,(__m128d)vec_elem);
			running_sum8 = _mm_add_pd(mult,running_sum8);
		}
		// Horizontal add: `mask` swaps the two 64-bit halves, so adding the
		// swapped copy leaves the full row sum in both lanes.
		__m128i mask = _mm_set_epi8(7 ,6 ,5, 4, 3, 2, 1, 0, 15, 14, 13, 12, 11, 10, 9, 8);
		__m128i sum_shuffled = _mm_shuffle_epi8((__m128i)running_sum1, mask);
		running_sum1 = _mm_add_pd(running_sum1,(__m128d)sum_shuffled);
		sum_shuffled = _mm_shuffle_epi8((__m128i)running_sum2, mask);
		running_sum2 = _mm_add_pd(running_sum2,(__m128d)sum_shuffled);
		sum_shuffled = _mm_shuffle_epi8((__m128i)running_sum3, mask);
		running_sum3 = _mm_add_pd(running_sum3,(__m128d)sum_shuffled);
		sum_shuffled = _mm_shuffle_epi8((__m128i)running_sum4, mask);
		running_sum4 = _mm_add_pd(running_sum4,(__m128d)sum_shuffled);
		sum_shuffled = _mm_shuffle_epi8((__m128i)running_sum5, mask);
		running_sum5 = _mm_add_pd(running_sum5,(__m128d)sum_shuffled);
		sum_shuffled = _mm_shuffle_epi8((__m128i)running_sum6, mask);
		running_sum6 = _mm_add_pd(running_sum6,(__m128d)sum_shuffled);
		sum_shuffled = _mm_shuffle_epi8((__m128i)running_sum7, mask);
		running_sum7 = _mm_add_pd(running_sum7,(__m128d)sum_shuffled);
		sum_shuffled = _mm_shuffle_epi8((__m128i)running_sum8, mask);
		running_sum8 = _mm_add_pd(running_sum8,(__m128d)sum_shuffled);
		// mesh them into 4
		// mask_first keeps the low lane and zeroes the high lane; mask_second
		// moves the low lane to the high lane and zeroes the low lane. OR-ing
		// an odd/even pair therefore yields {low = odd sum, high = even sum}.
		__m128i mask_first = _mm_set_epi8(255,255,255,255,255,255,255,255, 7 ,6 ,5, 4, 3, 2, 1, 0);
		__m128i mask_second = _mm_set_epi8(7 ,6 ,5, 4, 3, 2, 1, 0, 255,255,255,255,255,255,255,255);
		running_sum1 = (__m128d)_mm_shuffle_epi8((__m128i)running_sum1, mask_first);
		running_sum2 = (__m128d)_mm_shuffle_epi8((__m128i)running_sum2, mask_second);
		running_sum1 = (__m128d)_mm_or_si128((__m128i)running_sum1, (__m128i)running_sum2); // rows i, i+1
		running_sum3 = (__m128d)_mm_shuffle_epi8((__m128i)running_sum3, mask_first);
		running_sum4 = (__m128d)_mm_shuffle_epi8((__m128i)running_sum4, mask_second);
		running_sum2 = (__m128d)_mm_or_si128((__m128i)running_sum3, (__m128i)running_sum4); // rows i+2, i+3
		running_sum5 = (__m128d)_mm_shuffle_epi8((__m128i)running_sum5, mask_first);
		running_sum6 = (__m128d)_mm_shuffle_epi8((__m128i)running_sum6, mask_second);
		running_sum3 = (__m128d)_mm_or_si128((__m128i)running_sum6, (__m128i)running_sum5); // rows i+4, i+5
		running_sum7 = (__m128d)_mm_shuffle_epi8((__m128i)running_sum7, mask_first);
		running_sum8 = (__m128d)_mm_shuffle_epi8((__m128i)running_sum8, mask_second);
		running_sum4 = (__m128d)_mm_or_si128((__m128i)running_sum8, (__m128i)running_sum7); // rows i+6, i+7
		// RS 1-4 are right and expected here too
		// rs 5-8 neglected and not required from now
		// NOTE(review): the packing below assumes convert_double_to_f48_SSE
		// returns the two converted elements in bytes 0-11 with bytes 12-15
		// zero - not visible here, confirm.
		__m128i a01_round = convert_double_to_f48_SSE((__m128i)running_sum1);
		__m128i a23_round = convert_double_to_f48_SSE((__m128i)running_sum2);
		__m128i a45_round = convert_double_to_f48_SSE((__m128i)running_sum3);
		__m128i a67_round = convert_double_to_f48_SSE((__m128i)running_sum4);
		// place them right for memory write
		// Goal: three contiguous 16-byte registers holding the 8 results.
		__m128i match_mask = _mm_set_epi8(3,2,1,0,255,255,255,255,255,255,255,255,255,255,255,255); // mask used to match the missing spaces
		__m128i a23_shuffled = _mm_shuffle_epi8((__m128i)a23_round, match_mask); // move bytes 0-3 of a23 into bytes 12-15, the free space at the top of a01
		a01_round = _mm_or_si128(a01_round,a23_shuffled);
		a23_round = _mm_srli_si128 (a23_round, 4); // drop the 4 bytes already copied; _mm_srli_si128 shifts the whole 128-bit register by bytes, whereas the epi64 shifts operate within each 64-bit element
		match_mask = _mm_set_epi8(7,6,5,4,3,2,1,0,255,255,255,255,255,255,255,255); // reset the match mask for a4 and small bit of a5
		__m128i a45_shuffled = _mm_shuffle_epi8((__m128i)a45_round, match_mask); // move bytes 0-7 of a45 into bytes 8-15 to fit above the remainder of a23
		a23_round = _mm_or_si128(a23_round,a45_shuffled);
		a45_round = _mm_srli_si128(a45_round, 8); // drop the 8 bytes already copied (whole-register byte shift, see above)
		match_mask = _mm_set_epi8(11,10,9,8,7,6,5,4,3,2,1,0,255,255,255,255);
		__m128i a67_shuffled = _mm_shuffle_epi8((__m128i)a67_round, match_mask); // move bytes 0-11 of a67 into bytes 4-15
		a45_round = _mm_or_si128(a45_round,a67_shuffled);
		// WRITE BACK TO MEMORY!
		// Three unaligned 16-byte stores starting at result[i].
		// NOTE(review): bofs() is defined elsewhere; presumably it returns a
		// double* advanced by the given number of doubles from &result[i]
		// (i.e. +16 and +32 bytes) - confirm.
		_mm_storeu_pd((double*)&result[i], (__m128d)a01_round);
		_mm_storeu_pd(bofs(&result[i],2), (__m128d)a23_round);
		_mm_storeu_pd(bofs(&result[i],4), (__m128d)a45_round);
	}
	vec = result;
}
bool Dictionary::compile(const Param ¶m, const std::vector<std::string> &dics, const char *matrix_file, const char *matrix_bin_file, const char *left_id_file, const char *right_id_file, const char *rewrite_file, const char *pos_id_file, const char *output) { Connector matrix; scoped_ptr<DictionaryRewriter> rewrite(0); scoped_ptr<POSIDGenerator> posid(0); scoped_ptr<ContextID> cid(0); scoped_ptr<Writer> writer(0); scoped_ptr<StringBuffer> os(0); Node node; std::vector<std::pair<std::string, Token*> > dic; size_t offset = 0; unsigned int lexsize = 0; std::string w, feature, ufeature, lfeature, rfeature, fbuf, key; int lid, rid, cost; const std::string from = param.get<std::string>("dictionary-charset"); const std::string to = param.get<std::string>("charset"); const bool wakati = param.get<bool>("wakati"); const int type = param.get<int>("type"); const std::string node_format = param.get<std::string>("node-format"); // for backward compatibility std::string config_charset = param.get<std::string>("config-charset"); if (config_charset.empty()) config_charset = from; CHECK_DIE(!from.empty()) << "input dictionary charset is empty"; CHECK_DIE(!to.empty()) << "output dictionary charset is empty"; Iconv iconv; CHECK_DIE(iconv.open(from.c_str(), to.c_str())) << "iconv_open() failed with from=" << from << " to=" << to; Iconv config_iconv; CHECK_DIE(config_iconv.open(config_charset.c_str(), from.c_str())) << "iconv_open() failed with from=" << config_charset << " to=" << from; if (!node_format.empty()) { writer.reset(new Writer); os.reset(new StringBuffer); memset(&node, 0, sizeof(node)); } if (!matrix.openText(matrix_file) && !matrix.open(matrix_bin_file)) { matrix.set_left_size(1); matrix.set_right_size(1); } posid.reset(new POSIDGenerator); posid->open(pos_id_file, &config_iconv); std::istringstream iss(UNK_DEF_DEFAULT); for (size_t i = 0; i < dics.size(); ++i) { std::ifstream ifs(dics[i].c_str()); std::istream *is = &ifs; if (!ifs) { if (type == MECAB_UNK_DIC) { std::cerr 
<< dics[i] << " is not found. minimum setting is used." << std::endl; is = &iss; } else { CHECK_DIE(ifs) << "no such file or directory: " << dics[i]; } } std::cout << "reading " << dics[i] << " ... "; char line[BUF_SIZE]; size_t num = 0; while (is->getline(line, sizeof(line))) { char *col[8]; const size_t n = tokenizeCSV(line, col, 5); CHECK_DIE(n == 5) << "format error: " << line; w = col[0]; lid = std::atoi(col[1]); rid = std::atoi(col[2]); cost = std::atoi(col[3]); feature = col[4]; int pid = posid->id(feature.c_str()); if (lid < 0 || rid < 0) { if (!rewrite.get()) { rewrite.reset(new DictionaryRewriter); rewrite->open(rewrite_file, &config_iconv); } CHECK_DIE(rewrite->rewrite(feature, &ufeature, &lfeature, &rfeature)) << "rewrite failed: " << feature; if (!cid.get()) { cid.reset(new ContextID); cid->open(left_id_file, right_id_file, &config_iconv); CHECK_DIE(cid->left_size() == matrix.left_size() && cid->right_size() == matrix.right_size()) << "Context ID files(" << left_id_file << " or " << right_id_file << " may be broken"; } lid = cid->lid(lfeature.c_str()); rid = cid->rid(rfeature.c_str()); } CHECK_DIE(lid >= 0 && rid >= 0 && matrix.is_valid(lid, rid)) << "invalid ids are found lid=" << lid << " rid=" << rid; if (w.empty()) { std::cerr << "empty word is found, discard this line" << std::endl; continue; } if (!iconv.convert(&feature)) { std::cerr << "iconv conversion failed. skip this entry" << std::endl; continue; } if (type != MECAB_UNK_DIC && !iconv.convert(&w)) { std::cerr << "iconv conversion failed. 
skip this entry" << std::endl; continue; } if (!node_format.empty()) { node.surface = w.c_str(); node.feature = feature.c_str(); node.length = w.size(); node.rlength = w.size(); node.posid = pid; node.stat = MECAB_NOR_NODE; CHECK_DIE(os.get()); CHECK_DIE(writer.get()); os->clear(); CHECK_DIE(writer->writeNode(&*os, node_format.c_str(), w.c_str(), &node)) << "conversion error: " << feature << " with " << node_format; *os << '\0'; feature = os->str(); } key.clear(); if (!wakati) key = feature + '\0'; Token* token = new Token; token->lcAttr = lid; token->rcAttr = rid; token->posid = pid; token->wcost = cost; token->feature = offset; token->compound = 0; dic.push_back(std::make_pair<std::string, Token*>(w, token)); // append to output buffer if (!wakati) fbuf.append(key.data(), key.size()); offset += key.size(); ++num; ++lexsize; } std::cout << num << std::endl; } if (wakati) fbuf.append("\0", 1); std::sort(dic.begin(), dic.end()); size_t bsize = 0; size_t idx = 0; std::string prev; std::vector<const char *> str; std::vector<size_t> len; std::vector<Darts::DoubleArray::result_type> val; for (size_t i = 0; i < dic.size(); ++i) { if (i != 0 && prev != dic[i].first) { str.push_back(dic[idx].first.c_str()); len.push_back(dic[idx].first.size()); val.push_back(bsize +(idx << 8)); bsize = 1; idx = i; } else { ++bsize; } prev = dic[i].first; } str.push_back(dic[idx].first.c_str()); len.push_back(dic[idx].first.size()); val.push_back(bsize +(idx << 8)); CHECK_DIE(str.size() == len.size()); CHECK_DIE(str.size() == val.size()); Darts::DoubleArray da; CHECK_DIE(da.build(str.size(), const_cast<char **>(&str[0]), &len[0], &val[0], &progress_bar_darts) == 0) << "unkown error in building double-array"; std::string tbuf; for (size_t i = 0; i < dic.size(); ++i) { tbuf.append(reinterpret_cast<const char*>(dic[i].second), sizeof(Token)); delete dic[i].second; } dic.clear(); // needs to be 8byte(64bit) aligned while (tbuf.size() % 8 != 0) { Token dummy; memset(&dummy, 0, sizeof(Token)); 
tbuf.append(reinterpret_cast<const char*>(&dummy), sizeof(Token)); } unsigned int dummy = 0; unsigned int lsize = matrix.left_size(); unsigned int rsize = matrix.right_size(); unsigned int dsize = da.unit_size() * da.size(); unsigned int tsize = tbuf.size(); unsigned int fsize = fbuf.size(); unsigned int version = DIC_VERSION; char charset[32]; std::fill(charset, charset + sizeof(charset), '\0'); std::strncpy(charset, to.c_str(), 31); std::ofstream bofs(output, std::ios::binary|std::ios::out); CHECK_DIE(bofs) << "permission denied: " << output; unsigned int magic = 0; // needs to be 64bit aligned // 10*32 = 64*5 bofs.write(reinterpret_cast<const char *>(&magic), sizeof(unsigned int)); bofs.write(reinterpret_cast<const char *>(&version), sizeof(unsigned int)); bofs.write(reinterpret_cast<const char *>(&type), sizeof(unsigned int)); bofs.write(reinterpret_cast<const char *>(&lexsize), sizeof(unsigned int)); bofs.write(reinterpret_cast<const char *>(&lsize), sizeof(unsigned int)); bofs.write(reinterpret_cast<const char *>(&rsize), sizeof(unsigned int)); bofs.write(reinterpret_cast<const char *>(&dsize), sizeof(unsigned int)); bofs.write(reinterpret_cast<const char *>(&tsize), sizeof(unsigned int)); bofs.write(reinterpret_cast<const char *>(&fsize), sizeof(unsigned int)); bofs.write(reinterpret_cast<const char *>(&dummy), sizeof(unsigned int)); // 32 * 8 = 64 * 4 bofs.write(reinterpret_cast<const char *>(charset), sizeof(charset)); bofs.write(reinterpret_cast<const char*>(da.array()), da.unit_size() * da.size()); bofs.write(const_cast<const char *>(tbuf.data()), tbuf.size()); bofs.write(const_cast<const char *>(fbuf.data()), fbuf.size()); // save magic id magic = static_cast<unsigned int>(bofs.tellp()); magic ^= DictionaryMagicID; bofs.seekp(0); bofs.write(reinterpret_cast<const char *>(&magic), sizeof(unsigned int)); bofs.close(); return true; }