Example #1
0
int test_main( int, char*[] )
{
    {   // basic_filebuf runtime results are ignored; as long as they don't crash
        // or throw we are satisfied
        fs::basic_filebuf<char> bfb;
        fs::filebuf cfb;

        bfb.open( "fstream_test_bffoo", std::ios_base::in );
        cfb.open( "fstream_test_bffoo", std::ios_base::in );

#   ifndef BOOST_NO_STD_WSTRING
        fs::wfilebuf wfb;
        wfb.open( "fstream_test_bffoo", std::ios_base::in );
#   endif
    }

    std::remove( "fstream_test_bfoo" );
    std::remove( "fstream_test_cfoo" );
# ifndef BOOST_NO_STD_WSTRING
    std::remove( "fstream_test_wfoo" );
# endif
    {
        fs::basic_ofstream<char> bofs( "fstream_test_bfoo" );
        fs::ofstream cofs( "fstream_test_cfoo" );

        BOOST_CHECK( bofs.is_open() );
        BOOST_CHECK( cofs.is_open() );

        bofs << "fstream_test_bfoo";
        cofs << "fstream_test_cfoo";

        // these will fail, but they still test the interface
        bofs.open( "fstream_test_bfoo" );
        cofs.open( "fstream_test_cfoo" );

#   ifndef BOOST_NO_STD_WSTRING
        fs::wofstream wofs( "fstream_test_wfoo" );
        BOOST_CHECK( wofs.is_open() );
        wofs << L"fstream_test_wfoo";
        wofs.open( "fstream_test_wfoo" ); // expected to fail
#   endif
    }

    {
        fs::basic_ifstream<char> bifs( "fstream_test_bfoo" );
        fs::ifstream cifs( "fstream_test_cfoo" );

        BOOST_CHECK( bifs.is_open() );
        BOOST_CHECK( cifs.is_open() );

        std::string b;
        std::string c;

        bifs >> b;
        cifs >> c;

        BOOST_CHECK( b == "fstream_test_bfoo" );
        BOOST_CHECK( c == "fstream_test_cfoo" );

        // these will fail, but they still test the interface
        bifs.open( "fstream_test_bfoo" );
        cifs.open( "fstream_test_cfoo" );

#   ifndef BOOST_NO_STD_WSTRING
        fs::wifstream wifs( "fstream_test_wfoo" );
        BOOST_CHECK( wifs.is_open() );
        std::wstring w;
        wifs >> w;
        BOOST_CHECK( w == L"fstream_test_wfoo" );
        wifs.open( "fstream_test_wfoo" ); // expected to fail
#   endif
    }

    {
        fs::basic_fstream<char> bfs( "fstream_test_bfoo" );
        fs::fstream cfs( "fstream_test_cfoo" );

        BOOST_CHECK( bfs.is_open() );
        BOOST_CHECK( cfs.is_open() );

        std::string b;
        std::string c;

        bfs >> b;
        cfs >> c;

        BOOST_CHECK( b == "fstream_test_bfoo" );
        BOOST_CHECK( c == "fstream_test_cfoo" );

        // these will fail, but they still test the interface
        bfs.open( "fstream_test_bfoo" );
        cfs.open( "fstream_test_cfoo" );

#   ifndef BOOST_NO_STD_WSTRING
        fs::wfstream wfs( "fstream_test_wfoo" );
        BOOST_CHECK( wfs.is_open() );
        std::wstring w;
        wfs >> w;
        BOOST_CHECK( w == L"fstream_test_wfoo" );
        wfs.open( "fstream_test_wfoo" ); // expected to fail
#   endif
    }

//  std::remove( "fstream_test_bfoo" );
//  std::remove( "fstream_test_cfoo" );
//  #   ifndef BOOST_NO_STD_WSTRING
//  std::remove( "fstream_test_wfoo" );
//  #   endif
    return 0;
}
Example #2
0
// Multiplies the SIZE x SIZE matrix `mat` by the vector `vec` using SSE,
// processing eight matrix rows per outer iteration (manual 8-way unroll) and
// two columns per inner iteration (two doubles per 128-bit register).
//
// The product is written to a freshly allocated array of SIZE elements and
// `vec` is re-pointed at that array on return. The array `vec` referred to on
// entry is not released by this function.
//
// Requirements visible from the loops: SIZE must be a multiple of 8 (outer
// step) and therefore at least 8.
//
// NOTE(review): the byte masks below treat consecutive fl48 elements as
// 6-byte values packed back to back - confirm against the fl48 definition.
// NOTE(review): every load fetches 16 bytes although only the first 12 are
// used, so under that 6-byte assumption the load for the last column pair
// reaches 4 bytes past the end of the row / vector - confirm the buffers
// tolerate this.
void matrix_vector_mul_SSE_f48_loop_unrolled (fl48** mat, fl48* &vec)
{
  fl48* product = new fl48[SIZE];

  // _mm_set_epi8 lists bytes from most significant (15) down to least (0);
  // a shuffle index of 255 has its top bit set and therefore yields a zero
  // byte.

  // Expands two packed 6-byte elements into two 64-bit lanes: source bytes
  // 0..5 land in bytes 2..7 of the low lane, source bytes 6..11 in bytes
  // 10..15 of the high lane, and the two lowest bytes of each lane are zero.
  const __m128i widen_mask = _mm_set_epi8(11, 10, 9, 8, 7, 6, 255, 255,
                                          5, 4, 3, 2, 1, 0, 255, 255);

  // Exchanges the low and high 64-bit lanes.
  const __m128i swap_lanes = _mm_set_epi8(7, 6, 5, 4, 3, 2, 1, 0,
                                          15, 14, 13, 12, 11, 10, 9, 8);

  // Keeps the low lane and clears the high lane.
  const __m128i keep_low = _mm_set_epi8(255, 255, 255, 255, 255, 255, 255, 255,
                                        7, 6, 5, 4, 3, 2, 1, 0);

  // Moves the low lane into the high lane and clears the low lane.
  const __m128i low_to_high = _mm_set_epi8(7, 6, 5, 4, 3, 2, 1, 0,
                                           255, 255, 255, 255, 255, 255, 255, 255);

  // Move the first 4 / 8 / 12 source bytes to the top of the register so
  // they can be OR-ed behind the bytes already sitting in the low part.
  const __m128i first4_to_top  = _mm_set_epi8(3, 2, 1, 0,
                                              255, 255, 255, 255, 255, 255,
                                              255, 255, 255, 255, 255, 255);
  const __m128i first8_to_top  = _mm_set_epi8(7, 6, 5, 4, 3, 2, 1, 0,
                                              255, 255, 255, 255,
                                              255, 255, 255, 255);
  const __m128i first12_to_top = _mm_set_epi8(11, 10, 9, 8, 7, 6, 5, 4,
                                              3, 2, 1, 0,
                                              255, 255, 255, 255);

  const __m128d zero = _mm_set1_pd(0.0);

  for (unsigned row = 0; row < SIZE; row += 8) {
    // One accumulator per row of the current 8-row band; each holds two
    // partial sums (even columns in one lane, odd columns in the other).
    __m128d acc0 = zero;
    __m128d acc1 = zero;
    __m128d acc2 = zero;
    __m128d acc3 = zero;
    __m128d acc4 = zero;
    __m128d acc5 = zero;
    __m128d acc6 = zero;
    __m128d acc7 = zero;

    for (unsigned col = 0; col < SIZE; col += 2) {
      // The two vector elements are widened once and reused for all 8 rows.
      const __m128d x = (__m128d)_mm_shuffle_epi8(
          _mm_loadu_si128((__m128i*) &vec[col]), widen_mask);

      __m128i m = _mm_shuffle_epi8(
          _mm_loadu_si128((__m128i*) &mat[row][col]), widen_mask);
      acc0 = _mm_add_pd(_mm_mul_pd((__m128d)m, x), acc0);

      m = _mm_shuffle_epi8(
          _mm_loadu_si128((__m128i*) &mat[row+1][col]), widen_mask);
      acc1 = _mm_add_pd(_mm_mul_pd((__m128d)m, x), acc1);

      m = _mm_shuffle_epi8(
          _mm_loadu_si128((__m128i*) &mat[row+2][col]), widen_mask);
      acc2 = _mm_add_pd(_mm_mul_pd((__m128d)m, x), acc2);

      m = _mm_shuffle_epi8(
          _mm_loadu_si128((__m128i*) &mat[row+3][col]), widen_mask);
      acc3 = _mm_add_pd(_mm_mul_pd((__m128d)m, x), acc3);

      m = _mm_shuffle_epi8(
          _mm_loadu_si128((__m128i*) &mat[row+4][col]), widen_mask);
      acc4 = _mm_add_pd(_mm_mul_pd((__m128d)m, x), acc4);

      m = _mm_shuffle_epi8(
          _mm_loadu_si128((__m128i*) &mat[row+5][col]), widen_mask);
      acc5 = _mm_add_pd(_mm_mul_pd((__m128d)m, x), acc5);

      m = _mm_shuffle_epi8(
          _mm_loadu_si128((__m128i*) &mat[row+6][col]), widen_mask);
      acc6 = _mm_add_pd(_mm_mul_pd((__m128d)m, x), acc6);

      m = _mm_shuffle_epi8(
          _mm_loadu_si128((__m128i*) &mat[row+7][col]), widen_mask);
      acc7 = _mm_add_pd(_mm_mul_pd((__m128d)m, x), acc7);
    }

    // Horizontal add: after adding the lane-swapped copy, both lanes of each
    // accumulator hold the complete dot product of its row.
    acc0 = _mm_add_pd(acc0, (__m128d)_mm_shuffle_epi8((__m128i)acc0, swap_lanes));
    acc1 = _mm_add_pd(acc1, (__m128d)_mm_shuffle_epi8((__m128i)acc1, swap_lanes));
    acc2 = _mm_add_pd(acc2, (__m128d)_mm_shuffle_epi8((__m128i)acc2, swap_lanes));
    acc3 = _mm_add_pd(acc3, (__m128d)_mm_shuffle_epi8((__m128i)acc3, swap_lanes));
    acc4 = _mm_add_pd(acc4, (__m128d)_mm_shuffle_epi8((__m128i)acc4, swap_lanes));
    acc5 = _mm_add_pd(acc5, (__m128d)_mm_shuffle_epi8((__m128i)acc5, swap_lanes));
    acc6 = _mm_add_pd(acc6, (__m128d)_mm_shuffle_epi8((__m128i)acc6, swap_lanes));
    acc7 = _mm_add_pd(acc7, (__m128d)_mm_shuffle_epi8((__m128i)acc7, swap_lanes));

    // Pair neighbouring rows into one register (even row in the low lane,
    // odd row in the high lane) and convert each pair of doubles back to the
    // f48 representation.
    __m128i packed01 = convert_double_to_f48_SSE(
        _mm_or_si128(_mm_shuffle_epi8((__m128i)acc0, keep_low),
                     _mm_shuffle_epi8((__m128i)acc1, low_to_high)));
    __m128i packed23 = convert_double_to_f48_SSE(
        _mm_or_si128(_mm_shuffle_epi8((__m128i)acc2, keep_low),
                     _mm_shuffle_epi8((__m128i)acc3, low_to_high)));
    __m128i packed45 = convert_double_to_f48_SSE(
        _mm_or_si128(_mm_shuffle_epi8((__m128i)acc4, keep_low),
                     _mm_shuffle_epi8((__m128i)acc5, low_to_high)));
    const __m128i packed67 = convert_double_to_f48_SSE(
        _mm_or_si128(_mm_shuffle_epi8((__m128i)acc6, keep_low),
                     _mm_shuffle_epi8((__m128i)acc7, low_to_high)));

    // Squeeze the four results into three full 16-byte registers.
    // Presumably each converted register carries its two elements in the low
    // 12 bytes with the top 4 bytes zero - confirm in
    // convert_double_to_f48_SSE; the OR-merges below rely on it.

    // Register 0: results 0,1 plus the first 4 bytes of result pair 2,3.
    packed01 = _mm_or_si128(packed01,
                            _mm_shuffle_epi8(packed23, first4_to_top));

    // Register 1: remaining bytes of pair 2,3 plus the first 8 bytes of
    // pair 4,5. The whole-register byte shift is needed here; a 64-bit lane
    // shift would not carry bytes across the lane boundary.
    packed23 = _mm_or_si128(_mm_srli_si128(packed23, 4),
                            _mm_shuffle_epi8(packed45, first8_to_top));

    // Register 2: remaining bytes of pair 4,5 plus all 12 bytes of pair 6,7.
    packed45 = _mm_or_si128(_mm_srli_si128(packed45, 8),
                            _mm_shuffle_epi8(packed67, first12_to_top));

    // Write the three registers for this band back to memory.
    _mm_storeu_pd((double*)&product[row], (__m128d)packed01);
    _mm_storeu_pd(bofs(&product[row],2), (__m128d)packed23);
    _mm_storeu_pd(bofs(&product[row],4), (__m128d)packed45);
  }

  vec = product;
}
namespace {

// Orders (key, value) pairs by key only. Used with std::stable_sort so that
// entries sharing a key keep their insertion order instead of being ordered
// by the numeric value of the pointer stored in `second`.
template <class Key, class Value>
struct CompileEntryKeyLess {
  bool operator()(const std::pair<Key, Value> &lhs,
                  const std::pair<Key, Value> &rhs) const {
    return lhs.first < rhs.first;
  }
};

}  // namespace

// Compiles CSV lexicon files into a single binary dictionary file.
//
// param           : read for "dictionary-charset" (input charset), "charset"
//                   (output charset), "config-charset" (falls back to the
//                   input charset when empty), "wakati", "type" and
//                   "node-format".
// dics            : paths of the CSV files to read. Every line must have
//                   five columns: surface, left id, right id, cost, feature.
// matrix_file /
// matrix_bin_file : connection matrix, tried as text first and then as
//                   binary; if neither opens, both matrix sizes are set to 1.
// left_id_file /
// right_id_file   : context id definitions; opened only when some line has
//                   a negative left or right id.
// rewrite_file    : rewrite rules; opened only when some line has a negative
//                   left or right id.
// pos_id_file     : part-of-speech id definitions.
// output          : path of the binary dictionary to write.
//
// Output layout: ten unsigned ints (magic, version, type, lexsize, lsize,
// rsize, dsize, tsize, fsize, dummy), a 32-byte charset name, the double
// array, the Token table and the feature string buffer.
//
// Returns true. Every error is reported through CHECK_DIE (presumably fatal
// - confirm against the macro definition).
bool Dictionary::compile(const Param &param,
                         const std::vector<std::string> &dics,
                         const char *matrix_file,
                         const char *matrix_bin_file,
                         const char *left_id_file,
                         const char *right_id_file,
                         const char *rewrite_file,
                         const char *pos_id_file,
                         const char *output) {
  Connector matrix;
  scoped_ptr<DictionaryRewriter> rewrite(0);
  scoped_ptr<POSIDGenerator> posid(0);
  scoped_ptr<ContextID> cid(0);
  scoped_ptr<Writer> writer(0);
  scoped_ptr<StringBuffer> os(0);
  Node node;

  // (surface, token) for every accepted line; tokens are heap allocated and
  // released once they have been serialized into tbuf below.
  std::vector<std::pair<std::string, Token*> > dic;

  size_t offset  = 0;        // position of the next feature string in fbuf
  unsigned int lexsize = 0;  // number of accepted entries over all files
  std::string w, feature, ufeature, lfeature, rfeature, fbuf, key;
  int lid, rid, cost;

  const std::string from = param.get<std::string>("dictionary-charset");
  const std::string to = param.get<std::string>("charset");
  const bool wakati = param.get<bool>("wakati");
  const int type = param.get<int>("type");
  const std::string node_format = param.get<std::string>("node-format");

  // for backward compatibility
  std::string config_charset = param.get<std::string>("config-charset");
  if (config_charset.empty()) config_charset = from;

  CHECK_DIE(!from.empty()) << "input dictionary charset is empty";
  CHECK_DIE(!to.empty())   << "output dictionary charset is empty";

  // Converts dictionary text from the input charset to the output charset.
  Iconv iconv;
  CHECK_DIE(iconv.open(from.c_str(), to.c_str()))
      << "iconv_open() failed with from=" << from << " to=" << to;

  // Converts configuration files into the input dictionary charset.
  Iconv config_iconv;
  CHECK_DIE(config_iconv.open(config_charset.c_str(), from.c_str()))
      << "iconv_open() failed with from=" << config_charset << " to=" << from;

  // The writer and its buffer are only needed when features are re-formatted.
  if (!node_format.empty()) {
    writer.reset(new Writer);
    os.reset(new StringBuffer);
    memset(&node, 0, sizeof(node));
  }

  if (!matrix.openText(matrix_file) &&
      !matrix.open(matrix_bin_file)) {
    matrix.set_left_size(1);
    matrix.set_right_size(1);
  }

  posid.reset(new POSIDGenerator);
  posid->open(pos_id_file, &config_iconv);

  // Built-in fallback used when an unknown-word dictionary file is missing.
  std::istringstream iss(UNK_DEF_DEFAULT);

  for (size_t i = 0; i < dics.size(); ++i) {
    std::ifstream ifs(dics[i].c_str());
    std::istream *is = &ifs;
    if (!ifs) {
      if (type == MECAB_UNK_DIC) {
        std::cerr << dics[i]
                  << " is not found. minimum setting is used." << std::endl;
        is = &iss;
      } else {
        CHECK_DIE(ifs) << "no such file or directory: " << dics[i];
      }
    }

    std::cout << "reading " << dics[i] << " ... ";

    // NOTE: istream::getline fails on a line that does not fit into the
    // buffer, which ends the loop below for this file.
    char line[BUF_SIZE];
    size_t num = 0;  // entries accepted from this file

    while (is->getline(line, sizeof(line))) {
      char *col[8];
      const size_t n = tokenizeCSV(line, col, 5);
      CHECK_DIE(n == 5) << "format error: " << line;

      w = col[0];
      lid = std::atoi(col[1]);
      rid = std::atoi(col[2]);
      cost = std::atoi(col[3]);
      feature = col[4];
      // The POS id is derived from the feature before charset conversion.
      int pid = posid->id(feature.c_str());

      // A negative id means "derive it": rewrite the feature and look the
      // result up in the context id tables, both of which are loaded lazily.
      if (lid < 0  || rid < 0) {
        if (!rewrite.get()) {
          rewrite.reset(new DictionaryRewriter);
          rewrite->open(rewrite_file, &config_iconv);
        }

        CHECK_DIE(rewrite->rewrite(feature, &ufeature, &lfeature, &rfeature))
            << "rewrite failed: " << feature;

        if (!cid.get()) {
          cid.reset(new ContextID);
          cid->open(left_id_file, right_id_file, &config_iconv);
          CHECK_DIE(cid->left_size()  == matrix.left_size() &&
                    cid->right_size() == matrix.right_size())
              << "Context ID files("
              << left_id_file
              << " or "
              << right_id_file << " may be broken";
        }

        lid = cid->lid(lfeature.c_str());
        rid = cid->rid(rfeature.c_str());
      }

      CHECK_DIE(lid >= 0 && rid >= 0 && matrix.is_valid(lid, rid))
          << "invalid ids are found lid=" << lid << " rid=" << rid;

      if (w.empty()) {
        std::cerr << "empty word is found, discard this line" << std::endl;
        continue;
      }

      if (!iconv.convert(&feature)) {
        std::cerr << "iconv conversion failed. skip this entry"
                  << std::endl;
        continue;
      }

      // Surfaces of the unknown-word dictionary are not charset converted.
      if (type != MECAB_UNK_DIC && !iconv.convert(&w)) {
        std::cerr << "iconv conversion failed. skip this entry"
                  << std::endl;
        continue;
      }

      // Optionally replace the feature by the output of the node formatter.
      if (!node_format.empty()) {
        node.surface = w.c_str();
        node.feature = feature.c_str();
        node.length  = w.size();
        node.rlength = w.size();
        node.posid   = pid;
        node.stat    = MECAB_NOR_NODE;
        CHECK_DIE(os.get());
        CHECK_DIE(writer.get());
        os->clear();
        CHECK_DIE(writer->writeNode(&*os,
                                    node_format.c_str(),
                                    w.c_str(),
                                    &node)) <<
            "conversion error: " << feature << " with " << node_format;
        *os << '\0';
        feature = os->str();
      }

      // In wakati mode no feature strings are stored; key stays empty and
      // every token keeps feature offset 0.
      key.clear();
      if (!wakati) key = feature + '\0';

      Token* token  = new Token;
      token->lcAttr = lid;
      token->rcAttr = rid;
      token->posid  = pid;
      token->wcost = cost;
      token->feature = offset;
      token->compound = 0;
      // Construct the pair directly: std::make_pair with explicit template
      // arguments takes rvalue references since C++11 and rejects lvalues.
      dic.push_back(std::pair<std::string, Token*>(w, token));

      // append to output buffer
      if (!wakati) fbuf.append(key.data(), key.size());
      offset += key.size();

      ++num;
      ++lexsize;
    }

    std::cout << num << std::endl;
  }

  if (wakati) fbuf.append("\0", 1);

  // The grouping code below unconditionally reads dic[idx]; refuse to go on
  // with nothing to compile instead of indexing an empty vector.
  CHECK_DIE(!dic.empty()) << "no dictionary entry is found";

  // Sort by surface only and keep input order among equal surfaces, so the
  // result does not depend on where the tokens happen to be allocated.
  std::stable_sort(dic.begin(), dic.end(),
                   CompileEntryKeyLess<std::string, Token*>());

  // Collapse runs of equal surfaces into one double-array key each. The
  // stored value packs the run length into the low 8 bits and the index of
  // the run's first token into the bits above.
  // NOTE(review): a run of 256 or more entries spills into the index bits.
  size_t bsize = 0;  // length of the current run
  size_t idx = 0;    // index of the first entry of the current run
  std::string prev;
  std::vector<const char *> str;
  std::vector<size_t> len;
  std::vector<Darts::DoubleArray::result_type> val;

  for (size_t i = 0; i < dic.size(); ++i) {
    if (i != 0 && prev != dic[i].first) {
      str.push_back(dic[idx].first.c_str());
      len.push_back(dic[idx].first.size());
      val.push_back(bsize +(idx << 8));
      bsize = 1;
      idx = i;
    } else {
      ++bsize;
    }
    prev = dic[i].first;
  }
  // Flush the last run.
  str.push_back(dic[idx].first.c_str());
  len.push_back(dic[idx].first.size());
  val.push_back(bsize +(idx << 8));

  CHECK_DIE(str.size() == len.size());
  CHECK_DIE(str.size() == val.size());

  Darts::DoubleArray da;
  CHECK_DIE(da.build(str.size(), const_cast<char **>(&str[0]),
                     &len[0], &val[0], &progress_bar_darts) == 0)
      << "unkown error in building double-array";

  // Serialize the tokens in sorted order and release them.
  std::string tbuf;
  for (size_t i = 0; i < dic.size(); ++i) {
    tbuf.append(reinterpret_cast<const char*>(dic[i].second),
                sizeof(Token));
    delete dic[i].second;
  }
  dic.clear();

  // needs to be 8byte(64bit) aligned
  while (tbuf.size() % 8 != 0) {
    Token dummy;
    memset(&dummy, 0, sizeof(Token));
    tbuf.append(reinterpret_cast<const char*>(&dummy), sizeof(Token));
  }

  unsigned int dummy = 0;
  unsigned int lsize = matrix.left_size();
  unsigned int rsize = matrix.right_size();
  unsigned int dsize = da.unit_size() * da.size();
  unsigned int tsize = tbuf.size();
  unsigned int fsize = fbuf.size();

  unsigned int version = DIC_VERSION;
  // Zero-filled, so the name is always NUL terminated (at most 31 chars).
  char charset[32];
  std::fill(charset, charset + sizeof(charset), '\0');
  std::strncpy(charset, to.c_str(), 31);

  std::ofstream bofs(output, std::ios::binary|std::ios::out);
  CHECK_DIE(bofs) << "permission denied: " << output;

  // Placeholder; the real value is patched in once the file size is known.
  unsigned int magic = 0;

  // needs to be 64bit aligned
  // 10*32 = 64*5
  bofs.write(reinterpret_cast<const char *>(&magic),   sizeof(unsigned int));
  bofs.write(reinterpret_cast<const char *>(&version), sizeof(unsigned int));
  bofs.write(reinterpret_cast<const char *>(&type),    sizeof(unsigned int));
  bofs.write(reinterpret_cast<const char *>(&lexsize), sizeof(unsigned int));
  bofs.write(reinterpret_cast<const char *>(&lsize),   sizeof(unsigned int));
  bofs.write(reinterpret_cast<const char *>(&rsize),   sizeof(unsigned int));
  bofs.write(reinterpret_cast<const char *>(&dsize),   sizeof(unsigned int));
  bofs.write(reinterpret_cast<const char *>(&tsize),   sizeof(unsigned int));
  bofs.write(reinterpret_cast<const char *>(&fsize),   sizeof(unsigned int));
  bofs.write(reinterpret_cast<const char *>(&dummy),   sizeof(unsigned int));

  // 32 * 8 = 64 * 4
  bofs.write(reinterpret_cast<const char *>(charset),  sizeof(charset));

  bofs.write(reinterpret_cast<const char*>(da.array()),
             da.unit_size() * da.size());
  bofs.write(const_cast<const char *>(tbuf.data()), tbuf.size());
  bofs.write(const_cast<const char *>(fbuf.data()), fbuf.size());

  // save magic id
  // (current write position, i.e. the number of bytes written so far,
  // XOR-ed with DictionaryMagicID and stored in the first header field)
  magic = static_cast<unsigned int>(bofs.tellp());
  magic ^= DictionaryMagicID;
  bofs.seekp(0);
  bofs.write(reinterpret_cast<const char *>(&magic), sizeof(unsigned int));

  bofs.close();

  return true;
}