bool DecoderFeatureIndex::buildFeature(LearnerPath *path) {
  path->rnode->wcost = path->cost = 0.0;

  std::string ufeature1;
  std::string lfeature1;
  std::string rfeature1;
  std::string ufeature2;
  std::string lfeature2;
  std::string rfeature2;

  CHECK_DIE(rewrite_.rewrite2(path->lnode->feature,
                              &ufeature1,
                              &lfeature1,
                              &rfeature1))
      << " cannot rewrite pattern: "
      << path->lnode->feature;

  CHECK_DIE(rewrite_.rewrite2(path->rnode->feature,
                              &ufeature2,
                              &lfeature2,
                              &rfeature2))
      << " cannot rewrite pattern: "
      << path->rnode->feature;

  if (!buildUnigramFeature(path, ufeature2.c_str()))
    return false;

  if (!buildBigramFeature(path, rfeature1.c_str(), lfeature2.c_str()))
    return false;

  return true;
}
Beispiel #2
0
bool LearnerTagger::connect(size_t pos, LearnerNode *_rNode) {
  for (LearnerNode *rNode = _rNode ; rNode; rNode = rNode->bnext) {
    for (LearnerNode *lNode = end_node_list_[pos]; lNode;
         lNode = lNode->enext) {
      LearnerPath *path   = allocator_->newPath();
      std::memset(path, 0, sizeof(Path));
      path->rnode   = rNode;
      path->lnode   = lNode;
      path->fvector = 0;
      path->cost    = 0.0;
      path->rnode   = rNode;
      path->lnode   = lNode;
      path->lnext   = rNode->lpath;
      rNode->lpath  = path;
      path->rnext   = lNode->rpath;
      lNode->rpath  = path;
      CHECK_DIE(feature_index_->buildFeature(path));
      CHECK_DIE(path->fvector);
    }
    const size_t x = rNode->rlength + pos;
    rNode->enext  = end_node_list_[x];
    end_node_list_[x] = rNode;
  }

  return true;
}
bool DictionaryRewriter::open(const char *filename,
                              Iconv *iconv) {
  std::ifstream ifs(filename);
  CHECK_DIE(ifs) << "no such file or directory: " << filename;
  int append_to = 0;
  std::string line;
  while (std::getline(ifs, line)) {
    if (iconv) iconv->convert(&line);
    if (line.empty() || line[0] == '#') continue;
    if (line == "[unigram rewrite]") {
      append_to = 1;
    } else if (line == "[left rewrite]") {
      append_to = 2;
    } else if (line == "[right rewrite]") {
      append_to = 3;
    } else {
      CHECK_DIE(append_to != 0) << "no sections found";
      char *str = const_cast<char *>(line.c_str());
      switch (append_to) {
        case 1: append_rewrite_rule(&unigram_rewrite_, str); break;
        case 2: append_rewrite_rule(&left_rewrite_,    str); break;
        case 3: append_rewrite_rule(&right_rewrite_,   str); break;
      }
    }
  }
  return true;
}
bool POSIDGenerator::open(const char *filename,
                          Iconv *iconv) {
  scoped_ptr<std::istream> p_ist;

  const jma::DictUnit* dict = jma::JMA_Dictionary::instance()->getDict(filename);
  if(dict)
    p_ist.reset(new std::istrstream(dict->text_, dict->length_));
  else
    p_ist.reset(new std::ifstream(filename));

  if (!*p_ist) {
    std::cerr << filename
              << " is not found. minimum setting is used" << std::endl;
    rewrite_.resize(1);
    rewrite_.back().set_pattern("*", "1");
    return true;
  }

  std::string line;
  char *col[2];
  while (std::getline(*p_ist, line)) {
    if (iconv) iconv->convert(&line);
    const size_t n = tokenize2(const_cast<char *>(line.c_str()),
                               " \t", col, 2);
    CHECK_DIE(n == 2) << "format error: " << line;
    for (char *p = col[1]; *p; ++p) {
      CHECK_DIE(*p >= '0' && *p <= '9') << "not a number: " << col[1];
    }
    rewrite_.resize(rewrite_.size() + 1);
    rewrite_.back().set_pattern(col[0], col[1]);
  }
  return true;
}
void enum_csv_dictionaries(const char *path,
                           std::vector<std::string> *dics) {
  dics->clear();

#if defined(_WIN32) && !defined(__CYGWIN__)
  WIN32_FIND_DATA wfd;
  HANDLE hFind;
  const std::string pat = create_filename(path, "*.csv");
  hFind = FindFirstFile(pat.c_str(), &wfd);
  CHECK_DIE(hFind != INVALID_HANDLE_VALUE)
      << "Invalid File Handle. Get Last Error reports";
  do {
    std::string tmp = create_filename(path, wfd.cFileName);
    dics->push_back(tmp);
  } while (FindNextFile(hFind, &wfd));
  FindClose(hFind);
#else
  DIR *dir = opendir(path);
  CHECK_DIE(dir) << "no such directory: " << path;

  for (struct dirent *dp = readdir(dir);
       dp;
       dp = readdir(dir)) {
    const std::string tmp = dp->d_name;
    if (tmp.size() >= 5) {
      std::string ext = tmp.substr(tmp.size() - 4, 4);
      toLower(&ext);
      if (ext == ".csv")
        dics->push_back(create_filename(path, tmp));
    }
  }
  closedir(dir);
#endif
}
bool POSIDGenerator::open(const char *filename,
                          Iconv *iconv) {
  std::ifstream ifs(filename);
  if (!ifs) {
    std::cerr << filename
              << " is not found. minimum setting is used" << std::endl;
    rewrite_.resize(1);
    rewrite_.back().set_pattern("*", "1");
    return true;
  }

  std::string line;
  char *col[2];
  while (std::getline(ifs, line)) {
    if (iconv) iconv->convert(&line);
    const size_t n = tokenize2(const_cast<char *>(line.c_str()),
                               " \t", col, 2);
    CHECK_DIE(n == 2) << "format error: " << line;
    for (char *p = col[1]; *p; ++p) {
      CHECK_DIE(*p >= '0' && *p <= '9') << "not a number: " << col[1];
    }
    rewrite_.resize(rewrite_.size() + 1);
    rewrite_.back().set_pattern(col[0], col[1]);
  }
  return true;
}
bool DictionaryRewriter::open(const char *filename,
                              Iconv *iconv) {
  scoped_ptr<std::istream> p_ist;

  const jma::DictUnit* dict = jma::JMA_Dictionary::instance()->getDict(filename);
  if(dict)
    p_ist.reset(new std::istrstream(dict->text_, dict->length_));
  else
    p_ist.reset(new std::ifstream(filename));

  CHECK_DIE(*p_ist) << "no such file or directory: " << filename;
  int append_to = 0;
  std::string line;
  while (std::getline(*p_ist, line)) {
    if (iconv) iconv->convert(&line);
    if (line.empty() || line[0] == '#') continue;
    if (line == "[unigram rewrite]") {
      append_to = 1;
    } else if (line == "[left rewrite]") {
      append_to = 2;
    } else if (line == "[right rewrite]") {
      append_to = 3;
    } else {
      CHECK_DIE(append_to != 0) << "no sections found";
      char *str = const_cast<char *>(line.c_str());
      switch (append_to) {
        case 1: append_rewrite_rule(&unigram_rewrite_, str); break;
        case 2: append_rewrite_rule(&left_rewrite_,    str); break;
        case 3: append_rewrite_rule(&right_rewrite_,   str); break;
      }
    }
  }
  return true;
}
  static bool read(std::istream *is,
                   std::vector<std::vector<std::string> > *r,
                   const std::vector<int> &level) {
    if (!*is) return false;
    char buf[BUF_SIZE];
    char *col[2];
    char *cvs[BUF_SIZE];
    r->clear();
    while (is->getline(buf, sizeof(buf))) {
      if (std::strcmp(buf, "EOS") == 0) break;
      CHECK_DIE(tokenize(buf, "\t", col,  2) == 2) << "format error";
      cvs[0] = col[0];
      size_t n = tokenizeCSV(col[1], cvs + 1, sizeof(cvs) - 1);
      std::vector<std::string> tmp;
      for (size_t i = 0; i < level.size(); ++i) {
        size_t m = level[i] < 0 ? n : level[i];
        CHECK_DIE(m <= n) << " out of range " << level[i];
        std::string output;
        for (size_t j = 0; j <= m; ++j) {
          output += cvs[j];
          if (j != 0) output += "\t";
        }
        tmp.push_back(output);
      }
      r->push_back(tmp);
    }

    return true;
  }
void copy(const char *src, const char *dst) {
  std::cout << "copying " << src << " to " <<  dst << std::endl;
  Mmap<char> mmap;
  CHECK_DIE(mmap.open(src)) << mmap.what();
  std::ofstream ofs(dst, std::ios::binary|std::ios::out);
  CHECK_DIE(ofs) << "permission denied: " << dst;
  ofs.write(reinterpret_cast<char*>(mmap.begin()), mmap.size());
  ofs.close();
}
int POSIDGenerator::id(const char *feature) const {
  char buf[BUF_SIZE];
  char *col[BUF_SIZE];
  CHECK_DIE(std::strlen(feature) < sizeof(buf) - 1) << "too long feature";
  std::strncpy(buf, feature, sizeof(buf) - 1);
  const size_t n = tokenizeCSV(buf, col, sizeof(col));
  CHECK_DIE(n < sizeof(col)) << "too long CSV entities";
  std::string tmp;
  if (!rewrite_.rewrite(n, const_cast<const char **>(col), &tmp))
    return -1;
  return std::atoi(tmp.c_str());
}
Beispiel #11
0
  static int run(int argc, char **argv) {
    static const MeCab::Option long_options[] = {
      { "output",   'o',  0,   "FILE", "set the output filename" },
      { "version",  'v',  0,   0,    "show the version and exit"   },
      { "help",  'h',  0,   0,    "show this help and exit."   },
      { 0, 0, 0, 0 }
    };

    MeCab::Param param;
    param.open(argc, argv, long_options);

    if (!param.open(argc, argv, long_options)) {
      std::cout << param.what() << "\n\n" <<  COPYRIGHT
                << "\ntry '--help' for more information." << std::endl;
      return -1;
    }

    if (!param.help_version()) {
      return 0;
    }

    const std::vector<std::string> &tmp = param.rest_args();
    std::vector<std::string> files = tmp;
    if (files.empty()) {
      files.push_back("-");
    }

    std::string output = param.get<std::string>("output");
    if (output.empty()) output = "-";
    MeCab::ostream_wrapper ofs(output.c_str());
    CHECK_DIE(*ofs) << "permission denied: " << output;

    scoped_fixed_array<char, BUF_SIZE> buf;
    char *col[2];
    std::string str;
    for (size_t i = 0; i < files.size(); ++i) {
      MeCab::istream_wrapper ifs(files[i].c_str());
      CHECK_DIE(*ifs) << "no such file or directory: " << files[i];
      while (ifs->getline(buf.get(), buf.size())) {
        const size_t n = tokenize(buf.get(), "\t ", col, 2);
        CHECK_DIE(n <= 2) << "format error: " << buf.get();
        if (std::strcmp(col[0], "EOS") == 0 && !str.empty()) {
          *ofs << str << std::endl;
          str.clear();
        } else {
          str += col[0];
        }
      }
    }

    return 0;
  }
Beispiel #12
0
bool DecoderLearnerTagger::open(const Param &param) {
  close();
  allocator_data_.reset(new Allocator<LearnerNode, LearnerPath>());
  tokenizer_data_.reset(new Tokenizer<LearnerNode, LearnerPath>());
  feature_index_data_.reset(new DecoderFeatureIndex);
  allocator_ = allocator_data_.get();
  tokenizer_ = tokenizer_data_.get();
  feature_index_ = feature_index_data_.get();

  CHECK_DIE(tokenizer_->open(param)) << tokenizer_->what();
  CHECK_DIE(feature_index_->open(param));

  return true;
}
// without cache
bool DictionaryRewriter::rewrite(const std::string &feature,
                                 std::string *ufeature,
                                 std::string *lfeature,
                                 std::string *rfeature) const {
  char buf[BUF_SIZE];
  char *col[BUF_SIZE];
  CHECK_DIE(feature.size() < sizeof(buf) - 1) << "too long feature";
  std::strncpy(buf, feature.c_str(), sizeof(buf) - 1);
  size_t n = tokenizeCSV(buf, col, sizeof(col));
  CHECK_DIE(n < sizeof(col)) << "too long CSV entities";
  return (unigram_rewrite_.rewrite(n, const_cast<const char **>(col),
                                   ufeature) &&
          left_rewrite_.rewrite(n, const_cast<const char **>(col),
                                lfeature) &&
          right_rewrite_.rewrite(n, const_cast<const char **>(col),
                                 rfeature));
}
  static bool genmatrix(const char *filename,
                        const ContextID &cid,
                        DecoderFeatureIndex *fi,
                        int factor,
                        int default_cost) {
    std::ofstream ofs(filename);
    CHECK_DIE(ofs) << "permission denied: " << filename;

    LearnerPath path;
    LearnerNode rnode;
    LearnerNode lnode;
    rnode.stat = lnode.stat = MECAB_NOR_NODE;
    rnode.rpath = &path;
    lnode.lpath = &path;
    path.lnode = &lnode;
    path.rnode = &rnode;

    const std::map<std::string, int> &left =  cid.left_ids();
    const std::map<std::string, int> &right = cid.right_ids();

    CHECK_DIE(left.size())  << "left id size is empty";
    CHECK_DIE(right.size()) << "right id size is empty";

    ofs << right.size() << ' ' << left.size() << std::endl;

    size_t l = 0;
    for (std::map<std::string, int>::const_iterator rit = right.begin();
         rit != right.end();
         ++rit) {
      ++l;
      progress_bar("emitting matrix      ", l+1, right.size());
      for (std::map<std::string, int>::const_iterator lit = left.begin();
           lit != left.end();
           ++lit) {
        path.rnode->wcost = 0;
        fi->buildBigramFeature(&path, rit->first.c_str(), lit->first.c_str());
        fi->calcCost(&path);
        ofs << rit->second << ' ' << lit->second << ' '
            << tocost(path.cost, factor, default_cost) << std::endl;
      }
    }

    return true;
  }
bool RewritePattern::rewrite(size_t size,
                             const char **input,
                             std::string *output) const {
  if (spat_.size() > size) return false;
  for (size_t i = 0; i < spat_.size(); ++i) {
    if (!match_rewrite_pattern(spat_[i].c_str(), input[i]))
      return false;
  }

  output->clear();
  for (size_t i = 0; i < dpat_.size(); ++i) {
    std::string elm;
    const char *begin = dpat_[i].c_str();
    const char *end = begin + dpat_[i].size();
    for (const char *p = begin; p < end; ++p) {
      if (*p == '$') {
        ++p;
        size_t n = 0;
        for (; p < end; ++p) {
          switch (*p) {
            case '0': case '1': case '2': case '3': case '4':
            case '5': case '6': case '7': case '8': case '9':
              n = 10 * n + (*p - '0');
              break;
            default:
              goto NEXT;
          }
        }
     NEXT:
        CHECK_DIE(n > 0 && (n - 1) < size)
            << " out of range: [" << dpat_[i] << "] " << n;
        elm += input[n - 1];
        if (p < end) elm += *p;
      } else {
        elm += *p;
      }
    }
    CHECK_DIE(escape_csv_element(&elm));
    *output += elm;
    if (i + 1 != dpat_.size()) *output += ",";
  }

  return true;
}
 static void gencid(const char *filename,
                    DictionaryRewriter *rewrite,
                    ContextID *cid) {
   std::ifstream ifs(filename);
   CHECK_DIE(ifs) << "no such file or directory: " << filename;
   char line[BUF_SIZE];
   std::cout << "reading " << filename << " ... " << std::flush;
   size_t num = 0;
   std::string feature, ufeature, lfeature, rfeature;
   char *col[8];
   while (ifs.getline(line, sizeof(line))) {
     const size_t n = tokenizeCSV(line, col, 5);
     CHECK_DIE(n == 5) << "format error: " << line;
     feature = col[4];
     rewrite->rewrite2(feature, &ufeature, &lfeature, &rfeature);
     cid->add(lfeature.c_str(), rfeature.c_str());
     ++num;
   }
   std::cout << num << std::endl;
   ifs.close();
 }
bool FeatureIndex::convert(const char* txtfile, const char *binfile) {
  std::ifstream ifs(txtfile);

  CHECK_DIE(ifs) << "no such file or directory: " << txtfile;

  char buf[BUF_SIZE];
  char *column[4];
  std::map<std::string, double> dic;

  while (ifs.getline(buf, sizeof(buf))) {
    CHECK_DIE(tokenize2(buf, "\t", column, 2) == 2)
        << "format error: " << buf;

    dic.insert(std::make_pair<std::string, double>
               (std::string(column[1]), atof(column[0]) ));
  }

  std::ofstream ofs(binfile, std::ios::out | std::ios::binary);
  CHECK_DIE(ofs) << "permission denied: " << binfile;

  std::vector<char *> key;
  unsigned int size = static_cast<unsigned int>(dic.size());
  ofs.write(reinterpret_cast<const char*>(&size), sizeof(unsigned int));

  for (std::map<std::string, double>::const_iterator
           it = dic.begin(); it != dic.end(); ++it) {
    key.push_back(const_cast<char*>(it->first.c_str()));
    ofs.write(reinterpret_cast<const char*>(&it->second), sizeof(double));
  }

  Darts::DoubleArray da;
  CHECK_DIE(da.build(key.size(), &key[0], 0, 0, 0) == 0)
      << "unkown error in building double array: " << binfile;

  ofs.write(reinterpret_cast<const char*>(da.array()),
            da.unit_size() * da.size());

  return true;
}
Beispiel #18
0
bool NE::open(const Param &param) {
  close();

  if (action_mode() == PARSING_MODE) {
    const std::string filename = param.get<std::string>("ne-model");
    std::vector<const char*> argv;
    argv.push_back(param.program_name());
    argv.push_back("-m");
    argv.push_back(filename.c_str());
    model_ = crfpp_model_new(argv.size(),
                              const_cast<char **>(&argv[0]));
    // CHECK_FALSE(tagger_) << crfpp_strerror(tagger_);
    // CHECK_FALSE(crfpp_ysize(tagger_) >= 2);
    // CHECK_FALSE(crfpp_xsize(tagger_) == 3);
    // for (size_t i = 0; i < crfpp_ysize(tagger_); ++i) {
    //   const char *p = crfpp_yname(tagger_, i);
    //   CHECK_FALSE(p && (p[0] == 'B' || p[0] == 'I' || p[0] == 'O'));
    // }
  }

  // "名詞,数,"
  ne_composite_ipa_   ="\xE5\x90\x8D\xE8\xA9\x9E,\xE6\x95\xB0,";
  // "名詞,数詞"
  ne_composite_juman_ = "\xE5\x90\x8D\xE8\xA9\x9E,\xE6\x95\xB0\xE8\xA9\x9E,";
  // "名詞,数詞"
  ne_composite_unidic_ = "\xE5\x90\x8D\xE8\xA9\x9E,\xE6\x95\xB0\xE8\xA9\x9E,";

  Iconv iconv;
  iconv.open(UTF8, charset());
  CHECK_DIE(iconv.convert(&ne_composite_ipa_));
  CHECK_DIE(iconv.convert(&ne_composite_juman_));
  CHECK_DIE(iconv.convert(&ne_composite_unidic_));
  CHECK_FALSE(!ne_composite_ipa_.empty());
  CHECK_FALSE(!ne_composite_juman_.empty());
  CHECK_FALSE(!ne_composite_unidic_.empty());

  return true;
}
Beispiel #19
0
  static bool read(std::istream *is,
                   std::vector<std::vector<std::string> > *r,
                   const std::vector<int> &level) {
    if (!*is) {
      return false;
    }

    char *col[2];
    scoped_fixed_array<char, BUF_SIZE> buf;
    scoped_fixed_array<char *, BUF_SIZE> csv;
    r->clear();
    while (is->getline(buf.get(), buf.size())) {
      if (std::strcmp(buf.get(), "EOS") == 0) {
        break;
      }
      CHECK_DIE(tokenize(buf.get(), "\t", col,  2) == 2) << "format error";
      csv[0] = col[0];
      size_t n = tokenizeCSV(col[1], csv.get() + 1, csv.size() - 1);
      std::vector<std::string> tmp;
      for (size_t i = 0; i < level.size(); ++i) {
        size_t m = level[i] < 0 ? n : level[i];
        CHECK_DIE(m <= n) << " out of range " << level[i];
        std::string output;
        for (size_t j = 0; j <= m; ++j) {
          output += csv[j];
          if (j != 0) {
            output += "\t";
          }
        }
        tmp.push_back(output);
      }
      r->push_back(tmp);
    }

    return true;
  }
Beispiel #20
0
/*  dwarf_CU_dieoffset_given_die returns
    the global debug_info section offset of the CU die
    that is the CU containing the given (passed-in) die.
    This information makes it possible for a consumer to
    find and print context information for any die.

    Use dwarf_offdie() passing in the offset this returns
    to get a die pointer to the CU die.  */
int
dwarf_CU_dieoffset_given_die(Dwarf_Die die,
    Dwarf_Off*       return_offset,
    Dwarf_Error*     error)
{
    Dwarf_Off  dieoff = 0;
    Dwarf_CU_Context cucontext = 0;

    CHECK_DIE(die, DW_DLV_ERROR);
    cucontext = die->di_cu_context;
    dieoff =  cucontext->cc_debug_offset;
    /*  The following call cannot fail, so no error check. */
    dwarf_get_cu_die_offset_given_cu_header_offset_b(
        cucontext->cc_dbg, dieoff,
        die->di_is_info, return_offset,error);
    return DW_DLV_OK;
}
Beispiel #21
0
/*  Validate the sibling DIE. This only makes sense to call
    if the sibling's DIEs have been travsersed and 
    dwarf_child() called on each,
    so that the last DIE dwarf_child saw was the last. 
    Essentially ensuring that (after such traversal) that we
    are in the same place a sibling attribute would identify.
    In case we return DW_DLV_ERROR, the global offset of the last
    DIE traversed by dwarf_child is returned through *offset 

    It is essentially guaranteed that  dbg->de_last_die 
    is a stale DIE pointer of a deallocated DIE when we get here. 
    It must not be used as a DIE pointer here,
    just as a sort of anonymous pointer that we just check against
    NULL.

    There is a (subtle?) dependence on the fact that when we call this
    the last dwarf_child() call would have been for this sibling.
    Meaning that this works in a depth-first traversal even though there
    is no stack of 'de_last_die' values.

    The check for dbg->de_last_die just ensures sanity.

    If one is switching between normal debug_frame and eh_frame
    (traversing them in tandem, let us say) in a single
    Dwarf_Debug this validator makes no sense. 
    It works if one processes a .debug_frame (entirely) and
    then an eh_frame (or vice versa) though.
    Use caution.
*/
int 
dwarf_validate_die_sibling(Dwarf_Die sibling,Dwarf_Off *offset)
{
    Dwarf_Debug dbg = 0;
    Dwarf_Error *error = 0;
    Dwarf_Debug_InfoTypes dis = 0;
    CHECK_DIE(sibling, DW_DLV_ERROR);
    dbg = sibling->di_cu_context->cc_dbg;

    dis = sibling->di_is_info? &dbg->de_info_reading: &dbg->de_types_reading;
    
    *offset = 0;
    if (dis->de_last_die && dis->de_last_di_ptr) {
        if (sibling->di_debug_ptr == dis->de_last_di_ptr) {
            return (DW_DLV_OK);
        }
    }
    /* Calculate global offset used for error reporting */
    dwarf_ptr_CU_offset(sibling->di_cu_context,
        dis->de_last_di_ptr,sibling->di_is_info,offset);
    return (DW_DLV_ERROR);
}
Beispiel #22
0
int ContextID::lid(const char *l) const {
  std::map<std::string, int>::const_iterator it = left_.find(l);
  CHECK_DIE(it != left_.end())
      << "cannot find LEFT-ID  for " << l;
  return it->second;
}
Beispiel #23
0
int ContextID::rid(const char *r) const {
  std::map<std::string, int>::const_iterator it = right_.find(r);
  CHECK_DIE(it != right_.end())
      << "cannot find RIGHT-ID  for " << r;
  return it->second;
}
/*
        return DW_DLV_OK if ok. else DW_DLV_NO_ENTRY or DW_DLV_ERROR
        If err_count_out is non-NULL, this is a special 'check'
        call.
*/
int
_dwarf_internal_printlines(Dwarf_Die die, Dwarf_Error * error,
   int * err_count_out, int only_line_header)
{
    /* 
       This pointer is used to scan the portion of the .debug_line
       section for the current cu. */
    Dwarf_Small *line_ptr = 0;
    Dwarf_Small *orig_line_ptr = 0;

    /* 
       This points to the last byte of the .debug_line portion for the
       current cu. */
    Dwarf_Small *line_ptr_end = 0;

    /* 
       Pointer to a DW_AT_stmt_list attribute in case it exists in the
       die. */
    Dwarf_Attribute stmt_list_attr = 0;

    /* Pointer to DW_AT_comp_dir attribute in die. */
    Dwarf_Attribute comp_dir_attr = 0;

    /* Pointer to name of compilation directory. */
    Dwarf_Small *comp_dir = NULL;

    /* 
       Offset into .debug_line specified by a DW_AT_stmt_list
       attribute. */
    Dwarf_Unsigned line_offset = 0;

    struct Line_Table_Prefix_s prefix;


    /* These are the state machine state variables. */
    Dwarf_Addr address = 0;
    Dwarf_Word file = 1;
    Dwarf_Word line = 1;
    Dwarf_Word column = 0;
    Dwarf_Bool is_stmt = false;
    Dwarf_Bool basic_block = false;
    Dwarf_Bool end_sequence = false;
    Dwarf_Bool prologue_end = false;
    Dwarf_Bool epilogue_begin = false;
    Dwarf_Small isa = 0;


    Dwarf_Sword i=0;

    /* 
       This is the current opcode read from the statement program. */
    Dwarf_Small opcode=0;


    /* 
       These variables are used to decode leb128 numbers. Leb128_num
       holds the decoded number, and leb128_length is its length in
       bytes. */
    Dwarf_Word leb128_num=0;
    Dwarf_Word leb128_length=0;
    Dwarf_Sword advance_line=0;
    Dwarf_Half attrform = 0;
    /* 
       This is the operand of the latest fixed_advance_pc extended
       opcode. */
    Dwarf_Half fixed_advance_pc=0;

    /* In case there are wierd bytes 'after' the line table
     * prologue this lets us print something. This is a gcc
     * compiler bug and we expect the bytes count to be 12.
     */
    Dwarf_Small* bogus_bytes_ptr = 0;
    Dwarf_Unsigned bogus_bytes_count = 0;


    /* The Dwarf_Debug this die belongs to. */
    Dwarf_Debug dbg=0;
    int resattr = DW_DLV_ERROR;
    int lres =    DW_DLV_ERROR;
    int res  =    DW_DLV_ERROR;

    /* ***** BEGIN CODE ***** */

    if (error != NULL) {
        *error = NULL;
    }

    CHECK_DIE(die, DW_DLV_ERROR);
    dbg = die->di_cu_context->cc_dbg;

    res = _dwarf_load_section(dbg, &dbg->de_debug_line,error);
    if (res != DW_DLV_OK) {
        return res;
    }

    resattr = dwarf_attr(die, DW_AT_stmt_list, &stmt_list_attr, error);
    if (resattr != DW_DLV_OK) {
        return resattr;
    }


    /* The list of relevant FORMs is small. 
       DW_FORM_data4, DW_FORM_data8, DW_FORM_sec_offset 
    */
    lres = dwarf_whatform(stmt_list_attr,&attrform,error);
    if (lres != DW_DLV_OK) {
        return lres;
    }
    if (attrform != DW_FORM_data4 && attrform != DW_FORM_data8 &&
        attrform != DW_FORM_sec_offset ) {
        _dwarf_error(dbg, error, DW_DLE_LINE_OFFSET_BAD);
        return (DW_DLV_ERROR);
    }
    lres = dwarf_global_formref(stmt_list_attr, &line_offset, error);
    if (lres != DW_DLV_OK) {
        return lres;
    }

    if (line_offset >= dbg->de_debug_line.dss_size) {
        _dwarf_error(dbg, error, DW_DLE_LINE_OFFSET_BAD);
        return (DW_DLV_ERROR);
    }
    orig_line_ptr = dbg->de_debug_line.dss_data;
    line_ptr = dbg->de_debug_line.dss_data + line_offset;
    dwarf_dealloc(dbg, stmt_list_attr, DW_DLA_ATTR);

    /* 
       If die has DW_AT_comp_dir attribute, get the string that names
       the compilation directory. */
    resattr = dwarf_attr(die, DW_AT_comp_dir, &comp_dir_attr, error);
    if (resattr == DW_DLV_ERROR) {
        return resattr;
    }
    if (resattr == DW_DLV_OK) {
        int cres = DW_DLV_ERROR;
        char *cdir = 0;

        cres = dwarf_formstring(comp_dir_attr, &cdir, error);
        if (cres == DW_DLV_ERROR) {
            return cres;
        } else if (cres == DW_DLV_OK) {
            comp_dir = (Dwarf_Small *) cdir;
        }
    }
    if (resattr == DW_DLV_OK) {
        dwarf_dealloc(dbg, comp_dir_attr, DW_DLA_ATTR);
    }

    dwarf_init_line_table_prefix(&prefix);
    {
        Dwarf_Small *line_ptr_out = 0;
        int dres = dwarf_read_line_table_prefix(dbg,
            line_ptr,dbg->de_debug_line.dss_size - line_offset,
            &line_ptr_out,
            &prefix, 
            &bogus_bytes_ptr,
            &bogus_bytes_count,
            error,
            err_count_out);
        if (dres == DW_DLV_ERROR) {
            dwarf_free_line_table_prefix(&prefix);
            return dres;
        }
        if (dres == DW_DLV_NO_ENTRY) {
            dwarf_free_line_table_prefix(&prefix);
            return dres;
        }
        line_ptr_end = prefix.pf_line_ptr_end;
        line_ptr = line_ptr_out;
    }
    if(only_line_header) {
         /* Just checking for header errors, nothing more here.*/
         dwarf_free_line_table_prefix(&prefix);
         return DW_DLV_OK;
    }


    printf("total line info length %ld bytes, "
           "line offset 0x%" DW_PR_DUx " %" DW_PR_DSd "\n",
           (long) prefix.pf_total_length,
           (Dwarf_Unsigned) line_offset, 
           (Dwarf_Signed) line_offset);
    printf("line table version %d\n",(int) prefix.pf_version);
    printf("line table length field length %d prologue length %d\n",
           (int)prefix.pf_length_field_length,
           (int)prefix.pf_prologue_length);
    printf("compilation_directory %s\n",
           comp_dir ? ((char *) comp_dir) : "");

    printf("  min instruction length %d\n",
           (int) prefix.pf_minimum_instruction_length);
    printf("  default is stmt        %d\n", (int)
           prefix.pf_default_is_stmt);
    printf("  line base              %d\n", (int)
           prefix.pf_line_base);
    printf("  line_range             %d\n", (int)
           prefix.pf_line_range);
    printf("  opcode base            %d\n", (int)
           prefix.pf_opcode_base);
    printf("  standard opcode count  %d\n", (int)
           prefix.pf_std_op_count);

    for (i = 1; i < prefix.pf_opcode_base; i++) {
        printf("  opcode[%2d] length  %d\n", (int) i,
               (int) prefix.pf_opcode_length_table[i - 1]);
    }
    printf("  include directories count %d\n", (int)
           prefix.pf_include_directories_count);

    
    for (i = 0; i < prefix.pf_include_directories_count; ++i) {
        printf("  include dir[%d] %s\n",
               (int) i, prefix.pf_include_directories[i]);
    }
    printf("  files count            %d\n", (int)
           prefix.pf_files_count);

    for (i = 0; i < prefix.pf_files_count; ++i) {
        struct Line_Table_File_Entry_s *lfile =
            prefix.pf_line_table_file_entries + i;

        Dwarf_Unsigned tlm2 = lfile->lte_last_modification_time;
        Dwarf_Unsigned di = lfile->lte_directory_index;
        Dwarf_Unsigned fl = lfile->lte_length_of_file;

        printf("  file[%d]  %s (file-number: %d) \n",
               (int) i, (char *) lfile->lte_filename,
               (int)(i+1));

        printf("    dir index %d\n", (int) di);
        {
            time_t tt = (time_t) tlm2;

            printf("    last time 0x%x %s",     /* ctime supplies
                                                   newline */
                   (unsigned) tlm2, ctime(&tt));
        }
        printf("    file length %ld 0x%lx\n",
               (long) fl, (unsigned long) fl);


    }


    {
        Dwarf_Unsigned offset = 0;
        if(bogus_bytes_count > 0) {
            Dwarf_Unsigned wcount = bogus_bytes_count;
            Dwarf_Unsigned boffset = bogus_bytes_ptr - orig_line_ptr;
            printf("*** DWARF CHECK: the line table prologue  header_length "
                " is %" DW_PR_DUu " too high, we pretend it is smaller."
                "Section offset: %" DW_PR_DUu " (0x%" DW_PR_DUx ") ***\n",
                wcount, boffset,boffset);
            *err_count_out += 1;
        }
        offset = line_ptr - orig_line_ptr;

        printf("  statement prog offset in section: %" DW_PR_DUu " 0x%" DW_PR_DUx "\n",
               offset, offset);
    }

    /* Initialize the part of the state machine dependent on the
       prefix.  */
    is_stmt = prefix.pf_default_is_stmt;


    print_line_header();
    /* Start of statement program.  */
    while (line_ptr < line_ptr_end) {
        int type = 0;

        printf(" [0x%06" DW_PR_DSx "] ", 
            (Dwarf_Signed) (line_ptr - orig_line_ptr));
        opcode = *(Dwarf_Small *) line_ptr;
        line_ptr++;
        /* 'type' is the output */
        WHAT_IS_OPCODE(type, opcode, prefix.pf_opcode_base,
                       prefix.pf_opcode_length_table, line_ptr,
                       prefix.pf_std_op_count);

        if (type == LOP_DISCARD) {
            int oc;
            int opcnt = prefix.pf_opcode_length_table[opcode];

            printf("*** DWARF CHECK: DISCARD standard opcode %d "
                "with %d operands: "
                "not understood.", opcode, opcnt);
            *err_count_out += 1;
            for (oc = 0; oc < opcnt; oc++) {
                /* 
                 * Read and discard operands we don't
                 * understand.
                 * Arbitrary choice of unsigned read.
                 * Signed read would work as well.
                 */
                Dwarf_Unsigned utmp2;

                DECODE_LEB128_UWORD(line_ptr, utmp2);
                printf(" %" DW_PR_DUu " (0x%" DW_PR_DUx ")",
                       (Dwarf_Unsigned) utmp2,
                       (Dwarf_Unsigned) utmp2);
            }

            printf("***\n");
            /* do nothing, necessary ops done */
        } else if (type == LOP_SPECIAL) {
            /* This op code is a special op in the object, no matter
               that it might fall into the standard op range in this
               compile Thatis, these are special opcodes between
               special_opcode_base and MAX_LINE_OP_CODE.  (including
               special_opcode_base and MAX_LINE_OP_CODE) */
            char special[50];
            unsigned origop = opcode;

            opcode = opcode - prefix.pf_opcode_base;
            address = address + prefix.pf_minimum_instruction_length *
                (opcode / prefix.pf_line_range);
            line =
                line + prefix.pf_line_base +
                opcode % prefix.pf_line_range;

            sprintf(special, "Specialop %3u", origop);
            print_line_detail(special,
                              opcode, address, (int) file, line, column,
                              is_stmt, basic_block, end_sequence,
                              prologue_end, epilogue_begin, isa);

            basic_block = false;

        } else if (type == LOP_STANDARD) {
            switch (opcode) {

            case DW_LNS_copy:{

                    print_line_detail("DW_LNS_copy",
                                      opcode, address, file, line,
                                      column, is_stmt, basic_block,
                                      end_sequence, prologue_end,
                                      epilogue_begin, isa);

                    basic_block = false;
                    break;
                }

            case DW_LNS_advance_pc:{
                    Dwarf_Unsigned utmp2;


                    DECODE_LEB128_UWORD(line_ptr, utmp2);
                    printf("DW_LNS_advance_pc val %" DW_PR_DSd " 0x%" DW_PR_DUx "\n",
                           (Dwarf_Signed) (Dwarf_Word) utmp2,
                           (Dwarf_Unsigned) (Dwarf_Word) utmp2);
                    leb128_num = (Dwarf_Word) utmp2;
                    address =
                        address +
                        prefix.pf_minimum_instruction_length *
                        leb128_num;
                    break;
                }

            case DW_LNS_advance_line:{
                    Dwarf_Signed stmp;


                    DECODE_LEB128_SWORD(line_ptr, stmp);
                    advance_line = (Dwarf_Sword) stmp;
                    printf("DW_LNS_advance_line val %" DW_PR_DSd " 0x%" DW_PR_DSx "\n",
                           (Dwarf_Signed) advance_line,
                           (Dwarf_Signed) advance_line);
                    line = line + advance_line;
                    break;
                }

            case DW_LNS_set_file:{
                    Dwarf_Unsigned utmp2;


                    DECODE_LEB128_UWORD(line_ptr, utmp2);
                    file = (Dwarf_Word) utmp2;
                    printf("DW_LNS_set_file  %ld\n", (long) file);
                    break;
                }

            case DW_LNS_set_column:{
                    Dwarf_Unsigned utmp2;


                    DECODE_LEB128_UWORD(line_ptr, utmp2);
                    column = (Dwarf_Word) utmp2;
                    printf("DW_LNS_set_column val %" DW_PR_DSd " 0x%" DW_PR_DSx "\n",
                           (Dwarf_Signed) column, (Dwarf_Signed) column);
                    break;
                }

            case DW_LNS_negate_stmt:{
                    is_stmt = !is_stmt;
                    printf("DW_LNS_negate_stmt\n");
                    break;
                }

            case DW_LNS_set_basic_block:{

                    printf("DW_LNS_set_basic_block\n");
                    basic_block = true;
                    break;
                }

            case DW_LNS_const_add_pc:{
                    opcode = MAX_LINE_OP_CODE - prefix.pf_opcode_base;
                    address =
                        address +
                        prefix.pf_minimum_instruction_length * (opcode /
                                                                prefix.
                                                                pf_line_range);

                    printf("DW_LNS_const_add_pc new address 0x%" DW_PR_DSx "\n",
                           (Dwarf_Signed) address);
                    break;
                }

            case DW_LNS_fixed_advance_pc:{

                    READ_UNALIGNED(dbg, fixed_advance_pc, Dwarf_Half,
                                   line_ptr, sizeof(Dwarf_Half));
                    line_ptr += sizeof(Dwarf_Half);
                    address = address + fixed_advance_pc;
                    printf("DW_LNS_fixed_advance_pc val %" DW_PR_DSd 
                       " 0x%" DW_PR_DSx " new address 0x%" DW_PR_DSx "\n",
                           (Dwarf_Signed) fixed_advance_pc,
                           (Dwarf_Signed) fixed_advance_pc,
                           (Dwarf_Signed) address);
                    break;
                }
            case DW_LNS_set_prologue_end:{

                    prologue_end = true;
                    printf("DW_LNS_set_prologue_end set true.\n");
                    break;


                }
                /* New in DWARF3 */
            case DW_LNS_set_epilogue_begin:{
                    epilogue_begin = true;
                    printf("DW_LNS_set_epilogue_begin set true.\n");
                    break;
                }

                /* New in DWARF3 */
            case DW_LNS_set_isa:{
                    Dwarf_Unsigned utmp2;

                    DECODE_LEB128_UWORD(line_ptr, utmp2);
                    isa = utmp2;
                    printf("DW_LNS_set_isa new value 0x%" DW_PR_DUx ".\n",
                           (Dwarf_Unsigned) utmp2);
                    if (isa != utmp2) {
                        /* The value of the isa did not fit in our
                           local so we record it wrong. declare an
                           error. */
                        dwarf_free_line_table_prefix(&prefix);

                        _dwarf_error(dbg, error,
                                     DW_DLE_LINE_NUM_OPERANDS_BAD);
                        return (DW_DLV_ERROR);
                    }
                    break;
                }
            }


        } else if (type == LOP_EXTENDED) {
            Dwarf_Unsigned utmp3 = 0;
            Dwarf_Word instr_length = 0;
            Dwarf_Small ext_opcode = 0;

            DECODE_LEB128_UWORD(line_ptr, utmp3);
            instr_length = (Dwarf_Word) utmp3;
            ext_opcode = *(Dwarf_Small *) line_ptr;
            line_ptr++;
            switch (ext_opcode) {

            case DW_LNE_end_sequence:{
                    end_sequence = true;

                    print_line_detail("DW_LNE_end_sequence extended",
                                      opcode, address, file, line,
                                      column, is_stmt, basic_block,
                                      end_sequence, prologue_end,
                                      epilogue_begin, isa);

                    address = 0;
                    file = 1;
                    line = 1;
                    column = 0;
                    is_stmt = prefix.pf_default_is_stmt;
                    basic_block = false;
                    end_sequence = false;
                    prologue_end = false;
                    epilogue_begin = false;


                    break;
                }

            case DW_LNE_set_address:{
                    {
                        READ_UNALIGNED(dbg, address, Dwarf_Addr,
                                       line_ptr, 
                                       die->di_cu_context->cc_address_size);

                        line_ptr += die->di_cu_context->cc_address_size;
                        printf("DW_LNE_set_address address 0x%" DW_PR_DUx "\n",
                               (Dwarf_Unsigned) address);
                    }

                    break;
                }

            case DW_LNE_define_file:{
                    Dwarf_Unsigned di = 0;
                    Dwarf_Unsigned tlm = 0;
                    Dwarf_Unsigned fl = 0;

                    Dwarf_Small *fn = (Dwarf_Small *) line_ptr;
                    line_ptr = line_ptr + strlen((char *) line_ptr) + 1;

                    di = _dwarf_decode_u_leb128(line_ptr,
                                                &leb128_length);
                    line_ptr = line_ptr + leb128_length;

                    tlm = _dwarf_decode_u_leb128(line_ptr,
                                                 &leb128_length);
                    line_ptr = line_ptr + leb128_length;

                    fl = _dwarf_decode_u_leb128(line_ptr,
                                                &leb128_length);
                    line_ptr = line_ptr + leb128_length;


                    printf("DW_LNE_define_file %s \n", fn);
                    printf("    dir index %d\n", (int) di);
                    {
                        time_t tt3 = (time_t) tlm;

                        /* ctime supplies newline */
                        printf("    last time 0x%x %s",
                               (unsigned) tlm, ctime(&tt3));
                    }
                    printf("    file length %ld 0x%lx\n",
                           (long) fl, (unsigned long) fl);

                    break;
                }

            default:{
                 /* This is an extended op code we do not know about,
                    other than we know now many bytes it is
                    (and the op code and the bytes of operand). */

                 Dwarf_Unsigned remaining_bytes = instr_length -1;
                 if(instr_length < 1 || remaining_bytes > DW_LNE_LEN_MAX) {
                      dwarf_free_line_table_prefix(&prefix);
                      _dwarf_error(dbg, error,
                                 DW_DLE_LINE_EXT_OPCODE_BAD);
                      return (DW_DLV_ERROR);
                 }
                 printf("DW_LNE extended op 0x%x ",ext_opcode);
                 printf("Bytecount: " DW_PR_DUu , instr_length);
                 if(remaining_bytes > 0) {
                     printf(" linedata: 0x");
                     while (remaining_bytes > 0) {
                        printf("%02x",(unsigned char)(*(line_ptr)));
                        line_ptr++;
                        remaining_bytes--;
                     }
                 }
                 printf("\n");
                }
                break;
            }

        }
    }

    dwarf_free_line_table_prefix(&prefix);
    return (DW_DLV_OK);
}
Beispiel #25
0
N *Tokenizer<N, P>::lookup(const char *begin, const char *end,
                           Allocator<N, P> *allocator, Lattice *lattice) const {
  CharInfo cinfo;
  N *result_node = 0;
  size_t mblen = 0;
  size_t clen = 0;

  end = static_cast<size_t>(end - begin) >= 65535 ? begin + 65535 : end;

  if (isPartial) {
    const size_t begin_pos = begin - lattice->sentence();
    for (size_t n = begin_pos + 1; n < lattice->size(); ++n) {
      if (lattice->boundary_constraint(n) == MECAB_TOKEN_BOUNDARY) {
        end = lattice->sentence() + n;
        break;
      }
    }
  }

  const char *begin2 = property_.seekToOtherType(begin, end, space_,
                                                 &cinfo, &mblen, &clen);

  Dictionary::result_type *daresults = allocator->mutable_results();
  const size_t results_size = allocator->results_size();

  for (std::vector<Dictionary *>::const_iterator it = dic_.begin();
       it != dic_.end(); ++it) {
    const size_t n = (*it)->commonPrefixSearch(
        begin2,
        static_cast<size_t>(end - begin2),
        daresults, results_size);

    for (size_t i = 0; i < n; ++i) {
      size_t size = (*it)->token_size(daresults[i]);
      const Token *token = (*it)->token(daresults[i]);
      for (size_t j = 0; j < size; ++j) {
        N *new_node = allocator->newNode();
        read_node_info(**it, *(token + j), &new_node);
        new_node->length = daresults[i].length;
        new_node->rlength = begin2 - begin + new_node->length;
        new_node->surface = begin2;
        new_node->stat = MECAB_NOR_NODE;
        new_node->char_type = cinfo.default_type;
        if (isPartial && !is_valid_node(lattice, new_node)) {
          continue;
        }
        new_node->bnext = result_node;
        result_node = new_node;
      }
    }
  }

  if (result_node && !cinfo.invoke) {
    return result_node;
  }

  const char *begin3 = begin2 + mblen;
  const char *group_begin3 = 0;

  if (begin3 > end) {
    ADDUNKNWON;
    if (result_node) {
    return result_node;
  }
  }

  if (cinfo.group) {
    const char *tmp = begin3;
    CharInfo fail;
    begin3 = property_.seekToOtherType(begin3, end, cinfo,
                                       &fail, &mblen, &clen);
    if (clen <= max_grouping_size_) {
      ADDUNKNWON;
    }
    group_begin3 = begin3;
    begin3 = tmp;
  }

  for (size_t i = 1; i <= cinfo.length; ++i) {
    if (begin3 > end) {
      break;
    }
    if (begin3 == group_begin3) {
      continue;
    }
    clen = i;
    ADDUNKNWON;
    if (!cinfo.isKindOf(property_.getCharInfo(begin3, end, &mblen))) {
      break;
    }
    begin3 += mblen;
  }

  if (!result_node) {
    ADDUNKNWON;
  }

  if (isPartial && !result_node) {
    begin3 = begin2;
    while (true) {
      cinfo = property_.getCharInfo(begin3, end, &mblen);
      begin3 += mblen;
      if (begin3 > end ||
          lattice->boundary_constraint(begin3 - lattice->sentence())
          != MECAB_INSIDE_TOKEN) {
        break;
      }
    }
    ADDUNKNWON;

    if (!result_node) {
      N *new_node = allocator->newNode();
      new_node->char_type = cinfo.default_type;
      new_node->surface = begin2;
      new_node->length = begin3 - begin2;
      new_node->rlength = begin3 - begin;
      new_node->stat = MECAB_UNK_NODE;
      new_node->bnext = result_node;
      new_node->feature =
          lattice->feature_constraint(begin - lattice->sentence());
      CHECK_DIE(new_node->feature);
      result_node = new_node;
    }
  }

  return result_node;
}
bool EncoderFeatureIndex::buildFeature(LearnerPath *path) {
  path->rnode->wcost = path->cost = 0.0;

  std::string ufeature1;
  std::string lfeature1;
  std::string rfeature1;
  std::string ufeature2;
  std::string lfeature2;
  std::string rfeature2;

  CHECK_DIE(rewrite_.rewrite2(path->lnode->feature,
                              &ufeature1,
                              &lfeature1,
                              &rfeature1))
      << " cannot rewrite pattern: "
      << path->lnode->feature;

  CHECK_DIE(rewrite_.rewrite2(path->rnode->feature,
                              &ufeature2,
                              &lfeature2,
                              &rfeature2))
      << " cannot rewrite pattern: "
      << path->rnode->feature;

  {
    os_.clear();
    os_ << ufeature2 << ' ' << path->rnode->char_type << '\0';
    const std::string key(os_.str());
    std::map<std::string, std::pair<const int *, size_t> >::iterator
        it = feature_cache_.find(key);
    if (it != feature_cache_.end()) {
      path->rnode->fvector = it->second.first;
      it->second.second++;
    } else {
      if (!buildUnigramFeature(path, ufeature2.c_str())) return false;
      feature_cache_.insert(std::pair
                            <std::string, std::pair<const int *, size_t> >
                            (key,
                             std::make_pair<const int *, size_t>
                             (path->rnode->fvector, 1)));
    }
  }

  {
    os_.clear();
    os_ << rfeature1 << ' ' << lfeature2 << '\0';
    std::string key(os_.str());
    std::map<std::string, std::pair<const int *, size_t> >::iterator
        it = feature_cache_.find(key);
    if (it != feature_cache_.end()) {
      path->fvector = it->second.first;
      it->second.second++;
    } else {
      if (!buildBigramFeature(path, rfeature1.c_str(), lfeature2.c_str()))
        return false;
      feature_cache_.insert(std::pair
                            <std::string, std::pair<const int *, size_t> >
                            (key, std::make_pair<const int *, size_t>
                             (path->fvector, 1)));
    }
  }

  CHECK_DIE(path->fvector) << " fvector is NULL";
  CHECK_DIE(path->rnode->fvector) << "fevector is NULL";

  return true;
}
Beispiel #27
0
/*
	return DW_DLV_OK if ok. else DW_DLV_NO_ENTRY or DW_DLV_ERROR
*/
int
_dwarf_internal_printlines(Dwarf_Die die, Dwarf_Error * error)
{
    /* 
       This pointer is used to scan the portion of the .debug_line
       section for the current cu. */
    Dwarf_Small *line_ptr;
    Dwarf_Small *orig_line_ptr;

    /* 
       This points to the last byte of the .debug_line portion for the 
       current cu. */
    Dwarf_Small *line_ptr_end;

    /* 
       This points to the end of the statement program prologue for the 
       current cu, and serves to check that the prologue was correctly 
       decoded. */
    Dwarf_Small *check_line_ptr;

    /* 
       Pointer to a DW_AT_stmt_list attribute in case it exists in the 
       die. */
    Dwarf_Attribute stmt_list_attr;

    /* Pointer to DW_AT_comp_dir attribute in die. */
    Dwarf_Attribute comp_dir_attr;

    /* Pointer to name of compilation directory. */
    Dwarf_Small *comp_dir = NULL;

    /* 
       Offset into .debug_line specified by a DW_AT_stmt_list
       attribute. */
    Dwarf_Unsigned line_offset;

    /* These are the fields of the statement program header. */
    Dwarf_Unsigned total_length;
    Dwarf_Half version;
    Dwarf_Unsigned prologue_length;
    Dwarf_Small minimum_instruction_length;
    Dwarf_Small default_is_stmt;
    Dwarf_Sbyte line_base;
    Dwarf_Small line_range;
    Dwarf_Small opcode_base;

    Dwarf_Small *opcode_length;

    /* These are the state machine state variables. */
    Dwarf_Addr address;
    Dwarf_Word file;
    Dwarf_Word line;
    Dwarf_Word column;
    Dwarf_Bool is_stmt;
    Dwarf_Bool basic_block;
    Dwarf_Bool end_sequence;

    Dwarf_Sword i, file_entry_count, include_directories_count;

    /* 
       This is the current opcode read from the statement program. */
    Dwarf_Small opcode;

    /* 
       Pointer to a Dwarf_Line_Context_s structure that contains the
       context such as file names and include directories for the set
       of lines being generated. */
    Dwarf_Line_Context line_context;


    /* 
       These variables are used to decode leb128 numbers. Leb128_num
       holds the decoded number, and leb128_length is its length in
       bytes. */
    Dwarf_Word leb128_num;
    Dwarf_Word leb128_length;
    Dwarf_Sword advance_line;

    /* 
       This is the operand of the latest fixed_advance_pc extended
       opcode. */
    Dwarf_Half fixed_advance_pc;

    /* This is the length of an extended opcode instr.  */
    Dwarf_Word instr_length;
    Dwarf_Small ext_opcode;
    int local_length_size;
    /*REFERENCED*/ /* Not used in this instance of the macro */
    int local_extension_size;

    /* The Dwarf_Debug this die belongs to. */
    Dwarf_Debug dbg;
    int resattr;
    int lres;

    int res;

    /* ***** BEGIN CODE ***** */

    if (error != NULL)
	*error = NULL;

    CHECK_DIE(die, DW_DLV_ERROR)
	dbg = die->di_cu_context->cc_dbg;

    res =
       _dwarf_load_section(dbg,
		           dbg->de_debug_line_index,
			   &dbg->de_debug_line,
		           error);
    if (res != DW_DLV_OK) {
	return res;
    }

    resattr = dwarf_attr(die, DW_AT_stmt_list, &stmt_list_attr, error);
    if (resattr != DW_DLV_OK) {
	return resattr;
    }



    lres = dwarf_formudata(stmt_list_attr, &line_offset, error);
    if (lres != DW_DLV_OK) {
	return lres;
    }

    if (line_offset >= dbg->de_debug_line_size) {
	_dwarf_error(dbg, error, DW_DLE_LINE_OFFSET_BAD);
	return (DW_DLV_ERROR);
    }
    orig_line_ptr = dbg->de_debug_line;
    line_ptr = dbg->de_debug_line + line_offset;
    dwarf_dealloc(dbg, stmt_list_attr, DW_DLA_ATTR);

    /* 
       If die has DW_AT_comp_dir attribute, get the string that names
       the compilation directory. */
    resattr = dwarf_attr(die, DW_AT_comp_dir, &comp_dir_attr, error);
    if (resattr == DW_DLV_ERROR) {
	return resattr;
    }
    if (resattr == DW_DLV_OK) {
	int cres;
	char *cdir;

	cres = dwarf_formstring(comp_dir_attr, &cdir, error);
	if (cres == DW_DLV_ERROR) {
	    return cres;
	} else if (cres == DW_DLV_OK) {
	    comp_dir = (Dwarf_Small *) cdir;
	}
    }
    if (resattr == DW_DLV_OK) {
	dwarf_dealloc(dbg, comp_dir_attr, DW_DLA_ATTR);
    }

    /* 
       Following is a straightforward decoding of the statement
       program prologue information. */

    /* READ_AREA_LENGTH updates line_ptr for consumed bytes */
    READ_AREA_LENGTH(dbg, total_length, Dwarf_Unsigned,
		     line_ptr, local_length_size, local_extension_size);



    line_ptr_end = line_ptr + total_length;
    if (line_ptr_end > dbg->de_debug_line + dbg->de_debug_line_size) {
	_dwarf_error(dbg, error, DW_DLE_DEBUG_LINE_LENGTH_BAD);
	return (DW_DLV_ERROR);
    }

    printf("total line info length %ld bytes, "
	   "line offset 0x%llx %lld\n",
	   (long) total_length,
	   (long long) line_offset, (long long) line_offset);
    printf("compilation_directory %s\n",
	   comp_dir ? ((char *) comp_dir) : "");
    READ_UNALIGNED(dbg, version, Dwarf_Half,
		   line_ptr, sizeof(Dwarf_Half));
    line_ptr += sizeof(Dwarf_Half);
    if (version != CURRENT_VERSION_STAMP) {
	_dwarf_error(dbg, error, DW_DLE_VERSION_STAMP_ERROR);
	return (DW_DLV_ERROR);
    }

    READ_UNALIGNED(dbg, prologue_length, Dwarf_Unsigned,
		   line_ptr, local_length_size);
    line_ptr += local_length_size;
    check_line_ptr = line_ptr;

    minimum_instruction_length = *(Dwarf_Small *) line_ptr;
    line_ptr = line_ptr + sizeof(Dwarf_Small);

    default_is_stmt = *(Dwarf_Small *) line_ptr;
    line_ptr = line_ptr + sizeof(Dwarf_Small);

    line_base = *(Dwarf_Sbyte *) line_ptr;
    line_ptr = line_ptr + sizeof(Dwarf_Sbyte);

    line_range = *(Dwarf_Small *) line_ptr;
    line_ptr = line_ptr + sizeof(Dwarf_Small);

    opcode_base = *(Dwarf_Small *) line_ptr;
    line_ptr = line_ptr + sizeof(Dwarf_Small);
    printf("  min instruction length %d\n",
	   (int) minimum_instruction_length);
    printf("  default is stmt        %d\n", (int) default_is_stmt);
    printf("  line base              %d\n", (int) line_base);
    printf("  line_range             %d\n", (int) line_range);

    opcode_length = (Dwarf_Small *)
	alloca(sizeof(Dwarf_Small) * opcode_base);

    for (i = 1; i < opcode_base; i++) {
	opcode_length[i] = *(Dwarf_Small *) line_ptr;
	printf("  opcode[%d] length %d\n", (int) i,
	       (int) opcode_length[i]);
	line_ptr = line_ptr + sizeof(Dwarf_Small);
    }

    include_directories_count = 0;
    while ((*(char *) line_ptr) != '\0') {
	printf("  include dir[%d] %s\n",
	       (int) include_directories_count, line_ptr);
	line_ptr = line_ptr + strlen((char *) line_ptr) + 1;
	include_directories_count++;
    }
    line_ptr++;

    file_entry_count = 0;
    while (*(char *) line_ptr != '\0') {

	Dwarf_Unsigned tlm2;
	Dwarf_Unsigned di;
	Dwarf_Unsigned fl;

	printf("  file[%d]  %s\n",
	       (int) file_entry_count, (char *) line_ptr);

	line_ptr = line_ptr + strlen((char *) line_ptr) + 1;

	di = _dwarf_decode_u_leb128(line_ptr, &leb128_length);
	line_ptr = line_ptr + leb128_length;

	tlm2 = _dwarf_decode_u_leb128(line_ptr, &leb128_length);
	line_ptr = line_ptr + leb128_length;

	fl = _dwarf_decode_u_leb128(line_ptr, &leb128_length);
	line_ptr = line_ptr + leb128_length;

	printf("    dir index %d\n", (int) di);
	{
	    time_t tt = (time_t) tlm2;

	    printf("    last time 0x%x %s",	/* ctime supplies
						   newline */
		   (unsigned) tlm2, ctime(&tt));
	}
	printf("    file length %ld 0x%lx\n",
	       (long) fl, (unsigned long) fl);


	file_entry_count++;
    }
    line_ptr++;

    if (line_ptr != check_line_ptr + prologue_length) {
	_dwarf_error(dbg, error, DW_DLE_LINE_PROLOG_LENGTH_BAD);
	return (DW_DLV_ERROR);
    }

    /* Set up context structure for this set of lines. */
    line_context = (Dwarf_Line_Context)
	_dwarf_get_alloc(dbg, DW_DLA_LINE_CONTEXT, 1);
    if (line_context == NULL) {
	_dwarf_error(dbg, error, DW_DLE_ALLOC_FAIL);
	return (DW_DLV_ERROR);
    }

    printf("  statement prog offset in section: %lld 0x%llx\n",
	   (long long) (line_ptr - orig_line_ptr),
	   (long long) (line_ptr - orig_line_ptr));

    /* Initialize the state machine.  */
    address = 0;
    file = 1;
    line = 1;
    column = 0;
    is_stmt = default_is_stmt;
    basic_block = false;
    end_sequence = false;

    print_line_header();
    /* Start of statement program.  */
    while (line_ptr < line_ptr_end) {
	int type;

	printf(" [0x%06llx] ", (long long) (line_ptr - orig_line_ptr));
	opcode = *(Dwarf_Small *) line_ptr;
	line_ptr++;
	/* 'type' is the output */
	WHAT_IS_OPCODE(type, opcode, opcode_base,
		       opcode_length, line_ptr);



	if (type == LOP_DISCARD) {
	    /* do nothing, necessary ops done */
	} else if (type == LOP_SPECIAL) {
	    /* This op code is a special op in the object, no matter
	       that it might fall into the standard op range in this
	       compile Thatis, these are special opcodes between
	       special_opcode_base and MAX_LINE_OP_CODE.  (including
	       special_opcode_base and MAX_LINE_OP_CODE) */
	    char special[50];
	    unsigned origop = opcode;

	    opcode = opcode - opcode_base;
	    address = address + minimum_instruction_length *
		(opcode / line_range);
	    line = line + line_base + opcode % line_range;

	    sprintf(special, "Specialop %3u", origop);
	    print_line_detail(special,
			      opcode, address, (int) file, line, column,
			      is_stmt, basic_block, end_sequence);

	    basic_block = false;

	} else if (type == LOP_STANDARD) {
	    switch (opcode) {

	    case DW_LNS_copy:{
		    if (opcode_length[DW_LNS_copy] != 0) {
			_dwarf_error(dbg, error,
				     DW_DLE_LINE_NUM_OPERANDS_BAD);
			return (DW_DLV_ERROR);
		    }

		    print_line_detail("DW_LNS_copy",
				      opcode, address, file, line,
				      column, is_stmt, basic_block,
				      end_sequence);

		    basic_block = false;
		    break;
		}

	    case DW_LNS_advance_pc:{
		    Dwarf_Unsigned utmp2;

		    if (opcode_length[DW_LNS_advance_pc] != 1) {
			_dwarf_error(dbg, error,
				     DW_DLE_LINE_NUM_OPERANDS_BAD);
			return (DW_DLV_ERROR);
		    }

		    DECODE_LEB128_UWORD(line_ptr, utmp2)
			printf("DW_LNS_advance_pc val %lld 0x%llx\n",
			       (long long) (Dwarf_Word) utmp2,
			       (long long) (Dwarf_Word) utmp2);
		    leb128_num = (Dwarf_Word) utmp2;
		    address =
			address +
			minimum_instruction_length * leb128_num;
		    break;
		}

	    case DW_LNS_advance_line:{
		    Dwarf_Signed stmp;

		    if (opcode_length[DW_LNS_advance_line] != 1) {
			_dwarf_error(dbg, error,
				     DW_DLE_LINE_NUM_OPERANDS_BAD);
			return (DW_DLV_ERROR);
		    }

		    DECODE_LEB128_SWORD(line_ptr, stmp)
			advance_line = (Dwarf_Sword) stmp;
		    printf("DW_LNS_advance_line val %lld 0x%llx\n",
			   (long long) advance_line,
			   (long long) advance_line);
		    line = line + advance_line;
		    break;
		}

	    case DW_LNS_set_file:{
		    Dwarf_Unsigned utmp2;

		    if (opcode_length[DW_LNS_set_file] != 1) {
			_dwarf_error(dbg, error,
				     DW_DLE_LINE_NUM_OPERANDS_BAD);
			return (DW_DLV_ERROR);
		    }

		    DECODE_LEB128_UWORD(line_ptr, utmp2)
			file = (Dwarf_Word) utmp2;
		    printf("DW_LNS_set_file  %ld\n", (long) file);
		    break;
		}

	    case DW_LNS_set_column:{
		    Dwarf_Unsigned utmp2;

		    if (opcode_length[DW_LNS_set_column] != 1) {
			_dwarf_error(dbg, error,
				     DW_DLE_LINE_NUM_OPERANDS_BAD);
			return (DW_DLV_ERROR);
		    }

		    DECODE_LEB128_UWORD(line_ptr, utmp2)
			column = (Dwarf_Word) utmp2;
		    printf("DW_LNS_set_column val %lld 0x%llx\n",
			   (long long) column, (long long) column);
		    break;
		}

	    case DW_LNS_negate_stmt:{
		    if (opcode_length[DW_LNS_negate_stmt] != 0) {
			_dwarf_error(dbg, error,
				     DW_DLE_LINE_NUM_OPERANDS_BAD);
			return (DW_DLV_ERROR);
		    }

		    is_stmt = !is_stmt;
		    printf("DW_LNS_negate_stmt\n");
		    break;
		}

	    case DW_LNS_set_basic_block:{
		    if (opcode_length[DW_LNS_set_basic_block] != 0) {
			_dwarf_error(dbg, error,
				     DW_DLE_LINE_NUM_OPERANDS_BAD);
			return (DW_DLV_ERROR);
		    }

		    printf("DW_LNS_set_basic_block\n");
		    basic_block = true;
		    break;
		}

	    case DW_LNS_const_add_pc:{
		    opcode = MAX_LINE_OP_CODE - opcode_base;
		    address = address + minimum_instruction_length *
			(opcode / line_range);

		    printf("DW_LNS_const_add_pc new address 0x%llx\n",
			   (long long) address);
		    break;
		}

	    case DW_LNS_fixed_advance_pc:{
		    if (opcode_length[DW_LNS_fixed_advance_pc] != 1) {
			_dwarf_error(dbg, error,
				     DW_DLE_LINE_NUM_OPERANDS_BAD);
			return (DW_DLV_ERROR);
		    }

		    READ_UNALIGNED(dbg, fixed_advance_pc, Dwarf_Half,
				   line_ptr, sizeof(Dwarf_Half));
		    line_ptr += sizeof(Dwarf_Half);
		    address = address + fixed_advance_pc;
		    printf("DW_LNS_fixed_advance_pc val %lld 0x%llx"
			   " new address 0x%llx\n",
			   (long long) fixed_advance_pc,
			   (long long) fixed_advance_pc,
			   (long long) address);
		    break;
		}
	    }

	} else if (type == LOP_EXTENDED) {
	    Dwarf_Unsigned utmp3;

	    DECODE_LEB128_UWORD(line_ptr, utmp3)
		instr_length = (Dwarf_Word) utmp3;
	    ext_opcode = *(Dwarf_Small *) line_ptr;
	    line_ptr++;
	    switch (ext_opcode) {

	    case DW_LNE_end_sequence:{
		    end_sequence = true;

		    print_line_detail("DW_LNE_end_sequence extended",
				      opcode, address, file, line,
				      column, is_stmt, basic_block,
				      end_sequence);

		    address = 0;
		    file = 1;
		    line = 1;
		    column = 0;
		    is_stmt = default_is_stmt;
		    basic_block = false;
		    end_sequence = false;

		    break;
		}

	    case DW_LNE_set_address:{
		    if (instr_length - 1 == dbg->de_pointer_size) {
			READ_UNALIGNED(dbg, address, Dwarf_Addr,
				       line_ptr, dbg->de_pointer_size);

			line_ptr += dbg->de_pointer_size;
			printf("DW_LNE_set_address address 0x%llx\n",
			       (long long) address);
		    } else {
			_dwarf_error(dbg, error,
				     DW_DLE_LINE_SET_ADDR_ERROR);
			return (DW_DLV_ERROR);
		    }

		    break;
		}

	    case DW_LNE_define_file:{


		    Dwarf_Small *fn;
		    Dwarf_Signed di;
		    Dwarf_Signed tlm;
		    Dwarf_Unsigned fl;

		    fn = (Dwarf_Small *) line_ptr;
		    line_ptr = line_ptr + strlen((char *) line_ptr) + 1;

		    di = _dwarf_decode_u_leb128(line_ptr,
						&leb128_length);
		    line_ptr = line_ptr + leb128_length;

		    tlm =
			_dwarf_decode_u_leb128(line_ptr,
					       &leb128_length);
		    line_ptr = line_ptr + leb128_length;

		    fl = _dwarf_decode_u_leb128(line_ptr,
						&leb128_length);
		    line_ptr = line_ptr + leb128_length;


		    printf("DW_LNE_define_file %s \n", fn);
		    printf("    dir index %d\n", (int) di);
		    {
			time_t tt3 = (time_t) tlm;

			/* ctime supplies newline */
			printf("    last time 0x%x %s",
			       (unsigned) tlm, ctime(&tt3));
		    }
		    printf("    file length %ld 0x%lx\n",
			   (long) fl, (unsigned long) fl);

		    break;
		}

	    default:{
		    _dwarf_error(dbg, error,
				 DW_DLE_LINE_EXT_OPCODE_BAD);
		    return (DW_DLV_ERROR);
		}
	    }

	}
    }

    return (DW_DLV_OK);
}
Beispiel #28
0
/*  return DW_DLV_OK if ok. else DW_DLV_NO_ENTRY or DW_DLV_ERROR
    If err_count_out is non-NULL, this is a special 'check'
    call.  */
static int
_dwarf_internal_printlines(Dwarf_Die die,
    Dwarf_Error * error,
    int * err_count_out,
    int only_line_header)
{
    /*  This pointer is used to scan the portion of the .debug_line
        section for the current cu. */
    Dwarf_Small *line_ptr = 0;
    Dwarf_Small *orig_line_ptr = 0;

    /*  Pointer to a DW_AT_stmt_list attribute in case it exists in the
        die. */
    Dwarf_Attribute stmt_list_attr = 0;

    /*  Pointer to DW_AT_comp_dir attribute in die. */
    Dwarf_Attribute comp_dir_attr = 0;

    /*  Pointer to name of compilation directory. */
    Dwarf_Small *comp_dir = NULL;

    /*  Offset into .debug_line specified by a DW_AT_stmt_list
        attribute. */
    Dwarf_Unsigned line_offset = 0;

    Dwarf_Sword i=0;
    Dwarf_Word u=0;

    /*  These variables are used to decode leb128 numbers. Leb128_num
        holds the decoded number, and leb128_length is its length in
        bytes. */
    Dwarf_Half attrform = 0;

    /*  In case there are wierd bytes 'after' the line table
        prologue this lets us print something. This is a gcc
        compiler bug and we expect the bytes count to be 12.  */
    Dwarf_Small* bogus_bytes_ptr = 0;
    Dwarf_Unsigned bogus_bytes_count = 0;
    Dwarf_Half address_size = 0;
    Dwarf_Unsigned fission_offset = 0;


    /* The Dwarf_Debug this die belongs to. */
    Dwarf_Debug dbg=0;
    Dwarf_CU_Context cu_context = 0;
    Dwarf_Line_Context line_context = 0;
    int resattr = DW_DLV_ERROR;
    int lres =    DW_DLV_ERROR;
    int res  =    DW_DLV_ERROR;
    Dwarf_Small *line_ptr_actuals  = 0;
    Dwarf_Small *line_ptr_end = 0;
    Dwarf_Small *section_start = 0;

    /* ***** BEGIN CODE ***** */

    if (error != NULL) {
        *error = NULL;
    }

    CHECK_DIE(die, DW_DLV_ERROR);
    cu_context = die->di_cu_context;
    dbg = cu_context->cc_dbg;

    res = _dwarf_load_section(dbg, &dbg->de_debug_line,error);
    if (res != DW_DLV_OK) {
        return res;
    }
    if (!dbg->de_debug_line.dss_size) {
        return (DW_DLV_NO_ENTRY);
    }

    address_size = _dwarf_get_address_size(dbg, die);
    resattr = dwarf_attr(die, DW_AT_stmt_list, &stmt_list_attr, error);
    if (resattr != DW_DLV_OK) {
        return resattr;
    }


    /*  The list of relevant FORMs is small.
        DW_FORM_data4, DW_FORM_data8, DW_FORM_sec_offset
    */
    lres = dwarf_whatform(stmt_list_attr,&attrform,error);
    if (lres != DW_DLV_OK) {
        return lres;
    }
    if (attrform != DW_FORM_data4 && attrform != DW_FORM_data8 &&
        attrform != DW_FORM_sec_offset ) {
        _dwarf_error(dbg, error, DW_DLE_LINE_OFFSET_BAD);
        return (DW_DLV_ERROR);
    }
    lres = dwarf_global_formref(stmt_list_attr, &line_offset, error);
    if (lres != DW_DLV_OK) {
        return lres;
    }

    if (line_offset >= dbg->de_debug_line.dss_size) {
        _dwarf_error(dbg, error, DW_DLE_LINE_OFFSET_BAD);
        return (DW_DLV_ERROR);
    }
    section_start =  dbg->de_debug_line.dss_data;
    {
        Dwarf_Unsigned fission_size = 0;
        int resfis = _dwarf_get_fission_addition_die(die, DW_SECT_LINE,
            &fission_offset,&fission_size,error);
        if(resfis != DW_DLV_OK) {
            return resfis;
        }
    }

    orig_line_ptr = section_start + line_offset + fission_offset;
    line_ptr = orig_line_ptr;
    dwarf_dealloc(dbg, stmt_list_attr, DW_DLA_ATTR);

    /*  If die has DW_AT_comp_dir attribute, get the string that names
        the compilation directory. */
    resattr = dwarf_attr(die, DW_AT_comp_dir, &comp_dir_attr, error);
    if (resattr == DW_DLV_ERROR) {
        return resattr;
    }
    if (resattr == DW_DLV_OK) {
        int cres = DW_DLV_ERROR;
        char *cdir = 0;

        cres = dwarf_formstring(comp_dir_attr, &cdir, error);
        if (cres == DW_DLV_ERROR) {
            return cres;
        } else if (cres == DW_DLV_OK) {
            comp_dir = (Dwarf_Small *) cdir;
        }
    }
    if (resattr == DW_DLV_OK) {
        dwarf_dealloc(dbg, comp_dir_attr, DW_DLA_ATTR);
    }
    line_context = (Dwarf_Line_Context)
        _dwarf_get_alloc(dbg, DW_DLA_LINE_CONTEXT, 1);
    if (line_context == NULL) {
        _dwarf_error(dbg, error, DW_DLE_ALLOC_FAIL);
        return (DW_DLV_ERROR);
    }
    {
        Dwarf_Small *newlinep = 0;
        int dres = _dwarf_read_line_table_header(dbg,
            cu_context,
            section_start,
            line_ptr,
            dbg->de_debug_line.dss_size,
            &newlinep,
            line_context,
            &bogus_bytes_ptr,
            &bogus_bytes_count,
            error,
            err_count_out);
        if (dres == DW_DLV_ERROR) {
            dwarf_srclines_dealloc_b(line_context);
            return dres;
        }
        if (dres == DW_DLV_NO_ENTRY) {
            dwarf_srclines_dealloc_b(line_context);
            return dres;
        }
        line_ptr_end = line_context->lc_line_ptr_end;
        line_ptr = newlinep;
        if (line_context->lc_actuals_table_offset > 0) {
            line_ptr_actuals = line_context->lc_line_prologue_start +
                line_context->lc_actuals_table_offset;
        }
    }
    line_context->lc_compilation_directory = comp_dir;
    if (only_line_header) {
        /* Just checking for header errors, nothing more here.*/
        dwarf_srclines_dealloc_b(line_context);
        return DW_DLV_OK;
    }

    dwarf_printf(dbg,
        "total line info length %ld bytes,"
        " line offset 0x%" DW_PR_XZEROS DW_PR_DUx
        " %" DW_PR_DUu "\n",
        (long) line_context->lc_total_length,
        line_context->lc_section_offset,
        line_context->lc_section_offset);
    if (line_context->lc_version_number <= DW_LINE_VERSION5) {
        dwarf_printf(dbg,
            "line table version %d\n",(int) line_context->lc_version_number);
    } else {
        dwarf_printf(dbg,
            "line table version 0x%x\n",(int) line_context->lc_version_number);
    }
    dwarf_printf(dbg,
        "line table length field length %d prologue length %d\n",
        (int)line_context->lc_length_field_length,
        (int)line_context->lc_prologue_length);
    dwarf_printf(dbg,
        "compilation_directory %s\n",
        comp_dir ? ((char *) comp_dir) : "");

    dwarf_printf(dbg,
        "  min instruction length %d\n",
        (int) line_context->lc_minimum_instruction_length);
    if (line_context->lc_version_number == EXPERIMENTAL_LINE_TABLES_VERSION) {
        dwarf_printf(dbg, "  actuals table offset "
            "0x%" DW_PR_XZEROS DW_PR_DUx
            " logicals table offset "
            "0x%" DW_PR_XZEROS DW_PR_DUx "\n",
            line_context->lc_actuals_table_offset,
            line_context->lc_logicals_table_offset);
    }
    if (line_context->lc_version_number == DW_LINE_VERSION5) {
        dwarf_printf(dbg,
            "  segment selector size %d\n",
            (int) line_context->lc_segment_selector_size);
        dwarf_printf(dbg,
            "  address    size       %d\n",
            (int) line_context->lc_address_size);
    }
    dwarf_printf(dbg,
        "  default is stmt        %d\n",(int)line_context->lc_default_is_stmt);
    dwarf_printf(dbg,
        "  line base              %d\n",(int)line_context->lc_line_base);
    dwarf_printf(dbg,
        "  line_range             %d\n",(int)line_context->lc_line_range);
    dwarf_printf(dbg,
        "  opcode base            %d\n",(int)line_context->lc_opcode_base);
    dwarf_printf(dbg,
        "  standard opcode count  %d\n",(int)line_context->lc_std_op_count);

    for (i = 1; i < line_context->lc_opcode_base; i++) {
        dwarf_printf(dbg,
            "  opcode[%2d] length  %d\n", (int) i,
            (int) line_context->lc_opcode_length_table[i - 1]);
    }
    dwarf_printf(dbg,
        "  include directories count %d\n",
        (int) line_context->lc_include_directories_count);
    for (u = 0; u < line_context->lc_include_directories_count; ++u) {
        dwarf_printf(dbg,
            "  include dir[%u] %s\n",
            (int) u, line_context->lc_include_directories[u]);
    }
    dwarf_printf(dbg,
        "  files count            %d\n",
        (int) line_context->lc_file_entry_count);

    if (line_context->lc_file_entry_count) {
        Dwarf_File_Entry fe = line_context->lc_file_entries;
        Dwarf_File_Entry fe2 = fe;
        unsigned fiu = 0;

        for (fiu = 0 ; fe2 ; fe2 = fe->fi_next,++fiu ) {
            Dwarf_Unsigned tlm2 = 0;
            Dwarf_Unsigned di = 0;
            Dwarf_Unsigned fl = 0;

            fe = fe2;
            tlm2 = fe->fi_time_last_mod;
            di = fe->fi_dir_index;
            fl = fe->fi_file_length;

            dwarf_printf(dbg,
                "  file[%u]  %s (file-number: %u) \n",
                (unsigned) fiu, (char *) fe->fi_file_name,
                (unsigned)(fiu+1));
            dwarf_printf(dbg,
                "    dir index %d\n", (int) di);
            {
                time_t tt = (time_t) tlm2;

                /* ctime supplies newline */
                dwarf_printf(dbg,
                    "    last time 0x%x %s",
                    (unsigned) tlm2, ctime(&tt));
                }
            dwarf_printf(dbg,
                "    file length %ld 0x%lx\n",
                (long) fl, (unsigned long) fl);
        }
    }

    if (line_context->lc_version_number == EXPERIMENTAL_LINE_TABLES_VERSION) {
        /*  Print the subprograms list. */
        Dwarf_Unsigned count = line_context->lc_subprogs_count;
        Dwarf_Unsigned exu = 0;
        Dwarf_Subprog_Entry sub = line_context->lc_subprogs;
        dwarf_printf(dbg,"  subprograms count"
            " %" DW_PR_DUu "\n",count);
        if (count > 0) {
            dwarf_printf(dbg,"    indx  file   line   name\n");
        }
        for (exu = 0 ; exu < count ; exu++,sub++) {
            dwarf_printf(dbg,"    [%2" DW_PR_DUu "] %4" DW_PR_DUu
                "    %4" DW_PR_DUu " %s\n",
                exu+1,
                sub->ds_decl_file,
                sub->ds_decl_line,
                sub->ds_subprog_name);
        }
    }
    {
        Dwarf_Unsigned offset = 0;
        if (bogus_bytes_count > 0) {
            Dwarf_Unsigned wcount = bogus_bytes_count;
            Dwarf_Unsigned boffset = bogus_bytes_ptr - section_start;
            dwarf_printf(dbg,
                "*** DWARF CHECK: the line table prologue  header_length "
                " is %" DW_PR_DUu " too high, we pretend it is smaller."
                "Section offset: 0x%" DW_PR_XZEROS DW_PR_DUx
                " (%" DW_PR_DUu ") ***\n",
                wcount, boffset,boffset);
            *err_count_out += 1;
        }
        offset = line_ptr - section_start;
        dwarf_printf(dbg,
            "  statement prog offset in section: 0x%"
            DW_PR_XZEROS DW_PR_DUx " (%" DW_PR_DUu ")\n",
            offset, offset);
    }

    {
        Dwarf_Bool doaddrs = false;
        Dwarf_Bool dolines = true;

        _dwarf_print_line_context_record(dbg,line_context);
        if (!line_ptr_actuals) {
            /* Normal single level line table. */

            Dwarf_Bool is_single_table = true;
            Dwarf_Bool is_actuals_table = false;
            print_line_header(dbg, is_single_table, is_actuals_table);
            res = read_line_table_program(dbg,
                line_ptr, line_ptr_end, orig_line_ptr,
                section_start,
                line_context,
                address_size, doaddrs, dolines,
                is_single_table,
                is_actuals_table,
                error,
                err_count_out);
            if (res != DW_DLV_OK) {
                dwarf_srclines_dealloc_b(line_context);
                return res;
            }
        } else {
            Dwarf_Bool is_single_table = false;
            Dwarf_Bool is_actuals_table = false;
            if (line_context->lc_version_number !=
                EXPERIMENTAL_LINE_TABLES_VERSION) {
                dwarf_srclines_dealloc_b(line_context);
                _dwarf_error(dbg, error, DW_DLE_VERSION_STAMP_ERROR);
                return (DW_DLV_ERROR);
            }
            /* Read Logicals */
            print_line_header(dbg, is_single_table, is_actuals_table);
            res = read_line_table_program(dbg,
                line_ptr, line_ptr_actuals, orig_line_ptr,
                section_start,
                line_context,
                address_size, doaddrs, dolines,
                is_single_table,
                is_actuals_table,
                error,err_count_out);
            if (res != DW_DLV_OK) {
                dwarf_srclines_dealloc_b(line_context);
                return res;
            }
            if (line_context->lc_actuals_table_offset > 0) {
                is_actuals_table = true;
                /* Read Actuals */

                print_line_header(dbg, is_single_table, is_actuals_table);
                res = read_line_table_program(dbg,
                    line_ptr_actuals, line_ptr_end, orig_line_ptr,
                    section_start,
                    line_context,
                    address_size, doaddrs, dolines,
                    is_single_table,
                    is_actuals_table,
                    error,
                    err_count_out);
                if (res != DW_DLV_OK) {
                    dwarf_srclines_dealloc_b(line_context);
                    return res;
                }
            }
        }
    }
    dwarf_srclines_dealloc_b(line_context);
    return DW_DLV_OK;
}
Beispiel #29
0
bool EncoderLearnerTagger::read(std::istream *is,
                                std::vector<double> *observed) {
  scoped_fixed_array<char, BUF_SIZE> line;
  char *column[8];
  std::string sentence;
  std::vector<LearnerNode *> corpus;
  ans_path_list_.clear();

  bool eos = false;

  for (;;) {
    if (!is->getline(line.get(), line.size())) {
      is->clear(std::ios::eofbit|std::ios::badbit);
      return true;
    }

    eos = (std::strcmp(line.get(), "EOS") == 0 || line[0] == '\0');

    LearnerNode *m = new LearnerNode;
    std::memset(m, 0, sizeof(LearnerNode));

    if (eos) {
      m->stat = MECAB_EOS_NODE;
    } else {
      const size_t size = tokenize(line.get(), "\t", column, 2);
      CHECK_DIE(size == 2) << "format error: " << line.get();
      m->stat    = MECAB_NOR_NODE;
      m->surface = mystrdup(column[0]);
      m->feature = mystrdup(column[1]);
      m->length  = m->rlength = std::strlen(column[0]);
    }

    corpus.push_back(m);

    if (eos) {
      break;
    }

    sentence.append(column[0]);
  }

  CHECK_DIE(!sentence.empty()) << "empty sentence";

  CHECK_DIE(eos) << "\"EOS\" is not found";

  begin_data_.reset_string(sentence);
  begin_ = begin_data_.get();

  initList();

  size_t pos = 0;
  for (size_t i = 0; corpus[i]->stat != MECAB_EOS_NODE; ++i) {
    LearnerNode *found = 0;
    for (LearnerNode *node = lookup(pos); node; node = node->bnext) {
      if (node_cmp_eq(*(corpus[i]), *node, eval_size_, unk_eval_size_)) {
        found = node;
        break;
      }
    }

    // cannot find node even using UNKNOWN WORD PROSESSING
    if (!found) {
      LearnerNode *node = allocator_->newNode();
      node->surface  = begin_ + pos;
      node->length   = node->rlength = std::strlen(corpus[i]->surface);
      node->feature  = feature_index_->strdup(corpus[i]->feature);
      node->stat     = MECAB_NOR_NODE;
      node->fvector  = 0;
      node->wcost    = 0.0;
      node->bnext    = begin_node_list_[pos];
      begin_node_list_[pos] = node;
      std::cout << "adding virtual node: " << node->feature << std::endl;
    }

    pos += corpus[i]->length;
  }

  buildLattice();

  LearnerNode* prev = end_node_list_[0];  // BOS
  prev->anext = 0;
  pos = 0;

  for (size_t i = 0; i < corpus.size(); ++i) {
    LearnerNode *rNode = 0;
    for (LearnerNode *node = begin_node_list_[pos]; node; node = node->bnext) {
      if (corpus[i]->stat == MECAB_EOS_NODE ||
          node_cmp_eq(*(corpus[i]), *node, eval_size_, unk_eval_size_)) {
        rNode = node;  // take last node
      }
    }

    LearnerPath *lpath = 0;
    for (LearnerPath *path = rNode->lpath; path; path = path->lnext) {
      if (prev == path->lnode) {
        lpath = path;
        break;
      }
    }

    CHECK_DIE(lpath->fvector) << "lpath is NULL";
    for (const int *f = lpath->fvector; *f != -1; ++f) {
      if (*f >= static_cast<long>(observed->size())) {
        observed->resize(*f + 1);
      }
      ++(*observed)[*f];
    }

    if (lpath->rnode->stat != MECAB_EOS_NODE) {
      for (const int *f = lpath->rnode->fvector; *f != -1; ++f) {
        if (*f >= static_cast<long>(observed->size())) {
          observed->resize(*f + 1);
        }
        ++(*observed)[*f];
      }
    }

    ans_path_list_.push_back(lpath);

    prev->anext = rNode;
    prev = rNode;

    if (corpus[i]->stat == MECAB_EOS_NODE) {
      break;
    }

    pos += std::strlen(corpus[i]->surface);
  }

  prev->anext = begin_node_list_[len_];  // connect to EOS
  begin_node_list_[len_]->anext = 0;

  for (size_t i = 0 ; i < corpus.size(); ++i) {
    delete [] corpus[i]->surface;
    delete [] corpus[i]->feature;
    delete corpus[i];
  }

  return true;
}
Beispiel #30
0
bool CharProperty::compile(const char *cfile,
                           const char *ufile,
                           const char *ofile) {
  scoped_fixed_array<char, BUF_SIZE> line;
  scoped_fixed_array<char *, 512> col;
  size_t id = 0;
  std::vector<Range> range;
  std::map<std::string, CharInfo> category;
  std::vector<std::string> category_ary;
  std::ifstream ifs(WPATH(cfile));
  std::istringstream iss(CHAR_PROPERTY_DEF_DEFAULT);
  std::istream *is = &ifs;

  if (!ifs) {
    std::cerr << cfile
              << " is not found. minimum setting is used" << std::endl;
    is = &iss;
  }

  while (is->getline(line.get(), line.size())) {
    if (std::strlen(line.get()) == 0 || line[0] == '#') {
      continue;
    }
    const size_t size = tokenize2(line.get(), "\t ", col.get(), col.size());
    CHECK_DIE(size >= 2) << "format error: " << line.get();

    // 0xFFFF..0xFFFF hoge hoge hgoe #
    if (std::strncmp(col[0], "0x", 2) == 0) {
      std::string low = col[0];
      std::string high;
      size_t pos = low.find("..");

      if (pos != std::string::npos) {
        high = low.substr(pos + 2, low.size() - pos - 2);
        low  = low.substr(0, pos);
      } else {
        high = low;
      }

      Range r;
      r.low = atohex(low.c_str());
      r.high = atohex(high.c_str());

      CHECK_DIE(r.low >= 0 && r.low < 0xffff &&
                r.high >= 0 && r.high < 0xffff &&
                r.low <= r.high)
          << "range error: low=" << r.low << " high=" << r.high;

      for (size_t i = 1; i < size; ++i) {
        if (col[i][0] == '#') {
          break;  // skip comments
        }
        CHECK_DIE(category.find(std::string(col[i])) != category.end())
            << "category [" << col[i] << "] is undefined";
        r.c.push_back(col[i]);
      }
      range.push_back(r);
    } else {
      CHECK_DIE(size >= 4) << "format error: " << line.get();

      std::string key = col[0];
      CHECK_DIE(category.find(key) == category.end())
          << "category " << key << " is already defined";

      CharInfo c;
      std::memset(&c, 0, sizeof(c));
      c.invoke  = std::atoi(col[1]);
      c.group   = std::atoi(col[2]);
      c.length  = std::atoi(col[3]);
      c.default_type = id++;

      category.insert(std::pair<std::string, CharInfo>(key, c));
      category_ary.push_back(key);
    }
  }

  CHECK_DIE(category.size() < 18) << "too many categories(>= 18)";

  CHECK_DIE(category.find("DEFAULT") != category.end())
      << "category [DEFAULT] is undefined";

  CHECK_DIE(category.find("SPACE") != category.end())
      << "category [SPACE] is undefined";

  std::istringstream iss2(UNK_DEF_DEFAULT);
  std::ifstream ifs2(WPATH(ufile));
  std::istream *is2 = &ifs2;

  if (!ifs2) {
    std::cerr << ufile
              << " is not found. minimum setting is used." << std::endl;
    is2 = &iss2;
  }

  std::set<std::string> unk;
  while (is2->getline(line.get(), line.size())) {
    const size_t n = tokenizeCSV(line.get(), col.get(), 2);
    CHECK_DIE(n >= 1) << "format error: " << line.get();
    const std::string key = col[0];
    CHECK_DIE(category.find(key) != category.end())
        << "category [" << key << "] is undefined in " << cfile;
    unk.insert(key);
  }

  for (std::map<std::string, CharInfo>::const_iterator it = category.begin();
       it != category.end();
       ++it) {
    CHECK_DIE(unk.find(it->first) != unk.end())
        << "category [" << it->first << "] is undefined in " << ufile;
  }

  std::vector<CharInfo> table(0xffff);
  {
    std::vector<std::string> tmp;
    tmp.push_back("DEFAULT");
    const CharInfo c = encode(tmp, &category);
    std::fill(table.begin(), table.end(), c);
  }

  for (std::vector<Range>::const_iterator it = range.begin();
       it != range.end();
       ++it) {
    const CharInfo c = encode(it->c, &category);
    for (int i = it->low; i <= it->high; ++i) {
      table[i] = c;
    }
  }

  // output binary table
  {
    std::ofstream ofs(WPATH(ofile), std::ios::binary|std::ios::out);
    CHECK_DIE(ofs) << "permission denied: " << ofile;

    unsigned int size = static_cast<unsigned int>(category.size());
    ofs.write(reinterpret_cast<const char*>(&size), sizeof(size));
    for (std::vector<std::string>::const_iterator it = category_ary.begin();
         it != category_ary.end();
         ++it) {
      char buf[32];
      std::fill(buf, buf + sizeof(buf), '\0');
      std::strncpy(buf, it->c_str(), sizeof(buf) - 1);
      ofs.write(reinterpret_cast<const char*>(buf), sizeof(buf));
    }
    ofs.write(reinterpret_cast<const char*>(&table[0]),
              sizeof(CharInfo) * table.size());
    ofs.close();
  }

  return true;
}