Ejemplo n.º 1
0
/**
 * @brief Builds a BCC lattice
 */
void BCCSystemBuilder::buildAtoms()
{
    Vector4d basis(zero4);
    double displacement;

    displacement = latticeData_.latticeConstant/2;

    buildLattice();

    basis << displacement, displacement, displacement, 0;

    buildLattice(basis);
    std::cout << "Creating BCC lattice with " << latticeData_.cells[0] << "x" << latticeData_.cells[1] << "x" << latticeData_.cells[2] << "x2 = " << latticeData_.cells[0]*latticeData_.cells[1]*latticeData_.cells[2]*2 << " atoms." << std::endl;

}
Ejemplo n.º 2
0
	double Tagger::calcGradient(double *expected) // calculate the gradience
	{
		double s = 0.0;
		buildLattice();
		forwardbackward();
		//begin
		size_t nodesize = getSizeOfNodes();
		size_t tagsize = featureIndexPtr->getSizeOfTags();
		for (size_t eachNodeIdx = 0; eachNodeIdx < nodesize; ++eachNodeIdx)
			for (size_t eachTagIdx = 0; eachTagIdx < tagsize; ++eachTagIdx)
				getNode(eachNodeIdx, eachTagIdx)->calcExpectation(expected, Z_, tagsize);

		for (size_t eachNodeIdx = 0; eachNodeIdx < nodesize; ++eachNodeIdx)
		{
			for (auto &featureId : *(getNode(eachNodeIdx, answer[eachNodeIdx])->fVector))
				--expected[featureId + answer[eachNodeIdx]];
			s += getNode(eachNodeIdx, answer[eachNodeIdx])->cost;//unigram cost

			const vector<Path*> &lpath = getNode(eachNodeIdx, answer[eachNodeIdx])->leftPath;
			for (auto it = lpath.begin(); it != lpath.end(); ++it)
			{
				if ((*it)->leftNode->y == answer[(*it)->leftNode->x])
				{
					for (auto &featureId : *((*it)->fVector))
						--expected[featureId + (*it)->leftNode->y * tagsize + (*it)->rightNode->y];
					s += (*it)->cost;  // bigram cost
					break;
				}
			}
		}
	
		viterbi();  // call for eval()
		releaseNodeAndPath();
		return Z_ - s;
	}
Ejemplo n.º 3
0
bool DecoderLearnerTagger::parse(std::istream* is, std::ostream *os) {
  allocator_->free();
  feature_index_->clear();

  if (!begin_) {
    begin_data_.reset(new char[BUF_SIZE * 16]);
    begin_ = begin_data_.get();
  }

  if (!is->getline(const_cast<char *>(begin_), BUF_SIZE * 16)) {
    is->clear(std::ios::eofbit|std::ios::badbit);
    return false;
  }

  initList();
  buildLattice();
  viterbi();

  for (LearnerNode *node = end_node_list_[0]->next;
       node->next; node = node->next) {
    os->write(node->surface, node->length);
    *os << '\t' << node->feature << '\n';
  }
  *os << "EOS\n";

  return true;
}
Ejemplo n.º 4
0
  double TaggerImpl::gradient(double *expected) {
    if (x_.empty()) return 0.0;

    buildLattice();
    forwardbackward();
    double s = 0.0;

    for (size_t i = 0;   i < x_.size(); ++i)
      for (size_t j = 0; j < ysize_; ++j)
        node_[i][j]->calcExpectation(expected, Z_, ysize_);

    for (size_t i = 0;   i < x_.size(); ++i) {
      for (int *f = node_[i][answer_[i]]->fvector; *f != -1; ++f)
        --expected[*f + answer_[i]];
      s += node_[i][answer_[i]]->cost;  // UNIGRAM cost
      const std::vector<Path *> &lpath = node_[i][answer_[i]]->lpath;
      for (const_Path_iterator it = lpath.begin(); it != lpath.end(); ++it) {
        if ((*it)->lnode->y == answer_[(*it)->lnode->x]) {
          for (int *f = (*it)->fvector; *f != -1; ++f)
            --expected[*f +(*it)->lnode->y * ysize_ +(*it)->rnode->y];
          s += (*it)->cost;  // BIGRAM COST
          break;
        }
      }
    }

    viterbi();  // call for eval()

    return Z_ - s ;
  }
Ejemplo n.º 5
0
/**
 * Makes the history cache forget the given word ids and rebuilds the
 * lattice from m_pPySegmentor.
 *
 * Does nothing when there is no history cache. An empty `wids` skips the
 * forget step (taking the address of wids[0] on an empty vector is undefined
 * behaviour) but still rebuilds the lattice.
 *
 * @param wids  word ids to drop from the history cache
 */
void
CIMIContext::removeFromHistoryCache(std::vector<unsigned>& wids)
{
    if (!m_pHistory)
        return;

    // Only form element pointers when there is at least one element.
    if (!wids.empty()) {
        unsigned *first = &wids[0];
        m_pHistory->forget(first, first + wids.size());
    }

    buildLattice(m_pPySegmentor);
}
Ejemplo n.º 6
0
/**
 * Removes the word behind a candidate from the history cache and the user
 * dictionary, then rebuilds the lattice.
 *
 * Only word ids greater than INI_USRDEF_WID are affected; any other
 * candidate is ignored. The history cache and the user dictionary are each
 * skipped when their pointer is null, matching the null check on m_pHistory
 * in removeFromHistoryCache().
 *
 * @param candi  candidate whose m_wordId identifies the word to delete and
 *               whose m_start is used to pick the lattice rebuild position
 */
void CIMIContext::deleteCandidate (CCandidate &candi)
{
    const unsigned wid = candi.m_wordId;

    if (wid <= INI_USRDEF_WID)
        return;

    if (m_pHistory)
        m_pHistory->forget (wid);
    if (m_pUserDict)
        m_pUserDict->removeWord (wid);

    buildLattice (m_latestSegments, candi.m_start+1);
}
Ejemplo n.º 7
0
  double TaggerImpl::collins(double *collins) {
    if (x_.empty()) return 0.0;

    buildLattice();
    viterbi();  // call for finding argmax y*
    double s = 0.0;

    // if correct parse, do not run forward + backward
    {
      size_t num = 0;
      for (size_t i = 0; i < x_.size(); ++i)
        if (answer_[i] == result_[i]) ++num;

      if (num == x_.size()) return 0.0;
    }

    for (size_t i = 0; i < x_.size(); ++i) {
      // answer
      {
        s += node_[i][answer_[i]]->cost;
        for (int *f = node_[i][answer_[i]]->fvector; *f != -1; ++f)
          ++collins[*f + answer_[i]];

        const std::vector<Path *> &lpath = node_[i][answer_[i]]->lpath;
        for (const_Path_iterator it = lpath.begin(); it != lpath.end(); ++it) {
          if ((*it)->lnode->y == answer_[(*it)->lnode->x]) {
            for (int *f = (*it)->fvector; *f != -1; ++f)
              ++collins[*f +(*it)->lnode->y * ysize_ +(*it)->rnode->y];
            s += (*it)->cost;
            break;
          }
        }
      }

      // result
      {
        s -= node_[i][result_[i]]->cost;
        for (int *f = node_[i][result_[i]]->fvector; *f != -1; ++f)
          --collins[*f + result_[i]];

        const std::vector<Path *> &lpath = node_[i][result_[i]]->lpath;
        for (const_Path_iterator it = lpath.begin(); it != lpath.end(); ++it) {
          if ((*it)->lnode->y == result_[(*it)->lnode->x]) {
            for (int *f = (*it)->fvector; *f != -1; ++f)
              --collins[*f +(*it)->lnode->y * ysize_ +(*it)->rnode->y];
            s -= (*it)->cost;
            break;
          }
        }
      }
    }

    return -s;
  }
Ejemplo n.º 8
0
  bool TaggerImpl::parse() {
    CHECK_FALSE(feature_index_->buildFeatures(this))
      << feature_index_->what();

    if (x_.empty()) return true;
    buildLattice();
    if (nbest_ || vlevel_ >= 1) forwardbackward();
    viterbi();
    if (nbest_) initNbest();

    return true;
  }
Ejemplo n.º 9
0
/**
 * Reads one annotated sentence from `is`, builds its lattice and records
 * the answer path.
 *
 * Input format: one "surface<TAB>feature" line per token, terminated by a
 * line that is either "EOS" or empty. The surfaces are concatenated into the
 * sentence buffer. For every token, the lattice is searched for a matching
 * node; when none is found a node is added to begin_node_list_ at that
 * position. After buildLattice(), the path linking consecutive answer nodes
 * is stored in ans_path_list_, the nodes are chained through `anext`, and
 * the feature ids seen on the way are counted in `observed`.
 *
 * Format errors and missing answer nodes/paths are reported through
 * CHECK_DIE.
 *
 * @param is        stream supplying the annotated sentence
 * @param observed  per-feature-id counters; resized on demand and
 *                  incremented for each feature on the answer path
 * @return true, also when the stream runs out before a sentence is complete
 *         (the stream state is then set to eofbit|badbit)
 */
bool EncoderLearnerTagger::read(std::istream *is,
                                std::vector<double> *observed) {
  scoped_fixed_array<char, BUF_SIZE> line;
  char *column[8];
  std::string sentence;
  std::vector<LearnerNode *> corpus;
  ans_path_list_.clear();

  bool eos = false;

  for (;;) {
    if (!is->getline(line.get(), line.size())) {
      is->clear(std::ios::eofbit|std::ios::badbit);
      // Release tokens read before the stream ran out; they would otherwise
      // leak because nothing else owns them.
      for (size_t i = 0; i < corpus.size(); ++i) {
        delete [] corpus[i]->surface;
        delete [] corpus[i]->feature;
        delete corpus[i];
      }
      return true;
    }

    // A sentence ends at an explicit "EOS" line or at an empty line.
    eos = (std::strcmp(line.get(), "EOS") == 0 || line[0] == '\0');

    LearnerNode *m = new LearnerNode;
    std::memset(m, 0, sizeof(LearnerNode));

    if (eos) {
      m->stat = MECAB_EOS_NODE;
    } else {
      const size_t size = tokenize(line.get(), "\t", column, 2);
      CHECK_DIE(size == 2) << "format error: " << line.get();
      m->stat    = MECAB_NOR_NODE;
      m->surface = mystrdup(column[0]);
      m->feature = mystrdup(column[1]);
      m->length  = m->rlength = std::strlen(column[0]);
    }

    corpus.push_back(m);

    if (eos) {
      break;
    }

    sentence.append(column[0]);
  }

  CHECK_DIE(!sentence.empty()) << "empty sentence";

  CHECK_DIE(eos) << "\"EOS\" is not found";

  begin_data_.reset_string(sentence);
  begin_ = begin_data_.get();

  initList();

  // Make sure every answer token has a node at its position in the sentence.
  size_t pos = 0;
  for (size_t i = 0; corpus[i]->stat != MECAB_EOS_NODE; ++i) {
    LearnerNode *found = 0;
    for (LearnerNode *node = lookup(pos); node; node = node->bnext) {
      if (node_cmp_eq(*(corpus[i]), *node, eval_size_, unk_eval_size_)) {
        found = node;
        break;
      }
    }

    // No matching node came back from lookup(): add one for this token.
    if (!found) {
      LearnerNode *node = allocator_->newNode();
      node->surface  = begin_ + pos;
      node->length   = node->rlength = std::strlen(corpus[i]->surface);
      node->feature  = feature_index_->strdup(corpus[i]->feature);
      node->stat     = MECAB_NOR_NODE;
      node->fvector  = 0;
      node->wcost    = 0.0;
      node->bnext    = begin_node_list_[pos];
      begin_node_list_[pos] = node;
      std::cout << "adding virtual node: " << node->feature << std::endl;
    }

    pos += corpus[i]->length;
  }

  buildLattice();

  LearnerNode* prev = end_node_list_[0];  // BOS
  prev->anext = 0;
  pos = 0;

  // Walk the answer tokens, linking their nodes and counting features.
  for (size_t i = 0; i < corpus.size(); ++i) {
    LearnerNode *rNode = 0;
    for (LearnerNode *node = begin_node_list_[pos]; node; node = node->bnext) {
      if (corpus[i]->stat == MECAB_EOS_NODE ||
          node_cmp_eq(*(corpus[i]), *node, eval_size_, unk_eval_size_)) {
        rNode = node;  // take last node
      }
    }

    // Report a missing answer node instead of dereferencing a null pointer.
    CHECK_DIE(rNode) << "rNode is NULL";

    // Find the path that connects the previous answer node to this one.
    LearnerPath *lpath = 0;
    for (LearnerPath *path = rNode->lpath; path; path = path->lnext) {
      if (prev == path->lnode) {
        lpath = path;
        break;
      }
    }

    // Check the path itself before looking at its feature vector.
    CHECK_DIE(lpath) << "lpath is NULL";
    CHECK_DIE(lpath->fvector) << "lpath->fvector is NULL";
    for (const int *f = lpath->fvector; *f != -1; ++f) {
      if (*f >= static_cast<long>(observed->size())) {
        observed->resize(*f + 1);
      }
      ++(*observed)[*f];
    }

    if (lpath->rnode->stat != MECAB_EOS_NODE) {
      for (const int *f = lpath->rnode->fvector; *f != -1; ++f) {
        if (*f >= static_cast<long>(observed->size())) {
          observed->resize(*f + 1);
        }
        ++(*observed)[*f];
      }
    }

    ans_path_list_.push_back(lpath);

    prev->anext = rNode;
    prev = rNode;

    if (corpus[i]->stat == MECAB_EOS_NODE) {
      break;
    }

    pos += std::strlen(corpus[i]->surface);
  }

  prev->anext = begin_node_list_[len_];  // connect to EOS
  begin_node_list_[len_]->anext = 0;

  // The corpus nodes were only needed to locate the answer path.
  for (size_t i = 0 ; i < corpus.size(); ++i) {
    delete [] corpus[i]->surface;
    delete [] corpus[i]->feature;
    delete corpus[i];
  }

  return true;
}
Ejemplo n.º 10
0
/**
 * @brief Builds the lattice.
 *
 * Runs buildLattice() once with no arguments, then prints the three cell
 * counts and their product as the atom total to standard output.
 */
void SCSystemBuilder::buildAtoms()
{
    buildLattice();

    std::cout << "Creating SC lattice with "
              << latticeData_.cells[0] << "x"
              << latticeData_.cells[1] << "x"
              << latticeData_.cells[2] << " = "
              << latticeData_.cells[0]*latticeData_.cells[1]*latticeData_.cells[2]
              << " atoms." << std::endl;
}