/** * @brief Builds a BCC lattice */ void BCCSystemBuilder::buildAtoms() { Vector4d basis(zero4); double displacement; displacement = latticeData_.latticeConstant/2; buildLattice(); basis << displacement, displacement, displacement, 0; buildLattice(basis); std::cout << "Creating BCC lattice with " << latticeData_.cells[0] << "x" << latticeData_.cells[1] << "x" << latticeData_.cells[2] << "x2 = " << latticeData_.cells[0]*latticeData_.cells[1]*latticeData_.cells[2]*2 << " atoms." << std::endl; }
double Tagger::calcGradient(double *expected) // calculate the gradience { double s = 0.0; buildLattice(); forwardbackward(); //begin size_t nodesize = getSizeOfNodes(); size_t tagsize = featureIndexPtr->getSizeOfTags(); for (size_t eachNodeIdx = 0; eachNodeIdx < nodesize; ++eachNodeIdx) for (size_t eachTagIdx = 0; eachTagIdx < tagsize; ++eachTagIdx) getNode(eachNodeIdx, eachTagIdx)->calcExpectation(expected, Z_, tagsize); for (size_t eachNodeIdx = 0; eachNodeIdx < nodesize; ++eachNodeIdx) { for (auto &featureId : *(getNode(eachNodeIdx, answer[eachNodeIdx])->fVector)) --expected[featureId + answer[eachNodeIdx]]; s += getNode(eachNodeIdx, answer[eachNodeIdx])->cost;//unigram cost const vector<Path*> &lpath = getNode(eachNodeIdx, answer[eachNodeIdx])->leftPath; for (auto it = lpath.begin(); it != lpath.end(); ++it) { if ((*it)->leftNode->y == answer[(*it)->leftNode->x]) { for (auto &featureId : *((*it)->fVector)) --expected[featureId + (*it)->leftNode->y * tagsize + (*it)->rightNode->y]; s += (*it)->cost; // bigram cost break; } } } viterbi(); // call for eval() releaseNodeAndPath(); return Z_ - s; }
bool DecoderLearnerTagger::parse(std::istream* is, std::ostream *os) { allocator_->free(); feature_index_->clear(); if (!begin_) { begin_data_.reset(new char[BUF_SIZE * 16]); begin_ = begin_data_.get(); } if (!is->getline(const_cast<char *>(begin_), BUF_SIZE * 16)) { is->clear(std::ios::eofbit|std::ios::badbit); return false; } initList(); buildLattice(); viterbi(); for (LearnerNode *node = end_node_list_[0]->next; node->next; node = node->next) { os->write(node->surface, node->length); *os << '\t' << node->feature << '\n'; } *os << "EOS\n"; return true; }
double TaggerImpl::gradient(double *expected) { if (x_.empty()) return 0.0; buildLattice(); forwardbackward(); double s = 0.0; for (size_t i = 0; i < x_.size(); ++i) for (size_t j = 0; j < ysize_; ++j) node_[i][j]->calcExpectation(expected, Z_, ysize_); for (size_t i = 0; i < x_.size(); ++i) { for (int *f = node_[i][answer_[i]]->fvector; *f != -1; ++f) --expected[*f + answer_[i]]; s += node_[i][answer_[i]]->cost; // UNIGRAM cost const std::vector<Path *> &lpath = node_[i][answer_[i]]->lpath; for (const_Path_iterator it = lpath.begin(); it != lpath.end(); ++it) { if ((*it)->lnode->y == answer_[(*it)->lnode->x]) { for (int *f = (*it)->fvector; *f != -1; ++f) --expected[*f +(*it)->lnode->y * ysize_ +(*it)->rnode->y]; s += (*it)->cost; // BIGRAM COST break; } } } viterbi(); // call for eval() return Z_ - s ; }
/**
 * Makes the history cache forget the given word ids, then rebuilds the
 * lattice from m_pPySegmentor.
 *
 * Does nothing at all when there is no history object (m_pHistory is null).
 *
 * @param wids  word ids to forget; may be empty, in which case only the
 *              lattice rebuild is performed.
 */
void CIMIContext::removeFromHistoryCache(std::vector<unsigned>& wids)
{
    if (!m_pHistory)
        return;

    // Taking &wids[0] on an empty vector is undefined behaviour (and aborts
    // under hardened standard libraries), so only form the pointer range
    // when there is at least one element.
    if (!wids.empty()) {
        unsigned *first = &wids[0];
        m_pHistory->forget(first, first + wids.size());
    }

    buildLattice(m_pPySegmentor);
}
/**
 * Deletes a user-defined candidate word.
 *
 * Only candidates whose word id is greater than INI_USRDEF_WID are affected.
 * For those, the word is forgotten by the history, removed from the user
 * dictionary, and the lattice is rebuilt from m_latestSegments starting at
 * candi.m_start + 1.
 *
 * @param candi  the candidate to delete; its m_wordId and m_start are read.
 */
void CIMIContext::deleteCandidate (CCandidate &candi)
{
    unsigned wid = candi.m_wordId;
    if (wid <= INI_USRDEF_WID)
        return;

    // removeFromHistoryCache() treats m_pHistory as nullable, so it must not
    // be dereferenced unconditionally here either.
    if (m_pHistory)
        m_pHistory->forget (wid);

    // NOTE(review): m_pUserDict is guarded the same way defensively; confirm
    // whether it can actually be null.
    if (m_pUserDict)
        m_pUserDict->removeWord (wid);

    buildLattice (m_latestSegments, candi.m_start+1);
}
double TaggerImpl::collins(double *collins) { if (x_.empty()) return 0.0; buildLattice(); viterbi(); // call for finding argmax y* double s = 0.0; // if correct parse, do not run forward + backward { size_t num = 0; for (size_t i = 0; i < x_.size(); ++i) if (answer_[i] == result_[i]) ++num; if (num == x_.size()) return 0.0; } for (size_t i = 0; i < x_.size(); ++i) { // answer { s += node_[i][answer_[i]]->cost; for (int *f = node_[i][answer_[i]]->fvector; *f != -1; ++f) ++collins[*f + answer_[i]]; const std::vector<Path *> &lpath = node_[i][answer_[i]]->lpath; for (const_Path_iterator it = lpath.begin(); it != lpath.end(); ++it) { if ((*it)->lnode->y == answer_[(*it)->lnode->x]) { for (int *f = (*it)->fvector; *f != -1; ++f) ++collins[*f +(*it)->lnode->y * ysize_ +(*it)->rnode->y]; s += (*it)->cost; break; } } } // result { s -= node_[i][result_[i]]->cost; for (int *f = node_[i][result_[i]]->fvector; *f != -1; ++f) --collins[*f + result_[i]]; const std::vector<Path *> &lpath = node_[i][result_[i]]->lpath; for (const_Path_iterator it = lpath.begin(); it != lpath.end(); ++it) { if ((*it)->lnode->y == result_[(*it)->lnode->x]) { for (int *f = (*it)->fvector; *f != -1; ++f) --collins[*f +(*it)->lnode->y * ysize_ +(*it)->rnode->y]; s -= (*it)->cost; break; } } } } return -s; }
// Builds features for this tagger and decodes the current sequence.
//
// An empty sequence is a successful no-op. Otherwise the lattice is built,
// forward-backward is run when n-best output is enabled or vlevel_ >= 1,
// viterbi() is run, and n-best state is initialised when nbest_ is set.
// Feature-building failure is handled by CHECK_FALSE, which reports
// feature_index_->what().
bool TaggerImpl::parse() {
  CHECK_FALSE(feature_index_->buildFeatures(this))
      << feature_index_->what();

  if (!x_.empty()) {
    buildLattice();

    if (nbest_ || vlevel_ >= 1) {
      forwardbackward();
    }

    viterbi();

    if (nbest_) {
      initNbest();
    }
  }

  return true;
}
bool EncoderLearnerTagger::read(std::istream *is, std::vector<double> *observed) { scoped_fixed_array<char, BUF_SIZE> line; char *column[8]; std::string sentence; std::vector<LearnerNode *> corpus; ans_path_list_.clear(); bool eos = false; for (;;) { if (!is->getline(line.get(), line.size())) { is->clear(std::ios::eofbit|std::ios::badbit); return true; } eos = (std::strcmp(line.get(), "EOS") == 0 || line[0] == '\0'); LearnerNode *m = new LearnerNode; std::memset(m, 0, sizeof(LearnerNode)); if (eos) { m->stat = MECAB_EOS_NODE; } else { const size_t size = tokenize(line.get(), "\t", column, 2); CHECK_DIE(size == 2) << "format error: " << line.get(); m->stat = MECAB_NOR_NODE; m->surface = mystrdup(column[0]); m->feature = mystrdup(column[1]); m->length = m->rlength = std::strlen(column[0]); } corpus.push_back(m); if (eos) { break; } sentence.append(column[0]); } CHECK_DIE(!sentence.empty()) << "empty sentence"; CHECK_DIE(eos) << "\"EOS\" is not found"; begin_data_.reset_string(sentence); begin_ = begin_data_.get(); initList(); size_t pos = 0; for (size_t i = 0; corpus[i]->stat != MECAB_EOS_NODE; ++i) { LearnerNode *found = 0; for (LearnerNode *node = lookup(pos); node; node = node->bnext) { if (node_cmp_eq(*(corpus[i]), *node, eval_size_, unk_eval_size_)) { found = node; break; } } // cannot find node even using UNKNOWN WORD PROSESSING if (!found) { LearnerNode *node = allocator_->newNode(); node->surface = begin_ + pos; node->length = node->rlength = std::strlen(corpus[i]->surface); node->feature = feature_index_->strdup(corpus[i]->feature); node->stat = MECAB_NOR_NODE; node->fvector = 0; node->wcost = 0.0; node->bnext = begin_node_list_[pos]; begin_node_list_[pos] = node; std::cout << "adding virtual node: " << node->feature << std::endl; } pos += corpus[i]->length; } buildLattice(); LearnerNode* prev = end_node_list_[0]; // BOS prev->anext = 0; pos = 0; for (size_t i = 0; i < corpus.size(); ++i) { LearnerNode *rNode = 0; for (LearnerNode *node = 
begin_node_list_[pos]; node; node = node->bnext) { if (corpus[i]->stat == MECAB_EOS_NODE || node_cmp_eq(*(corpus[i]), *node, eval_size_, unk_eval_size_)) { rNode = node; // take last node } } LearnerPath *lpath = 0; for (LearnerPath *path = rNode->lpath; path; path = path->lnext) { if (prev == path->lnode) { lpath = path; break; } } CHECK_DIE(lpath->fvector) << "lpath is NULL"; for (const int *f = lpath->fvector; *f != -1; ++f) { if (*f >= static_cast<long>(observed->size())) { observed->resize(*f + 1); } ++(*observed)[*f]; } if (lpath->rnode->stat != MECAB_EOS_NODE) { for (const int *f = lpath->rnode->fvector; *f != -1; ++f) { if (*f >= static_cast<long>(observed->size())) { observed->resize(*f + 1); } ++(*observed)[*f]; } } ans_path_list_.push_back(lpath); prev->anext = rNode; prev = rNode; if (corpus[i]->stat == MECAB_EOS_NODE) { break; } pos += std::strlen(corpus[i]->surface); } prev->anext = begin_node_list_[len_]; // connect to EOS begin_node_list_[len_]->anext = 0; for (size_t i = 0 ; i < corpus.size(); ++i) { delete [] corpus[i]->surface; delete [] corpus[i]->feature; delete corpus[i]; } return true; }
/**
 * @brief Builds the lattice
 *
 * Calls buildLattice() once and prints the cell counts per axis together
 * with their product (reported as the number of atoms) to stdout.
 */
void SCSystemBuilder::buildAtoms()
{
    buildLattice();

    // Summary line: cells per axis and the resulting atom count.
    std::cout << "Creating SC lattice with ";
    std::cout << latticeData_.cells[0] << "x"
              << latticeData_.cells[1] << "x"
              << latticeData_.cells[2] << " = ";
    std::cout << latticeData_.cells[0]*latticeData_.cells[1]*latticeData_.cells[2];
    std::cout << " atoms." << std::endl;
}