/// Refines this model based on a recently performed action and change in beliefs
void TransitionModel::trainIncremental(const GVec& beliefs, const GVec& actions, const GVec& nextBeliefs)
{
	// Buffer the pattern
	GVec& destIn = trainInput.row(trainPos);
	GVec& destOut = trainOutput.row(trainPos);
	trainPos++;
	trainSize = std::max(trainSize, trainPos);
	if(trainPos >= trainInput.rows())
		trainPos = 0;
	if(beliefs.size() + actions.size() != destIn.size() || beliefs.size() != destOut.size())
		throw Ex("size mismatch");
	destIn.put(0, beliefs);
	destIn.put(beliefs.size(), actions);

	// Store the scaled belief delta as the training target
	for(size_t i = 0; i < destOut.size(); i++)
		destOut[i] = 0.5 * (nextBeliefs[i] - beliefs[i]);

	// Refine the model
	size_t iters = std::min(trainIters, 1000 * trainSize);
	for(size_t i = 0; i < iters; i++)
		doSomeTraining();
}
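// Worked example (illustrative, not from the library): because beliefs are
// clipped to [-1, 1], the raw delta nextBeliefs - beliefs can span [-2, 2],
// so the 0.5 scale maps the training target back into [-1, 1]. For instance:
//
//   beliefs     = { 0.2, -0.4 }
//   nextBeliefs = { 0.6, -0.4 }
//   target      = 0.5 * (nextBeliefs - beliefs) = { 0.2, 0.0 }
//
// anticipateNextBeliefs() below inverts this encoding by adding
// 2.0 * model.outputLayer().activation() to the current beliefs.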
/// Decodes beliefs to predict observations
void ObservationModel::beliefsToObservations(const GVec& beliefs, GVec& observations)
{
	observations.resize(decoder.outputLayer().outputs());
	if(tutor)
		tutor->state_to_observations(beliefs, observations);
	else
	{
		decoder.forwardProp(beliefs);
		observations.copy(decoder.outputLayer().activation());
	}
}
/// Encodes observations to predict beliefs
void ObservationModel::observationsToBeliefs(const GVec& observations, GVec& beliefs)
{
	beliefs.resize(encoder.outputLayer().outputs());
	if(tutor)
		tutor->observations_to_state(observations, beliefs);
	else
	{
		encoder.forwardProp(observations);
		beliefs.copy(encoder.outputLayer().activation());
	}
}
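// Structural note (inferred from this pair of methods, not stated in the
// source): encoder and decoder form an autoencoder over observations, with
// the belief vector as the latent code. beliefsToObservations() runs the
// decoder half, observationsToBeliefs() runs the encoder half, and
// calibrateBeliefs() below searches the latent space directly.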
/// Refines the beliefs to correspond with actual observations
void ObservationModel::calibrateBeliefs(GVec& beliefs, const GVec& observations)
{
	if(tutor)
		tutor->observations_to_state(observations, beliefs);
	else
	{
		// Treat the beliefs as trainable inputs to the decoder: repeatedly
		// forward-propagate, backpropagate the observation error through the
		// decoder, and take a gradient step on the beliefs themselves.
		GNeuralNetLayer& layIn = encoder.outputLayer();
		for(size_t i = 0; i < calibrationIters; i++)
		{
			decoder.forwardProp(beliefs);
			decoder.backpropagate(observations);
			decoder.layer(0).backPropError(&layIn);
			beliefs.addScaled(decoder.learningRate(), layIn.error());
			beliefs.clip(-1.0, 1.0);
		}
	}
}
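// Usage sketch (hypothetical; the sensor source and surrounding agent loop
// are assumptions, not an API defined in this file): a fresh observation is
// first encoded into beliefs, then calibrateBeliefs() nudges those beliefs
// until the decoder reproduces the observation.
//
//   GVec obs, beliefs;
//   senses.getObservations(obs);                  // hypothetical sensor call
//   obsModel.observationsToBeliefs(obs, beliefs); // rough initial encoding
//   obsModel.calibrateBeliefs(beliefs, obs);      // refine in latent space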
void GNaiveBayes::predict(const GVec& in, GVec& out)
{
	if(m_nSampleCount <= 0)
		throw Ex("You must call train before you call predict");
	for(size_t n = 0; n < m_pRelLabels->size(); n++)
		out[n] = m_pOutputs[n]->predict(in.data(), m_equivalentSampleSize, &m_rand);
}
void GNaiveBayes::predictDistribution(const GVec& in, GPrediction* out)
{
	if(m_nSampleCount <= 0)
		throw Ex("You must call train before you call predictDistribution");
	for(size_t n = 0; n < m_pRelLabels->size(); n++)
		m_pOutputs[n]->eval(in.data(), &out[n], m_equivalentSampleSize);
}
/// Finds the best plan and copies its first step
void PlanningSystem::chooseNextActions(const GVec& beliefs, GVec& actions)
{
	if(tutor)
		tutor->choose_actions(beliefs, actions);
	else
	{
		// Find the best plan (according to the contentment model) and ask the mentor to evaluate it
		size_t planBestIndex = 0;
		double bestContentment = -1e300;
		for(size_t i = 0; i < plans.size(); i++)
		{
			double d = evaluatePlan(beliefs, *plans[i]);
			if(d > bestContentment)
			{
				bestContentment = d;
				planBestIndex = i;
			}
		}
		GMatrix& bestPlan = *plans[planBestIndex];
		askMentorToEvaluatePlan(beliefs, bestPlan);

		// Pick a random plan other than the best one and ask the mentor to evaluate it (for contrast)
		size_t planBindex = rand.next(plans.size() - 1);
		if(planBindex >= planBestIndex)
			planBindex++; // skip over the best plan
		askMentorToEvaluatePlan(beliefs, *plans[planBindex]);

		// Make a random one-step plan, and ask the mentor to evaluate it (for contrast)
		GVec& action = randomPlan[0];
		action.fillUniform(rand);
		askMentorToEvaluatePlan(beliefs, randomPlan);

		// Copy the first action vector of the best plan for our chosen action,
		// unless we are still burning in or an exploration step was drawn
		GVec* bestActions = &bestPlan[0];
		if(burnIn > 0 || rand.uniform() < explorationRate)
			bestActions = &randomPlan[0];
		if(burnIn > 0)
			burnIn--;
		GAssert(bestActions->size() == actionDims);
		actions.copy(*bestActions);
	}
}
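// Note on the sampling trick above (explanatory, not library documentation):
// drawing an index from [0, plans.size() - 2] and incrementing it whenever it
// is >= planBestIndex yields a uniform draw over all plans *except* the best
// one, without rejection sampling. E.g. with 5 plans and planBestIndex == 2,
// the draws 0,1,2,3 map to plans 0,1,3,4.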
// virtual
void GNaiveInstance::trainIncremental(const GVec& pIn, const GVec& pOut)
{
	if(!m_pHeap)
		m_pHeap = new GHeap(1024);
	double* pOutputs = (double*)m_pHeap->allocAligned(sizeof(double) * m_pRelLabels->size());
	GVec::copy(pOutputs, pOut.data(), m_pRelLabels->size());
	for(size_t i = 0; i < m_pRelFeatures->size(); i++)
	{
		if(pIn[i] != UNKNOWN_REAL_VALUE)
			m_pAttrs[i]->addInstance(pIn[i], pOutputs);
	}
}
// virtual
void GLinearDistribution::predictDistribution(const GVec& in, GPrediction* out)
{
	m_pAInv->multiply(in, m_buf);
	double v = in.dotProduct(m_buf);
	for(size_t i = 0; i < m_pWBar->rows(); i++)
	{
		GNormalDistribution* pNorm = out->makeNormal();
		double m = m_pWBar->row(i).dotProduct(in);
		pNorm->setMeanAndVariance(m, v);
		out++;
	}
}
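// Math note (hedged; inferred from the member names, not from documentation):
// this has the shape of the Bayesian linear regression predictive
// distribution. If m_pAInv holds the inverse posterior precision A^-1 and
// m_pWBar the posterior mean weights, then for input x each output i is
//
//   N( mean = wBar_i . x,  variance = x^T A^-1 x )
//
// with the same input-dependent variance shared across all output dimensions.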
/// Refines the encoder and decoder based on the new observation.
void ObservationModel::trainIncremental(const GVec& observation)
{
	// Buffer the pattern, alternating between the validation and training buffers
	GVec* dest;
	if(validationPos < trainPos)
	{
		dest = &validation.row(validationPos);
		validationPos++;
		validationSize = std::max(validationSize, validationPos);
		if(validationPos >= validation.rows())
			validationPos = 0;
	}
	else
	{
		dest = &train.row(trainPos);
		trainPos++;
		trainSize = std::max(trainSize, trainPos);
		if(trainPos >= train.rows())
			trainPos = 0;
	}
	dest->copy(observation);

	// Train
	size_t iters = std::min(trainIters, trainSize);
	for(size_t i = 0; i < iters; i++)
		doSomeTraining();
}
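// Buffering note (restating the branch above): since validationPos advances
// only while it lags trainPos, incoming observations strictly alternate
// between the train and validation buffers until one of them wraps around,
// giving a roughly 50/50 split of buffered patterns.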
/// Predicts the belief vector that will result if the specified action is performed
void TransitionModel::anticipateNextBeliefs(const GVec& beliefs, const GVec& actions, GVec& anticipatedBeliefs)
{
	if(tutor)
		tutor->transition(beliefs, actions, anticipatedBeliefs);
	else
	{
		GAssert(beliefs.size() + actions.size() == model.layer(0).inputs());
		buf.resize(beliefs.size() + actions.size());
		buf.put(0, beliefs);
		buf.put(beliefs.size(), actions);
		model.forwardProp(buf);

		// Decode the predicted delta: trainIncremental() stores targets scaled
		// by 0.5, so the inverse scale of 2.0 recovers the full belief change
		anticipatedBeliefs.copy(beliefs);
		anticipatedBeliefs.addScaled(2.0, model.outputLayer().activation());
		anticipatedBeliefs.clip(-1.0, 1.0);
	}
}
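// Usage sketch (hypothetical; the body of evaluatePlan() is not shown in this
// section, and contentmentModel.evaluate() is an assumed call): a plan is
// presumably scored by rolling beliefs forward through the transition model
// one action at a time, roughly:
//
//   GVec cur(beliefs.size()), next(beliefs.size());
//   cur.copy(beliefs);
//   for(size_t i = 0; i < plan.rows(); i++)
//   {
//       transitionModel.anticipateNextBeliefs(cur, plan[i], next);
//       cur.copy(next);
//   }
//   double score = contentmentModel.evaluate(cur); // hypothetical call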
/// Perturbs a random plan
void PlanningSystem::mutate()
{
	double d = rand.uniform();
	GMatrix& p = *plans[rand.next(plans.size())];
	if(d < 0.1)
	{
		// Lengthen the plan
		if(p.rows() < maxPlanLength)
		{
			GVec* newActions = new GVec(actionDims);
			newActions->fillUniform(rand);
			p.takeRow(newActions, rand.next(p.rows() + 1));
		}
	}
	else if(d < 0.2)
	{
		// Shorten the plan
		if(p.rows() > 1)
			p.deleteRow(rand.next(p.rows()));
	}
	else if(d < 0.7)
	{
		// Perturb a single element of an action vector
		GVec& actions = p[rand.next(p.rows())];
		size_t i = rand.next(actions.size());
		actions[i] = std::max(0.0, std::min(1.0, actions[i] + 0.03 * rand.normal()));
	}
	else if(d < 0.9)
	{
		// Perturb a whole action vector
		GVec& actions = p[rand.next(p.rows())];
		for(size_t i = 0; i < actions.size(); i++)
			actions[i] = std::max(0.0, std::min(1.0, actions[i] + 0.02 * rand.normal()));
	}
	else
	{
		// Perturb the whole plan
		for(size_t j = 0; j < p.rows(); j++)
		{
			GVec& actions = p[j];
			for(size_t i = 0; i < actions.size(); i++)
				actions[i] = std::max(0.0, std::min(1.0, actions[i] + 0.01 * rand.normal()));
		}
	}
}
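// Mutation operator summary (restating the branches above):
//   p = 0.1  insert a fresh uniform-random action at a random position
//   p = 0.1  delete a random action (if the plan has more than one)
//   p = 0.5  jitter one element of one action     (sigma = 0.03)
//   p = 0.2  jitter every element of one action   (sigma = 0.02)
//   p = 0.1  jitter every element of every action (sigma = 0.01)
// Note the inverse relationship: the broader the perturbation, the smaller
// the per-element step. All jittered values are clamped to [0, 1].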
// virtual
void GWag::trainInner(const GMatrix& features, const GMatrix& labels)
{
	GNeuralNetLearner* pTemp = NULL;
	std::unique_ptr<GNeuralNetLearner> hTemp;
	size_t weights = 0;
	GVec pWeightBuf;
	GVec pWeightBuf2;
	for(size_t i = 0; i < m_models; i++)
	{
		m_pNN->train(features, labels);
		if(pTemp)
		{
			// Average m_pNN with pTemp
			if(!m_noAlign)
				m_pNN->nn().align(pTemp->nn());
			pTemp->nn().weightsToVector(pWeightBuf.data());
			m_pNN->nn().weightsToVector(pWeightBuf2.data());
			pWeightBuf *= (double(i) / (i + 1));
			pWeightBuf.addScaled(1.0 / (i + 1), pWeightBuf2);
			pTemp->nn().vectorToWeights(pWeightBuf.data());
		}
		else
		{
			// Copy m_pNN by serializing and deserializing it
			GDom doc;
			GDomNode* pNode = m_pNN->serialize(&doc);
			GLearnerLoader ll;
			pTemp = new GNeuralNetLearner(pNode);
			hTemp.reset(pTemp);
			weights = pTemp->nn().weightCount();
			pWeightBuf.resize(weights);
			pWeightBuf2.resize(weights);
		}
	}

	// Copy the averaged weights back into the model
	pTemp->nn().weightsToVector(pWeightBuf.data());
	m_pNN->nn().vectorToWeights(pWeightBuf.data());
}
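// Math note (restating the averaging step above): after training model i
// (0-based), the running average of the weight vectors w_0 .. w_i is updated as
//
//   avg <- avg * i/(i+1) + w_i * 1/(i+1)
//
// which equals (w_0 + ... + w_i) / (i+1) by induction. The optional align()
// call permutes hidden units first, since averaging is only meaningful when
// corresponding weights play corresponding roles across the trained networks.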
GEnsemblePredictWorker(GMasterThread& master, GEnsemble* pEnsemble, size_t outDims)
: GWorkerThread(master), m_pEnsemble(pEnsemble)
{
	m_prediction.resize(outDims);
}
void GffLoader::load(GList<GenomicSeqData>& seqdata, GFValidateFunc* gf_validate,
                     bool doCluster, bool doCollapseRedundant, bool matchAllIntrons,
                     bool fuzzSpan, bool forceExons) {
	GffReader* gffr=new GffReader(f, this->transcriptsOnly, false); //not only mRNA features, not sorted
	gffr->showWarnings(this->showWarnings);
	//            keepAttrs             mergeCloseExons          noExonAttr
	gffr->readAll(this->fullAttributes, this->mergeCloseExons, this->noExonAttrs);
	GVec<int> pseudoAttrIds;
	GVec<int> pseudoFeatureIds;
	if (this->noPseudo) {
		//gather the feature-type and attribute name IDs that mark pseudogenes
		GffNameList& fnames = gffr->names->feats;
		for (int i=0;i<fnames.Count();i++) {
			char* n=fnames[i]->name;
			if (startsWith(n, "pseudo")) {
				pseudoFeatureIds.Add(fnames[i]->idx);
			}
		}
		GffNameList& attrnames = gffr->names->attrs;
		for (int i=0;i<attrnames.Count();i++) {
			char* n=attrnames[i]->name;
			char* p=strifind(n, "pseudo");
			//match attribute names starting with "pseudo", or "is" followed by "pseudo"
			if (p==n || (p==n+2 && tolower(n[0])=='i' && tolower(n[1])=='s')) {
				pseudoAttrIds.Add(attrnames[i]->idx);
			}
		}
	}
	if (verbose)
		GMessage(" .. loaded %d genomic features from %s\n", gffr->gflst.Count(), fname.chars());
	//add to GenomicSeqData, adding to existing loci and identifying intron-chain duplicates
	for (int k=0;k<gffr->gflst.Count();k++) {
		GffObj* m=gffr->gflst[k];
		if (strcmp(m->getFeatureName(), "locus")==0 && m->getAttr("transcripts")!=NULL) {
			continue; //discard locus meta-features
		}
		if (this->noPseudo) {
			bool is_pseudo=false;
			for (int i=0;i<pseudoFeatureIds.Count();++i) {
				if (pseudoFeatureIds[i]==m->ftype_id) { is_pseudo=true; break; }
			}
			if (is_pseudo) continue;
			for (int i=0;i<pseudoAttrIds.Count();++i) {
				char* attrv=NULL;
				if (m->attrs!=NULL) attrv=m->attrs->getAttr(pseudoAttrIds[i]);
				if (attrv!=NULL) {
					char fc=tolower(attrv[0]);
					if (fc=='t' || fc=='y' || fc=='1') { is_pseudo=true; break; }
				}
			}
			if (is_pseudo) continue;
			//a last resort would be to scan all attribute values for the "pseudogene"
			//keyword (NCBI does that for the "product" attr), but that is not done here
		} //pseudogene detection requested
		char* rloc=m->getAttr("locus");
		if (rloc!=NULL && startsWith(rloc, "RLOC_")) {
			m->removeAttr("locus", rloc);
		}
		if (forceExons) {
			m->exon_ftype_id=gff_fid_exon;
		}
		if (gf_validate!=NULL && !(*gf_validate)(m, NULL)) {
			continue;
		}
		m->isUsed(true); //so the gffreader won't destroy it
		int i=-1;
		GenomicSeqData f(m->gseq_id);
		GenomicSeqData* gdata=NULL;
		if (seqdata.Found(&f,i)) gdata=seqdata[i];
		else { //entry not created yet for this genomic seq
			gdata=new GenomicSeqData(m->gseq_id);
			seqdata.Add(gdata);
		}
		bool keep=placeGf(m, gdata, doCluster, doCollapseRedundant, matchAllIntrons, fuzzSpan);
		if (!keep) {
			m->isUsed(false); //allow the reader to reclaim the discarded feature
		}
	} //for each read gffObj
	if (f!=stdin) { fclose(f); f=NULL; }
	delete gffr;
}
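// Pseudogene filtering summary (restating the checks above): with noPseudo
// set, a feature is dropped when (a) its feature-type name starts with
// "pseudo", or (b) it carries an attribute whose name starts with "pseudo",
// or with "is" immediately followed by "pseudo" (e.g. "isPseudo"), and whose
// value starts with 't', 'y', or '1' (true/yes/1, letters case-insensitive).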
// virtual
void GNaiveBayes::trainIncremental(const GVec& in, const GVec& out)
{
	for(size_t n = 0; n < m_pRelLabels->size(); n++)
		m_pOutputs[n]->AddTrainingSample(in.data(), (int)out[n]);
	m_nSampleCount++;
}
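// Usage sketch (hypothetical; the constructor and the relation-setup call are
// assumptions from context, not shown in this file): feed labeled rows one at
// a time, then predict.
//
//   GNaiveBayes model;
//   model.beginIncrementalLearning(featureRel, labelRel); // assumed API
//   for(size_t i = 0; i < features.rows(); i++)
//       model.trainIncremental(features[i], labels[i]);
//   GVec prediction(labelRel.size());
//   model.predict(features[0], prediction);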
GEvolutionaryOptimizerNode(size_t dims)
{
	m_pVector.resize(dims);
}