Beispiel #1
0
/**
 * \brief Entry point. Initializes logging, loads the program options and
 * dispatches to run<> with the arc, mapper and weight-functor types selected
 * by the HifstConstants::kAction option.
 * \param argc Number of command-line arguments.
 * \param argv Command-line arguments; argv[0] is used in the log banners.
 * \return 0 if the action was recognized, 1 otherwise.
 */
int main ( int argc, const char* argv[] ) {
  ucam::util::initLogger ( argc, argv );
  FORCELINFO ( argv[0] << " starts!" );
  ucam::util::RegistryPO rg ( argc, argv );
  FORCELINFO ( rg.dump ( "CONFIG parameters:\n=====================",
                         "=====================" ) );
  // Read the option once; the branches only differ in template arguments.
  const std::string action = rg.get<std::string> (HifstConstants::kAction);
  // Exit status reported to the caller; stays 0 unless the action is unknown.
  int status = 0;
  if (action == HifstConstants::kActionProjectweight2 ) {
    run<fst::LexStdArc
    , fst::LexStdArc
    ,  fst::GenericWeightMapper<fst::LexStdArc, fst::LexStdArc, fst::MakeWeight2<fst::LexStdArc> >,  fst::MakeWeight2<fst::LexStdArc>
    > (rg);
  } else if (action == HifstConstants::kActionLex2std) {
    run<fst::LexStdArc
    , fst::StdArc
    ,  fst::GenericWeightMapper<fst::LexStdArc, fst::StdArc, fst::LexToStd >,  fst::LexToStd
    > (rg);
  } else if (action == HifstConstants::kActionStd2lex) {
    run<fst::StdArc
    , fst::LexStdArc
    ,  fst::GenericWeightMapper<fst::StdArc, fst::LexStdArc, fst::MakeWeight2<fst::LexStdArc> >,  fst::MakeWeight2<fst::LexStdArc>
    > (rg);
  } else {
    LERROR ("Action not recognized! Check program option.");
    // Nothing was processed: make the failure visible to calling scripts.
    status = 1;
  }
  FORCELINFO ( argv[0] << " finished!" );
  return status;
}
int main (int argc,  const char* argv[] ) {
  ucam::util::initLogger ( argc, argv );
  FORCELINFO ( argv[0] << " starts!" );
  ucam::util::RegistryPO rg ( argc, argv );
  FORCELINFO ( rg.dump ( "CONFIG parameters:\n=====================",
                         "=====================" ) )  ;
  ucam::util::PatternAddress<unsigned> pi (rg.get<std::string>
      (HifstConstants::kInput) );
  ucam::util::PatternAddress<unsigned> po (rg.get<std::string>
      (HifstConstants::kOutput) );
  for ( ucam::util::IntRangePtr ir (ucam::util::IntRangeFactory ( rg,
                                    HifstConstants::kRangeOne ) );
        !ir->done();
        ir->next() ) {
    fst::VectorFst<fst::StdArc> *mfst = fst::VectorFstRead<fst::StdArc> (pi (
                                          ir->get() ) );
    if (!mfst) return 1;
    TopSort (mfst);
    boost::multiprecision::uint128_t j =
      countstrings<fst::StdArc, boost::multiprecision::uint128_t> (*mfst);
    std::stringstream ss;
    ss << j;
    ucam::util::oszfstream o (po (ir->get() ), true);
    o << ss.str()  << std::endl;
    LINFO ( pi (ir->get() ) << ":" << ss.str() ) ;
    o.close();
    delete mfst;
  }
  FORCELINFO ( argv[0] << " ends!" );
}
Beispiel #3
0
  /**
   * \brief Builds the RegistryPO from the command line, initializes the
   * logger and logs the start banner followed by a dump of the registry.
   * \param argc Number of command-line arguments.
   * \param argv Command-line arguments; the pointer is stored in argv_.
   */
  MainClass(int argc, const char *argv[])
      : argv_(argv) {
    // Order matters here: the registry is created before the logger is set up.
    rg_.reset(new RegistryPO(argc, argv));
    util::initLogger(argc, argv);
    FORCELINFO(argv[0] << " starts!");
    FORCELINFO(rg_->dump("CONFIG parameters:\n=====================",
                         "====================="));
  }
Beispiel #4
0
int
main ( int argc, const char *argv[] ) {
  ucam::util::initLogger ( argc, argv );
  FORCELINFO ( argv[0] << " starts!" );
  ucam::util::RegistryPO rg ( argc, argv );
  FORCELINFO ( rg.dump ( "CONFIG parameters:\n====================="
                         , "=====================" ) );
  ( ucam::util::Runner2<ucam::lmbr::SingleThreadedLmbrTask<>
    , ucam::lmbr::MultiThreadedLmbrTask<>  > ( rg ) ) ();
  FORCELINFO ( argv[0] << " ends!" );
  return 0;
}
/**
 * \brief Entry point. Initializes logging, loads the program options and
 * dispatches to run<> with the arc type selected by the
 * HifstConstants::kHifstSemiring option.
 * \param argc Number of command-line arguments.
 * \param argv Command-line arguments; argv[0] is used in the log banners.
 * \return 0 if the semiring option was recognized, 1 otherwise.
 */
int main (int argc,  const char* argv[] ) {
  ucam::util::initLogger ( argc, argv );
  FORCELINFO ( argv[0] << " starts!" );
  ucam::util::RegistryPO rg ( argc, argv );
  FORCELINFO ( rg.dump ( "CONFIG parameters:\n=====================",
                         "=====================" ) )  ;

  const std::string semiring =
    rg.get<std::string> (HifstConstants::kHifstSemiring);
  // Exit status reported to the caller; stays 0 unless the semiring is unknown.
  int status = 0;
  if (semiring == HifstConstants::kHifstSemiringStdArc) {
    run<fst::StdArc> (rg);
  } else if (semiring == HifstConstants::kHifstSemiringLexStdArc) {
    run<fst::LexStdArc> (rg);
  } else if (semiring == HifstConstants::kHifstSemiringTupleArc) {
    run<TupleArc32> (rg);
  } else {
    LERROR ("Sorry, semiring option not correctly defined");
    // Nothing was processed: make the failure visible to calling scripts.
    status = 1;
  }
  FORCELINFO ( argv[0] << " ends!" );
  return status;
}
Beispiel #6
0
 /**
  * \brief General run method from TaskInterface. Writes the stats to the file
  * named by statsoutput_ for the sentence index d.sidx: the CYK stats (only
  * if writeCYKStats_ is set) followed by the local pruning stats.
  * Does nothing when statsoutput_() is the empty string.
  * \param d General Data structure; its address is stored in d_ and d.sidx
  *          selects the output file name.
  * \return Always false.
  */
 bool run ( Data& d ) {
   if ( statsoutput_() == "" ) {
     return false;
   }
   d_ = &d;
   const std::string statsfile = statsoutput_ ( d.sidx );
   FORCELINFO ( "Writing stats to "  << statsfile );
   ucam::util::oszfstream out ( statsfile );
   if ( writeCYKStats_ ) {
     writeCYKStats ( out );
   }
   out << "=================================================================" <<
       std::endl
       << "Local pruning during lattice construction" << std::endl;
   writePruneStats ( out );
   out.close();
   return false;
 }
Beispiel #7
0
/**
 * \brief For every index in the HifstConstants::kRangeOne range, reads an fst
 * over ArcT from the input pattern, maps it into an fst over Arc2T using
 * MapperT (built from a default-constructed WeightFunctorT) and writes the
 * result to the output pattern.
 * \param rg Registry with the program options (input, output and range).
 * \return 0 when all files were processed, 1 if an input fst could not be read.
 */
int run ( ucam::util::RegistryPO const& rg) {
  ucam::util::PatternAddress<unsigned> input (rg.get<std::string>
      (HifstConstants::kInput) );
  ucam::util::PatternAddress<unsigned> output (rg.get<std::string>
      (HifstConstants::kOutput) );
  WeightFunctorT mwcopy;
  for ( ucam::util::IntRangePtr ir (ucam::util::IntRangeFactory ( rg,
                                    HifstConstants::kRangeOne ) );
        !ir->done();
        ir->next() ) {
    FORCELINFO ("Processing file " << input ( ir->get() ) );
    boost::scoped_ptr< fst::VectorFst<ArcT> > ifst (fst::VectorFstRead<ArcT>
        ( input ( ir->get() ) ) );
    // Never dereference a failed read; report it through the return value.
    if ( !ifst.get() ) {
      FORCELINFO ("Unable to read fst from " << input ( ir->get() ) );
      return 1;
    }
    // The output fst only lives for this iteration, so it needs no heap allocation.
    fst::VectorFst<Arc2T> ofst;
    Map ( *ifst, &ofst, MapperT ( mwcopy ) );
    fst::FstWrite<Arc2T> ( ofst, output (ir->get() ) );
  }
  // The function is declared int: flowing off the end would be undefined behaviour.
  return 0;
}
Beispiel #8
0
 /**
  * \brief Runs the lattice building procedure.
  *
  * Builds an RTN from the cyk parse data with buildRTN, optionally optimizes
  * it, expands it with Replace (into an FSA, or into a PDT when hipdtmode_ is
  * set), removes epsilons, applies filters, optionally composes with the full
  * reference lattice, applies the language model and finally prunes/expands or
  * copies the result into cykfstresult_.
  *
  * \param d          Contains the data structure with all the necessary elements (i.e. cykdata) and in which
  *                   a pointer to the output lattice is stored (d.fsts[outputkey_]).
  * \return true only when the USER_CHECK on cykdata fails; false in every other case,
  *         including a failed cyk parse (an empty lattice is published then).
  */
 bool run ( Data& d ) {
   //Start from an empty result lattice and clean per-sentence state.
   cykfstresult_.DeleteStates();
   this->d_ = &d;
   hieroindexexistence_.clear();
   LINFO ( "Running HiFST" );
   //Reset one-time warnings for inexistent language models.
   warned_ = false;
   pdtparens_.clear();
   cykdata_ = d.cykdata;
   if ( !USER_CHECK ( cykdata_, "cyk parse has not been executed previously?" ) ) {
     resetExternalData (d);
     return true;
   }
   if ( d.cykdata->success == CYK_RETURN_FAILURE ) {
     ///Keep calm, return empty lattice and carry on
     //cykfstresult_ had its states deleted above, so the published lattice is empty.
     //NOTE(review): aux is never used in this branch.
     fst::VectorFst<Arc> aux;
     d.fsts[outputkey_] = &cykfstresult_;
     d.vcat = cykdata_->vcat;
     resetExternalData (d);
     return false;
   }
   ///If not yet, initialize now functor with local conditions.
   initLocalConditions();
   //These three objects are allocated per call and deleted at the end of this method.
   rtn_ = new RTNT;
   if ( localprune_ )
     rtnnumstates_ = new ExpandedNumStatesRTNT;
   rfba_ = new ReplaceFstByArcT ( cykdata_->vcat, replacefstbyarc_,
                                  replacefstbyarcexceptions_, aligner_, replacefstbynumstates_ );
   piscount_ = 0; //reset pruning-in-search count to 0
   LINFO ( "Second Pass: FST-building!" );
   d.stats->setTimeStart ( "lattice-construction" );
   //Owned by rtn_;
   fst::Fst<Arc> *sfst = buildRTN ( cykdata_->categories["S"], 0,
                                    cykdata_->sentence.size() - 1 );
   d.stats->setTimeEnd ( "lattice-construction" );
   //Copy the top-level fst into the member, so it no longer depends on rtn_.
   cykfstresult_ = (*sfst);
   LINFO ( "Final - RTN head optimizations !" );
   optimize ( &cykfstresult_ ,
              std::numeric_limits<unsigned>::max() ,
              !hipdtmode_  && optimize_
            );
   FORCELINFO ("Stats for Sentence " << d.sidx <<
               ": local pruning, number of times=" << piscount_);
   d.stats->lpcount = piscount_; //store local pruning counts in stats
   LINFO ("RTN expansion starts now!");
   //Expand...
   {
     ///Define hieroindex
     //Label of the root fst: category term 1 * APCCTAG, x term 0, y term sentence.size() - 1,
     //i.e. the same arguments position-wise as the buildRTN call above.
     Label hieroindex = APBASETAG + 1 * APCCTAG + 0 * APXTAG +
                        ( cykdata_->sentence.size() - 1 ) * APYTAG;
     //Register the root fst unless its label is already recorded in hieroindexexistence_.
     if ( hieroindexexistence_.find ( hieroindex ) == hieroindexexistence_.end() )
       pairlabelfsts_.push_back ( pair< Label, const fst::Fst<Arc> * > ( hieroindex,
                                  &cykfstresult_ ) );
     ///Optimizations over the rtn -- they are generally worth doing...
     fst::ReplaceUtil<Arc> replace_util (pairlabelfsts_, hieroindex,
                                         !aligner_); //has ownership of modified rtn fsts
     if (rtnopt_) {
       LINFO ("rtn optimizations...");
       d_->stats->setTimeStart ("replace-opts");
       replace_util.ReplaceTrivial();
       replace_util.ReplaceUnique();
       replace_util.Connect();
       //Swap the original pairs for the optimized ones held by replace_util.
       pairlabelfsts_.clear();
       replace_util.GetFstPairs (&pairlabelfsts_);
       d_->stats->setTimeEnd ("replace-opts");
     }
     //After optimizations, we can write RTN if required by user
     writeRTN();
     //efst receives the expanded lattice and is released when this scope ends.
     boost::scoped_ptr< fst::VectorFst<Arc> > efst (new fst::VectorFst<Arc>);
     if (!hipdtmode_ ) {
       LINFO ("Final Replace (RTN->FSA), main index=" << hieroindex);
       d_->stats->setTimeStart ("replace-rtn-final");
       Replace (pairlabelfsts_, &*efst, hieroindex, !aligner_);
       d_->stats->setTimeEnd ("replace-rtn-final");
     } else {
       LINFO ("Final Replace (RTN->PDA)");
       d_->stats->setTimeStart ("replace-pdt-final");
       //This overload also fills pdtparens_, which is used later by Expand.
       Replace (pairlabelfsts_, &*efst, &pdtparens_, hieroindex);
       d_->stats->setTimeEnd ("replace-pdt-final");
       LINFO ("Number of pdtparens=" << pdtparens_.size() );
     }
     LINFO ("Removing Epsilons...");
     fst::RmEpsilon<Arc> ( &*efst );
     LINFO ("Done! NS=" << efst->NumStates() );
     //Apply filters
     applyFilters ( &*efst );
     //Compose with full reference lattice to ensure that final lattice is correct.
     //Only done when the reference lattice is present in d.fsts and has at least one state.
     if ( d.fsts.find ( fullreferencelatticekey_ ) != d.fsts.end() ) {
       if ( static_cast< fst::VectorFst<Arc> * >
            (d.fsts[fullreferencelatticekey_])->NumStates() > 0 ) {
         LINFO ( "Composing with full reference lattice, NS=" <<
                 static_cast< fst::VectorFst<Arc> * >
                 (d.fsts[fullreferencelatticekey_])->NumStates() );
         fst::Compose<Arc> ( *efst,
                             * ( static_cast<fst::VectorFst<Arc> * > (d.fsts[fullreferencelatticekey_]) ),
                             &*efst );
         LINFO ( "After composition: NS=" << efst->NumStates() );
       } else {
         LINFO ( "No composition with full ref lattice" );
       };
     } else {
       LINFO ( "No composition with full ref lattice" );
     };
     //Apply language model
     //res stays NULL when the lattice is empty or applyLanguageModel returns NULL.
     fst::VectorFst<Arc> *res = NULL;
     if (efst->NumStates() )
       res = applyLanguageModel ( *efst  );
     else {
       LWARN ("Empty lattice -- skipping LM application");
     }
     if ( res != NULL ) {
       //latlm takes ownership of res.
       boost::shared_ptr<fst::VectorFst<Arc> >latlm ( res );
       //NOTE(review): if res aliased efst, both smart pointers would own the same object.
       if ( latlm.get() == efst.get() ) {
         LWARN ( "Yikes! Unexpected situation! Will it crash? (muhahaha) " );
       }
       //Todo: union with shortest path...
       //A pruneweight_ equal to the float maximum means that no pruning is applied.
       if ( pruneweight_ < std::numeric_limits<float>::max() ) {
         if (!hipdtmode_ || pdtparens_.empty() ) {
           LINFO ("Pruning, weight=" << pruneweight_);
           fst::Prune<Arc> (*latlm, &cykfstresult_, mw_ ( pruneweight_ ) );
         } else {
           LINFO ("Expanding, weight=" << pruneweight_);
           fst::ExpandOptions<Arc> eopts (true, false, mw_ ( pruneweight_ ) );
           Expand ( *latlm, pdtparens_, &cykfstresult_, eopts);
           //Parentheses are dropped after expansion, so no ".parens" entry is published below.
           pdtparens_.clear();
         }
       } else {
         LINFO ("Copying through full lattice with lm scores");
         cykfstresult_ = *latlm;
       }
     } else {
       LINFO ("Copying through full lattice (no lm)");
       cykfstresult_ = *efst;
     }
     //Mirrors the conditional push_back at the top of this scope.
     //NOTE(review): with rtnopt_ the vector was refilled from replace_util, so the popped
     //element may not be the pushed one; the vector is cleared right after this scope anyway.
     if ( hieroindexexistence_.find ( hieroindex ) == hieroindexexistence_.end() )
       pairlabelfsts_.pop_back();
   }
   pairlabelfsts_.clear();
   LINFO ( "Reps" );
   fst::RmEpsilon ( &cykfstresult_ );
   LINFO ( "NS=" << cykfstresult_.NumStates() );
   //This should delete all pertinent fsas...
   LINFO ( "deleting data stuff..." );
   delete rtn_;
   if ( localprune_ )
     delete rtnnumstates_;
   delete rfba_;
   //Publish the results in the shared data structure.
   d.vcat = cykdata_->vcat;
   resetExternalData (d);
   d.fsts[outputkey_] = &cykfstresult_;
   if (hipdtmode_ && pdtparens_.size() )
     d.fsts[outputkey_ + ".parens" ] = &pdtparens_;
   LINFO ( "done..." );
   FORCELINFO ( "End Sentence ******************************************************" );
   //The matching setTimeStart ( "sent-dec" ) is not in this method -- presumably set by an earlier task.
   d.stats->setTimeEnd ( "sent-dec" );
   d.stats->message += "[" + ucam::util::getTimestamp() + "] End Sentence\n";
   return false;
 };
Beispiel #9
0
 /**
  * \brief Logs the closing banner, prefixed with the first entry of argv_.
  */
 ~MainClass() {
   const char *const program = argv_[0];
   FORCELINFO ( program << " ends!" );
 }