// *****************************************************************************
// *                                                                           *
// * Function: MyTable::setRow                                                 *
// *                                                                           *
// *    Sets the fields of a row.                                              *
// *                                                                           *
// *****************************************************************************
// *                                                                           *
// *  Parameters:                                                              *
// *                                                                           *
// *  <cliInterface>                  OutputInfo &                    In       *
// *    is a reference to the CLI interface row data that was read.            *
// *                                                                           *
// *  <rowOut>                        PrivMgrMDRow &                  Out      *
// *    passes back a MyRow.                                                   *
// *                                                                           *
// *****************************************************************************
void MyTable::setRow(
   OutputInfo & cliInterface,
   PrivMgrMDRow & rowOut)
   
{

MyRow & row = static_cast<MyRow &>(rowOut);
char * ptr = NULL;
int32_t length = 0;
char value[500];   // assumes no column value exceeds 499 bytes
  
   // column 1:  CATALOG_NAME
   cliInterface.get(0,ptr,length);
   strncpy(value,ptr,length);
   value[length] = 0;
   row.catalogName_ = value;

   // column 2:  SCHEMA_NAME
   cliInterface.get(1,ptr,length);
   strncpy(value,ptr,length);
   value[length] = 0;
   row.schemaName_ = value;

   // column 3:  OBJECT_NAME
   cliInterface.get(2,ptr,length);
   strncpy(value,ptr,length);
   value[length] = 0;
   row.objectName_ = value;

   // column 4:  OBJECT_TYPE
   cliInterface.get(3,ptr,length);
   strncpy(value,ptr,length);
   value[length] = 0;
   row.objectType_ = PrivMgr::ObjectLitToEnum(value);

   // column 5:  OBJECT_UID
   cliInterface.get(4,ptr,length);
   row.objectUID_ = *(reinterpret_cast<int64_t*>(ptr));

   // column 6:  CREATE_TIME
   cliInterface.get(5,ptr,length);
   row.createTime_ = *(reinterpret_cast<int64_t*>(ptr));

   // column 7:  REDEF_TIME
   cliInterface.get(6,ptr,length);
   row.redefTime_ = *(reinterpret_cast<int64_t*>(ptr));
   
   // column 8:  VALID_DEF
   cliInterface.get(7,ptr,length);
   strncpy(value,ptr,length);
   value[length] = 0;
   row.isValid_ = (value[0] == 'Y');
   
   // column 9:  OBJECT_OWNER
   cliInterface.get(8,ptr,length);
   row.objectOwner_ = *(reinterpret_cast<int32_t*>(ptr));
   
   lastRowRead_ = row;

}
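A minimal caller sketch, not from the original source: it assumes the same Trafodion fetchAllRows/Queue/OutputInfo API used in the examples below, and the function name, the MyRow default constructor, and the query argument are hypothetical.

// Hypothetical caller: decode every fetched row through MyTable::setRow().
void readMyTableRows(MyTable & myTable, ExeCliInterface & cli, const char * query)
{
   Queue * rowsQueue = NULL;
   if (cli.fetchAllRows(rowsQueue, (char *)query, 0, FALSE, FALSE, TRUE) < 0)
      return;

   rowsQueue->position();
   for (int idx = 0; idx < rowsQueue->numEntries(); idx++)
   {
      OutputInfo * oi = (OutputInfo *)rowsQueue->getNext();
      MyRow row;                  // hypothetical default constructor
      myTable.setRow(*oi, row);   // populates row.catalogName_, row.objectUID_, ...
   }
}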
FullyConnected::FullyConnected(OutputInfo info, int J, bool bias,
                               ActivationFunction act, double stdDev,
                               double maxSquaredWeightNorm)
  : I(info.outputs()), J(J), bias(bias), act(act), stdDev(stdDev),
    maxSquaredWeightNorm(maxSquaredWeightNorm), W(J, I), Wd(J, I),
    b(J), bd(J), x(0), a(1, J), y(1, J), yd(1, J), deltas(1, J), e(1, I)
{
}
Subsampling::Subsampling(OutputInfo info, int kernelRows, int kernelCols,
                         bool bias, ActivationFunction act, double stdDev,
                         Regularization regularization)
  : I(info.outputs()), fm(info.dimensions[0]), inRows(info.dimensions[1]),
    inCols(info.dimensions[2]), kernelRows(kernelRows),
    kernelCols(kernelCols), bias(bias), act(act), stdDev(stdDev), x(0),
    e(1, I), fmInSize(-1), outRows(-1), outCols(-1), fmOutSize(-1),
    maxRow(-1), maxCol(-1), regularization(regularization)
{
}
// ****************************************************************************
// method: getListOfDirectlyReferencedObjects
//
// Returns a list of objects that are directly referenced by the passed-in
// objectUID
//
// Parameters:
//    cliInterface - used to get the list of object usages
//    objectUID - the UID being processed
//    objectList - a list of objectRefdByMe structures describing each usage
//
// returns:
//    0 - successful
//   -1 - unexpected error occurred
// ****************************************************************************
short CmpSeabaseDDL::getListOfDirectlyReferencedObjects (
  ExeCliInterface *cliInterface,
  const Int64 objectUID,
  NAList<objectRefdByMe> &objectsList)
{
  // Select all the rows from views_usage associated with the passed in
  // objectUID
  Lng32 cliRC = 0;
  char buf[4000];
  str_sprintf(buf, "select object_type, object_uid, catalog_name," 
                   "schema_name, object_name from %s.\"%s\".%s T, %s.\"%s\".%s VU " 
                   "where VU.using_view_uid = %Ld "
                   "and T.object_uid = VU.used_object_uid",
              getSystemCatalog(), SEABASE_MD_SCHEMA, SEABASE_OBJECTS,
              getSystemCatalog(), SEABASE_MD_SCHEMA, SEABASE_VIEWS_USAGE,
              objectUID);

  Queue * usingObjectsQueue = NULL;
  cliRC = cliInterface->fetchAllRows(usingObjectsQueue, buf, 0, FALSE, FALSE, TRUE);
  if (cliRC < 0)
    {
      cliInterface->retrieveSQLDiagnostics(CmpCommon::diags());
      return -1;
    }

  // set up an objectRefdByMe struct for each returned row
  usingObjectsQueue->position();
  for (int idx = 0; idx < usingObjectsQueue->numEntries(); idx++)
    {
      OutputInfo * oi = (OutputInfo*)usingObjectsQueue->getNext();
      objectRefdByMe objectInfo;
      objectInfo.objectType = NAString(oi->get(0));
      objectInfo.objectUID = *(Int64*)oi->get(1);
      objectInfo.catalogName = NAString(oi->get(2));
      objectInfo.schemaName = NAString(oi->get(3));
      objectInfo.objectName = NAString(oi->get(4));
      objectsList.insert(objectInfo);
    }

  return 0;
}
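A hedged usage sketch, not from the original source: it assumes NAList supports entries() and operator[] as in other Trafodion code, and the STMTHEAP constructor argument and the printing loop are illustrative only.

// Hypothetical caller: gather and print the objects a view references.
NAList<objectRefdByMe> objectsList(STMTHEAP);
if (getListOfDirectlyReferencedObjects(&cliInterface, objectUID, objectsList) == 0)
{
   for (CollIndex i = 0; i < objectsList.entries(); i++)
   {
      objectRefdByMe &ref = objectsList[i];
      cout << ref.catalogName << "." << ref.schemaName << "."
           << ref.objectName << " (" << ref.objectType << ")" << endl;
   }
}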
OutputInfo MaxPooling::initialize(std::vector<double*>& parameterPointers,
                                  std::vector<double*>& parameterDerivativePointers)
{
    OutputInfo info;
    info.dimensions.push_back(fm);
    outRows = inRows / kernelRows;
    outCols = inCols / kernelCols;
    fmOutSize = outRows * outCols;
    info.dimensions.push_back(outRows);
    info.dimensions.push_back(outCols);
    fmInSize = inRows * inCols;
    maxRow = inRows - kernelRows + 1;
    maxCol = inCols - kernelCols + 1;

    y.resize(1, info.outputs());
    deltas.resize(1, info.outputs());

    if(info.outputs() < 1)
        throw OpenANNException("Number of outputs in max-pooling layer is below"
                               " 1. You should either choose a smaller filter"
                               " size or generate a bigger input.");
    return info;
}
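The pooling and subsampling layers above lean on OutputInfo::outputs(); a minimal sketch of what it is assumed to compute, namely the flattened output size as the product of the stored dimensions. The struct name is invented to avoid clashing with the real class.

#include <functional>
#include <numeric>
#include <vector>

struct OutputInfoSketch
{
  std::vector<int> dimensions;

  // Flattened output size: feature maps x rows x cols for the layers above.
  int outputs() const
  {
    return std::accumulate(dimensions.begin(), dimensions.end(), 1,
                           std::multiplies<int>());
  }
};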
void MDDAGClassifier::saveLikelihoods(const string& dataFileName, const string& shypFileName,
                                      const string& outFileName, int numIterations)
{
    InputData* pData = loadInputData(dataFileName, shypFileName);

    if (_verbose > 0)
        cout << "Loading strong hypothesis..." << flush;

    // The class that loads the weak hypotheses
    UnSerialization us;

    // Where to put the weak hypotheses
    vector<BaseLearner*> weakHypotheses;

    // loads them
    us.loadHypotheses(shypFileName, weakHypotheses, pData);

    // where the results go
    vector< ExampleResults* > results;

    if (_verbose > 0)
        cout << "Classifying..." << flush;

    const int numClasses = pData->getNumClasses();
    const int numExamples = pData->getNumExamples();


    ofstream outFile(outFileName.c_str());
    string exampleName;

    if (_verbose > 0)
        cout << "Output likelihoods..." << flush;

    // get the results
    /////////////////////////////////////////////////////////////////////
    // computeResults( pData, weakHypotheses, results, numIterations );
    assert( !weakHypotheses.empty() );

    // Initialize the output info
    OutputInfo* pOutInfo = NULL;

    if ( !_outputInfoFile.empty() )
        pOutInfo = new OutputInfo(_outputInfoFile, "err");

    // Creating the results structures. See file Structures.h for the
    // PointResults structure
    results.clear();
    results.reserve(numExamples);
    for (int i = 0; i < numExamples; ++i)
        results.push_back( new ExampleResults(i, numClasses) );

    // sum votes for classes
    vector< AlphaReal > votesForExamples( numClasses );
    vector< AlphaReal > expVotesForExamples( numClasses );

    // iterator over all the weak hypotheses
    vector<BaseLearner*>::const_iterator whyIt;
    int t;

    if (pOutInfo)
        pOutInfo->initialize( pData );

    // for every feature: 1..T
    for (whyIt = weakHypotheses.begin(), t = 0;
            whyIt != weakHypotheses.end() && t < numIterations; ++whyIt, ++t)
    {
        BaseLearner* currWeakHyp = *whyIt;
        AlphaReal alpha = currWeakHyp->getAlpha();

        // for every point
        for (int i = 0; i < numExamples; ++i)
        {
            // a reference for clarity and speed
            vector<AlphaReal>& currVotesVector = results[i]->getVotesVector();

            // for every class
            for (int l = 0; l < numClasses; ++l)
                currVotesVector[l] += alpha * currWeakHyp->classify(pData, i, l);
        }

        // if needed output the step-by-step information
        if ( pOutInfo )
        {
            pOutInfo->outputIteration(t);
            pOutInfo->outputCustom(pData, currWeakHyp);

            // Margins and edge requires an update of the weight,
            // therefore I keep them out for the moment
            //outInfo.outputMargins(pData, currWeakHyp);
            //outInfo.outputEdge(pData, currWeakHyp);

            pOutInfo->endLine();

        } // if (pOutInfo)
        // calculate likelihoods from votes

        fill( votesForExamples.begin(), votesForExamples.end(), 0.0 );
        AlphaReal lLambda = 0.0;
        for (int i = 0; i < numExamples; ++i)
        {
            // a reference for clarity and speed
            vector<AlphaReal>& currVotesVector = results[i]->getVotesVector();
            AlphaReal sumExp = 0.0;
            // for every class
            for (int l = 0; l < numClasses; ++l)
            {
                expVotesForExamples[l] =  exp( currVotesVector[l] ) ;
                sumExp += expVotesForExamples[l];
            }

            if ( sumExp > numeric_limits<AlphaReal>::epsilon() )
            {
                for (int l = 0; l < numClasses; ++l)
                {
                    expVotesForExamples[l] /= sumExp;
                }
            }

            Example ex = pData->getExample( results[i]->getIdx() );
            vector<Label> labs = ex.getLabels();
            AlphaReal m = numeric_limits<AlphaReal>::infinity();
            for (int l = 0; l < numClasses; ++l)
            {
                if ( labs[l].y > 0 )
                {
                    if ( expVotesForExamples[l] > numeric_limits<AlphaReal>::epsilon() )
                    {
                        AlphaReal logVal = log( expVotesForExamples[l] );

                        if ( logVal != m ) {
                            lLambda += ( ( 1.0/(AlphaReal)numExamples ) * logVal );
                        }
                    }
                }
            }


        }


        outFile << t << "\t" << lLambda ;
        outFile << '\n';

        outFile.flush();
    }

    if (pOutInfo)
        delete pOutInfo;

    // computeResults( pData, weakHypotheses, results, numIterations );
    ///////////////////////////////////////////////////////////////////////////////////


    /*
     for (int i = 0; i < numExamples; ++i)
     {
     // output the name if it exists, otherwise the number
     // of the example
     exampleName = pData->getExampleName(i);
     if ( !exampleName.empty() )
     outFile << exampleName << ',';

     // output the posteriors
     outFile << results[i]->getVotesVector()[0];
     for (int l = 1; l < numClasses; ++l)
     outFile << ',' << results[i]->getVotesVector()[l];
     outFile << '\n';
     }
     */

    if (_verbose > 0)
        cout << "Done!" << endl;

    if (_verbose > 1)
    {
        cout << "\nClass order (You can change it in the header of the data file):" << endl;
        for (int l = 0; l < numClasses; ++l)
            cout << "- " << pData->getClassMap().getNameFromIdx(l) << endl;
    }

    // delete the input data file
    if (pData)
        delete pData;

    vector<ExampleResults*>::iterator it;
    for (it = results.begin(); it != results.end(); ++it)
        delete (*it);
}
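The per-iteration statistic written to outFile above is the mean log of the softmax probability assigned to each example's positive label. A self-contained restatement of that computation; the function name and signature are illustrative, not from the original source.

#include <cmath>
#include <limits>
#include <vector>

// lLambda = (1/N) * sum_i log( exp(v[i][y_i]) / sum_l exp(v[i][l]) )
double softmaxLogLikelihood(const std::vector< std::vector<double> >& votes,
                            const std::vector<int>& positiveLabel)
{
    double lLambda = 0.0;
    const std::size_t numExamples = votes.size();
    for (std::size_t i = 0; i < numExamples; ++i)
    {
        double sumExp = 0.0;
        for (std::size_t l = 0; l < votes[i].size(); ++l)
            sumExp += std::exp(votes[i][l]);
        const double p = std::exp(votes[i][positiveLabel[i]]) / sumExp;
        if (p > std::numeric_limits<double>::epsilon())
            lLambda += std::log(p) / numExamples;
    }
    return lLambda;
}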
// *****************************************************************************
// *                                                                           *
// * Function: CmpSeabaseDDL::dropSeabaseSchema                                *
// *                                                                           *
// *    Implements the DROP SCHEMA command.                                    *
// *                                                                           *
// *****************************************************************************
// *                                                                           *
// *  Parameters:                                                              *
// *                                                                           *
// *  <dropSchemaNode>                StmtDDLDropSchema *             In       *
// *    is a pointer to a drop schema parser node.                             *
// *                                                                           *
// *****************************************************************************
void CmpSeabaseDDL::dropSeabaseSchema(StmtDDLDropSchema * dropSchemaNode)
   
{

Lng32 cliRC = 0;

ComSchemaName schemaName(dropSchemaNode->getSchemaName());
NAString catName = schemaName.getCatalogNamePartAsAnsiString();
ComAnsiNamePart schNameAsComAnsi = schemaName.getSchemaNamePart();
NAString schName = schNameAsComAnsi.getInternalName();

ExeCliInterface cliInterface(STMTHEAP, NULL, NULL, 
CmpCommon::context()->sqlSession()->getParentQid());
Int32 objectOwnerID = 0;
Int32 schemaOwnerID = 0;
ComObjectType objectType;

   Int64 schemaUID = getObjectTypeandOwner(&cliInterface,catName.data(),schName.data(),
                             SEABASE_SCHEMA_OBJECTNAME,objectType,schemaOwnerID);
   
   // if schemaUID == -1, then either the schema does not exist or an unexpected error occurred
   if (schemaUID == -1)
   {
      // If an error occurred, return
      if (CmpCommon::diags()->getNumber(DgSqlCode::ERROR_) > 0)
        return;
 
      // schema does not exist and IF EXISTS specified, then ignore and continue
      if (dropSchemaNode->dropIfExists())
        return;

      // A Trafodion schema does not exist if the schema object row is not
      // present: CATALOG-NAME.SCHEMA-NAME.__SCHEMA__.
      *CmpCommon::diags() << DgSqlCode(-CAT_SCHEMA_DOES_NOT_EXIST_ERROR)
                          << DgSchemaName(schemaName.getExternalName().data());
      return;
   }

   if (!isDDLOperationAuthorized(SQLOperation::DROP_SCHEMA,
                                 schemaOwnerID,schemaOwnerID))
   {
      *CmpCommon::diags() << DgSqlCode(-CAT_NOT_AUTHORIZED);
      return;
   }
 
ComObjectName objName(catName,schName,NAString("dummy"),COM_TABLE_NAME,TRUE);

   if ((isSeabaseReservedSchema(objName) ||
        (schName == SEABASE_SYSTEM_SCHEMA)) &&
       !Get_SqlParser_Flags(INTERNAL_QUERY_FROM_EXEUTIL))
   {
      *CmpCommon::diags() << DgSqlCode(-CAT_USER_CANNOT_DROP_SMD_SCHEMA)
                          << DgSchemaName(schemaName.getExternalName().data());
      return;
   }
   
bool isVolatile = (memcmp(schName.data(),"VOLATILE_SCHEMA",strlen("VOLATILE_SCHEMA")) == 0);

// Can't drop a schema whose name begins with VOLATILE_SCHEMA unless the 
// keyword VOLATILE was specified in the DROP SCHEMA command. 
   if (isVolatile && !dropSchemaNode->isVolatile())
   {
      *CmpCommon::diags() << DgSqlCode(-CAT_RESERVED_METADATA_SCHEMA_NAME)
                          << DgTableName(schName);
      return;
   }

// Get a list of all objects in the schema, excluding the schema object itself.
char query[4000];

   str_sprintf(query,"SELECT TRIM(object_name), TRIM(object_type) "
                     "FROM %s.\"%s\".%s "
                     "WHERE catalog_name = '%s' AND schema_name = '%s' AND "
                     "object_name <> '"SEABASE_SCHEMA_OBJECTNAME"'" 
                     "FOR READ COMMITTED ACCESS",
               getSystemCatalog(),SEABASE_MD_SCHEMA,SEABASE_OBJECTS,
               (char*)catName.data(),(char*)schName.data());
  
Queue * objectsQueue = NULL;

   cliRC = cliInterface.fetchAllRows(objectsQueue, query, 0, FALSE, FALSE, TRUE);
   if (cliRC < 0)
   {
      cliInterface.retrieveSQLDiagnostics(CmpCommon::diags());
      return;
   }

   objectsQueue->position();
   if ((dropSchemaNode->getDropBehavior() == COM_RESTRICT_DROP_BEHAVIOR) &&
       (objectsQueue->numEntries() > 0))
   {
      OutputInfo * oi = (OutputInfo*)objectsQueue->getCurr(); 
      
      *CmpCommon::diags() << DgSqlCode(-CAT_SCHEMA_IS_NOT_EMPTY)
                          << DgTableName(oi->get(0));
      return;
   }

bool someObjectsCouldNotBeDropped = false;

// Drop libraries, procedures (SPJs), UDFs (functions), and views 
   objectsQueue->position();
   for (int idx = 0; idx < objectsQueue->numEntries(); idx++)
   {
      OutputInfo * vi = (OutputInfo*)objectsQueue->getNext(); 

      char * objName = vi->get(0);
      NAString objectTypeLit = vi->get(1);
      ComObjectType objectType = PrivMgr::ObjectLitToEnum(objectTypeLit.data());
      char buf[1000];
      NAString objectTypeString;
      NAString cascade = " ";
      
      switch (objectType)
      {
         // These object types are handled later and can be ignored for now.
         case COM_BASE_TABLE_OBJECT:
         case COM_INDEX_OBJECT:
         case COM_CHECK_CONSTRAINT_OBJECT:
         case COM_NOT_NULL_CONSTRAINT_OBJECT:
         case COM_PRIMARY_KEY_CONSTRAINT_OBJECT:
         case COM_REFERENTIAL_CONSTRAINT_OBJECT:
         case COM_SEQUENCE_GENERATOR_OBJECT:
         case COM_UNIQUE_CONSTRAINT_OBJECT:
         {
            continue;
         }
         case COM_LIBRARY_OBJECT:
         {
            objectTypeString = "LIBRARY";
            cascade = "CASCADE";
            break;
         }
         case COM_STORED_PROCEDURE_OBJECT:
         {
            objectTypeString = "PROCEDURE";
            break;
         }
         case COM_USER_DEFINED_ROUTINE_OBJECT:
         {
            objectTypeString = "FUNCTION";
            cascade = "CASCADE";
            break;
         }
         case COM_VIEW_OBJECT:
         {
            objectTypeString = "VIEW";
            cascade = "CASCADE";
            break;
         }
         // These object types should not be seen.
         case COM_MV_OBJECT: 
         case COM_MVRG_OBJECT:    
         case COM_TRIGGER_OBJECT:
         case COM_LOB_TABLE_OBJECT:
         case COM_TRIGGER_TABLE_OBJECT:
         case COM_SYNONYM_OBJECT:
         case COM_PRIVATE_SCHEMA_OBJECT:
         case COM_SHARED_SCHEMA_OBJECT:
         case COM_EXCEPTION_TABLE_OBJECT:
         case COM_LOCK_OBJECT:
         case COM_MODULE_OBJECT:
         default:
            SEABASEDDL_INTERNAL_ERROR("Unrecognized object type in schema");
            return;
      }
         
      str_sprintf(buf, "drop %s \"%s\".\"%s\".\"%s\" %s",
                  objectTypeString.data(),(char*)catName.data(),(char*)schName.data(), 
                  objName,cascade.data());
         
      cliRC = cliInterface.executeImmediate(buf);
      if (cliRC < 0 && cliRC != -CAT_OBJECT_DOES_NOT_EXIST_IN_TRAFODION)
         someObjectsCouldNotBeDropped = true;
   } 

// Drop all tables in the schema.  This will also drop any associated constraints. 
// Drop of histogram tables is deferred.
bool histExists = false;

   objectsQueue->position();
   for (int idx = 0; idx < objectsQueue->numEntries(); idx++)
   {
      OutputInfo * vi = (OutputInfo*)objectsQueue->getNext(); 

      NAString objName = vi->get(0);
      NAString objType = vi->get(1);

      // drop user objects first
      if (objType == COM_BASE_TABLE_OBJECT_LIT)
      {
         if (!(objName == HBASE_HIST_NAME || objName == HBASE_HISTINT_NAME))
         {
            if (dropOneTable(cliInterface,(char*)catName.data(), 
                             (char*)schName.data(),(char*)objName.data(),
                             isVolatile))
               someObjectsCouldNotBeDropped = true;
         }
         else
            histExists = true;
      } 
   } 

// Drop any remaining indexes.

   str_sprintf(query,"SELECT TRIM(object_name), TRIM(object_type) "
                     "FROM %s.\"%s\".%s "
                     "WHERE catalog_name = '%s' AND "
                     "      schema_name = '%s' AND "
                     "      object_type = '%s' "
                     "FOR READ COMMITTED ACCESS ",
               getSystemCatalog(),SEABASE_MD_SCHEMA,SEABASE_OBJECTS,
               (char*)catName.data(),(char*)schName.data(), 
               COM_INDEX_OBJECT_LIT);
   
   cliRC = cliInterface.fetchAllRows(objectsQueue,query,0,FALSE,FALSE,TRUE);
   if (cliRC < 0)
   {
      cliInterface.retrieveSQLDiagnostics(CmpCommon::diags());
      return;
   }

   objectsQueue->position();
   for (int idx = 0; idx < objectsQueue->numEntries(); idx++)
   {
      OutputInfo * vi = (OutputInfo*)objectsQueue->getNext(); 

      char * objName = vi->get(0);
      NAString objType = vi->get(1);
    
      if (objType == COM_INDEX_OBJECT_LIT)
      {
         char buf [1000];

         str_sprintf(buf, "DROP INDEX \"%s\".\"%s\".\"%s\" CASCADE",
                     (char*)catName.data(), (char*)schName.data(), objName);
         cliRC = cliInterface.executeImmediate(buf);

         if (cliRC < 0 && cliRC != -CAT_OBJECT_DOES_NOT_EXIST_IN_TRAFODION)
            someObjectsCouldNotBeDropped = true;
      }  
   }  

// Drop any remaining sequences.

   str_sprintf(query,"SELECT TRIM(object_name), TRIM(object_type) "
                     "FROM %s.\"%s\".%s "
                     "WHERE catalog_name = '%s' AND "
                     "      schema_name = '%s' AND "
                     "      object_type = '%s' "
                     "FOR READ COMMITTED ACCESS ",
               getSystemCatalog(),SEABASE_MD_SCHEMA,SEABASE_OBJECTS,
               (char*)catName.data(),(char*)schName.data(), 
               COM_SEQUENCE_GENERATOR_OBJECT_LIT);
   
   cliRC = cliInterface.fetchAllRows(objectsQueue,query,0,FALSE,FALSE,TRUE);
   if (cliRC < 0)
   {
      cliInterface.retrieveSQLDiagnostics(CmpCommon::diags());
      return;
   }

   objectsQueue->position();
   for (int idx = 0; idx < objectsQueue->numEntries(); idx++)
   {
      OutputInfo * vi = (OutputInfo*)objectsQueue->getNext(); 

      char * objName = vi->get(0);
      NAString objType = vi->get(1);
    
      if (objType == COM_SEQUENCE_GENERATOR_OBJECT_LIT)
      {
         char buf [1000];

         str_sprintf(buf, "DROP SEQUENCE \"%s\".\"%s\".\"%s\"",
                     (char*)catName.data(), (char*)schName.data(), objName);
         cliRC = cliInterface.executeImmediate(buf);

         if (cliRC < 0 && cliRC != -CAT_OBJECT_DOES_NOT_EXIST_IN_TRAFODION)
            someObjectsCouldNotBeDropped = true;
      }  
   }  

// For volatile schemas, sometimes only the objects get dropped.    
// If the dropObjectsOnly flag is set, just exit now, we are done.
   if (dropSchemaNode->dropObjectsOnly())
      return;

// Now drop any histogram objects
   if (histExists)
   {
      if (dropOneTable(cliInterface,(char*)catName.data(),(char*)schName.data(), 
                      (char*)HBASE_HISTINT_NAME,false))
         someObjectsCouldNotBeDropped = true;
      
      if (dropOneTable(cliInterface,(char*)catName.data(),(char*)schName.data(), 
                       (char*)HBASE_HIST_NAME,false))
         someObjectsCouldNotBeDropped = true;
   }

   if (someObjectsCouldNotBeDropped)
   {
      CmpCommon::diags()->clear();
      
      *CmpCommon::diags() << DgSqlCode(-CAT_UNABLE_TO_DROP_SCHEMA)
                          << DgSchemaName(catName + "." + schName);
      return;
   }
   
// Verify all objects in the schema have been dropped.   
   str_sprintf(query,"SELECT COUNT(*) "
                     "FROM %s.\"%s\".%s "
                     "WHERE catalog_name = '%s' AND schema_name = '%s' AND "
                     "object_name <> '"SEABASE_SCHEMA_OBJECTNAME"'" 
                     "FOR READ COMMITTED ACCESS",
               getSystemCatalog(),SEABASE_MD_SCHEMA,SEABASE_OBJECTS,
               (char*)catName.data(),(char*)schName.data());
               
int32_t length = 0;
int32_t rowCount = 0;

   cliRC = cliInterface.executeImmediate(query,(char*)&rowCount,&length,NULL);
  
   if (cliRC < 0)
   {
      cliInterface.retrieveSQLDiagnostics(CmpCommon::diags());
      return;
   }
   
   if (rowCount > 0)
   {
      CmpCommon::diags()->clear();
      
      *CmpCommon::diags() << DgSqlCode(-CAT_UNABLE_TO_DROP_SCHEMA)
                          << DgSchemaName(catName + "." + schName);
      return;
   }
   
// After all objects in the schema have been dropped, drop the schema object itself.
    
char buf [1000];

   str_sprintf(buf,"DELETE FROM %s.\"%s\".%s "
                   "WHERE CATALOG_NAME = '%s' AND SCHEMA_NAME = '%s' AND " 
                   "OBJECT_NAME = '"SEABASE_SCHEMA_OBJECTNAME"'",
               getSystemCatalog(),SEABASE_MD_SCHEMA,SEABASE_OBJECTS,
               (char*)catName.data(),(char*)schName.data());
   cliRC = cliInterface.executeImmediate(buf);
   if (cliRC < 0) 
      *CmpCommon::diags() << DgSqlCode(-CAT_UNABLE_TO_DROP_SCHEMA)
                          << DgSchemaName(catName + "." + schName);
    
}
    void FilterBoostLearner::run(const nor_utils::Args& args)
    {
        // load the arguments
        this->getArgs(args);

        time_t startTime, currentTime;
        time(&startTime);

        // get the registered weak learner (type from name)
        BaseLearner* pWeakHypothesisSource = 
            BaseLearner::RegisteredLearners().getLearner(_baseLearnerName);
        // initialize learning options; normally it's done in the strong loop
        // also, here we do it for Product learners, so input data can be created
        pWeakHypothesisSource->initLearningOptions(args);

        BaseLearner* pConstantWeakHypothesisSource = 
            BaseLearner::RegisteredLearners().getLearner("ConstantLearner");

        // get the training input data, and load it

        InputData* pTrainingData = pWeakHypothesisSource->createInputData();
        pTrainingData->initOptions(args);
        pTrainingData->load(_trainFileName, IT_TRAIN, _verbose);

        const int numClasses = pTrainingData->getNumClasses();
        const int numExamples = pTrainingData->getNumExamples();
                
        //initialize the margins variable
        _margins.resize( numExamples );
        for( int i=0; i<numExamples; i++ )
        {
            _margins[i].resize( numClasses );
            fill( _margins[i].begin(), _margins[i].end(), 0.0 );
        }


        // get the testing input data, and load it
        InputData* pTestData = NULL;
        if ( !_testFileName.empty() )
        {
            pTestData = pWeakHypothesisSource->createInputData();
            pTestData->initOptions(args);
            pTestData->load(_testFileName, IT_TEST, _verbose);
        }

        // The output information object
        OutputInfo* pOutInfo = NULL;


        if ( !_outputInfoFile.empty() ) 
        {
            // Baseline: constant classifier - goes into 0th iteration

            BaseLearner* pConstantWeakHypothesis = pConstantWeakHypothesisSource->create() ;
            pConstantWeakHypothesis->initLearningOptions(args);
            pConstantWeakHypothesis->setTrainingData(pTrainingData);
            AlphaReal constantEnergy = pConstantWeakHypothesis->run();

            pOutInfo = new OutputInfo(args);
            pOutInfo->initialize(pTrainingData);

            updateMargins( pTrainingData, pConstantWeakHypothesis );

            if (pTestData)
                pOutInfo->initialize(pTestData);
            pOutInfo->outputHeader(pTrainingData->getClassMap() );

            pOutInfo->outputIteration(-1);
            pOutInfo->outputCustom(pTrainingData, pConstantWeakHypothesis);

            if (pTestData)
            {
                pOutInfo->separator();
                pOutInfo->outputCustom(pTestData, pConstantWeakHypothesis);
            }
                        
            pOutInfo->outputCurrentTime();

            pOutInfo->endLine();
            pOutInfo->initialize(pTrainingData);
                        
            if (pTestData)
                pOutInfo->initialize(pTestData);
        }
        // reload the previously found weak learners if -resume is set. 
        // otherwise just return 0
        int startingIteration = resumeWeakLearners(pTrainingData);


        Serialization ss(_shypFileName, _isShypCompressed );
        ss.writeHeader(_baseLearnerName); // this must go after resumeProcess has been called

        // perform the resuming if necessary. If not it will just return
        resumeProcess(ss, pTrainingData, pTestData, pOutInfo);

        if (_verbose == 1)
            cout << "Learning in progress..." << endl;
                                
        ///////////////////////////////////////////////////////////////////////
        // Starting the AdaBoost main loop
        ///////////////////////////////////////////////////////////////////////
        for (int t = startingIteration; t < _numIterations; ++t)
        {                       
            if (_verbose > 1)
                cout << "------- WORKING ON ITERATION " << (t+1) << " -------" << endl;
                
            // create the weak learner
            BaseLearner* pWeakHypothesis;
            BaseLearner* pConstantWeakHypothesis;
            pWeakHypothesis = pWeakHypothesisSource->create();
            pWeakHypothesis->initLearningOptions(args);
            //pTrainingData->clearIndexSet();
            pWeakHypothesis->setTrainingData(pTrainingData);
            AlphaReal edge, energy=0.0;
                        
            // create the constant learner
            pConstantWeakHypothesis = pConstantWeakHypothesisSource->create() ;
            pConstantWeakHypothesis->initLearningOptions(args);
            pConstantWeakHypothesis->setTrainingData(pTrainingData);
            AlphaReal constantEdge = -numeric_limits<AlphaReal>::max();
                        
            int currentNumberOfUsedData = static_cast<int>(_Cn * log(t+3.0));
                        
            if ( _onlineWeakLearning )
            {
                //check whether the weak learner is a StochasticLearner
                try {
                    StochasticLearner* pStochasticLearner = dynamic_cast<StochasticLearner*>(pWeakHypothesis);
                    StochasticLearner* pStochasticConstantWeakHypothesis = dynamic_cast<StochasticLearner*> (pConstantWeakHypothesis);

                    // dynamic_cast on pointers yields NULL instead of throwing
                    // bad_cast, so a failed cast must be raised explicitly here
                    if (pStochasticLearner == NULL || pStochasticConstantWeakHypothesis == NULL)
                        throw bad_cast();

                    pStochasticLearner->initLearning();
                    pStochasticConstantWeakHypothesis->initLearning();
                                        
                    if (_verbose>1)
                        cout << "Number of random instances: \t" << currentNumberOfUsedData << endl;
                                        
                    // set the weights
                    setWeightToMargins(pTrainingData);
                                        
                    //learning
                    for (int i=0; i<currentNumberOfUsedData; ++i )
                    {
                        int randomIndex = (rand() % pTrainingData->getNumExamples());   
                        //int randomIndex = getRandomIndex();
                        pStochasticLearner->update(randomIndex);
                        pStochasticConstantWeakHypothesis->update(randomIndex);
                    }                                       
                    pStochasticLearner->finishLearning();           
                    pStochasticConstantWeakHypothesis->finishLearning();
                }
                catch (bad_cast& e) {
                    cerr << "The weak learner must be a StochasticLearner!!!" << endl;
                    exit(-1);
                }                                                                                               
            }
            else
            {
                filter( pTrainingData, currentNumberOfUsedData );
                if ( pTrainingData->getNumExamples() < 2 ) 
                {
                    filter( pTrainingData, currentNumberOfUsedData, false );
                }
                                
                if (_verbose > 1)
                {
                    cout << "--> Size of training data = " << pTrainingData->getNumExamples() << endl;
                }
                                
                energy = pWeakHypothesis->run();                                                                
                pConstantWeakHypothesis->run(); 
            }                       

            //estimate edge
            filter( pTrainingData, currentNumberOfUsedData, false );
            edge = pWeakHypothesis->getEdge(true) / 2.0;                                            
            constantEdge = pConstantWeakHypothesis->getEdge() / 2.0;
                        
                        
            if ( constantEdge > edge )
            {
                delete pWeakHypothesis;
                pWeakHypothesis = pConstantWeakHypothesis;
                edge = constantEdge;
            } else {
                delete pConstantWeakHypothesis;
            }
                                                                        
            // calculate alpha
            AlphaReal alpha = 0.0;
            alpha = 0.5 * log( ( 1 + edge ) / ( 1 - edge ) );
            pWeakHypothesis->setAlpha( alpha );
            _sumAlpha += alpha;
                        
            if (_verbose > 1)
                cout << "Weak learner: " << pWeakHypothesis->getName()<< endl;
            // Output the step-by-step information
            pTrainingData->clearIndexSet();
            printOutputInfo(pOutInfo, t, pTrainingData, pTestData, pWeakHypothesis);

            // Updates the weights and returns the edge
            //AlphaReal gamma = updateWeights(pTrainingData, pWeakHypothesis);

            if (_verbose > 1)
            {
                cout << setprecision(5)
                     << "--> Alpha = " << pWeakHypothesis->getAlpha() << endl
                     << "--> Edge  = " << edge << endl
                     << "--> Energy  = " << energy << endl
                    //            << "--> ConstantEnergy  = " << constantEnergy << endl
                    //            << "--> difference  = " << (energy - constantEnergy) << endl
                    ;
            }

            // update the margins
            //saveMargins();
            updateMargins( pTrainingData, pWeakHypothesis );
                        
            // append the current weak learner to strong hypothesis file,
            // that is, serialize it.
            ss.appendHypothesis(t, pWeakHypothesis);

            // Add it to the internal list of weak hypotheses
            _foundHypotheses.push_back(pWeakHypothesis); 

            // check if the time limit has been reached
            if (_maxTime > 0)
            {
                time( &currentTime );
                float diff = difftime(currentTime, startTime); // difftime is in seconds
                diff /= 60; // = minutes

                if (diff > _maxTime)
                {
                    if (_verbose > 0)
                        cout << "Time limit of " << _maxTime 
                             << " minutes has been reached!" << endl;
                    break;     
                }
            } // check for maxtime
            // NOTE: pWeakHypothesis is now owned by _foundHypotheses (and has
            // been serialized), so it must not be deleted here.
        }  // loop on iterations
        /////////////////////////////////////////////////////////

        // write the footer of the strong hypothesis file
        ss.writeFooter();

        // Free the two input data objects
        if (pTrainingData)
            delete pTrainingData;
        if (pTestData)
            delete pTestData;

        if (pOutInfo)
            delete pOutInfo;

        if (_verbose > 0)
            cout << "Learning completed." << endl;
    }
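The main loop above converts the estimated edge into a vote weight with the classical AdaBoost formula. A standalone restatement; the comment on the valid edge range is an addition, not in the original.

#include <cmath>

// alpha = 0.5 * ln((1 + edge) / (1 - edge)), defined for edge in (-1, 1).
double alphaFromEdge(double edge)
{
    // At edge = +/-1 the log diverges; callers are assumed to keep the
    // edge strictly inside the open interval.
    return 0.5 * std::log((1.0 + edge) / (1.0 - edge));
}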
/**
 * The main function. Everything starts here!
 * \param argc The number of arguments.
 * \param argv The arguments.
 * \date 11/11/2005
 */
int main(int argc, const char* argv[])
{
	// initializing the random number generator
	srand ( time(NULL) );
	
	// no need to synchronize with C style stream
	std::ios_base::sync_with_stdio(false);
	
#if STABLE_SORT
	cerr << "WARNING: Stable sort active! It might be slower!!" << endl;
#endif
	
	//////////////////////////////////////////////////////////////////////////
	// Standard arguments
	nor_utils::Args args;
	
	args.setArgumentDiscriminator("--");
	
	args.declareArgument("help");
	args.declareArgument("static");
	
	args.declareArgument("h", "Help", 1, "<optiongroup>");
	
	//////////////////////////////////////////////////////////////////////////
	// Basic Arguments
	
	args.setGroup("Parameters");
	
	args.declareArgument("train", "Performs training.", 2, "<dataFile> <nInterations>");
	args.declareArgument("traintest", "Performs training and test at the same time.", 3, "<trainingDataFile> <testDataFile> <nInterations>");
	args.declareArgument("trainvalidtest", "Performs training and test at the same time.", 4, "<trainingDataFile> <validDataFile> <testDataFile> <nInterations>");
	args.declareArgument("test", "Test the model.", 3, "<dataFile> <numIters> <shypFile>");
	args.declareArgument("test", "Test the model and output the results", 4, "<datafile> <shypFile> <numIters> <outFile>");
	args.declareArgument("cmatrix", "Print the confusion matrix for the given model.", 2, "<dataFile> <shypFile>");
	args.declareArgument("cmatrixfile", "Print the confusion matrix with the class names to a file.", 3, "<dataFile> <shypFile> <outFile>");
	args.declareArgument("posteriors", "Output the posteriors for each class, that is the vector-valued discriminant function for the given dataset and model.", 4, "<dataFile> <shypFile> <outFile> <numIters>");
	args.declareArgument("posteriors", "Output the posteriors for each class, that is the vector-valued discriminant function for the given dataset and model periodically.", 5, "<dataFile> <shypFile> <outFile> <numIters> <period>");	
		
	args.declareArgument("encode", "Save the coefficient vector of boosting individually on each point using ParasiteLearner", 6, "<inputDataFile> <autoassociativeDataFile> <outputDataFile> <nIterations> <poolFile> <nBaseLearners>");	
	args.declareArgument("ssfeatures", "Print matrix data for SingleStump-Based weak learners (if numIters=0 it means all of them).", 4, "<dataFile> <shypFile> <outFile> <numIters>");
	
	args.declareArgument( "fileformat", "Defines the type of intput file. Available types are:\n" 
						 "* simple: each line has attributes separated by whitespace and class at the end (DEFAULT!)\n"
						 "* arff: arff filetype. The header file can be specified using --headerfile option\n"
						 "* arffbzip: bziped arff filetype. The header file can be specified using --headerfile option\n"
						 "* svmlight: \n"
						 "(Example: --fileformat simple)",
                         1, "<fileFormat>" );
	
	args.declareArgument("headerfile", "The header file for arff and SVMLight and arff formats.", 1, "header.txt");
	
	args.declareArgument("constant", "Check constant learner in each iteration.", 0, "");
	args.declareArgument("timelimit", "Time limit in minutes", 1, "<minutes>" );
	args.declareArgument("stronglearner", "Available strong learners:\n"
						 "AdaBoost (default)\n"
						 "FilterBoost\n"
                         "SoftCascade\n"
                         "VJcascade\n", 1, "<stronglearner>" );
	
	args.declareArgument("slowresumeprocess", "Computes every statitstic in each iteration (slow resume)\n"
						 "Computes only the statistics in the last iteration (fast resume, default)\n", 0, "" );
	args.declareArgument("weights", "Outputs the weights of instances at the end of the learning process", 1, "<filename>" );
	args.declareArgument("Cn", "Resampling size for FilterBoost (default=300)", 1, "<value>" );
	
	args.declareArgument("onlinetraining", "The weak learner will be trained online\n", 0, "" );
	
	//// ignored for the moment!
	//args.declareArgument("arffheader", "Specify the arff header.", 1, "<arffHeaderFile>");
	
	// for MDDAG
	//args.setGroup("MDDAG");
	args.declareArgument("traintestmddag", "Performs training and test at the same time using mddag.", 5, "<trainingDataFile> <testDataFile> <modelFile> <nIterations> <baseIter>");
	args.declareArgument("policytrainingiter", "The iteration number the policy learner takes.", 1, "<iternum>");
	args.declareArgument("rollouts", "The number of rollouts.", 1, "<num>");
	args.declareArgument("rollouttype", "Rollout type (montecarlo or szatymaz)", 1, "<rollouttype>");
	args.declareArgument("beta", "Trade-off parameter", 1, "<beta>");
	args.declareArgument("outdir", "Output directory.", 1, "<outdir>");
	args.declareArgument("policyalpha", "Alpha for policy array.", 1, "<alpha>");
	args.declareArgument("succrewardtype", "Rewrd type (e01 or hammng)", 1, "<rward_type");
	args.declareArgument("outtrainingerror", "Output training error", 0, "");
	args.declareArgument("epsilon", "Exploration term", 1, "<epsilon>");
	args.declareArgument("updateperc", "Number of component in the policy are updated", 1, "<perc>");
	
	// for VJ cascade
	VJCascadeLearner::declareBaseArguments(args);
    
    // for SoftCascade
    SoftCascadeLearner::declareBaseArguments(args);
	//////////////////////////////////////////////////////////////////////////
	// Options
	
	args.setGroup("I/O Options");
	
	/////////////////////////////////////////////
	// these are valid only for .txt input!
	// they might be removed!
	args.declareArgument("d", "The separation characters between the fields (default: whitespaces).\nExample: -d \"\\t,.-\"\nNote: new-line is always included!", 1, "<separators>");
	args.declareArgument("classend", "The class is the last column instead of the first (or second if -examplelabel is active).");
	args.declareArgument("examplename", "The data file has an additional column (the very first) which contains the 'name' of the example.");
	/////////////////////////////////////////////
	
	args.setGroup("Basic Algorithm Options");
	args.declareArgument("weightpolicy", "Specify the type of weight initialization. The user specified weights (if available) are used inside the policy which can be:\n"
						 "* sharepoints Share the weight equally among data points and between positiv and negative labels (DEFAULT)\n"
						 "* sharelabels Share the weight equally among data points\n"
						 "* proportional Share the weights freely", 1, "<weightType>");
	
	
	args.setGroup("General Options");
	
	args.declareArgument("verbose", "Set the verbose level 0, 1 or 2 (0=no messages, 1=default, 2=all messages).", 1, "<val>");
	args.declareArgument("outputinfo", "Output informations on the algorithm performances during training, on file <filename>.", 1, "<filename>");
	args.declareArgument("outputinfo", "Output specific informations on the algorithm performances during training, on file <filename> <outputlist>. <outputlist> must be a concatenated list of three characters abreviation (ex: err for error, fpr for false positive rate)", 2, "<filename> <outputlist>");

	args.declareArgument("seed", "Defines the seed for the random operations.", 1, "<seedval>");
	
	//////////////////////////////////////////////////////////////////////////
	// Shows the list of available learners
	string learnersComment = "Available learners are:";
	
	vector<string> learnersList;
	BaseLearner::RegisteredLearners().getList(learnersList);
	vector<string>::const_iterator it;
	for (it = learnersList.begin(); it != learnersList.end(); ++it)
	{
		learnersComment += "\n ** " + *it;
		// defaultLearner is defined in Defaults.h
		if ( *it == defaultLearner )
			learnersComment += " (DEFAULT)";
	}
	
	args.declareArgument("learnertype", "Change the type of weak learner. " + learnersComment, 1, "<learner>");
	
	//////////////////////////////////////////////////////////////////////////
	//// Declare arguments that belongs to all weak learners
	BaseLearner::declareBaseArguments(args);
	
	////////////////////////////////////////////////////////////////////////////
	//// Weak learners (and input data) arguments
	for (it = learnersList.begin(); it != learnersList.end(); ++it)
	{
		args.setGroup(*it + " Options");
		// add weaklearner-specific options
		BaseLearner::RegisteredLearners().getLearner(*it)->declareArguments(args);
	}
	
	//////////////////////////////////////////////////////////////////////////
	//// Declare arguments that belongs to all bandit learner
	GenericBanditAlgorithm::declareBaseArguments(args);
	
	
	//////////////////////////////////////////////////////////////////////////////////////////  
	//////////////////////////////////////////////////////////////////////////////////////////
	
	switch ( args.readArguments(argc, argv) )
	{
		case nor_utils::AOT_NO_ARGUMENTS:
			showBase();
			break;
			
		case nor_utils::AOT_UNKOWN_ARGUMENT:
			exit(1);
			break;
			
		case nor_utils::AOT_INCORRECT_VALUES_NUMBER:
			exit(1);
			break;
			
		case nor_utils::AOT_OK:
			break;
	}
	
	//////////////////////////////////////////////////////////////////////////////////////////  
	//////////////////////////////////////////////////////////////////////////////////////////
	
	if ( args.hasArgument("help") )
		showHelp(args, learnersList);
	if ( args.hasArgument("static") )
		showStaticConfig();
	
	//////////////////////////////////////////////////////////////////////////////////////////  
	//////////////////////////////////////////////////////////////////////////////////////////
	
	if ( args.hasArgument("h") )
		showOptionalHelp(args);
	
	//////////////////////////////////////////////////////////////////////////////////////////  
	//////////////////////////////////////////////////////////////////////////////////////////
	
	int verbose = 1;
	
	if ( args.hasArgument("verbose") )
		args.getValue("verbose", 0, verbose);
	
	//////////////////////////////////////////////////////////////////////////////////////////  
	//////////////////////////////////////////////////////////////////////////////////////////
	
	// defines the seed
	if (args.hasArgument("seed"))
	{
		unsigned int seed = args.getValue<unsigned int>("seed", 0);
		srand(seed);
	}
	
	//////////////////////////////////////////////////////////////////////////////////////////  
	//////////////////////////////////////////////////////////////////////////////////////////
	
	GenericStrongLearner* pModel = NULL;
	
	if ( args.hasArgument("train") ||
        args.hasArgument("traintest") || 
	    args.hasArgument("trainvalidtest") ) // for Viola-Jones Cascade
	{
		
		// get the name of the learner
		string baseLearnerName = defaultLearner;
		if ( args.hasArgument("learnertype") )
			args.getValue("learnertype", 0, baseLearnerName);
		
		checkBaseLearner(baseLearnerName);
		if (verbose > 1)    
			cout << "--> Using learner: " << baseLearnerName << endl;
		
		// This should be changed: the user decides the strong learner
		BaseLearner*  pWeakHypothesisSource = BaseLearner::RegisteredLearners().getLearner(baseLearnerName);
		pModel = pWeakHypothesisSource->createGenericStrongLearner( args );
		
		pModel->run(args);
	}
	//////////////////////////////////////////////////////////////////////////////////////////
	//////////////////////////////////////////////////////////////////////////////////////////
	else if ( args.hasArgument("traintestmddag") )
	{
		// -traintestmddag <trainingDataFile> <testDataFile> <modelFile> <nIterations> <baseIter>
		string shypFileName = args.getValue<string>("traintestmddag", 2);
		
		string baseLearnerName = UnSerialization::getWeakLearnerName(shypFileName);
		
		BaseLearner*  pWeakHypothesisSource = BaseLearner::RegisteredLearners().getLearner(baseLearnerName);
		pModel = pWeakHypothesisSource->createGenericStrongLearner( args );
		
		pModel->run(args);
		
	}		
	//////////////////////////////////////////////////////////////////////////////////////////
	//////////////////////////////////////////////////////////////////////////////////////////
	else if ( args.hasArgument("test") )
	{
		// -test <dataFile> <shypFile> <numIters>
		string shypFileName = args.getValue<string>("test", 1);
		
		string baseLearnerName = UnSerialization::getWeakLearnerName(shypFileName);
                
		BaseLearner*  pWeakHypothesisSource = BaseLearner::RegisteredLearners().getLearner(baseLearnerName);
		pModel = pWeakHypothesisSource->createGenericStrongLearner( args );
		
		pModel->classify(args);
	}
	//////////////////////////////////////////////////////////////////////////////////////////
	//////////////////////////////////////////////////////////////////////////////////////////
	else if ( args.hasArgument("cmatrix") )
	{
		// -cmatrix <dataFile> <shypFile>
		
		string shypFileName = args.getValue<string>("cmatrix", 1);
		
		string baseLearnerName = UnSerialization::getWeakLearnerName(shypFileName);
		BaseLearner*  pWeakHypothesisSource = BaseLearner::RegisteredLearners().getLearner(baseLearnerName);
		pModel = pWeakHypothesisSource->createGenericStrongLearner( args );
		
		pModel->doConfusionMatrix(args);
	}
	//////////////////////////////////////////////////////////////////////////////////////////
	//////////////////////////////////////////////////////////////////////////////////////////
	else if ( args.hasArgument("posteriors") )
	{
		// -posteriors <dataFile> <shypFile> <outFile> <numIters>
		string shypFileName = args.getValue<string>("posteriors", 1);
		
		string baseLearnerName = UnSerialization::getWeakLearnerName(shypFileName);
        
		BaseLearner*  pWeakHypothesisSource = BaseLearner::RegisteredLearners().getLearner(baseLearnerName);
		pModel = pWeakHypothesisSource->createGenericStrongLearner( args );
		
		pModel->doPosteriors(args);
	}   
	//////////////////////////////////////////////////////////////////////////////////////////
	//////////////////////////////////////////////////////////////////////////////////////////
	else if ( args.hasArgument("ssfeatures") )
	{
		// ONLY for AdaBoostMH classifiers
		
		// -ssfeatures <dataFile> <shypFile> <outFile> <numIters>
		string testFileName = args.getValue<string>("ssfeatures", 0);
		string shypFileName = args.getValue<string>("ssfeatures", 1);
		string outFileName = args.getValue<string>("ssfeatures", 2);
		int numIterations = args.getValue<int>("ssfeatures", 3);
		
		cerr << "ERROR: ssfeatures has been deactivated for the moment!" << endl;
		
		
		//classifier.saveSingleStumpFeatureData(testFileName, shypFileName, outFileName, numIterations);
	}
	
	//////////////////////////////////////////////////////////////////////////////////////////
	//////////////////////////////////////////////////////////////////////////////////////////
	else if ( args.hasArgument("encode") )
	{
		
		// --encode <inputDataFile> <autoassociativeDataFile> <outputDataFile> <nIterations> <poolFile> <nBaseLearners>
		string labelsFileName = args.getValue<string>("encode", 0);
		string autoassociativeFileName = args.getValue<string>("encode", 1);
		string outputFileName = args.getValue<string>("encode", 2);
		int numIterations = args.getValue<int>("encode", 3);
		string poolFileName = args.getValue<string>("encode", 4);
		int numBaseLearners = args.getValue<int>("encode", 5);
		string outputInfoFile;
		// hold the values in named strings so the c_str() pointers stay
		// valid for the duration of the readArguments call
		string poolArg = args.getValue<string>("encode", 4);
		string numBaseLearnersArg = args.getValue<string>("encode", 5);
		const char* tmpArgv1[] = {"bla", // for ParasiteLearner
			"--pool",
			poolArg.c_str(),
			numBaseLearnersArg.c_str()};
		args.readArguments(4,tmpArgv1);
		
		InputData* pAutoassociativeData = new InputData();
		pAutoassociativeData->initOptions(args);
		pAutoassociativeData->load(autoassociativeFileName,IT_TRAIN,verbose);
		
		// for the original labels
		InputData* pLabelsData = new InputData();
		pLabelsData->initOptions(args);
		pLabelsData->load(labelsFileName,IT_TRAIN,verbose);
		
		// set up all the InputData members identically to pAutoassociativeData
		EncodeData* pOnePoint = new EncodeData();
		pOnePoint->initOptions(args);
		pOnePoint->load(autoassociativeFileName,IT_TRAIN,verbose);
		
		const int numExamples = pAutoassociativeData->getNumExamples();
		BaseLearner* pWeakHypothesisSource = 
		BaseLearner::RegisteredLearners().getLearner("ParasiteLearner");
		pWeakHypothesisSource->declareArguments(args);
		
		ParasiteLearner* pWeakHypothesis;
		
		ofstream outFile(outputFileName.c_str());
		if (!outFile.is_open())
		{
			cerr << "ERROR: Cannot open strong hypothesis file <" << outputFileName << ">!" << endl;
			exit(1);
		}
		
		for (int i = 0; i < numExamples ; ++i)
		{
			vector<float> alphas;
			alphas.resize(numBaseLearners);
			fill(alphas.begin(), alphas.end(), 0);
			
			if (verbose >= 1)
				cout << "--> Encoding example no " << (i+1) << endl;
			pOnePoint->resetData();
			pOnePoint->addExample( pAutoassociativeData->getExample(i) );
			AlphaReal energy = 1;
			
			OutputInfo* pOutInfo = NULL;
			if ( args.hasArgument("outputinfo") ) 
			{
				args.getValue("outputinfo", 0, outputInfoFile);
				pOutInfo = new OutputInfo(args);
				pOutInfo->initialize(pOnePoint);
			}
			
			
			for (int t = 0; t < numIterations; ++t)
			{
				pWeakHypothesis = (ParasiteLearner*)pWeakHypothesisSource->create();
				pWeakHypothesis->initLearningOptions(args);
				pWeakHypothesis->setTrainingData(pOnePoint);
				energy *= pWeakHypothesis->run();
				// 	    if (verbose >= 2)
				//  	       cout << "energy = " << energy << endl << flush;
				AdaBoostMHLearner adaBoostMHLearner;
				
				if (i == 0 && t == 0)
				{
					if ( pWeakHypothesis->getBaseLearners().size() < numBaseLearners )
						numBaseLearners = pWeakHypothesis->getBaseLearners().size();
					outFile << "%Hidden representation using autoassociative boosting" << endl << endl;
					outFile << "@RELATION " << outputFileName << endl << endl;
					outFile << "% numBaseLearners" << endl;
					for (int j = 0; j < numBaseLearners; ++j) 
						outFile << "@ATTRIBUTE " << j << "_" <<
						pWeakHypothesis->getBaseLearners()[j]->getId() << " NUMERIC" << endl;
					outFile << "@ATTRIBUTE class {" << pLabelsData->getClassMap().getNameFromIdx(0);
					for (int l = 1; l < pLabelsData->getClassMap().getNumNames(); ++l)
						outFile << ", " << pLabelsData->getClassMap().getNameFromIdx(l);
					outFile << "}" << endl<< endl<< "@DATA" << endl;
				}
				alphas[pWeakHypothesis->getSelectedIndex()] += 
				pWeakHypothesis->getAlpha() * pWeakHypothesis->getSignOfAlpha();
				if ( pOutInfo )
					adaBoostMHLearner.printOutputInfo(pOutInfo, t, pOnePoint, NULL, pWeakHypothesis);
				adaBoostMHLearner.updateWeights(pOnePoint,pWeakHypothesis);
			}
			float sumAlphas = 0;
			for (int j = 0; j < numBaseLearners; ++j)
				sumAlphas += alphas[j];
			
			for (int j = 0; j < numBaseLearners; ++j)
				outFile << alphas[j]/sumAlphas << ",";
			const vector<Label>& labels = pLabelsData->getLabels(i);
			for (int l = 0; l < labels.size(); ++l)
				if (labels[l].y > 0)
					outFile << pLabelsData->getClassMap().getNameFromIdx(labels[l].idx) << endl;
			delete pOutInfo;
		}
		outFile.close();
	}
	
	if (pModel)
		delete pModel;
	
	return 0;
}
OutputInfo Subsampling::initialize(std::vector<double*>& parameterPointers,
                                   std::vector<double*>& parameterDerivativePointers)
{
  OutputInfo info;
  info.dimensions.push_back(fm);
  outRows = inRows / kernelRows;
  outCols = inCols / kernelCols;
  fmOutSize = outRows * outCols;
  info.dimensions.push_back(outRows);
  info.dimensions.push_back(outCols);
  fmInSize = inRows * inCols;
  maxRow = inRows - kernelRows + 1;
  maxCol = inCols - kernelCols + 1;

  W.resize(fm, Eigen::MatrixXd(outRows, outCols));
  Wd.resize(fm, Eigen::MatrixXd(outRows, outCols));
  int numParams = fm * outRows * outCols * kernelRows * kernelCols;
  if(bias)
  {
    Wb.resize(fm, Eigen::MatrixXd(outRows, outCols));
    Wbd.resize(fm, Eigen::MatrixXd(outRows, outCols));
    numParams += fm * outRows * outCols;
  }
  parameterPointers.reserve(parameterPointers.size() + numParams);
  parameterDerivativePointers.reserve(parameterDerivativePointers.size() + numParams);
  for(int fmo = 0; fmo < fm; fmo++)
  {
    for(int r = 0; r < outRows; r++)
    {
      for(int c = 0; c < outCols; c++)
      {
        parameterPointers.push_back(&W[fmo](r, c));
        parameterDerivativePointers.push_back(&Wd[fmo](r, c));
        if(bias)
        {
          parameterPointers.push_back(&Wb[fmo](r, c));
          parameterDerivativePointers.push_back(&Wbd[fmo](r, c));
        }
      }
    }
  }

  initializeParameters();

  a.resize(1, info.outputs());
  y.resize(1, info.outputs());
  yd.resize(1, info.outputs());
  deltas.resize(1, info.outputs());

  if(info.outputs() < 1)
    throw OpenANNException("Number of outputs in subsampling layer is below"
                           " 1. You should either choose a smaller filter"
                           " size or generate a bigger input.");
  OPENANN_CHECK(fmInSize > 0);
  OPENANN_CHECK(outRows > 0);
  OPENANN_CHECK(outCols > 0);
  OPENANN_CHECK(fmOutSize > 0);
  OPENANN_CHECK(maxRow > 0);
  OPENANN_CHECK(maxCol > 0);

  return info;
}
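A minimal sketch of the wiring contract above (construction of the layer is elided, and only the initialize() signature shown in this example is assumed): the layer publishes the address of every parameter and of its derivative, so a generic optimizer can read and update them without knowing the layer type.

#include <vector>

void wireLayer(Subsampling& layer)
{
  std::vector<double*> parameterPointers;
  std::vector<double*> parameterDerivativePointers;
  OutputInfo out = layer.initialize(parameterPointers,
                                    parameterDerivativePointers);
  // out.outputs() == fm * (inRows / kernelRows) * (inCols / kernelCols), and
  // both vectors now hold one entry per registered parameter.
}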
Example #11
	void AdaBoostMHLearner::run(const nor_utils::Args& args)
	{
		// load the arguments
		this->getArgs(args);

		// get the registered weak learner (type from name)
		BaseLearner* pWeakHypothesisSource = 
			BaseLearner::RegisteredLearners().getLearner(_baseLearnerName);
		// initialize learning options; normally it's done in the strong loop
		// also, here we do it for Product learners, so input data can be created
		pWeakHypothesisSource->initLearningOptions(args);

		BaseLearner* pConstantWeakHypothesisSource = 
			BaseLearner::RegisteredLearners().getLearner("ConstantLearner");

		// get the training input data, and load it

		InputData* pTrainingData = pWeakHypothesisSource->createInputData();
		pTrainingData->initOptions(args);
		pTrainingData->load(_trainFileName, IT_TRAIN, _verbose);
		
		// get the testing input data, and load it
		InputData* pTestData = NULL;
		if ( !_testFileName.empty() )
		{
			pTestData = pWeakHypothesisSource->createInputData();
			pTestData->initOptions(args);
			pTestData->load(_testFileName, IT_TEST, _verbose);
		}

		// The output information object
		OutputInfo* pOutInfo = NULL;


		if ( !_outputInfoFile.empty() ) 
		{
			// Baseline: constant classifier - goes into 0th iteration

			BaseLearner* pConstantWeakHypothesis = pConstantWeakHypothesisSource->create() ;
			pConstantWeakHypothesis->initLearningOptions(args);
			pConstantWeakHypothesis->setTrainingData(pTrainingData);
			AlphaReal constantEnergy = pConstantWeakHypothesis->run();

			//pOutInfo = new OutputInfo(_outputInfoFile);
            pOutInfo = new OutputInfo(args);
			pOutInfo->initialize(pTrainingData);

			if (pTestData)
				pOutInfo->initialize(pTestData);
			pOutInfo->outputHeader(pTrainingData->getClassMap());

			pOutInfo->outputIteration(-1);
            pOutInfo->outputCustom(pTrainingData, pConstantWeakHypothesis);
            
			if (pTestData != NULL)
            {
                pOutInfo->separator();
                pOutInfo->outputCustom(pTestData, pConstantWeakHypothesis);   
            }

			pOutInfo->outputCurrentTime();

			pOutInfo->endLine(); 
			pOutInfo->initialize(pTrainingData);

			if (pTestData)
				pOutInfo->initialize(pTestData);
		}
		//cout << "Before serialization" << endl;
		// reload the previously found weak learners if -resume is set. 
		// otherwise just return 0
		int startingIteration = resumeWeakLearners(pTrainingData);


		Serialization ss(_shypFileName, _isShypCompressed );
		ss.writeHeader(_baseLearnerName); // this must go after resumeProcess has been called

		// perform the resuming if necessary. If not it will just return
		resumeProcess(ss, pTrainingData, pTestData, pOutInfo);

		if (_verbose == 1)
			cout << "Learning in progress..." << endl;

		// Record the start time here; note that loading a saved model can itself take a long time.
		time_t startTime, currentTime;
		time(&startTime);

		///////////////////////////////////////////////////////////////////////
		// Starting the AdaBoost main loop
		///////////////////////////////////////////////////////////////////////
		for (int t = startingIteration; t < _numIterations; ++t)
		{
			if (_verbose > 1)
				cout << "------- WORKING ON ITERATION " << (t+1) << " -------" << endl;

			BaseLearner* pWeakHypothesis = pWeakHypothesisSource->create();
			pWeakHypothesis->initLearningOptions(args);
			//pTrainingData->clearIndexSet();

			pWeakHypothesis->setTrainingData(pTrainingData);
			
			AlphaReal energy = pWeakHypothesis->run();
			
			//float gamma = pWeakHypothesis->getEdge();
			//cout << gamma << endl;

			if ( (_withConstantLearner) || ( energy != energy ) ) // try the constant learner if the user asked for it, or if energy is NaN (energy != energy)
			{
				BaseLearner* pConstantWeakHypothesis = pConstantWeakHypothesisSource->create() ;
				pConstantWeakHypothesis->initLearningOptions(args);
				pConstantWeakHypothesis->setTrainingData(pTrainingData);
				AlphaReal constantEnergy = pConstantWeakHypothesis->run();

				if ( (constantEnergy <= energy) || ( energy != energy ) ) {
					delete pWeakHypothesis;
					pWeakHypothesis = pConstantWeakHypothesis;
				}
			}

			if (_verbose > 1)
				cout << "Weak learner: " << pWeakHypothesis->getName()<< endl;
			// Output the step-by-step information
			printOutputInfo(pOutInfo, t, pTrainingData, pTestData, pWeakHypothesis);

			// Updates the weights and returns the edge
			AlphaReal gamma = updateWeights(pTrainingData, pWeakHypothesis);

			if (_verbose > 1)
			{
				cout << setprecision(5)
					<< "--> Alpha = " << pWeakHypothesis->getAlpha() << endl
					<< "--> Edge  = " << gamma << endl
					<< "--> Energy  = " << energy << endl
					//            << "--> ConstantEnergy  = " << constantEnergy << endl
					//            << "--> difference  = " << (energy - constantEnergy) << endl
					;
			}

			// If gamma <= theta the algorithm must stop.
			// If theta == 0 and gamma is 0, it means that the weak learner is no better than chance
			// and no further training is possible.
			if (gamma <= _theta)
			{
				if (_verbose > 0)
				{
					cout << "Can't train any further: edge = " << gamma 
						<< " (with and edge offset (theta)=" << _theta << ")" << endl;
				}

				//          delete pWeakHypothesis;
				//          break; 
			}

			// append the current weak learner to strong hypothesis file,
			// that is, serialize it.
			ss.appendHypothesis(t, pWeakHypothesis);

			// Add it to the internal list of weak hypotheses
			_foundHypotheses.push_back(pWeakHypothesis); 

			// check if the time limit has been reached
			if (_maxTime > 0)
			{
				time( &currentTime );
				float diff = difftime(currentTime, startTime); // difftime is in seconds
				diff /= 60; // = minutes

				if (diff > _maxTime)
				{
					if (_verbose > 0)
						cout << "Time limit of " << _maxTime 
						<< " minutes has been reached!" << endl;
					break;     
				}
			} // check for maxtime
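			// NOTE: pWeakHypothesis was pushed into _foundHypotheses above; deleting
			// it here leaves a dangling pointer in that list if the strong
			// hypothesis is used after run() returns.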
			delete pWeakHypothesis;
		}  // loop on iterations
		/////////////////////////////////////////////////////////

		// write the footer of the strong hypothesis file
		ss.writeFooter();

		// write the weights of the instances if the name of weights file isn't empty
		printOutWeights( pTrainingData );


		// Free the two input data objects
		if (pTrainingData)
			delete pTrainingData;
		if (pTestData)
			delete pTestData;

		if (pOutInfo)
			delete pOutInfo;

		if (_verbose > 0)
			cout << "Learning completed." << endl;
	}
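For reference, a textbook AdaBoost.MH-style weight update showing what updateWeights() computes conceptually. This is a self-contained sketch under textbook assumptions, not necessarily this library's exact implementation: each (example, label) weight is scaled by exp(-alpha * y * h(x, l)) and the weights are then renormalized to sum to one.

#include <cmath>
#include <vector>

void updateWeightsSketch(std::vector< std::vector<double> >& w,       // w[i][l]
                         const std::vector< std::vector<double> >& y, // labels, +1/-1
                         const std::vector< std::vector<double> >& h, // h(x_i, l)
                         double alpha)
{
    double z = 0.0;
    for (size_t i = 0; i < w.size(); ++i)
        for (size_t l = 0; l < w[i].size(); ++l)
        {
            w[i][l] *= std::exp(-alpha * y[i][l] * h[i][l]);
            z += w[i][l];
        }
    for (size_t i = 0; i < w.size(); ++i)
        for (size_t l = 0; l < w[i].size(); ++l)
            w[i][l] /= z;   // renormalize so the weights sum to one again
}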
void CmpSeabaseDDL::dropSeabaseLibrary(StmtDDLDropLibrary * dropLibraryNode,
                                       NAString &currCatName, 
                                       NAString &currSchName)
{
  Lng32 cliRC = 0;
  Lng32 retcode = 0;

  const NAString &objName = dropLibraryNode->getLibraryName();

  ComObjectName libraryName(objName);
  ComAnsiNamePart currCatAnsiName(currCatName);
  ComAnsiNamePart currSchAnsiName(currSchName);
  libraryName.applyDefaults(currCatAnsiName, currSchAnsiName);

  const NAString catalogNamePart = libraryName.
    getCatalogNamePartAsAnsiString();
  const NAString schemaNamePart = libraryName.
    getSchemaNamePartAsAnsiString(TRUE);
  const NAString objectNamePart = libraryName.
    getObjectNamePartAsAnsiString(TRUE);
  const NAString extLibraryName = libraryName.getExternalName(TRUE);

  ExeCliInterface cliInterface(STMTHEAP, NULL, NULL, 
    CmpCommon::context()->sqlSession()->getParentQid());

  ExpHbaseInterface * ehi = allocEHI();
  if (ehi == NULL)
    return;

  retcode = existsInSeabaseMDTable(&cliInterface, 
				   catalogNamePart, schemaNamePart, 
                                   objectNamePart,
				   COM_LIBRARY_OBJECT, TRUE, FALSE);
  if (retcode < 0)
    {
      deallocEHI(ehi); 
      processReturn();
      return;
    }

  if (retcode == 0) // does not exist
    {
      *CmpCommon::diags() << DgSqlCode(-1389)
			  << DgString0(extLibraryName);
      deallocEHI(ehi); 
      processReturn();
      return;
    }

  Int32 objectOwnerID = 0;
  Int32 schemaOwnerID = 0;
  Int64 objectFlags = 0;
  Int64 objUID = getObjectInfo(&cliInterface,
			      catalogNamePart.data(), schemaNamePart.data(), 
			      objectNamePart.data(), COM_LIBRARY_OBJECT,
                              objectOwnerID,schemaOwnerID,objectFlags);
  if (objUID < 0 || objectOwnerID == 0 || schemaOwnerID == 0)
    {
      deallocEHI(ehi); 
      processReturn();
      return;
    }

  if (!isDDLOperationAuthorized(SQLOperation::DROP_LIBRARY,
                                objectOwnerID,
                                schemaOwnerID))
  {
     *CmpCommon::diags() << DgSqlCode(-CAT_NOT_AUTHORIZED);
     processReturn ();
     return;
  }
  
  Queue * usingRoutinesQueue = NULL;
  cliRC = getUsingRoutines(&cliInterface, objUID, usingRoutinesQueue);
  if (cliRC < 0)
    {
      deallocEHI(ehi); 
      processReturn();
      return;
    }
  // If RESTRICT and the library is being used, return an error
  if (cliRC != 100 && dropLibraryNode->getDropBehavior() == COM_RESTRICT_DROP_BEHAVIOR) 
    {
      *CmpCommon::diags() << DgSqlCode(-CAT_DEPENDENT_ROUTINES_EXIST);

      deallocEHI(ehi); 
      processReturn();
      return;
    }
    
  usingRoutinesQueue->position();
  for (size_t i = 0; i < usingRoutinesQueue->numEntries(); i++)
  { 
     OutputInfo * rou = (OutputInfo*)usingRoutinesQueue->getNext(); 
     
     char * routineName = rou->get(0);
     ComObjectType objectType = PrivMgr::ObjectLitToEnum(rou->get(1));

     if (dropSeabaseObject(ehi, routineName,
                           currCatName, currSchName, objectType,
                           TRUE, FALSE))
     {
       deallocEHI(ehi); 
       processReturn();
       return;
     }
   }
 
  // could get a slight performance gain here by passing in objUID
  if (dropSeabaseObject(ehi, objName,
                        currCatName, currSchName, COM_LIBRARY_OBJECT,
                        TRUE, FALSE))
    {
      deallocEHI(ehi); 
      processReturn();
      return;
    }

  deallocEHI(ehi);      
  processReturn();
  return;
}
void CmpSeabaseDDL::dropSeabaseRoutine(StmtDDLDropRoutine * dropRoutineNode,
                                       NAString &currCatName, 
                                       NAString &currSchName)
{
  Lng32 retcode = 0;
 
  ComObjectName routineName(dropRoutineNode->getRoutineName());
  ComAnsiNamePart currCatAnsiName(currCatName);
  ComAnsiNamePart currSchAnsiName(currSchName);
  routineName.applyDefaults(currCatAnsiName, currSchAnsiName);
  const NAString catalogNamePart = 
    routineName.getCatalogNamePartAsAnsiString();
  const NAString schemaNamePart = 
    routineName.getSchemaNamePartAsAnsiString(TRUE);
  const NAString objectNamePart = 
    routineName.getObjectNamePartAsAnsiString(TRUE);
  const NAString extRoutineName = routineName.getExternalName(TRUE);
  
  ExpHbaseInterface * ehi = NULL;
  ExeCliInterface cliInterface(STMTHEAP, NULL, NULL, 
    CmpCommon::context()->sqlSession()->getParentQid());

  ehi = allocEHI();
  if (ehi == NULL)
    {
      processReturn();
      return;
    }

  retcode = existsInSeabaseMDTable(&cliInterface, 
				   catalogNamePart, schemaNamePart, 
                                   objectNamePart, COM_USER_DEFINED_ROUTINE_OBJECT, 
                                   TRUE, FALSE);
  if (retcode < 0)
    {
      deallocEHI(ehi); 
      processReturn();
      return;
    }

  if (retcode == 0) // does not exist
    {
      *CmpCommon::diags() << DgSqlCode(-1389)
			  << DgString0(extRoutineName);
      deallocEHI(ehi); 
      processReturn();
      return;
    }
  
  // get objectOwner
  Int64 objUID = 0;
  Int32 objectOwnerID = 0;
  Int32 schemaOwnerID = 0;
  Int64 objectFlags = 0;

  // see if routine is cached
  BindWA bindWA(ActiveSchemaDB(), CmpCommon::context(), FALSE/*inDDL*/);
  NARoutineDB *pRoutineDBCache  = ActiveSchemaDB()->getNARoutineDB();
  QualifiedName qualRoutineName(routineName, STMTHEAP);
  NARoutineDBKey key(qualRoutineName, STMTHEAP);

  NARoutine *cachedNARoutine = pRoutineDBCache->get(&bindWA, &key);
  if (cachedNARoutine)
    {
      objUID = cachedNARoutine->getRoutineID();
      objectOwnerID = cachedNARoutine->getObjectOwner();
      schemaOwnerID = cachedNARoutine->getSchemaOwner();
    }
  else
    {
      objUID = getObjectInfo(&cliInterface,
			      catalogNamePart.data(), schemaNamePart.data(), 
			      objectNamePart.data(), COM_USER_DEFINED_ROUTINE_OBJECT,
                              objectOwnerID,schemaOwnerID,objectFlags);
      if (objUID < 0 || objectOwnerID == 0 || schemaOwnerID == 0)
        {
          deallocEHI(ehi); 
          processReturn();
          return;
        }
    }

  // Verify user has privilege to drop routine
  if (!isDDLOperationAuthorized(SQLOperation::DROP_ROUTINE,objectOwnerID,schemaOwnerID))
  {
     *CmpCommon::diags() << DgSqlCode(-CAT_NOT_AUTHORIZED);
     deallocEHI(ehi);
     processReturn ();
     return;
  }
  
  // Determine if this function is referenced by any other objects.
  Lng32 cliRC = 0;
  Queue * usingViewsQueue = NULL;
  if (dropRoutineNode->getDropBehavior() == COM_RESTRICT_DROP_BEHAVIOR)
    {
      NAString usingObjName;
      cliRC = getUsingObject(&cliInterface, objUID, usingObjName);
      if (cliRC < 0)
        {
          deallocEHI(ehi); 
          processReturn();
          
          return;
        }

      if (cliRC != 100) // found an object
        {
          *CmpCommon::diags() << DgSqlCode(-CAT_DEPENDENT_VIEW_EXISTS)
                              << DgTableName(usingObjName);

          deallocEHI(ehi); 
          processReturn();

          return;
        }
    }
  else 
    if (dropRoutineNode->getDropBehavior() == COM_CASCADE_DROP_BEHAVIOR)
    {
      cliRC = getUsingViews(&cliInterface, objUID, usingViewsQueue);
      if (cliRC < 0)
        {
          deallocEHI(ehi); 
          processReturn();
          
          return;
        }
    }
  
  if (usingViewsQueue)
    {
      usingViewsQueue->position();
      for (int idx = 0; idx < usingViewsQueue->numEntries(); idx++)
        {
          OutputInfo * vi = (OutputInfo*)usingViewsQueue->getNext(); 
          
          char * viewName = vi->get(0);
          
          if (dropOneTableorView(cliInterface,viewName,COM_VIEW_OBJECT,false))
            {
              deallocEHI(ehi); 
              processReturn();
              
              return;
            }
        }
    }
  
  // Remove the routine from the metadata 
  if (dropSeabaseObject(ehi, dropRoutineNode->getRoutineName(),
                        currCatName, currSchName, COM_USER_DEFINED_ROUTINE_OBJECT,
                        TRUE, FALSE))
    {
      deallocEHI(ehi); 
      processReturn();
      return;
    }

  // Remove cached entries in other processes
  pRoutineDBCache->removeNARoutine(qualRoutineName, 
                                   NARoutineDB::REMOVE_FROM_ALL_USERS,
                                   objUID);

  deallocEHI(ehi);      
  processReturn();
  return;
}
Example #14
MaxPooling::MaxPooling(OutputInfo info, int kernelRows, int kernelCols)
    : I(info.outputs()), fm(info.dimensions[0]),
      inRows(info.dimensions[1]), inCols(info.dimensions[2]),
      kernelRows(kernelRows), kernelCols(kernelCols), x(0), e(1, I)
{
}
void CmpSeabaseDDL::dropSeabaseView(
				    StmtDDLDropView * dropViewNode,
				    NAString &currCatName, NAString &currSchName)
{
  Lng32 cliRC = 0;
  Lng32 retcode = 0;

  const NAString &tabName = dropViewNode->getViewName();

  ComObjectName viewName(tabName);
  ComAnsiNamePart currCatAnsiName(currCatName);
  ComAnsiNamePart currSchAnsiName(currSchName);
  viewName.applyDefaults(currCatAnsiName, currSchAnsiName);

  const NAString catalogNamePart = viewName.getCatalogNamePartAsAnsiString();
  const NAString schemaNamePart = viewName.getSchemaNamePartAsAnsiString(TRUE);
  const NAString objectNamePart = viewName.getObjectNamePartAsAnsiString(TRUE);
  const NAString extViewName = viewName.getExternalName(TRUE);

  ExeCliInterface cliInterface(STMTHEAP, NULL, NULL, 
  CmpCommon::context()->sqlSession()->getParentQid());

  ExpHbaseInterface * ehi = allocEHI();
  if (ehi == NULL)
    return;

  if ((isSeabaseReservedSchema(viewName)) &&
      (!Get_SqlParser_Flags(INTERNAL_QUERY_FROM_EXEUTIL)))
    {
      *CmpCommon::diags() << DgSqlCode(-1119)
			  << DgTableName(extViewName);
      deallocEHI(ehi); 

      processReturn();

      return;
    }

  retcode = existsInSeabaseMDTable(&cliInterface, 
				   catalogNamePart, schemaNamePart, objectNamePart,
				   COM_VIEW_OBJECT, TRUE, FALSE);
  if (retcode < 0)
    {
      deallocEHI(ehi); 

      processReturn();

      return;
    }

  if (retcode == 0) // does not exist
    {
      *CmpCommon::diags() << DgSqlCode(-1389)
			  << DgString0(extViewName);

      deallocEHI(ehi); 

      processReturn();
      
      return;
    }

  Int32 objectOwnerID = 0;
  Int32 schemaOwnerID = 0;
  Int64 objUID = getObjectUIDandOwners(&cliInterface,
			              catalogNamePart.data(), schemaNamePart.data(), 
			              objectNamePart.data(),
			              COM_VIEW_OBJECT,
                                      objectOwnerID,schemaOwnerID);

  if (objUID < 0 || objectOwnerID == 0)
    {
      if (CmpCommon::diags()->getNumber(DgSqlCode::ERROR_) == 0)
        SEABASEDDL_INTERNAL_ERROR("getting object UID and owner for drop view");

      deallocEHI(ehi); 

      processReturn();

      return;
    }

  // Verify user can perform operation
  if (!isDDLOperationAuthorized(SQLOperation::DROP_VIEW,objectOwnerID,schemaOwnerID))
  {
     *CmpCommon::diags() << DgSqlCode(-CAT_NOT_AUTHORIZED);
     deallocEHI(ehi);
     processReturn ();
     return;
  }
 
  Queue * usingViewsQueue = NULL;
  if (dropViewNode->getDropBehavior() == COM_RESTRICT_DROP_BEHAVIOR)
    {
      NAString usingObjName;
      cliRC = getUsingObject(&cliInterface, objUID, usingObjName);
      if (cliRC < 0)
	{
	  deallocEHI(ehi); 

	  processReturn();
	  
	  return;
	}

      if (cliRC != 100) // found an object
	{
	  *CmpCommon::diags() << DgSqlCode(-1047)
			      << DgTableName(usingObjName);

	  deallocEHI(ehi); 

	  processReturn();

	  return;
	}
    }
  else if (dropViewNode->getDropBehavior() == COM_CASCADE_DROP_BEHAVIOR)
    {
      cliRC = getUsingViews(&cliInterface, objUID, usingViewsQueue);
      if (cliRC < 0)
	{
	  deallocEHI(ehi); 

	  processReturn();
	  
	  return;
	}
    }

  // get the list of all tables referenced by the view.  Save this list so 
  // referenced tables can be removed from cache later
  NAList<objectRefdByMe> tablesRefdList;
  short status = getListOfReferencedTables(&cliInterface, objUID, tablesRefdList);

  if (usingViewsQueue)
    {
      usingViewsQueue->position();
      for (int idx = 0; idx < usingViewsQueue->numEntries(); idx++)
	{
	  OutputInfo * vi = (OutputInfo*)usingViewsQueue->getNext(); 
	  
	  char * viewName = vi->get(0);
	  
	  if (dropSeabaseObject(ehi, viewName,
				 currCatName, currSchName, COM_VIEW_OBJECT))
	    {
	      deallocEHI(ehi); 

	      processReturn();
	      
	      return;
	    }
	}
    }

  if (dropSeabaseObject(ehi, tabName,
			 currCatName, currSchName, COM_VIEW_OBJECT))
    {
      deallocEHI(ehi); 

      processReturn();

      return;
    }

  // clear view definition from my cache only. 
  CorrName cn(objectNamePart, STMTHEAP, schemaNamePart, catalogNamePart);
  ActiveSchemaDB()->getNATableDB()->removeNATable(cn,
    NATableDB::REMOVE_MINE_ONLY, COM_VIEW_OBJECT);

  // clear view from all other caches here. This compensates for a 
  // scenario where the object UID is not available in removeNATable, 
  // and the look up failed too.  Solution is just to use the objectUID 
  // here.
  SQL_QIKEY qiKey;
  qiKey.operation[0] = 'O';
  qiKey.operation[1] = 'R';
  qiKey.ddlObjectUID = objUID;
  SQL_EXEC_SetSecInvalidKeys(1, &qiKey);

  // Now remove referenced tables from cache.
  // When a query that references a view is compiled, all views are converted
  // to the underlying base tables.  Query plans are generated to access the
  // tables, and the views are no longer relevant.
  // When dropping a view, query plans that reference the dropped view will
  // continue to work if the plans are cached.  This code removes the 
  // referenced tables from caches to force recompilations so dropped views
  // are noticed.
  for (CollIndex i = 0; i < tablesRefdList.entries(); i++)
    {
      CorrName cn(tablesRefdList[i].objectName,
                  STMTHEAP,
                  tablesRefdList[i].schemaName,
                  tablesRefdList[i].catalogName);
      ActiveSchemaDB()->getNATableDB()->removeNATable(cn,
        NATableDB::REMOVE_FROM_ALL_USERS, COM_BASE_TABLE_OBJECT);
    }

  deallocEHI(ehi); 
      
  processReturn();

  return;
}
Example #16
// Returns the results into ptRes
void MDDAGClassifier::computeResults(InputData* pData, vector<BaseLearner*>& weakHypotheses,
                                     vector< ExampleResults* >& results, int numIterations)
{
    assert( !weakHypotheses.empty() );

    const int numClasses = pData->getNumClasses();
    const int numExamples = pData->getNumExamples();

    // Initialize the output info
    OutputInfo* pOutInfo = NULL;

    if ( !_outputInfoFile.empty() )
    {
        if ( _args.getNumValues("outputinfo") > 1 )
        {
            pOutInfo = new OutputInfo(_args);
        }
        else
        {
            pOutInfo = new OutputInfo(_outputInfoFile, "e01hamauc", false);
        }

    }


    // Creating the results structures. See file Structures.h for the
    // PointResults structure
    results.clear();
    results.reserve(numExamples);
    for (int i = 0; i < numExamples; ++i)
        results.push_back( new ExampleResults(i, numClasses) );

    // iterator over all the weak hypotheses
    vector<BaseLearner*>::const_iterator whyIt;
    int t;

    if ( pOutInfo )
    {
        pOutInfo->initialize( pData );
        pOutInfo->outputHeader(pData->getClassMap(),
                               true, // output iterations
                               false, // output time
                               true // endline
                              );
    }

    // for every weak hypothesis: 1..T
    for (whyIt = weakHypotheses.begin(), t = 0;
            whyIt != weakHypotheses.end() && t < numIterations; ++whyIt, ++t)
    {
        BaseLearner* currWeakHyp = *whyIt;
        AlphaReal alpha = currWeakHyp->getAlpha();

        // for every point
        for (int i = 0; i < numExamples; ++i)
        {
            // a reference for clarity and speed
            vector<AlphaReal>& currVotesVector = results[i]->getVotesVector();

            // for every class
            for (int l = 0; l < numClasses; ++l)
                currVotesVector[l] += alpha * currWeakHyp->classify(pData, i, l);
        }

        // if needed output the step-by-step information
        if ( pOutInfo )
        {
            pOutInfo->outputIteration(t);
            //				pOutInfo->outputError(pData, currWeakHyp);
            //				pOutInfo->outTPRFPR(pData);
            //pOutInfo->outputBalancedError(pData, currWeakHyp);
            //				if ( ( t % 1 ) == 0 ) {
            //					pOutInfo->outputROC(pData);
            //				}

            pOutInfo->outputCustom(pData, currWeakHyp);
            // Margins and edge requires an update of the weight,
            // therefore I keep them out for the moment
            //outInfo.outputMargins(pData, currWeakHyp);
            //outInfo.outputEdge(pData, currWeakHyp);
            pOutInfo->endLine();
        }
    }

    if (pOutInfo)
        delete pOutInfo;

}
// ----------------------------------------------------------------------------
// method:  authorizationEnabled
//
// Input:  pointer to the error structure
//
// Returns:
//    PRIV_INITIALIZED means all metadata tables exist
//    PRIV_UNINITIALIZED means no metadata tables exist
//    PRIV_PARTIALLY_INITIALIZED means only part of the metadata tables exist
//    PRIV_INITIALIZE_UNKNOWN means unable to retrieve metadata table info
//
// A cli error is put into the diags area if there is an error
// ----------------------------------------------------------------------------
PrivMgr::PrivMDStatus PrivMgr::authorizationEnabled(
  std::set<std::string> &existingObjectList)
{
  // Will require QI to reset on INITIALIZE AUTHORIZATION [,DROP]
  // get the list of tables from the schema
  // if the catalog name ever allows an embedded '.', this code will need 
  // to change.
  std::string metadataLocation = getMetadataLocation();
  size_t period = metadataLocation.find(".");
  std::string catName = metadataLocation.substr(0, period);
  std::string schName = metadataLocation.substr(period+1);
  char buf[1000];
  sprintf(buf, "get tables in schema %s.%s, no header",
              catName.c_str(), schName.c_str());

  ExeCliInterface cliInterface(STMTHEAP, NULL, NULL, 
  CmpCommon::context()->sqlSession()->getParentQid());
  Queue * schemaQueue = NULL;

  // mark the diags area so it can be rewound if no rows are found
  int32_t diagsMark = pDiags_->mark();

  int32_t cliRC =  cliInterface.fetchAllRows(schemaQueue, buf, 0, FALSE, FALSE, TRUE);
  if (cliRC < 0)
  {
    cliInterface.retrieveSQLDiagnostics(pDiags_);
    return PRIV_INITIALIZE_UNKNOWN;
  }

  if (cliRC == 100) // did not find the row
  {
    pDiags_->rewind(diagsMark);
    return PRIV_UNINITIALIZED;
  }

  // Defensive check: an empty result queue should not occur here, but handle it anyway
  if (schemaQueue->numEntries() == 0)
    return PRIV_UNINITIALIZED;

  // Gather the returned list of tables in existingObjectList
  schemaQueue->position();
  for (int idx = 0; idx < schemaQueue->numEntries(); idx++)
  {
    OutputInfo * row = (OutputInfo*)schemaQueue->getNext();
    std::string theName = row->get(0);
    existingObjectList.insert(theName);
  }

  // Gather the list of expected tables in expectedObjectList
  std::set<string> expectedObjectList;
  size_t numTables = sizeof(privMgrTables)/sizeof(PrivMgrTableStruct);
  for (size_t ndx_tl = 0; ndx_tl < numTables; ndx_tl++)
  {
    const PrivMgrTableStruct &tableDefinition = privMgrTables[ndx_tl];
    expectedObjectList.insert(tableDefinition.tableName);
  }

  // Compare the existing with the expected
  std::set<string> diffsObjectList;
  std::set_difference (expectedObjectList.begin(), expectedObjectList.end(),
                       existingObjectList.begin(), existingObjectList.end(),
                       std::inserter(diffsObjectList, diffsObjectList.end()));

  // If the number of existing tables match the expected, diffsObjectList 
  // is empty -> return initialized
  if (diffsObjectList.empty())
    return PRIV_INITIALIZED;
 
  // If the number of existing tables does not match the expected, 
  // initialization is required -> return not initialized
  if (existingObjectList.size() == diffsObjectList.size())
    return PRIV_UNINITIALIZED;
 
  // Otherwise, mismatch is found, return partially initialized
  return PRIV_PARTIALLY_INITIALIZED;
}
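A self-contained illustration of the set logic above (the table names are hypothetical): expected-minus-existing is empty when every expected table exists, has the same size as the expected set when none of them exist, and is a proper subset otherwise.

#include <algorithm>
#include <cassert>
#include <iterator>
#include <set>
#include <string>

void setDifferenceIllustration()
{
  std::set<std::string> expected;
  expected.insert("PM_TABLE_A");
  expected.insert("PM_TABLE_B");
  expected.insert("PM_TABLE_C");

  std::set<std::string> existing;
  existing.insert("PM_TABLE_B");

  std::set<std::string> missing;
  std::set_difference(expected.begin(), expected.end(),
                      existing.begin(), existing.end(),
                      std::inserter(missing, missing.end()));

  assert(missing.size() == 2);  // some but not all exist: partially initialized
}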
Example #18
	void FilterBoostLearner::run(const nor_utils::Args& args)
	{
		// load the arguments
		this->getArgs(args);

		time_t startTime, currentTime;
		time(&startTime);

		// get the registered weak learner (type from name)
		BaseLearner* pWeakHypothesisSource = 
			BaseLearner::RegisteredLearners().getLearner(_baseLearnerName);
		// initialize learning options; normally it's done in the strong loop
		// also, here we do it for Product learners, so input data can be created
		pWeakHypothesisSource->initLearningOptions(args);

		BaseLearner* pConstantWeakHypothesisSource = 
			BaseLearner::RegisteredLearners().getLearner("ConstantLearner");

		// get the training input data, and load it

		InputData* pTrainingData = pWeakHypothesisSource->createInputData();
		pTrainingData->initOptions(args);
		pTrainingData->load(_trainFileName, IT_TRAIN, _verbose);

		const int numClasses = pTrainingData->getNumClasses();
		const int numExamples = pTrainingData->getNumExamples();
		
		//initialize the margins variable
		_margins.resize( numExamples );
		for( int i=0; i<numExamples; i++ )
		{
			_margins[i].resize( numClasses );
			fill( _margins[i].begin(), _margins[i].end(), 0.0 );
		}


		// get the testing input data, and load it
		InputData* pTestData = NULL;
		if ( !_testFileName.empty() )
		{
			pTestData = pWeakHypothesisSource->createInputData();
			pTestData->initOptions(args);
			pTestData->load(_testFileName, IT_TEST, _verbose);
		}

		// The output information object
		OutputInfo* pOutInfo = NULL;


		if ( !_outputInfoFile.empty() ) 
		{
			// Baseline: constant classifier - goes into 0th iteration

			BaseLearner* pConstantWeakHypothesis = pConstantWeakHypothesisSource->create() ;
			pConstantWeakHypothesis->initLearningOptions(args);
			pConstantWeakHypothesis->setTrainingData(pTrainingData);
			float constantEnergy = pConstantWeakHypothesis->run();

			pOutInfo = new OutputInfo(_outputInfoFile);
			pOutInfo->initialize(pTrainingData);

			updateMargins( pTrainingData, pConstantWeakHypothesis );

			if (pTestData)
				pOutInfo->initialize(pTestData);
			pOutInfo->outputHeader();

			pOutInfo->outputIteration(-1);
			pOutInfo->outputError(pTrainingData, pConstantWeakHypothesis);

			if (pTestData)
				pOutInfo->outputError(pTestData, pConstantWeakHypothesis);
			/*
			pOutInfo->outputMargins(pTrainingData, pConstantWeakHypothesis);
			
			pOutInfo->outputEdge(pTrainingData, pConstantWeakHypothesis);

			if (pTestData)
				pOutInfo->outputMargins(pTestData, pConstantWeakHypothesis);

			pOutInfo->outputMAE(pTrainingData);

			if (pTestData)
				pOutInfo->outputMAE(pTestData);
			*/
			pOutInfo->outputCurrentTime();

			pOutInfo->endLine();
			pOutInfo->initialize(pTrainingData);
			
			if (pTestData)
				pOutInfo->initialize(pTestData);
		}
		// reload the previously found weak learners if -resume is set. 
		// otherwise just return 0
		int startingIteration = resumeWeakLearners(pTrainingData);


		Serialization ss(_shypFileName, _isShypCompressed );
		ss.writeHeader(_baseLearnerName); // this must go after resumeProcess has been called

		// perform the resuming if necessary. If not it will just return
		resumeProcess(ss, pTrainingData, pTestData, pOutInfo);

		if (_verbose == 1)
			cout << "Learning in progress..." << endl;

		///////////////////////////////////////////////////////////////////////
		// Starting the AdaBoost main loop
		///////////////////////////////////////////////////////////////////////
		for (int t = startingIteration; t < _numIterations; ++t)
		{
			if (_verbose > 1)
				cout << "------- WORKING ON ITERATION " << (t+1) << " -------" << endl;

			filter( pTrainingData, (int)(_Cn * log(t+2.0)) );
			if ( pTrainingData->getNumExamples() < 2 ) 
			{
				filter( pTrainingData, (int)(_Cn * log(t+2.0)), false );
			}
			
			if (_verbose > 1)
			{
				cout << "--> Size of training data = " << pTrainingData->getNumExamples() << endl;
			}

			BaseLearner* pWeakHypothesis = pWeakHypothesisSource->create();
			pWeakHypothesis->initLearningOptions(args);
			//pTrainingData->clearIndexSet();
			pWeakHypothesis->setTrainingData(pTrainingData);
			float energy = pWeakHypothesis->run();

			BaseLearner* pConstantWeakHypothesis = NULL;
			if (_withConstantLearner) // check constant learner if user wants it
			{
				pConstantWeakHypothesis = pConstantWeakHypothesisSource->create() ;
				pConstantWeakHypothesis->initLearningOptions(args);
				pConstantWeakHypothesis->setTrainingData(pTrainingData);
				float constantEnergy = pConstantWeakHypothesis->run();
			}

			//estimate edge
			filter( pTrainingData, (int)(_Cn * log(t+2.0)), false );
			float edge = pWeakHypothesis->getEdge() / 2.0;

			if (_withConstantLearner) // check constant learner if user wants it
			{
				float constantEdge = pConstantWeakHypothesis->getEdge() / 2.0;
				if ( constantEdge > edge )
				{
					delete pWeakHypothesis;
					pWeakHypothesis = pConstantWeakHypothesis;
					edge = constantEdge;
				} else {
					delete pConstantWeakHypothesis;
				}
			}

			// calculate alpha
			float alpha = 0.5 * log( ( 0.5 + edge ) / ( 0.5 - edge ) );
			pWeakHypothesis->setAlpha( alpha );

			if (_verbose > 1)
				cout << "Weak learner: " << pWeakHypothesis->getName()<< endl;
			// Output the step-by-step information
			pTrainingData->clearIndexSet();
			printOutputInfo(pOutInfo, t, pTrainingData, pTestData, pWeakHypothesis);

			// Updates the weights and returns the edge
			float gamma = updateWeights(pTrainingData, pWeakHypothesis);

			if (_verbose > 1)
			{
				cout << setprecision(5)
					<< "--> Alpha = " << pWeakHypothesis->getAlpha() << endl
					<< "--> Edge  = " << gamma << endl
					<< "--> Energy  = " << energy << endl
					//            << "--> ConstantEnergy  = " << constantEnergy << endl
					//            << "--> difference  = " << (energy - constantEnergy) << endl
					;
			}

			// update the margins
			updateMargins( pTrainingData, pWeakHypothesis );

			// append the current weak learner to strong hypothesis file,
			// that is, serialize it.
			ss.appendHypothesis(t, pWeakHypothesis);

			// Add it to the internal list of weak hypotheses
			_foundHypotheses.push_back(pWeakHypothesis); 

			// check if the time limit has been reached
			if (_maxTime > 0)
			{
				time( &currentTime );
				float diff = difftime(currentTime, startTime); // difftime is in seconds
				diff /= 60; // = minutes

				if (diff > _maxTime)
				{
					if (_verbose > 0)
						cout << "Time limit of " << _maxTime 
						<< " minutes has been reached!" << endl;
					break;     
				}
			} // check for maxtime
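			// NOTE: pWeakHypothesis was pushed into _foundHypotheses above; deleting
			// it here leaves a dangling pointer in that list if the strong
			// hypothesis is used after run() returns.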
			delete pWeakHypothesis;
		}  // loop on iterations
		/////////////////////////////////////////////////////////

		// write the footer of the strong hypothesis file
		ss.writeFooter();

		// Free the two input data objects
		if (pTrainingData)
			delete pTrainingData;
		if (pTestData)
			delete pTestData;

		if (pOutInfo)
			delete pOutInfo;

		if (_verbose > 0)
			cout << "Learning completed." << endl;
	}
Example #19
Dropout::Dropout(OutputInfo info, double dropoutProbability)
  : info(info), I(info.outputs()),
    dropoutProbability(dropoutProbability), y(1, I), dropoutMask(1, I), e(1, I)
{
}
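For context, a minimal sketch of what a dropout layer computes (illustration only, not this library's implementation): during training each activation is kept with probability 1 - dropoutProbability and zeroed otherwise; the common "inverted dropout" variant shown here also scales kept activations by 1 / (1 - p) so the expected activation is unchanged at test time.

#include <cstdlib>
#include <vector>

void dropoutForwardSketch(std::vector<double>& y, double p)
{
  for (size_t i = 0; i < y.size(); ++i)
  {
    const bool keep = (std::rand() / (double) RAND_MAX) >= p;
    y[i] = keep ? y[i] / (1.0 - p) : 0.0;  // inverted dropout scaling
  }
}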
    void SoftCascadeLearner::run(const nor_utils::Args& args)
    {
        // load the arguments
        this->getArgs(args);
        
        //print cascade properties
        if (_verbose > 0) {
            cout    << "[+] Softcascade parameters :" << endl
                    << "\t --> target detection rate = " << _targetDetectionRate << endl
                    << "\t --> alpha (exp param) = " << _alphaExponentialParameter << endl
                    << "\t --> bootstrap rate = " << _bootstrapRate << endl
                    << endl;
        }
        

        // get the registered weak learner (type from name)
        BaseLearner* pWeakHypothesisSource = 
            BaseLearner::RegisteredLearners().getLearner(_baseLearnerName);
        // initialize learning options; normally it's done in the strong loop
        // also, here we do it for Product learners, so input data can be created
        pWeakHypothesisSource->initLearningOptions(args);

        // get the training input data, and load it

        InputData* pTrainingData = pWeakHypothesisSource->createInputData();
        pTrainingData->initOptions(args);
        pTrainingData->load(_trainFileName, IT_TRAIN, 5);

        InputData* pBootstrapData = NULL;
        if (!_bootstrapFileName.empty()) {
            pBootstrapData = pWeakHypothesisSource->createInputData();
            pBootstrapData->initOptions(args);
            pBootstrapData->load(_bootstrapFileName, IT_TRAIN, 5);
        }
        
        // get the testing input data, and load it
        InputData* pTestData = NULL;
        if ( !_testFileName.empty() )
        {
            pTestData = pWeakHypothesisSource->createInputData();
            pTestData->initOptions(args);
            pTestData->load(_testFileName, IT_TEST, 5);
        }

        Serialization ss(_shypFileName, false );
        ss.writeHeader(_baseLearnerName);
        
        
//        outputHeader();
        // The output information object
        OutputInfo* pOutInfo = NULL;

        if ( !_outputInfoFile.empty() ) 
        {
            pOutInfo = new OutputInfo(args, true);
            pOutInfo->setOutputList("sca", &args);
            
            pOutInfo->initialize(pTrainingData);
            
            if (pTestData)
                pOutInfo->initialize(pTestData);
            pOutInfo->outputHeader(pTrainingData->getClassMap(), true, true, false);
            pOutInfo->outputUserHeader("thresh");
            pOutInfo->headerEndLine();
        }
        
        
//        ofstream trainPosteriorsFile;
//        ofstream testPosteriorsFile;
        
        
        const NameMap& namemap = pTrainingData->getClassMap();
        _positiveLabelIndex = namemap.getIdxFromName(_positiveLabelName);

        // FIXME: output posteriors

//        OutputInfo* pTrainPosteriorsOut = NULL;
//        OutputInfo* pTestPosteriorsOut = NULL;
        
//        if (! _trainPosteriorsFileName.empty()) {
//            pTrainPosteriorsOut = new OutputInfo(_trainPosteriorsFileName, "pos", true);
//            pTrainPosteriorsOut->initialize(pTrainingData);
//            dynamic_cast<PosteriorsOutput*>( pTrainPosteriorsOut->getOutputInfoObject("pos") )->addClassIndex(_positiveLabelIndex );
//        }
        
//        if (! _testPosteriorsFileName.empty() && !_testFileName.empty() ) {
//            pTestPosteriorsOut = new OutputInfo(_testPosteriorsFileName, "pos", true);
//            pTestPosteriorsOut->initialize(pTestData);
//            dynamic_cast<PosteriorsOutput*>( pTestPosteriorsOut->getOutputInfoObject("pos") )->addClassIndex(_positiveLabelIndex );            
//        }
        
        const int numExamples = pTrainingData->getNumExamples();

        vector<BaseLearner*> inWeakHypotheses;
        
        if (_fullRun) {            
            // TODO : the full training is implemented, testing is needed
            AdaBoostMHLearner* sHypothesis = new AdaBoostMHLearner();
            sHypothesis->run(args, pTrainingData, _baseLearnerName, _numIterations, inWeakHypotheses );
            delete sHypothesis;
        }
        else { 
            
            cout << "[+] Loading uncalibrated shyp file... ";
            //read the shyp file of the trained classifier
            UnSerialization us;
            us.loadHypotheses(_unCalibratedShypFileName, inWeakHypotheses, pTrainingData);  
            if (_inShypLimit > 0 && _inShypLimit < inWeakHypotheses.size() ) {
                inWeakHypotheses.resize(_inShypLimit);
            }
            if (_numIterations > inWeakHypotheses.size()) {
                _numIterations = inWeakHypotheses.size();
            }
            cout << "weak hypotheses loaded, " << inWeakHypotheses.size() << " retained.\n";
        }
        
        // some initializations
        _foundHypotheses.resize(0);
        double faceRejectionFraction = 0.;
        double estimatedExecutionTime = 0.;
        vector<double> rejectionDistributionVector;

        _rejectionThresholds.resize(0);
        
        
        set<int> trainingIndices;
        for (int i = 0; i < numExamples; i++) {
            trainingIndices.insert(pTrainingData->getRawIndex(i) );
        }
        
        // init v_t (see the paper)
        initializeRejectionDistributionVector(_numIterations, rejectionDistributionVector);

        if (_verbose == 1)
            cout << "Learning in progress..." << endl;

        ///////////////////////////////////////////////////////////////////////
        // Starting the SoftCascade main loop
        ///////////////////////////////////////////////////////////////////////
        for (int t = 0; t < _numIterations; ++t)
        {
            if (_verbose > 0)
                cout << "--------------[ iteration " << (t+1) << " ]--------------" << endl;

            faceRejectionFraction += rejectionDistributionVector[t];
            
            cout << "[+] Face rejection tolerated : " << faceRejectionFraction << " | v[t] = " << rejectionDistributionVector[t] << endl;
            
            int numberOfNegatives = pTrainingData->getNumExamplesPerClass(1 - _positiveLabelIndex);
            
            //vector<BaseLearner*>::const_iterator whyIt;
            int selectedIndex = 0;
            AlphaReal bestGap = 0;
            vector<AlphaReal> posteriors;
            computePosteriors(pTrainingData, _foundHypotheses, posteriors, _positiveLabelIndex);
            
            //should use an iterator instead of i
            
            vector<BaseLearner*>::iterator whyIt;
            int i;
            for (i = 0, whyIt = inWeakHypotheses.begin(); whyIt != inWeakHypotheses.end(); ++whyIt, ++i) {
            
                vector<AlphaReal> temporaryPosteriors = posteriors;
                vector<BaseLearner*> temporaryWeakHyp = _foundHypotheses;
                temporaryWeakHyp.push_back(*whyIt);
                updatePosteriors(pTrainingData, *whyIt, temporaryPosteriors, _positiveLabelIndex);
                
                AlphaReal gap = computeSeparationSpan(pTrainingData, temporaryPosteriors, _positiveLabelIndex );

                if (gap > bestGap) {
                    bestGap = gap;
                    selectedIndex = i;
                }
            }
            
            BaseLearner* selectedWeakHypothesis = inWeakHypotheses[selectedIndex];
            
            cout << "[+] Rank of the selected weak hypothesis : " << selectedIndex << endl
                 << "\t ---> edge gap = " << bestGap << endl
                 << "\t ---> alpha = " << selectedWeakHypothesis->getAlpha() << endl;

            //update the stages
            _foundHypotheses.push_back(selectedWeakHypothesis);
            updatePosteriors(pTrainingData, selectedWeakHypothesis, posteriors, _positiveLabelIndex);
            
            double missesFraction;
            AlphaReal r = findBestRejectionThreshold(pTrainingData, posteriors, faceRejectionFraction, missesFraction);
            _rejectionThresholds.push_back(r);
            
            
            // update the output info object
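            // NOTE: assumes pOutInfo is non-NULL, i.e. an output info file was
            // requested; otherwise the call below dereferences a null pointer.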
            dynamic_cast<SoftCascadeOutput*>( pOutInfo->getOutputInfoObject("sca") )->appendRejectionThreshold(r);
            
            cout << "[+] Rejection threshold = " << r << endl;
            
            //some updates
            ss.appendHypothesisWithThreshold(t, selectedWeakHypothesis, r);
            faceRejectionFraction -= missesFraction;
            
            inWeakHypotheses.erase(inWeakHypotheses.begin() + selectedIndex);
            double whypCost = 1; //just in case there are different costs for each whyp
            estimatedExecutionTime += whypCost * numberOfNegatives;
            
            // output perf in file
            vector< vector< AlphaReal> > scores(0);
            _output << t + 1 << setw(_sepWidth + 1) << r << setw(_sepWidth);
            
            // update OutputInfo with the new whyp
//            updateOutputInfo(pOutInfo, pTrainingData, selectedWeakHypothesis);
//            if (pTestData) {
//                updateOutputInfo(pOutInfo, pTestData, selectedWeakHypothesis);
//            }
            

            // output the iteration results
            printOutputInfo(pOutInfo, t, pTrainingData, pTestData, selectedWeakHypothesis, r);
                        
//            if (pTrainPosteriorsOut) {
//                pTrainPosteriorsOut->setTable(pTrainingData, pOutInfo->getTable(pTrainingData));
//                pTrainPosteriorsOut->outputCustom(pTrainingData);
//            }
//
//            if (pTestPosteriorsOut) {
//                pTestPosteriorsOut->setTable(pTestData, pOutInfo->getTable(pTestData));
//                pTestPosteriorsOut->outputCustom(pTestData);
//            }
            
            
            int leftNegatives = filterDataset(pTrainingData, posteriors, r, trainingIndices);
            if (leftNegatives == 0) {
                cout << endl << "[+] No more negatives.\n";
                break;
            }
            
            if (_bootstrapRate != 0) {
                bootstrapTrainingSet(pTrainingData, pBootstrapData, trainingIndices);
            }

        }  // loop on iterations
        /////////////////////////////////////////////////////////

        // write the footer of the strong hypothesis file
        ss.writeFooter();

        // Free the two input data objects
        if (pTrainingData)
            delete pTrainingData;
        if (pBootstrapData) {
            delete pBootstrapData;
        }
        if (pTestData)
            delete pTestData;

        if (_verbose > 0)
            cout << "Learning completed." << endl;
    }