// *****************************************************************************
// *                                                                           *
// * Function: MyTable::setRow                                                 *
// *                                                                           *
// *    Sets the fields of a row.                                              *
// *                                                                           *
// *****************************************************************************
// *                                                                           *
// *  Parameters:                                                              *
// *                                                                           *
// *  <cliInterface>               OutputInfo &                       In       *
// *    is a reference to CLI interface to the row data that was read.         *
// *                                                                           *
// *  <rowOut>                     PrivMgrMDRow &                     Out      *
// *    passes back a MyRow.                                                   *
// *                                                                           *
// *****************************************************************************
void MyTable::setRow(
   OutputInfo & cliInterface,
   PrivMgrMDRow & rowOut)
{
   MyRow & row = static_cast<MyRow &>(rowOut);
   char * ptr = NULL;
   int32_t length = 0;
   char value[500];

   // column 1: CATALOG_NAME
   cliInterface.get(0,ptr,length);
   strncpy(value,ptr,length);
   value[length] = 0;
   row.catalogName_ = value;

   // column 2: SCHEMA_NAME
   cliInterface.get(1,ptr,length);
   strncpy(value,ptr,length);
   value[length] = 0;
   row.schemaName_ = value;

   // column 3: OBJECT_NAME
   cliInterface.get(2,ptr,length);
   strncpy(value,ptr,length);
   value[length] = 0;
   row.objectName_ = value;

   // column 4: OBJECT_TYPE
   cliInterface.get(3,ptr,length);
   strncpy(value,ptr,length);
   value[length] = 0;
   row.objectType_ = PrivMgr::ObjectLitToEnum(value);

   // column 5: OBJECT_UID
   cliInterface.get(4,ptr,length);
   row.objectUID_ = *(reinterpret_cast<int64_t*>(ptr));

   // column 6: CREATE_TIME
   cliInterface.get(5,ptr,length);
   row.createTime_ = *(reinterpret_cast<int64_t*>(ptr));

   // column 7: REDEF_TIME
   cliInterface.get(6,ptr,length);
   row.redefTime_ = *(reinterpret_cast<int64_t*>(ptr));

   // column 8: VALID_DEF
   cliInterface.get(7,ptr,length);
   strncpy(value,ptr,length);
   value[length] = 0;
   row.isValid_ = (value[0] == 'Y' ? true : false);

   // column 9: OBJECT_OWNER
   cliInterface.get(8,ptr,length);
   row.objectOwner_ = *(reinterpret_cast<int32_t*>(ptr));

   lastRowRead_ = row;
}
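// ---------------------------------------------------------------------------
// Usage sketch (illustrative only, not part of the class): setRow is meant to
// be driven from a metadata fetch loop.  The Queue/OutputInfo pattern below
// mirrors the fetchAllRows loops elsewhere in this file; "rowsQueue",
// "myTable", and the default construction of MyRow are hypothetical.
//
//    rowsQueue->position();
//    for (int idx = 0; idx < rowsQueue->numEntries(); idx++)
//    {
//       OutputInfo * cliRow = (OutputInfo*)rowsQueue->getNext();
//       MyRow row;                       // hypothetical construction
//       myTable.setRow(*cliRow, row);    // populates row from the CLI buffers
//    }
// ---------------------------------------------------------------------------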
FullyConnected::FullyConnected(OutputInfo info, int J, bool bias,
                               ActivationFunction act, double stdDev,
                               double maxSquaredWeightNorm)
  : I(info.outputs()), J(J), bias(bias), act(act), stdDev(stdDev),
    maxSquaredWeightNorm(maxSquaredWeightNorm), W(J, I), Wd(J, I), b(J),
    bd(J), x(0), a(1, J), y(1, J), yd(1, J), deltas(1, J), e(1, I)
{
}
Subsampling::Subsampling(OutputInfo info, int kernelRows, int kernelCols,
                         bool bias, ActivationFunction act, double stdDev,
                         Regularization regularization)
  : I(info.outputs()), fm(info.dimensions[0]), inRows(info.dimensions[1]),
    inCols(info.dimensions[2]), kernelRows(kernelRows),
    kernelCols(kernelCols), bias(bias), act(act), stdDev(stdDev), x(0),
    e(1, I), fmInSize(-1), outRows(-1), outCols(-1), fmOutSize(-1),
    maxRow(-1), maxCol(-1), regularization(regularization)
{
}
// ****************************************************************************
// method: getListOfDirectlyReferencedObjects
//
// Returns a list of objects that are directly referenced by the passed-in
// objectUID
//
// Parameters:
//    cliInterface - used to get the list of object usages
//    objectUID - the UID being processed
//    objectsList - a list of objectRefdByMe structures describing each usage
//
// returns:
//    0 - successful
//   -1 - unexpected error occurred
// ****************************************************************************
short CmpSeabaseDDL::getListOfDirectlyReferencedObjects (
  ExeCliInterface *cliInterface,
  const Int64 objectUID,
  NAList<objectRefdByMe> &objectsList)
{
  // Select all the rows from views_usage associated with the passed-in
  // objectUID
  Lng32 cliRC = 0;
  char buf[4000];
  str_sprintf(buf, "select object_type, object_uid, catalog_name, "
                   "schema_name, object_name from %s.\"%s\".%s T, %s.\"%s\".%s VU "
                   "where VU.using_view_uid = %Ld "
                   "and T.object_uid = VU.used_object_uid",
              getSystemCatalog(), SEABASE_MD_SCHEMA, SEABASE_OBJECTS,
              getSystemCatalog(), SEABASE_MD_SCHEMA, SEABASE_VIEWS_USAGE,
              objectUID);

  Queue * usingObjectsQueue = NULL;
  cliRC = cliInterface->fetchAllRows(usingObjectsQueue, buf, 0,
                                     FALSE, FALSE, TRUE);
  if (cliRC < 0)
    {
      cliInterface->retrieveSQLDiagnostics(CmpCommon::diags());
      return -1;
    }

  // set up an objectRefdByMe struct for each returned row
  usingObjectsQueue->position();
  for (int idx = 0; idx < usingObjectsQueue->numEntries(); idx++)
    {
      OutputInfo * oi = (OutputInfo*)usingObjectsQueue->getNext();
      objectRefdByMe objectInfo;
      objectInfo.objectType = NAString(oi->get(0));
      objectInfo.objectUID = *(Int64*)oi->get(1);
      objectInfo.catalogName = NAString(oi->get(2));
      objectInfo.schemaName = NAString(oi->get(3));
      objectInfo.objectName = NAString(oi->get(4));
      objectsList.insert(objectInfo);
    }

  return 0;
}
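// ---------------------------------------------------------------------------
// Usage sketch (illustrative only): walking the usages collected above.
// "cliInterface" and "viewUID" are assumed to be in scope; the NAList
// accessors mirror how tablesRefdList is declared later in this file.
//
//    NAList<objectRefdByMe> usedObjects;
//    if (getListOfDirectlyReferencedObjects(&cliInterface, viewUID,
//                                           usedObjects) < 0)
//       return;  // diagnostics area already populated
//    for (CollIndex i = 0; i < usedObjects.entries(); i++)
//       // e.g. invalidate usedObjects[i].objectName in the NATable cache
// ---------------------------------------------------------------------------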
OutputInfo MaxPooling::initialize(std::vector<double*>& parameterPointers,
                                  std::vector<double*>& parameterDerivativePointers)
{
  OutputInfo info;
  info.dimensions.push_back(fm);
  outRows = inRows / kernelRows;
  outCols = inCols / kernelCols;
  fmOutSize = outRows * outCols;
  info.dimensions.push_back(outRows);
  info.dimensions.push_back(outCols);
  fmInSize = inRows * inCols;
  maxRow = inRows - kernelRows + 1;
  maxCol = inCols - kernelCols + 1;

  y.resize(1, info.outputs());
  deltas.resize(1, info.outputs());

  if(info.outputs() < 1)
    throw OpenANNException("Number of outputs in max-pooling layer is below"
                           " 1. You should either choose a smaller filter"
                           " size or generate a bigger input.");

  return info;
}
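// Worked instance of the geometry computed above (illustrative sketch, not
// called by the library): integer division shrinks each feature map by the
// kernel size, and the layer emits fm such maps.
static inline int examplePooledOutputCount()
{
  const int fm = 6, inRows = 28, inCols = 28;   // hypothetical input layout
  const int kernelRows = 2, kernelCols = 2;     // hypothetical kernel
  const int outRows = inRows / kernelRows;      // 14
  const int outCols = inCols / kernelCols;      // 14
  return fm * outRows * outCols;                // 6 * 14 * 14 = 1176 outputs
}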
void MDDAGClassifier::saveLikelihoods(const string& dataFileName,
                                      const string& shypFileName,
                                      const string& outFileName,
                                      int numIterations)
{
    InputData* pData = loadInputData(dataFileName, shypFileName);

    if (_verbose > 0)
        cout << "Loading strong hypothesis..." << flush;

    // The class that loads the weak hypotheses
    UnSerialization us;

    // Where to put the weak hypotheses
    vector<BaseLearner*> weakHypotheses;

    // loads them
    us.loadHypotheses(shypFileName, weakHypotheses, pData);

    // where the results go
    vector< ExampleResults* > results;

    if (_verbose > 0)
        cout << "Classifying..." << flush;

    const int numClasses = pData->getNumClasses();
    const int numExamples = pData->getNumExamples();

    ofstream outFile(outFileName.c_str());
    string exampleName;

    if (_verbose > 0)
        cout << "Output likelihoods..." << flush;

    // get the results
    /////////////////////////////////////////////////////////////////////
    // computeResults( pData, weakHypotheses, results, numIterations );
    assert( !weakHypotheses.empty() );

    // Initialize the output info
    OutputInfo* pOutInfo = NULL;
    if ( !_outputInfoFile.empty() )
        pOutInfo = new OutputInfo(_outputInfoFile, "err");

    // Creating the results structures. See file Structures.h for the
    // PointResults structure
    results.clear();
    results.reserve(numExamples);
    for (int i = 0; i < numExamples; ++i)
        results.push_back( new ExampleResults(i, numClasses) );

    // sum of votes for classes
    vector< AlphaReal > votesForExamples( numClasses );
    vector< AlphaReal > expVotesForExamples( numClasses );

    // iterator over all the weak hypotheses
    vector<BaseLearner*>::const_iterator whyIt;
    int t;

    // guarded: pOutInfo is NULL when no output info file was requested
    if ( pOutInfo )
        pOutInfo->initialize( pData );

    // for every feature: 1..T
    for (whyIt = weakHypotheses.begin(), t = 0;
         whyIt != weakHypotheses.end() && t < numIterations; ++whyIt, ++t)
    {
        BaseLearner* currWeakHyp = *whyIt;
        AlphaReal alpha = currWeakHyp->getAlpha();

        // for every point
        for (int i = 0; i < numExamples; ++i)
        {
            // a reference for clarity and speed
            vector<AlphaReal>& currVotesVector = results[i]->getVotesVector();

            // for every class
            for (int l = 0; l < numClasses; ++l)
                currVotesVector[l] += alpha * currWeakHyp->classify(pData, i, l);
        }

        // if needed output the step-by-step information
        if ( pOutInfo )
        {
            pOutInfo->outputIteration(t);
            pOutInfo->outputCustom(pData, currWeakHyp);

            // Margins and edge require an update of the weights,
            // therefore I keep them out for the moment
            //outInfo.outputMargins(pData, currWeakHyp);
            //outInfo.outputEdge(pData, currWeakHyp);
            pOutInfo->endLine();
        }

        // calculate likelihoods from votes
        fill( votesForExamples.begin(), votesForExamples.end(), 0.0 );
        AlphaReal lLambda = 0.0;

        for (int i = 0; i < numExamples; ++i)
        {
            // a reference for clarity and speed
            vector<AlphaReal>& currVotesVector = results[i]->getVotesVector();
            AlphaReal sumExp = 0.0;

            // for every class
            for (int l = 0; l < numClasses; ++l)
            {
                expVotesForExamples[l] = exp( currVotesVector[l] );
                sumExp += expVotesForExamples[l];
            }

            if ( sumExp > numeric_limits<AlphaReal>::epsilon() )
            {
                for (int l = 0; l < numClasses; ++l)
                    expVotesForExamples[l] /= sumExp;
            }

            Example ex = pData->getExample( results[i]->getIdx() );
            vector<Label> labs = ex.getLabels();
            AlphaReal m = numeric_limits<AlphaReal>::infinity();

            for (int l = 0; l < numClasses; ++l)
            {
                if ( labs[l].y > 0 )
                {
                    if ( expVotesForExamples[l] > numeric_limits<AlphaReal>::epsilon() )
                    {
                        AlphaReal logVal = log( expVotesForExamples[l] );

                        if ( logVal != m )
                            lLambda += ( ( 1.0/(AlphaReal)numExamples ) * logVal );
                    }
                }
            }
        }

        outFile << t << "\t" << lLambda;
        outFile << '\n';
        outFile.flush();
    }

    if (pOutInfo)
        delete pOutInfo;

    // computeResults( pData, weakHypotheses, results, numIterations );
    ///////////////////////////////////////////////////////////////////////////////////

    /*
      for (int i = 0; i < numExamples; ++i)
      {
          // output the name if it exists, otherwise the number
          // of the example
          exampleName = pData->getExampleName(i);
          if ( !exampleName.empty() )
              outFile << exampleName << ',';

          // output the posteriors
          outFile << results[i]->getVotesVector()[0];
          for (int l = 1; l < numClasses; ++l)
              outFile << ',' << results[i]->getVotesVector()[l];
          outFile << '\n';
      }
    */

    if (_verbose > 0)
        cout << "Done!" << endl;

    if (_verbose > 1)
    {
        cout << "\nClass order (You can change it in the header of the data file):" << endl;
        for (int l = 0; l < numClasses; ++l)
            cout << "- " << pData->getClassMap().getNameFromIdx(l) << endl;
    }

    // delete the input data file
    if (pData)
        delete pData;

    vector<ExampleResults*>::iterator it;
    for (it = results.begin(); it != results.end(); ++it)
        delete (*it);
}
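// ---------------------------------------------------------------------------
// Minimal standalone sketch of the per-example computation above, using plain
// doubles instead of AlphaReal: the accumulated votes are exponentiated and
// normalized into posteriors, and the log-posterior of each positive label is
// accumulated (the caller scales by 1/numExamples).  Assumes exp(), log(),
// numeric_limits, and vector are in scope, as the function above already
// uses them unqualified.
static double exampleLogLikelihoodTerm(const vector<double>& votes,
                                       const vector<int>& labelSigns /* +1/-1 */)
{
    double sumExp = 0.0;
    vector<double> posteriors(votes.size());
    for (size_t l = 0; l < votes.size(); ++l)
    {
        posteriors[l] = exp(votes[l]);
        sumExp += posteriors[l];
    }

    double term = 0.0;
    for (size_t l = 0; l < votes.size(); ++l)
    {
        if (sumExp > numeric_limits<double>::epsilon())
            posteriors[l] /= sumExp;                 // softmax normalization
        if (labelSigns[l] > 0 &&
            posteriors[l] > numeric_limits<double>::epsilon())
            term += log(posteriors[l]);              // correct-label log-posterior
    }
    return term;
}
// ---------------------------------------------------------------------------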
// *****************************************************************************
// *                                                                           *
// * Function: CmpSeabaseDDL::dropSeabaseSchema                                *
// *                                                                           *
// *    Implements the DROP SCHEMA command.                                    *
// *                                                                           *
// *****************************************************************************
// *                                                                           *
// *  Parameters:                                                              *
// *                                                                           *
// *  <dropSchemaNode>             StmtDDLDropSchema *                In       *
// *    is a pointer to a drop schema parser node.                             *
// *                                                                           *
// *****************************************************************************
void CmpSeabaseDDL::dropSeabaseSchema(StmtDDLDropSchema * dropSchemaNode)
{
   Lng32 cliRC = 0;

   ComSchemaName schemaName(dropSchemaNode->getSchemaName());
   NAString catName = schemaName.getCatalogNamePartAsAnsiString();
   ComAnsiNamePart schNameAsComAnsi = schemaName.getSchemaNamePart();
   NAString schName = schNameAsComAnsi.getInternalName();
   ExeCliInterface cliInterface(STMTHEAP, NULL, NULL,
                                CmpCommon::context()->sqlSession()->getParentQid());
   Int32 objectOwnerID = 0;
   Int32 schemaOwnerID = 0;
   ComObjectType objectType;

   Int64 schemaUID = getObjectTypeandOwner(&cliInterface,catName.data(),
                                           schName.data(),
                                           SEABASE_SCHEMA_OBJECTNAME,
                                           objectType,schemaOwnerID);

   // if schemaUID == -1, then either the schema does not exist or an
   // unexpected error occurred
   if (schemaUID == -1)
   {
      // If an error occurred, return
      if (CmpCommon::diags()->getNumber(DgSqlCode::ERROR_) > 0)
         return;

      // schema does not exist and IF EXISTS specified, then ignore and continue
      if (dropSchemaNode->dropIfExists())
         return;

      // A Trafodion schema does not exist if the schema object row is not
      // present: CATALOG-NAME.SCHEMA-NAME.__SCHEMA__.
      *CmpCommon::diags() << DgSqlCode(-CAT_SCHEMA_DOES_NOT_EXIST_ERROR)
                          << DgSchemaName(schemaName.getExternalName().data());
      return;
   }

   if (!isDDLOperationAuthorized(SQLOperation::DROP_SCHEMA,
                                 schemaOwnerID,schemaOwnerID))
   {
      *CmpCommon::diags() << DgSqlCode(-CAT_NOT_AUTHORIZED);
      return;
   }

   ComObjectName objName(catName,schName,NAString("dummy"),COM_TABLE_NAME,TRUE);

   if ((isSeabaseReservedSchema(objName) ||
        (schName == SEABASE_SYSTEM_SCHEMA)) &&
       !Get_SqlParser_Flags(INTERNAL_QUERY_FROM_EXEUTIL))
   {
      *CmpCommon::diags() << DgSqlCode(-CAT_USER_CANNOT_DROP_SMD_SCHEMA)
                          << DgSchemaName(schemaName.getExternalName().data());
      return;
   }

   bool isVolatile = (memcmp(schName.data(),"VOLATILE_SCHEMA",
                             strlen("VOLATILE_SCHEMA")) == 0);

   // Can't drop a schema whose name begins with VOLATILE_SCHEMA unless the
   // keyword VOLATILE was specified in the DROP SCHEMA command.
   if (isVolatile && !dropSchemaNode->isVolatile())
   {
      *CmpCommon::diags() << DgSqlCode(-CAT_RESERVED_METADATA_SCHEMA_NAME)
                          << DgTableName(schName);
      return;
   }

   // Get a list of all objects in the schema, excluding the schema object
   // itself.
   char query[4000];

   str_sprintf(query,"SELECT TRIM(object_name), TRIM(object_type) "
                     "FROM %s.\"%s\".%s "
                     "WHERE catalog_name = '%s' AND schema_name = '%s' AND "
                     "object_name <> '"SEABASE_SCHEMA_OBJECTNAME"' "
                     "FOR READ COMMITTED ACCESS",
               getSystemCatalog(),SEABASE_MD_SCHEMA,SEABASE_OBJECTS,
               (char*)catName.data(),(char*)schName.data());

   Queue * objectsQueue = NULL;
   cliRC = cliInterface.fetchAllRows(objectsQueue, query, 0, FALSE, FALSE, TRUE);
   if (cliRC < 0)
   {
      cliInterface.retrieveSQLDiagnostics(CmpCommon::diags());
      return;
   }

   objectsQueue->position();

   if ((dropSchemaNode->getDropBehavior() == COM_RESTRICT_DROP_BEHAVIOR) &&
       (objectsQueue->numEntries() > 0))
   {
      OutputInfo * oi = (OutputInfo*)objectsQueue->getCurr();

      *CmpCommon::diags() << DgSqlCode(-CAT_SCHEMA_IS_NOT_EMPTY)
                          << DgTableName(oi->get(0));
      return;
   }

   bool someObjectsCouldNotBeDropped = false;

   // Drop libraries, procedures (SPJs), UDFs (functions), and views
   objectsQueue->position();
   for (int idx = 0; idx < objectsQueue->numEntries(); idx++)
   {
      OutputInfo * vi = (OutputInfo*)objectsQueue->getNext();

      char * objName = vi->get(0);
      NAString objectTypeLit = vi->get(1);
      ComObjectType objectType = PrivMgr::ObjectLitToEnum(objectTypeLit.data());
      char buf[1000];
      NAString objectTypeString;
      NAString cascade = " ";

      switch (objectType)
      {
         // These object types are handled later and can be ignored for now.
         case COM_BASE_TABLE_OBJECT:
         case COM_INDEX_OBJECT:
         case COM_CHECK_CONSTRAINT_OBJECT:
         case COM_NOT_NULL_CONSTRAINT_OBJECT:
         case COM_PRIMARY_KEY_CONSTRAINT_OBJECT:
         case COM_REFERENTIAL_CONSTRAINT_OBJECT:
         case COM_SEQUENCE_GENERATOR_OBJECT:
         case COM_UNIQUE_CONSTRAINT_OBJECT:
         {
            continue;
         }
         case COM_LIBRARY_OBJECT:
         {
            objectTypeString = "LIBRARY";
            cascade = "CASCADE";
            break;
         }
         case COM_STORED_PROCEDURE_OBJECT:
         {
            objectTypeString = "PROCEDURE";
            break;
         }
         case COM_USER_DEFINED_ROUTINE_OBJECT:
         {
            objectTypeString = "FUNCTION";
            cascade = "CASCADE";
            break;
         }
         case COM_VIEW_OBJECT:
         {
            objectTypeString = "VIEW";
            cascade = "CASCADE";
            break;
         }
         // These object types should not be seen.
         case COM_MV_OBJECT:
         case COM_MVRG_OBJECT:
         case COM_TRIGGER_OBJECT:
         case COM_LOB_TABLE_OBJECT:
         case COM_TRIGGER_TABLE_OBJECT:
         case COM_SYNONYM_OBJECT:
         case COM_PRIVATE_SCHEMA_OBJECT:
         case COM_SHARED_SCHEMA_OBJECT:
         case COM_EXCEPTION_TABLE_OBJECT:
         case COM_LOCK_OBJECT:
         case COM_MODULE_OBJECT:
         default:
            SEABASEDDL_INTERNAL_ERROR("Unrecognized object type in schema");
            return;
      }

      str_sprintf(buf, "drop %s \"%s\".\"%s\".\"%s\" %s",
                  objectTypeString.data(),(char*)catName.data(),
                  (char*)schName.data(),objName,cascade.data());

      cliRC = cliInterface.executeImmediate(buf);
      if (cliRC < 0 && cliRC != -CAT_OBJECT_DOES_NOT_EXIST_IN_TRAFODION)
         someObjectsCouldNotBeDropped = true;
   }

   // Drop all tables in the schema.  This will also drop any associated
   // constraints.  Drop of histogram tables is deferred.
   bool histExists = false;

   objectsQueue->position();
   for (int idx = 0; idx < objectsQueue->numEntries(); idx++)
   {
      OutputInfo * vi = (OutputInfo*)objectsQueue->getNext();

      NAString objName = vi->get(0);
      NAString objType = vi->get(1);

      // drop user objects first
      if (objType == COM_BASE_TABLE_OBJECT_LIT)
      {
         if (!(objName == HBASE_HIST_NAME || objName == HBASE_HISTINT_NAME))
         {
            if (dropOneTable(cliInterface,(char*)catName.data(),
                             (char*)schName.data(),(char*)objName.data(),
                             isVolatile))
               someObjectsCouldNotBeDropped = true;
         }
         else
            histExists = true;
      }
   }

   // Drop any remaining indexes.
   str_sprintf(query,"SELECT TRIM(object_name), TRIM(object_type) "
                     "FROM %s.\"%s\".%s "
                     "WHERE catalog_name = '%s' AND "
                     "      schema_name = '%s' AND "
                     "      object_type = '%s' "
                     "FOR READ COMMITTED ACCESS ",
               getSystemCatalog(),SEABASE_MD_SCHEMA,SEABASE_OBJECTS,
               (char*)catName.data(),(char*)schName.data(),
               COM_INDEX_OBJECT_LIT);

   cliRC = cliInterface.fetchAllRows(objectsQueue,query,0,FALSE,FALSE,TRUE);
   if (cliRC < 0)
   {
      cliInterface.retrieveSQLDiagnostics(CmpCommon::diags());
      return;
   }

   objectsQueue->position();
   for (int idx = 0; idx < objectsQueue->numEntries(); idx++)
   {
      OutputInfo * vi = (OutputInfo*)objectsQueue->getNext();

      char * objName = vi->get(0);
      NAString objType = vi->get(1);

      if (objType == COM_INDEX_OBJECT_LIT)
      {
         char buf [1000];

         str_sprintf(buf, "DROP INDEX \"%s\".\"%s\".\"%s\" CASCADE",
                     (char*)catName.data(), (char*)schName.data(), objName);

         cliRC = cliInterface.executeImmediate(buf);
         if (cliRC < 0 && cliRC != -CAT_OBJECT_DOES_NOT_EXIST_IN_TRAFODION)
            someObjectsCouldNotBeDropped = true;
      }
   }

   // Drop any remaining sequences.
   str_sprintf(query,"SELECT TRIM(object_name), TRIM(object_type) "
                     "FROM %s.\"%s\".%s "
                     "WHERE catalog_name = '%s' AND "
                     "      schema_name = '%s' AND "
                     "      object_type = '%s' "
                     "FOR READ COMMITTED ACCESS ",
               getSystemCatalog(),SEABASE_MD_SCHEMA,SEABASE_OBJECTS,
               (char*)catName.data(),(char*)schName.data(),
               COM_SEQUENCE_GENERATOR_OBJECT_LIT);

   cliRC = cliInterface.fetchAllRows(objectsQueue,query,0,FALSE,FALSE,TRUE);
   if (cliRC < 0)
   {
      cliInterface.retrieveSQLDiagnostics(CmpCommon::diags());
      return;
   }

   objectsQueue->position();
   for (int idx = 0; idx < objectsQueue->numEntries(); idx++)
   {
      OutputInfo * vi = (OutputInfo*)objectsQueue->getNext();

      char * objName = vi->get(0);
      NAString objType = vi->get(1);

      if (objType == COM_SEQUENCE_GENERATOR_OBJECT_LIT)
      {
         char buf [1000];

         str_sprintf(buf, "DROP SEQUENCE \"%s\".\"%s\".\"%s\"",
                     (char*)catName.data(), (char*)schName.data(), objName);

         cliRC = cliInterface.executeImmediate(buf);
         if (cliRC < 0 && cliRC != -CAT_OBJECT_DOES_NOT_EXIST_IN_TRAFODION)
            someObjectsCouldNotBeDropped = true;
      }
   }

   // For volatile schemas, sometimes only the objects get dropped.
   // If the dropObjectsOnly flag is set, just exit now, we are done.
   if (dropSchemaNode->dropObjectsOnly())
      return;

   // Now drop any histogram objects
   if (histExists)
   {
      if (dropOneTable(cliInterface,(char*)catName.data(),
                       (char*)schName.data(),(char*)HBASE_HISTINT_NAME,false))
         someObjectsCouldNotBeDropped = true;
      if (dropOneTable(cliInterface,(char*)catName.data(),
                       (char*)schName.data(),(char*)HBASE_HIST_NAME,false))
         someObjectsCouldNotBeDropped = true;
   }

   if (someObjectsCouldNotBeDropped)
   {
      CmpCommon::diags()->clear();
      *CmpCommon::diags() << DgSqlCode(-CAT_UNABLE_TO_DROP_SCHEMA)
                          << DgSchemaName(catName + "." + schName);
      return;
   }

   // Verify all objects in the schema have been dropped.
   str_sprintf(query,"SELECT COUNT(*) "
                     "FROM %s.\"%s\".%s "
                     "WHERE catalog_name = '%s' AND schema_name = '%s' AND "
                     "object_name <> '"SEABASE_SCHEMA_OBJECTNAME"' "
                     "FOR READ COMMITTED ACCESS",
               getSystemCatalog(),SEABASE_MD_SCHEMA,SEABASE_OBJECTS,
               (char*)catName.data(),(char*)schName.data());

   int32_t length = 0;
   int32_t rowCount = 0;

   cliRC = cliInterface.executeImmediate(query,(char*)&rowCount,&length,NULL);
   if (cliRC < 0)
   {
      cliInterface.retrieveSQLDiagnostics(CmpCommon::diags());
      return;
   }

   if (rowCount > 0)
   {
      CmpCommon::diags()->clear();
      *CmpCommon::diags() << DgSqlCode(-CAT_UNABLE_TO_DROP_SCHEMA)
                          << DgSchemaName(catName + "." + schName);
      return;
   }

   // After all objects in the schema have been dropped, drop the schema
   // object itself.
   char buf [1000];

   str_sprintf(buf,"DELETE FROM %s.\"%s\".%s "
                   "WHERE CATALOG_NAME = '%s' AND SCHEMA_NAME = '%s' AND "
                   "OBJECT_NAME = '"SEABASE_SCHEMA_OBJECTNAME"'",
               getSystemCatalog(),SEABASE_MD_SCHEMA,SEABASE_OBJECTS,
               (char*)catName.data(),(char*)schName.data());

   cliRC = cliInterface.executeImmediate(buf);
   if (cliRC < 0)
      *CmpCommon::diags() << DgSqlCode(-CAT_UNABLE_TO_DROP_SCHEMA)
                          << DgSchemaName(catName + "." + schName);
}
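// ---------------------------------------------------------------------------
// For reference, with a hypothetical schema TRAFODION.SALES the final DELETE
// built above would expand to something like:
//
//    DELETE FROM TRAFODION."_MD_".OBJECTS
//    WHERE CATALOG_NAME = 'TRAFODION' AND SCHEMA_NAME = 'SALES' AND
//    OBJECT_NAME = '__SCHEMA__'
//
// The catalog, metadata schema, and table names come from getSystemCatalog(),
// SEABASE_MD_SCHEMA, and SEABASE_OBJECTS; the literal values shown here are
// illustrative assumptions, not guaranteed expansions.
// ---------------------------------------------------------------------------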
void FilterBoostLearner::run(const nor_utils::Args& args)
{
    // load the arguments
    this->getArgs(args);

    time_t startTime, currentTime;
    time(&startTime);

    // get the registered weak learner (type from name)
    BaseLearner* pWeakHypothesisSource =
        BaseLearner::RegisteredLearners().getLearner(_baseLearnerName);

    // initialize learning options; normally it's done in the strong loop
    // also, here we do it for Product learners, so input data can be created
    pWeakHypothesisSource->initLearningOptions(args);

    BaseLearner* pConstantWeakHypothesisSource =
        BaseLearner::RegisteredLearners().getLearner("ConstantLearner");

    // get the training input data, and load it
    InputData* pTrainingData = pWeakHypothesisSource->createInputData();
    pTrainingData->initOptions(args);
    pTrainingData->load(_trainFileName, IT_TRAIN, _verbose);

    const int numClasses = pTrainingData->getNumClasses();
    const int numExamples = pTrainingData->getNumExamples();

    // initialize the margins variable
    _margins.resize( numExamples );
    for( int i=0; i<numExamples; i++ )
    {
        _margins[i].resize( numClasses );
        fill( _margins[i].begin(), _margins[i].end(), 0.0 );
    }

    // get the testing input data, and load it
    InputData* pTestData = NULL;
    if ( !_testFileName.empty() )
    {
        pTestData = pWeakHypothesisSource->createInputData();
        pTestData->initOptions(args);
        pTestData->load(_testFileName, IT_TEST, _verbose);
    }

    // The output information object
    OutputInfo* pOutInfo = NULL;

    if ( !_outputInfoFile.empty() )
    {
        // Baseline: constant classifier - goes into 0th iteration
        BaseLearner* pConstantWeakHypothesis = pConstantWeakHypothesisSource->create();
        pConstantWeakHypothesis->initLearningOptions(args);
        pConstantWeakHypothesis->setTrainingData(pTrainingData);
        AlphaReal constantEnergy = pConstantWeakHypothesis->run();

        pOutInfo = new OutputInfo(args);
        pOutInfo->initialize(pTrainingData);

        updateMargins( pTrainingData, pConstantWeakHypothesis );

        if (pTestData)
            pOutInfo->initialize(pTestData);
        pOutInfo->outputHeader(pTrainingData->getClassMap() );

        pOutInfo->outputIteration(-1);
        pOutInfo->outputCustom(pTrainingData, pConstantWeakHypothesis);

        if (pTestData)
        {
            pOutInfo->separator();
            pOutInfo->outputCustom(pTestData, pConstantWeakHypothesis);
        }

        pOutInfo->outputCurrentTime();
        pOutInfo->endLine();
        pOutInfo->initialize(pTrainingData);

        if (pTestData)
            pOutInfo->initialize(pTestData);
    }

    // reload the previously found weak learners if -resume is set.
    // otherwise just return 0
    int startingIteration = resumeWeakLearners(pTrainingData);

    Serialization ss(_shypFileName, _isShypCompressed );
    ss.writeHeader(_baseLearnerName); // this must go after resumeProcess has been called

    // perform the resuming if necessary. If not it will just return
    resumeProcess(ss, pTrainingData, pTestData, pOutInfo);

    if (_verbose == 1)
        cout << "Learning in progress..." << endl;

    ///////////////////////////////////////////////////////////////////////
    // Starting the AdaBoost main loop
    ///////////////////////////////////////////////////////////////////////
    for (int t = startingIteration; t < _numIterations; ++t)
    {
        if (_verbose > 1)
            cout << "------- WORKING ON ITERATION " << (t+1) << " -------" << endl;

        // create the weak learner
        BaseLearner* pWeakHypothesis;
        BaseLearner* pConstantWeakHypothesis;
        pWeakHypothesis = pWeakHypothesisSource->create();
        pWeakHypothesis->initLearningOptions(args);
        //pTrainingData->clearIndexSet();
        pWeakHypothesis->setTrainingData(pTrainingData);
        AlphaReal edge, energy=0.0;

        // create the constant learner
        pConstantWeakHypothesis = pConstantWeakHypothesisSource->create();
        pConstantWeakHypothesis->initLearningOptions(args);
        pConstantWeakHypothesis->setTrainingData(pTrainingData);

        AlphaReal constantEdge = -numeric_limits<AlphaReal>::max();

        int currentNumberOfUsedData = static_cast<int>(_Cn * log(t+3.0));

        if ( _onlineWeakLearning )
        {
            // check whether the weak learner is a StochasticLearner
            try
            {
                StochasticLearner* pStochasticLearner =
                    dynamic_cast<StochasticLearner*>(pWeakHypothesis);
                StochasticLearner* pStochasticConstantWeakHypothesis =
                    dynamic_cast<StochasticLearner*>(pConstantWeakHypothesis);

                pStochasticLearner->initLearning();
                pStochasticConstantWeakHypothesis->initLearning();

                if (_verbose>1)
                    cout << "Number of random instances: \t" << currentNumberOfUsedData << endl;

                // set the weights
                setWeightToMargins(pTrainingData);

                // learning
                for (int i=0; i<currentNumberOfUsedData; ++i )
                {
                    int randomIndex = (rand() % pTrainingData->getNumExamples());
                    //int randomIndex = getRandomIndex();
                    pStochasticLearner->update(randomIndex);
                    pStochasticConstantWeakHypothesis->update(randomIndex);
                }
                pStochasticLearner->finishLearning();
                pStochasticConstantWeakHypothesis->finishLearning();
            }
            catch (bad_cast& e)
            {
                cerr << "The weak learner must be a StochasticLearner!!!" << endl;
                exit(-1);
            }
        }
        else
        {
            filter( pTrainingData, currentNumberOfUsedData );
            if ( pTrainingData->getNumExamples() < 2 )
            {
                filter( pTrainingData, currentNumberOfUsedData, false );
            }

            if (_verbose > 1)
            {
                cout << "--> Size of training data = " << pTrainingData->getNumExamples() << endl;
            }

            energy = pWeakHypothesis->run();
            pConstantWeakHypothesis->run();
        }

        // estimate edge
        filter( pTrainingData, currentNumberOfUsedData, false );
        edge = pWeakHypothesis->getEdge(true) / 2.0;
        constantEdge = pConstantWeakHypothesis->getEdge() / 2.0;

        if ( constantEdge > edge )
        {
            delete pWeakHypothesis;
            pWeakHypothesis = pConstantWeakHypothesis;
            edge = constantEdge;
        }
        else
        {
            delete pConstantWeakHypothesis;
        }

        // calculate alpha
        AlphaReal alpha = 0.0;
        alpha = 0.5 * log( ( 1 + edge ) / ( 1 - edge ) );
        pWeakHypothesis->setAlpha( alpha );
        _sumAlpha += alpha;

        if (_verbose > 1)
            cout << "Weak learner: " << pWeakHypothesis->getName() << endl;

        // Output the step-by-step information
        pTrainingData->clearIndexSet();
        printOutputInfo(pOutInfo, t, pTrainingData, pTestData, pWeakHypothesis);

        // Updates the weights and returns the edge
        //AlphaReal gamma = updateWeights(pTrainingData, pWeakHypothesis);

        if (_verbose > 1)
        {
            cout << setprecision(5)
                 << "--> Alpha = " << pWeakHypothesis->getAlpha() << endl
                 << "--> Edge  = " << edge << endl
                 << "--> Energy  = " << energy << endl
                 // << "--> ConstantEnergy  = " << constantEnergy << endl
                 // << "--> difference  = " << (energy - constantEnergy) << endl
                 ;
        }

        // update the margins
        //saveMargins();
        updateMargins( pTrainingData, pWeakHypothesis );

        // append the current weak learner to strong hypothesis file,
        // that is, serialize it.
        ss.appendHypothesis(t, pWeakHypothesis);

        // Add it to the internal list of weak hypotheses
        _foundHypotheses.push_back(pWeakHypothesis);

        // check if the time limit has been reached
        if (_maxTime > 0)
        {
            time( &currentTime );
            float diff = difftime(currentTime, startTime); // difftime is in seconds
            diff /= 60; // = minutes

            if (diff > _maxTime)
            {
                if (_verbose > 0)
                    cout << "Time limit of " << _maxTime
                         << " minutes has been reached!" << endl;
                break;
            }
        } // check for maxtime

        delete pWeakHypothesis;
    } // loop on iterations
    /////////////////////////////////////////////////////////

    // write the footer of the strong hypothesis file
    ss.writeFooter();

    // Free the two input data objects
    if (pTrainingData)
        delete pTrainingData;
    if (pTestData)
        delete pTestData;
    if (pOutInfo)
        delete pOutInfo;

    if (_verbose > 0)
        cout << "Learning completed." << endl;
}
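// ---------------------------------------------------------------------------
// Worked instance of the alpha rule used in the loop above (illustrative
// sketch, not called by the learner): with edge = 0.2,
// alpha = 0.5 * ln(1.2 / 0.8) ~= 0.2027, so the better of the trained and
// constant hypotheses receives that vote weight.  Assumes log() from <cmath>
// is in scope, as the surrounding code already calls it.
static inline double exampleAlphaFromEdge(double edge)
{
    return 0.5 * log((1.0 + edge) / (1.0 - edge));  // edge 0.2 -> ~0.2027
}
// ---------------------------------------------------------------------------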
/**
 * The main function. Everything starts here!
 * \param argc The number of arguments.
 * \param argv The arguments.
 * \date 11/11/2005
 */
int main(int argc, const char* argv[])
{
    // initializing the random number generator
    srand ( time(NULL) );

    // no need to synchronize with C style stream
    std::ios_base::sync_with_stdio(false);

#if STABLE_SORT
    cerr << "WARNING: Stable sort active! It might be slower!!" << endl;
#endif

    //////////////////////////////////////////////////////////////////////////
    // Standard arguments
    nor_utils::Args args;

    args.setArgumentDiscriminator("--");

    args.declareArgument("help");
    args.declareArgument("static");

    args.declareArgument("h", "Help", 1, "<optiongroup>");

    //////////////////////////////////////////////////////////////////////////
    // Basic Arguments

    args.setGroup("Parameters");

    args.declareArgument("train", "Performs training.",
                         2, "<dataFile> <nIterations>");
    args.declareArgument("traintest", "Performs training and test at the same time.",
                         3, "<trainingDataFile> <testDataFile> <nIterations>");
    args.declareArgument("trainvalidtest", "Performs training, validation, and test at the same time.",
                         4, "<trainingDataFile> <validDataFile> <testDataFile> <nIterations>");
    args.declareArgument("test", "Test the model.",
                         3, "<dataFile> <numIters> <shypFile>");
    args.declareArgument("test", "Test the model and output the results",
                         4, "<datafile> <shypFile> <numIters> <outFile>");
    args.declareArgument("cmatrix", "Print the confusion matrix for the given model.",
                         2, "<dataFile> <shypFile>");
    args.declareArgument("cmatrixfile", "Print the confusion matrix with the class names to a file.",
                         3, "<dataFile> <shypFile> <outFile>");
    args.declareArgument("posteriors", "Output the posteriors for each class, that is the vector-valued discriminant function for the given dataset and model.",
                         4, "<dataFile> <shypFile> <outFile> <numIters>");
    args.declareArgument("posteriors", "Output the posteriors for each class, that is the vector-valued discriminant function for the given dataset and model periodically.",
                         5, "<dataFile> <shypFile> <outFile> <numIters> <period>");
    args.declareArgument("encode", "Save the coefficient vector of boosting individually on each point using ParasiteLearner",
                         6, "<inputDataFile> <autoassociativeDataFile> <outputDataFile> <nIterations> <poolFile> <nBaseLearners>");
    args.declareArgument("ssfeatures", "Print matrix data for SingleStump-Based weak learners (if numIters=0 it means all of them).",
                         4, "<dataFile> <shypFile> <outFile> <numIters>");

    args.declareArgument("fileformat", "Defines the type of input file. Available types are:\n"
                         "* simple: each line has attributes separated by whitespace and class at the end (DEFAULT!)\n"
                         "* arff: arff filetype. The header file can be specified using --headerfile option\n"
                         "* arffbzip: bzipped arff filetype. The header file can be specified using --headerfile option\n"
                         "* svmlight: \n"
                         "(Example: --fileformat simple)",
                         1, "<fileFormat>");

    args.declareArgument("headerfile", "The header file for arff and SVMLight formats.", 1, "header.txt");
    args.declareArgument("constant", "Check constant learner in each iteration.", 0, "");
    args.declareArgument("timelimit", "Time limit in minutes", 1, "<minutes>" );
    args.declareArgument("stronglearner", "Available strong learners:\n"
                         "AdaBoost (default)\n"
                         "FilterBoost\n"
                         "SoftCascade\n"
                         "VJcascade\n",
                         1, "<stronglearner>" );
    args.declareArgument("slowresumeprocess", "Computes every statistic in each iteration (slow resume)\n"
                         "Computes only the statistics in the last iteration (fast resume, default)\n",
                         0, "" );
    args.declareArgument("weights", "Outputs the weights of instances at the end of the learning process",
                         1, "<filename>" );
    args.declareArgument("Cn", "Resampling size for FilterBoost (default=300)", 1, "<value>" );
    args.declareArgument("onlinetraining", "The weak learner will be trained online\n", 0, "" );

    //// ignored for the moment!
    //args.declareArgument("arffheader", "Specify the arff header.", 1, "<arffHeaderFile>");

    // for MDDAG
    //args.setGroup("MDDAG");
    args.declareArgument("traintestmddag", "Performs training and test at the same time using mddag.",
                         5, "<trainingDataFile> <testDataFile> <modelFile> <nIterations> <baseIter>");
    args.declareArgument("policytrainingiter", "The iteration number the policy learner takes.", 1, "<iternum>");
    args.declareArgument("rollouts", "The number of rollouts.", 1, "<num>");
    args.declareArgument("rollouttype", "Rollout type (montecarlo or szatymaz)", 1, "<rollouttype>");
    args.declareArgument("beta", "Trade-off parameter", 1, "<beta>");
    args.declareArgument("outdir", "Output directory.", 1, "<outdir>");
    args.declareArgument("policyalpha", "Alpha for policy array.", 1, "<alpha>");
    args.declareArgument("succrewardtype", "Reward type (e01 or hamming)", 1, "<reward_type>");
    args.declareArgument("outtrainingerror", "Output training error", 0, "");
    args.declareArgument("epsilon", "Exploration term", 1, "<epsilon>");
    args.declareArgument("updateperc", "Percentage of components in the policy that are updated", 1, "<perc>");

    // for VJ cascade
    VJCascadeLearner::declareBaseArguments(args);

    // for SoftCascade
    SoftCascadeLearner::declareBaseArguments(args);

    //////////////////////////////////////////////////////////////////////////
    // Options

    args.setGroup("I/O Options");

    /////////////////////////////////////////////
    // these are valid only for .txt input!
    // they might be removed!
    args.declareArgument("d", "The separation characters between the fields (default: whitespaces).\nExample: -d \"\\t,.-\"\nNote: new-line is always included!",
                         1, "<separators>");
    args.declareArgument("classend", "The class is the last column instead of the first (or second if -examplelabel is active).");
    args.declareArgument("examplename", "The data file has an additional column (the very first) which contains the 'name' of the example.");
    /////////////////////////////////////////////

    args.setGroup("Basic Algorithm Options");
    args.declareArgument("weightpolicy", "Specify the type of weight initialization. The user-specified weights (if available) are used inside the policy, which can be:\n"
                         "* sharepoints Share the weight equally among data points and between positive and negative labels (DEFAULT)\n"
                         "* sharelabels Share the weight equally among data points\n"
                         "* proportional Share the weights freely",
                         1, "<weightType>");

    args.setGroup("General Options");

    args.declareArgument("verbose", "Set the verbose level 0, 1 or 2 (0=no messages, 1=default, 2=all messages).", 1, "<val>");
    args.declareArgument("outputinfo", "Output information on the algorithm performances during training, on file <filename>.",
                         1, "<filename>");
    args.declareArgument("outputinfo", "Output specific information on the algorithm performances during training, on file <filename> <outputlist>. <outputlist> must be a concatenated list of three-character abbreviations (ex: err for error, fpr for false positive rate)",
                         2, "<filename> <outputlist>");
    args.declareArgument("seed", "Defines the seed for the random operations.", 1, "<seedval>");

    //////////////////////////////////////////////////////////////////////////
    // Shows the list of available learners
    string learnersComment = "Available learners are:";

    vector<string> learnersList;
    BaseLearner::RegisteredLearners().getList(learnersList);
    vector<string>::const_iterator it;
    for (it = learnersList.begin(); it != learnersList.end(); ++it)
    {
        learnersComment += "\n ** " + *it;
        // defaultLearner is defined in Defaults.h
        if ( *it == defaultLearner )
            learnersComment += " (DEFAULT)";
    }

    args.declareArgument("learnertype", "Change the type of weak learner. " + learnersComment,
                         1, "<learner>");

    //////////////////////////////////////////////////////////////////////////
    //// Declare arguments that belong to all weak learners
    BaseLearner::declareBaseArguments(args);

    ////////////////////////////////////////////////////////////////////////////
    //// Weak learners (and input data) arguments
    for (it = learnersList.begin(); it != learnersList.end(); ++it)
    {
        args.setGroup(*it + " Options");
        // add weaklearner-specific options
        BaseLearner::RegisteredLearners().getLearner(*it)->declareArguments(args);
    }

    //////////////////////////////////////////////////////////////////////////
    //// Declare arguments that belong to all bandit learners
    GenericBanditAlgorithm::declareBaseArguments(args);

    //////////////////////////////////////////////////////////////////////////////////////////
    //////////////////////////////////////////////////////////////////////////////////////////

    switch ( args.readArguments(argc, argv) )
    {
        case nor_utils::AOT_NO_ARGUMENTS:
            showBase();
            break;
        case nor_utils::AOT_UNKOWN_ARGUMENT:
            exit(1);
            break;
        case nor_utils::AOT_INCORRECT_VALUES_NUMBER:
            exit(1);
            break;
        case nor_utils::AOT_OK:
            break;
    }

    //////////////////////////////////////////////////////////////////////////////////////////
    //////////////////////////////////////////////////////////////////////////////////////////

    if ( args.hasArgument("help") )
        showHelp(args, learnersList);
    if ( args.hasArgument("static") )
        showStaticConfig();

    //////////////////////////////////////////////////////////////////////////////////////////
    //////////////////////////////////////////////////////////////////////////////////////////

    if ( args.hasArgument("h") )
        showOptionalHelp(args);

    //////////////////////////////////////////////////////////////////////////////////////////
    //////////////////////////////////////////////////////////////////////////////////////////

    int verbose = 1;

    if ( args.hasArgument("verbose") )
        args.getValue("verbose", 0, verbose);

    //////////////////////////////////////////////////////////////////////////////////////////
    //////////////////////////////////////////////////////////////////////////////////////////

    // defines the seed
    if (args.hasArgument("seed"))
    {
        unsigned int seed = args.getValue<unsigned int>("seed", 0);
        srand(seed);
    }

    //////////////////////////////////////////////////////////////////////////////////////////
    //////////////////////////////////////////////////////////////////////////////////////////

    GenericStrongLearner* pModel = NULL;

    if ( args.hasArgument("train") ||
         args.hasArgument("traintest") ||
         args.hasArgument("trainvalidtest") ) // for Viola-Jones Cascade
    {
        // get the name of the learner
        string baseLearnerName = defaultLearner;
        if ( args.hasArgument("learnertype") )
            args.getValue("learnertype", 0, baseLearnerName);

        checkBaseLearner(baseLearnerName);
        if (verbose > 1)
            cout << "--> Using learner: " << baseLearnerName << endl;

        // This should be changed: the user decides the strong learner
        BaseLearner* pWeakHypothesisSource =
            BaseLearner::RegisteredLearners().getLearner(baseLearnerName);

        pModel = pWeakHypothesisSource->createGenericStrongLearner( args );

        pModel->run(args);
    }
    //////////////////////////////////////////////////////////////////////////////////////////
    //////////////////////////////////////////////////////////////////////////////////////////
    else if ( args.hasArgument("traintestmddag") )
    {
        // -test <dataFile> <shypFile> <numIters>
        string shypFileName = args.getValue<string>("traintestmddag", 2);

        string baseLearnerName = UnSerialization::getWeakLearnerName(shypFileName);

        BaseLearner* pWeakHypothesisSource =
            BaseLearner::RegisteredLearners().getLearner(baseLearnerName);

        pModel = pWeakHypothesisSource->createGenericStrongLearner( args );

        pModel->run(args);
    }
    //////////////////////////////////////////////////////////////////////////////////////////
    //////////////////////////////////////////////////////////////////////////////////////////
    else if ( args.hasArgument("test") )
    {
        // -test <dataFile> <shypFile> <numIters>
        string shypFileName = args.getValue<string>("test", 1);

        string baseLearnerName = UnSerialization::getWeakLearnerName(shypFileName);

        BaseLearner* pWeakHypothesisSource =
            BaseLearner::RegisteredLearners().getLearner(baseLearnerName);

        pModel = pWeakHypothesisSource->createGenericStrongLearner( args );

        pModel->classify(args);
    }
    //////////////////////////////////////////////////////////////////////////////////////////
    //////////////////////////////////////////////////////////////////////////////////////////
    else if ( args.hasArgument("cmatrix") )
    {
        // -cmatrix <dataFile> <shypFile>
        string shypFileName = args.getValue<string>("cmatrix", 1);

        string baseLearnerName = UnSerialization::getWeakLearnerName(shypFileName);

        BaseLearner* pWeakHypothesisSource =
            BaseLearner::RegisteredLearners().getLearner(baseLearnerName);

        pModel = pWeakHypothesisSource->createGenericStrongLearner( args );

        pModel->doConfusionMatrix(args);
    }
    //////////////////////////////////////////////////////////////////////////////////////////
    //////////////////////////////////////////////////////////////////////////////////////////
    else if ( args.hasArgument("posteriors") )
    {
        // -posteriors <dataFile> <shypFile> <outFileName>
        string shypFileName = args.getValue<string>("posteriors", 1);

        string baseLearnerName = UnSerialization::getWeakLearnerName(shypFileName);

        BaseLearner* pWeakHypothesisSource =
            BaseLearner::RegisteredLearners().getLearner(baseLearnerName);

        pModel = pWeakHypothesisSource->createGenericStrongLearner( args );

        pModel->doPosteriors(args);
    }
    //////////////////////////////////////////////////////////////////////////////////////////
    //////////////////////////////////////////////////////////////////////////////////////////
    else if ( args.hasArgument("ssfeatures") )
    {
        // ONLY for AdaBoostMH classifiers
        // -ssfeatures <dataFile> <shypFile> <outFile> <numIters>
        string testFileName = args.getValue<string>("ssfeatures", 0);
        string shypFileName = args.getValue<string>("ssfeatures", 1);
        string outFileName = args.getValue<string>("ssfeatures", 2);
        int numIterations = args.getValue<int>("ssfeatures", 3);

        cerr << "ERROR: ssfeatures has been deactivated for the moment!" << endl;
        //classifier.saveSingleStumpFeatureData(testFileName, shypFileName, outFileName, numIterations);
    }
    //////////////////////////////////////////////////////////////////////////////////////////
    //////////////////////////////////////////////////////////////////////////////////////////
    else if ( args.hasArgument("encode") )
    {
        // --encode <inputDataFile> <outputDataFile> <nIterations> <poolFile> <nBaseLearners>
        string labelsFileName = args.getValue<string>("encode", 0);
        string autoassociativeFileName = args.getValue<string>("encode", 1);
        string outputFileName = args.getValue<string>("encode", 2);
        int numIterations = args.getValue<int>("encode", 3);
        string poolFileName = args.getValue<string>("encode", 4);
        int numBaseLearners = args.getValue<int>("encode", 5);
        string outputInfoFile;
        const char* tmpArgv1[] = {"bla", // for ParasiteLearner
                                  "--pool",
                                  args.getValue<string>("encode", 4).c_str(),
                                  args.getValue<string>("encode", 5).c_str()};
        args.readArguments(4, tmpArgv1);

        InputData* pAutoassociativeData = new InputData();
        pAutoassociativeData->initOptions(args);
        pAutoassociativeData->load(autoassociativeFileName, IT_TRAIN, verbose);

        // for the original labels
        InputData* pLabelsData = new InputData();
        pLabelsData->initOptions(args);
        pLabelsData->load(labelsFileName, IT_TRAIN, verbose);

        // set up all the InputData members identically to pAutoassociativeData
        EncodeData* pOnePoint = new EncodeData();
        pOnePoint->initOptions(args);
        pOnePoint->load(autoassociativeFileName, IT_TRAIN, verbose);

        const int numExamples = pAutoassociativeData->getNumExamples();

        BaseLearner* pWeakHypothesisSource =
            BaseLearner::RegisteredLearners().getLearner("ParasiteLearner");
        pWeakHypothesisSource->declareArguments(args);

        ParasiteLearner* pWeakHypothesis;

        ofstream outFile(outputFileName.c_str());
        if (!outFile.is_open())
        {
            cerr << "ERROR: Cannot open strong hypothesis file <" << outputFileName << ">!" << endl;
            exit(1);
        }

        for (int i = 0; i < numExamples ; ++i)
        {
            vector<float> alphas;
            alphas.resize(numBaseLearners);
            fill(alphas.begin(), alphas.end(), 0);

            if (verbose >= 1)
                cout << "--> Encoding example no " << (i+1) << endl;
            pOnePoint->resetData();
            pOnePoint->addExample( pAutoassociativeData->getExample(i) );

            AlphaReal energy = 1;

            OutputInfo* pOutInfo = NULL;
            if ( args.hasArgument("outputinfo") )
            {
                args.getValue("outputinfo", 0, outputInfoFile);
                pOutInfo = new OutputInfo(args);
                pOutInfo->initialize(pOnePoint);
            }

            for (int t = 0; t < numIterations; ++t)
            {
                pWeakHypothesis = (ParasiteLearner*)pWeakHypothesisSource->create();
                pWeakHypothesis->initLearningOptions(args);
                pWeakHypothesis->setTrainingData(pOnePoint);
                energy *= pWeakHypothesis->run();
                // if (verbose >= 2)
                //     cout << "energy = " << energy << endl << flush;
                AdaBoostMHLearner adaBoostMHLearner;

                if (i == 0 && t == 0)
                {
                    if ( pWeakHypothesis->getBaseLearners().size() < numBaseLearners )
                        numBaseLearners = pWeakHypothesis->getBaseLearners().size();
                    outFile << "%Hidden representation using autoassociative boosting" << endl << endl;
                    outFile << "@RELATION " << outputFileName << endl << endl;
                    outFile << "% numBaseLearners" << endl;
                    for (int j = 0; j < numBaseLearners; ++j)
                        outFile << "@ATTRIBUTE " << j << "_"
                                << pWeakHypothesis->getBaseLearners()[j]->getId()
                                << " NUMERIC" << endl;
                    outFile << "@ATTRIBUTE class {" << pLabelsData->getClassMap().getNameFromIdx(0);
                    for (int l = 1; l < pLabelsData->getClassMap().getNumNames(); ++l)
                        outFile << ", " << pLabelsData->getClassMap().getNameFromIdx(l);
                    outFile << "}" << endl << endl << "@DATA" << endl;
                }
                alphas[pWeakHypothesis->getSelectedIndex()] +=
                    pWeakHypothesis->getAlpha() * pWeakHypothesis->getSignOfAlpha();
                if ( pOutInfo )
                    adaBoostMHLearner.printOutputInfo(pOutInfo, t, pOnePoint, NULL, pWeakHypothesis);
                adaBoostMHLearner.updateWeights(pOnePoint, pWeakHypothesis);
            }

            float sumAlphas = 0;
            for (int j = 0; j < numBaseLearners; ++j)
                sumAlphas += alphas[j];

            for (int j = 0; j < numBaseLearners; ++j)
                outFile << alphas[j]/sumAlphas << ",";
            const vector<Label>& labels = pLabelsData->getLabels(i);
            for (int l = 0; l < labels.size(); ++l)
                if (labels[l].y > 0)
                    outFile << pLabelsData->getClassMap().getNameFromIdx(labels[l].idx) << endl;
            delete pOutInfo;
        }
        outFile.close();
    }

    if (pModel)
        delete pModel;

    return 0;
}
OutputInfo Subsampling::initialize(std::vector<double*>& parameterPointers,
                                   std::vector<double*>& parameterDerivativePointers)
{
  OutputInfo info;
  info.dimensions.push_back(fm);
  outRows = inRows / kernelRows;
  outCols = inCols / kernelCols;
  fmOutSize = outRows * outCols;
  info.dimensions.push_back(outRows);
  info.dimensions.push_back(outCols);
  fmInSize = inRows * inCols;
  maxRow = inRows - kernelRows + 1;
  maxCol = inCols - kernelCols + 1;

  W.resize(fm, Eigen::MatrixXd(outRows, outCols));
  Wd.resize(fm, Eigen::MatrixXd(outRows, outCols));
  int numParams = fm * outRows * outCols * kernelRows * kernelCols;
  if(bias)
  {
    Wb.resize(fm, Eigen::MatrixXd(outRows, outCols));
    Wbd.resize(fm, Eigen::MatrixXd(outRows, outCols));
    numParams += fm * outRows * outCols;
  }

  parameterPointers.reserve(parameterPointers.size() + numParams);
  parameterDerivativePointers.reserve(parameterDerivativePointers.size() + numParams);
  for(int fmo = 0; fmo < fm; fmo++)
  {
    for(int r = 0; r < outRows; r++)
    {
      for(int c = 0; c < outCols; c++)
      {
        parameterPointers.push_back(&W[fmo](r, c));
        parameterDerivativePointers.push_back(&Wd[fmo](r, c));
        if(bias)
        {
          parameterPointers.push_back(&Wb[fmo](r, c));
          parameterDerivativePointers.push_back(&Wbd[fmo](r, c));
        }
      }
    }
  }

  initializeParameters();

  a.resize(1, info.outputs());
  y.resize(1, info.outputs());
  yd.resize(1, info.outputs());
  deltas.resize(1, info.outputs());

  if(info.outputs() < 1)
    throw OpenANNException("Number of outputs in subsampling layer is below"
                           " 1. You should either choose a smaller filter"
                           " size or generate a bigger input.");
  OPENANN_CHECK(fmInSize > 0);
  OPENANN_CHECK(outRows > 0);
  OPENANN_CHECK(outCols > 0);
  OPENANN_CHECK(fmOutSize > 0);
  OPENANN_CHECK(maxRow > 0);
  OPENANN_CHECK(maxCol > 0);

  return info;
}
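// Worked instance of the bookkeeping above (illustrative sketch, not called
// by the library): it counts the parameter pointers actually registered by
// the loop, i.e. one scaling weight per output unit per feature map, plus
// one bias weight per output unit when bias is enabled.
static inline int exampleSubsamplingParameterCount()
{
  const int fm = 4, inRows = 16, inCols = 16;   // hypothetical input layout
  const int kernelRows = 2, kernelCols = 2;     // hypothetical kernel
  const int outRows = inRows / kernelRows;      // 8
  const int outCols = inCols / kernelCols;      // 8
  const bool bias = true;
  int params = fm * outRows * outCols;          // W: 4 * 8 * 8 = 256
  if(bias)
    params += fm * outRows * outCols;           // Wb: another 256
  return params;                                // 512 pointers pushed above
}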
void AdaBoostMHLearner::run(const nor_utils::Args& args)
{
    // load the arguments
    this->getArgs(args);

    // get the registered weak learner (type from name)
    BaseLearner* pWeakHypothesisSource =
        BaseLearner::RegisteredLearners().getLearner(_baseLearnerName);

    // initialize learning options; normally it's done in the strong loop
    // also, here we do it for Product learners, so input data can be created
    pWeakHypothesisSource->initLearningOptions(args);

    BaseLearner* pConstantWeakHypothesisSource =
        BaseLearner::RegisteredLearners().getLearner("ConstantLearner");

    // get the training input data, and load it
    InputData* pTrainingData = pWeakHypothesisSource->createInputData();
    pTrainingData->initOptions(args);
    pTrainingData->load(_trainFileName, IT_TRAIN, _verbose);

    // get the testing input data, and load it
    InputData* pTestData = NULL;
    if ( !_testFileName.empty() )
    {
        pTestData = pWeakHypothesisSource->createInputData();
        pTestData->initOptions(args);
        pTestData->load(_testFileName, IT_TEST, _verbose);
    }

    // The output information object
    OutputInfo* pOutInfo = NULL;

    if ( !_outputInfoFile.empty() )
    {
        // Baseline: constant classifier - goes into 0th iteration
        BaseLearner* pConstantWeakHypothesis = pConstantWeakHypothesisSource->create();
        pConstantWeakHypothesis->initLearningOptions(args);
        pConstantWeakHypothesis->setTrainingData(pTrainingData);
        AlphaReal constantEnergy = pConstantWeakHypothesis->run();

        //pOutInfo = new OutputInfo(_outputInfoFile);
        pOutInfo = new OutputInfo(args);
        pOutInfo->initialize(pTrainingData);

        if (pTestData)
            pOutInfo->initialize(pTestData);
        pOutInfo->outputHeader(pTrainingData->getClassMap());

        pOutInfo->outputIteration(-1);
        pOutInfo->outputCustom(pTrainingData, pConstantWeakHypothesis);

        if (pTestData != NULL)
        {
            pOutInfo->separator();
            pOutInfo->outputCustom(pTestData, pConstantWeakHypothesis);
        }

        pOutInfo->outputCurrentTime();
        pOutInfo->endLine();
        pOutInfo->initialize(pTrainingData);

        if (pTestData)
            pOutInfo->initialize(pTestData);
    }

    //cout << "Before serialization" << endl;

    // reload the previously found weak learners if -resume is set.
    // otherwise just return 0
    int startingIteration = resumeWeakLearners(pTrainingData);

    Serialization ss(_shypFileName, _isShypCompressed );
    ss.writeHeader(_baseLearnerName); // this must go after resumeProcess has been called

    // perform the resuming if necessary. If not it will just return
    resumeProcess(ss, pTrainingData, pTestData, pOutInfo);

    if (_verbose == 1)
        cout << "Learning in progress..." << endl;

    // I put the starting time here, but it may take a very long time to
    // load the saved model
    time_t startTime, currentTime;
    time(&startTime);

    ///////////////////////////////////////////////////////////////////////
    // Starting the AdaBoost main loop
    ///////////////////////////////////////////////////////////////////////
    for (int t = startingIteration; t < _numIterations; ++t)
    {
        if (_verbose > 1)
            cout << "------- WORKING ON ITERATION " << (t+1) << " -------" << endl;

        BaseLearner* pWeakHypothesis = pWeakHypothesisSource->create();
        pWeakHypothesis->initLearningOptions(args);
        //pTrainingData->clearIndexSet();

        pWeakHypothesis->setTrainingData(pTrainingData);

        AlphaReal energy = pWeakHypothesis->run();

        //float gamma = pWeakHypothesis->getEdge();
        //cout << gamma << endl;

        // check the constant learner if the user wants it (if energy is NaN,
        // we fall back to the constant learner)
        if ( (_withConstantLearner) || ( energy != energy ) )
        {
            BaseLearner* pConstantWeakHypothesis = pConstantWeakHypothesisSource->create();
            pConstantWeakHypothesis->initLearningOptions(args);
            pConstantWeakHypothesis->setTrainingData(pTrainingData);
            AlphaReal constantEnergy = pConstantWeakHypothesis->run();

            if ( (constantEnergy <= energy) || ( energy != energy ) )
            {
                delete pWeakHypothesis;
                pWeakHypothesis = pConstantWeakHypothesis;
            }
        }

        if (_verbose > 1)
            cout << "Weak learner: " << pWeakHypothesis->getName() << endl;

        // Output the step-by-step information
        printOutputInfo(pOutInfo, t, pTrainingData, pTestData, pWeakHypothesis);

        // Updates the weights and returns the edge
        AlphaReal gamma = updateWeights(pTrainingData, pWeakHypothesis);

        if (_verbose > 1)
        {
            cout << setprecision(5)
                 << "--> Alpha = " << pWeakHypothesis->getAlpha() << endl
                 << "--> Edge  = " << gamma << endl
                 << "--> Energy  = " << energy << endl
                 // << "--> ConstantEnergy  = " << constantEnergy << endl
                 // << "--> difference  = " << (energy - constantEnergy) << endl
                 ;
        }

        // If gamma <= theta the algorithm must stop.
        // If theta == 0 and gamma is 0, it means that the weak learner is no
        // better than chance and no further training is possible.
        if (gamma <= _theta)
        {
            if (_verbose > 0)
            {
                cout << "Can't train any further: edge = " << gamma
                     << " (with an edge offset (theta) = " << _theta << ")" << endl;
            }

            // delete pWeakHypothesis;
            // break;
        }

        // append the current weak learner to strong hypothesis file,
        // that is, serialize it.
        ss.appendHypothesis(t, pWeakHypothesis);

        // Add it to the internal list of weak hypotheses
        _foundHypotheses.push_back(pWeakHypothesis);

        // check if the time limit has been reached
        if (_maxTime > 0)
        {
            time( &currentTime );
            float diff = difftime(currentTime, startTime); // difftime is in seconds
            diff /= 60; // = minutes

            if (diff > _maxTime)
            {
                if (_verbose > 0)
                    cout << "Time limit of " << _maxTime
                         << " minutes has been reached!" << endl;
                break;
            }
        } // check for maxtime

        delete pWeakHypothesis;
    } // loop on iterations
    /////////////////////////////////////////////////////////

    // write the footer of the strong hypothesis file
    ss.writeFooter();

    // write the weights of the instances if the name of the weights file isn't empty
    printOutWeights( pTrainingData );

    // Free the two input data objects
    if (pTrainingData)
        delete pTrainingData;
    if (pTestData)
        delete pTestData;
    if (pOutInfo)
        delete pOutInfo;

    if (_verbose > 0)
        cout << "Learning completed." << endl;
}
void CmpSeabaseDDL::dropSeabaseLibrary(StmtDDLDropLibrary * dropLibraryNode,
                                       NAString &currCatName,
                                       NAString &currSchName)
{
  Lng32 cliRC = 0;
  Lng32 retcode = 0;

  const NAString &objName = dropLibraryNode->getLibraryName();

  ComObjectName libraryName(objName);
  ComAnsiNamePart currCatAnsiName(currCatName);
  ComAnsiNamePart currSchAnsiName(currSchName);
  libraryName.applyDefaults(currCatAnsiName, currSchAnsiName);

  const NAString catalogNamePart = libraryName.getCatalogNamePartAsAnsiString();
  const NAString schemaNamePart = libraryName.getSchemaNamePartAsAnsiString(TRUE);
  const NAString objectNamePart = libraryName.getObjectNamePartAsAnsiString(TRUE);
  const NAString extLibraryName = libraryName.getExternalName(TRUE);

  ExeCliInterface cliInterface(STMTHEAP, NULL, NULL,
                               CmpCommon::context()->sqlSession()->getParentQid());

  ExpHbaseInterface * ehi = allocEHI();
  if (ehi == NULL)
    return;

  retcode = existsInSeabaseMDTable(&cliInterface,
                                   catalogNamePart, schemaNamePart, objectNamePart,
                                   COM_LIBRARY_OBJECT, TRUE, FALSE);
  if (retcode < 0)
    {
      deallocEHI(ehi);
      processReturn();
      return;
    }

  if (retcode == 0) // does not exist
    {
      *CmpCommon::diags() << DgSqlCode(-1389)
                          << DgString0(extLibraryName);
      deallocEHI(ehi);
      processReturn();
      return;
    }

  Int32 objectOwnerID = 0;
  Int32 schemaOwnerID = 0;
  Int64 objectFlags = 0;
  Int64 objUID = getObjectInfo(&cliInterface,
                               catalogNamePart.data(), schemaNamePart.data(),
                               objectNamePart.data(), COM_LIBRARY_OBJECT,
                               objectOwnerID, schemaOwnerID, objectFlags);
  if (objUID < 0 || objectOwnerID == 0 || schemaOwnerID == 0)
    {
      deallocEHI(ehi);
      processReturn();
      return;
    }

  if (!isDDLOperationAuthorized(SQLOperation::DROP_LIBRARY,
                                objectOwnerID, schemaOwnerID))
    {
      *CmpCommon::diags() << DgSqlCode(-CAT_NOT_AUTHORIZED);
      processReturn();
      return;
    }

  Queue * usingRoutinesQueue = NULL;
  cliRC = getUsingRoutines(&cliInterface, objUID, usingRoutinesQueue);
  if (cliRC < 0)
    {
      deallocEHI(ehi);
      processReturn();
      return;
    }

  // If RESTRICT and the library is being used, return an error
  if (cliRC != 100 &&
      dropLibraryNode->getDropBehavior() == COM_RESTRICT_DROP_BEHAVIOR)
    {
      *CmpCommon::diags() << DgSqlCode(-CAT_DEPENDENT_ROUTINES_EXIST);
      deallocEHI(ehi);
      processReturn();
      return;
    }

  // Drop the routines that reference this library.
  // (position the queue once before iterating; repositioning inside the
  // loop would revisit the first entry on every pass)
  usingRoutinesQueue->position();
  for (size_t i = 0; i < usingRoutinesQueue->numEntries(); i++)
    {
      OutputInfo * rou = (OutputInfo*)usingRoutinesQueue->getNext();

      char * routineName = rou->get(0);
      ComObjectType objectType = PrivMgr::ObjectLitToEnum(rou->get(1));

      if (dropSeabaseObject(ehi, routineName,
                            currCatName, currSchName, objectType,
                            TRUE, FALSE))
        {
          deallocEHI(ehi);
          processReturn();
          return;
        }
    }

  // can get a slight perf. gain if we pass in objUID
  if (dropSeabaseObject(ehi, objName,
                        currCatName, currSchName, COM_LIBRARY_OBJECT,
                        TRUE, FALSE))
    {
      deallocEHI(ehi);
      processReturn();
      return;
    }

  deallocEHI(ehi);
  processReturn();
  return;
}
void CmpSeabaseDDL::dropSeabaseRoutine(StmtDDLDropRoutine * dropRoutineNode,
                                       NAString &currCatName,
                                       NAString &currSchName)
{
  Lng32 retcode = 0;

  ComObjectName routineName(dropRoutineNode->getRoutineName());
  ComAnsiNamePart currCatAnsiName(currCatName);
  ComAnsiNamePart currSchAnsiName(currSchName);
  routineName.applyDefaults(currCatAnsiName, currSchAnsiName);

  const NAString catalogNamePart = routineName.getCatalogNamePartAsAnsiString();
  const NAString schemaNamePart = routineName.getSchemaNamePartAsAnsiString(TRUE);
  const NAString objectNamePart = routineName.getObjectNamePartAsAnsiString(TRUE);
  const NAString extRoutineName = routineName.getExternalName(TRUE);

  ExpHbaseInterface * ehi = NULL;
  ExeCliInterface cliInterface(STMTHEAP, NULL, NULL,
                               CmpCommon::context()->sqlSession()->getParentQid());

  ehi = allocEHI();
  if (ehi == NULL)
    {
      processReturn();
      return;
    }

  retcode = existsInSeabaseMDTable(&cliInterface,
                                   catalogNamePart, schemaNamePart, objectNamePart,
                                   COM_USER_DEFINED_ROUTINE_OBJECT,
                                   TRUE, FALSE);
  if (retcode < 0)
    {
      deallocEHI(ehi);
      processReturn();
      return;
    }

  if (retcode == 0) // does not exist
    {
      *CmpCommon::diags() << DgSqlCode(-1389)
                          << DgString0(extRoutineName);
      deallocEHI(ehi);
      processReturn();
      return;
    }

  // get objectOwner
  Int64 objUID = 0;
  Int32 objectOwnerID = 0;
  Int32 schemaOwnerID = 0;
  Int64 objectFlags = 0;

  // see if routine is cached
  BindWA bindWA(ActiveSchemaDB(), CmpCommon::context(), FALSE/*inDDL*/);
  NARoutineDB *pRoutineDBCache = ActiveSchemaDB()->getNARoutineDB();
  QualifiedName qualRoutineName(routineName, STMTHEAP);
  NARoutineDBKey key(qualRoutineName, STMTHEAP);

  NARoutine *cachedNARoutine = pRoutineDBCache->get(&bindWA, &key);
  if (cachedNARoutine)
    {
      objUID = cachedNARoutine->getRoutineID();
      objectOwnerID = cachedNARoutine->getObjectOwner();
      schemaOwnerID = cachedNARoutine->getSchemaOwner();
    }
  else
    {
      objUID = getObjectInfo(&cliInterface,
                             catalogNamePart.data(), schemaNamePart.data(),
                             objectNamePart.data(),
                             COM_USER_DEFINED_ROUTINE_OBJECT,
                             objectOwnerID, schemaOwnerID, objectFlags);
      if (objUID < 0 || objectOwnerID == 0 || schemaOwnerID == 0)
        {
          deallocEHI(ehi);
          processReturn();
          return;
        }
    }

  // Verify user has privilege to drop routine
  if (!isDDLOperationAuthorized(SQLOperation::DROP_ROUTINE,
                                objectOwnerID, schemaOwnerID))
    {
      *CmpCommon::diags() << DgSqlCode(-CAT_NOT_AUTHORIZED);
      deallocEHI(ehi);
      processReturn();
      return;
    }

  // Determine if this routine is referenced by any other objects.
  Lng32 cliRC = 0;
  Queue * usingViewsQueue = NULL;
  if (dropRoutineNode->getDropBehavior() == COM_RESTRICT_DROP_BEHAVIOR)
    {
      NAString usingObjName;
      cliRC = getUsingObject(&cliInterface, objUID, usingObjName);
      if (cliRC < 0)
        {
          deallocEHI(ehi);
          processReturn();
          return;
        }

      if (cliRC != 100) // found an object
        {
          *CmpCommon::diags() << DgSqlCode(-CAT_DEPENDENT_VIEW_EXISTS)
                              << DgTableName(usingObjName);
          deallocEHI(ehi);
          processReturn();
          return;
        }
    }
  else if (dropRoutineNode->getDropBehavior() == COM_CASCADE_DROP_BEHAVIOR)
    {
      cliRC = getUsingViews(&cliInterface, objUID, usingViewsQueue);
      if (cliRC < 0)
        {
          deallocEHI(ehi);
          processReturn();
          return;
        }
    }

  if (usingViewsQueue)
    {
      usingViewsQueue->position();
      for (int idx = 0; idx < usingViewsQueue->numEntries(); idx++)
        {
          OutputInfo * vi = (OutputInfo*)usingViewsQueue->getNext();
          char * viewName = vi->get(0);

          if (dropOneTableorView(cliInterface, viewName, COM_VIEW_OBJECT, false))
            {
              deallocEHI(ehi);
              processReturn();
              return;
            }
        }
    }

  // Remove the routine from the metadata
  if (dropSeabaseObject(ehi, dropRoutineNode->getRoutineName(),
                        currCatName, currSchName,
                        COM_USER_DEFINED_ROUTINE_OBJECT,
                        TRUE, FALSE))
    {
      deallocEHI(ehi);
      processReturn();
      return;
    }

  // Remove cached entries in other processes
  pRoutineDBCache->removeNARoutine(qualRoutineName,
                                   NARoutineDB::REMOVE_FROM_ALL_USERS,
                                   objUID);

  deallocEHI(ehi);
  processReturn();
  return;
}
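// ----------------------------------------------------------------------------
// Note: dropSeabaseRoutine tries the NARoutine cache before paying for a
// metadata query, and invalidates the cache entry once the drop succeeds.
// The sketch below mimics that cache-first pattern with invented names
// (RoutineInfo, gRoutineCache); it is illustrative only, not the NARoutineDB
// API.
// ----------------------------------------------------------------------------
#include <cstdio>
#include <map>
#include <string>

// Invented stand-in for the cached routine metadata.
struct RoutineInfo { long long uid; int objectOwner; int schemaOwner; };

static std::map<std::string, RoutineInfo> gRoutineCache;

static RoutineInfo lookupRoutine(const std::string &qualName)
{
  std::map<std::string, RoutineInfo>::iterator it = gRoutineCache.find(qualName);
  if (it != gRoutineCache.end())
  {
    printf("cache hit for %s\n", qualName.c_str());
    return it->second;                       // no metadata query needed
  }
  printf("cache miss for %s, querying metadata\n", qualName.c_str());
  RoutineInfo info = {1234, 33333, 33333};   // pretend metadata lookup
  gRoutineCache[qualName] = info;
  return info;
}

static void dropRoutine(const std::string &qualName)
{
  RoutineInfo info = lookupRoutine(qualName);
  printf("dropping routine %s (UID %lld)\n", qualName.c_str(), info.uid);
  gRoutineCache.erase(qualName);             // mirrors removeNARoutine()
}

int main()
{
  lookupRoutine("TRAFODION.SCH.MYUDF");      // miss: loads the cache
  dropRoutine("TRAFODION.SCH.MYUDF");        // hit, then invalidate
  return 0;
}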
MaxPooling::MaxPooling(OutputInfo info, int kernelRows, int kernelCols)
  : I(info.outputs()), fm(info.dimensions[0]), inRows(info.dimensions[1]),
    inCols(info.dimensions[2]), kernelRows(kernelRows), kernelCols(kernelCols),
    x(0), e(1, I)
{
}
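// ----------------------------------------------------------------------------
// Note: MaxPooling stores the input volume (fm feature maps of inRows x
// inCols) and the kernel size.  Assuming non-overlapping pooling windows,
// which is the common configuration, each kernelRows x kernelCols window
// collapses to one output activation per feature map.  The arithmetic below
// is a sketch of that assumed shape rule, not code from the library.
// ----------------------------------------------------------------------------
#include <cstdio>

int main()
{
  const int fm = 6, inRows = 24, inCols = 24;  // example input volume
  const int kernelRows = 2, kernelCols = 2;

  const int outRows = inRows / kernelRows;     // 12
  const int outCols = inCols / kernelCols;     // 12
  const int outputs = fm * outRows * outCols;  // 864

  printf("output volume: %d x %d x %d = %d\n", fm, outRows, outCols, outputs);
  return 0;
}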
void CmpSeabaseDDL::dropSeabaseView(StmtDDLDropView * dropViewNode,
                                    NAString &currCatName,
                                    NAString &currSchName)
{
  Lng32 cliRC = 0;
  Lng32 retcode = 0;

  const NAString &tabName = dropViewNode->getViewName();

  ComObjectName viewName(tabName);
  ComAnsiNamePart currCatAnsiName(currCatName);
  ComAnsiNamePart currSchAnsiName(currSchName);
  viewName.applyDefaults(currCatAnsiName, currSchAnsiName);

  const NAString catalogNamePart = viewName.getCatalogNamePartAsAnsiString();
  const NAString schemaNamePart = viewName.getSchemaNamePartAsAnsiString(TRUE);
  const NAString objectNamePart = viewName.getObjectNamePartAsAnsiString(TRUE);
  const NAString extViewName = viewName.getExternalName(TRUE);

  ExeCliInterface cliInterface(STMTHEAP, NULL, NULL,
                               CmpCommon::context()->sqlSession()->getParentQid());

  ExpHbaseInterface * ehi = allocEHI();
  if (ehi == NULL)
    return;

  if ((isSeabaseReservedSchema(viewName)) &&
      (!Get_SqlParser_Flags(INTERNAL_QUERY_FROM_EXEUTIL)))
    {
      *CmpCommon::diags() << DgSqlCode(-1119)
                          << DgTableName(extViewName);
      deallocEHI(ehi);
      processReturn();
      return;
    }

  retcode = existsInSeabaseMDTable(&cliInterface,
                                   catalogNamePart, schemaNamePart, objectNamePart,
                                   COM_VIEW_OBJECT, TRUE, FALSE);
  if (retcode < 0)
    {
      deallocEHI(ehi);
      processReturn();
      return;
    }

  if (retcode == 0) // does not exist
    {
      *CmpCommon::diags() << DgSqlCode(-1389)
                          << DgString0(extViewName);
      deallocEHI(ehi);
      processReturn();
      return;
    }

  Int32 objectOwnerID = 0;
  Int32 schemaOwnerID = 0;
  Int64 objUID = getObjectUIDandOwners(&cliInterface,
                                       catalogNamePart.data(), schemaNamePart.data(),
                                       objectNamePart.data(), COM_VIEW_OBJECT,
                                       objectOwnerID, schemaOwnerID);
  if (objUID < 0 || objectOwnerID == 0)
    {
      if (CmpCommon::diags()->getNumber(DgSqlCode::ERROR_) == 0)
        SEABASEDDL_INTERNAL_ERROR("getting object UID and owner for drop view");
      deallocEHI(ehi);
      processReturn();
      return;
    }

  // Verify user can perform operation
  if (!isDDLOperationAuthorized(SQLOperation::DROP_VIEW,
                                objectOwnerID, schemaOwnerID))
    {
      *CmpCommon::diags() << DgSqlCode(-CAT_NOT_AUTHORIZED);
      deallocEHI(ehi);
      processReturn();
      return;
    }

  Queue * usingViewsQueue = NULL;
  if (dropViewNode->getDropBehavior() == COM_RESTRICT_DROP_BEHAVIOR)
    {
      NAString usingObjName;
      cliRC = getUsingObject(&cliInterface, objUID, usingObjName);
      if (cliRC < 0)
        {
          deallocEHI(ehi);
          processReturn();
          return;
        }

      if (cliRC != 100) // found an object
        {
          *CmpCommon::diags() << DgSqlCode(-1047)
                              << DgTableName(usingObjName);
          deallocEHI(ehi);
          processReturn();
          return;
        }
    }
  else if (dropViewNode->getDropBehavior() == COM_CASCADE_DROP_BEHAVIOR)
    {
      cliRC = getUsingViews(&cliInterface, objUID, usingViewsQueue);
      if (cliRC < 0)
        {
          deallocEHI(ehi);
          processReturn();
          return;
        }
    }

  // get the list of all tables referenced by the view.  Save this list so
  // referenced tables can be removed from cache later
  NAList<objectRefdByMe> tablesRefdList;
  short status = getListOfReferencedTables(&cliInterface, objUID, tablesRefdList);

  if (usingViewsQueue)
    {
      usingViewsQueue->position();
      for (int idx = 0; idx < usingViewsQueue->numEntries(); idx++)
        {
          OutputInfo * vi = (OutputInfo*)usingViewsQueue->getNext();
          char * viewName = vi->get(0);

          if (dropSeabaseObject(ehi, viewName,
                                currCatName, currSchName, COM_VIEW_OBJECT))
            {
              deallocEHI(ehi);
              processReturn();
              return;
            }
        }
    }

  if (dropSeabaseObject(ehi, tabName,
                        currCatName, currSchName, COM_VIEW_OBJECT))
    {
      deallocEHI(ehi);
      processReturn();
      return;
    }

  // clear view definition from my cache only.
  CorrName cn(objectNamePart, STMTHEAP, schemaNamePart, catalogNamePart);
  ActiveSchemaDB()->getNATableDB()->removeNATable(cn,
                                                  NATableDB::REMOVE_MINE_ONLY,
                                                  COM_VIEW_OBJECT);

  // clear view from all other caches here.  This compensates for the
  // scenario where the object UID is not available in removeNATable and
  // the lookup there fails as well; the objectUID is simply used
  // directly here.
  SQL_QIKEY qiKey;
  qiKey.operation[0] = 'O';
  qiKey.operation[1] = 'R';
  qiKey.ddlObjectUID = objUID;
  SQL_EXEC_SetSecInvalidKeys(1, &qiKey);

  // Now remove referenced tables from cache.
  // When a query that references a view is compiled, all views are
  // converted to the underlying base tables.  Query plans are generated
  // to access the tables, and the views are no longer relevant.
  // When dropping a view, cached query plans that reference the dropped
  // view would otherwise continue to work.  This code removes the
  // referenced tables from caches to force recompilations so dropped
  // views are noticed.
  for (CollIndex i = 0; i < tablesRefdList.entries(); i++)
    {
      CorrName cn(tablesRefdList[i].objectName, STMTHEAP,
                  tablesRefdList[i].schemaName,
                  tablesRefdList[i].catalogName);
      ActiveSchemaDB()->getNATableDB()->removeNATable(cn,
                                                      NATableDB::REMOVE_FROM_ALL_USERS,
                                                      COM_BASE_TABLE_OBJECT);
    }

  deallocEHI(ehi);
  processReturn();
  return;
}
// Returns the results into ptRes
void MDDAGClassifier::computeResults(InputData* pData,
                                     vector<BaseLearner*>& weakHypotheses,
                                     vector<ExampleResults*>& results,
                                     int numIterations)
{
  assert( !weakHypotheses.empty() );

  const int numClasses = pData->getNumClasses();
  const int numExamples = pData->getNumExamples();

  // Initialize the output info
  OutputInfo* pOutInfo = NULL;

  if ( !_outputInfoFile.empty() )
    {
      if ( _args.getNumValues("outputinfo") > 1 )
        pOutInfo = new OutputInfo(_args);
      else
        pOutInfo = new OutputInfo(_outputInfoFile, "e01hamauc", false);
    }

  // Creating the results structures. See file Structures.h for the
  // PointResults structure
  results.clear();
  results.reserve(numExamples);
  for (int i = 0; i < numExamples; ++i)
    results.push_back( new ExampleResults(i, numClasses) );

  // iterator over all the weak hypotheses
  vector<BaseLearner*>::const_iterator whyIt;
  int t;

  if ( pOutInfo )
    {
      pOutInfo->initialize( pData );
      pOutInfo->outputHeader(pData->getClassMap(),
                             true,  // output iterations
                             false, // output time
                             true   // endline
                             );
    }

  // for every weak hypothesis: 1..T
  for (whyIt = weakHypotheses.begin(), t = 0;
       whyIt != weakHypotheses.end() && t < numIterations; ++whyIt, ++t)
    {
      BaseLearner* currWeakHyp = *whyIt;
      AlphaReal alpha = currWeakHyp->getAlpha();

      // for every point
      for (int i = 0; i < numExamples; ++i)
        {
          // a reference for clarity and speed
          vector<AlphaReal>& currVotesVector = results[i]->getVotesVector();

          // for every class
          for (int l = 0; l < numClasses; ++l)
            currVotesVector[l] += alpha * currWeakHyp->classify(pData, i, l);
        }

      // if needed output the step-by-step information
      if ( pOutInfo )
        {
          pOutInfo->outputIteration(t);
          // pOutInfo->outputError(pData, currWeakHyp);
          // pOutInfo->outTPRFPR(pData);
          // pOutInfo->outputBalancedError(pData, currWeakHyp);
          // if ( ( t % 1 ) == 0 ) {
          //   pOutInfo->outputROC(pData);
          // }
          pOutInfo->outputCustom(pData, currWeakHyp);

          // Margins and edge require an update of the weights,
          // so they are left out for the moment
          //outInfo.outputMargins(pData, currWeakHyp);
          //outInfo.outputEdge(pData, currWeakHyp);
          pOutInfo->endLine();
        }
    }

  if (pOutInfo)
    delete pOutInfo;
}
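// ----------------------------------------------------------------------------
// Note: the scoring loop above accumulates weighted votes per class,
// f_l(x) = sum_t alpha_t * h_t(x, l).  The standalone sketch below replays
// that accumulation with stubbed weak hypotheses and made-up alpha values;
// it is not MultiBoost code, just the same arithmetic in isolation.
// ----------------------------------------------------------------------------
#include <cstdio>
#include <functional>
#include <vector>

int main()
{
  const int numClasses = 3;

  // Stubbed weak hypotheses: each returns a vote in {-1, +1} per class,
  // paired with its alpha weight (all values invented for illustration).
  struct Weak { double alpha; std::function<int(int)> classify; };
  std::vector<Weak> weakHypotheses = {
    {0.8, [](int l) { return l == 0 ? +1 : -1; }},
    {0.5, [](int l) { return l == 1 ? +1 : -1; }},
    {0.3, [](int l) { return l == 0 ? +1 : -1; }},
  };

  // Accumulate the weighted votes exactly as the main loop above does.
  std::vector<double> votes(numClasses, 0.0);
  for (const Weak &h : weakHypotheses)
    for (int l = 0; l < numClasses; ++l)
      votes[l] += h.alpha * h.classify(l);

  for (int l = 0; l < numClasses; ++l)
    printf("class %d score: %+.2f\n", l, votes[l]); // class 0 wins with +0.60
  return 0;
}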
// ----------------------------------------------------------------------------
// method: authorizationEnabled
//
// Input/Output:
//   existingObjectList - returns the set of privilege manager metadata
//                        tables that currently exist
//
// Returns:
//   PRIV_INITIALIZED means all metadata tables exist
//   PRIV_UNINITIALIZED means no metadata tables exist
//   PRIV_PARTIALLY_INITIALIZED means only part of the metadata tables exist
//   PRIV_INITIALIZE_UNKNOWN means unable to retrieve metadata table info
//
// A cli error is put into the diags area if there is an error
// ----------------------------------------------------------------------------
PrivMgr::PrivMDStatus PrivMgr::authorizationEnabled(
  std::set<std::string> &existingObjectList)
{
  // Will require QI to reset on INITIALIZE AUTHORIZATION [,DROP]
  // get the list of tables from the schema
  // if the catalog name ever allows an embedded '.', this code will need
  // to change.
  std::string metadataLocation = getMetadataLocation();
  size_t period = metadataLocation.find(".");
  std::string catName = metadataLocation.substr(0, period);
  std::string schName = metadataLocation.substr(period+1);
  char buf[1000];
  sprintf(buf, "get tables in schema %s.%s, no header",
          catName.c_str(), schName.c_str());

  ExeCliInterface cliInterface(STMTHEAP, NULL, NULL,
                               CmpCommon::context()->sqlSession()->getParentQid());
  Queue * schemaQueue = NULL;

  // mark the current position in the diags area
  int32_t diagsMark = pDiags_->mark();

  int32_t cliRC = cliInterface.fetchAllRows(schemaQueue, buf, 0,
                                            FALSE, FALSE, TRUE);
  if (cliRC < 0)
    {
      cliInterface.retrieveSQLDiagnostics(pDiags_);
      return PRIV_INITIALIZE_UNKNOWN;
    }

  if (cliRC == 100) // did not find any rows
    {
      pDiags_->rewind(diagsMark);
      return PRIV_UNINITIALIZED;
    }

  // Not sure how this can happen but code I cloned had the check
  if (schemaQueue->numEntries() == 0)
    return PRIV_UNINITIALIZED;

  // Gather the returned list of tables in existingObjectList
  schemaQueue->position();
  for (int idx = 0; idx < schemaQueue->numEntries(); idx++)
    {
      OutputInfo * row = (OutputInfo*)schemaQueue->getNext();
      std::string theName = row->get(0);
      existingObjectList.insert(theName);
    }

  // Gather the list of expected tables in expectedObjectList
  std::set<string> expectedObjectList;
  size_t numTables = sizeof(privMgrTables)/sizeof(PrivMgrTableStruct);
  for (int ndx_tl = 0; ndx_tl < numTables; ndx_tl++)
    {
      const PrivMgrTableStruct &tableDefinition = privMgrTables[ndx_tl];
      expectedObjectList.insert(tableDefinition.tableName);
    }

  // Compare the existing with the expected
  std::set<string> diffsObjectList;
  std::set_difference(expectedObjectList.begin(), expectedObjectList.end(),
                      existingObjectList.begin(), existingObjectList.end(),
                      std::inserter(diffsObjectList, diffsObjectList.end()));

  // If all the expected tables exist, diffsObjectList is empty
  // -> return initialized
  if (diffsObjectList.empty())
    return PRIV_INITIALIZED;

  // If none of the expected tables exist, the difference is the entire
  // expected list -> return uninitialized
  if (expectedObjectList.size() == diffsObjectList.size())
    return PRIV_UNINITIALIZED;

  // Otherwise only some of the expected tables exist
  // -> return partially initialized
  return PRIV_PARTIALLY_INITIALIZED;
}
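// ----------------------------------------------------------------------------
// Note: the three-way status above reduces to one std::set_difference call
// over (expected - existing): an empty difference means fully initialized,
// a difference as large as the whole expected set means nothing exists,
// and anything in between is partial.  The self-contained illustration
// below uses made-up table names.
// ----------------------------------------------------------------------------
#include <algorithm>
#include <cstdio>
#include <iterator>
#include <set>
#include <string>

int main()
{
  std::set<std::string> expected = {"COLUMN_PRIVS", "OBJECT_PRIVS",
                                    "ROLE_USAGE"};
  std::set<std::string> existing = {"OBJECT_PRIVS"}; // partial install

  std::set<std::string> diffs;
  std::set_difference(expected.begin(), expected.end(),
                      existing.begin(), existing.end(),
                      std::inserter(diffs, diffs.end()));

  if (diffs.empty())
    printf("initialized\n");
  else if (diffs.size() == expected.size())
    printf("uninitialized\n");
  else
    printf("partially initialized (%zu tables missing)\n", diffs.size());
  return 0;
}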
void FilterBoostLearner::run(const nor_utils::Args& args)
{
  // load the arguments
  this->getArgs(args);

  time_t startTime, currentTime;
  time(&startTime);

  // get the registered weak learner (type from name)
  BaseLearner* pWeakHypothesisSource =
    BaseLearner::RegisteredLearners().getLearner(_baseLearnerName);

  // initialize learning options; normally it's done in the strong loop
  // also, here we do it for Product learners, so input data can be created
  pWeakHypothesisSource->initLearningOptions(args);

  BaseLearner* pConstantWeakHypothesisSource =
    BaseLearner::RegisteredLearners().getLearner("ConstantLearner");

  // get the training input data, and load it
  InputData* pTrainingData = pWeakHypothesisSource->createInputData();
  pTrainingData->initOptions(args);
  pTrainingData->load(_trainFileName, IT_TRAIN, _verbose);

  const int numClasses = pTrainingData->getNumClasses();
  const int numExamples = pTrainingData->getNumExamples();

  // initialize the margins variable
  _margins.resize( numExamples );
  for( int i = 0; i < numExamples; i++ )
    {
      _margins[i].resize( numClasses );
      fill( _margins[i].begin(), _margins[i].end(), 0.0 );
    }

  // get the testing input data, and load it
  InputData* pTestData = NULL;
  if ( !_testFileName.empty() )
    {
      pTestData = pWeakHypothesisSource->createInputData();
      pTestData->initOptions(args);
      pTestData->load(_testFileName, IT_TEST, _verbose);
    }

  // The output information object
  OutputInfo* pOutInfo = NULL;

  if ( !_outputInfoFile.empty() )
    {
      // Baseline: constant classifier - goes into 0th iteration
      BaseLearner* pConstantWeakHypothesis = pConstantWeakHypothesisSource->create();
      pConstantWeakHypothesis->initLearningOptions(args);
      pConstantWeakHypothesis->setTrainingData(pTrainingData);
      float constantEnergy = pConstantWeakHypothesis->run();

      pOutInfo = new OutputInfo(_outputInfoFile);
      pOutInfo->initialize(pTrainingData);

      updateMargins( pTrainingData, pConstantWeakHypothesis );

      if (pTestData)
        pOutInfo->initialize(pTestData);
      pOutInfo->outputHeader();

      pOutInfo->outputIteration(-1);
      pOutInfo->outputError(pTrainingData, pConstantWeakHypothesis);

      if (pTestData)
        pOutInfo->outputError(pTestData, pConstantWeakHypothesis);
      /*
      pOutInfo->outputMargins(pTrainingData, pConstantWeakHypothesis);
      pOutInfo->outputEdge(pTrainingData, pConstantWeakHypothesis);
      if (pTestData)
        pOutInfo->outputMargins(pTestData, pConstantWeakHypothesis);
      pOutInfo->outputMAE(pTrainingData);
      if (pTestData)
        pOutInfo->outputMAE(pTestData);
      */
      pOutInfo->outputCurrentTime();
      pOutInfo->endLine();
      pOutInfo->initialize(pTrainingData);

      if (pTestData)
        pOutInfo->initialize(pTestData);
    }

  // reload the previously found weak learners if -resume is set.
  // otherwise just return 0
  int startingIteration = resumeWeakLearners(pTrainingData);

  Serialization ss(_shypFileName, _isShypCompressed );
  ss.writeHeader(_baseLearnerName); // this must go after resumeProcess has been called

  // perform the resuming if necessary. If not, it will just return
  resumeProcess(ss, pTrainingData, pTestData, pOutInfo);

  if (_verbose == 1)
    cout << "Learning in progress..." << endl;

  ///////////////////////////////////////////////////////////////////////
  // Starting the AdaBoost main loop
  ///////////////////////////////////////////////////////////////////////
  for (int t = startingIteration; t < _numIterations; ++t)
    {
      if (_verbose > 1)
        cout << "------- WORKING ON ITERATION " << (t+1) << " -------" << endl;

      filter( pTrainingData, (int)(_Cn * log(t+2.0)) );
      if ( pTrainingData->getNumExamples() < 2 )
        {
          filter( pTrainingData, (int)(_Cn * log(t+2.0)), false );
        }

      if (_verbose > 1)
        {
          cout << "--> Size of training data = "
               << pTrainingData->getNumExamples() << endl;
        }

      BaseLearner* pWeakHypothesis = pWeakHypothesisSource->create();
      pWeakHypothesis->initLearningOptions(args);
      //pTrainingData->clearIndexSet();
      pWeakHypothesis->setTrainingData(pTrainingData);
      float energy = pWeakHypothesis->run();

      BaseLearner* pConstantWeakHypothesis = NULL;
      if (_withConstantLearner) // check constant learner if user wants it
        {
          pConstantWeakHypothesis = pConstantWeakHypothesisSource->create();
          pConstantWeakHypothesis->initLearningOptions(args);
          pConstantWeakHypothesis->setTrainingData(pTrainingData);
          float constantEnergy = pConstantWeakHypothesis->run();
        }

      // estimate the edge
      filter( pTrainingData, (int)(_Cn * log(t+2.0)), false );
      float edge = pWeakHypothesis->getEdge() / 2.0;

      if (_withConstantLearner) // check constant learner if user wants it
        {
          float constantEdge = pConstantWeakHypothesis->getEdge() / 2.0;
          if ( constantEdge > edge )
            {
              delete pWeakHypothesis;
              pWeakHypothesis = pConstantWeakHypothesis;
              edge = constantEdge;
            }
          else
            {
              delete pConstantWeakHypothesis;
            }
        }

      // calculate alpha
      float alpha = 0.5 * log( ( 0.5 + edge ) / ( 0.5 - edge ) );
      pWeakHypothesis->setAlpha( alpha );

      if (_verbose > 1)
        cout << "Weak learner: " << pWeakHypothesis->getName() << endl;

      // Output the step-by-step information
      pTrainingData->clearIndexSet();
      printOutputInfo(pOutInfo, t, pTrainingData, pTestData, pWeakHypothesis);

      // Updates the weights and returns the edge
      float gamma = updateWeights(pTrainingData, pWeakHypothesis);

      if (_verbose > 1)
        {
          cout << setprecision(5)
               << "--> Alpha  = " << pWeakHypothesis->getAlpha() << endl
               << "--> Edge   = " << gamma << endl
               << "--> Energy = " << energy << endl
            // << "--> ConstantEnergy = " << constantEnergy << endl
            // << "--> difference = " << (energy - constantEnergy) << endl
            ;
        }

      // update the margins
      updateMargins( pTrainingData, pWeakHypothesis );

      // append the current weak learner to the strong hypothesis file,
      // that is, serialize it.
      ss.appendHypothesis(t, pWeakHypothesis);

      // Add it to the internal list of weak hypotheses
      _foundHypotheses.push_back(pWeakHypothesis);

      // check if the time limit has been reached
      if (_maxTime > 0)
        {
          time( &currentTime );
          float diff = difftime(currentTime, startTime); // difftime is in seconds
          diff /= 60; // = minutes

          if (diff > _maxTime)
            {
              if (_verbose > 0)
                cout << "Time limit of " << _maxTime
                     << " minutes has been reached!" << endl;
              break;
            }
        } // check for maxtime

      delete pWeakHypothesis;
    } // loop on iterations
  /////////////////////////////////////////////////////////

  // write the footer of the strong hypothesis file
  ss.writeFooter();

  // Free the two input data objects
  if (pTrainingData)
    delete pTrainingData;
  if (pTestData)
    delete pTestData;

  if (pOutInfo)
    delete pOutInfo;

  if (_verbose > 0)
    cout << "Learning completed." << endl;
}
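// ----------------------------------------------------------------------------
// Note: the loop above sets alpha = 0.5 * ln((0.5 + edge) / (0.5 - edge)),
// the usual AdaBoost weight expressed in terms of the half-edge: a zero edge
// (random guessing) yields alpha = 0, and alpha grows without bound as the
// edge approaches 0.5.  A quick numeric check with arbitrary edge values:
// ----------------------------------------------------------------------------
#include <cmath>
#include <cstdio>

int main()
{
  // Half-edge of the selected weak learner; 0 means random guessing,
  // values close to 0.5 mean near-perfect classification.
  const double edges[] = {0.05, 0.2, 0.4};

  for (double edge : edges)
  {
    double alpha = 0.5 * std::log((0.5 + edge) / (0.5 - edge));
    printf("edge = %.2f  ->  alpha = %.4f\n", edge, alpha);
  }
  return 0;
}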
Dropout::Dropout(OutputInfo info, double dropoutProbability)
  : info(info), I(info.outputs()), dropoutProbability(dropoutProbability),
    y(1, I), dropoutMask(1, I), e(1, I)
{
}
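// ----------------------------------------------------------------------------
// Note: Dropout keeps a dropoutMask with the same shape as the activations.
// The sketch below shows the standard training-time behavior such a mask
// implements, each unit zeroed independently with probability
// dropoutProbability; it is an assumed illustration, not the library's
// forward pass.
// ----------------------------------------------------------------------------
#include <cstdio>
#include <random>
#include <vector>

int main()
{
  const double dropoutProbability = 0.5;
  std::vector<double> x = {0.3, -1.2, 0.8, 0.1, 2.0};

  std::mt19937 rng(42);
  std::bernoulli_distribution keep(1.0 - dropoutProbability);

  // y_i = x_i * mask_i, where mask_i ~ Bernoulli(1 - p)
  std::vector<double> y(x.size());
  for (size_t i = 0; i < x.size(); ++i)
    y[i] = keep(rng) ? x[i] : 0.0;

  for (double v : y)
    printf("%.2f ", v);
  printf("\n");
  return 0;
}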
void SoftCascadeLearner::run(const nor_utils::Args& args)
{
  // load the arguments
  this->getArgs(args);

  // print cascade properties
  if (_verbose > 0)
    {
      cout << "[+] Softcascade parameters :" << endl
           << "\t --> target detection rate = " << _targetDetectionRate << endl
           << "\t --> alpha (exp param) = " << _alphaExponentialParameter << endl
           << "\t --> bootstrap rate = " << _bootstrapRate << endl
           << endl;
    }

  // get the registered weak learner (type from name)
  BaseLearner* pWeakHypothesisSource =
    BaseLearner::RegisteredLearners().getLearner(_baseLearnerName);
  // initialize learning options; normally it's done in the strong loop
  // also, here we do it for Product learners, so input data can be created
  pWeakHypothesisSource->initLearningOptions(args);

  // get the training input data, and load it
  InputData* pTrainingData = pWeakHypothesisSource->createInputData();
  pTrainingData->initOptions(args);
  pTrainingData->load(_trainFileName, IT_TRAIN, 5);

  InputData* pBootstrapData = NULL;
  if (!_bootstrapFileName.empty())
    {
      pBootstrapData = pWeakHypothesisSource->createInputData();
      pBootstrapData->initOptions(args);
      pBootstrapData->load(_bootstrapFileName, IT_TRAIN, 5);
    }

  // get the testing input data, and load it
  InputData* pTestData = NULL;
  if ( !_testFileName.empty() )
    {
      pTestData = pWeakHypothesisSource->createInputData();
      pTestData->initOptions(args);
      pTestData->load(_testFileName, IT_TEST, 5);
    }

  Serialization ss(_shypFileName, false );
  ss.writeHeader(_baseLearnerName);

  // outputHeader();
  // The output information object
  OutputInfo* pOutInfo = NULL;

  if ( !_outputInfoFile.empty() )
    {
      pOutInfo = new OutputInfo(args, true);
      pOutInfo->setOutputList("sca", &args);

      pOutInfo->initialize(pTrainingData);

      if (pTestData)
        pOutInfo->initialize(pTestData);
      pOutInfo->outputHeader(pTrainingData->getClassMap(), true, true, false);
      pOutInfo->outputUserHeader("thresh");
      pOutInfo->headerEndLine();
    }

  // ofstream trainPosteriorsFile;
  // ofstream testPosteriorsFile;

  const NameMap& namemap = pTrainingData->getClassMap();
  _positiveLabelIndex = namemap.getIdxFromName(_positiveLabelName);

  // FIXME: output posteriors
  // OutputInfo* pTrainPosteriorsOut = NULL;
  // OutputInfo* pTestPosteriorsOut = NULL;
  // if (! _trainPosteriorsFileName.empty()) {
  //   pTrainPosteriorsOut = new OutputInfo(_trainPosteriorsFileName, "pos", true);
  //   pTrainPosteriorsOut->initialize(pTrainingData);
  //   dynamic_cast<PosteriorsOutput*>( pTrainPosteriorsOut->getOutputInfoObject("pos") )->addClassIndex(_positiveLabelIndex );
  // }
  // if (! _testPosteriorsFileName.empty() && !_testFileName.empty() ) {
  //   pTestPosteriorsOut = new OutputInfo(_testPosteriorsFileName, "pos", true);
  //   pTestPosteriorsOut->initialize(pTestData);
  //   dynamic_cast<PosteriorsOutput*>( pTestPosteriorsOut->getOutputInfoObject("pos") )->addClassIndex(_positiveLabelIndex );
  // }

  const int numExamples = pTrainingData->getNumExamples();
  vector<BaseLearner*> inWeakHypotheses;

  if (_fullRun)
    {
      // TODO : the full training is implemented, testing is needed
      AdaBoostMHLearner* sHypothesis = new AdaBoostMHLearner();
      sHypothesis->run(args, pTrainingData, _baseLearnerName,
                       _numIterations, inWeakHypotheses );
      delete sHypothesis;
    }
  else
    {
      cout << "[+] Loading uncalibrated shyp file... ";

      // read the shyp file of the trained classifier
      UnSerialization us;
      us.loadHypotheses(_unCalibratedShypFileName, inWeakHypotheses, pTrainingData);
      if (_inShypLimit > 0 && _inShypLimit < inWeakHypotheses.size() )
        {
          inWeakHypotheses.resize(_inShypLimit);
        }
      if (_numIterations > inWeakHypotheses.size())
        {
          _numIterations = inWeakHypotheses.size();
        }
      cout << "weak hypotheses loaded, " << inWeakHypotheses.size()
           << " retained.\n";
    }

  // some initializations
  _foundHypotheses.resize(0);
  double faceRejectionFraction = 0.;
  double estimatedExecutionTime = 0.;
  vector<double> rejectionDistributionVector;
  _rejectionThresholds.resize(0);

  set<int> trainingIndices;
  for (int i = 0; i < numExamples; i++)
    {
      trainingIndices.insert(pTrainingData->getRawIndex(i) );
    }

  // init v_t (see the paper)
  initializeRejectionDistributionVector(_numIterations, rejectionDistributionVector);

  if (_verbose == 1)
    cout << "Learning in progress..." << endl;

  ///////////////////////////////////////////////////////////////////////
  // Starting the SoftCascade main loop
  ///////////////////////////////////////////////////////////////////////
  for (int t = 0; t < _numIterations; ++t)
    {
      if (_verbose > 0)
        cout << "--------------[ iteration " << (t+1) << " ]--------------" << endl;

      faceRejectionFraction += rejectionDistributionVector[t];

      cout << "[+] Face rejection tolerated : " << faceRejectionFraction
           << " | v[t] = " << rejectionDistributionVector[t] << endl;

      int numberOfNegatives =
        pTrainingData->getNumExamplesPerClass(1 - _positiveLabelIndex);

      //vector<BaseLearner*>::const_iterator whyIt;
      int selectedIndex = 0;
      AlphaReal bestGap = 0;
      vector<AlphaReal> posteriors;
      computePosteriors(pTrainingData, _foundHypotheses, posteriors,
                        _positiveLabelIndex);

      // should use an iterator instead of i
      vector<BaseLearner*>::iterator whyIt;
      int i;
      for (i = 0, whyIt = inWeakHypotheses.begin();
           whyIt != inWeakHypotheses.end(); ++whyIt, ++i)
        {
          vector<AlphaReal> temporaryPosteriors = posteriors;
          vector<BaseLearner*> temporaryWeakHyp = _foundHypotheses;
          temporaryWeakHyp.push_back(*whyIt);
          updatePosteriors(pTrainingData, *whyIt, temporaryPosteriors,
                           _positiveLabelIndex);

          AlphaReal gap = computeSeparationSpan(pTrainingData,
                                                temporaryPosteriors,
                                                _positiveLabelIndex );
          if (gap > bestGap)
            {
              bestGap = gap;
              selectedIndex = i;
            }
        }

      BaseLearner* selectedWeakHypothesis = inWeakHypotheses[selectedIndex];

      cout << "[+] Rank of the selected weak hypothesis : " << selectedIndex << endl
           << "\t ---> edge gap = " << bestGap << endl
           << "\t ---> alpha = " << selectedWeakHypothesis->getAlpha() << endl;

      // update the stages
      _foundHypotheses.push_back(selectedWeakHypothesis);
      updatePosteriors(pTrainingData, selectedWeakHypothesis, posteriors,
                       _positiveLabelIndex);

      double missesFraction;
      AlphaReal r = findBestRejectionThreshold(pTrainingData, posteriors,
                                               faceRejectionFraction,
                                               missesFraction);
      _rejectionThresholds.push_back(r);

      // update the output info object
      dynamic_cast<SoftCascadeOutput*>( pOutInfo->getOutputInfoObject("sca") )
        ->appendRejectionThreshold(r);

      cout << "[+] Rejection threshold = " << r << endl;

      // some updates
      ss.appendHypothesisWithThreshold(t, selectedWeakHypothesis, r);
      faceRejectionFraction -= missesFraction;

      inWeakHypotheses.erase(inWeakHypotheses.begin() + selectedIndex);

      double whypCost = 1; // just in case there are different costs for each whyp
      estimatedExecutionTime += whypCost * numberOfNegatives;

      // output perf in file
      vector< vector< AlphaReal> > scores(0);
      _output << t + 1 << setw(_sepWidth + 1) << r << setw(_sepWidth);

      // update OutputInfo with the new whyp
      // updateOutputInfo(pOutInfo, pTrainingData, selectedWeakHypothesis);
      // if (pTestData) {
      //   updateOutputInfo(pOutInfo, pTestData, selectedWeakHypothesis);
      // }

      // output the iteration results
      printOutputInfo(pOutInfo, t, pTrainingData, pTestData,
                      selectedWeakHypothesis, r);

      // if (pTrainPosteriorsOut) {
      //   pTrainPosteriorsOut->setTable(pTrainingData, pOutInfo->getTable(pTrainingData));
      //   pTrainPosteriorsOut->outputCustom(pTrainingData);
      // }
      //
      // if (pTestPosteriorsOut) {
      //   pTestPosteriorsOut->setTable(pTestData, pOutInfo->getTable(pTestData));
      //   pTestPosteriorsOut->outputCustom(pTestData);
      // }

      int leftNegatives = filterDataset(pTrainingData, posteriors, r,
                                        trainingIndices);
      if (leftNegatives == 0)
        {
          cout << endl << "[+] No more negatives.\n";
          break;
        }

      if (_bootstrapRate != 0)
        {
          bootstrapTrainingSet(pTrainingData, pBootstrapData, trainingIndices);
        }
    } // loop on iterations
  /////////////////////////////////////////////////////////

  // write the footer of the strong hypothesis file
  ss.writeFooter();

  // Free the input data objects
  if (pTrainingData)
    delete pTrainingData;
  if (pBootstrapData)
    {
      delete pBootstrapData;
    }
  if (pTestData)
    delete pTestData;

  if (_verbose > 0)
    cout << "Learning completed." << endl;
}
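// ----------------------------------------------------------------------------
// Note: the rejection thresholds collected above are what make a soft
// cascade fast at test time: the running score is compared to r_t after
// every stage, so most negatives exit early while positives pay for the
// full cascade.  The sketch below evaluates such a cascade with invented
// stage scores and thresholds; it is not the MultiBoost classifier code.
// ----------------------------------------------------------------------------
#include <cstdio>
#include <vector>

// Returns true if the example survives every stage of the cascade.
static bool evaluateCascade(const std::vector<double> &stageScores,
                            const std::vector<double> &rejectionThresholds)
{
  double posterior = 0.0;
  for (size_t t = 0; t < stageScores.size(); ++t)
  {
    posterior += stageScores[t];          // add stage t's weighted vote
    if (posterior < rejectionThresholds[t])
      return false;                       // early exit: rejected at stage t
  }
  return true;                            // survived all stages: positive
}

int main()
{
  std::vector<double> thresholds = {-0.2, -0.1, 0.0};

  // A clear negative usually dies in the first stages...
  printf("negative-looking: %d\n",
         evaluateCascade({-0.5, -0.4, -0.3}, thresholds));
  // ...while a positive pays for the whole cascade.
  printf("positive-looking: %d\n",
         evaluateCascade({0.4, 0.3, 0.2}, thresholds));
  return 0;
}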