// Save the current training set (features, labels, ids, centroids and slide
// info gathered from m_dataset and the picker's buffers) to an HDF5 file
// named m_outPath + fileName.
//
// @param fileName  file name, relative to m_outPath.
// @return true on success, false if Create() or SaveAs() fails.
//
bool Picker::SaveTrainingSet(string fileName)
{
	// Stack allocation: standard 'new' never returns NULL (it throws), so
	// the old NULL checks were dead code, and RAII removes the manual
	// delete. 'main' elsewhere in this file uses MData on the stack too.
	MData trainingSet;

	bool result = trainingSet.Create(m_trainSet[0], m_samples.size(),
					m_dataset->GetDims(), m_labels, m_ids, NULL,
					m_dataset->GetMeans(), m_dataset->GetStdDevs(),
					m_xCentroid, m_yCentroid, m_dataset->GetSlideNames(),
					m_slideIdx, m_dataset->GetNumSlides(), m_classNames);

	if( result ) {
		fileName = m_outPath + fileName;
		gLogger->LogMsg(EvtLogger::Evt_INFO, "Saving training set to: %s", fileName.c_str());
		result = trainingSet.SaveAs(fileName);
	}
	return result;
}
/** The main function, where everything the program does is called from . */ int main(int argc, char *argv[]){ FuncBegin(); MData md; //mesh Mesh msh; //external variables driver::initialize(&argc,&argv,md); //initialize all the data driver::readregion(md,msh); //read the regions driver::readfixed(md); //read other solver properties driver::readmesh(md,msh); //read the mesh driver::readinitial(md, msh); //read the initial conditions if(md.setfield){ //we should change the initial conditions driver::setfield(md, msh); } else{ //we should run a simulation driver::preparedata(md,msh); do{ driver::marchintime(md, msh); //solve the system once in time driver::writeintime(md, msh,false); //write the data if required } while ( md.t < md.tEnd); } //anounce that everything is done std::cout << "Simulation finished successfully in " << md.cT << " seconds." << std::endl << "The results can be found in " << md.dir << "result/(*.vtk and *.flow)" << std::endl ; //finalize petsc and other data md.finalize(); PetscFinalize(); return 0; FuncEnd(); }
// Train 'classifier' on the full contents of 'trainSet'.
//
// @param classifier  classifier to train (assumed non-NULL by callers).
// @param trainSet    training data; features are contiguous from data[0].
// @return 0 on success, -20 if Classifier::Train reports failure.
//
int TrainClassifier(Classifier *classifier, MData& trainSet)
{
	int result = 0;
	int *labels = trainSet.GetLabels(),
		numObjs = trainSet.GetNumObjs(),
		numDims = trainSet.GetDims();
	float **data = trainSet.GetData();

	// data[0] points at the contiguous feature buffer.
	if( !classifier->Train(data[0], labels, numObjs, numDims) ) {
		// Fixed typo in error message: "traiing" -> "training".
		cerr << "Classifier training FAILED" << endl;
		result = -20;
	}
	return result;
}
// Renormalize the training set using the test set's mean and std dev. int Renormalize(MData& trainSet, MData& testSet) { int result = 0; float *trainMean = trainSet.GetMeans(), *testMean = testSet.GetMeans(), *trainStdDev = trainSet.GetStdDevs(), *testStdDev = testSet.GetStdDevs(), **trainFeatures = trainSet.GetData(); bool norm = true; // Check if re-normalization is actually needed for(int i = 0; i < trainSet.GetDims(); i++) { if( trainMean[i] != testMean[i] || trainStdDev[i] != testStdDev[i] ) { norm = false; break; } } if( norm ) { for(int obj = 0; obj < trainSet.GetNumObjs(); obj++) { for(int dim = 0; dim < trainSet.GetDims(); dim++) { trainFeatures[obj][dim] = (trainFeatures[obj][dim] * trainStdDev[dim]) + trainMean[dim]; trainFeatures[obj][dim] = (trainFeatures[obj][dim] - testMean[dim]) / testStdDev[dim]; } } } return result; }
int GenerateMap(MData& trainSet, MData& testSet, Classifier *classifier, string slide, string outFileName) { int result = 0, offset, slideObjs; float **train = trainSet.GetData(), **test = testSet.GetData(), *scores = (float*)malloc(testSet.GetNumObjs() * sizeof(float)); result = TrainClassifier(classifier, trainSet); classifier->ScoreBatch(test, testSet.GetNumObjs(), testSet.GetDims(), scores); offset = testSet.GetSlideOffset(slide, slideObjs); ofstream outFile(outFileName.c_str()); if( outFile.is_open() ) { outFile << "score,X,Y" << endl; for(int i = offset; i < offset + slideObjs; i++) { outFile << scores[i] << "," << testSet.GetXCentroid(i) << "," << testSet.GetYCentroid(i) << endl; } outFile.close(); } else { cerr << "Unable to create " << outFileName << endl; result = -10; } if( scores ) free(scores); return result; }
// Tally per-slide positive / negative prediction counts.
//
// @param predictions  per-object class predictions (+1 / -1), test-set order.
// @param posSlideCnt  out: calloc'd array (one count per slide); caller frees.
// @param negSlideCnt  out: calloc'd array (one count per slide); caller frees.
// @return 0 on success, -1 if either count buffer can't be allocated.
//
int CountResults(MData& testSet, int *predictions, int *&posSlideCnt, int *&negSlideCnt)
{
	int result = 0;
	int *slideIdx = testSet.GetSlideIndices();

	posSlideCnt = (int*)calloc(testSet.GetNumSlides(), sizeof(int));
	negSlideCnt = (int*)calloc(testSet.GetNumSlides(), sizeof(int));

	if( negSlideCnt && posSlideCnt ) {
		for(int i = 0; i < testSet.GetNumObjs(); i++) {
			if( predictions[i] == 1 ) {
				posSlideCnt[slideIdx[i]]++;
			} else {
				negSlideCnt[slideIdx[i]]++;
			}
		}
	} else {
		// BUG FIX: allocation failure used to return 0 (success), letting
		// the caller dereference NULL count buffers.
		cerr << "Unable to allocate count buffers" << endl;
		result = -1;
	}
	return result;
}
// Train 'classifier' on the prefix of 'trainSet' whose iteration tag is
// <= 'iteration'. The iteration list is assumed sorted ascending, so the
// first index exceeding 'iteration' bounds the training subset.
//
// @return 0 on success, -10 if Classifier::Train reports failure.
//
int TrainClassifier(Classifier *classifier, MData& trainSet, int iteration)
{
	int result = 0;
	int *labels = trainSet.GetLabels(),
		dims = trainSet.GetDims(),
		count,
		*iterationList = trainSet.GetIterationList();
	float **data = trainSet.GetData();

	// BUG FIX: the bounds check must come BEFORE the dereference —
	// the original order read iterationList[GetNumObjs()] out of bounds
	// when every object belonged to the requested iterations.
	count = 0;
	while( count < trainSet.GetNumObjs() && iterationList[count] <= iteration )
		count++;

	cout << "Train set size: " << count << endl;

	if( !classifier->Train(data[0], labels, count, dims) ) {
		// Fixed typo in error message: "traiing" -> "training".
		cerr << "Classifier training FAILED" << endl;
		result = -10;
	}
	return result;
}
// Tally per-slide positive / negative label counts for the training set.
// (testSet is unused but kept for signature compatibility with callers.)
//
// @param posCount  out: calloc'd array, one count per training slide; caller frees.
// @param negCount  out: calloc'd array, one count per training slide; caller frees.
// @return 0 on success, -1 if either count buffer can't be allocated.
//
int CountTrainingObjs(MData& trainSet, MData& testSet, int *&posCount, int *&negCount)
{
	int result = 0,
		numTrainSlides = trainSet.GetNumSlides(),
		*slideIdx = trainSet.GetSlideIndices(),
		*labels = trainSet.GetLabels();

	posCount = (int*)calloc(numTrainSlides, sizeof(int));
	negCount = (int*)calloc(numTrainSlides, sizeof(int));

	// BUG FIX: the calloc results were dereferenced unconditionally.
	if( posCount == NULL || negCount == NULL ) {
		cerr << "Unable to allocate count buffers" << endl;
		result = -1;
	} else {
		for(int i = 0; i < trainSet.GetNumObjs(); i++) {
			if( labels[i] == 1 ) {
				posCount[slideIdx[i]]++;
			} else {
				negCount[slideIdx[i]]++;
			}
		}
	}
	return result;
}
int ApplyClassifier(MData& trainSet, MData& testSet, Classifier *classifier, string testFileName, string outFileName) { int result = 0, dims = trainSet.GetDims(), *trainLabel = trainSet.GetLabels(), numTestObjs = testSet.GetNumObjs(); float **test = testSet.GetData(), **train = trainSet.GetData(), *predScore = NULL; if( dims != testSet.GetDims() ) { cerr << "Training and test set dimensions do not match" << endl; result = -30; } if( result == 0 ) { cout << "Allocating prediction buffer" << endl; predScore = (float*)malloc(numTestObjs * sizeof(float)); if( predScore == NULL ) { cerr << "Unable to allocae prediction buffer" << endl; result = -31; } } if( result == 0 ) { cout << "Training classifier..." << endl; if( !classifier->Train(train[0], trainLabel, trainSet.GetNumObjs(), dims) ) { cerr << "Classifier training failed" << endl; result = -32; } } if( result == 0 ) { cout << "Applying classifier..." << endl; if( ! classifier->ScoreBatch(test, numTestObjs, dims, predScore) ) { cerr << "Applying classifier failed" << endl; result = -33; } } if( result == 0 ) { // Copy original test file so we can just append the // score data // string cmd = "cp " + testFileName + " " + outFileName; result = system(cmd.c_str()); } if( result == 0 ) { hid_t fileId; hsize_t dims[2]; herr_t status; fileId = H5Fopen(outFileName.c_str(), H5F_ACC_RDWR, H5P_DEFAULT); if( fileId < 0 ) { cerr << "Unable to open: " << outFileName << endl; result = -34; } if( result == 0 ) { dims[0] = numTestObjs; dims[1] = 1; status = H5LTmake_dataset(fileId, "/pred_score", 2, dims, H5T_NATIVE_FLOAT, predScore); if( status < 0 ) { cerr << "Unable to write score data" << endl; result = -35; } } if( fileId >= 0 ) { H5Fclose(fileId); } } if( predScore ) free(predScore); return result; }
int CalcROC(MData& trainSet, MData& testSet, Classifier *classifier, string testFile, string outFileName) { int result = 0, *trainLabels = trainSet.GetLabels(), *testLabels = testSet.GetLabels(), *predClass = (int*)malloc(testSet.GetNumObjs() * sizeof(int)); float **train = trainSet.GetData(), **test = testSet.GetData(), *scores = (float*)malloc(testSet.GetNumObjs() * sizeof(float)); if( testLabels == NULL ) { cerr << "Test set has no lables" << endl; result = -10; } if( predClass == NULL || scores == NULL ) { result = -11; cerr << "Unable to allocate results buffer" << endl; } ofstream outFile(outFileName.c_str()); if( !outFile.is_open() ) { cerr << "Unable to create " << outFileName << endl; result = -12; } if( result == 0 ) { int TP = 0, FP = 0, TN = 0, FN = 0, P = 0, N = 0; cout << "Saving to: " << outFileName << endl; outFile << "labels,"; for(int idx = 0; idx < testSet.GetNumObjs() - 1; idx++) outFile << testLabels[idx] << ","; outFile << testLabels[testSet.GetNumObjs() - 1] << endl; result = TrainClassifier(classifier, trainSet); classifier->ScoreBatch(test, testSet.GetNumObjs(), testSet.GetDims(), scores); for(int i = 0; i < testSet.GetNumObjs(); i++) { if( scores[i] >= 0.0f ) { predClass[i] = 1; } else { predClass[i] = -1; } } // Calculate confusion matrix TP = FP = TN = FN = P = N = 0; for(int i = 0; i < testSet.GetNumObjs(); i++) { if( testLabels[i] == 1 ) { P++; if( predClass[i] == 1 ) { TP++; } else { FN++; } } else { N++; if( predClass[i] == -1 ) { TN++; } else { FP++; } } } printf("\t%4d\t%4d\n", TP, FP); printf("\t%4d\t%4d\n\n", FN, TN); cout << "FP rate: " << (float)FP / (float)N << endl; cout << "TP rate: " << (float)TP / (float)P << endl; cout << "Precision: " << (float)TP / (float)(TP + FP) << endl; cout << "Accuracy: " << (float)(TP + TN) / (float)(N + P) << endl; outFile << "scores,"; for(int idx = 0; idx < testSet.GetNumObjs() - 1; idx++) { outFile << ((scores[idx] + 1.0f) / 2.0) << ","; } outFile << ((scores[testSet.GetNumObjs() - 1] + 1.0f) 
/ 2.0) << endl; outFile.close(); } if( predClass ) free(predClass); if( scores ) free(scores); return result; }
// Apply the classifier to each slide in the test set. Save the counts of positive and negative // classes for each slide to a csv file. // int ClassifySlides(MData& trainSet, MData& testSet, Classifier *classifier, string testFile, string outFileName) { int result = 0; int *pred = NULL, *posSlideCnt = NULL, *negSlideCnt = NULL; char **slideNames = testSet.GetSlideNames(); ofstream outFile(outFileName.c_str()); if( classifier == NULL ) { result = -10; } if( result == 0 ) { if( outFile.is_open() ) { outFile << "slides,"; for(int i = 0; i < testSet.GetNumSlides() - 1; i++) { outFile << slideNames[i] << ","; } outFile << slideNames[testSet.GetNumSlides() - 1] << endl; pred = (int*)malloc(testSet.GetNumObjs() * sizeof(int)); if( pred == NULL ) { cerr << "Unable to allocate results buffer" << endl; result = -11; } } else { cerr << "Unable to open " << outFileName << endl; result = -12; } } if( result == 0 ) { result = TrainClassifier(classifier, trainSet); } if( result == 0 ) { if( !classifier->ClassifyBatch(testSet.GetData(), testSet.GetNumObjs(), testSet.GetDims(), pred) ) { cerr << "Classification failed" << endl; result = -13; } } if( result == 0 ) { result = CountResults(testSet, pred, posSlideCnt, negSlideCnt); } if( result == 0 ) { outFile << "positive,"; for( int i = 0; i < testSet.GetNumSlides() - 1; i++ ) { outFile << posSlideCnt[i] << ","; } outFile << posSlideCnt[testSet.GetNumSlides() - 1] << endl; outFile << "negative,"; for( int i = 0; i < testSet.GetNumSlides() - 1; i++ ) { outFile << negSlideCnt[i] << ","; } outFile << negSlideCnt[testSet.GetNumSlides() - 1] << endl; } if( outFile.is_open() ) { outFile.close(); } if( posSlideCnt ) free(posSlideCnt); if( negSlideCnt ) free(negSlideCnt); return result; }
// Rebuild the picker's in-memory session state from a reloaded test set:
// copies labels, ids, centroids, clicks and features into the picker's
// buffers, restores class names, and re-resolves each object against the
// currently loaded dataset (m_dataset).
//
// @param testSet  test set previously saved by this server and reloaded
//                 from disk.
// @return true on success, false if buffers can't be sized or an object
//         can't be located in the dataset.
//
bool Picker::RestoreSessionData(MData& testSet)
{
	bool result = true;
	int numObjs = testSet.GetNumObjs(),
		numDims = testSet.GetDims();

	// Size m_labels / m_ids / centroid / click / feature buffers for
	// numObjs objects before the memcpy's below.
	result = UpdateBuffers(numObjs, true);

	if( result ) {
		float *floatData = NULL;
		int *intData = NULL;

		intData = testSet.GetLabels();
		memcpy(m_labels, intData, numObjs * sizeof(int));

		intData = testSet.GetIdList();
		memcpy(m_ids, intData, numObjs * sizeof(int));

		floatData = testSet.GetXCentroidList();
		memcpy(m_xCentroid, floatData, numObjs * sizeof(float));

		floatData = testSet.GetXClickList();
		// Test sets created with earlier versions of HistomicsML don't have clicks
		if( floatData == NULL ) {
			// Use centroids for click location if not present.
			floatData = testSet.GetXCentroidList();
		}
		memcpy(m_xClick, floatData, numObjs * sizeof(float));

		floatData = testSet.GetYCentroidList();
		memcpy(m_yCentroid, floatData, numObjs * sizeof(float));

		floatData = testSet.GetYClickList();
		// Same fallback for the Y click coordinates.
		if( floatData == NULL ) {
			floatData = testSet.GetYCentroidList();
		}
		memcpy(m_yClick, floatData, numObjs * sizeof(float));

		// Feature matrix is contiguous from GetData()[0]; copy it whole.
		floatData = testSet.GetData()[0];
		memcpy(m_trainSet[0], floatData, numObjs * numDims * sizeof(float));

		char **classNames = testSet.GetClassNames();
		// Older versions of al_server did not save class names, set
		// defaults if they can't be loaded.
		if( classNames ) {
			for(int i = 0; i < testSet.GetNumClasses(); i++) {
				m_classNames.push_back(string(classNames[i]));
			}
		} else {
			m_classNames.push_back(string("Negative"));
			m_classNames.push_back(string("Positive"));
		}

		// Get slide indices from the dataset, NOT from the training set.
		intData = testSet.GetSlideIndices();
		char **slideNames = testSet.GetSlideNames();
		int idx;

		for(int i = 0; i < numObjs; i++) {
			// Map the saved slide name back to the dataset's slide index,
			// then locate the object by its centroid within that slide.
			m_slideIdx[i] = m_dataset->GetSlideIdx(slideNames[intData[i]]);

			// Keep track of selected items
			idx = m_dataset->FindItem(m_xCentroid[i], m_yCentroid[i], slideNames[intData[i]]);
			if( idx == -1 ) {
				gLogger->LogMsg(EvtLogger::Evt_ERROR, "Unable to find item in dataset:, %f, %f in %s",
								m_xCentroid[i], m_yCentroid[i], slideNames[intData[i]]);
				result = false;
				break;
			} else {
				m_samples.push_back(idx);
			}
		}
	}
	return result;
}
// Restore a previously saved picker session. Parses the feature file name,
// test set file, dataset name, test set name and session uid from the
// client's json request, loads the dataset and test set from disk, rebuilds
// session state via RestoreSessionData, and writes a PASS/FAIL json reply
// (with class names on success) back over 'sock'.
//
// @param sock  connected client socket the json reply is written to.
// @param obj   parsed json request.
// @return true if the session was fully restored and the reply was sent.
//
bool Picker::ReloadPicker(const int sock, json_t *obj)
{
	bool result = true, uidUpdated = false;
	json_t *jsonObj;
	const char *featureFileName, *uid, *testSetFileName, *name;

	// m_UID's length is 1 greater than UID_LENGTH, So we can
	// always write a 0 there to make strlen safe.
	//
	m_UID[UID_LENGTH] = 0;
	// A non-empty UID means another session already owns this picker.
	if( strlen(m_UID) > 0 ) {
		gLogger->LogMsg(EvtLogger::Evt_ERROR, "Session already in progress: %s", m_UID);
		result = false;
	}

	// Extract the required string fields from the request; any missing
	// field aborts the reload (result = false short-circuits the rest).
	if( result ) {
		jsonObj = json_object_get(obj, "features");
		featureFileName = json_string_value(jsonObj);
		if( featureFileName == NULL ) {
			result = false;
		}
	}

	if( result ) {
		jsonObj = json_object_get(obj, "testfile");
		testSetFileName = json_string_value(jsonObj);
		if( testSetFileName == NULL ) {
			result = false;
		}
	}

	if( result ) {
		jsonObj = json_object_get(obj, "dataset");
		name = json_string_value(jsonObj);
		if( name != NULL ) {
			m_curDatasetName = name;
		} else {
			result = false;
		}
	}

	if( result ) {
		jsonObj = json_object_get(obj, "name");
		name = json_string_value(jsonObj);
		if( name != NULL ) {
			m_testsetName = name;
		} else {
			result = false;
		}
	}

	if( result ) {
		jsonObj = json_object_get(obj, "uid");
		uid = json_string_value(jsonObj);
		if( uid == NULL ) {
			result = false;
		}
	}

	if( result ) {
		// Claim the session; uidUpdated lets the failure path below know
		// it must clear the UID again.
		strncpy(m_UID, uid, UID_LENGTH);
		uidUpdated = true;

		m_dataset = new MData();
		if( m_dataset == NULL ) {
			gLogger->LogMsg(EvtLogger::Evt_ERROR, "Unable to create dataset object");
			result = false;
		}
	}

	// NOTE(review): this repeats the "name" extraction already done above;
	// the second lookup appears redundant but harmless.
	if( result ) {
		jsonObj = json_object_get(obj, "name");
		name = json_string_value(jsonObj);
		if( name != NULL ) {
			m_testsetName = name;
		} else {
			gLogger->LogMsg(EvtLogger::Evt_ERROR, "Unable to extract test set name");
			result = false;
		}
	}

	// Load the saved test set (path used as-is, not prefixed with m_dataPath).
	MData testData;
	if( result ) {
		string fqn = testSetFileName;
		if( testData.Load(fqn) == false ) {
			gLogger->LogMsg(EvtLogger::Evt_ERROR, "Unable to load test set %s", fqn.c_str());
			result = false;
		} else {
			gLogger->LogMsg(EvtLogger::Evt_INFO, "Reloaded testset: %s", fqn.c_str());
		}
	}

	// Load the feature dataset (this one IS prefixed with m_dataPath).
	if( result ) {
		string fqFileName = m_dataPath + featureFileName;
		gLogger->LogMsg(EvtLogger::Evt_INFO, "Loading %s", fqFileName.c_str());
		double start = gLogger->WallTime();
		result = m_dataset->Load(fqFileName);
		gLogger->LogMsg(EvtLogger::Evt_INFO, "Loading took %f", gLogger->WallTime() - start);
	}

	if( result ) {
		result = RestoreSessionData(testData);
		if( !result ) {
			gLogger->LogMsg(EvtLogger::Evt_ERROR, "Unable to restore session");
		} else {
			m_reloaded = true;
		}
	}

	// Send result back to client
	//
	json_t *root = json_object(), *value = NULL;
	size_t bytesWritten;

	if( root != NULL ) {
		if( result ) {
			// NOTE(review): json_object_set does not steal the reference
			// returned by json_string(), so these json_string objects are
			// presumably leaked — confirm and consider json_object_set_new.
			json_object_set(root, "negName", json_string(m_classNames[0].c_str()));
			json_object_set(root, "posName", json_string(m_classNames[1].c_str()));
			json_object_set(root, "result", json_string("PASS"));
		} else {
			json_object_set(root, "result", json_string("FAIL"));
		}

		// (This char* 'jsonObj' shadows the json_t* declared above.)
		char *jsonObj = json_dumps(root, 0);
		bytesWritten = ::write(sock, jsonObj, strlen(jsonObj));

		// A short write counts as failure.
		if( bytesWritten != strlen(jsonObj) )
			result = false;

		json_decref(root);
		free(jsonObj);
	}

	if( !result && uidUpdated ){
		// Initialization failed, clear current UID
		memset(m_UID, 0, UID_LENGTH + 1);
	}
	return result;
}