//
//  Trains the classifier on every object held by the training set.
//
//  classifier  - Classifier to train.
//  trainSet    - Source of the feature data, labels, object count and
//                dimension count handed to Classifier::Train.
//
//  Only the first row pointer of the feature table (data[0]) is passed to
//  Train, so the rows are presumably stored contiguously - confirm against
//  MData.
//
//  Returns 0 on success, -20 if Train reports failure.
//
int TrainClassifier(Classifier *classifier, MData& trainSet)
{
    int     *objLabels = trainSet.GetLabels();
    int     objCount = trainSet.GetNumObjs();
    int     dimCount = trainSet.GetDims();
    float   **features = trainSet.GetData();

    if( classifier->Train(features[0], objLabels, objCount, dimCount) ) {
        return 0;
    }

    cerr << "Classifier traiing FAILED" << endl;
    return -20;
}
//
//  Trains the classifier on the leading objects of the training set whose
//  iteration number does not exceed the requested iteration.
//
//  classifier  - Classifier to train.
//  trainSet    - Source of the feature data, labels, dimensions and the
//                per-object iteration list.
//  iteration   - Highest iteration to include.
//
//  Counting stops at the first object whose iteration is greater than
//  'iteration', so the iteration list is presumably sorted in ascending
//  order - confirm against the code that writes the training set.
//
//  Returns 0 on success, -10 if Classifier::Train reports failure.
//
int TrainClassifier(Classifier *classifier, MData& trainSet, int iteration)
{
    int     result = 0;
    int     *labels = trainSet.GetLabels();
    int     dims = trainSet.GetDims();
    int     numObjs = trainSet.GetNumObjs();
    int     *iterationList = trainSet.GetIterationList();
    float   **data = trainSet.GetData();
    int     count = 0;

    // The bounds test has to come first: && short-circuits, so the list is
    // never read at index numObjs when every object qualifies.
    while( count < numObjs && iterationList[count] <= iteration ) {
        count++;
    }

    cout << "Train set size: " << count << endl;

    if( !classifier->Train(data[0], labels, count, dims) ) {
        cerr << "Classifier traiing FAILED" << endl;
        result = -10;
    }
    return result;
}
//
//  Counts, per slide, how many training objects are labeled positive and
//  how many are not.
//
//  trainSet    - Training set supplying labels, slide indices and the
//                number of slides.
//  testSet     - Not used by this function; kept for interface
//                compatibility.
//  posCount    - Out: calloc'ed array with one entry per training slide,
//                holding the number of objects whose label is 1. The caller
//                must free() it.
//  negCount    - Out: calloc'ed array with one entry per training slide,
//                holding the number of objects with any other label. The
//                caller must free() it.
//
//  Returns 0 on success, -40 if a count buffer could not be allocated,
//  -41 if an object refers to a slide index outside the range
//  [0, number of slides). On failure both buffers are released and the
//  out-parameters are set to NULL.
//
int CountTrainingObjs(MData& trainSet, MData& testSet, int *&posCount, int *&negCount)
{
    int     result = 0;
    int     numTrainSlides = trainSet.GetNumSlides();
    int     numObjs = trainSet.GetNumObjs();
    int     *slideIdx = trainSet.GetSlideIndices();
    int     *labels = trainSet.GetLabels();

    posCount = (int*)calloc(numTrainSlides, sizeof(int));
    negCount = (int*)calloc(numTrainSlides, sizeof(int));

    // calloc may legitimately return NULL for a zero-sized request, so only
    // treat NULL as a failure when there is something to allocate.
    if( numTrainSlides > 0 && (posCount == NULL || negCount == NULL) ) {
        cerr << "Unable to allocate slide count buffers" << endl;
        result = -40;
    }

    for(int i = 0; result == 0 && i < numObjs; i++) {
        int slide = slideIdx[i];

        // An out of range index would write outside the count buffers.
        if( slide < 0 || slide >= numTrainSlides ) {
            cerr << "Slide index out of range: " << slide << endl;
            result = -41;
        } else if( labels[i] == 1 ) {
            posCount[slide]++;
        } else {
            negCount[slide]++;
        }
    }

    if( result != 0 ) {
        // Do not hand partially filled or dangling buffers to the caller.
        free(posCount);
        free(negCount);
        posCount = NULL;
        negCount = NULL;
    }
    return result;
}
int ApplyClassifier(MData& trainSet, MData& testSet, Classifier *classifier, string testFileName, string outFileName) { int result = 0, dims = trainSet.GetDims(), *trainLabel = trainSet.GetLabels(), numTestObjs = testSet.GetNumObjs(); float **test = testSet.GetData(), **train = trainSet.GetData(), *predScore = NULL; if( dims != testSet.GetDims() ) { cerr << "Training and test set dimensions do not match" << endl; result = -30; } if( result == 0 ) { cout << "Allocating prediction buffer" << endl; predScore = (float*)malloc(numTestObjs * sizeof(float)); if( predScore == NULL ) { cerr << "Unable to allocae prediction buffer" << endl; result = -31; } } if( result == 0 ) { cout << "Training classifier..." << endl; if( !classifier->Train(train[0], trainLabel, trainSet.GetNumObjs(), dims) ) { cerr << "Classifier training failed" << endl; result = -32; } } if( result == 0 ) { cout << "Applying classifier..." << endl; if( ! classifier->ScoreBatch(test, numTestObjs, dims, predScore) ) { cerr << "Applying classifier failed" << endl; result = -33; } } if( result == 0 ) { // Copy original test file so we can just append the // score data // string cmd = "cp " + testFileName + " " + outFileName; result = system(cmd.c_str()); } if( result == 0 ) { hid_t fileId; hsize_t dims[2]; herr_t status; fileId = H5Fopen(outFileName.c_str(), H5F_ACC_RDWR, H5P_DEFAULT); if( fileId < 0 ) { cerr << "Unable to open: " << outFileName << endl; result = -34; } if( result == 0 ) { dims[0] = numTestObjs; dims[1] = 1; status = H5LTmake_dataset(fileId, "/pred_score", 2, dims, H5T_NATIVE_FLOAT, predScore); if( status < 0 ) { cerr << "Unable to write score data" << endl; result = -35; } } if( fileId >= 0 ) { H5Fclose(fileId); } } if( predScore ) free(predScore); return result; }
int CalcROC(MData& trainSet, MData& testSet, Classifier *classifier, string testFile, string outFileName) { int result = 0, *trainLabels = trainSet.GetLabels(), *testLabels = testSet.GetLabels(), *predClass = (int*)malloc(testSet.GetNumObjs() * sizeof(int)); float **train = trainSet.GetData(), **test = testSet.GetData(), *scores = (float*)malloc(testSet.GetNumObjs() * sizeof(float)); if( testLabels == NULL ) { cerr << "Test set has no lables" << endl; result = -10; } if( predClass == NULL || scores == NULL ) { result = -11; cerr << "Unable to allocate results buffer" << endl; } ofstream outFile(outFileName.c_str()); if( !outFile.is_open() ) { cerr << "Unable to create " << outFileName << endl; result = -12; } if( result == 0 ) { int TP = 0, FP = 0, TN = 0, FN = 0, P = 0, N = 0; cout << "Saving to: " << outFileName << endl; outFile << "labels,"; for(int idx = 0; idx < testSet.GetNumObjs() - 1; idx++) outFile << testLabels[idx] << ","; outFile << testLabels[testSet.GetNumObjs() - 1] << endl; result = TrainClassifier(classifier, trainSet); classifier->ScoreBatch(test, testSet.GetNumObjs(), testSet.GetDims(), scores); for(int i = 0; i < testSet.GetNumObjs(); i++) { if( scores[i] >= 0.0f ) { predClass[i] = 1; } else { predClass[i] = -1; } } // Calculate confusion matrix TP = FP = TN = FN = P = N = 0; for(int i = 0; i < testSet.GetNumObjs(); i++) { if( testLabels[i] == 1 ) { P++; if( predClass[i] == 1 ) { TP++; } else { FN++; } } else { N++; if( predClass[i] == -1 ) { TN++; } else { FP++; } } } printf("\t%4d\t%4d\n", TP, FP); printf("\t%4d\t%4d\n\n", FN, TN); cout << "FP rate: " << (float)FP / (float)N << endl; cout << "TP rate: " << (float)TP / (float)P << endl; cout << "Precision: " << (float)TP / (float)(TP + FP) << endl; cout << "Accuracy: " << (float)(TP + TN) / (float)(N + P) << endl; outFile << "scores,"; for(int idx = 0; idx < testSet.GetNumObjs() - 1; idx++) { outFile << ((scores[idx] + 1.0f) / 2.0) << ","; } outFile << ((scores[testSet.GetNumObjs() - 1] + 1.0f) 
/ 2.0) << endl; outFile.close(); } if( predClass ) free(predClass); if( scores ) free(scores); return result; }
//
//  Reloads the picker's per-object buffers, class names and selected
//  sample list from a previously saved set.
//
//  testSet - Saved set supplying labels, ids, centroids, click locations,
//            feature data, class names and slide names.
//
//  Returns false if UpdateBuffers fails or if any saved object cannot be
//  found in the current dataset; true otherwise. When an object is not
//  found, the data copied before that point is left in place.
//
bool Picker::RestoreSessionData(MData& testSet)
{
    int     objCount = testSet.GetNumObjs(),
            dimCount = testSet.GetDims();

    if( !UpdateBuffers(objCount, true) ) {
        return false;
    }

    memcpy(m_labels, testSet.GetLabels(), objCount * sizeof(int));
    memcpy(m_ids, testSet.GetIdList(), objCount * sizeof(int));

    // X coordinates
    memcpy(m_xCentroid, testSet.GetXCentroidList(), objCount * sizeof(float));

    // Test sets created with earlier versions of HistomicsML don't have
    // clicks, fall back to the centroid as the click location.
    float   *xClicks = testSet.GetXClickList();
    if( xClicks == NULL ) {
        xClicks = testSet.GetXCentroidList();
    }
    memcpy(m_xClick, xClicks, objCount * sizeof(float));

    // Y coordinates, with the same fallback for missing clicks
    memcpy(m_yCentroid, testSet.GetYCentroidList(), objCount * sizeof(float));

    float   *yClicks = testSet.GetYClickList();
    if( yClicks == NULL ) {
        yClicks = testSet.GetYCentroidList();
    }
    memcpy(m_yClick, yClicks, objCount * sizeof(float));

    // Feature data is copied as a single block starting at the first row
    memcpy(m_trainSet[0], testSet.GetData()[0], objCount * dimCount * sizeof(float));

    // Older versions of al_server did not save class names, use the
    // defaults when none can be loaded.
    char    **savedNames = testSet.GetClassNames();
    if( savedNames == NULL ) {
        m_classNames.push_back(string("Negative"));
        m_classNames.push_back(string("Positive"));
    } else {
        for(int cls = 0; cls < testSet.GetNumClasses(); cls++) {
            m_classNames.push_back(string(savedNames[cls]));
        }
    }

    // Slide indices have to come from the dataset, NOT from the saved set,
    // so each saved index is translated through its slide name.
    int     *savedSlideIdx = testSet.GetSlideIndices();
    char    **slideNames = testSet.GetSlideNames();

    for(int obj = 0; obj < objCount; obj++) {
        m_slideIdx[obj] = m_dataset->GetSlideIdx(slideNames[savedSlideIdx[obj]]);

        // Keep track of selected items
        int item = m_dataset->FindItem(m_xCentroid[obj], m_yCentroid[obj],
                                       slideNames[savedSlideIdx[obj]]);
        if( item == -1 ) {
            gLogger->LogMsg(EvtLogger::Evt_ERROR, "Unable to find item in dataset:, %f, %f in %s",
                            m_xCentroid[obj], m_yCentroid[obj], slideNames[savedSlideIdx[obj]]);
            return false;
        }
        m_samples.push_back(item);
    }
    return true;
}