示例#1
0
//
// Trains the classifier on every object in the training set.
//
// data[0] of the training set is handed to Classifier::Train together
// with the label array, the object count and the feature dimension.
//
// Returns 0 on success, -20 if Classifier::Train reports failure.
//
int TrainClassifier(Classifier *classifier, MData& trainSet)
{
	int		*classLabels = trainSet.GetLabels();
	int		objCount = trainSet.GetNumObjs();
	int		featureDims = trainSet.GetDims();
	float	**features = trainSet.GetData();

	if( classifier->Train(features[0], classLabels, objCount, featureDims) ) {
		return 0;
	}

	cerr << "Classifier traiing FAILED" << endl;
	return -20;
}
//
// Trains the classifier on the leading objects of the training set whose
// iteration number does not exceed `iteration`.
//
// The iteration list is scanned from the front and the scan stops at the
// first entry greater than `iteration`, so only a prefix of the set is used.
// NOTE(review): this presumes the iteration list is in non-decreasing
// order - confirm against the code that builds the training set.
//
// Prints the number of objects selected to stdout.
//
// Returns 0 on success, -10 if Classifier::Train reports failure.
//
int TrainClassifier(Classifier *classifier, MData& trainSet, int iteration)
{
	int		result = 0;
	int		*labels = trainSet.GetLabels(), dims = trainSet.GetDims(),
			numObjs = trainSet.GetNumObjs(), count = 0,
			*iterationList = trainSet.GetIterationList();
	float	**data = trainSet.GetData();

	// The bound has to be tested before the element is read; otherwise,
	// when every object qualifies, the scan reads iterationList[numObjs],
	// one element past the end of the list.
	while( count < numObjs && iterationList[count] <= iteration )
		count++;

	cout << "Train set size: " << count  << endl;

	if( !classifier->Train(data[0], labels, count, dims) ) {
		cerr << "Classifier traiing FAILED" << endl;
		result = -10;
	}
	return result;
}
//
// Counts, for each slide of the training set, how many training objects
// are labeled 1 (positive) and how many carry any other label (negative).
//
// trainSet  - Set whose labels and slide indices are tallied.
// testSet   - Not used by this function.
// posCount  - Out: calloc'ed array with one entry per training slide
//             holding the positive counts.
// negCount  - Out: calloc'ed array with one entry per training slide
//             holding the negative counts.
//
// On success the caller owns both arrays and releases them with free().
// On failure both arrays are released here and the pointers set to NULL.
//
// Returns 0 on success, -40 if the count arrays cannot be allocated,
// -41 if an object refers to a slide index outside [0, number of slides).
//
int CountTrainingObjs(MData& trainSet, MData& testSet, int *&posCount, int *&negCount)
{
	int result = 0,
		numTrainSlides = trainSet.GetNumSlides(),
		numObjs = trainSet.GetNumObjs(),
		*slideIdx = trainSet.GetSlideIndices(),
		*labels = trainSet.GetLabels();

	posCount = (int*)calloc(numTrainSlides, sizeof(int));
	negCount = (int*)calloc(numTrainSlides, sizeof(int));

	// calloc may legitimately return NULL for a zero-sized request, so a
	// NULL result is only an allocation failure when slides were requested.
	if( numTrainSlides > 0 && (posCount == NULL || negCount == NULL) ) {
		cerr << "Unable to allocate slide count buffers" << endl;
		result = -40;
	}

	for(int i = 0; result == 0 && i < numObjs; i++) {
		int slide = slideIdx[i];

		// Reject indices that would write outside the count arrays.
		if( slide < 0 || slide >= numTrainSlides ) {
			cerr << "Slide index out of range for training object " << i << endl;
			result = -41;
		} else if( labels[i] == 1 ) {
			posCount[slide]++;
		} else {
			negCount[slide]++;
		}
	}

	if( result != 0 ) {
		// Never hand partially filled buffers back to the caller.
		free(posCount);
		free(negCount);
		posCount = NULL;
		negCount = NULL;
	}
	return result;
}
示例#4
0
int ApplyClassifier(MData& trainSet, MData& testSet, Classifier *classifier,
					string testFileName, string outFileName)
{
	int		result = 0, dims = trainSet.GetDims(), *trainLabel = trainSet.GetLabels(),
			numTestObjs = testSet.GetNumObjs();
	float	**test = testSet.GetData(), **train = trainSet.GetData(),
			*predScore = NULL;

	if( dims != testSet.GetDims() ) {
		cerr << "Training and test set dimensions do not match" << endl;
		result = -30;
	}

	if( result == 0 ) {
		cout << "Allocating prediction buffer" << endl;
		predScore = (float*)malloc(numTestObjs * sizeof(float));
		if( predScore == NULL ) {
			cerr << "Unable to allocae prediction buffer" << endl;
			result = -31;
		}
	}

	if( result == 0 ) {
		cout << "Training classifier..." << endl;
		if( !classifier->Train(train[0], trainLabel, trainSet.GetNumObjs(), dims) ) {
			cerr << "Classifier training failed" << endl;
			result = -32;
		}
	}

	if( result == 0 ) {
		cout << "Applying classifier..." << endl;
		if( ! classifier->ScoreBatch(test, numTestObjs, dims, predScore) ) {
			cerr << "Applying classifier failed" << endl;
			result = -33;
		}
	}

	if( result == 0 ) {
		// Copy original test file so we can just append the
		// score data
		//
		string cmd = "cp " + testFileName + " " + outFileName;
		result = system(cmd.c_str());
	}

	if( result == 0 ) {
		hid_t		fileId;
		hsize_t		dims[2];
		herr_t		status;

		fileId = H5Fopen(outFileName.c_str(), H5F_ACC_RDWR, H5P_DEFAULT);
		if( fileId < 0 ) {
			cerr << "Unable to open: " << outFileName << endl;
			result = -34;
		}

		if( result == 0 ) {
			dims[0] = numTestObjs;
			dims[1] = 1;

			status = H5LTmake_dataset(fileId, "/pred_score", 2, dims,
										H5T_NATIVE_FLOAT, predScore);
			if( status < 0 ) {
				cerr << "Unable to write score data" << endl;
				result = -35;
			}
		}

		if( fileId >= 0 ) {
			H5Fclose(fileId);
		}
	}

	if( predScore )
		free(predScore);

	return result;
}
示例#5
0
//
// Trains the classifier on the training set, scores the labeled test set,
// prints a confusion matrix with summary rates to stdout and writes the
// labels and rescaled scores to a two-line comma separated file.
//
// trainSet    - Training data and labels.
// testSet     - Labeled objects to score.
// classifier  - Classifier to train and apply.
// testFile    - Not used by this function.
// outFileName - File to create; line 1 is "labels," followed by the test
//               labels, line 2 is "scores," followed by (score + 1) / 2
//               for each object.
//
// An object is predicted positive when its score is >= 0. A test label of
// 1 counts as positive, any other label as negative. The printed rates are
// plain float divisions, so a test set with no positives or no negatives
// yields a zero denominator.
//
// Nothing is written to the output file unless training and scoring both
// succeeded.
//
// Returns 0 on success, or:
//   -10  the test set has no labels
//   -11  the result buffers could not be allocated
//   -12  the output file could not be created
//   -13  the test set contains no objects
//   -14  scoring the test set failed
//   any non-zero value returned by TrainClassifier
//
int	CalcROC(MData& trainSet, MData& testSet, Classifier *classifier, string testFile,
			string outFileName)
{
	int		result = 0, numObjs = testSet.GetNumObjs(),
			*testLabels = testSet.GetLabels(),
			*predClass = NULL;
	float	**test = testSet.GetData(), *scores = NULL;
	ofstream	outFile;

	if( testLabels == NULL ) {
		cerr << "Test set has no lables" << endl;
		result = -10;
	}

	// The writers below address element numObjs - 1, so an empty test set
	// has to be rejected before anything is indexed.
	if( result == 0 && numObjs <= 0 ) {
		cerr << "Test set is empty" << endl;
		result = -13;
	}

	if( result == 0 ) {
		predClass = (int*)malloc(numObjs * sizeof(int));
		scores = (float*)malloc(numObjs * sizeof(float));
		if( predClass == NULL || scores == NULL ) {
			result = -11;
			cerr << "Unable to allocate results buffer" << endl;
		}
	}

	// Open the output only once the inputs are known to be usable, so a
	// rejected call does not create or truncate the file.
	if( result == 0 ) {
		outFile.open(outFileName.c_str());
		if( !outFile.is_open() ) {
			cerr << "Unable to create " << outFileName << endl;
			result = -12;
		}
	}

	if( result == 0 ) {
		result = TrainClassifier(classifier, trainSet);
	}

	// Only score with a classifier that trained successfully, and do not
	// report on scores that were never produced.
	if( result == 0 ) {
		if( !classifier->ScoreBatch(test, numObjs, testSet.GetDims(), scores) ) {
			cerr << "Applying classifier failed" << endl;
			result = -14;
		}
	}

	if( result == 0 ) {
		int TP = 0, FP = 0, TN = 0, FN = 0, P = 0, N = 0;

		cout << "Saving to: " << outFileName << endl;

		outFile << "labels,";
		for(int idx = 0; idx < numObjs - 1; idx++)
			outFile << testLabels[idx] << ",";
		outFile << testLabels[numObjs - 1] << endl;

		// Threshold the scores at zero to get the predicted class.
		for(int i = 0; i < numObjs; i++) {
			if( scores[i] >= 0.0f ) {
				predClass[i] = 1;
			} else {
				predClass[i] = -1;
			}
		}

		// Calculate confusion matrix
		for(int i = 0; i < numObjs; i++) {
			if( testLabels[i] == 1 ) {
				P++;
				if( predClass[i] == 1 ) {
					TP++;
				} else {
					FN++;
				}
			} else {
				N++;
				if( predClass[i] == -1 ) {
					TN++;
				} else {
					FP++;
				}
			}
		}
		printf("\t%4d\t%4d\n", TP, FP);
		printf("\t%4d\t%4d\n\n", FN, TN);

		cout << "FP rate: " << (float)FP / (float)N << endl;
		cout << "TP rate: " << (float)TP / (float)P << endl;
		cout << "Precision: " << (float)TP / (float)(TP + FP) << endl;
		cout << "Accuracy: " << (float)(TP + TN) / (float)(N + P) << endl;

		// NOTE(review): (score + 1) / 2 presumably maps scores from [-1, 1]
		// onto [0, 1] - confirm the range produced by ScoreBatch.
		outFile << "scores,";
		for(int idx = 0; idx < numObjs - 1; idx++) {
			outFile << ((scores[idx] + 1.0f) / 2.0) << ",";
		}
		outFile << ((scores[numObjs - 1] + 1.0f) / 2.0) << endl;
	}

	if( outFile.is_open() )
		outFile.close();

	if( predClass )
		free(predClass);
	if( scores )
		free(scores);

	return result;
}
示例#6
0
//
// Loads the contents of a saved set into this picker's buffers.
//
// After UpdateBuffers has been called for the set's object count, the
// labels, ids, centroids, click locations and feature data are copied
// into the member buffers, the class names are appended to m_classNames,
// and every object is looked up in m_dataset by centroid and slide name
// so its dataset index can be appended to m_samples.
//
// Returns false if UpdateBuffers fails or if an object cannot be found in
// the dataset (processing stops at the first such object); true otherwise.
//
bool Picker::RestoreSessionData(MData& testSet)
{
	int 	objCount = testSet.GetNumObjs(),
			featureDims = testSet.GetDims();

	if( !UpdateBuffers(objCount, true) ) {
		return false;
	}

	int		*srcInts = testSet.GetLabels();
	memcpy(m_labels, srcInts, objCount * sizeof(int));

	srcInts = testSet.GetIdList();
	memcpy(m_ids, srcInts, objCount * sizeof(int));

	float	*srcFloats = testSet.GetXCentroidList();
	memcpy(m_xCentroid, srcFloats, objCount * sizeof(float));

	// Sets saved by earlier versions of HistomicsML carry no click
	// locations; the centroid stands in for the click in that case.
	srcFloats = testSet.GetXClickList();
	if( srcFloats == NULL ) {
		srcFloats = testSet.GetXCentroidList();
	}
	memcpy(m_xClick, srcFloats, objCount * sizeof(float));

	srcFloats = testSet.GetYCentroidList();
	memcpy(m_yCentroid, srcFloats, objCount * sizeof(float));

	srcFloats = testSet.GetYClickList();
	if( srcFloats == NULL ) {
		srcFloats = testSet.GetYCentroidList();
	}
	memcpy(m_yClick, srcFloats, objCount * sizeof(float));

	srcFloats = testSet.GetData()[0];
	memcpy(m_trainSet[0], srcFloats, objCount * featureDims * sizeof(float));

	// Class names were not saved by older versions of al_server; use the
	// two default names when none can be loaded.
	char	**savedNames = testSet.GetClassNames();
	if( savedNames == NULL ) {
		m_classNames.push_back(string("Negative"));
		m_classNames.push_back(string("Positive"));
	} else {
		for(int cls = 0; cls < testSet.GetNumClasses(); cls++) {
			m_classNames.push_back(string(savedNames[cls]));
		}
	}

	// The slide indices stored with the set are translated through the
	// slide names into the dataset's own indices; they are not reused as is.
	srcInts = testSet.GetSlideIndices();
	char	**slideNames = testSet.GetSlideNames();

	for(int obj = 0; obj < objCount; obj++) {
		char	*slideName = slideNames[srcInts[obj]];

		m_slideIdx[obj] = m_dataset->GetSlideIdx(slideName);

		// Keep track of selected items
		int		found = m_dataset->FindItem(m_xCentroid[obj], m_yCentroid[obj], slideName);
		if( found == -1 ) {
			gLogger->LogMsg(EvtLogger::Evt_ERROR, "Unable to find item in dataset:, %f, %f in %s",
							m_xCentroid[obj], m_yCentroid[obj], slideName);
			return false;
		}
		m_samples.push_back(found);
	}
	return true;
}