Ejemplo n.º 1
0
// Renormalize the training set using the test set's mean and std dev.
//
// Every training feature is first mapped back to its un-normalized value with
// the training set's own per-dimension mean / std dev and is then normalized
// again with the test set's per-dimension mean / std dev. The training
// feature matrix is modified in place; the mean / std dev arrays held by
// either set are not changed here.
//
// The conversion is skipped when both sets already share identical
// statistics, because it would only reproduce the existing values.
//
// NOTE(review): the test set's arrays are indexed with the training set's
// dimension count, so both sets are assumed to have the same number of
// dimensions - confirm the caller guarantees this.
//
// Returns 0; there is no failure path in this function.
//
int Renormalize(MData& trainSet, MData& testSet)
{
	int		result = 0;
	int		numDims = trainSet.GetDims(), numObjs = trainSet.GetNumObjs();
	float	*trainMean = trainSet.GetMeans(), *testMean = testSet.GetMeans(),
			*trainStdDev = trainSet.GetStdDevs(), *testStdDev = testSet.GetStdDevs(),
			**trainFeatures = trainSet.GetData();
	bool	needsNorm = false;

	// Re-normalization is needed as soon as any dimension was normalized
	// with a different mean or std dev in the two sets.
	for(int i = 0; i < numDims; i++) {
		if( trainMean[i] != testMean[i] ||
			trainStdDev[i] != testStdDev[i] ) {
				needsNorm = true;
				break;
		}
	}

	if( needsNorm ) {
		for(int obj = 0; obj < numObjs; obj++) {
			for(int dim = 0; dim < numDims; dim++) {
				// Undo the training set's normalization...
				float raw = (trainFeatures[obj][dim] * trainStdDev[dim]) + trainMean[dim];

				// ...and apply the test set's instead.
				trainFeatures[obj][dim] = (raw - testMean[dim]) / testStdDev[dim];
			}
		}
	}
	return result;
}
Ejemplo n.º 2
0
// Train the classifier on the training set, score every object of the test
// set and write the scores of a single slide to a CSV file.
//
//	trainSet	- Data the classifier is trained on
//	testSet		- Data that is scored; also supplies the centroids and the
//				  position of the slide's objects
//	classifier	- Classifier to train and apply
//	slide		- Name of the slide whose objects are written
//	outFileName	- CSV file to create, with the columns "score,X,Y"
//
// Returns 0 on success, or:
//	-10	the output file could not be created
//	-11	the score buffer could not be allocated
//	-12	scoring the test set failed
//	-13	the slide's object range lies outside the test set
//	any non-zero code returned by TrainClassifier if training fails
//
int GenerateMap(MData& trainSet, MData& testSet, Classifier *classifier,
				string slide, string outFileName)
{
	int		result = 0, offset = 0, slideObjs = 0,
			numObjs = testSet.GetNumObjs();
	float	**test = testSet.GetData(), *scores = NULL;

	// Always request at least one element so that an empty test set is not
	// mistaken for an allocation failure (malloc(0) may return NULL).
	size_t	bufLen = (numObjs > 0) ? (size_t)numObjs : 1;

	scores = (float*)malloc(bufLen * sizeof(float));
	if( scores == NULL ) {
		cerr << "Unable to allocate score buffer" << endl;
		result = -11;
	}

	if( result == 0 ) {
		result = TrainClassifier(classifier, trainSet);
	}

	if( result == 0 ) {
		if( !classifier->ScoreBatch(test, numObjs, testSet.GetDims(), scores) ) {
			cerr << "Applying classifier failed" << endl;
			result = -12;
		}
	}

	if( result == 0 ) {
		offset = testSet.GetSlideOffset(slide, slideObjs);

		// scores holds numObjs entries, anything outside [0, numObjs) would
		// be read from unrelated memory.
		if( offset < 0 || slideObjs < 0 || offset > numObjs - slideObjs ) {
			cerr << "Slide " << slide << " is not within the test set" << endl;
			result = -13;
		}
	}

	if( result == 0 ) {
		ofstream 	outFile(outFileName.c_str());

		if( outFile.is_open() ) {

			outFile << "score,X,Y" << endl;

			for(int i = offset; i < offset + slideObjs; i++) {
				outFile << scores[i] << "," << testSet.GetXCentroid(i) << "," << testSet.GetYCentroid(i) << endl;
			}
			outFile.close();
		} else {
			cerr << "Unable to create " << outFileName << endl;
			result = -10;
		}
	}

	if( scores )
		free(scores);

	return result;
}
Ejemplo n.º 3
0
// Train the classifier with every object of the training set.
//
//	classifier	- Classifier to train
//	trainSet	- Supplies the features, labels, object count and dimensions
//
// Returns 0 when training succeeds and -20 when the classifier reports a
// failure.
//
int TrainClassifier(Classifier *classifier, MData& trainSet)
{
	int		*classLabels = trainSet.GetLabels();
	int		objCount = trainSet.GetNumObjs();
	int		dimCount = trainSet.GetDims();
	float	**features = trainSet.GetData();

	// Train() is handed the pointer to the first row of the feature data.
	if( classifier->Train(features[0], classLabels, objCount, dimCount) ) {
		return 0;
	}

	cerr << "Classifier traiing FAILED" << endl;
	return -20;
}
Ejemplo n.º 4
0
// Train the classifier with the objects that were added up to and including
// the given iteration.
//
// The number of training objects is the length of the leading run of entries
// in the training set's iteration list that are <= iteration; only that many
// objects are passed to the classifier.
// NOTE(review): this presumably relies on the iteration list being sorted in
// ascending order - confirm against the code that builds the training set.
//
//	classifier	- Classifier to train
//	trainSet	- Supplies the features, labels and the iteration list
//	iteration	- Last iteration whose objects are included
//
// Returns 0 when training succeeds and -10 when the classifier reports a
// failure.
//
int TrainClassifier(Classifier *classifier, MData& trainSet, int iteration)
{
	int		result = 0;
	int		*labels = trainSet.GetLabels(), dims = trainSet.GetDims(),
			numObjs = trainSet.GetNumObjs(), count = 0,
			*iterationList = trainSet.GetIterationList();
	float	**data = trainSet.GetData();

	// The bounds check has to come first, otherwise iterationList[numObjs]
	// is read when every object belongs to the requested iterations.
	while( count < numObjs && iterationList[count] <= iteration )
		count++;

	cout << "Train set size: " << count  << endl;

	if( !classifier->Train(data[0], labels, count, dims) ) {
		cerr << "Classifier traiing FAILED" << endl;
		result = -10;
	}
	return result;
}
Ejemplo n.º 5
0
int ApplyClassifier(MData& trainSet, MData& testSet, Classifier *classifier,
					string testFileName, string outFileName)
{
	int		result = 0, dims = trainSet.GetDims(), *trainLabel = trainSet.GetLabels(),
			numTestObjs = testSet.GetNumObjs();
	float	**test = testSet.GetData(), **train = trainSet.GetData(),
			*predScore = NULL;

	if( dims != testSet.GetDims() ) {
		cerr << "Training and test set dimensions do not match" << endl;
		result = -30;
	}

	if( result == 0 ) {
		cout << "Allocating prediction buffer" << endl;
		predScore = (float*)malloc(numTestObjs * sizeof(float));
		if( predScore == NULL ) {
			cerr << "Unable to allocae prediction buffer" << endl;
			result = -31;
		}
	}

	if( result == 0 ) {
		cout << "Training classifier..." << endl;
		if( !classifier->Train(train[0], trainLabel, trainSet.GetNumObjs(), dims) ) {
			cerr << "Classifier training failed" << endl;
			result = -32;
		}
	}

	if( result == 0 ) {
		cout << "Applying classifier..." << endl;
		if( ! classifier->ScoreBatch(test, numTestObjs, dims, predScore) ) {
			cerr << "Applying classifier failed" << endl;
			result = -33;
		}
	}

	if( result == 0 ) {
		// Copy original test file so we can just append the
		// score data
		//
		string cmd = "cp " + testFileName + " " + outFileName;
		result = system(cmd.c_str());
	}

	if( result == 0 ) {
		hid_t		fileId;
		hsize_t		dims[2];
		herr_t		status;

		fileId = H5Fopen(outFileName.c_str(), H5F_ACC_RDWR, H5P_DEFAULT);
		if( fileId < 0 ) {
			cerr << "Unable to open: " << outFileName << endl;
			result = -34;
		}

		if( result == 0 ) {
			dims[0] = numTestObjs;
			dims[1] = 1;

			status = H5LTmake_dataset(fileId, "/pred_score", 2, dims,
										H5T_NATIVE_FLOAT, predScore);
			if( status < 0 ) {
				cerr << "Unable to write score data" << endl;
				result = -35;
			}
		}

		if( fileId >= 0 ) {
			H5Fclose(fileId);
		}
	}

	if( predScore )
		free(predScore);

	return result;
}
Ejemplo n.º 6
0
// Train the classifier, score the test set and report how well the scores
// agree with the test set's labels.
//
// An object is predicted positive when its score is >= 0; a label of 1 is
// treated as positive and every other label as negative. The confusion
// matrix and the rates derived from it are printed to stdout. The labels and
// the scores, rescaled as (score + 1) / 2, are written to outFileName as the
// two CSV rows "labels" and "scores".
//
//	trainSet	- Data the classifier is trained on
//	testSet		- Labeled data that is scored
//	classifier	- Classifier to train and apply
//	testFile	- Not used by this function
//	outFileName	- CSV file to create
//
// Returns 0 on success, or:
//	-10	the test set has no labels
//	-11	the result buffers could not be allocated
//	-12	the output file could not be created
//	-13	the test set is empty
//	-14	scoring the test set failed
//	any non-zero code returned by TrainClassifier if training fails
//
int	CalcROC(MData& trainSet, MData& testSet, Classifier *classifier, string testFile,
			string outFileName)
{
	int		result = 0, numObjs = testSet.GetNumObjs(),
			*testLabels = testSet.GetLabels(),
			*predClass = NULL;

	float	**test = testSet.GetData(),
			*scores = NULL;

	if( testLabels == NULL ) {
		cerr << "Test set has no lables" << endl;
		result = -10;
	}

	// The last CSV column is written from element numObjs - 1, which does
	// not exist for an empty set.
	if( result == 0 && numObjs <= 0 ) {
		cerr << "Test set is empty" << endl;
		result = -13;
	}

	if( result == 0 ) {
		predClass = (int*)malloc(numObjs * sizeof(int));
		scores = (float*)malloc(numObjs * sizeof(float));

		if( predClass == NULL || scores == NULL ) {
			result = -11;
			cerr << "Unable to allocate results buffer" << endl;
		}
	}

	// Train and score before the output file is created, a failure then
	// does not leave an incomplete file behind.
	if( result == 0 ) {
		result = TrainClassifier(classifier, trainSet);
	}

	if( result == 0 ) {
		if( !classifier->ScoreBatch(test, numObjs, testSet.GetDims(), scores) ) {
			cerr << "Applying classifier failed" << endl;
			result = -14;
		}
	}

	if( result == 0 ) {
		ofstream 	outFile(outFileName.c_str());

		if( !outFile.is_open() ) {
			cerr << "Unable to create " << outFileName << endl;
			result = -12;
		} else {
			int TP = 0, FP = 0, TN = 0, FN = 0, P = 0, N = 0;

			cout << "Saving to: " << outFileName << endl;

			outFile << "labels,";
			for(int idx = 0; idx < numObjs - 1; idx++)
				outFile << testLabels[idx] << ",";
			outFile << testLabels[numObjs - 1] << endl;

			// The sign of the score selects the predicted class
			for(int i = 0; i < numObjs; i++) {
				if( scores[i] >= 0.0f ) {
					predClass[i] = 1;
				} else {
					predClass[i] = -1;
				}
			}

			// Calculate confusion matrix
			for(int i = 0; i < numObjs; i++) {
				if( testLabels[i] == 1 ) {
					P++;
					if( predClass[i] == 1 ) {
						TP++;
					} else {
						FN++;
					}
				} else {
					N++;
					if( predClass[i] == -1 ) {
						TN++;
					} else {
						FP++;
					}
				}
			}
			printf("\t%4d\t%4d\n", TP, FP);
			printf("\t%4d\t%4d\n\n", FN, TN);

			// NOTE(review): a rate whose denominator is 0 (a class that is
			// absent or never predicted) has no defined value.
			cout << "FP rate: " << (float)FP / (float)N << endl;
			cout << "TP rate: " << (float)TP / (float)P << endl;
			cout << "Precision: " << (float)TP / (float)(TP + FP) << endl;
			cout << "Accuracy: " << (float)(TP + TN) / (float)(N + P) << endl;

			outFile << "scores,";
			for(int idx = 0; idx < numObjs - 1; idx++) {
				outFile << ((scores[idx] + 1.0f) / 2.0) << ",";
			}
			outFile << ((scores[numObjs - 1] + 1.0f) / 2.0) << endl;
			outFile.close();
		}
	}

	if( predClass )
		free(predClass);
	if( scores )
		free(scores);

	return result;
}
Ejemplo n.º 7
0
//  Apply the classifier to each slide in the test set. Save the counts of positive and negative
//	classes for each slide to a csv file.
//
//	The file has three rows, "slides", "positive" and "negative", each with
//	one column per slide of the test set.
//
//	trainSet	- Data the classifier is trained on
//	testSet		- Data that is classified, also supplies the slide names
//	classifier	- Classifier to train and apply
//	testFile	- Not used by this function
//	outFileName	- CSV file to create
//
//	Returns 0 on success, or:
//		-10	no classifier was given
//		-11	the prediction buffer could not be allocated
//		-12	the output file could not be opened
//		-13	classifying the test set failed
//		any non-zero code returned by TrainClassifier or CountResults
//
int	ClassifySlides(MData& trainSet, MData& testSet, Classifier *classifier, string testFile,
					string outFileName)
{
	int		result = 0, numSlides = testSet.GetNumSlides();
	int		*pred = NULL, *posSlideCnt = NULL, *negSlideCnt = NULL;
	char	**slideNames = testSet.GetSlideNames();
	ofstream 	outFile(outFileName.c_str());

	if( classifier == NULL ) {
		result = -10;
	}

	if( result == 0 ) {
		if( outFile.is_open() ) {

			// The separator is written in front of every column but the
			// first, so a test set without slides never indexes element -1.
			outFile << "slides,";
			for(int i = 0; i < numSlides; i++) {
				if( i > 0 )
					outFile << ",";
				outFile << slideNames[i];
			}
			outFile << endl;

			pred = (int*)malloc(testSet.GetNumObjs() * sizeof(int));
			if( pred == NULL ) {
				cerr << "Unable to allocate results buffer" << endl;
				result = -11;
			}

		} else {
			cerr << "Unable to open " << outFileName << endl;
			result = -12;
		}
	}

	if( result == 0 ) {
		result = TrainClassifier(classifier, trainSet);
	}


	if( result == 0 ) {
		if( !classifier->ClassifyBatch(testSet.GetData(), testSet.GetNumObjs(), testSet.GetDims(), pred) ) {
			cerr << "Classification failed" << endl;
			result = -13;
		}
	}

	if( result == 0 ) {
		result = CountResults(testSet, pred, posSlideCnt, negSlideCnt);
	}

	if( result == 0 ) {
		outFile << "positive,";
		for( int i = 0; i < numSlides; i++ ) {
			if( i > 0 )
				outFile << ",";
			outFile << posSlideCnt[i];
		}
		outFile << endl;

		outFile << "negative,";
		for( int i = 0; i < numSlides; i++ ) {
			if( i > 0 )
				outFile << ",";
			outFile << negSlideCnt[i];
		}
		outFile << endl;
	}

	if( outFile.is_open() ) {
		outFile.close();
	}

	// The prediction buffer is owned by this function and has to be released
	// together with the per-slide counters.
	if( pred )
		free(pred);
	if( posSlideCnt )
		free(posSlideCnt);
	if( negSlideCnt )
		free(negSlideCnt);
	return result;
}
Ejemplo n.º 8
0
// Restore the picker's state from a previously saved set of samples.
//
// UpdateBuffers() is called for the number of objects in testSet, then the
// labels, ids, centroids, click locations and feature data of testSet are
// copied into the member buffers and the class names are appended to
// m_classNames. Finally every object is looked up in m_dataset to record its
// slide index and the index of the matching dataset item.
//
// Returns false when UpdateBuffers() fails or when an object cannot be found
// in m_dataset (the lookup stops at the first object that is missing), true
// otherwise.
//
bool Picker::RestoreSessionData(MData& testSet)
{
	int 	objCount = testSet.GetNumObjs(),
			dimCount = testSet.GetDims();

	if( !UpdateBuffers(objCount, true) ) {
		return false;
	}

	// Per-object integer attributes
	int		*intSrc = testSet.GetLabels();
	memcpy(m_labels, intSrc, objCount * sizeof(int));

	intSrc = testSet.GetIdList();
	memcpy(m_ids, intSrc, objCount * sizeof(int));

	// X coordinates. Test sets created with earlier versions of HistomicsML
	// don't have clicks, the centroids stand in for them in that case.
	float	*floatSrc = testSet.GetXCentroidList();
	memcpy(m_xCentroid, floatSrc, objCount * sizeof(float));

	floatSrc = testSet.GetXClickList();
	if( floatSrc == NULL ) {
		floatSrc = testSet.GetXCentroidList();
	}
	memcpy(m_xClick, floatSrc, objCount * sizeof(float));

	// Y coordinates, with the same fallback for the clicks
	floatSrc = testSet.GetYCentroidList();
	memcpy(m_yCentroid, floatSrc, objCount * sizeof(float));

	floatSrc = testSet.GetYClickList();
	if( floatSrc == NULL ) {
		floatSrc = testSet.GetYCentroidList();
	}
	memcpy(m_yClick, floatSrc, objCount * sizeof(float));

	// Feature data, copied as one block starting at the first row
	floatSrc = testSet.GetData()[0];
	memcpy(m_trainSet[0], floatSrc, objCount * dimCount * sizeof(float));

	// Older versions of al_server did not save class names, defaults are
	// used when none can be loaded.
	char	**savedNames = testSet.GetClassNames();
	if( savedNames == NULL ) {
		m_classNames.push_back(string("Negative"));
		m_classNames.push_back(string("Positive"));
	} else {
		for(int cls = 0; cls < testSet.GetNumClasses(); cls++) {
			m_classNames.push_back(string(savedNames[cls]));
		}
	}

	// The slide indices stored with the samples refer to the sample set's
	// own slide name list; they are translated to the dataset's indices by
	// slide name.
	int		*sampleSlide = testSet.GetSlideIndices();
	char	**sampleSlideNames = testSet.GetSlideNames();

	for(int obj = 0; obj < objCount; obj++) {
		char	*slideName = sampleSlideNames[sampleSlide[obj]];

		m_slideIdx[obj] = m_dataset->GetSlideIdx(slideName);

		// Keep track of the selected items
		int		item = m_dataset->FindItem(m_xCentroid[obj], m_yCentroid[obj], slideName);
		if( item == -1 ) {
			gLogger->LogMsg(EvtLogger::Evt_ERROR, "Unable to find item in dataset:, %f, %f in %s",
							m_xCentroid[obj], m_yCentroid[obj], slideName);
			return false;
		}
		m_samples.push_back(item);
	}
	return true;
}