Exemple #1
0
// Convert a decimal number to its textual representation in another base.
//
// src       - value to convert; a negative value yields a leading '-'.
// dst_base  - target base, forwarded to convertWhole/convertFraction.
// percision - forwarded to convertFraction (presumably the maximum number
//             of fraction digits -- confirm against convertFraction).
//
// Returns a malloc'ed, NUL-terminated string that the caller must free(),
// or NULL when an allocation or one of the helper conversions fails.
//
// NOTE(review): the whole part is narrowed to int, so |src| has to fit in
// an int; larger magnitudes are not handled here.
char* Decimal2Base(double src, unsigned int dst_base, unsigned int percision){
	double magnitude = fabs(src);
	int wholeVal = (int)magnitude;
	double fractionVal = magnitude - wholeVal;
	char *wholeStr = NULL;
	char *fractionStr = NULL;
	char *newNum = NULL;
	size_t pos = 0;

	// build both parts first so the result buffer can be sized exactly
	wholeStr = convertWhole(wholeVal, dst_base);
	if (wholeStr == NULL)
		return NULL;

	fractionStr = convertFraction(fractionVal, dst_base, percision);
	if (fractionStr == NULL){
		free(wholeStr);
		return NULL;
	}

	// room for: optional '-', whole digits, optional '.', fraction digits, '\0'
	newNum = (char*)malloc(strlen(wholeStr) + strlen(fractionStr) + 3);
	if (newNum == NULL){
		free(wholeStr);
		free(fractionStr);
		return NULL;
	}

	if (src < 0) //case negative number
		newNum[pos++] = '-';
	strcpy(newNum + pos, wholeStr);
	free(wholeStr);

	// the given number has fraction part
	if (fractionVal != 0)
		strcat(newNum, ".");

	// the fraction digits are appended even when the fraction is zero,
	// exactly as convertFraction produced them
	strcat(newNum, fractionStr);
	free(fractionStr);

	//remove any trailing zeros (the helper receives the address of the
	//pointer, so it may replace the buffer)
	removeTrailingZeros(&newNum);

	return newNum;
}
/**
 * Builds the data matrix and dumps it as one text file per matrix column.
 *
 * The string form of every column's patient entry is appended to the file
 * named by HEINZ_SAMPLES_LIST. For each column a file
 * HEINZ_INPUT_DIRECTORY + <|negativeValue| without trailing zeros> + '_'
 * + <patient string> + ".txt" is written with one line per matrix row:
 * the first element of the matching gene-list entry, a space, then
 * positiveValue when the matrix cell is greater than 0.5 and negativeValue
 * otherwise.
 *
 * @param positiveValue value written for cells > 0.5
 * @param negativeValue value written for all other cells; its absolute
 *                      value is also part of every output file name
 *
 * NOTE(review): neither stream is checked after opening, so an unwritable
 * target silently produces no output.
 */
void TCGADataNormalizer::exportToFile(double positiveValue,
		double negativeValue) {
	ptrToData->buildDataMatrix();
	std::ofstream outputStreamSamples(HEINZ_SAMPLES_LIST);
	const auto &dataMatrix = ptrToData->getDataMatrixHandler();
	const unsigned int N = dataMatrix.cols();
	const unsigned int rowCount = dataMatrix.rows();

	// The directory/weight part of the file name is the same for every
	// patient, so format it once instead of once per column.
	const std::string filePrefix = HEINZ_INPUT_DIRECTORY
			+ removeTrailingZeros(std::to_string(std::fabs(negativeValue)))
			+ '_';

	for (unsigned int i = 0; i < N; ++i) {
		const std::string outputFilename =
				ptrToData->getPatientsHandler()[i].toString();
		outputStreamSamples << outputFilename << std::endl;

		std::ofstream outputStream(filePrefix + outputFilename + ".txt");

		// '\n' rather than std::endl: flushing once per row is unnecessary,
		// the stream is flushed when it is closed at the end of the
		// iteration.
		for (unsigned int j = 0; j < rowCount; ++j) {
			outputStream << ptrToData->getGeneListHandler()[j].first << ' '
					<< (dataMatrix(j, i) > 0.5 ? positiveValue : negativeValue)
					<< '\n';
		}

		ClusterXX::Utilities::printAdvancement(i, N);
	}
}
Exemple #3
0
/**
 * Formats a value with a prefix looked up from BIG_PREFIXES/SMALL_PREFIXES
 * and an optional unit.
 *
 * Values above 0.999999999 are divided by 1000 until they drop below 1000;
 * the number of divisions indexes BIG_PREFIXES. Other values that are
 * greater than SMALLEST_VALUE are multiplied by 1000 until they reach
 * 0.999999999; the number of multiplications indexes SMALL_PREFIXES.
 * Anything else (zero, negatives, values at or below SMALLEST_VALUE) is
 * printed unscaled with SMALL_PREFIXES[0].
 *
 * The scaled number is written in fixed notation with precision 2 using the
 * default locale, passed through removeTrailingZeros together with the
 * locale's decimal point, then followed by the prefix character (when it is
 * non-zero) and the unit (when it is not empty).
 *
 * @param value number to format
 * @param unit  unit text appended after the prefix; may be empty
 * @return the formatted string
 *
 * NOTE(review): neither scaling loop bounds its counter, so the table index
 * is limited only by the magnitude of the input (an infinite value never
 * leaves the first loop) -- confirm the table sizes against the accepted
 * input range.
 */
QString UnitFormatter::format(const double value, QString unit)
{
    const QLocale locale;
    QString result;
    QTextStream out(&result);
    out.setLocale(locale);
    // Both ranges share the same numeric layout.
    out.setRealNumberPrecision(2);
    out.setRealNumberNotation(QTextStream::FixedNotation);

    double scaled = value;
    ushort prefix;
    if(value>0.999999999){
        int multiplier = 0;
        while(scaled>=1000){
            ++multiplier;
            scaled=scaled/1000;
        }
        prefix = BIG_PREFIXES[multiplier];
        out.setNumberFlags(out.numberFlags() & ~QTextStream::ForcePoint);
    }
    else {
        int divider = 0;
        // Values at or below the threshold are left unscaled so the loop
        // below is never entered with zero or a negative number.
        if(scaled>SMALLEST_VALUE){
            while(scaled<0.999999999){
                ++divider;
                scaled=scaled*1000;
            }
        }
        prefix = SMALL_PREFIXES[divider];
    }
    out<<scaled;

    // The helper edits `result` after the number has been written; the
    // prefix and unit below are appended through the same stream.
    removeTrailingZeros(result, locale.decimalPoint());
    if(prefix){
        out<<QChar(prefix);
    }
    // Explicit emptiness test instead of comparing the string against the
    // literal 0, which depends on the implicit const char* overload.
    if(!unit.isEmpty()){
        out<<unit;
    }
    return result;
}
/**
 * Prints the program banner and the selected mode, then runs the pipeline
 * selected by PROGRAM_MODE:
 *
 *  - 0: load expression and clinical data, normalize, compute the distance
 *       matrix, then run K-Means, unnormalized spectral and normalized
 *       spectral clustering, printing the clustering info of each.
 *  - 2: load and normalize as in mode 0, then write the weights to the file
 *       named by HEINZ_NEGATIVEWEIGHT_LIST and call
 *       TCGADataNormalizer::exportToFile(1, -d) for every weight d.
 *  - 1: load expression data, keep only the genes of GRAPH_NODE_FILE, then
 *       for every cut percentage in [MIN_CUT_PERCENTAGE, MAX_CUT_PERCENTAGE)
 *       in steps of STEP_CUT_PERCENTAGE, binary-quantile normalize, run
 *       unnormalized spectral clustering and print the adjusted Rand index.
 *
 * Any other mode only prints the banner.
 */
void CommandLineProcessor::runProgram() {
	// Every report section starts with a titled rule and ends with a plain
	// rule followed by a blank line.
	const auto openSection = [](const char *title) {
		std::cout << title << std::endl;
	};
	const auto closeSection = []() {
		std::cout << "--------------------------------------------------------"
				<< std::endl << std::endl;
	};

	std::cout << "--------------------------------------" << std::endl;
	std::cout << "|            TCGA-ANALYZER           |" << std::endl;
	std::cout << "--------------------------------------" << std::endl;

	if (PROGRAM_MODE == 0) {
		std::cout << std::endl << "Program mode : 0 (Clustering mode)"
				<< std::endl << std::endl;
	}

	else if (PROGRAM_MODE == 2) {
		std::cout << std::endl
				<< "Program mode : 2 (Entry of the Heinz pipeline)" << std::endl
				<< std::endl;
	}

	else if (PROGRAM_MODE == 1) {
		std::cout << std::endl
				<< "Program mode : 1 (Multiple cut percentages analyzer)"
				<< std::endl << std::endl;
	}

	if (PROGRAM_MODE == 0 || PROGRAM_MODE == 2) {

		openSection("------------------- Data Parameters --------------------");
		std::cout << "* Cancers : "
				<< implode(CANCERS.begin(), CANCERS.end(), ", ") << std::endl;
		std::cout << "* Clinical attributes : "
				<< implode(CLINICAL.begin(), CLINICAL.end(), ", ") << std::endl;
		std::cout << "* Max control samples : " << MAX_CONTROL_SAMPLES
				<< std::endl;
		std::cout << "* Max tumor samples : " << MAX_TUMOR_SAMPLES << std::endl;
		closeSection();

		/* Read Data */
		openSection("-------------------- Loading data ----------------------");
		TCGAData data;
		TCGADataLoader loader(&data, CANCERS, MAX_CONTROL_SAMPLES,
				MAX_TUMOR_SAMPLES, VERBOSE);
		loader.loadGeneExpressionData(SAMPLE_FILE);
		loader.loadClinicalData(CLINICAL);

		//Keep only data which will be in the PPI graph
		//data.keepOnlyGenesInGraph(GRAPH_NODE_FILE);
		closeSection();

		/* Normalize */
		std::shared_ptr<Normalizer> normalizer;
		openSection("-------------- Normalization parameters ----------------");
		if (DEFAULT_NORMALIZATION_METHOD == KMEANS_NORMALIZATION) {
			normalizer = std::make_shared<KMeansNormalizer>(
					K_MEANS_NORMALIZATION_PARAM, K_MEANS_MAX_ITERATIONS);
			std::cout << "* Normalization method : K-Means" << std::endl;
			std::cout << "* K : " << K_MEANS_NORMALIZATION_PARAM << std::endl;
			std::cout << "* Max iterations : " << K_MEANS_MAX_ITERATIONS
					<< std::endl;
		} else if (DEFAULT_NORMALIZATION_METHOD
				== BINARY_QUANTILE_NORMALIZATION) {
			normalizer = std::make_shared<BinaryQuantileNormalizer>(
					BINARY_QUANTILE_NORMALIZATION_PARAM);
			std::cout << "* Normalization method : Binary quantile"
					<< std::endl;
			std::cout << "* Binary quantile cut percentage : "
					<< BINARY_QUANTILE_NORMALIZATION_PARAM << std::endl;
		} else {
			normalizer = std::make_shared<NoOperationNormalizer>();
			std::cout << "* Normalization method : no normalization"
					<< std::endl;
		}
		closeSection();

		openSection("------------------ Normalizing data --------------------");

		TCGADataNormalizer tcgaNormalizer(&data, normalizer, VERBOSE);
		tcgaNormalizer.normalize();
		closeSection();

		if (PROGRAM_MODE == 0) {
			/* Output distance matrix */
			openSection(
					"------------------ Distance matrix ---------------------");
			std::cout << "* Metric : " << METRIC->toString() << std::endl;
			TCGADataDistanceMatrixAnalyser distanceMetricAnalyzer(&data, METRIC,
					VERBOSE);
			distanceMetricAnalyzer.computeDistanceMatrix();
			//distanceMetricAnalyzer.exportClassStats();
			//distanceMetricAnalyzer.exportHeatMap();
			closeSection();

			openSection(
					"---------------- Clustering parameters -----------------");
			if (K_CLUSTER == 0) {
				std::cout
						<< "Number of clusters to find : automatic (= number of real classes in the data)"
						<< std::endl;
			} else {
				std::cout << "Number of clusters to find : " << K_CLUSTER
						<< std::endl;
			}
			closeSection();

			// Only referenced by the disabled printRawClustering calls
			// below; the call is kept so the sequence of operations on
			// `data` is unchanged.
			std::vector<std::string> patientLabels = data.getPatientLabels();

			openSection(
					"------------------ KMeans Clustering -------------------");

			TCGADataKMeansClusterer kMeansClusterer(&data, K_CLUSTER,
					K_MEANS_MAX_ITERATIONS, PARALLEL_KMEANS, VERBOSE);
			kMeansClusterer.computeClustering();
			kMeansClusterer.printClusteringInfo();
			//kMeansClusterer.printRawClustering(patientLabels);

			closeSection();

			/*
			 std::cout
			 << "-------------- Hierarchical Clustering -----------------"
			 << std::endl;

			 TCGADataHierarchicalClusterer hierarchicalClusterer(&data,
			 distanceMetricAnalyzer.getDistanceMatrixHandler(), METRIC,
			 K_CLUSTER, DEFAULT_LINKAGE_METHOD, VERBOSE);
			 hierarchicalClusterer.computeClustering();
			 hierarchicalClusterer.printClusteringInfo();
			 //kMeansClusterer.printRawClustering(patientLabels);

			 std::cout
			 << "--------------------------------------------------------"
			 << std::endl << std::endl;
			 */

			openSection(
					"---------- Unnormalized Spectral Clustering ------------");

			TCGADataUnnormalizedSpectralClusterer unnormalizedSpectralClusterer(
					&data, distanceMetricAnalyzer.getDistanceMatrixHandler(),
					METRIC, K_CLUSTER, DEFAULT_GRAPH_TRANSFORMATION, VERBOSE);
			unnormalizedSpectralClusterer.computeClustering();
			unnormalizedSpectralClusterer.printClusteringInfo();
			//unnormalizedSpectralClusterer.printRawClustering(patientLabels);

			closeSection();

			openSection(
					"------ Normalized Spectral Clustering (Symmetric) ------");

			TCGADataNormalizedSpectralClusterer normalizedSpectralClusterer(
					&data, distanceMetricAnalyzer.getDistanceMatrixHandler(),
					METRIC, K_CLUSTER, DEFAULT_GRAPH_TRANSFORMATION, VERBOSE);
			normalizedSpectralClusterer.computeClustering();
			normalizedSpectralClusterer.printClusteringInfo();
			//normalizedSpectralClusterer.printRawClustering(patientLabels);

			closeSection();
		}

		else {
			std::ofstream negativeWeightsOutput(HEINZ_NEGATIVEWEIGHT_LIST);
			openSection(
					"----------------- Writing Heinz input ------------------");
			std::vector<std::string> weights_string;
			for (double d : WEIGHTS) {
				weights_string.push_back(
						removeTrailingZeros(std::to_string(d)));
			}
			std::cout << "* Weights : "
					<< implode(weights_string.begin(), weights_string.end(),
							", ") << std::endl;

			// weights_string holds one entry per weight in iteration order,
			// so the formatted text is reused instead of being rebuilt.
			auto weightText = weights_string.cbegin();
			for (double d : WEIGHTS) {
				negativeWeightsOutput << *weightText << std::endl;
				++weightText;
				std::cout << "Writing files for d=-" << d << "... "
						<< std::endl;
				tcgaNormalizer.exportToFile(1, -d);
			}

			closeSection();
		}
	}

	else if (PROGRAM_MODE == 1) {
		openSection("------------------- Data Parameters --------------------");
		std::cout << "* Cancers : "
				<< implode(CANCERS.begin(), CANCERS.end(), ", ") << std::endl;
		std::cout << "* Max control samples : " << MAX_CONTROL_SAMPLES
				<< std::endl;
		std::cout << "* Max tumor samples : " << MAX_TUMOR_SAMPLES << std::endl;
		closeSection();

		/* Read Data */
		openSection("-------------------- Loading data ----------------------");
		TCGAData data;
		TCGADataLoader loader(&data, CANCERS, MAX_CONTROL_SAMPLES,
				MAX_TUMOR_SAMPLES, VERBOSE);
		loader.loadGeneExpressionData(SAMPLE_FILE);
		data.keepOnlyGenesInGraph(GRAPH_NODE_FILE);
		closeSection();

		/*Normalizing and clustering*/
		openSection("------------- Normalizing and clustering ---------------");
		std::cout << "* Metric : " << METRIC->toString() << std::endl;
		for (double d = MIN_CUT_PERCENTAGE; d < MAX_CUT_PERCENTAGE; d +=
				STEP_CUT_PERCENTAGE) {
			// Each cut percentage runs on its own scratch copy: the loaded
			// data is never modified, so nothing has to be copied back
			// afterwards and a failure mid-iteration cannot leave `data`
			// normalized.
			TCGAData workingData = data;
			std::cout << d << std::flush;
			std::shared_ptr<Normalizer> normalizer = std::make_shared<
					BinaryQuantileNormalizer>(d);
			TCGADataNormalizer tcgaNormalizer(&workingData, normalizer, false);
			tcgaNormalizer.normalize();
			TCGADataDistanceMatrixAnalyser distanceMetricAnalyzer(&workingData,
					METRIC, false);
			distanceMetricAnalyzer.computeDistanceMatrix();
//			TCGADataKMeansClusterer kMeansClusterer(&data, K_CLUSTER,
//					K_MEANS_MAX_ITERATIONS, false);
//			kMeansClusterer.computeClustering();
			TCGADataUnnormalizedSpectralClusterer spectralClusterer(
					&workingData,
					distanceMetricAnalyzer.getDistanceMatrixHandler(), METRIC,
					K_CLUSTER, DEFAULT_GRAPH_TRANSFORMATION, false);
			spectralClusterer.computeClustering();
			//double adi1 = kMeansClusterer.getAdjustedRandIndex();
			const double adi2 = spectralClusterer.getAdjustedRandIndex();
			std::cout << "\t" << adi2 << std::endl;
		}

		closeSection();
	}
}