// convert decimal to any base char* Decimal2Base(double src, unsigned int dst_base, unsigned int percision){ int wholeVal = (int)abs(src); double fractionVal = fabs(src) - wholeVal; char *newNum = NULL; char *temp = NULL; int len = 0; newNum = (char*)malloc(100); //create the whole part temp = convertWhole(wholeVal, dst_base); if (src < 0){ //case negative number newNum[0] = '-'; strcpy(newNum + 1, temp); } else // case positive strcpy(newNum, temp); free(temp); // the given number has fraction part if (fractionVal != 0) strcat(newNum, "."); //create the fraction part temp = convertFraction(fractionVal, dst_base, percision); strcat(newNum, temp); free(temp); //remove any trailing spaces removeTrailingZeros(&newNum); return newNum; }
/**
 * Writes one text file per data-matrix column plus a list of the sample names.
 *
 * The method first calls buildDataMatrix() on the wrapped data. For every
 * column i it appends the patient's toString() name to HEINZ_SAMPLES_LIST and
 * creates HEINZ_INPUT_DIRECTORY + <|negativeValue| passed through
 * removeTrailingZeros()> + '_' + <patient name> + ".txt". That file receives
 * one line per matrix row: the `first` member of the matching gene-list entry,
 * a space, and then positiveValue when the cell is greater than 0.5 or
 * negativeValue otherwise.
 *
 * @param positiveValue value written for cells strictly greater than 0.5
 * @param negativeValue value written for every other cell; its absolute value
 *                      is also part of each output file name
 */
void TCGADataNormalizer::exportToFile(double positiveValue,
        double negativeValue) {
    ptrToData->buildDataMatrix();
    std::ofstream outputStreamSamples(HEINZ_SAMPLES_LIST);
    const auto &dataMatrix = ptrToData->getDataMatrixHandler();
    unsigned int N = dataMatrix.cols();

    // The directory/weight prefix does not depend on the sample, so it is
    // formatted once instead of once per output file.
    const std::string filenamePrefix = HEINZ_INPUT_DIRECTORY
            + removeTrailingZeros(std::to_string(std::fabs(negativeValue)))
            + '_';

    for (unsigned int i = 0; i < N; ++i) {
        std::string outputFilename =
                ptrToData->getPatientsHandler()[i].toString();
        outputStreamSamples << outputFilename << std::endl;

        std::ofstream outputStream(filenamePrefix + outputFilename + ".txt");
        for (unsigned int j = 0; j < dataMatrix.rows(); ++j) {
            // '\n' rather than std::endl: the stream is flushed when it goes
            // out of scope at the end of this sample, so flushing after every
            // gene row only costs time.
            outputStream << ptrToData->getGeneListHandler()[j].first << " "
                    << (dataMatrix(j, i) > 0.5 ? positiveValue : negativeValue)
                    << '\n';
        }
        ClusterXX::Utilities::printAdvancement(i, N);
    }
}
/**
 * Renders a value as "<number><prefix><unit>".
 *
 * Values above 0.999999999 are divided by 1000 until they drop below 1000;
 * the number of divisions selects the entry of BIG_PREFIXES. Other values
 * that are greater than SMALLEST_VALUE are multiplied by 1000 until they
 * reach 0.999999999, and the number of multiplications selects the entry of
 * SMALL_PREFIXES. Values not greater than SMALLEST_VALUE are printed unscaled
 * with SMALL_PREFIXES[0].
 *
 * The number is written in fixed notation with a precision of 2 using a
 * default-constructed QLocale, then passed through removeTrailingZeros()
 * before the prefix and unit are appended.
 *
 * @param value the quantity to format
 * @param unit  unit text appended after the prefix; nothing is appended when
 *              it is empty
 * @return the formatted text
 */
QString UnitFormatter::format(const double value, QString unit)
{
    QLocale locale;
    QString result;
    ushort prefix;
    QTextStream out(&result);
    out.setLocale(locale);

    if(value>0.999999999){
        double aux = value;
        int multiplier = 0;
        while(aux>=1000){
            ++multiplier;
            aux=aux/1000;
        }
        // NOTE(review): the index is not checked against the table length
        // here - confirm BIG_PREFIXES covers the range callers can pass.
        prefix = BIG_PREFIXES[multiplier];
        out.setRealNumberPrecision(2);
        out.setNumberFlags(out.numberFlags() & ~QTextStream::ForcePoint);
        out.setRealNumberNotation(QTextStream::FixedNotation);
        out<<aux;
    } else {
        double aux = value;
        int divider = 0;
        if(aux>SMALLEST_VALUE){
            while(aux<0.999999999){
                ++divider;
                aux=aux*1000;
            }
        }
        // NOTE(review): same unchecked-index caveat for SMALL_PREFIXES.
        prefix = SMALL_PREFIXES[divider];
        out.setRealNumberPrecision(2);
        out.setRealNumberNotation(QTextStream::FixedNotation);
        out<<aux;
    }

    // Trim the number while it is still the only content of the string; the
    // prefix and unit are streamed afterwards.
    removeTrailingZeros(result, locale.decimalPoint());

    // A zero entry in the prefix tables means "no prefix".
    if(prefix){
        out<<QChar(prefix);
    }
    // isEmpty() states the intent directly; the previous comparison of a
    // QString against the literal 0 relied on a null-pointer overload.
    if(!unit.isEmpty()){
        out<<unit;
    }
    return result;
}
void CommandLineProcessor::runProgram() { std::cout << "--------------------------------------" << std::endl; std::cout << "| TCGA-ANALYZER |" << std::endl; std::cout << "--------------------------------------" << std::endl; if (PROGRAM_MODE == 0) { std::cout << std::endl << "Program mode : 0 (Clustering mode)" << std::endl << std::endl; } else if (PROGRAM_MODE == 2) { std::cout << std::endl << "Program mode : 2 (Entry of the Heinz pipeline)" << std::endl << std::endl; } else if (PROGRAM_MODE == 1) { std::cout << std::endl << "Program mode : 1 (Multiple cut percentages analyzer)" << std::endl << std::endl; } if (PROGRAM_MODE == 0 || PROGRAM_MODE == 2) { std::cout << "------------------- Data Parameters --------------------" << std::endl; std::cout << "* Cancers : " << implode(CANCERS.begin(), CANCERS.end(), ", ") << std::endl; std::cout << "* Clinical attributes : " << implode(CLINICAL.begin(), CLINICAL.end(), ", ") << std::endl; std::cout << "* Max control samples : " << MAX_CONTROL_SAMPLES << std::endl; std::cout << "* Max tumor samples : " << MAX_TUMOR_SAMPLES << std::endl; std::cout << "--------------------------------------------------------" << std::endl << std::endl; /* Read Data */ std::cout << "-------------------- Loading data ----------------------" << std::endl; TCGAData data; TCGADataLoader loader(&data, CANCERS, MAX_CONTROL_SAMPLES, MAX_TUMOR_SAMPLES, VERBOSE); loader.loadGeneExpressionData(SAMPLE_FILE); loader.loadClinicalData(CLINICAL); //Keep only data which will be in the PPI graph //data.keepOnlyGenesInGraph(GRAPH_NODE_FILE); std::cout << "--------------------------------------------------------" << std::endl << std::endl; /* Normalize */ std::shared_ptr<Normalizer> normalizer; std::cout << "-------------- Normalization parameters ----------------" << std::endl; if (DEFAULT_NORMALIZATION_METHOD == KMEANS_NORMALIZATION) { normalizer = std::make_shared<KMeansNormalizer>( K_MEANS_NORMALIZATION_PARAM, K_MEANS_MAX_ITERATIONS); std::cout << "* 
Normalization method : K-Means" << std::endl; std::cout << "* K : " << K_MEANS_NORMALIZATION_PARAM << std::endl; std::cout << "* Max iterations : " << K_MEANS_MAX_ITERATIONS << std::endl; } else if (DEFAULT_NORMALIZATION_METHOD == BINARY_QUANTILE_NORMALIZATION) { normalizer = std::make_shared<BinaryQuantileNormalizer>( BINARY_QUANTILE_NORMALIZATION_PARAM); std::cout << "* Normalization method : Binary quantile" << std::endl; std::cout << "* Binary quantile cut percentage : " << BINARY_QUANTILE_NORMALIZATION_PARAM << std::endl; } else { normalizer = std::make_shared<NoOperationNormalizer>(); std::cout << "* Normalization method : no normalization" << std::endl; } std::cout << "--------------------------------------------------------" << std::endl << std::endl; std::cout << "------------------ Normalizing data --------------------" << std::endl; TCGADataNormalizer tcgaNormalizer(&data, normalizer, VERBOSE); tcgaNormalizer.normalize(); std::cout << "--------------------------------------------------------" << std::endl << std::endl; if (PROGRAM_MODE == 0) { /* Output distance matrix */ std::cout << "------------------ Distance matrix ---------------------" << std::endl; std::cout << "* Metric : " << METRIC->toString() << std::endl; TCGADataDistanceMatrixAnalyser distanceMetricAnalyzer(&data, METRIC, VERBOSE); distanceMetricAnalyzer.computeDistanceMatrix(); //distanceMetricAnalyzer.exportClassStats(); //distanceMetricAnalyzer.exportHeatMap(); std::cout << "--------------------------------------------------------" << std::endl << std::endl; std::cout << "---------------- Clustering parameters -----------------" << std::endl; if (K_CLUSTER == 0) { std::cout << "Number of clusters to find : automatic (= number of real classes in the data)" << std::endl; } else { std::cout << "Number of clusters to find : " << K_CLUSTER << std::endl; } std::cout << "--------------------------------------------------------" << std::endl << std::endl; std::vector<std::string> patientLabels = 
data.getPatientLabels(); std::cout << "------------------ KMeans Clustering -------------------" << std::endl; TCGADataKMeansClusterer kMeansClusterer(&data, K_CLUSTER, K_MEANS_MAX_ITERATIONS, PARALLEL_KMEANS, VERBOSE); kMeansClusterer.computeClustering(); kMeansClusterer.printClusteringInfo(); //kMeansClusterer.printRawClustering(patientLabels); std::cout << "--------------------------------------------------------" << std::endl << std::endl; /* std::cout << "-------------- Hierarchical Clustering -----------------" << std::endl; TCGADataHierarchicalClusterer hierarchicalClusterer(&data, distanceMetricAnalyzer.getDistanceMatrixHandler(), METRIC, K_CLUSTER, DEFAULT_LINKAGE_METHOD, VERBOSE); hierarchicalClusterer.computeClustering(); hierarchicalClusterer.printClusteringInfo(); //kMeansClusterer.printRawClustering(patientLabels); std::cout << "--------------------------------------------------------" << std::endl << std::endl; */ std::cout << "---------- Unnormalized Spectral Clustering ------------" << std::endl; TCGADataUnnormalizedSpectralClusterer unnormalizedSpectralClusterer( &data, distanceMetricAnalyzer.getDistanceMatrixHandler(), METRIC, K_CLUSTER, DEFAULT_GRAPH_TRANSFORMATION, VERBOSE); unnormalizedSpectralClusterer.computeClustering(); unnormalizedSpectralClusterer.printClusteringInfo(); //unnormalizedSpectralClusterer.printRawClustering(patientLabels); std::cout << "--------------------------------------------------------" << std::endl << std::endl; std::cout << "------ Normalized Spectral Clustering (Symmetric) ------" << std::endl; TCGADataNormalizedSpectralClusterer normalizedSpectralClusterer( &data, distanceMetricAnalyzer.getDistanceMatrixHandler(), METRIC, K_CLUSTER, DEFAULT_GRAPH_TRANSFORMATION, VERBOSE); normalizedSpectralClusterer.computeClustering(); normalizedSpectralClusterer.printClusteringInfo(); //normalizedSpectralClusterer.printRawClustering(patientLabels); std::cout << "--------------------------------------------------------" << 
std::endl << std::endl; } else { std::ofstream negativeWeightsOutput(HEINZ_NEGATIVEWEIGHT_LIST); std::cout << "----------------- Writing Heinz input ------------------" << std::endl; std::vector<std::string> weights_string; for (double d : WEIGHTS) { weights_string.push_back( removeTrailingZeros(std::to_string(d))); } std::cout << "* Weights : " << implode(weights_string.begin(), weights_string.end(), ", ") << std::endl; for (double d : WEIGHTS) { negativeWeightsOutput << removeTrailingZeros(std::to_string(d)) << std::endl; std::cout << "Writing files for d=-" << d << "... " << std::endl; tcgaNormalizer.exportToFile(1, -d); } std::cout << "--------------------------------------------------------" << std::endl << std::endl; } } else if (PROGRAM_MODE == 1) { std::cout << "------------------- Data Parameters --------------------" << std::endl; std::cout << "* Cancers : " << implode(CANCERS.begin(), CANCERS.end(), ", ") << std::endl; std::cout << "* Max control samples : " << MAX_CONTROL_SAMPLES << std::endl; std::cout << "* Max tumor samples : " << MAX_TUMOR_SAMPLES << std::endl; std::cout << "--------------------------------------------------------" << std::endl << std::endl; /* Read Data */ std::cout << "-------------------- Loading data ----------------------" << std::endl; TCGAData data; TCGADataLoader loader(&data, CANCERS, MAX_CONTROL_SAMPLES, MAX_TUMOR_SAMPLES, VERBOSE); loader.loadGeneExpressionData(SAMPLE_FILE); data.keepOnlyGenesInGraph(GRAPH_NODE_FILE); std::cout << "--------------------------------------------------------" << std::endl << std::endl; /*Normalizing and clustering*/ std::cout << "------------- Normalizing and clustering ---------------" << std::endl; std::cout << "* Metric : " << METRIC->toString() << std::endl; for (double d = MIN_CUT_PERCENTAGE; d < MAX_CUT_PERCENTAGE; d += STEP_CUT_PERCENTAGE) { TCGAData dataCopy = data; std::cout << d << std::flush; std::shared_ptr<Normalizer> normalizer = std::make_shared< BinaryQuantileNormalizer>(d); 
TCGADataNormalizer tcgaNormalizer(&data, normalizer, false); tcgaNormalizer.normalize(); TCGADataDistanceMatrixAnalyser distanceMetricAnalyzer(&data, METRIC, false); distanceMetricAnalyzer.computeDistanceMatrix(); // TCGADataKMeansClusterer kMeansClusterer(&data, K_CLUSTER, // K_MEANS_MAX_ITERATIONS, false); // kMeansClusterer.computeClustering(); TCGADataUnnormalizedSpectralClusterer spectralClusterer(&data, distanceMetricAnalyzer.getDistanceMatrixHandler(), METRIC, K_CLUSTER, DEFAULT_GRAPH_TRANSFORMATION, false); spectralClusterer.computeClustering(); //double adi1 = kMeansClusterer.getAdjustedRandIndex(); double adi2 = spectralClusterer.getAdjustedRandIndex(); std::cout << "\t" << adi2 << std::endl; data = dataCopy; } std::cout << "--------------------------------------------------------" << std::endl << std::endl; } }