void Ploter::plotCorrectedHll(string filename) {
    ofstream file(filename);
    uint64_t hash;
    uint64_t hash128[2];
    vector<double> estimates = makeVector();
    vector< vector<double> > tab(CARDMAX/STEP, vector<double>(TESTS));
    // Bracketing points for the bias-correction interpolation (doubles, so the
    // raw estimates are not truncated as they were when these were ints).
    double x1 = 0, y1 = 0;
    double x2 = CARDMAX/STEP - 1, y2 = 0;
    int ca = 0;
    for (int i = 0; i < TESTS; i++) {
        cout << i << endl;
        Hll hll(14);
        for (int j = 0; j < CARDMAX; j++) {
            MurmurHash3_x86_128(&ca, 4, 0, &hash128);
            ca++;
            hash = hash128[0];
            hll.AddItem64(hash);
            if (j % STEP == 0) {
                double count = hll.CountRaw64();
                // Find the pair of precomputed raw estimates that brackets this
                // count and map it back to a cardinality by interpolation.
                // (Loop counter renamed from i to k to avoid shadowing the test index.)
                for (int k = 0; k < CARDMAX/STEP - 1; k++) {
                    if (estimates[k] <= count && count < estimates[k+1]) {
                        x1 = estimates[k];
                        y1 = k * STEP;
                        x2 = estimates[k+1];
                        y2 = (k+1) * STEP;
                        count = interpolation(count, x1, y1, x2, y2);
                        break;  // stop at the first bracketing interval so the corrected value is not re-corrected
                    }
                }
                //count = (double)abs(count-j)/j;
                tab[j/STEP][i] = count;
            }
        }
    }
    for (int j = 0; j < CARDMAX/STEP; j++) {
        double sum = 0;
        for (int k = 0; k < TESTS; k++) {
            sum += tab[j][k];
        }
        double median = percentile(tab[j], 0.5);
        double pct01  = percentile(tab[j], 0.01);
        double pct99  = percentile(tab[j], 0.99);
        file << (j*STEP) << "\t" << sum/TESTS << "\t" << median
             << "\t" << pct01 << "\t" << pct99 << endl;
    }
}
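// The interpolation() helper called by plotCorrectedHll() is not shown in this
// file. A minimal sketch, assuming it simply maps the raw estimate linearly
// between the two bracketing (estimate, cardinality) points; the real helper
// may behave differently:
static double interpolation(double x, double x1, double y1, double x2, double y2) {
    if (x2 == x1) return y1;                       // degenerate bracket: fall back to the left point
    return y1 + (x - x1) * (y2 - y1) / (x2 - x1);  // linear map from the estimate axis to the cardinality axis
}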
void test_percentile() {
    double res;
    double vals[25] = {43, 54, 56, 61, 62, 66, 68, 69, 69, 70, 71, 72, 77,
                       78, 79, 85, 87, 88, 89, 93, 95, 96, 98, 99, 99};
    res = percentile(vals, 25, 0.90);
    printf("PERCENTILE: Expected %g got %g\n", 97.2, res);

    double vals2[10] = {0, 1, 2, 3, 4, 5, 6, 7, 8, 9};
    res = percentile(vals2, 10, 0.5);
    printf("PERCENTILE: Expected %g got %g\n", 4.5, res);
}
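// A sketch of the percentile(array, n, fraction) overload exercised by
// test_percentile() above, written to reproduce its expected values (97.2 for
// the 90th percentile of the 25 sorted values, 4.5 for the median of 0..9).
// It assumes linear interpolation between closest ranks on an already-sorted
// array; the actual implementation may differ.
double percentile(const double* sorted_vals, int n, double p) {
    if (n <= 0) return 0.0;
    double rank = p * (n - 1);        // fractional index into the sorted array
    int lo = (int)rank;
    double frac = rank - lo;
    if (lo + 1 >= n) return sorted_vals[n - 1];
    return sorted_vals[lo] + frac * (sorted_vals[lo + 1] - sorted_vals[lo]);
}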
void CNAnalysisMethodMosaicism::runmed(double* p, int iCount, int iWindowSize)
{
    std::vector<float> vOut(iCount);
    if ((iWindowSize % 2) == 0) {iWindowSize++;} // Window size should be odd.
    if (iCount <= iWindowSize) {
        iWindowSize = iCount - 2;
        if ((iWindowSize % 2) == 0) {iWindowSize--;} // Window size should be odd.
        if (iWindowSize < 0) {return;}
    }
    int iHalfWindow = (int)((double)iWindowSize / 2.0);
    for (int iIndex = iHalfWindow; (iIndex < (iCount - iHalfWindow)); iIndex++) {
        vOut[iIndex] = percentile(50, (p + iIndex - iHalfWindow), iWindowSize);
    }
    for (int iIndex = (iHalfWindow - 1); (iIndex >= 0); iIndex--) {
        vOut[iIndex] = vOut[iIndex + 1];
    }
    for (int iIndex = (iCount - iHalfWindow); (iIndex < iCount); iIndex++) {
        vOut[iIndex] = vOut[iIndex - 1];
    }
    for (int iIndex = 0; (iIndex < iCount); iIndex++) {
        p[iIndex] = vOut[iIndex];
    }
}
template <class T>
Vector<T> CDFTools<T>::percentile(const Matrix<T>& x, const float percent) const
{
    Vector<T> xEvals;
    xEvals = x.cat();
    xEvals.sort();
    return percentile( x, xEvals, percent );
}
void Ploter::plotRawHll(string filename) {
    ofstream file(filename);
    uint64_t hash;
    uint64_t hash128[2];
    vector< vector<double> > tab(CARDMAX/STEP, vector<double>(TESTS));
    int ca = 0;
    for (int i = 0; i < TESTS; i++) {
        cout << i << endl;
        Hll hll(14);
        for (int j = 0; j < CARDMAX; j++) {
            MurmurHash3_x86_128(&ca, 4, 0, &hash128);
            ca++;
            hash = hash128[0];
            hll.AddItem64(hash);
            if (j % STEP == 0) {
                double count = hll.CountRaw64();
                //count = (double)abs(count-j)/j;
                tab[j/STEP][i] = count;
            }
        }
    }
    for (int j = 0; j < CARDMAX/STEP; j++) {
        double sum = 0;
        for (int k = 0; k < TESTS; k++) {
            sum += tab[j][k];
        }
        double median = percentile(tab[j], 0.5);
        double pct01  = percentile(tab[j], 0.01);
        double pct99  = percentile(tab[j], 0.99);
        file << (j*STEP) << "\t" << sum/TESTS << "\t" << median
             << "\t" << pct01 << "\t" << pct99 << endl;
    }
}
template <class T>
Vector<T> CDFTools<T>::percentile(const vector<const Vector<T>*>& x, const float percent) const
{
    const int n = x.size();
    Vector<T> xEvals;
    for (int i = 0; i < n; ++i) {
        xEvals.append( *x[i] );
    }
    xEvals.sort();
    return percentile( x, xEvals, percent );
}
template <class T>
Vector<T> CDFTools<T>::percentile(const Matrix<T>& x, const Vector<T>& xEvals, const float percent) const
{
    const int n = x.getSize1();
    vector<const Vector<T>*> v( n );
    for (int i = 0; i < n; ++i) {
        v[i] = &x[i];
    }
    return percentile( v, xEvals, percent );
}
void clamp( pfs::Array2D *array, float min, float max, bool opt_percentile, bool opt_zeromode )
{
    int imgSize = array->getRows()*array->getCols();
    if( opt_percentile )
        percentile(array, min, max);
    float minval = min;
    float maxval = max;
    if( opt_zeromode )
        minval = maxval = 0.0f;
    for( int index = 0; index < imgSize; index++ )
    {
        float &v = (*array)(index);
        if( v < min )
            v = minval;
        else if( v > max )
            v = maxval;
        if( !finite(v) )
            v = maxval;
    }
}
bool TStellarData::load(const std::string& fname, const std::string& group,
                        const std::string& dset, double err_floor, double default_EBV) {
    H5::H5File *file = H5Utils::openFile(fname);
    if(file == NULL) { return false; }

    H5::Group *gp = H5Utils::openGroup(file, group);
    if(gp == NULL) {
        delete file;
        return false;
    }

    H5::DataSet dataset = gp->openDataSet(dset);

    /*
     *  Photometry
     */

    // Datatype
    hsize_t nbands = NBANDS;
    H5::ArrayType f4arr(H5::PredType::NATIVE_FLOAT, 1, &nbands);
    H5::ArrayType u4arr(H5::PredType::NATIVE_UINT32, 1, &nbands);
    H5::CompType dtype(sizeof(TFileData));
    dtype.insertMember("obj_id", HOFFSET(TFileData, obj_id), H5::PredType::NATIVE_UINT64);
    dtype.insertMember("l", HOFFSET(TFileData, l), H5::PredType::NATIVE_DOUBLE);
    dtype.insertMember("b", HOFFSET(TFileData, b), H5::PredType::NATIVE_DOUBLE);
    dtype.insertMember("mag", HOFFSET(TFileData, mag), f4arr);
    dtype.insertMember("err", HOFFSET(TFileData, err), f4arr);
    dtype.insertMember("maglimit", HOFFSET(TFileData, maglimit), f4arr);
    dtype.insertMember("nDet", HOFFSET(TFileData, N_det), u4arr);
    dtype.insertMember("EBV", HOFFSET(TFileData, EBV), H5::PredType::NATIVE_FLOAT);

    // Dataspace
    hsize_t length;
    H5::DataSpace dataspace = dataset.getSpace();
    dataspace.getSimpleExtentDims(&length);

    // Read in dataset
    TFileData* data_buf = new TFileData[length];
    dataset.read(data_buf, dtype);
    //std::cerr << "# Read in dimensions." << std::endl;

    // Fix magnitude limits
    for(int n=0; n<nbands; n++) {
        float tmp;
        float maglim_replacement = 25.;

        // Find the 95th percentile of valid magnitude limits
        std::vector<float> maglimit;
        for(hsize_t i=0; i<length; i++) {
            tmp = data_buf[i].maglimit[n];
            if((tmp > 10.) && (tmp < 40.) && (!isnan(tmp))) {
                maglimit.push_back(tmp);
            }
        }
        //std::sort(maglimit.begin(), maglimit.end());
        if(maglimit.size() != 0) { maglim_replacement = percentile(maglimit, 95.); }

        // Replace missing magnitude limits with the 95th percentile magnitude limit
        for(hsize_t i=0; i<length; i++) {
            tmp = data_buf[i].maglimit[n];
            if(!((tmp > 10.) && (tmp < 40.)) || isnan(tmp)) {
                //std::cout << i << ", " << n << ": " << tmp << std::endl;
                data_buf[i].maglimit[n] = maglim_replacement;
            }
        }
    }

    //int n_filtered = 0;
    //int n_M_dwarfs = 0;

    TMagnitudes mag_tmp;
    for(size_t i=0; i<length; i++) {
        mag_tmp.set(data_buf[i], err_floor);
        star.push_back(mag_tmp);

        //int n_informative = 0;

        // Remove g-band
        //mag_tmp.m[0] = 0.;
        //mag_tmp.err[0] = 1.e10;
        //double g_err = mag_tmp.err[0];
        //mag_tmp.err[0] = sqrt(g_err*g_err + 0.1*0.1);

        // Filter bright end
        // TODO: Put this into query_lsd.py
        /*for(int j=0; j<NBANDS; j++) {
            if((mag_tmp.err[j] < 1.e9) && (mag_tmp.m[j] < 14.)) {
                mag_tmp.err[j] = 1.e10;
                mag_tmp.m[j] = 0.;
            }
            if(mag_tmp.err[j] < 1.e9) { n_informative++; }
        }*/

        // Filter M dwarfs based on color cut
        //bool M_dwarf = false;
        /*bool M_dwarf = true;
        double A_g = 3.172;
        double A_r = 2.271;
        double A_i = 1.682;
        if(mag_tmp.m[0] - A_g / (A_g - A_r) * (mag_tmp.m[0] - mag_tmp.m[1] - 1.2) > 20.) {
            M_dwarf = false;
        } else if(mag_tmp.m[1] - mag_tmp.m[2] - (A_r - A_i) / (A_g - A_r) * (mag_tmp.m[0] - mag_tmp.m[1]) < 0.) {
            M_dwarf = false;
        } else {
            n_M_dwarfs++;
        }*/

        /*if(n_informative >= 4) { //&& (!M_dwarf)) {
            star.push_back(mag_tmp);
        } else {
            n_filtered++;
        }*/
    }
    //std::cerr << "# of stars filtered: " << n_filtered << std::endl;
    //std::cerr << "# of M dwarfs: " << n_M_dwarfs << std::endl;

    /*
     *  Attributes
     */

    H5::Attribute att = dataset.openAttribute("healpix_index");
    H5::DataType att_dtype = H5::PredType::NATIVE_UINT64;
    att.read(att_dtype, reinterpret_cast<void*>(&healpix_index));

    att = dataset.openAttribute("nested");
    att_dtype = H5::PredType::NATIVE_UCHAR;
    att.read(att_dtype, reinterpret_cast<void*>(&nested));

    att = dataset.openAttribute("nside");
    att_dtype = H5::PredType::NATIVE_UINT32;
    att.read(att_dtype, reinterpret_cast<void*>(&nside));

    att = dataset.openAttribute("l");
    att_dtype = H5::PredType::NATIVE_DOUBLE;
    att.read(att_dtype, reinterpret_cast<void*>(&l));

    att = dataset.openAttribute("b");
    att_dtype = H5::PredType::NATIVE_DOUBLE;
    att.read(att_dtype, reinterpret_cast<void*>(&b));

    att = dataset.openAttribute("EBV");
    att_dtype = H5::PredType::NATIVE_DOUBLE;
    att.read(att_dtype, reinterpret_cast<void*>(&EBV));

    // TEST: Force l, b to anticenter
    //l = 180.;
    //b = 0.;

    if((EBV <= 0.) || (EBV > default_EBV) || isnan(EBV)) { EBV = default_EBV; }

    delete[] data_buf;
    delete gp;
    delete file;

    return true;
}
void RunStatsCommand(ProgramData *p, int lcindex, int threadindex, _Stats *s)
{
  int i, j, k, Npct;
  double *tmpdata = NULL, *tmpweight = NULL;

  if(p->NJD[threadindex] <= 0) {
    for(i=0, k=0; i < s->Nvar; i++) {
      for(j=0; j < s->Nstats; j++, k++) {
        s->statsout[threadindex][k] = 0.0;
      }
    }
    return;
  }

  if((tmpdata = (double *) malloc(p->NJD[threadindex]*sizeof(double))) == NULL) {
    error(ERR_MEMALLOC);
  }

  for(i = 0, k=0; i < s->Nvar; i++) {
    if(s->vars[i]->vectortype != VARTOOLS_VECTORTYPE_LC) {
      error(ERR_BADVARIABLETYPE_STATSCOMMAND);
    }
    for(j=0; j < p->NJD[threadindex]; j++) {
      tmpdata[j] = EvaluateVariable_Double(lcindex, threadindex, j, s->vars[i]);
    }
    Npct = 0;
    for(j = 0; j < s->Nstats; j++, k++) {
      switch(s->statstocalc[j]) {
      case VARTOOLS_STATSTYPE_MEAN:
        s->statsout[threadindex][k] = getmean(p->NJD[threadindex], tmpdata);
        break;
      case VARTOOLS_STATSTYPE_WEIGHTEDMEAN:
        s->statsout[threadindex][k] = getweightedmean(p->NJD[threadindex], tmpdata, p->sig[threadindex]);
        break;
      case VARTOOLS_STATSTYPE_MEDIAN:
        s->statsout[threadindex][k] = median(p->NJD[threadindex], tmpdata);
        break;
      case VARTOOLS_STATSTYPE_MEDIAN_WEIGHT:
        s->statsout[threadindex][k] = median_weight(p->NJD[threadindex], tmpdata, p->sig[threadindex]);
        break;
      case VARTOOLS_STATSTYPE_STDDEV:
        s->statsout[threadindex][k] = stddev(p->NJD[threadindex], tmpdata);
        break;
      case VARTOOLS_STATSTYPE_MEDDEV:
        s->statsout[threadindex][k] = meddev(p->NJD[threadindex], tmpdata);
        break;
      case VARTOOLS_STATSTYPE_MEDMEDDEV:
        s->statsout[threadindex][k] = medmeddev(p->NJD[threadindex], tmpdata);
        break;
      case VARTOOLS_STATSTYPE_MAD:
        s->statsout[threadindex][k] = MAD(p->NJD[threadindex], tmpdata);
        break;
      case VARTOOLS_STATSTYPE_KURTOSIS:
        s->statsout[threadindex][k] = kurtosis(p->NJD[threadindex], tmpdata);
        break;
      case VARTOOLS_STATSTYPE_SKEWNESS:
        s->statsout[threadindex][k] = skewness(p->NJD[threadindex], tmpdata);
        break;
      case VARTOOLS_STATSTYPE_PERCENTILE:
        s->statsout[threadindex][k] = percentile(p->NJD[threadindex], tmpdata, s->pctval[Npct]);
        Npct++;
        break;
      case VARTOOLS_STATSTYPE_PERCENTILE_WEIGHT:
        s->statsout[threadindex][k] = percentile_weight(p->NJD[threadindex], tmpdata, p->sig[threadindex], s->pctval[Npct]);
        Npct++;
        break;
      case VARTOOLS_STATSTYPE_MAXIMUM:
        s->statsout[threadindex][k] = getmaximum(p->NJD[threadindex], tmpdata);
        break;
      case VARTOOLS_STATSTYPE_MINIMUM:
        s->statsout[threadindex][k] = getminimum(p->NJD[threadindex], tmpdata);
        break;
      case VARTOOLS_STATSTYPE_SUM:
        s->statsout[threadindex][k] = getsum(p->NJD[threadindex], tmpdata);
        break;
      default:
        error(ERR_CODEERROR);
      }
    }
  }

  if(tmpdata != NULL)
    free(tmpdata);
}
double median(std::vector<double> v) { return percentile(v, 0.5); }
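// A matching sketch of the std::vector<double> overload used by median() above
// and by the Ploter routines. It assumes the overload sorts a copy of its
// argument (it is taken by value) and then interpolates between closest ranks
// in the same way as the pointer/length version; the actual overload may differ.
#include <algorithm>
#include <vector>

double percentile(std::vector<double> v, double p) {
    if (v.empty()) return 0.0;
    std::sort(v.begin(), v.end());     // work on the by-value copy
    double rank = p * (v.size() - 1);  // fractional index into the sorted values
    size_t lo = (size_t)rank;
    double frac = rank - lo;
    if (lo + 1 >= v.size()) return v.back();
    return v[lo] + frac * (v[lo + 1] - v[lo]);
}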
  values.reserve(values_.size());
  foreach (const typename TimeSeries<T>::Value& value, values_) {
    values.push_back(value.data);
  }

  std::sort(values.begin(), values.end());

  Statistics statistics;
  statistics.count = values.size();
  statistics.min = values.front();
  statistics.max = values.back();
  statistics.p50 = percentile(values, 0.5);
  statistics.p90 = percentile(values, 0.90);
  statistics.p95 = percentile(values, 0.95);
  statistics.p99 = percentile(values, 0.99);
  statistics.p999 = percentile(values, 0.999);
  statistics.p9999 = percentile(values, 0.9999);

  return statistics;
}

size_t count;
T min;
T max;

// TODO(dhamon): Consider making the percentiles we store dynamic.