Example #1
void Ploter::plotCorrectedHll(string filename){
	ofstream file(filename);
	double x1, x2;	// interpolation endpoints; estimates[] holds doubles
	double y1, y2;
	x1 = 0;
	y1 = 0;
	x2 = CARDMAX/STEP-1;
	y2 = 0;
	uint64_t hash;
	uint64_t hash128[2];
	
	vector<double> estimates = makeVector();
	
	int i;
	vector< vector<double> > tab(CARDMAX/STEP, vector<double>(TESTS));
	
	int ca = 0;
	for(i = 0; i < TESTS; i ++){
		cout << i << endl;
		Hll hll(14);
		
		for(int j = 0; j < CARDMAX; j++){
			MurmurHash3_x86_128(&ca, 4, 0, &hash128);
			ca++;
			hash = hash128[0];
			hll.AddItem64(hash);
			if(j%STEP == 0){
				double count = hll.CountRaw64();
				// Find the pair of estimates bracketing the raw count and
				// correct it once; break so the corrected value is not
				// re-tested against the remaining estimates. (A distinct
				// loop variable avoids shadowing the outer test index i,
				// which is still needed below when storing into tab.)
				for(int m = 0; m < CARDMAX/STEP-1; m++){
					if(estimates[m] <= count && count < estimates[m+1]){
						x1 = estimates[m];
						y1 = m*STEP;
						x2 = estimates[m+1];
						y2 = (m+1)*STEP;
						count = interpolation(count, x1, y1, x2, y2);
						break;
					}
				}
				//count = interpolation(count, x1, y1, x2, y2);
				//count = (double)abs(count-j)/j;
				tab[j/STEP][i]=count;
			}
		}
	}
	
	for(int j = 0; j < CARDMAX/STEP; j++){
		double sum = 0;
		for (int k = 0; k < TESTS; k++){
			sum += tab[j][k];
		}
		//cout << sum << endl;
		double median = percentile(tab[j],0.5);
		double pct01 = percentile(tab[j],0.01);
		double pct99 = percentile(tab[j],0.99);
		file << (j*STEP) << "\t" << sum/TESTS << "\t" << median << "\t" << pct01 << "\t" << pct99 << endl;
	}
	
}
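The interpolation helper called above is not part of this listing. A minimal linear-interpolation sketch, consistent with the call interpolation(count, x1, y1, x2, y2) (name and signature taken from the call site; the real body may differ):

// Hypothetical sketch of the interpolation helper used by plotCorrectedHll:
// map x from the estimate segment [x1, x2] onto the true-cardinality
// segment [y1, y2] by linear interpolation.
double interpolation(double x, double x1, double y1, double x2, double y2){
	if(x2 == x1) return y1;  // degenerate segment: avoid dividing by zero
	return y1 + (x - x1) * (y2 - y1) / (x2 - x1);
}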
Example #2
void test_percentile() {
    double res;
    double vals[25] = {43, 54, 56, 61, 62, 66, 68, 69, 69, 70, 71, 72, 77, 78, 79, 85, 87, 88, 89, 93, 95, 96, 98, 99, 99};
    res = percentile(vals, 25, 0.90);
    printf("PERCENTILE: Expected %g got %g\n", 97.2, res);

    double vals2[10] = {0,1,2,3,4,5,6,7,8,9};
    res = percentile(vals2, 10, 0.5);
    printf("PERCENTILE: Expected %g got %g\n", 4.5, res);

}
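The expected values in this test (97.2 for the 90th percentile of 25 sorted values, 4.5 for the median of 0..9) match the linear-interpolation convention rank = frac * (n - 1). A minimal sketch of a percentile with that behavior, assuming sorted input (the project's actual implementation is not shown):

#include <cmath>

// Minimal sketch, assuming vals is sorted ascending: rank = frac * (n - 1),
// with linear interpolation between the two surrounding order statistics.
// This reproduces the expected values in test_percentile above.
double percentile(const double *vals, int n, double frac){
    double rank = frac * (n - 1);
    int lo = (int)std::floor(rank);
    int hi = (lo + 1 < n) ? lo + 1 : lo;
    double w = rank - lo;
    return vals[lo] * (1.0 - w) + vals[hi] * w;
}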
Example #3
// Running-median smoother: each entry of p is replaced by the median of a
// sliding window; edge positions are padded with the nearest smoothed value.
void CNAnalysisMethodMosaicism::runmed(double* p, int iCount, int iWindowSize)
{
    std::vector<float> vOut(iCount);
    if ((iWindowSize % 2) == 0) {iWindowSize++;} // Window size should be odd.
    if (iCount <= iWindowSize)
    {
        iWindowSize = iCount - 2;
        if ((iWindowSize % 2) == 0) {iWindowSize--;} // Window size should be odd.
        if (iWindowSize < 0) {return;}
    }
    int iHalfWindow = (int)((double)iWindowSize/2.0);
    for (int iIndex = iHalfWindow; (iIndex < (iCount - iHalfWindow)); iIndex++)
    {
        vOut[iIndex] = percentile(50, (p + iIndex - iHalfWindow), iWindowSize);
    }
    for (int iIndex = (iHalfWindow - 1); (iIndex >= 0); iIndex--)
    {
        vOut[iIndex] = vOut[iIndex + 1];
    }
    for (int iIndex = (iCount - iHalfWindow); (iIndex < iCount); iIndex++)
    {
        vOut[iIndex] = vOut[iIndex - 1];
    }
    for (int iIndex = 0; (iIndex < iCount); iIndex++)
    {
        p[iIndex] = vOut[iIndex];
    }
}
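runmed relies on a percentile overload that takes the percent first (50 for the median) and reads iWindowSize values from a raw pointer. A hedged sketch of such an overload, assuming it copies and sorts the window and uses a nearest-rank pick (the real implementation may interpolate instead):

#include <algorithm>
#include <vector>

// Hypothetical sketch of the overload runmed calls: copy the window, sort
// it, and return the nearest-rank element for the given percent.
float percentile(int percent, const double* data, int count){
    std::vector<double> window(data, data + count);
    std::sort(window.begin(), window.end());
    int rank = (int)(percent / 100.0 * (count - 1) + 0.5);  // nearest rank
    return (float)window[rank];
}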
Example #4
template <class T>
Vector<T> CDFTools<T>::percentile(const Matrix<T>& x, const float percent) const
{
  Vector<T> xEvals;
  xEvals = x.cat();
  xEvals.sort();

  return percentile( x, xEvals, percent );
}
Example #5
void Ploter::plotRawHll(string filename){
	ofstream file(filename);
	
	uint64_t hash;
	uint64_t hash128[2];
	
	int i;

	vector< vector<double> > tab(CARDMAX/STEP, vector<double>(TESTS));
	
	int ca = 0;
	for(i = 0; i < TESTS; i ++){
		cout << i << endl;
		Hll hll(14);
		for(int j = 0; j < CARDMAX; j++){
			MurmurHash3_x86_128(&ca, 4, 0, &hash128);
			ca++;
			hash = hash128[0];
			hll.AddItem64(hash);
			if(j%STEP == 0){
				double count = hll.CountRaw64();
				//count = (double)abs(count-j)/j;
				tab[j/STEP][i]=count;
			}
		}
	}
	
	for(int j = 0; j < CARDMAX/STEP; j++){
		double sum = 0;
		for (int k = 0; k < TESTS; k++){
			sum += tab[j][k];
		}
		//cout << sum << endl;
		double median = percentile(tab[j],0.5);
		double pct01 = percentile(tab[j],0.01);
		double pct99 = percentile(tab[j],0.99);
		file << (j*STEP) << "\t" << sum/TESTS << "\t" << median << "\t" << pct01 << "\t" << pct99 << endl;
	}

}
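Both Ploter functions summarize each column of tab with percentile(vector<double>, frac). A minimal sketch of that overload, assuming it sorts a copy of the input and interpolates like the pointer version sketched after Example #2:

#include <algorithm>
#include <cmath>
#include <vector>

// Minimal sketch: take the vector by value so sorting does not disturb the
// caller's data, then interpolate between order statistics at frac*(n-1).
double percentile(std::vector<double> v, double frac){
    if (v.empty()) return 0.0;  // no data: nothing sensible to return
    std::sort(v.begin(), v.end());
    double rank = frac * (v.size() - 1);
    size_t lo = (size_t)std::floor(rank);
    size_t hi = (lo + 1 < v.size()) ? lo + 1 : lo;
    double w = rank - lo;
    return v[lo] * (1.0 - w) + v[hi] * w;
}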
Example #6
template <class T>
Vector<T> CDFTools<T>::percentile(const vector<const Vector<T>*>& x, const float percent) const
{
  const int n = x.size();
  Vector<T> xEvals;

  for (int i = 0; i < n; ++i)
  {
    xEvals.append( *x[i] );
  }
  xEvals.sort();

  return percentile( x, xEvals, percent );
}
Example #7
template <class T>
Vector<T> CDFTools<T>::percentile(
  const Matrix<T>& x,
  const Vector<T>& xEvals,
  const float percent) const
{
  const int n = x.getSize1();
  vector<const Vector<T>*> v( n );

  for (int i = 0; i < n; ++i)
  {
    v[i] = &x[i];
  }

  return percentile( v, xEvals, percent );
}
Example #8
// Clamp every pixel to [min, max]. With opt_percentile the limits are first
// passed through the percentile helper; with opt_zeromode clipped pixels
// (and non-finite values) are set to zero instead of the limit.
void clamp( pfs::Array2D *array, float min, float max,
            bool opt_percentile, bool opt_zeromode )
{
    int imgSize = array->getRows()*array->getCols();

    if( opt_percentile )
        percentile(array,min,max);

    float minval=min;
    float maxval=max;
    if( opt_zeromode )
        minval = maxval = 0.0f;

    for( int index = 0; index < imgSize ; index++ )
    {
        float &v = (*array)(index);
        if( v < min ) v = minval;
        else if( v > max ) v = maxval;

        if( !std::isfinite(v) )  // also replace NaN/Inf pixels
            v = maxval;
    }
}
Example #9
bool TStellarData::load(const std::string& fname, const std::string& group, const std::string& dset,
			double err_floor, double default_EBV) {
	H5::H5File *file = H5Utils::openFile(fname);
	if(file == NULL) { return false; }
	
	H5::Group *gp = H5Utils::openGroup(file, group);
	if(gp == NULL) {
		delete file;
		return false;
	}
	
	H5::DataSet dataset = gp->openDataSet(dset);
	
	/*
	 *  Photometry
	 */
	
	// Datatype
	hsize_t nbands = NBANDS;
	H5::ArrayType f4arr(H5::PredType::NATIVE_FLOAT, 1, &nbands);
	H5::ArrayType u4arr(H5::PredType::NATIVE_UINT32, 1, &nbands);
	H5::CompType dtype(sizeof(TFileData));
	dtype.insertMember("obj_id", HOFFSET(TFileData, obj_id), H5::PredType::NATIVE_UINT64);
	dtype.insertMember("l", HOFFSET(TFileData, l), H5::PredType::NATIVE_DOUBLE);
	dtype.insertMember("b", HOFFSET(TFileData, b), H5::PredType::NATIVE_DOUBLE);
	dtype.insertMember("mag", HOFFSET(TFileData, mag), f4arr);
	dtype.insertMember("err", HOFFSET(TFileData, err), f4arr);
	dtype.insertMember("maglimit", HOFFSET(TFileData, maglimit), f4arr);
	dtype.insertMember("nDet", HOFFSET(TFileData, N_det), u4arr);
	dtype.insertMember("EBV", HOFFSET(TFileData, EBV), H5::PredType::NATIVE_FLOAT);
	
	// Dataspace
	hsize_t length;
	H5::DataSpace dataspace = dataset.getSpace();
	dataspace.getSimpleExtentDims(&length);
	
	// Read in dataset
	TFileData* data_buf = new TFileData[length];
	dataset.read(data_buf, dtype);
	//std::cerr << "# Read in dimensions." << std::endl;
	
	// Fix magnitude limits
	for(int n=0; n<nbands; n++) {
		float tmp;
		float maglim_replacement = 25.;
		
		// Find the 95th percentile of valid magnitude limits
		std::vector<float> maglimit;
		for(hsize_t i=0; i<length; i++) {
			tmp = data_buf[i].maglimit[n];
			
			if((tmp > 10.) && (tmp < 40.) && (!isnan(tmp))) {
				maglimit.push_back(tmp);
			}
		}
		
		//std::sort(maglimit.begin(), maglimit.end());
		if(maglimit.size() != 0) {
			maglim_replacement = percentile(maglimit, 95.);
		}
		
		// Replace missing magnitude limits with the 95th percentile magnitude limit
		for(hsize_t i=0; i<length; i++) {
			tmp = data_buf[i].maglimit[n];
			
			if(!((tmp > 10.) && (tmp < 40.)) || isnan(tmp)) {
				//std::cout << i << ", " << n << ":  " << tmp << std::endl;
				data_buf[i].maglimit[n] = maglim_replacement;
			}
		}
	}
	
	//int n_filtered = 0;
	//int n_M_dwarfs = 0;
	
	TMagnitudes mag_tmp;
	for(size_t i=0; i<length; i++) {
		mag_tmp.set(data_buf[i], err_floor);
		star.push_back(mag_tmp);
		
		//int n_informative = 0;
		
		// Remove g-band
		//mag_tmp.m[0] = 0.;
		//mag_tmp.err[0] = 1.e10;
		
		//double g_err = mag_tmp.err[0];
		//mag_tmp.err[0] = sqrt(g_err*g_err + 0.1*0.1);
		
		// Filter bright end
		// TODO: Put this into query_lsd.py
		/*for(int j=0; j<NBANDS; j++) {
			if((mag_tmp.err[j] < 1.e9) && (mag_tmp.m[j] < 14.)) {
				mag_tmp.err[j] = 1.e10;
				mag_tmp.m[j] = 0.;
			}
			
			if(mag_tmp.err[j] < 1.e9) {
				n_informative++;
			}
		}*/
		
		// Filter M dwarfs based on color cut
		//bool M_dwarf = false;
		/*bool M_dwarf = true;
		
		double A_g = 3.172;
		double A_r = 2.271;
		double A_i = 1.682;
		
		if(mag_tmp.m[0] - A_g / (A_g - A_r) * (mag_tmp.m[0] - mag_tmp.m[1] - 1.2) > 20.) {
			M_dwarf = false;
		} else if(mag_tmp.m[1] - mag_tmp.m[2] - (A_r - A_i) / (A_g - A_r) * (mag_tmp.m[0] - mag_tmp.m[1]) < 0.) {
			M_dwarf = false;
		} else {
			n_M_dwarfs++;
		}
		*/
		
		/*if(n_informative >= 4) { //&& (!M_dwarf)) {
			star.push_back(mag_tmp);
		} else {
			n_filtered++;
		}*/
	}
	
	//std::cerr << "# of stars filtered: " << n_filtered << std::endl;
	//std::cerr << "# of M dwarfs: " << n_M_dwarfs << std::endl;
	
	/*
	 *  Attributes
	 */
	
	H5::Attribute att = dataset.openAttribute("healpix_index");
	H5::DataType att_dtype = H5::PredType::NATIVE_UINT64;
	att.read(att_dtype, reinterpret_cast<void*>(&healpix_index));
	
	att = dataset.openAttribute("nested");
	att_dtype = H5::PredType::NATIVE_UCHAR;
	att.read(att_dtype, reinterpret_cast<void*>(&nested));
	
	att = dataset.openAttribute("nside");
	att_dtype = H5::PredType::NATIVE_UINT32;
	att.read(att_dtype, reinterpret_cast<void*>(&nside));
	
	att = dataset.openAttribute("l");
	att_dtype = H5::PredType::NATIVE_DOUBLE;
	att.read(att_dtype, reinterpret_cast<void*>(&l));
	
	att = dataset.openAttribute("b");
	att_dtype = H5::PredType::NATIVE_DOUBLE;
	att.read(att_dtype, reinterpret_cast<void*>(&b));
	
	att = dataset.openAttribute("EBV");
	att_dtype = H5::PredType::NATIVE_DOUBLE;
	att.read(att_dtype, reinterpret_cast<void*>(&EBV));
	
	// TEST: Force l, b to anticenter
	//l = 180.;
	//b = 0.;
	
	if((EBV <= 0.) || (EBV > default_EBV) || isnan(EBV)) { EBV = default_EBV; }
	
	delete[] data_buf;
	delete gp;
	delete file;
	
	return true;
}
Example #10
void RunStatsCommand(ProgramData *p, int lcindex, int threadindex, _Stats *s)
{
  int i, j, k, Npct;
  double *tmpdata = NULL;
  if(p->NJD[threadindex] <= 0) {
    for(i=0, k=0; i < s->Nvar; i++) {
      for(j=0; j < s->Nstats; j++, k++) {
	s->statsout[threadindex][k] = 0.0;
      }
    }
    return;
  }
  if((tmpdata = (double *) malloc(p->NJD[threadindex]*sizeof(double))) == NULL) {
    error(ERR_MEMALLOC);
  }
  for(i = 0, k=0; i < s->Nvar; i++) {
    if(s->vars[i]->vectortype != VARTOOLS_VECTORTYPE_LC) {
      error(ERR_BADVARIABLETYPE_STATSCOMMAND);
    }
    for(j=0; j < p->NJD[threadindex]; j++) {
      tmpdata[j] = EvaluateVariable_Double(lcindex, threadindex, j, s->vars[i]);
    }
    Npct = 0;
    for(j = 0; j < s->Nstats; j++, k++) {
      switch(s->statstocalc[j]) {
      case VARTOOLS_STATSTYPE_MEAN:
	s->statsout[threadindex][k] = getmean(p->NJD[threadindex], tmpdata);
	break;
      case VARTOOLS_STATSTYPE_WEIGHTEDMEAN:
	s->statsout[threadindex][k] = getweightedmean(p->NJD[threadindex], tmpdata, p->sig[threadindex]);
	break;
      case VARTOOLS_STATSTYPE_MEDIAN:
	s->statsout[threadindex][k] = median(p->NJD[threadindex], tmpdata);
	break;
      case VARTOOLS_STATSTYPE_MEDIAN_WEIGHT:
	s->statsout[threadindex][k] = median_weight(p->NJD[threadindex], tmpdata, p->sig[threadindex]);
	break;
      case VARTOOLS_STATSTYPE_STDDEV:
	s->statsout[threadindex][k] = stddev(p->NJD[threadindex], tmpdata);
	break;
      case VARTOOLS_STATSTYPE_MEDDEV:
	s->statsout[threadindex][k] = meddev(p->NJD[threadindex], tmpdata);
	break;
      case VARTOOLS_STATSTYPE_MEDMEDDEV:
	s->statsout[threadindex][k] = medmeddev(p->NJD[threadindex], tmpdata);
	break;
      case VARTOOLS_STATSTYPE_MAD:
	s->statsout[threadindex][k] = MAD(p->NJD[threadindex], tmpdata);
	break;
      case VARTOOLS_STATSTYPE_KURTOSIS:
	s->statsout[threadindex][k] = kurtosis(p->NJD[threadindex], tmpdata);
	break;
      case VARTOOLS_STATSTYPE_SKEWNESS:
	s->statsout[threadindex][k] = skewness(p->NJD[threadindex], tmpdata);
	break;
      case VARTOOLS_STATSTYPE_PERCENTILE:
	s->statsout[threadindex][k] = percentile(p->NJD[threadindex],
						 tmpdata,
						 s->pctval[Npct]);
	Npct++;
	break;
      case VARTOOLS_STATSTYPE_PERCENTILE_WEIGHT:
	s->statsout[threadindex][k] = percentile_weight(p->NJD[threadindex],
							tmpdata,
							p->sig[threadindex],
							s->pctval[Npct]);
	Npct++;
	break;
      case VARTOOLS_STATSTYPE_MAXIMUM:
	s->statsout[threadindex][k] = getmaximum(p->NJD[threadindex],tmpdata);
	break;
      case VARTOOLS_STATSTYPE_MINIMUM:
	s->statsout[threadindex][k] = getminimum(p->NJD[threadindex],tmpdata);
	break;
      case VARTOOLS_STATSTYPE_SUM:
	s->statsout[threadindex][k] = getsum(p->NJD[threadindex],tmpdata);
	break;
      default:
	error(ERR_CODEERROR);
      }
    }
  }
  if(tmpdata != NULL)
    free(tmpdata);
}
Example #11
// The median is simply the 50th percentile.
double median(std::vector<double> v) {
    return percentile(v, 0.5);
}
Example #12
    values.reserve(values_.size());

    foreach (const typename TimeSeries<T>::Value& value, values_) {
      values.push_back(value.data);
    }

    std::sort(values.begin(), values.end());

    Statistics statistics;

    statistics.count = values.size();

    statistics.min = values.front();
    statistics.max = values.back();

    statistics.p50 = percentile(values, 0.5);
    statistics.p90 = percentile(values, 0.90);
    statistics.p95 = percentile(values, 0.95);
    statistics.p99 = percentile(values, 0.99);
    statistics.p999 = percentile(values, 0.999);
    statistics.p9999 = percentile(values, 0.9999);

    return statistics;
  }

  size_t count;

  T min;
  T max;

  // TODO(dhamon): Consider making the percentiles we store dynamic.