/**
 * Load one column of a file and bucketize it with a caller-supplied width.
 *
 * fname:  input path, forwarded to ReadFileDouble
 * fcol:   column selector, forwarded to ReadFileDouble
 * dw:     width value forwarded to Bucketized (presumably the bucket width)
 * _dbRes: output result; handed to Bucketized by reference
 */
void DataProcess::ReadFileWidBucket(const char * fname, int fcol, double dw, vector<int>& _dbRes)
{
    vector<double> columnValues;

    ReadFileDouble(fname, fcol, columnValues);
    Bucketized(columnValues, dw, _dbRes);
}
/**
 * Load one column of a file and bucketize it using a width derived from a
 * bucket count.
 *
 * fname:  input path, forwarded to ReadFileDouble
 * fcol:   column selector, forwarded to ReadFileDouble
 * bukNum: bucket count, converted to a width by getBukWidth
 * _dbRes: vector handed to Bucketized
 *
 * NOTE(review): _dbRes is taken BY VALUE here, so whatever Bucketized stores
 * in it is discarded when this function returns (the sibling
 * ReadFileWidBucket takes vector<int>&). Switching to a reference also
 * requires changing the declaration in the class header - confirm and fix
 * both together.
 */
void DataProcess::ReadFileBucket(const char * fname, int fcol, int bukNum, vector<int> _dbRes)
{
    vector<double> columnValues;
    ReadFileDouble(fname, fcol, columnValues);

    // Width is looked up after the column has been read, as in the original
    // statement order.
    const double bucketWidth = getBukWidth(bukNum);

    Bucketized(columnValues, bucketWidth, _dbRes);
}
/**
 * Z-normalize a single column of a comma-separated text file.
 *
 * The column is first loaded with ReadFileDouble to compute its mean and
 * population standard deviation. The input file is then streamed a second
 * time and written to outputFilename with that column replaced by
 * (value - mean) / dev * rangeMultipler.
 *
 * inputFilename:  file to read
 * fcol:           index (counted from 0 in the rewrite pass) of the
 *                 comma-separated field to normalize
 * outputFilename: file to create/overwrite; its first line is a
 *                 "#mean:... standard variance:..." header
 * rangeMultipler: factor applied to every normalized value
 *
 * Output format notes:
 *  - any line containing '#' is copied through unchanged;
 *  - every field written is followed by ',' (so lines end with a comma);
 *  - lines that have no field at index fcol are echoed field by field;
 *  - when the deviation is 0 the normalized value is written as 0.
 *
 * The output file is opened (and truncated) while the input is still being
 * read, so inputFilename and outputFilename must not name the same file.
 */
void DataProcess::z_normalizeData(string inputFilename, int fcol, string outputFilename, double rangeMultipler){
    cout<<"start processing file:"<<inputFilename<<" col:"<<fcol<<endl;

    // Pass 1: gather the column and accumulate sum / sum of squares.
    vector<double> data;
    ReadFileDouble(inputFilename.c_str(), fcol, data);

    double sum = 0;
    double sumSquare = 0;
    for (size_t i = 0; i < data.size(); i++) {
        sum += data[i];
        sumSquare += data[i] * data[i];
    }

    // With no samples the statistics are defined as 0 instead of 0/0 (NaN).
    double mean = 0;
    double dev = 0;
    if (!data.empty()) {
        mean = sum / data.size();
        double variance = sumSquare / data.size() - mean * mean;
        // E[x^2] - mean^2 can come out slightly negative through rounding;
        // clamp so sqrt does not yield NaN. (A NaN variance still propagates.)
        if (variance < 0) {
            variance = 0;
        }
        dev = sqrt(variance);
    }

    // Pass 2: rewrite the file with the selected column normalized.
    ifstream ifile(inputFilename.c_str());
    ofstream outf;
    outf.open(outputFilename.c_str(), ios::out);
    outf<<"#mean:"<<mean<<" standard variance:"<<dev<<endl;

    if (ifile.is_open()) {
        string line;
        while (getline(ifile, line)) {
            // Comment / header lines are passed through verbatim.
            if (line.find("#") != string::npos) {
                outf << line << '\n';
                continue;
            }

            vector<string> fields = split(line, ",");
            for (size_t i = 0; i < fields.size(); i++) {
                if (fcol < 0 || i != static_cast<size_t>(fcol)) {
                    outf << fields[i] << ",";
                    continue;
                }

                // The target field is only parsed once it is known to exist,
                // so blank or short lines no longer index past the vector.
                string trimmed = eraseSpace(fields[i]);
                double value = atof(trimmed.c_str());

                // A zero deviation means a constant column: emit 0 instead
                // of dividing by zero.
                double normalized = 0.0;
                if (dev != 0) {
                    normalized = (value - mean) / dev * rangeMultipler;
                }
                outf << normalized << ",";
            }
            outf << '\n';
        }
    }

    cout<<"output normalized data in:"<<outputFilename<<endl;
    outf.close();
}
/**
 * Load a table via ReadFileDouble and hand it back in single precision.
 *
 * fname:   input path, forwarded to ReadFileDouble
 * _data:   output result; resized to the loaded table's shape, each element
 *          being the corresponding double narrowed to float
 * lineNum: not used by this implementation
 *          NOTE(review): possibly meant as a row limit - confirm with callers
 */
void DataProcess::ReadFileFloat(const char* fname, vector<vector<float> >& _data, int lineNum){
    vector<vector<double> > rows;
    ReadFileDouble(fname, rows);

    const size_t rowCount = rows.size();
    _data.resize(rowCount);

    for (size_t r = 0; r < rowCount; ++r) {
        const vector<double>& source = rows[r];
        vector<float>& target = _data[r];

        target.resize(source.size());
        for (size_t c = 0; c < source.size(); ++c) {
            target[c] = static_cast<float>(source[c]);
        }
    }
}
/**
 * Load one column via ReadFileDouble and hand it back in single precision.
 *
 * fname: input path, forwarded to ReadFileDouble
 * fcol:  column selector, forwarded to ReadFileDouble
 * _data: output result; resized to the number of values read, each element
 *        being the corresponding double narrowed to float
 */
void DataProcess::ReadFileFloat(const char* fname,int fcol, vector<float>& _data){
    vector<double> columnValues;
    ReadFileDouble(fname, fcol, columnValues);

    const size_t count = columnValues.size();
    _data.resize(count);

    for (size_t k = 0; k < count; ++k) {
        _data[k] = static_cast<float>(columnValues[k]);
    }
}
/*
 * Print the L2 distance between two feature vectors stored in files.
 *
 * usage: spytec_distance <fv1_path> <fv2_path>
 *
 * Both vectors are loaded with ReadFileDouble(path, QTD_COEFS), compared with
 * L2DoubleDistance(fv1, fv2, QTD_COEFS), and the result is printed to stdout
 * with "%lf". The buffers are released with free().
 *
 * Returns 0 on success; exits with -1 on wrong argument count or when a
 * vector could not be loaded.
 */
int main(int argc, char** argv)
{
    double *fv1 = NULL;
    double *fv2 = NULL;
    double distance = 0;

    if (argc != 3)
    {
        fprintf(stderr,"usage: spytec_distance <fv1_path> <fv2_path>\n");
        exit(-1);
    }

    /*
     * ReadFileDouble's failure contract is not visible from here; guard
     * against a null result so NULL is never handed to L2DoubleDistance.
     */
    fv1 = ReadFileDouble(argv[1], QTD_COEFS);
    if (fv1 == NULL)
    {
        fprintf(stderr, "error: could not read feature vector from %s\n", argv[1]);
        exit(-1);
    }

    fv2 = ReadFileDouble(argv[2], QTD_COEFS);
    if (fv2 == NULL)
    {
        fprintf(stderr, "error: could not read feature vector from %s\n", argv[2]);
        free(fv1);
        exit(-1);
    }

    distance = L2DoubleDistance(fv1, fv2, QTD_COEFS);
    printf("%lf\n", distance);

    free(fv1);
    free(fv2);

    return(0);
}
/**
 * Z-normalize an inclusive range of columns of a comma-separated text file.
 *
 * For every column in [fcol_start, fcol_end] the values are loaded with
 * ReadFileDouble and passed to z_normalizeData_perVector together with that
 * column's mean/dev slots (presumably filled in through the reference
 * arguments - confirm against that helper). The input file is then streamed
 * again and written to outputFilename with each selected field replaced by
 * (value - mean) / dev * rangeMultipler.
 *
 * inputFilename:  file to read
 * fcol_start:     first field index to normalize (fields counted from 0 in
 *                 the rewrite pass)
 * fcol_end:       last field index to normalize, inclusive
 * outputFilename: file to create/overwrite
 * rangeMultipler: factor applied to every normalized value
 *
 * Output format notes:
 *  - any line containing '#' is copied through unchanged;
 *  - every field written is followed by ',' (so lines end with a comma);
 *  - when a column's deviation is 0 its normalized values are written as 0;
 *  - if fcol_end < fcol_start no column is normalized and the data lines are
 *    simply echoed field by field.
 *
 * The output file is opened (and truncated) while the input is still being
 * read, so inputFilename and outputFilename must not name the same file.
 */
void DataProcess::z_normalizeData(string inputFilename, int fcol_start,int fcol_end, string outputFilename,double rangeMultipler){
    // An inverted range would otherwise yield a negative element count, which
    // converts to an enormous size_type in the vector constructor.
    size_t columnCount = 0;
    if (fcol_end >= fcol_start) {
        columnCount = static_cast<size_t>(fcol_end - fcol_start) + 1;
    }

    vector<double> mean(columnCount, 0.0);
    vector<double> dev(columnCount, 0.0);

    // Pass 1: per-column statistics.
    for (int col = fcol_start; col <= fcol_end; col++) {
        vector<double> data;
        ReadFileDouble(inputFilename.c_str(), col, data);
        z_normalizeData_perVector(data, mean[col - fcol_start], dev[col - fcol_start]);
    }

    // Pass 2: rewrite the file with the selected columns normalized.
    ifstream ifile(inputFilename.c_str());
    ofstream outf;
    outf.open(outputFilename.c_str(), ios::out);

    if (ifile.is_open()) {
        string line;
        while (getline(ifile, line)) {
            // Comment / header lines are passed through verbatim.
            if (line.find("#") != string::npos) {
                outf << line << '\n';
                continue;
            }

            vector<string> fields = split(line, ",");
            const int fieldCount = static_cast<int>(fields.size());
            for (int i = 0; i < fieldCount; i++) {
                if (i < fcol_start || i > fcol_end) {
                    outf << fields[i] << ",";
                    continue;
                }

                const size_t slot = static_cast<size_t>(i - fcol_start);
                string trimmed = eraseSpace(fields[i]);
                double value = atof(trimmed.c_str());

                // A zero deviation means a constant column: emit 0 instead
                // of dividing by zero.
                double normalized = 0.0;
                if (dev[slot] != 0) {
                    normalized = (value - mean[slot]) / dev[slot] * rangeMultipler;
                }
                outf << normalized << ",";
            }
            outf << '\n';
        }
    }
}