int main(int argc, char * argv[]) { int i = 0; char *ptr = NULL; char *string = NULL; int fd = -1; Header* p; char *tab = NULL; char size = sizeof(char)*7; name_file(&i); /*debug_println_ptr("dedans address ", &address);*/ fd = open_file("t"); if (fd != -1){ ptr = (char*)mmap_file(); string = (char*)mymalloc(size); if (string == NULL) { perror("Erreur d'allocation"); } strcpy(string, "coucou "); /*p = (void*)string - sizeof(Header); printf("%s\n", string); printf("string:%d size:%d address:%d\n", (int)string, p->size, address);*/ tab = (char*)mymalloc(size); if (tab == NULL) { perror("Erreur d'allocation"); } strcpy(tab, "coucou "); p = (void*)string - sizeof(Header); /* printf("string -> %s | tab -> %s | poniteur address %d string address %d file %d\n", string, tab, (int)p, (int)string, (int)ptr); */ printf("\n p->size %d\n", p->size); /*debug_println_ptr("dehors address ", &address);*/ myfree(string); string = NULL; /*printf("string -> %s | tab -> %s\n", string, tab);*/ } else printf("Erreur %d\n", fd); exit(EXIT_SUCCESS); }
// Runs experiments with one inputed data set and varies the memory // Input: takes 5 command line arguments, lower and upper fraction of memory, // names of files the data are in, and number of bins // Output: creates 4 files, the log file holds all the data generated, the // table file holds the pvalues deliminated by tabs, the extra file holds all // the calculated statistics. and the pvalue holds the actual and estimated // pvalues int main(int argc, char* argv[]) { if (argc < 5) { cout << "usage: VaryMemoryCategorical lower-memory upper-memory filename1 filename2" << endl; exit(1); } double lower = atof(argv[1]), upper = atof(argv[2]); char *filename1 = argv[3], *filename2 = argv[4]; double memory_percent, mem = lower; int repeats = 0; while (mem <= (upper + 0.00000001)) { repeats++; mem += 10; } double actual_values[repeats], estimated_values[repeats], percents[repeats]; long times[repeats]; long times2[repeats]; std::vector<double> data1, data2; std::unordered_map<double,int> stream1, stream2; // creates and initializes the log file ofstream data_file; char str[100]; name_file(str, argv, 0); data_file.open(str); ifstream input_file; char output[100]; int stream_size1 = 0, stream_size2 = 0; input_file.open(filename1); while (!input_file.eof()) { input_file >> output; data1.push_back(atof(output)); stream_size1++; if (stream1.find(atof(output)) == stream1.end()) stream1.insert(std::make_pair(atof(output),1)); else stream1[atof(output)] += 1; } input_file.close(); input_file.open(filename2); while (!input_file.eof()) { input_file >> output; data2.push_back(atof(output)); stream_size2++; if (stream2.find(atof(output)) == stream2.end()) stream2.insert(std::make_pair(atof(output),1)); else stream2[atof(output)] += 1; } input_file.close(); data_file << filename1 << endl << filename2 << endl; data_file << "stream 1 size: " << stream_size1 << endl; data_file << "stream 2 size: " << stream_size2 << endl; data_file << "number of categories: " << stream1.size() << endl; 
memory_percent = lower; int i = 0, num_categories; while (memory_percent <= (upper + 0.00000001)) //accounts for rounding { data_file << "memory percent: " << memory_percent << endl; percents[i] = memory_percent; // calculates the estimated statistic ChiSquareCategorical sketch1(memory_percent); ChiSquareCategorical sketch2(memory_percent); timeval timeBefore, timeAfter; // initializes variables long diffSeconds, diffUSeconds; gettimeofday(&timeBefore, NULL); for (std::vector<double>::iterator j = data1.begin(); j != data1.end();j++) sketch1.insert(*j); for (std::vector<double>::iterator j = data2.begin(); j != data2.end();j++) sketch2.insert(*j); gettimeofday(&timeAfter, NULL); // get time for insertion diffSeconds = timeAfter.tv_sec - timeBefore.tv_sec; diffUSeconds = timeAfter.tv_usec - timeBefore.tv_usec; times[i] = diffSeconds; times2[i] = diffUSeconds; double estimated_stat = sketch1.calculate_statistic(sketch2, 0); estimated_values[i] = estimated_stat; data_file << "estimate = " << estimated_stat << endl; // calculates actual statistic double constant1 = sqrt(double(stream_size2) / double(stream_size1)); double constant2 = sqrt(double(stream_size1) / double(stream_size2)); double actual_stat = 0; for (std::unordered_map<double,int>::const_iterator j = stream1.begin(); j!= stream1.end(); j++) { double frequency1 = j->second; double frequency2 = 0; num_categories++; if (stream2.find(j->first) != stream2.end()) frequency2 = stream2[j->first]; double value = frequency1 * constant1 - frequency2 * constant2; actual_stat += (value * value) / (frequency1 + frequency2); } // have to loop through other stream to find when first one is 0 for (std::unordered_map<double,int>::const_iterator j = stream2.begin(); j!= stream2.end(); j++) { if (stream1.find(j->first) == stream1.end()) { num_categories++; int frequency1 = 0; int frequency2 = j->second; double value = frequency1 * constant1 - frequency2 * constant2; actual_stat += (value * value) / (frequency1 + frequency2); } 
} actual_values[i] = actual_stat; data_file << "actual = " << actual_stat << endl; i++; memory_percent += 10; } data_file.close(); // creates pvalues file name_file(str, argv, 3); data_file.open(str); // creates table file ofstream data2_file; name_file(str, argv, 1); data2_file.open(str); // creates time table file ofstream time_file; char str2[150]; name_file(str2, argv, 5); time_file.open(str2); // creates extra file ofstream data3_file; name_file(str, argv, 2); data3_file.open(str); int deg_freedom = num_categories - 1; for (int i = 0; i < repeats; i++) { // adds values to the table data2_file << percents[i] << "\t"; double error = abs(pochisq(estimated_values[i], deg_freedom) - pochisq(actual_values[i], deg_freedom)); data2_file << error << endl; // adds values to the time table time_file << percents[i] << "\t"; long double avg_time = times[i] + times2[i]/1000000.0; time_file << avg_time << endl; // adds values to pvalue file data_file << pochisq(actual_values[i], deg_freedom) << " actual" << endl; data_file << pochisq(estimated_values[i], deg_freedom) << " estimated" << endl; // adds values to extra file data3_file << percents[i] << "\t"; error = abs(estimated_values[i] - actual_values[i]) / actual_values[i]; data3_file << error << endl; } data3_file.close(); data2_file.close(); data_file.close(); return 0; }
// Runs experiments with one inputed data set and varies the memory // Input: takes 5 command line arguments, lower and upper fraction of memory, // names of files the data are in, and number of bins // Output: creates 4 files, the log file holds all the data generated. the tabl // file holds the pvalues deliminated by tabs, the extra file holds all the // calculated statistics. and the pvalue holds the actual and estimated pvalues int main(int argc, char* argv[]) { if (argc < 6) { cout << "usage: VaryMemoryReal lower-memory upper-memory filename1 filename2 num_bins\n"; exit(1); } double lower = atof(argv[1]), upper = atof(argv[2]); char *filename1 = argv[3], *filename2 = argv[4]; int num_buckets = atoi(argv[5]); if (lower <=0 || upper <= 0) { cout << "The memory must be greater than 0.\n"; exit(1); } if (num_buckets <= 0) { cout << "The number of buckets must be greater than 0.\n"; exit(1); } // finds the number of times the experiment will run double memory_percent; int repeats = 0; double mem = lower; while (mem <= (upper + 0.0000001)) // accounts for rounding error { repeats++; mem *= sqrt(10); } double actual_values[repeats]; double GK_values[repeats]; double QD_values[repeats]; double RS_values[repeats]; double percents[repeats]; std::vector<double> data1, data2; // creates and initializes the log file ofstream data_file; char str[100]; name_file(str, argv, 0); data_file.open(str); ifstream input_file; char output[100]; int stream_size1 = 0, stream_size2 = 0; std::default_random_engine generator(1); std::uniform_real_distribution<double> distribution(0.0, 1.0); input_file.open(filename1); while (!input_file.eof()) { input_file >> output; data1.push_back(atof(output) + (distribution(generator) * 0.000000001)); stream_size1++; } input_file.close(); input_file.open(filename2); while (!input_file.eof()) { input_file >> output; data2.push_back(atof(output) + (distribution(generator) * 0.000000001)); stream_size2++; } input_file.close(); data_file << filename1 << 
endl << filename2 << endl; data_file << "num_buckets: " << num_buckets << endl; data_file << "stream 1 size: " << stream_size1 << endl; data_file << "stream 2 size: " << stream_size2 << endl; memory_percent = lower; int i = 0; while (memory_percent <= (upper + 0.00000001)) //accounts for rounding { data_file << "memory percent: " << memory_percent << endl; percents[i] = memory_percent; int sample_size1 = memory_percent * stream_size1; int sample_size2 = memory_percent * stream_size2; // calculates GK statistic ChiSquareContinuous GK_sketch1(sample_size1,1); for (std::vector<double>::iterator j = data1.begin(); j != data1.end();j++) GK_sketch1.insert(*j); ChiSquareContinuous GK_sketch2(sample_size2,1); for (std::vector<double>::iterator j = data2.begin(); j != data2.end();j++) GK_sketch2.insert(*j); double GK_stat = GK_sketch1.two_sample_statistic(GK_sketch2, num_buckets); GK_values[i] = GK_stat; data_file << "GK = " << GK_stat << endl; // calculates real statistic double *upper_intervals = GK_sketch1.get_upper(); double *lower_intervals = GK_sketch1.get_lower(); double constant_1 = sqrt((double)stream_size2/stream_size1); double constant_2 = sqrt((double)stream_size1/stream_size2); double chi_squared = 0; for (int i = 0; i < num_buckets; i++) { double frequency1 = 0, frequency2 = 0; for (std::vector<double>::iterator j = data1.begin(); j!=data1.end();j++) { if (*j <= upper_intervals[i+1] && *j > lower_intervals[i+1]) frequency1++; } for (std::vector<double>::iterator j = data2.begin(); j!=data2.end();j++) { if (*j <= upper_intervals[i+1] && *j > lower_intervals[i+1]) frequency2++; } double lambda = frequency1 * constant_1 - frequency2 * constant_2; chi_squared += (lambda * lambda) / (frequency1 + frequency2); } actual_values[i] = chi_squared; data_file << "actual = " << chi_squared << endl; memory_percent *= sqrt(10); i++; } data_file.close(); // creates pvalue file name_file(str, argv, 3); data_file.open(str); // creates table file ofstream data2_file; 
name_file(str, argv, 1); data2_file.open(str); // creates extra file ofstream data3_file; name_file(str, argv, 2); data3_file.open(str); int deg_freedom = num_buckets; if (stream_size1 != stream_size2) deg_freedom--; for (int i = 0; i < repeats; i++) { // adds values to the table data2_file << percents[i] * 100 << "\t"; double error = abs(pochisq(GK_values[i], deg_freedom) - pochisq(actual_values[i], deg_freedom)); data2_file << error << endl; // adds values to pvalue file data_file << pochisq(actual_values[i], deg_freedom) << " actual" << endl; data_file << pochisq(GK_values[i], deg_freedom) << " GK" << endl; // adds values to extra file data3_file << percents[i] * 100 << "\t"; error = abs(GK_values[i] - actual_values[i]) / actual_values[i]; data3_file << error << endl; } data3_file.close(); data2_file.close(); data_file.close(); return 0; }
/*
 * Returns the result of name_file(TO) followed by the fixed suffix
 * ":query_stat_index|set_stat_index".
 * NOTE(review): the suffix looks like a list of function names separated
 * by '|' -- confirm how callers interpret it.
 */
string query_allow_login(void)
{
    string owner;

    owner = name_file(TO);
    return owner + ":query_stat_index|set_stat_index";
}