Exemple #1
0
int main(int argc, char * argv[]) {
	int i = 0;
	char *ptr = NULL;
	char *string = NULL;
	int fd = -1;
	Header* p;
	char *tab = NULL;
	char size = sizeof(char)*7;
	name_file(&i);
	/*debug_println_ptr("dedans address ", &address);*/
	fd = open_file("t");
	if (fd != -1){
		ptr = (char*)mmap_file();
		string = (char*)mymalloc(size);
		if (string == NULL) {
			perror("Erreur d'allocation");
		}
		strcpy(string, "coucou ");

		/*p = (void*)string - sizeof(Header);
		printf("%s\n", string);
		printf("string:%d  size:%d  address:%d\n", (int)string, p->size, address);*/
		tab = (char*)mymalloc(size);
		if (tab == NULL) {
			perror("Erreur d'allocation");
		}
		strcpy(tab, "coucou ");
		p = (void*)string - sizeof(Header);
		
		/* printf("string -> %s | tab -> %s | poniteur address %d string address %d file %d\n", string, tab, (int)p, (int)string, (int)ptr); */
		printf("\n p->size %d\n", p->size);
		/*debug_println_ptr("dehors address ", &address);*/
		myfree(string);
		string = NULL;
		/*printf("string -> %s | tab -> %s\n", string, tab);*/
		
	}
	else
		printf("Erreur %d\n", fd);
	exit(EXIT_SUCCESS);
}
// Runs the categorical chi-square experiment on one pair of data sets while
// varying the memory setting handed to the sketches.
// Input: takes 4 command line arguments: the lower and upper memory setting
// (the experiment steps from lower to upper in increments of 10) and the names
// of the two files the data are in (whitespace-separated numeric tokens).
// Output: creates 5 files whose names come from name_file(): the log file
// (index 0) holds all the data generated, the table file (1) holds the
// absolute p-value error delimited by tabs, the extra file (2) holds the
// relative error of the statistic, the pvalue file (3) holds the actual and
// estimated p-values, and the time file (5) holds the insertion time in
// seconds for each memory setting.
// Returns 0 on success, 1 when the arguments or the input files are unusable.
int main(int argc, char* argv[])
{
  if (argc < 5)
  {
    cout << "usage: VaryMemoryCategorical lower-memory upper-memory filename1 filename2" << endl;
    exit(1);
  }

  double lower = atof(argv[1]), upper = atof(argv[2]);
  char *filename1 = argv[3], *filename2 = argv[4];

  // One entry per memory setting; vectors grow with the experiment loop so no
  // separate (and possibly mismatching) pre-count of the iterations is needed.
  std::vector<double> actual_values, estimated_values, percents, insert_seconds;
  std::vector<double> data1, data2;
  std::unordered_map<double,int> stream1, stream2;

  // creates and initializes the log file
  ofstream data_file;
  char str[100];
  name_file(str, argv, 0);
  data_file.open(str);

  // Reads every whitespace-separated token of `path`, appending its numeric
  // value to `values` and counting its occurrences in `counts`.
  // Returns false when the file cannot be opened.
  auto load_stream = [](const char *path, std::vector<double> &values,
                        std::unordered_map<double,int> &counts) -> bool
  {
    ifstream input_file(path);
    if (!input_file.is_open())
      return false;
    char token[100];
    for (;;)
    {
      // Bound the extraction so an over-long token cannot overflow `token`.
      input_file.width(sizeof(token));
      // Testing the extraction itself (instead of eof()) avoids recording a
      // phantom value after the last token.
      if (!(input_file >> token))
        break;
      double value = atof(token);
      values.push_back(value);
      ++counts[value];
    }
    return true;
  };

  if (!load_stream(filename1, data1, stream1))
  {
    cout << "could not open input file: " << filename1 << endl;
    return 1;
  }
  if (!load_stream(filename2, data2, stream2))
  {
    cout << "could not open input file: " << filename2 << endl;
    return 1;
  }
  if (data1.empty() || data2.empty())
  {
    // The scaling constants below divide by both stream sizes.
    cout << "both input files must contain at least one value" << endl;
    return 1;
  }

  int stream_size1 = static_cast<int>(data1.size());
  int stream_size2 = static_cast<int>(data2.size());

  data_file << filename1 << endl << filename2 << endl;
  data_file << "stream 1 size: " << stream_size1 << endl;
  data_file << "stream 2 size: " << stream_size2 << endl;
  data_file << "number of categories: " << stream1.size() << endl;

  // Number of distinct categories over both streams; recomputed (not
  // accumulated) for every memory setting.
  int num_categories = 0;
  double memory_percent = lower;

  while (memory_percent <= (upper + 0.00000001)) //accounts for rounding
  {
    data_file << "memory percent: " << memory_percent << endl;
    percents.push_back(memory_percent);

    // calculates the estimated statistic
    ChiSquareCategorical sketch1(memory_percent);
    ChiSquareCategorical sketch2(memory_percent);

    timeval timeBefore, timeAfter;
    gettimeofday(&timeBefore, NULL);

    for (std::vector<double>::iterator j = data1.begin(); j != data1.end(); ++j)
      sketch1.insert(*j);
    for (std::vector<double>::iterator j = data2.begin(); j != data2.end(); ++j)
      sketch2.insert(*j);

    gettimeofday(&timeAfter, NULL); // get time for insertion
    long diffSeconds = timeAfter.tv_sec - timeBefore.tv_sec;
    // May be negative; adding it to the whole seconds still gives the right
    // elapsed time.
    long diffUSeconds = timeAfter.tv_usec - timeBefore.tv_usec;
    insert_seconds.push_back(diffSeconds + diffUSeconds / 1000000.0);

    double estimated_stat = sketch1.calculate_statistic(sketch2, 0);
    estimated_values.push_back(estimated_stat);
    data_file << "estimate = " << estimated_stat << endl;

    // calculates actual statistic
    double constant1 = sqrt(double(stream_size2) / double(stream_size1));
    double constant2 = sqrt(double(stream_size1) / double(stream_size2));
    double actual_stat = 0;
    int categories_seen = 0;

    for (std::unordered_map<double,int>::const_iterator j = stream1.begin(); j != stream1.end(); ++j)
    {
      double frequency1 = j->second;
      double frequency2 = 0;
      categories_seen++;
      std::unordered_map<double,int>::const_iterator match = stream2.find(j->first);
      if (match != stream2.end())
        frequency2 = match->second;
      double value = frequency1 * constant1 - frequency2 * constant2;
      actual_stat += (value * value) / (frequency1 + frequency2);
    }
    // have to loop through other stream to find when first one is 0
    for (std::unordered_map<double,int>::const_iterator j = stream2.begin(); j != stream2.end(); ++j)
    {
      if (stream1.find(j->first) == stream1.end())
      {
        categories_seen++;
        double frequency2 = j->second;
        double value = -frequency2 * constant2;
        actual_stat += (value * value) / frequency2;
      }
    }
    num_categories = categories_seen;

    actual_values.push_back(actual_stat);
    data_file << "actual = " << actual_stat << endl;
    memory_percent += 10;
  }
  data_file.close();

  // creates pvalues file
  name_file(str, argv, 3);
  data_file.open(str);

  // creates table file
  ofstream data2_file;
  name_file(str, argv, 1);
  data2_file.open(str);

  // creates time table file
  ofstream time_file;
  char str2[150];
  name_file(str2, argv, 5);
  time_file.open(str2);

  // creates extra file
  ofstream data3_file;
  name_file(str, argv, 2);
  data3_file.open(str);

  int deg_freedom = num_categories - 1;
  for (size_t i = 0; i < percents.size(); i++)
  {
    // adds values to the table
    data2_file << percents[i] << "\t";
    // fabs: the unqualified abs may bind to the integer overload and truncate
    double error = fabs(pochisq(estimated_values[i], deg_freedom) - pochisq(actual_values[i], deg_freedom));
    data2_file << error << endl;

    // adds values to the time table
    time_file << percents[i] << "\t";
    time_file << insert_seconds[i] << endl;

    // adds values to pvalue file
    data_file << pochisq(actual_values[i], deg_freedom) << " actual" << endl;
    data_file << pochisq(estimated_values[i], deg_freedom) << " estimated" << endl;

    // adds values to extra file
    data3_file << percents[i] << "\t";
    error = fabs(estimated_values[i] - actual_values[i]) / actual_values[i];
    data3_file << error << endl;
  }
  data3_file.close();
  time_file.close();
  data2_file.close();
  data_file.close();
  return 0;
}
// Runs experiments with one inputed data set and varies the memory
// Input: takes 5 command line arguments, lower and upper fraction of memory,
// names of files the data are in, and number of bins
// Output: creates 4 files, the log file holds all the data generated. the tabl
// file holds the pvalues deliminated by tabs, the extra file holds all the 
// calculated statistics. and the pvalue holds the actual and estimated pvalues
int main(int argc, char* argv[])
{
  if (argc < 6)
  {
    cout << "usage: VaryMemoryReal lower-memory upper-memory filename1 filename2 num_bins\n";
    exit(1);
  }

  double lower = atof(argv[1]), upper = atof(argv[2]);
  char *filename1 = argv[3], *filename2 = argv[4];
  int num_buckets = atoi(argv[5]);

  if (lower <=0 || upper <= 0)
  {
    cout << "The memory must be greater than 0.\n";
    exit(1);
  }
  if (num_buckets <= 0)
  {
    cout << "The number of buckets must be greater than 0.\n";
    exit(1);
  }

  // finds the number of times the experiment will run
  double memory_percent;
  int repeats = 0;
  double mem = lower;
  while (mem <= (upper + 0.0000001)) // accounts for rounding error
  {
    repeats++;
    mem *= sqrt(10);
  }

  double actual_values[repeats];
  double GK_values[repeats];
  double QD_values[repeats];
  double RS_values[repeats];
  double percents[repeats];
  std::vector<double> data1, data2;

  // creates and initializes the log file
  ofstream data_file;
  char str[100];
  name_file(str, argv, 0);
  data_file.open(str);
  ifstream input_file;
  char output[100];
  int stream_size1 = 0, stream_size2 = 0;

  std::default_random_engine generator(1);
  std::uniform_real_distribution<double> distribution(0.0, 1.0);

  input_file.open(filename1);
  while (!input_file.eof())
  {
    input_file >> output;
    data1.push_back(atof(output) + (distribution(generator) * 0.000000001));
    stream_size1++;
  }
  input_file.close();

  input_file.open(filename2);
  while (!input_file.eof())
  {
    input_file >> output;
    data2.push_back(atof(output) + (distribution(generator) * 0.000000001));
    stream_size2++;
  }
  input_file.close();

  data_file << filename1 << endl << filename2 << endl;
  data_file << "num_buckets: " << num_buckets << endl;
  data_file << "stream 1 size: " << stream_size1 << endl;
  data_file << "stream 2 size: " << stream_size2 << endl;
  memory_percent = lower;
  int i = 0;
  while (memory_percent <= (upper + 0.00000001)) //accounts for rounding
  {
    data_file << "memory percent: " << memory_percent << endl;
    percents[i] = memory_percent;
    int sample_size1 = memory_percent * stream_size1;
    int sample_size2 = memory_percent * stream_size2;

    // calculates GK statistic
    ChiSquareContinuous GK_sketch1(sample_size1,1);
    for (std::vector<double>::iterator j = data1.begin(); j != data1.end();j++)
      GK_sketch1.insert(*j);
    ChiSquareContinuous GK_sketch2(sample_size2,1);
    for (std::vector<double>::iterator j = data2.begin(); j != data2.end();j++)
      GK_sketch2.insert(*j);
    
    double GK_stat = GK_sketch1.two_sample_statistic(GK_sketch2, num_buckets);
    GK_values[i] = GK_stat;
    data_file << "GK = " << GK_stat << endl;

    // calculates real statistic
    double *upper_intervals = GK_sketch1.get_upper();
    double *lower_intervals = GK_sketch1.get_lower();
    double constant_1 = sqrt((double)stream_size2/stream_size1);
    double constant_2 = sqrt((double)stream_size1/stream_size2);
    double chi_squared = 0;

    for (int i = 0; i < num_buckets; i++)
    {
      double frequency1 = 0, frequency2 = 0;
      for (std::vector<double>::iterator j = data1.begin(); j!=data1.end();j++)
	{
        if (*j <= upper_intervals[i+1] && *j > lower_intervals[i+1])
          frequency1++;
      }
      for (std::vector<double>::iterator j = data2.begin(); j!=data2.end();j++)
      {
        if (*j <= upper_intervals[i+1] && *j > lower_intervals[i+1])
	  frequency2++;
      }

      double lambda = frequency1 * constant_1 - frequency2 * constant_2;
      chi_squared += (lambda * lambda) / (frequency1 + frequency2);
    }

    actual_values[i] = chi_squared;
    data_file << "actual = " << chi_squared << endl;

    memory_percent *= sqrt(10);
    i++;
  }
  data_file.close();

  // creates pvalue file
  name_file(str, argv, 3);
  data_file.open(str);
  
  // creates table file
  ofstream data2_file;
  name_file(str, argv, 1);
  data2_file.open(str);

  // creates extra file
  ofstream data3_file;
  name_file(str, argv, 2);
  data3_file.open(str);

  int deg_freedom = num_buckets;
  if (stream_size1 != stream_size2)
    deg_freedom--;
  for (int i = 0; i < repeats; i++)
  {
    // adds values to the table
    data2_file << percents[i] * 100 << "\t";
    double error = abs(pochisq(GK_values[i], deg_freedom) - pochisq(actual_values[i], deg_freedom));
    data2_file << error << endl;

    // adds values to pvalue file
    data_file << pochisq(actual_values[i], deg_freedom) << " actual" << endl;
    data_file << pochisq(GK_values[i], deg_freedom) << " GK" << endl;

    // adds values to extra file
    data3_file << percents[i] * 100 << "\t";
    error = abs(GK_values[i] - actual_values[i]) / actual_values[i]; 
    data3_file << error << endl;
  }
  data3_file.close();
  data2_file.close();
  data_file.close();
  return 0;
}
/*
 * Returns the result of name_file(TO) followed by the literal suffix
 * ":query_stat_index|set_stat_index".
 * NOTE(review): presumably the suffix lists the functions permitted for
 * this object - confirm against the code that consumes the string.
 */
string query_allow_login(void)
{
    string prefix;

    prefix = name_file(TO);
    return prefix + ":query_stat_index|set_stat_index";
}