예제 #1
0
/*
 * Converts a text file of string keys (one key per line) into integer ids,
 * using the dictionary identified by TEST_DB_NAME / TEST_DIR_NAME to remember
 * which id was assigned to which key.
 *
 * Usage: <program> <input_file>
 *
 * Files:
 *   infile     : text file with the input keys.
 *                Example: {abc,def,ghij,abc,abc,xyz}
 *   outfile    : binary file with one int per input line, the id of that key.
 *   keyfile1   : one fixed-size record of MAX_STR_LEN bytes per distinct key,
 *                the key followed by NUL padding, so a record can be located
 *                by multiplying a record index by MAX_STR_LEN.
 *   keyfile2   : the distinct keys written back to back, each followed by a
 *                single NUL terminator (variable-length records).
 *   offsetfile : one int per distinct key, the byte offset of its record in
 *                keyfile2.
 *   lenfile    : one int per distinct key, the size in bytes of its record in
 *                keyfile2 (key length plus the NUL terminator).
 *
 * NOTE(review): ids are handed out starting at 1, so the record for id i is
 * the (i-1)'th record of keyfile1/offsetfile/lenfile. Confirm that readers of
 * these files expect 1-based ids.
 *
 * Returns 0 on success, -1 on a usage, file, allocation or write error, or
 * the status reported by a dictionary helper (propagated through cBYE, which
 * is expected to jump to the BYE label on failure).
 */
int main (
	  int argc,
	  char ** argv
	  )
{
  /* Everything is declared (and nulled) up front so that every jump to BYE
     sees well-defined values. */
  int status = 0;
  char * keyBuffer = NULL;
  FILE * infile = NULL, * outfile = NULL, * keyfile1 = NULL;
  FILE * keyfile2 = NULL, * offsetfile = NULL, * lenfile = NULL;
  int dict_exists = 0;
  size_t len = MAX_STR_LEN;
  ssize_t nread;
  int min_unused_value = 1;
  long long count_iter = 0;
  int offSet = 0;

  if ( argc != 2 ) {
    fprintf(stderr, "Usage: %s <input_file>\n",
            ( argc > 0 && argv[0] != NULL ) ? argv[0] : "program");
    status = -1;
    goto BYE;
  }

  /* Open the input first so that a bad path does not truncate the outputs. */
  infile = fopen (argv[1], "r");
  if ( infile == NULL ) { perror(argv[1]); status = -1; goto BYE; }
  outfile = fopen(OUTPUT_FILE_NAME, "wb");
  if ( outfile == NULL ) { perror(OUTPUT_FILE_NAME); status = -1; goto BYE; }
  keyfile1 = fopen(KEY_FILE_NAME_1, "wb");
  if ( keyfile1 == NULL ) { perror(KEY_FILE_NAME_1); status = -1; goto BYE; }
  keyfile2 = fopen(KEY_FILE_NAME_2, "wb");
  if ( keyfile2 == NULL ) { perror(KEY_FILE_NAME_2); status = -1; goto BYE; }
  offsetfile = fopen(OFFSET_FILE_NAME, "wb");
  if ( offsetfile == NULL ) { perror(OFFSET_FILE_NAME); status = -1; goto BYE; }
  lenfile = fopen(LEN_FILE_NAME, "wb");
  if ( lenfile == NULL ) { perror(LEN_FILE_NAME); status = -1; goto BYE; }

  /* Set up the dictionary. If another dictionary of the same name exists,
     delete it and create a new one. */
  status = is_dict (TEST_DB_NAME, TEST_DIR_NAME, & dict_exists);
  cBYE(status);

  if ( dict_exists == 1 ) {
    printf("Deleting existing dictionary\n");
    status = del_dict(TEST_DB_NAME, TEST_DIR_NAME);
    cBYE(status);
  }

  printf("Creating new dictionary\n");
  status = mk_dict (TEST_DB_NAME, TEST_DIR_NAME);
  cBYE(status);

  /* getline() may grow this buffer but never shrinks it, so it always holds
     at least MAX_STR_LEN bytes. */
  keyBuffer = malloc ( MAX_STR_LEN * sizeof(char) );
  if ( keyBuffer == NULL ) {
    fprintf(stderr, "Out of memory allocating key buffer\n");
    status = -1;
    goto BYE;
  }

  /* Scan through all the keys in the input data.
     For each key, check if the key already exists in the dictionary.
     (i)  Yes: get its value and write the value to the output binary file.
     (ii) No: assign it an unused value and write the value to the output.
          Also update the key files. */

  printf("Processing input data\n");
  while ( (nread = getline (&keyBuffer, &len, infile)) != -1 ) {

    int value = -1;
    int keyExists = 0;

    /* Strip the trailing newline only when there is one: the last line of
       the file may lack it, and an embedded NUL can make strlen() return 0. */
    size_t keyChars = strlen(keyBuffer);
    if ( keyChars > 0 && keyBuffer[keyChars-1] == '\n' ) {
      keyBuffer[--keyChars] = '\0';
    }
    /* Size of the variable-length record: the key plus its NUL terminator. */
    int keyLength = (int)(keyChars + 1);

    status = get_from_dict_2 (TEST_DB_NAME, TEST_DIR_NAME, keyBuffer, &value, &keyExists);
    cBYE(status);

    if ( keyExists == 0 ) {

      status = add_to_dict_2 (TEST_DB_NAME, TEST_DIR_NAME, keyBuffer, min_unused_value);
      cBYE(status);

      /* NUL-pad the tail of the fixed-size record; otherwise bytes left over
         from earlier (longer) lines or from malloc would leak into keyfile1.
         Keys of MAX_STR_LEN characters or more are truncated to MAX_STR_LEN
         bytes without a terminator. */
      if ( keyChars < (size_t)MAX_STR_LEN ) {
        memset (keyBuffer + keyChars, 0, (size_t)MAX_STR_LEN - keyChars);
      }

      if ( fwrite (&min_unused_value, sizeof(int), 1, outfile) != 1 ||
           fwrite (keyBuffer, MAX_STR_LEN*sizeof(char), 1, keyfile1) != 1 ||
           fwrite (keyBuffer, (size_t)keyLength*sizeof(char), 1, keyfile2) != 1 ||
           fwrite (&keyLength, sizeof(int), 1, lenfile) != 1 ||
           fwrite (&offSet, sizeof(int), 1, offsetfile) != 1 ) {
        fprintf(stderr, "Write error while recording a new key\n");
        status = -1;
        goto BYE;
      }

      min_unused_value++;
      offSet += keyLength;

    }
    else {
      if ( fwrite (&value, sizeof(int), 1, outfile) != 1 ) {
        fprintf(stderr, "Write error on output file\n");
        status = -1;
        goto BYE;
      }
    }

    printPoll(count_iter++);

  }
  printf("\nProcess completed\n");


 BYE:

  if ( infile != NULL ) { fclose (infile); }

  /* fclose() flushes buffered data, so a failure here means lost output. */
  if ( outfile != NULL && fclose (outfile) != 0 && status == 0 ) { status = -1; }
  if ( keyfile1 != NULL && fclose (keyfile1) != 0 && status == 0 ) { status = -1; }
  if ( keyfile2 != NULL && fclose (keyfile2) != 0 && status == 0 ) { status = -1; }
  if ( offsetfile != NULL && fclose (offsetfile) != 0 && status == 0 ) { status = -1; }
  if ( lenfile != NULL && fclose (lenfile) != 0 && status == 0 ) { status = -1; }

  free_if_non_null (keyBuffer);

  return (status);

}
예제 #2
0
/**
 * Benchmarks dictionary writes and reads.
 *
 * Each line of ../sample_emails.txt is stored with add_to_dict() under a key
 * that is the decimal line index ("0", "1", ...); afterwards every key is read
 * back with get_from_dict_2(). Each timed loop is followed by a "control" loop
 * that does the same work minus the dictionary call, and the control time is
 * subtracted to estimate the cost of the dictionary call alone.
 *
 * The return values of the dictionary calls and the contents read back are
 * not checked; this routine only reports timings.
 *
 * If the sample file cannot be opened or memory cannot be allocated, an error
 * is printed to stderr and the function returns early.
 */
void testMultipleReadWrites()
{
  setup();

  unsigned int i=0;
  unsigned int count_iter=0;
  size_t len = 100;
  char keyBuffer[100];
  char * valueBuffer = (char *)malloc(len+1);
  char readBuffer[100];
  ssize_t nread;
  clock_t begin, end;
  double time_spent, time_spent_outside, per_op;
  FILE * fp;

  if ( valueBuffer == NULL )
  {
    fprintf(stderr, "Out of memory allocating value buffer\n");
    return;
  }

  printf("Initiating DB Writes. \n");
  fp = fopen("../sample_emails.txt", "r");
  if ( fp == NULL )
  {
    perror("../sample_emails.txt");
    free(valueBuffer);
    return;
  }
  begin = clock();
  /* getline() may reallocate valueBuffer and update len for long lines. */
  while ((nread = getline(&valueBuffer, &len, fp)) != -1)
  {
    printPoll(count_iter);
    snprintf(keyBuffer,sizeof keyBuffer,"%u",count_iter++);
    add_to_dict(TEST_DB_NAME,TEST_DIR_NAME,keyBuffer,valueBuffer);
  }
  end = clock();
  time_spent = (double)(end - begin) / CLOCKS_PER_SEC;
  fclose(fp);

  /* Control run: same file scan and key formatting, without the DB write. */
  printf("\nInitiating Control Sequence Writes.\n");
  count_iter = 0;
  fp = fopen("../sample_emails.txt", "r");
  if ( fp == NULL )
  {
    perror("../sample_emails.txt");
    free(valueBuffer);
    return;
  }
  begin = clock();
  while ((nread = getline(&valueBuffer, &len, fp)) != -1)
  {
    printPoll(count_iter);
    snprintf(keyBuffer,sizeof keyBuffer,"%u",count_iter++);
  }
  end = clock();
  time_spent_outside = (double)(end - begin) / CLOCKS_PER_SEC;
  fclose(fp);

  /* The line buffer is no longer needed once both scans are done. */
  free(valueBuffer);
  valueBuffer = NULL;

  /* An empty input file leaves count_iter at 0; avoid dividing by zero. */
  per_op = (count_iter > 0) ? (time_spent-time_spent_outside)/count_iter : 0.0;
  printf("\n For %u writes Total Time Taken:" ANSI_COLOR_RED "[%.3fs]" ANSI_COLOR_RESET ". removing additional [%fs] overhead.\n",count_iter,time_spent-time_spent_outside,time_spent_outside);
  printf("Time Taken per Write:" ANSI_COLOR_RED "[%fs]" ANSI_COLOR_RESET "\n",per_op);

  printf("Initiating DB Reads\n");
  begin = clock();
  for(i=0;i<count_iter;i++)
  {
    printPoll(i);
    snprintf(keyBuffer,sizeof keyBuffer,"%u",i);
    get_from_dict_2(TEST_DB_NAME,TEST_DIR_NAME,keyBuffer,readBuffer,50);
  }
  end = clock();
  time_spent = (double)(end - begin) / CLOCKS_PER_SEC;

  /* Control run: key formatting only, without the DB read. */
  printf("Initiating DB Reads (Control)\n");
  begin = clock();
  for(i=0;i<count_iter;i++)
  {
    printPoll(i);
    snprintf(keyBuffer,sizeof keyBuffer,"%u",i);
  }
  end = clock();
  time_spent_outside = (double)(end - begin) / CLOCKS_PER_SEC;

  per_op = (count_iter > 0) ? (time_spent-time_spent_outside)/count_iter : 0.0;
  printf("\nFor %u reads Total Time Taken:" ANSI_COLOR_RED "[%.3fs]" ANSI_COLOR_RESET ". removing additional [%fs] overhead..\n",count_iter,time_spent-time_spent_outside,time_spent_outside);
  printf("Time Taken per Read:" ANSI_COLOR_RED "[%fs]" ANSI_COLOR_RESET "\n",per_op);
}
예제 #3
0
/**
 * Benchmarks dictionary writes and reads with randomly generated keys.
 *
 * count_iter "Key<n>" / "Value<n>" pairs (n from randomInt()) are stored with
 * add_to_dict(), then every stored key is read back with get_from_dict_2().
 * Each timed loop is followed by a "control" loop that does comparable work
 * without the dictionary call; the control time is subtracted to estimate the
 * cost of the dictionary call alone.
 *
 * The return values of the dictionary calls and the contents read back are
 * not checked; this routine only reports timings.
 *
 * All key strings and both pointer arrays are released before returning. On
 * an allocation failure an error is printed to stderr and the function
 * returns early after releasing what it had allocated.
 */
void testMultipleReadWrites()
{
  setup();

  /* initialize random seed: */
  srand ((unsigned int)time(NULL));

  /* All locals are declared before the first goto so that no jump to the
     cleanup label bypasses an initialization. */
  int i=0;
  int keys_filled=0;   /* number of heap-allocated entries currently in keys  */
  int keysX_filled=0;  /* number of heap-allocated entries currently in keysX */
  const int count_iter =100000;
  const char ** keys = (const char **)(malloc(count_iter*(sizeof(char *))));
  const char ** keysX = (const char **)(malloc(count_iter*(sizeof(char *)))); // Only for testing
  char keyBuffer[100];
  char valueBuffer[100];
  char readBuffer[100];
  clock_t begin, end;
  double time_spent = 0.0;
  double time_spent_outside = 0.0;

  if ( keys == NULL || keysX == NULL )
  {
    fprintf(stderr, "Out of memory allocating key tables\n");
    goto cleanup;
  }

  begin = clock();
  for(i=0;i<count_iter;i++)
  {
    snprintf(keyBuffer,sizeof keyBuffer,"Key%d",randomInt());
    snprintf(valueBuffer,sizeof valueBuffer,"Value%d",randomInt());
    char * key = (char *)malloc(strlen(keyBuffer)+1);
    if ( key == NULL )
    {
      fprintf(stderr, "Out of memory allocating key\n");
      goto cleanup;
    }
    strcpy(key,keyBuffer);
    keys[i] = key;
    keys_filled = i+1;

    add_to_dict(TEST_DB_NAME,TEST_DIR_NAME,keyBuffer,valueBuffer);
  }
  end = clock();
  time_spent = (double)(end - begin) / CLOCKS_PER_SEC;

  /* Control run: same key generation and copying, without the DB write. */
  begin = clock();
  for(i=0;i<count_iter;i++)
  {
    snprintf(keyBuffer,sizeof keyBuffer,"Key%d",randomInt());
    snprintf(valueBuffer,sizeof valueBuffer,"Value%d",randomInt());
    char * key = (char *)malloc(strlen(keyBuffer)+1);
    if ( key == NULL )
    {
      fprintf(stderr, "Out of memory allocating key\n");
      goto cleanup;
    }
    strcpy(key,keyBuffer);
    keysX[i] = key;
    keysX_filled = i+1;
  }
  end = clock();
  time_spent_outside = (double)(end - begin) / CLOCKS_PER_SEC;

  printf("For %d writes Total Time Taken:" ANSI_COLOR_RED "[%.3fs]" ANSI_COLOR_RESET ". removing additional [%fs] overhead.\n",count_iter,time_spent-time_spent_outside,time_spent_outside);
  printf("Time Taken per Write:" ANSI_COLOR_RED "[%fs]" ANSI_COLOR_RESET "\n",(time_spent-time_spent_outside)/count_iter);

  begin = clock();
  for(i=0;i<count_iter;i++)
  {
    get_from_dict_2(TEST_DB_NAME,TEST_DIR_NAME,keys[i],readBuffer,50);
  }
  end = clock();
  time_spent = (double)(end - begin) / CLOCKS_PER_SEC;

  /* Release the key strings now, outside any timed region: the control loop
     below overwrites every slot with a string literal, which would otherwise
     drop the only reference to each allocation. */
  for(i=0;i<keys_filled;i++)
  {
    free((void *)keys[i]);
  }
  keys_filled = 0;

  /* Control run: one array store per key, without the DB read. */
  begin = clock();
  for(i=0;i<count_iter;i++)
  {
    keys[i] = "ds";
  }
  end = clock();
  time_spent_outside = (double)(end - begin) / CLOCKS_PER_SEC;

  printf("For %d reads Total Time Taken:" ANSI_COLOR_RED "[%.3fs]" ANSI_COLOR_RESET ". removing additional [%fs] overhead..\n",count_iter,time_spent-time_spent_outside,time_spent_outside);
  printf("Time Taken per Read:" ANSI_COLOR_RED "[%fs]" ANSI_COLOR_RESET "\n",(time_spent-time_spent_outside)/count_iter);

 cleanup:
  /* Only the first *_filled slots hold heap pointers; the rest are either
     unset or string literals and must not be freed. */
  for(i=0;i<keys_filled;i++)
  {
    free((void *)keys[i]);
  }
  for(i=0;i<keysX_filled;i++)
  {
    free((void *)keysX[i]);
  }
  free((void *)keys);
  free((void *)keysX);
}