Example #1
0
/**
 * Unit test for cvector_remove().
 *
 * Builds a vector using the int function registry, appends a single element,
 * removes it by passing the vector's data pointer as the position, and then
 * checks that the size is zero and that slot 0 is NULL.
 */
void test_cvector_remove()
{
  int element = 1;
  cvector list;

  /* set up: a vector holding exactly one element */
  cvector_init(&list, int_function_registry());
  cvector_push_back(list, &element);

  /* exercise: remove the element at the start of the data array */
  cvector_remove(list, list->data);

  /* verify: nothing is left in the vector */
  FO_ASSERT_EQUAL(list->size, 0);
  FO_ASSERT_PTR_EQUAL(list->data[0], NULL);

  /* tear down */
  cvector_destroy(list);
}
Example #2
0
/**
 * CUnit test for cvector_remove().
 *
 * Prints a banner, builds a vector using the int function registry, appends a
 * single element and removes it by passing the vector's data pointer as the
 * position. Afterwards the size must be zero and slot 0 must compare equal to
 * NULL. test_failure() is called and the banner line is terminated once the
 * vector has been destroyed.
 */
void test_cvector_remove()
{
  int element = 1;
  cvector list;

  /* announce which test is running */
  printf("Test cvector_remove:");

  /* set up: a vector holding exactly one element */
  cvector_init(&list, int_function_registry());
  cvector_push_back(list, &element);

  /* exercise: remove the element at the start of the data array */
  cvector_remove(list, list->data);

  /* verify: nothing is left in the vector */
  CU_ASSERT_EQUAL(list->size, 0);
  CU_ASSERT_EQUAL(list->data[0], NULL);

  /* tear down and finish the output line */
  cvector_destroy(list);
  test_failure();
  printf("\n");
}
Example #3
0
/**
 * @brief runs the labeled test files to determine accuracy
 *
 * For every index i below TESTFILE_NUMBER this function opens a pair of files
 * in the test directory: "<test_dir><i>_raw" and "<test_dir><i>". At most
 * READMAX bytes of the "_raw" file are read, converted to lower case and
 * scanned for <s>...</s> spans; the text of each span is stored as an expected
 * entry. The second file is handed to copyright_analyze() and every entry it
 * reports is compared against the expected entries.
 *
 * Per-file and total counts of correct answers, false positives and false
 * negatives are written to cout. Three log files are created in the current
 * working directory: "Matches" (every entry the copyright agent reported),
 * "False_Negatives" (expected entries left unmatched) and "False_Positives"
 * (reported entries that were not accepted as a match).
 *
 * If a log file or a test file cannot be opened, or the <s>/</s> tags in a
 * labeled file do not pair up, a message is written to cerr and the process
 * terminates with exit(-1).
 *
 * @param copy  copyright handle; it is initialized here with copyright_init()
 *              and released with copyright_destroy() before returning
 */
void run_test_files(copyright copy)
{
  /* locals */
  cvector compare;
  copyright_iterator iter;
  cvector_iterator curr;
  FILE* istr, * m_out, * n_out, * p_out;
  char buffer[READMAX + 1];
  char file_name[FILENAME_MAX];
  char copy_buf[FILENAME_MAX];
  char name_buf[FILENAME_MAX];
  char* first, * last, * loc, tmp;
  int i, matches, correct = 0, falsep = 0, falsen = 0;

  /* grab the copyright files */
  memset(copy_buf, '\0', sizeof(copy_buf));
  memset(name_buf, '\0', sizeof(name_buf));
  snprintf(copy_buf, sizeof(copy_buf),
      "%s/mods-enabled/copyright/agent/copyright.dic",
      sysconfigdir);
  snprintf(name_buf, sizeof(name_buf),
      "%s/mods-enabled/copyright/agent/names.dic",
      sysconfigdir);

  /* create data structures */
  copyright_init(&copy, copy_buf, name_buf);
  cvector_init(&compare, string_function_registry());

  /* open the logging files */
  m_out = fopen("Matches", "w");
  n_out = fopen("False_Negatives", "w");
  p_out = fopen("False_Positives", "w");

  /* big problem if any of the log files didn't open correctly */
  if(!m_out || !n_out || !p_out)
  {
    fprintf(cerr, "ERROR did not successfully open one of the log files\n");
    fprintf(cerr, "ERROR the files that needed to be opened were:\n");
    fprintf(cerr, "ERROR Matches, False_Positives, False_Negatives\n");
    exit(-1);
  }

  /* loop over every file in the test directory */
  for(i = 0; i < TESTFILE_NUMBER; i++)
  {
    /* bounded formatting so a long test_dir cannot overflow file_name */
    snprintf(file_name, sizeof(file_name), "%s%d_raw", test_dir, i);

    /* attempt to open the labeled test file */
    istr = fopen(file_name, "r");
    if(!istr)
    {
      fprintf(cerr, "ERROR Must run testing from correct directory. The\n");
      fprintf(cerr, "ERROR correct directory is installation dependent but\n");
      fprintf(cerr, "ERROR the working directory should include the folder:\n");
      fprintf(cerr, "ERROR   %s\n", test_dir);
      exit(-1);
    }

    /* initialize the buffer and read in any information */
    memset(buffer, '\0', sizeof(buffer));
    buffer[fread(buffer, sizeof(char), READMAX, istr)] = '\0';
    matches = 0;

    /* set everything in the buffer to lower case */
    for(first = buffer; *first; first++)
    {
      /* tolower() requires a value representable as unsigned char */
      *first = (char)tolower((unsigned char)*first);
    }

    /* loop through and find all <s>...</s> tags */
    loc = buffer;
    while((first = strstr(loc, "<s>")) != NULL)
    {
      last = strstr(loc, "</s>");

      if(last == NULL)
      {
        fprintf(cerr, "ERROR unmatched \"<s>\"\n");
        fprintf(cerr, "ERROR in file: \"%s\"\n", file_name);
        exit(-1);
      }

      if(last <= first)
      {
        fprintf(cerr, "ERROR unmatched \"</s>\"\n");
        fprintf(cerr, "ERROR in file: \"%s\"\n", file_name);
        exit(-1);
      }

      /* temporarily terminate the buffer at the closing tag so that only */
      /* the tagged text is pushed, then put the original character back  */
      tmp = *last;
      *last = 0;
      cvector_push_back(compare, first + 3);
      *last = tmp;
      loc = last + 4;
    }

    /* close the previous file and open the corresponding raw data */
    fclose(istr);
    file_name[strlen(file_name) - 4] = '\0'; /* strip the "_raw" suffix */
    istr = fopen(file_name, "r");
    if(!istr)
    {
      fprintf(cerr, "ERROR Unmatched file in the test directory\n");
      fprintf(cerr, "ERROR File with no match: \"%s\"_raw\n", file_name);
      fprintf(cerr, "ERROR File that caused error: \"%s\"\n", file_name);
      /* a NULL stream must not reach copyright_analyze() or fclose() */
      exit(-1);
    }

    /* perform the analysis on the current file */
    copyright_analyze(copy, istr, REPORTALL);
    fclose(istr);

    /* loop over every match that the copyright object found */
    for(iter = copyright_begin(copy); iter != copyright_end(copy); iter++)
    {
      cvector_iterator best = cvector_begin(compare);
      char score[2048];
      char dst[2048];

      memset(dst, '\0', sizeof(dst));
      memset(score, '\0', sizeof(score));

      /* log the copyright entry */
      fprintf(m_out, "====%s================================\n", file_name);
      fprintf(m_out, "DICT: %s\tNAME: %s\n",copy_entry_dict(*iter), copy_entry_name(*iter));
      fprintf(m_out, "TEXT[%s]\n",copy_entry_text(*iter));

      /* loop over the vector looking for matches; score keeps the longest */
      /* common text seen so far and best points at the entry producing it */
      for(curr = cvector_begin(compare); curr != cvector_end(compare); curr++)
      {
        if(longest_common(dst, copy_entry_text(*iter), (char*)*curr) > strlen(score))
        {
          strcpy(score, dst);
          best = curr;
        }
      }

      /* log the entry as found if it matched something in compare */
      if(cvector_size(compare) != 0 &&
          (strcmp(copy_entry_dict(*iter), "by") || strlen(score) > THRESHOLD))
      {
        /* each expected entry may only be consumed by one reported entry */
        cvector_remove(compare, best);
        matches++;
      }
      else if(!strcmp(copy_entry_dict(*iter), "email") || !strcmp(copy_entry_dict(*iter), "url"))
      {
        /* email and url entries are counted as correct without a match */
        matches++;
      }
      else
      {
        fprintf(p_out, "====%s================================\n", file_name);
        fprintf(p_out, "DICT: %s\tNAME: %s\n",copy_entry_dict(*iter), copy_entry_name(*iter));
        fprintf(p_out, "TEXT[%s]\n",copy_entry_text(*iter));
      }
    }

    /* log all the false negatives: whatever is still left in compare */
    for(curr = cvector_begin(compare); curr != cvector_end(compare); curr++)
    {
      fprintf(n_out, "====%s================================\n", file_name);
      fprintf(n_out, "%s\n", (char*)*curr);
    }

    fprintf(cout, "====%s================================\n", file_name);
    fprintf(cout, "Correct:         %d\n", matches);
    fprintf(cout, "False Positives: %d\n", copyright_size(copy) - matches);
    fprintf(cout, "False Negatives: %d\n", cvector_size(compare));

    /* clean up for the next file */
    correct += matches;
    falsep += copyright_size(copy) - matches;
    falsen += cvector_size(compare);
    cvector_clear(compare);
  }

  fprintf(cout, "==== Totals ================================\n");
  fprintf(cout, "Total Found:     %d\n", correct + falsep);
  fprintf(cout, "Correct:         %d\n", correct);
  fprintf(cout, "False Positives: %d\n", falsep);
  fprintf(cout, "False Negatives: %d\n", falsen);

  fclose(m_out);
  fclose(n_out);
  fclose(p_out);
  copyright_destroy(copy);
  cvector_destroy(compare);
}