示例#1
0
 /**
  * Recursively diffs the half-open index ranges [l1, l2) of the left sequence
  * and [r1, r2) of the right sequence, recording every element that is not
  * part of the common subsequence found by setting the corresponding entry of
  * lchanges (left side) or rchanges (right side) to true.
  *
  * The search runs a forward pass from (l1, r1) and a backward pass from
  * (l2, r2) in lock-step, one extra edit per outer iteration, and splits the
  * problem in two at the point where the two passes meet.
  * NOTE(review): this appears to follow the divide-and-conquer "middle snake"
  * scheme of Myers' O(ND) diff algorithm -- confirm against the class docs.
  *
  * Relies on members declared outside this view:
  *  - match(i, j): used as the equality test for left[i] vs right[j];
  *  - lchanges / rchanges: containers offering begin() with random access;
  *  - fwd / rev: subscripted with negative indices here, so they must accept
  *    negative subscripts and be large enough for every diagonal visited.
  *
  * @param l1 first index of the left range (inclusive)
  * @param l2 end index of the left range (exclusive)
  * @param r1 first index of the right range (inclusive)
  * @param r2 end index of the right range (exclusive)
  */
 void DiffHelper::longest_common(ptrdiff_t l1, ptrdiff_t l2, ptrdiff_t r1, ptrdiff_t r2) {
     // Strip the common prefix, then the common suffix; neither needs marking.
     for (; l1 < l2 && r1 < r2 && match(l1, r1); ++l1, ++r1) {}
     for (; l1 < l2 && r1 < r2 && match(l2 - 1, r2 - 1); --l2, --r2) {}
     // Left range exhausted: whatever remains on the right is a change.
     if (l1 == l2) {
         std::fill(rchanges.begin() + r1, rchanges.begin() + r2, true);
         return;
     }
     // Right range exhausted: whatever remains on the left is a change.
     if (r1 == r2) {
         std::fill(lchanges.begin() + l1, lchanges.begin() + l2, true);
         return;
     }
     // Seed the neighbours read by the first iteration (d == 0, k == 0):
     // the forward pass reads fwd[k + 1], the backward pass reads rev[k - 1].
     fwd[1] = l1;
     rev[-1] = l2;
     // Diagonals are numbered relative to the start corner for the forward pass
     // (i - j == k + d1) and relative to the end corner for the backward pass
     // (i - j == k + d2); delta converts between the two numberings.
     ptrdiff_t d1 = l1 - r1, d2 = l2 - r2, delta = d2 - d1;
     // d is the number of edits spent so far by each pass; the loop has no exit
     // condition of its own and ends only through the returns below.
     for (ptrdiff_t d = 0;; ++d) {
         // Forward pass: extend the furthest-reaching left index on each diagonal k.
         for (ptrdiff_t k = - d; k <= d; k = k + 2) {
             // Start from the better of the two neighbouring diagonals: coming
             // from k - 1 consumes one left element (+1), coming from k + 1 does not.
             ptrdiff_t i = k == - d ? fwd[k + 1] : fwd[k - 1] + 1;
             if (k < d)
                 i = std::max(i, fwd[k + 1]);
             // Follow matching elements along the diagonal as far as possible.
             for (ptrdiff_t j = i - k - d1; i < l2 && j < r2 && match(i, j); ++i, ++j) {}
             fwd[k] = i;
             // With an odd delta the overlap test is done in the forward pass:
             // if the backward pass on the same absolute diagonal (k - delta in its
             // numbering) has reached or passed this point, split here and recurse.
             if ((delta & 1) && k > delta - d && k < delta + d && rev[k - delta] <= fwd[k]) {
                 longest_common(l1, fwd[k], r1, fwd[k] - k - d1);
                 longest_common(fwd[k], l2, fwd[k] - k - d1, r2);
                 return;
             }
         }
         // Backward pass: mirror image of the above, shrinking the left index.
         for (ptrdiff_t k = - d; k <= d; k = k + 2) {
             // Coming from k + 1 steps back over one left element (-1),
             // coming from k - 1 does not.
             ptrdiff_t i = k == d ? rev[k - 1] : rev[k + 1] - 1;
             if (k > - d)
                 i = std::min(i, rev[k - 1]);
             // Follow matching elements backwards along the diagonal.
             for (ptrdiff_t j = i - k - d2; i > l1 && j > r1 && match(i - 1, j - 1); --i, --j) {}
             rev[k] = i;
             // With an even delta the overlap test is done in the backward pass,
             // against the forward value on the same absolute diagonal (k + delta).
             if (! (delta & 1) && k >= - d - delta && k <= d - delta && rev[k] <= fwd[k + delta]) {
                 longest_common(l1, fwd[k + delta], r1, fwd[k + delta] - k - d2);
                 longest_common(fwd[k + delta], l2, fwd[k + delta] - k - d2, r2);
                 return;
             }
         }
     }
 }
示例#2
0
文件: main.c 项目: gerv/fossology
/**
 * @brief check to make sure the copyright table has been created
 *
 * Runs the check_database_table query. If it fails and the error message
 * contains "does not exist", setup_database() is called to create the table;
 * any other failure is logged and reported as an error. If the query succeeds,
 * the foreign key is checked with check_copyright_foreign_key and
 * cleanup_copyright() is called when that query does not return exactly one
 * row. Finally the check_copyright_ars query is run and fo_CreateARSTable() is
 * called when it fails.
 *
 * Every PGresult obtained here is released with PQclear() before the handle is
 * reused, so no result is leaked.
 *
 * @param pgConn the connection to the database
 * @return 0 if the table check failed for a reason other than a missing table;
 *         the result of setup_database() / cleanup_copyright() when one of them
 *         had to be called; 1 otherwise
 */
int check_copyright_table(PGconn* pgConn)
{
  /* local variables */
  PGresult* pgResult; // the result of the database access
  int ret;            // the value returned by this function
  char* str;          // the string error message if the database access fails
  char buffer[256];   // receives the common substring found by longest_common

  /* initialize memory and do the sql access */
  ret = 1;
  str = NULL;
  memset(buffer, '\0', sizeof(buffer));
  pgResult = PQexec(pgConn, check_database_table);

  /* check if the copyright table already exists */
  if(PQresultStatus(pgResult) != PGRES_TUPLES_OK)
  {
    /* str is obtained from pgResult and is only used before the PQclear()
     * below; it must not be passed to free() */
    str = PQresultErrorMessage(pgResult);

    /* 14 is the length of "does not exist": the whole phrase must be found */
    if(longest_common(buffer, str, "does not exist") == 14)
    {
      fprintf(cerr, "WARNING %s.%d: Could not find copyright table.\n", __FILE__, __LINE__);
      ret = setup_database(pgConn);
    }
    else
    {
      fprintf(cerr, "ERROR %s.%d: problem with copyright table\n", __FILE__, __LINE__);
      fprintf(cerr, "ERROR PQ error message: %s\n", str);
      fprintf(cerr, "ERROR sql was: %s\n", check_database_table);
      ret = 0;
    }
  }
  else
  {
    /* release the table-check result before reusing the handle */
    PQclear(pgResult);

    /* check if the copyright foreign key exists */
    pgResult = PQexec(pgConn, check_copyright_foreign_key);
    if(PQntuples(pgResult) != 1)
    {
      ret = cleanup_copyright(pgConn);
    }
  }

  /* done with whichever result the branch above left in pgResult */
  PQclear(pgResult);

  /* check if the copyright ars table exists */
  pgResult = PQexec(pgConn, check_copyright_ars);
  if(PQresultStatus(pgResult) != PGRES_TUPLES_OK && PQntuples(pgResult) != 1)
  {
    fo_CreateARSTable(pgConn, AGENT_ARS);
  }

  /* clean up memory and return */
  PQclear(pgResult);
  return ret;
}
示例#3
0
文件: main.c 项目: gerv/fossology
/**
 * @brief runs the labeled test files to determine accuracy
 *
 * For every index i in [0, TESTFILE_NUMBER) this function opens the labeled
 * file "<test_dir><i>_raw", collects the text between each <s>...</s> pair,
 * then runs copyright_analyze() on the companion file "<test_dir><i>" and
 * compares what the agent found against the labeled entries. Per-file and
 * total counts of correct answers, false positives and false negatives are
 * written to cout. Three log files are produced in the working directory:
 * "Matches" (everything the agent found), "False_Negatives" (labeled entries
 * it did not find) and "False_Positives".
 *
 * Any failure to open a log file or a test file, and any unbalanced <s> tag,
 * is fatal: a message is written to cerr and the process exits with -1.
 *
 * @param copy copyright handle; it is passed to copyright_init() before any
 *             other use and to copyright_destroy() before returning, so the
 *             value supplied by the caller is presumably not read
 */
void run_test_files(copyright copy)
{
  /* locals */
  cvector compare;
  copyright_iterator iter;
  cvector_iterator curr;
  FILE* istr, * m_out, * n_out, * p_out;
  char buffer[READMAX + 1];
  char file_name[FILENAME_MAX];
  char copy_buf[FILENAME_MAX];
  char name_buf[FILENAME_MAX];
  char* first, * last, * loc, tmp;
  int i, matches, correct = 0, falsep = 0, falsen = 0;

  /* grab the copyright files */
  memset(copy_buf, '\0', sizeof(copy_buf));
  memset(name_buf, '\0', sizeof(name_buf));
  snprintf(copy_buf, sizeof(copy_buf),
      "%s/mods-enabled/copyright/agent/copyright.dic",
      sysconfigdir);
  snprintf(name_buf, sizeof(name_buf),
      "%s/mods-enabled/copyright/agent/names.dic",
      sysconfigdir);

  /* create data structures */
  copyright_init(&copy, copy_buf, name_buf);
  cvector_init(&compare, string_function_registry());

  /* open the logging files */
  m_out = fopen("Matches", "w");
  n_out = fopen("False_Negatives", "w");
  p_out = fopen("False_Positives", "w");

  /* big problem if any of the log files didn't open correctly */
  if(!m_out || !n_out || !p_out)
  {
    fprintf(cerr, "ERROR did not successfully open one of the log files\n");
    fprintf(cerr, "ERROR the files that needed to be opened were:\n");
    fprintf(cerr, "ERROR Matches, False_Positives, False_Negatives\n");
    exit(-1);
  }

  /* loop over every file in the test directory */
  for(i = 0; i < TESTFILE_NUMBER; i++)
  {
    /* bounded write: test_dir comes from outside this function */
    snprintf(file_name, sizeof(file_name), "%s%d_raw", test_dir, i);

    /* attempt to open the labeled test file */
    istr = fopen(file_name, "r");
    if(!istr)
    {
      fprintf(cerr, "ERROR Must run testing from correct directory. The\n");
      fprintf(cerr, "ERROR correct directory is installation dependent but\n");
      fprintf(cerr, "ERROR the working directory should include the folder:\n");
      fprintf(cerr, "ERROR   %s\n", test_dir);
      exit(-1);
    }

    /* initialize the buffer and read in any information */
    memset(buffer, '\0', sizeof(buffer));
    buffer[fread(buffer, sizeof(char), READMAX, istr)] = '\0';
    matches = 0;

    /* set everything in the buffer to lower case; tolower() requires a value
     * representable as unsigned char, so convert before the call */
    for(first = buffer; *first; first++)
    {
      *first = tolower((unsigned char)*first);
    }

    /* loop through and find all <s>...</s> tags */
    loc = buffer;
    while((first = strstr(loc, "<s>")) != NULL)
    {
      last = strstr(loc, "</s>");

      if(last == NULL)
      {
        fprintf(cerr, "ERROR unmatched \"<s>\"\n");
        fprintf(cerr, "ERROR in file: \"%s\"\n", file_name);
        exit(-1);
      }

      if(last <= first)
      {
        fprintf(cerr, "ERROR unmatched \"</s>\"\n");
        fprintf(cerr, "ERROR in file: \"%s\"\n", file_name);
        exit(-1);
      }

      /* temporarily terminate the string at the closing tag so that only the
       * tagged text is handed to the vector, then restore the buffer */
      tmp = *last;
      *last = 0;
      cvector_push_back(compare, first + 3);
      *last = tmp;
      loc = last + 4;
    }

    /* close the previous file and open the corresponding raw data; the name
     * is the labeled file name with the trailing "_raw" removed */
    fclose(istr);
    file_name[strlen(file_name) - 4] = '\0';
    istr = fopen(file_name, "r");
    if(!istr)
    {
      fprintf(cerr, "ERROR Unmatched file in the test directory\n");
      fprintf(cerr, "ERROR File with no match: \"%s\"_raw\n", file_name);
      fprintf(cerr, "ERROR File that caused error: \"%s\"\n", file_name);
      /* nothing to analyze: continuing would pass a NULL stream on */
      exit(-1);
    }

    /* perform the analysis on the current file */
    copyright_analyze(copy, istr, REPORTALL);
    fclose(istr);

    /* loop over every match that the copyright object found */
    for(iter = copyright_begin(copy); iter != copyright_end(copy); iter++)
    {
      cvector_iterator best = cvector_begin(compare);
      char score[2048];
      char dst[2048];

      memset(dst, '\0', sizeof(dst));
      memset(score, '\0', sizeof(score));

      /* log the copyright entry */
      fprintf(m_out, "====%s================================\n", file_name);
      fprintf(m_out, "DICT: %s\tNAME: %s\n",copy_entry_dict(*iter), copy_entry_name(*iter));
      fprintf(m_out, "TEXT[%s]\n",copy_entry_text(*iter));

      /* loop over the vector looking for matches; score keeps the longest
       * common text seen so far and best the labeled entry that produced it */
      for(curr = cvector_begin(compare); curr != cvector_end(compare); curr++)
      {
        if(longest_common(dst, copy_entry_text(*iter), (char*)*curr) > strlen(score))
        {
          strcpy(score, dst);
          best = curr;
        }
      }

      /* log the entry as found if it matched something in compare */
      if(cvector_size(compare) != 0 &&
          (strcmp(copy_entry_dict(*iter), "by") || strlen(score) > THRESHOLD))
      {
        /* each labeled entry may only be matched once */
        cvector_remove(compare, best);
        matches++;
      }
      else if(!strcmp(copy_entry_dict(*iter), "email") || !strcmp(copy_entry_dict(*iter), "url"))
      {
        /* email and url entries are counted as matches here even without a
         * corresponding labeled entry */
        matches++;
      }
      else
      {
        fprintf(p_out, "====%s================================\n", file_name);
        fprintf(p_out, "DICT: %s\tNAME: %s\n",copy_entry_dict(*iter), copy_entry_name(*iter));
        fprintf(p_out, "TEXT[%s]\n",copy_entry_text(*iter));
      }
    }

    /* log all the false negatives: labeled entries nothing was matched to */
    for(curr = cvector_begin(compare); curr != cvector_end(compare); curr++)
    {
      fprintf(n_out, "====%s================================\n", file_name);
      fprintf(n_out, "%s\n", (char*)*curr);
    }

    fprintf(cout, "====%s================================\n", file_name);
    fprintf(cout, "Correct:         %d\n", matches);
    fprintf(cout, "False Positives: %d\n", copyright_size(copy) - matches);
    fprintf(cout, "False Negatives: %d\n", cvector_size(compare));

    /* clean up for the next file */
    correct += matches;
    falsep += copyright_size(copy) - matches;
    falsen += cvector_size(compare);
    cvector_clear(compare);
  }

  fprintf(cout, "==== Totals ================================\n");
  fprintf(cout, "Total Found:     %d\n", correct + falsep);
  fprintf(cout, "Correct:         %d\n", correct);
  fprintf(cout, "False Positives: %d\n", falsep);
  fprintf(cout, "False Negatives: %d\n", falsen);

  fclose(m_out);
  fclose(n_out);
  fclose(p_out);
  copyright_destroy(copy);
  cvector_destroy(compare);
}