コード例 #1
0
ファイル: hamming.c プロジェクト: mju/usaco
/*
 * Depth-first search that extends sol[0..depth-1] to size_set entries.
 *
 * Candidates are drawn from nums[start..num_nums-1] in increasing index
 * order. A candidate is accepted only if hamming_dist() between it and
 * every value already stored in sol[0..depth-1] is at least dist.
 *
 * Returns 1 as soon as sol holds size_set entries (sol is left filled in),
 * or 0 when no extension from this state exists.
 */
int
backtrack(int depth, int start, int size_set, int dist, short* nums, int num_nums, short* sol) {
  /* Enough values have been chosen: the current sol is a solution. */
  if (depth == size_set) {
    return 1;
  }

  for (int cand = start; cand < num_nums; cand++) {
    /* Scan the chosen values until one is too close to the candidate. */
    int k = 0;
    while (k < depth && hamming_dist(nums[cand], sol[k]) >= dist) {
      k++;
    }
    if (k < depth) {
      /* nums[cand] conflicts with sol[k]; try the next candidate. */
      continue;
    }

    sol[depth] = nums[cand];
    /* Only later candidates are considered deeper in the recursion. */
    if (backtrack(depth + 1, cand + 1, size_set, dist, nums, num_nums, sol)) {
      return 1;
    }
  }

  return 0;
}
コード例 #2
0
ファイル: stringdist.c プロジェクト: hschreib/stringdist
/*
 * Dispatches to the distance routine selected by S->distance and returns
 * its result for the two integer-encoded strings (str_a, len_a) and
 * (str_b, len_b).
 *
 * Returns -1.0 when S->distance matches none of the handled cases.
 */
double stringdist(Stringdist *S, unsigned int *str_a, int len_a, unsigned int *str_b, int len_b){
  /* Fallback value, kept when no case below matches. */
  double result = -1.0;

  switch(S->distance){
    case osa :
      result = osa_dist(str_a, len_a, str_b, len_b, S->weight, S->work);
      break;
    case lv :
      result = lv_dist( str_a, len_a, str_b, len_b, S->weight, S->work);
      break;
    case dl :
      result = dl_dist(str_a, len_a, str_b, len_b, S->weight, S->dict, S->work);
      break;
    case hamming :
      result = hamming_dist(str_a, len_a, str_b, len_b);
      break;
    case lcs :
      result = lcs_dist(str_a, len_a, str_b, len_b, S->work);
      break;
    /* The three q-gram based distances share one routine; the last
     * argument (0L, 1L, 2L) selects which of them is computed. */
    case qgram :
      result = qgram_dist(str_a, len_a, str_b, len_b, S->q, S->tree, 0L);
      break;
    case cosine :
      result = qgram_dist(str_a, len_a, str_b, len_b, S->q, S->tree, 1L);
      break;
    case jaccard :
      result = qgram_dist(str_a, len_a, str_b, len_b, S->q, S->tree, 2L);
      break;
    case jw :
      result = jaro_winkler_dist(str_a, len_a, str_b, len_b, S->p, S->weight, S->work);
      break;
    case soundex :
      result = soundex_dist(str_a, len_a, str_b, len_b, &(S->ifail));
      break;
    default :
      /* Unknown distance: result stays -1.0 (TODO: set errno). */
      break;
  }

  return result;
}
コード例 #3
0
ファイル: hamming.c プロジェクト: mju/usaco
/*
 * Reads three integers (size_set, num_bits, dist) from "hamming.in",
 * searches for size_set values below 2^num_bits whose pairwise
 * hamming_dist() is at least dist, and writes them to "hamming.out",
 * ten per line separated by single spaces.
 *
 * 0 is always the first value of the set. If no set is found the output
 * file is created but left empty.
 *
 * Returns 0 on success and 1 when a file cannot be opened, the input is
 * malformed or out of range, or the output cannot be flushed.
 */
int
main(int argc, char** argv) {
  int size_set;
  int num_bits;
  int dist;
  short nums[1 << MAX_NUM_BITS];
  short sol[1 << MAX_NUM_BITS];
  int i;

  (void)argc;
  (void)argv;

  FILE* fin = fopen("hamming.in", "r");
  if (fin == NULL) {
    fprintf(stderr, "hamming: cannot open hamming.in\n");
    return 1;
  }
  int num_read = fscanf(fin, "%d%d%d", &size_set, &num_bits, &dist);
  fclose(fin);
  if (num_read != 3) {
    fprintf(stderr, "hamming: expected three integers in hamming.in\n");
    return 1;
  }

  /* nums and sol hold at most 1 << MAX_NUM_BITS entries; a larger (or
   * negative) num_bits would overflow them or make the shift undefined.
   * A set of fewer than one element can never be reached by the search,
   * which starts with one element already chosen. */
  if (num_bits < 0 || num_bits > MAX_NUM_BITS || size_set < 1) {
    fprintf(stderr, "hamming: input values out of range\n");
    return 1;
  }

  /* Collect every non-zero value that is far enough from 0, since 0 is
   * fixed as the first member of the set. */
  int bound = 1 << num_bits;
  int num_nums = 0;
  for (i = 1; i < bound; i++) {
    if (hamming_dist(0, i) >= dist) {
      nums[num_nums++] = (short)i;
    }
  }

  FILE* fout = fopen("hamming.out", "w");
  if (fout == NULL) {
    fprintf(stderr, "hamming: cannot open hamming.out\n");
    return 1;
  }

  sol[0] = 0;
  if (backtrack(1, 0, size_set, dist, nums, num_nums, sol)) {
    for (i = 0; i < size_set; i++) {
      if (i > 0) {
        /* Newline after every tenth value, a space otherwise. */
        if (i % 10 == 0) {
          fprintf(fout, "\n");
        } else {
          fprintf(fout, " ");
        }
      }
      fprintf(fout, "%hd", sol[i]);
    }
    fprintf(fout, "\n");
  }

  /* fclose flushes buffered output; a failure means the answer was lost. */
  if (fclose(fout) != 0) {
    fprintf(stderr, "hamming: error writing hamming.out\n");
    return 1;
  }

  return 0;
}
コード例 #4
0
/*
 * Command-line entry point: hashes each file named on the command line
 * and prints the 64-bit hash in hex followed by the file name.
 *
 * Options:
 *   -s N  super-token size (must be > 0; 1 selects charikar_hash64,
 *         anything else charikar_hash64_wide)
 *   -d    diff mode: only the first two files are hashed, then the XOR
 *         of the two hashes and their hamming_dist() are printed
 *   -q    quiet: suppress per-file lines; in diff mode print only the
 *         distance
 *
 * Any other option calls print_help(), which is expected not to return
 * (see the "Unreachable" note below).
 *
 * Returns 0 on success, 6 for an invalid super-token size, 7 when diff
 * mode is given fewer than two files.
 */
int
main( int argc, char **argv)
{
	/* Print help if there are no parameters */
	if (argc < 2)
		print_help();

	/* Parse input line */
	int opt;
	int stoken_size = 1;
	bool diff = false;
	bool quiet = false;
	while (opt = getopt( argc, argv, "dqs:"), opt != -1) {
		switch (opt) {
			case 's':
				stoken_size = atoi( optarg);
				break;
			case 'd':
				diff = true;
				break;
			case 'q':
				quiet = true;
				break;
			case 'h':
			default:
				print_help();
				/* Unreachable */
		}
	}

	if (stoken_size <= 0) {
		fprintf( stderr, "Error: Invalid super-token size\n");
		return 6;
	}

	if (diff) {
		if (argc-optind < 2) {
			fprintf( stderr, "Error: At least two files must be specified.\n");
			return 7;
		}
		/* Diff mode compares exactly the first two files. */
		argc = optind+2;
	}

	/* Do the work */
	/* Both start at 0 so that the first iteration's hash2 = hash1 does
	 * not read an uninitialized value. */
	uint64_t hash1 = 0, hash2 = 0;
	int c;
	for (c=optind; c<argc; c++) {
		char *data;
		long data_length;
		get_file( argv[ c], &data, &data_length);
		/* Keep the previous file's hash for the diff below. */
		hash2 = hash1;
		hash1 = stoken_size == 1
			? charikar_hash64( data, data_length) /* Fast version */
			: charikar_hash64_wide( data, data_length, stoken_size); /* Super-token version */
		free( data);
		if (!quiet)
			printf( "%016llx %s\n", (unsigned long long)hash1, argv[ c]);
	}

	/* Print results */
	if (diff) {
		uint64_t hashx = hash1^hash2;
		int hamm = hamming_dist( hash1, hash2);

		if (quiet) {
			printf( "%d\n", hamm);
		} else {
			printf( "%016llx\n", (unsigned long long)hashx);
			printf( "difference: %d\n", hamm);
		}
	}

	return 0;
}
コード例 #5
0
ファイル: hash.c プロジェクト: dtzWill/xkcd-hash
/*
 * Worker routine that searches without end for a LEN-character string whose
 * hash is as close as possible (by hamming_dist) to GOAL_BITS.
 *
 * Each candidate string is laid out as:
 *   PREFIX_STRING | random filler | SEARCH_CHARS enumerated chars | SUFFIX_STRING
 * For every random filler, all CHARSET combinations of the enumerated part
 * are tried; then a new filler is generated and the enumeration restarts.
 *
 * Improvements on global_best are printed under lock()/unlock(). After
 * 2^24 hashes this call adds its count to global_count once, and the last
 * caller to do so prints a throughput estimate.
 *
 * The parameter is ignored. The function contains no return statement and
 * loops forever.
 * NOTE(review): the void *(void *) signature and the num_threads counter
 * suggest this is a thread entry point — confirm against the caller.
 */
void *search(void *unused) {
  // Various size constants for convenience
  const size_t prelen = strlen(PREFIX_STRING);
  const size_t sufflen = strlen(SUFFIX_STRING);
  const size_t suffstart = LEN - sufflen;

  // Buffer used to store candidate string, extra for null terminator.
  char str[LEN + 1];
  str[LEN] = 0;

  // Put in our hardcoded prefix/suffix strings
  // (the terminator written after the prefix is overwritten below,
  // assuming gen_rand fills the bytes it is given — TODO confirm)
  strcpy(str, PREFIX_STRING);
  strcpy(str + suffstart, SUFFIX_STRING);

  // Track how many hashes we've tried, for throughput estimate.
  // 'counting' is cleared after the one-time report so it happens only once.
  uint64_t count = 0;
  char counting = 1;

  // Thread-local best score achieved, used to avoid
  // unnecessary accesses to shared global_best in the common case.
  int best = INT_MAX;

  // Buffer for storing computed hash bytes
  // (128 bytes = 1024 bits, matching the 1024 passed to Hash below)
  char hash[128];

start:
  // Re-entered via 'goto start' each time the enumeration below wraps around.
  // Fill the middle of the string with random data
  // (not including prefix, suffix, or portion we'll exhaustively search)
  gen_rand(str + prelen, LEN - prelen - sufflen - SEARCH_CHARS);

  // Iteration index array
  // Indirection used to keep character set flexible.
  unsigned idx[SEARCH_CHARS];
  memset(idx, 0, sizeof(idx));

  // Initialize enumeration part of string to first letter in charset
  // (the enumerated region sits immediately before the suffix)
  char *iterstr = str + suffstart - SEARCH_CHARS;
  memset(iterstr, CHARSET[0], SEARCH_CHARS);

  while (1) {
    // Try string in current form
    Hash(1024, (BitSequence *)str, LEN * 8, (BitSequence *)hash);

    // How'd we do?
    int d = hamming_dist(hash, GOAL_BITS, 128);

    // If this is the best we've seen, print it and update best.
    if (d < best) {
      best = d;

      // Re-check against the shared best under the lock before printing.
      lock();
      if (d < global_best) {
        global_best = d;
        printf("%d - '%s'\n", d, str);
        fflush(stdout);
      }
      unlock();
    }

    // Increment string index array, updating str as we go
    // aaaaaa
    // baaaaa
    // caaaaa
    // ...
    // abaaaa
    // bbaaaa
    // cbaaaa
    // ...
    // (etc)
    // Works like an odometer: position 0 is the fastest-changing digit and
    // an overflow at one position carries into the next.
    int cur = 0;
    while (++idx[cur] >= CHARSET_SIZE) {
      idx[cur] = 0;
      iterstr[cur] = CHARSET[idx[cur]];

      // Advance to next position.
      // If we've used all of our search characters,
      // time to start over with new random prefix.
      if (++cur == SEARCH_CHARS)
        goto start;
    }
    // Write the character for the position that absorbed the increment.
    iterstr[cur] = CHARSET[idx[cur]];

    // Throughput calculation.
    // Once this thread hits a limit, increment global_done
    // and print throughput estimate if we're the last thread to do so.
    const uint64_t iters = 1 << 24; // ~16M
    if (counting && ++count == iters) {
      counting = 0;

      // Elapsed whole seconds since global_start.
      // NOTE(review): if this is 0 the division below is by zero — confirm
      // that 2^24 hashes always take at least one second.
      time_t end = time(NULL);
      int elapsed = end - global_start;

      lock();
      global_count += count;
      assert(global_count >= count && "counter overflow");
      if (++global_done == num_threads) {
        printf("\n*** Total throughput ~= %f hash/S\n\n",
               ((double)(global_count)) / elapsed);
      }
      unlock();
    }
  }
}