Пример #1
0
/*
 * Hash a NUL-terminated string, considering at most its first MAXLEN bytes.
 *
 * The accumulator starts at HASH_INIT and HASH_STEP is applied once per byte,
 * in order; both macros, and MAXLEN, are defined elsewhere in the project.
 *
 * str: string to hash; it is only read.
 * Returns the accumulated hash value (HASH_INIT for an empty string).
 */
unsigned long hash(char * str)
{
	unsigned long hash = HASH_INIT;
	unsigned int i;

	/*
	 * The bound is tested before the byte is read, so str[MAXLEN] is never
	 * touched when the caller hands in a MAXLEN-byte buffer that has no
	 * terminator.
	 */
	for(i = 0; (i<MAXLEN) && str[i]; i++)
	{
		HASH_STEP(hash,str[i]);
	}
	return hash;
}
Пример #2
0
/*
 * Hash a whole NUL-terminated string into a 64-bit value.
 *
 * The accumulator is seeded with HASH_INIT and updated with HASH_STEP for
 * every byte up to (not including) the terminator; both macros are defined
 * elsewhere in the project.
 *
 * str: string to hash; it is only read.
 * Returns the accumulated value (HASH_INIT for an empty string).
 */
uint64_t hash(char * str)
{
        uint64_t acc = HASH_INIT;
        unsigned int pos = 0;

        while(str[pos])
        {
                HASH_STEP(acc,str[pos]);
                pos++;
        }
        return acc;
}
Пример #3
0
/*
 * Find, for every password, the longest substring that is a dictionary word.
 *
 * Arguments:
 *   argv[1]  dictionary file, one word per line ("-" reads stdin)
 *   argv[2]  password file, one password per line
 *   argv[3]  label echoed in the first output column and in progress messages
 *   argv[4]  optional minimum match length (defaults to 1)
 *
 * Every dictionary word is recorded in two bloom filters (hash() and
 * cityhash()) and in an AVL tree. Each password is scanned with up to MAXLEN
 * rolling hashes, one per candidate start offset; a candidate substring is
 * looked up in the tree only when both bloom filters report a hit.
 *
 * One line is printed on stdout per password whose best match is at least
 * minmatch long:
 *   label <TAB> password <TAB> prefix <TAB> suffix <TAB> match <TAB> password number
 *
 * Returns 0 on success, 2 if the dictionary cannot be opened, 3 if setrlimit
 * fails or the password file cannot be opened, 4 if a dictionary word cannot
 * be duplicated, 6 if argv[4] is not a positive integer.
 */
int main(int argc, char ** argv)
{
	FILE * dictionnary;
	FILE * pwds;
	char line[LINELEN];
	unsigned int i,j;
	avl_tree_t * sroot;
	struct rlimit rlim;
	BLOOM_TYPE * bloom;
	BLOOM_TYPE * bloom_cityhash;

	unsigned long curhashes[MAXLEN];
	unsigned int curstart[MAXLEN];
	unsigned int maxlen;
	unsigned int maxstart;
	unsigned int active;
	unsigned char ln2[LINELEN];
	unsigned int len;
	unsigned int minmatch;
	char * word;
	char * end;
	long parsed;

	/* cap the address space so a huge dictionary fails instead of swapping */
	rlim.rlim_cur = MAXMEM;
	rlim.rlim_max = MAXMEM;
	if(setrlimit(RLIMIT_AS, &rlim))
	{
		perror("setrlimit");
		return 3;
	}

	if( (argc != 4) && (argc != 5) )
	{
		usage();
		/* presumably usage() exits; never fall through to argv[] if it does not */
		return 1;
	}
	nblines = 0;

	if(argc == 5)
	{
		/*
		 * strtol instead of atoi: a negative value would otherwise wrap to
		 * a huge unsigned minmatch and silently suppress every match.
		 */
		parsed = strtol(argv[4], &end, 10);
		if(end == argv[4] || parsed <= 0 || (unsigned long)parsed > (unsigned int)-1)
		{
			fprintf(stderr, "can't parse %s as int\n", argv[4]);
			return 6;
		}
		minmatch = (unsigned int)parsed;
	}
	else
		minmatch = 1;

	/* tree holding the dictionary, used to rule out bloom-filter false positives */
	sroot = avl_alloc_tree((avl_compare_t)strcmp, (avl_freeitem_t)free);

	/* bloom tables, one per hash function */
	bloom = xmalloc(BLOOM_STORAGE);
	memset(bloom, 0, BLOOM_STORAGE);
	bloom_cityhash = xmalloc(BLOOM_STORAGE);
	memset(bloom_cityhash, 0, BLOOM_STORAGE);

	fprintf(stderr, "%lld bytes have been allocated for bloom filters\n", BLOOM_STORAGE*2);

	if(strcmp(argv[1],"-")==0)
		dictionnary = stdin;
	else
		dictionnary = fopen(argv[1], "r");
	if(dictionnary == NULL)
	{
		perror(argv[1]);
		return 2;
	}
	pwds = fopen(argv[2], "r");
	if(pwds == NULL)
	{
		perror(argv[2]);
		return 3;
	}

	while(fgets(line, LINELEN-1, dictionnary))
	{
		/* len still counts the trailing newline when there is one */
		len = strlen(line);
		if(len>MAXLEN)
			continue;
		if(len<minmatch)
			continue;
		nblines++;
		if(nblines % 1000000 == 0)
			fprintf(stderr, "%ldM dictionnary lines integrated\n", nblines/1000000);
		/*
		 * Strip the newline only if it is there: a final line without one
		 * must not lose its last character.
		 */
		line[strcspn(line, "\n")] = 0;
		SETBIT(bloom, hash(line));
		SETBIT(bloom_cityhash, cityhash(line));
		word = strdup(line);
		if(word == NULL)
		{
			/* plausible under the RLIMIT_AS cap; a NULL item would crash strcmp */
			perror("strdup");
			return 4;
		}
		/* NOTE(review): if avl_insert rejects duplicates, word leaks here - confirm against the avl API */
		avl_insert(sroot, word);
	}
	printf("%ld lines\n", nblines);
	fclose(dictionnary);

	nbpass = 0;
	nbfp = 0;
	nbmatch = 0;
	while(fgets(line, LINELEN-1, pwds))
	{
		nbpass++;
		if(nbpass % 500000 == 0)
			fprintf(stderr, "%.1fM passwords analyzed [%s]\n", ((float)nbpass)/1000000.0, argv[3]);
		maxlen = 0;
		maxstart = 0;
		memset(curstart, 0, sizeof(curstart));
		for(i=0;i<MAXLEN;i++)
			curhashes[i]=HASH_INIT;
		for(i=0;line[i] && line[i]!='\n' && (i<MAXLEN*2);i++)
		{
			/* slot i%MAXLEN is recycled for the substring starting at i */
			curhashes[i%MAXLEN]=HASH_INIT;
			curstart[i%MAXLEN] = i;
			/*
			 * Only slots 0..i hold a distinct start offset while i < MAXLEN-1.
			 * The remaining slots would merely duplicate slot 0, causing
			 * repeated lookups and an inflated false-positive count.
			 */
			active = (i+1 < MAXLEN) ? i+1 : MAXLEN;
			/* after this, curhashes[j] covers line[curstart[j]..i] */
			for(j=0;j<active;j++)
			{
				HASH_STEP(curhashes[j],line[i]);
			}
			for(j=0;j<active;j++)
			{
				/* no point checking a candidate that cannot beat the current longest match */
				if( i-curstart[j]+1 < maxlen )
					continue;
				/* check bloom filter */
				if(GETBIT(bloom, curhashes[j]))
				{
					/* zero-filled buffer keeps the copied substring NUL-terminated */
					memset(ln2, 0, sizeof(ln2));
					memcpy(ln2, line+curstart[j], i-curstart[j]+1);
					if(GETBIT(bloom_cityhash, cityhash((char*)ln2)))
					{
						if(avl_search(sroot, ln2)) /* confirm in the tree to avoid false positives */
						{
							maxlen = i-curstart[j]+1;
							maxstart = curstart[j];
						}
						else
						{
							nbfp++;
						}
					}
				}
			}
		}
		if(maxlen<minmatch)
			continue;
		/* drop the newline if present, without eating a real character otherwise */
		line[strcspn(line, "\n")] = 0;
		printf("%s\t%s\t", argv[3], line);
		/* prefix: what precedes the match */
		if(maxstart>0)
		{
			memset(ln2, 0, sizeof(ln2));
			strncpy((char*)ln2, line, maxstart);
			printf("%s\t", ln2);
		}
		else
			printf("\t");
		/* suffix: what follows the match */
		if(maxstart+maxlen<strlen(line))
			printf("%s\t", line + maxstart + maxlen);
		else
			printf("\t");
		/* cut the line right after the match so it can be printed in place */
		line[maxstart+maxlen] = 0;
		printf("%s\t%d\n", line+maxstart, nbpass);
		nbmatch++;
	}
	fclose(pwds);

	fprintf(stderr, "%d passwords analyzed, %d matches, %d false positives\n", nbpass, nbmatch, nbfp);

	return 0;
}