/*
 * Hash a NUL-terminated string.
 *
 * The accumulator starts at HASH_INIT and every character is folded in
 * with HASH_STEP.  At most MAXLEN leading characters take part, so longer
 * strings hash the same as their MAXLEN-character prefix.
 *
 * NOTE(review): this file holds a second definition of hash(); presumably
 * only one of them is compiled in -- confirm against the surrounding
 * preprocessor conditionals.
 */
unsigned long hash(char * str)
{
    unsigned long acc = HASH_INIT;
    unsigned int pos = 0;

    while (str[pos] && (pos < MAXLEN)) {
        HASH_STEP(acc, str[pos]);
        pos++;
    }

    return acc;
}
/*
 * Hash a NUL-terminated string into a 64-bit value.
 *
 * The accumulator starts at HASH_INIT and every character up to (but not
 * including) the terminating NUL is folded in with HASH_STEP.
 *
 * NOTE(review): this file holds a second definition of hash(); presumably
 * only one of them is compiled in -- confirm against the surrounding
 * preprocessor conditionals.
 */
uint64_t hash(char * str)
{
    uint64_t acc = HASH_INIT;
    unsigned int pos = 0;

    while (str[pos]) {
        HASH_STEP(acc, str[pos]);
        pos++;
    }

    return acc;
}
int main(int argc, char ** argv) { FILE * dictionnary; FILE * pwds; char line[LINELEN]; unsigned int i,j; avl_tree_t * sroot; struct rlimit rlim; BLOOM_TYPE * bloom; BLOOM_TYPE * bloom_cityhash; unsigned long curhashes[MAXLEN]; unsigned int curstart[MAXLEN]; unsigned int maxlen; unsigned int maxstart; unsigned char ln2[LINELEN]; unsigned int len; unsigned int minmatch; rlim.rlim_cur = MAXMEM; rlim.rlim_max = MAXMEM; if(setrlimit(RLIMIT_AS, &rlim)) { perror("setrlimit"); return 3; } if( (argc != 4) && (argc != 5) ) usage(); nblines = 0; if(argc == 5) { minmatch = atoi(argv[4]); if(minmatch == 0) { fprintf(stderr, "can't parse %s as int\n", argv[4]); return 6; } } else minmatch = 1; /* arbre contenant le dictionnaire pour éviter les FP */ sroot = avl_alloc_tree((avl_compare_t)strcmp, (avl_freeitem_t)free); /* bloom table */ bloom = xmalloc(BLOOM_STORAGE); memset(bloom, 0, BLOOM_STORAGE); bloom_cityhash = xmalloc(BLOOM_STORAGE); memset(bloom_cityhash, 0, BLOOM_STORAGE); fprintf(stderr, "%lld bytes have been allocated for bloom filters\n", BLOOM_STORAGE*2); if(strcmp(argv[1],"-")==0) dictionnary = stdin; else dictionnary = fopen(argv[1], "r"); if(dictionnary == NULL) { perror(argv[1]); return 2; } pwds = fopen(argv[2], "r"); if(pwds == NULL) { perror(argv[2]); return 3; } while(fgets(line, LINELEN-1, dictionnary)) { len = strlen(line); if(len>MAXLEN) continue; if(len<minmatch) continue; nblines++; if(nblines % 1000000 == 0) fprintf(stderr, "%ldM dictionnary lines integrated\n", nblines/1000000); line[len-1]=0; // trim ! 
SETBIT(bloom, hash(line)); SETBIT(bloom_cityhash, cityhash(line)); avl_insert(sroot, strdup(line)); } printf("%ld lines\n", nblines); fclose(dictionnary); nbpass = 0; nbfp = 0; nbmatch = 0; while(fgets((char*) line, LINELEN-1, pwds)) { nbpass++; if(nbpass % 500000 == 0) fprintf(stderr, "%.1fM passwords analyzed [%s]\n", ((float)nbpass)/1000000.0, argv[3]); maxlen = 0; maxstart = 0; memset(curstart, 0, sizeof(curstart)); for(i=0;i<MAXLEN;i++) curhashes[i]=HASH_INIT; for(i=0;line[i] && line[i]!='\n' && (i<MAXLEN*2);i++) { curhashes[i%MAXLEN]=HASH_INIT; curstart[i%MAXLEN] = i; for(j=0;j<MAXLEN;j++) { HASH_STEP(curhashes[j],line[i]); } for(j=0;j<MAXLEN;j++) { /* si on ne bat pas le maxlen courant ça ne sert à rien */ if( i-curstart[j]+1 < maxlen ) continue; /* check bloom filter */ if(GETBIT(bloom, curhashes[j])) { memset(ln2, 0, sizeof(ln2)); memcpy(ln2, line+curstart[j], i-curstart[j]+1); if(GETBIT(bloom_cityhash, cityhash((char*)ln2))) { if(avl_search(sroot, ln2)) /* evitons les FP */ { maxlen = i-curstart[j]+1; maxstart = curstart[j]; } else { nbfp++; } } } } } if(maxlen<minmatch) continue; line[strlen((char*)line)-1] = 0; printf("%s\t%s\t", argv[3], line); if(maxstart>0) { memset(ln2, 0, sizeof(ln2)); strncpy((char*)ln2, (char*)line, maxstart); printf("%s\t", ln2); } else printf("\t"); if(maxstart+maxlen<strlen((char*)line)) printf("%s\t", line + maxstart + maxlen); else printf("\t"); line[maxstart+maxlen] = 0; printf("%s\t%d\n", line+maxstart, nbpass); nbmatch++; } fprintf(stderr, "%d passwords analyzed, %d matches, %d false positives\n", nbpass, nbmatch, nbfp); return 0; }