int lunascan::addstring(const char *string,unsigned int id) { int i; int serial; if (POOL==NULL) { POOLMAX=2; if ((POOL=(LUNA_TOKEN *)malloc(POOLMAX*sizeof(LUNA_TOKEN)))==NULL) MEMERROR(); for (i=0; i<POOLMAX; i++) { POOL[i].string=NULL; POOL[i].id=LUNA_UNKNOWN; } POOLSIZE=0; } if (POOLSIZE>=POOLMAX) { if ((POOL=(LUNA_TOKEN *)realloc(POOL,2*POOLMAX*sizeof(LUNA_TOKEN)))==NULL) MEMERROR(); for (i=POOLMAX; i<2*POOLMAX; i++) { POOL[i].string=NULL; POOL[i].id=LUNA_UNKNOWN; } POOLMAX*=2; } serial=POOLSIZE; if (POOL[serial].string!=NULL) free(POOL[serial].string); POOL[serial].string=strdup(string); POOL[serial].id=id; addhash(serial); POOLSIZE++; return(serial); }
int main() { static const char *fname="main"; hashItr_t itr; void * data; addhash(0x01010001,0x1234); addhash(0x01060001,0x4567); addhash(0x01060002,0x9324); addhash(0x01070002,0x4321); addhash(0x01070004,0x2134); addhash(0x01080005,0x1324); addhash(0x01030001,0x1243); hashstats(7); hashItrInit(&itr); while ( data = hashItrNext(&itr) ) { CCAPPDEBUG(DEB_F_PREFIX"Itr found %lx", DEB_F_PREFIX_ARGS(SIP_SES_HASH, fname), data); } CCAPPDEBUG(DEB_F_PREFIX"%lx", DEB_F_PREFIX_ARGS(SIP_SES_HASH, fname), findhash(0x01010001)); CCAPPDEBUG(DEB_F_PREFIX"%lx", DEB_F_PREFIX_ARGS(SIP_SES_HASH, fname), findhash(0x01060001)); CCAPPDEBUG(DEB_F_PREFIX"%lx", DEB_F_PREFIX_ARGS(SIP_SES_HASH, fname), findhash(0x01060002)); CCAPPDEBUG(DEB_F_PREFIX"%lx", DEB_F_PREFIX_ARGS(SIP_SES_HASH, fname), findhash(0x01070002)); CCAPPDEBUG(DEB_F_PREFIX"%lx", DEB_F_PREFIX_ARGS(SIP_SES_HASH, fname), findhash(0x01070004)); CCAPPDEBUG(DEB_F_PREFIX"%lx", DEB_F_PREFIX_ARGS(SIP_SES_HASH, fname), findhash(0x01080005)); CCAPPDEBUG(DEB_F_PREFIX"%lx", DEB_F_PREFIX_ARGS(SIP_SES_HASH, fname), findhash(0x01030001)); delhash(0x01030001); delhash(0x01060001); hashstats(7); hashItrInit(&itr); while ( data = hashItrNext(&itr) ) { CCAPPDEBUG(DEB_F_PREFIX"Itr found %lx", DEB_F_PREFIX_ARGS(SIP_SES_HAS, fname), data); } CCAPPDEBUG(DEB_F_PREFIX"%lx", DEB_F_PREFIX_ARGS(SIP_SES_HASH, fname), findhash(0x01010001)); CCAPPDEBUG(DEB_F_PREFIX"%lx", DEB_F_PREFIX_ARGS(SIP_SES_HASH, fname), findhash(0x01060001)); CCAPPDEBUG(DEB_F_PREFIX"%lx", DEB_F_PREFIX_ARGS(SIP_SES_HASH, fname), findhash(0x01060002)); CCAPPDEBUG(DEB_F_PREFIX"%lx", DEB_F_PREFIX_ARGS(SIP_SES_HASH, fname), findhash(0x01070002)); CCAPPDEBUG(DEB_F_PREFIX"%lx", DEB_F_PREFIX_ARGS(SIP_SES_HASH, fname), findhash(0x01070004)); CCAPPDEBUG(DEB_F_PREFIX"%lx", DEB_F_PREFIX_ARGS(SIP_SES_HASH, fname), findhash(0x01080005)); CCAPPDEBUG(DEB_F_PREFIX"%lx", DEB_F_PREFIX_ARGS(SIP_SES_HASH, fname), findhash(0x01030001)); }
/*
 * Find the State stored for `prefix` in the table owned by `ps`.
 *
 * The prefix is hashed with ps->hash. When `create` is nonzero the request
 * is handed to addhash() and its result is returned; entries made that way
 * are not strdup'ed, so the prefix strings mustn't change later.
 *
 * Otherwise the bucket chain selected by the hash is searched for an entry
 * with the same hash value for which prefix_compare() reports a match.
 * Returns that entry, or NULL when the chain holds none.
 */
static State* ht_lookup(char *prefix[NPREF], int create, PrefixStorer* ps)
{
    hash_table *ht = ps->tableBase;
    uint32_t hv = ps->hash(prefix);
    State *node;

    if (create)
        return addhash(ht, prefix, hv);

    for (node = ht->table[hv % ht->len]; node != NULL; node = node->next) {
        /* Cheap hash comparison first; compare the prefixes only on a hit. */
        if (node->hv != hv)
            continue;
        if (prefix_compare(node->pref, prefix))
            return node;
    }

    return NULL;
}
/* * Decompress the chunk, calculating hashes */ static void hashchunk(int chunkno, char *chunkbufp, struct hashinfo **hinfop) { blockhdr_t *blockhdr; struct region *regp; z_stream z; int err, nreg; char hash[HASH_MAXSIZE]; unsigned char *(*hashfunc)(const unsigned char *, unsigned long, unsigned char *); readbuf_t *rbuf; #ifdef TIMEIT u_int64_t sstamp, estamp; #endif z.zalloc = Z_NULL; z.zfree = Z_NULL; z.opaque = Z_NULL; z.next_in = Z_NULL; z.avail_in = 0; z.next_out = Z_NULL; err = inflateInit(&z); CHECK_ERR(err, "inflateInit"); memset(hash, 0, sizeof hash); /* * Grab the header. It is uncompressed, and holds the real * image size and the magic number. Advance the pointer too. */ blockhdr = (blockhdr_t *)chunkbufp; chunkbufp += DEFAULTREGIONSIZE; nregions += blockhdr->regioncount; z.next_in = chunkbufp; z.avail_in = blockhdr->size; switch (blockhdr->magic) { case COMPRESSED_V1: regp = (struct region *)((struct blockhdr_V1 *)blockhdr + 1); break; case COMPRESSED_V2: regp = (struct region *)((struct blockhdr_V2 *)blockhdr + 1); break; default: fprintf(stderr, "Bad Magic Number!\n"); exit(1); } /* * Deterimine the hash function */ switch (hashtype) { case HASH_TYPE_MD5: default: hashfunc = MD5; break; case HASH_TYPE_SHA1: hashfunc = SHA1; break; } /* * Loop through all regions, decompressing and hashing data * in HASHBLK_SIZE or smaller blocks. 
*/ rbuf = alloc_readbuf(0, bytestosec(HASHBLK_SIZE), 0); if (rbuf == NULL) { fprintf(stderr, "no memory\n"); exit(1); } for (nreg = 0; nreg < blockhdr->regioncount; nreg++) { uint32_t rstart, rsize, hsize; rstart = regp->start; rsize = regp->size; ndatabytes += sectobytes(rsize); while (rsize > 0) { if (rsize > bytestosec(HASHBLK_SIZE)) hsize = bytestosec(HASHBLK_SIZE); else hsize = rsize; z.next_out = rbuf->data; z.avail_out = sectobytes(hsize); #ifdef TIMEIT sstamp = rdtsc(); #endif err = inflate(&z, Z_SYNC_FLUSH); #ifdef TIMEIT estamp = rdtsc(); dcycles += (estamp - sstamp); #endif if (err != Z_OK && err != Z_STREAM_END) { fprintf(stderr, "inflate failed, err=%d\n", err); exit(1); } /* * Make sure we are still in synch */ if (z.avail_out != 0) { fprintf(stderr, "inflate failed to fill buf, %d left\n", z.avail_out); exit(1); } if (err == Z_STREAM_END && hsize != rsize) { fprintf(stderr, "inflate ran out of input, %d left\n", rsize - hsize); exit(1); } /* * Compute the hash */ (void)(*hashfunc)(rbuf->data, sectobytes(hsize), hash); addhash(hinfop, chunkno, rstart, hsize, hash); rstart += hsize; rsize -= hsize; } regp++; } free_readbuf(rbuf); if (z.avail_in != 0) { fprintf(stderr, "too much input for chunk, %d left\n", z.avail_in); exit(1); } }
void countSubstrings(char* buffer, int size) { /*loop counters*/ int i; int k; /*hold various substrings*/ char* sub1; char* sub2; /*run along buffer*/ char* sub1run; char* runner; char* c; int msize; int locsize; int tempC; msize=2000; total=0; mindex=0; tempC=0; sub1 = (char*) calloc (size,sizeof(char)); sub2 = (char*) calloc (size,sizeof(char)); mword= (matchingWord*) calloc (msize,sizeof(matchingWord)); mword[mindex].count=0; if(vflag) printf("total size %d",size); if(vflag) printf("READ: %s\n",buffer); /*all length from 3 to size/2*/ for(i=mnum;i<ceil((size/2));i++) { if(vflag) printf("Looking for substrings of size %d\n",i); sub1run=&buffer[0]; //if(strlen(sub1)==13) // printf("Looking for %s\n",sub1); /*all substrings of size i*/ for(k=0;k<=size-i-i;k++) { sub1=memset(sub1, '\0', i); strncpy(sub1,sub1run,i); sub1[i]='\0'; //if(strlen(sub1)>=mnum) if(check(hash(sub1),sub1,i)) { runner=sub1run+i; // if(vflag) // printf("SUB1 %s\n",sub1); locsize=50; mword[mindex].loc= (int*) calloc (locsize,sizeof(int)); mword[mindex].apart= (int*) calloc (locsize,sizeof(int)); mword[mindex].count=0; /* if(vflag) { printf("Number of distinct words found: %d\n",mindex); //printf("We have moved %d along\n",k); }*/ mword[mindex].loc[mword[mindex].count]=k; sub2=memset(sub2, '\0', i); strncpy(sub2,runner,i); sub2[i]='\0'; /*fills sub2 with all substrings of length i * which are to the right of sub1 that we have not seen before*/ /*if we have not seen this substring before*/ while((c=strstr(runner,sub1))!=NULL) { sub2=memset(sub2, '\0', i); //printf("SUB2\t%s\n",sub2); strncpy(sub2,c,i); runner=c; /*checks to see if they match*/ if(strcmp(sub1,sub2)==0) { //printf("%s FOUND\n",sub1); total++; mword[mindex].count++; tempC=mword[mindex].count; /* if(mword[mindex].count>=locsize-1) { if(vflag) printf("realloc number of locations to %d\n",locsize); locsize=locsize*2; mword[mindex].loc=(int*)realloc (mword[mindex].loc,(sizeof(int)*(locsize))); mword[mindex].apart=(int*)realloc 
(mword[mindex].apart,(sizeof(int)*(locsize))); }*/ /*check locations and see the difference * then add difference to initial location*/ mword[mindex].loc[tempC]=(c-sub1run)+k; mword[mindex].apart[tempC]=(mword[mindex].loc[tempC] -mword[mindex].loc[tempC-1]); //=c-mword[mindex].loc[tempC]; if(vflag) { if(mword[mindex].count==1) printf("First location is %d \n",mword[mindex].loc[0]); } } runner++; } } if(mword[mindex].count>0) { //printf("COUNT %d\n\n",mword[mindex].count); mword[mindex].length=i; /*add newly found word to hashtable*/ addhash(hash(sub1),sub1,i); //if(vflag) // printf("%s\n",mword[mindex].str); mindex++; } sub1run++; if(mindex>=msize) { //printf("realloc\n"); msize*=2; mword=(matchingWord*) realloc(mword,(sizeof(matchingWord)*(msize))); } } if(total==0) { //printf("no more words on size %d and greater to be found\n",i); break; } else { total=0; } } free(sub1); free(sub2); }