/*
 * Decide whether `url` should be queued for crawling.
 *
 * Returns 1 only when every check passes:
 *   - `url` contains `host` somewhere in it,
 *   - `url` does not start with "mail",
 *   - `url` is in none of failedPool, URLPool, finishPool,
 *   - CURL_typeTest(url) reports a non-zero result.
 * Returns 0 otherwise. When only the CURL_typeTest() check fails, the URL
 * is additionally appended to the end of its failedPool chain.
 */
int urlCheck(char *url,char *host){
    UPU *tail;
    size_t url_len=strlen(url);
    int slot;

    if(memmem(url,url_len,host,strlen(host))==NULL)
        return 0;
    /* strncmp stops at the terminating NUL, so URLs shorter than four
     * bytes are not read past their end (memcmp would read 4 bytes). */
    if(strncmp(url,"mail",4)==0)
        return 0;
    if(find(failedPool,url,url_len)!=NULL)
        return 0;
    if(find(URLPool,url,url_len)!=NULL)
        return 0;
    if(find(finishPool,url,url_len)!=NULL)
        return 0;

    if(CURL_typeTest(url))
        return 1;

    /* Type test failed: record the URL at the tail of its failedPool chain. */
    slot=hashfn(url);
    tail=failedPool[slot];
    if(tail==NULL){
        myAdd(&(failedPool[slot]),url,url_len);
    }else{
        while(tail->next!=NULL)
            tail=tail->next;
        myAdd(&(tail->next),url,url_len);
    }
    return 0;
}
float facehash::search(vec ¢er, geoiterateface &itt) { FP best = itt.initaldist(); FP curr; qcell *next,*consider; pqueue<qcellptr> q; longset ls; q.add(next = tocell(center,ls)); FP currdist = next->celldist; if (level==0) numq++; int p,i; // first take care of the "patches" in toadd int pos = hashfn(next->i,false); if (toadd.length()>0 && (pos == -1 || storage[pos].contents==NULL || storage[pos].contents->length()<toadd.length())) { addall(); } else { for(i=0;i<toadd.length();i++) { nums++; curr = itt.process(toadd[i]); if (curr<best) best = curr; } } while(q.size()>0) { next = q.head().p; q.remove(0); currdist = next->celldist; if (currdist>best) { delete next; break; } p = hashfn(next->i,false); if (p!=-1) { if (storage[p].contents) { for(i=0;i<storage[p].contents->length();i++) { nums++; curr = itt.process((*(storage[p].contents))[i]); if (curr<best) best = curr; if (curr<currdist) break; } } else { curr = storage[p].subhash->search(center,itt); if (curr<best) best = curr; } if (best<currdist) break; } for(i=0;i<DIM;i++) { if ((consider=tocell(center,next->i,i,-1,ls))!=NULL) { q.add(consider); } if ((consider=tocell(center,next->i,i,1,ls))!=NULL) { q.add(consider); } } delete next; } for(i=0;i<q.size();i++) delete q.nth(i).p; return best; }
unsigned long spn_hash_value(const SpnValue *key) { switch (valtype(key)) { case SPN_TTAG_NIL: { return 0; } case SPN_TTAG_BOOL: { return boolvalue(key); /* 0 or 1 */ } case SPN_TTAG_NUMBER: { if (isfloat(key)) { double f = floatvalue(key); /* only test for integer if it fits into one (anti-UB) */ if (LONG_MIN <= f && f <= LONG_MAX) { long i = f; /* truncate */ if (f == i) { /* it's really an integer. * This takes care of the +/- 0 problem too * (since 0 itself is an integer) */ return i; } } else { return spn_hash_bytes(&f, sizeof f); } } /* the hash value of an integer is itself */ return intvalue(key); } case SPN_TTAG_STRING: case SPN_TTAG_ARRAY: case SPN_TTAG_HASHMAP: case SPN_TTAG_FUNC: { SpnObject *obj = objvalue(key); unsigned long (*hashfn)(void *) = obj->isa->hashfn; return hashfn ? hashfn(obj) : (unsigned long)(obj); } case SPN_TTAG_USERINFO: { if (isobject(key)) { SpnObject *obj = objvalue(key); unsigned long (*hashfn)(void *) = obj->isa->hashfn; return hashfn ? hashfn(obj) : (unsigned long)(obj); } return (unsigned long)(ptrvalue(key)); } default: SHANT_BE_REACHED(); } return 0; }
//---------------------------------------------------- // Method : // Description : // Author : Staffan Flodin //---------------------------------------------------- object table::put(object *o,char *prop){ int bucket; linked_list *iter; ODB_INT hashval; property_value *pv; pv=(*o).getpropertynamed(prop); switch((*pv).gettype()){ case _INT_: hashval=(*pv).getintval(); bucket=(*this).hashfn(hashval); break; case _REAL_: bucket=(*this).hashfn((*pv).getrealval()); break; case _CHAR_: bucket=hashfn((*pv).getcharstringval()); break; case _OBJECT_: bucket=(*this).hashfn((*pv).getelem()); break; case _COLLECTION_:bucket=(*this).hashfn((*pv).getsetval()); break; };//end switch iter=tbl+bucket; //pointer arithmetic (*iter).insert(o); return *o; }
// Store face `t` in the cell addressed by index vector `i`.
//
// A cell holds either a plain face list (`contents`) or a nested facehash
// (`subhash`). When the list already holds `maxcontents` faces and this
// table is above `maxlevel`, the list is converted into a nested facehash
// one level deeper and the faces are re-added there.
void facehash::addcell(short i[DIM], face *t) {
    const unsigned int slot = hashfn(i,true);

    // No face list: forward to the sub-hash, or start a fresh list.
    if (!storage[slot].contents) {
        if (storage[slot].subhash) {
            storage[slot].subhash->add(t);
        } else {
            storage[slot].contents = new ilist<face *>;
            *(storage[slot].contents) += t;
        }
        return;
    }

    const bool mustSplit =
        level<maxlevel && storage[slot].contents->length()==maxcontents;
    if (!mustSplit) {
        *(storage[slot].contents) += t;
        return;
    }

    // Split: build the corners for the nested hash from this cell's index.
    vec mn = mincorner;
    for (int axis=0; axis<DIM; axis++)
        mn += div[axis]*i[axis];
    vec mx = mn+div;

    // NOTE(review): mx is passed twice to the constructor, as in the
    // original code; confirm against the facehash constructor signature.
    storage[slot].subhash = new facehash(mn,mx,mx,level+1);

    // Move the existing faces, then the new one, into the nested hash.
    for (int k=0; k<storage[slot].contents->length(); k++)
        storage[slot].subhash->add(storage[slot].contents->nth(k));
    storage[slot].subhash->add(t);

    delete storage[slot].contents;
    storage[slot].contents = NULL;
}
// Remove face `t` from the cell addressed by index vector `i`.
//
// Does nothing when hashfn() reports -1 for the cell, or when the cell has
// neither a face list nor a nested facehash.
void facehash::removecell(short i[DIM],face *t) {
    // Signed, like the lookups in facehash::search, so the -1 sentinel is
    // compared without a signed/unsigned conversion.
    const int p = hashfn(i,false);
    if (p==-1) return;

    if (storage[p].contents)
        storage[p].contents->remove(t);
    else if (storage[p].subhash)
        storage[p].subhash->remove(t);
}
const void * HashSet::add( const void * item ) { unsigned int hash = hashfn( item ); unsigned index = hash % bucketCount; HashSetEntry *entry; /* search the item in the set*/ for ( entry = buckets[index]; entry; entry = entry->next ) if ( entry->hash == hash && comparefn ( entry->item, item ) == 0 ) return item; /* if count exceeds threshold, rehash all the items */ if ( count >= threshold ) { this->rehash(); index = hash % this->bucketCount; } /* save the new item */ entry = ( HashSetEntry * )malloc( sizeof( HashSetEntry ) ); entry->hash = hash; entry->item = (void *) item; entry->next = buckets[index]; this->buckets[index] = entry; ++this->count; return NULL; }
//---------------------------------------------------- // Method : // Description : // Author : Staffan Flodin //---------------------------------------------------- int table::hashfn(ODB_SET sv){ int sum=0; collection *curr; if (sv==NULL) return 0; curr=sv; while (curr!=NULL) { switch((*curr).gettype()){ case _INT_:sum=sum+(*curr).getintval(); break; case _REAL_:sum=sum+(*curr).getrealval(); break; case _CHAR_:sum=sum+(int)(*curr).getcharstringval(); break; case _OBJECT_:sum=sum+(int)(*curr).getobj(); break; case _COLLECTION_:sum=sum+hashfn((*curr).getsetval()); } curr=(*curr).getnext(); }; return (sum % tablesize); }
/*
 * Insert a copy of string `n` into the open-addressing table.
 *
 * Probing starts at hashfn(n) and steps linearly (wrapping at MAXPEO) to
 * the first empty slot, where a strdup() copy of `n` is stored. No check
 * for an existing equal entry is made. When all MAXPEO slots are occupied
 * the string is not stored, rather than probing forever.
 */
void zarad( const char *n )
{
    int kam = hashfn( n );
    int probes;

    for( probes = 0; probes < MAXPEO && table[kam] != NULL; probes++ )
        kam = (kam+1) % MAXPEO;

    if( probes == MAXPEO )
        return; /* every slot was visited and none is free */

    table[kam] = strdup( n );
}
/*
 * Look up string `n` in the open-addressing table.
 *
 * Probing starts at hashfn(n) and steps linearly (wrapping at MAXPEO).
 * Returns the slot index holding an equal string, or -1 when an empty slot
 * is reached first or when all MAXPEO slots were inspected without a match
 * (so a full table cannot cause an endless loop).
 */
int vezmi( const char *n )
{
    int kam = hashfn( n );
    int probes;

    for( probes = 0; probes < MAXPEO; probes++ ) {
        if( table[kam] == NULL )
            return -1;
        if( strcmp( table[kam], n ) == 0 )
            return kam;
        kam = (kam+1) % MAXPEO;
    }
    return -1;
}
/*
 * Search one bucket chain of `table` for `url`.
 *
 * The bucket is chosen with hashfn(url). A node matches when the first
 * `size` bytes of its URL field equal the first `size` bytes of `url`.
 * Returns the first matching node, or NULL when the chain has none.
 */
UPU *find(UPU **table,char *url,int size){
    UPU *node;

    for(node=table[hashfn(url)]; node!=NULL; node=node->next){
        if(memcmp(node->URL,url,size)==0)
            return node;
    }
    return NULL;
}
/*
 * Link `of` at the head of its hash chain in ofork_table.
 *
 * The chain is selected by hashfn(&of->key). Each node stores `prevp`, the
 * address of the pointer that points at it (the bucket slot or the previous
 * node's `next`); the old head's `prevp` is updated accordingly.
 */
static void of_hash(struct ofork *of)
{
    struct ofork **bucket = &ofork_table[hashfn(&of->key)];
    struct ofork *old_head = *bucket;

    of->next = old_head;
    if (old_head != NULL)
        old_head->prevp = &of->next;

    *bucket = of;
    of->prevp = bucket;
}
unsigned long spn_hash_value(const SpnValue *key) { switch (valtype(key)) { case SPN_TTAG_NIL: { return 0; } case SPN_TTAG_BOOL: { return boolvalue(key); /* 0 or 1 */ } case SPN_TTAG_NUMBER: { if (isfloat(key)) { double f = floatvalue(key); long i = f; /* truncate */ if (f == i) { return i; /* it's really an integer */ } else { return spn_hash_bytes(&f, sizeof f); } } /* the hash value of an integer is itself */ return intvalue(key); } case SPN_TTAG_STRING: case SPN_TTAG_ARRAY: case SPN_TTAG_HASHMAP: case SPN_TTAG_FUNC: { SpnObject *obj = objvalue(key); unsigned long (*hashfn)(void *) = obj->isa->hashfn; return hashfn ? hashfn(obj) : (unsigned long)(obj); } case SPN_TTAG_USERINFO: { if (isobject(key)) { SpnObject *obj = objvalue(key); unsigned long (*hashfn)(void *) = obj->isa->hashfn; return hashfn ? hashfn(obj) : (unsigned long)(obj); } return (unsigned long)(ptrvalue(key)); } default: SHANT_BE_REACHED(); } return 0; }
/* Grow MAP's bucket array and redistribute every entry.

   The new bucket count is __go_map_next_prime of twice the old count.
   Each entry starts with a pointer to the next entry in its chain and
   holds its key at the descriptor's key offset; entries are relinked in
   place (pushed onto the head of their new chain), not copied.  The old
   bucket array is freed afterwards.  */

static void
__go_map_rehash (struct __go_map *map)
{
  const struct __go_map_descriptor *descriptor = map->__descriptor;
  const struct __go_type_descriptor *key_descriptor
    = descriptor->__map_descriptor->__key_type;
  uintptr_t key_offset = descriptor->__key_offset;
  size_t key_size = key_descriptor->__size;
  size_t (*hashfn) (const void *, size_t) = key_descriptor->__hashfn;

  uintptr_t old_bucket_count = map->__bucket_count;
  void **old_buckets = map->__buckets;

  uintptr_t new_bucket_count = __go_map_next_prime (old_bucket_count * 2);
  void **new_buckets
    = (void **) __go_alloc (new_bucket_count * sizeof (void *));
  uintptr_t b;

  __builtin_memset (new_buckets, 0, new_bucket_count * sizeof (void *));

  for (b = 0; b < old_bucket_count; ++b)
    {
      char *entry = old_buckets[b];

      while (entry != NULL)
        {
          /* We could speed up rehashing at the cost of memory space
             by caching the hash code.  */
          size_t slot = hashfn (entry + key_offset, key_size)
                        % new_bucket_count;
          char *following = *(char **) entry;

          /* Push the entry onto the head of its new chain.  */
          *(char **) entry = new_buckets[slot];
          new_buckets[slot] = entry;

          entry = following;
        }
    }

  __go_free (old_buckets);

  map->__bucket_count = new_bucket_count;
  map->__buckets = new_buckets;
}
const void * HashSet::get( void * item ) { unsigned int hash = hashfn( item ); unsigned int index = hash % bucketCount; HashSetEntry * entry; for ( entry = buckets[index]; entry; entry = entry->next ) { if ( entry->hash == hash && comparefn( entry->item, item ) == 0) return ( entry->item ); } return (NULL); }
const void * HashSet::remove( const void * item ) { unsigned int hash = hashfn( item ); unsigned int indx = hash % bucketCount; HashSetEntry *entry, *prev; for ( entry = this->buckets[indx], prev = NULL; entry; prev = entry, entry = entry->next ) { if ( entry->hash == hash && comparefn( entry->item, item )==0 ) { item = entry->item; if ( prev ) prev->next = entry->next; else this->buckets[indx] = entry->next; free( entry ); --this->count; return ( item ); } } return (NULL); }
void getURL(char *body,int len,char *srcURL,char *prefix,char *host,char *path,char *filename){ char *tag=" href="; char *buf; char *url_l,*url_r; int url_len; char url[4096],*realURL; char *delimiter="\""; char *delimiter2="\'"; int dlen=strlen(delimiter); int pos; UPU *tmp; //preprocess filter(body,len); buf=memmem(body,len,tag,5); memset(url,0,4096); fprintf(logfp,"do get URL from %s\n",srcURL); while(1){ if(buf!=NULL){ // left of delimiter if((url_l=memmem(buf,len-(buf-body),delimiter,dlen))!=NULL){ url_l+=dlen; // right of delimiter url_r=memmem(url_l+1,len-(url_l-body)-1,delimiter,dlen); // url length }else if((url_l=memmem(buf,len-(buf-body),delimiter2,dlen))!=NULL){ url_l+=dlen; // right of delimiter url_r=memmem(url_l+1,len-(url_l-body)-1,delimiter2,dlen); }else{ url_l=buf+6; url_r=memmem(url_l,len-(url_l-body),">",1); } // url length url_len=url_r-url_l; if(url_len>0){ memcpy(url,url_l,url_len); url[url_len]='\0'; realURL=getRealURL(srcURL,url,prefix,host,path,filename); memset(url,0,url_len); url_len=strlen(realURL); if(urlCheck(realURL,host)){ if(memmem(realURL,url_len,srcURL,strlen(srcURL)-strlen(filename))!=NULL){ pos=hashfn(realURL); if((tmp=URLPool[pos])==NULL){ fprintf(logfp,"[add] %s\n",realURL); myAdd(&(URLPool[pos]),realURL,url_len); }else{ while(tmp->next!=NULL) tmp=tmp->next; fprintf(logfp,"[add] %s\n",realURL); myAdd(&(tmp->next),realURL,url_len); } } } memset(realURL,0,url_len); free(realURL); } } else break; buf=memmem(url_r,len-(url_r-body),tag,5); } }
int main(int argc,char *argv[]){ if(argc<2) usage(0); char *sourceURL=argv[1]; clock_t begin, end; int i=0; char *handleURL; int pos; UPU *tmp; URL_FILE *buf; char prefix[10],host[1024],path[4096],filename[4096]; for(i=0;i<HashSize;i++){ URLPool[i]=NULL; finishPool[i]=NULL; failedPool[i]=NULL; } memset(prefix,0,10); memset(host,0,1024); memset(path,0,4096); memset(filename,0,4096); sepURL(sourceURL,prefix,host,path,filename); //check/create Folder folderInit(host); readRec(sourceURL); //init if(!logInit(host)){ printf("log file error\n"); exit(1); } pos=hashfn(sourceURL); myAdd(&URLPool[pos],sourceURL,strlen(sourceURL)); //start loop begin=clock(); i=0; while(getPoolSize(URLPool)!=0){ //random get one URL Structure Pointer from URLPool handleURL=randGet(URLPool); pos=hashfn(handleURL); printf("run [%d] %s\n",pos,handleURL); memset(prefix,0,10); memset(host,0,1024); memset(path,0,4096); memset(filename,0,4096); sepURL(handleURL,prefix,host,path,filename); //put the url in finishPool tmp=finishPool[pos]; if(tmp==NULL){ myAdd(&finishPool[pos],handleURL,strlen(handleURL)); }else{ while(tmp->next!=NULL){ tmp=tmp->next; } myAdd(&(tmp->next),handleURL,strlen(handleURL)); } //start run the CURL if((buf=runCURL(handleURL))!=NULL){ //write to File myWrite(host,filename,buf->buffer,buf->buffer_len,i++); getURL(buf->buffer,buf->buffer_len,handleURL,prefix,host,path,filename); printf("%d\n",getPoolSize(URLPool)); memset(buf->buffer,0,buf->buffer_len); free(buf->buffer); fflush(logfp); } else{ printf("no content\n"); } free(buf); } logClose(); end=clock(); printf("finally finish: %d 's URL, cost %lf sec\n",getPoolSize(finishPool),(double)( end - begin ) / CLOCKS_PER_SEC); myDump(host); return 0; }
/// Returns the std::hash<std::string> value of the member `str`.
std::size_t hash () const
{
    return std::hash<std::string>()(str);
}