/*
 * Program entry point.
 *
 * Loads the URL database named by argv[1], computes a digest for every
 * entry with getHashKey(), stores the digests in a freshly allocated
 * md5_table, and then hands that table to algorithm_init() and
 * algorithm_test().
 *
 * Exit codes:
 *   0 - success
 *   1 - wrong number of command-line arguments
 *   2 - load_urldb() returned NULL
 *   3 - allocation of the md5 table failed
 */
int main(int argc, char **argv)
{
    struct url_table *url = NULL;
    struct md5_table *md5 = NULL;
    int i;

    if (argc != 2) {
        printf("Usage: ./url url_db\n");
        return 1;
    }

    if ((url = load_urldb(argv[1])) == NULL)
        return 2;

    /* Header plus one MD5 slot per URL. */
    md5 = (struct md5_table*) malloc(sizeof(struct md5_table) + sizeof(MD5) * url->num);
    if (md5 == NULL) {
        printf("malloc md5 table failed\n");
        free(url);  /* do not leak the URL table on the error path */
        return 3;
    }

    md5->num = url->num;
    for (i = 0; i < url->num; i ++) {
        unsigned char digest[16];

        getHashKey((char *)(url->urls + i), digest);
        memcpy(md5->key + i, digest, sizeof(MD5));
    }

    algorithm_init(md5);
    algorithm_test(md5);

    free(url);
    free(md5);
    return 0;
}
/**
 * Builds the compact storage (dataset + indiciesHash) from tempData.
 *
 * Every entry of tempData is first written into a dense width x height
 * scratch grid, with a parallel occupancy mask recording which cells were
 * actually set. The mask is then scanned (x outer, y inner) and each
 * occupied cell is appended to dataset, with its position in dataset
 * recorded in indiciesHash under getHashKey(x, y).
 *
 * The y loops run to `height`. Running them to `width` indexes past the end
 * of the mask when width > height and skips rows when height > width.
 */
void ReducedGrid<T>::compress()
{
    Grid<T> grid(width, height);

    // NOTE(review): runtime-sized array; this relies on the compiler's
    // variable-length-array extension and lives on the stack.
    bool isData[width][height];
    for (int i = 0; i < width; i++) {
        for (int j = 0; j < height; j++) {
            isData[i][j] = false;
        }
    }

    // Scatter the collected entries into the dense grid and mark them.
    for (int i = 0; i < tempData.size(); i++) {
        const DataStruct_t &entry = tempData[i];
        grid(entry.x, entry.y) = entry.data;
        isData[entry.x][entry.y] = true;
    }

    // Gather the occupied cells into the compact dataset.
    for (int i = 0; i < width; i++) {
        for (int j = 0; j < height; j++) {
            if (isData[i][j]) {
                dataset.append(grid(i, j));
                int index = getHashKey(i, j);
                indiciesHash.insert(index, dataset.size() - 1);
            }
        }
    }
}
/**
 * Removes the first node whose key matches `key` from its bucket chain.
 *
 * @param key   Key to look up; must not be null.
 * @param data  Optional out-parameter. When non-null it receives the data
 *              pointer that was stored in the removed node. The node itself
 *              is freed; the stored data is not.
 * @return API_RETVAL_SUCCESS when a node was removed,
 *         API_RETVAL_INVALID_INPUT when `key` is null or hashes outside the
 *         table, API_RETVAL_DATA_NOT_FOUND when no node matches.
 */
apiRetVal hashMapOpen::remove (hashNodeKey *key, void **data)
{
    if (!key) {
        return API_RETVAL_INVALID_INPUT;
    }

    const uint32_t hashKey = getHashKey(key);

    // Valid bucket indices are 0 .. tableLength-1 (assumes hashTable holds
    // tableLength buckets), so an index equal to tableLength is rejected.
    if (hashKey >= tableLength) {
        return API_RETVAL_INVALID_INPUT;
    }

    hashNode *prevNode = NULL;
    for (hashNode *node = hashTable[hashKey]; node; prevNode = node, node = node->next) {
        if (!keyCmp(key, &node->nodeKey)) {
            continue;
        }

        if (data) {
            *data = node->data;
        }

        if (!prevNode) {
            // Removing the head of the chain.
            hashTable[hashKey] = node->next;
        } else {
            prevNode->next = node->next;
        }

        free(node);
        // Keep the element count in step with the increment done by add().
        numberOfElements--;
        return (API_RETVAL_SUCCESS);
    }

    return API_RETVAL_DATA_NOT_FOUND;
}
/**
 * Returns the value stored for grid location (x, y).
 *
 * The location must already exist: when locationExists(x, y) is false an
 * error message is written to std::cerr and the process is aborted.
 */
T ReducedGrid<T>::get(int x, int y)
{
    const bool present = locationExists(x, y);
    if (!present) {
        std::cerr << "Cannot return a location that doesn't exist, use locationExists() before calling get()!"
                  << std::endl;
        abort();
    }

    // Map the coordinates to their slot in the compact dataset.
    const int key = getHashKey(x, y);
    const int slot = indiciesHash[key];
    return dataset[slot];
}
/**
 * Looks up the first node whose key matches `key`.
 *
 * @param key   Key to look up; must not be null.
 * @param data  Optional out-parameter. When non-null it receives the data
 *              pointer stored in the matching node.
 * @return API_RETVAL_SUCCESS when a node matches,
 *         API_RETVAL_INVALID_INPUT when `key` is null or hashes outside the
 *         table, API_RETVAL_DATA_NOT_FOUND when no node matches.
 */
apiRetVal hashMapOpen::find (hashNodeKey *key, void **data)
{
    if (!key) {
        return API_RETVAL_INVALID_INPUT;
    }

    const uint32_t hashKey = getHashKey(key);

    // Valid bucket indices are 0 .. tableLength-1 (assumes hashTable holds
    // tableLength buckets), so an index equal to tableLength is rejected.
    if (hashKey >= tableLength) {
        return API_RETVAL_INVALID_INPUT;
    }

    for (hashNode *node = hashTable[hashKey]; node; node = node->next) {
        if (keyCmp(key, &node->nodeKey)) {
            if (data) {
                *data = node->data;
            }
            return (API_RETVAL_SUCCESS);
        }
    }

    return API_RETVAL_DATA_NOT_FOUND;
}
/**
 * Inserts a new node at the head of the bucket chain selected by `key`.
 *
 * No duplicate check is made: adding an already-present key creates a
 * second node in front of the existing one.
 *
 * @param key   Key to insert; copied by value into the new node. Must not
 *              be null.
 * @param data  Payload pointer to store; must not be null. Only the pointer
 *              is stored.
 * @return API_RETVAL_SUCCESS on insertion, API_RETVAL_INVALID_INPUT when
 *         `data` or `key` is null, when the key hashes outside the table,
 *         or when the node cannot be allocated.
 */
apiRetVal hashMapOpen::add (hashNodeKey *key, void *data)
{
    if (!data) {
        log->error("Null data while adding\n");
        return (API_RETVAL_INVALID_INPUT);
    }
    if (!key) {
        log->error("Null key while adding\n");
        return (API_RETVAL_INVALID_INPUT);
    }

    const uint32_t hashKey = getHashKey(key);

    // Same bucket-range validation that find() and remove() apply before
    // indexing hashTable (assumes hashTable holds tableLength buckets).
    if (hashKey >= tableLength) {
        return API_RETVAL_INVALID_INPUT;
    }

    hashNode *newNode = (hashNode *)malloc(sizeof(hashNode));
    if (!newNode) {
        log->error("Out of memory while adding\n");
        // NOTE(review): no dedicated out-of-memory return code is visible
        // from here; switch to one if apiRetVal provides it.
        return API_RETVAL_INVALID_INPUT;
    }

    newNode->data = data;
    newNode->nodeKey = *key;
    newNode->nodeState = HASH_NODE_USED;

    // Push onto the front of the chain.
    newNode->next = hashTable[hashKey];
    hashTable[hashKey] = newNode;

    numberOfElements++;
    log->info("Data added to hash table \n");
    return API_RETVAL_SUCCESS;
}
/*
 * Record one occurrence of pWord in the hash table.
 *
 * The bucket selected by getHashKey(pWord) is searched for a node holding
 * the same string. If one is found its count is incremented; otherwise a
 * node created by newWordNode(pWord) is inserted at the head of the bucket.
 *
 * Returns the bucket index used for pWord.
 */
int addWord(hashTable *pHashTable, char *pWord)
{
    uint key = getHashKey(pWord);
    wordNode *curWordNode;
    wordNode *newNode;

    /* Start the search at the head of the bucket; the cursor must be
     * loaded before the loop or it is read uninitialized. */
    for (curWordNode = pHashTable->pWordNodes[key];
         curWordNode;
         curWordNode = curWordNode->next) {
        /* find the same word node */
        if (0 == strcmp(pWord, curWordNode->word)) {
            curWordNode->count++;
            return key;
        }
    }

    /* word not found in the list: allocate a new node and insert at head */
    /* NOTE(review): the result of newWordNode() is not checked for NULL. */
    newNode = newWordNode(pWord);
    newNode->next = pHashTable->pWordNodes[key];
    pHashTable->pWordNodes[key] = newNode;

    return key;
}
/* Resume the deduping process that happens during the initial scanning. * We also use it as a building block in the run-time map creation * process as well, just by initializing the variables accordingly. * P2D tuple consists of iodedupID to indicate which dedup block does * this pblk map into. * * @param[in] buf * @param[in] len * @param[in] ioblkID * @param[in] initflag * @param[in] lastblk_flag */ int resumeDeduping(unsigned char *buf, __u16 len, __u32 ioblkID, int initflag, int lastblk_flag, int rw_flag) { //uint32_t ptime; /* To be noted when event occurs, is this needed? TODO int ret; __u32 iodedupID; unsigned long long stime=0, etime=0; //savemem unsigned char dig[HASHLEN + MAGIC_SIZE]; unsigned char *dig = malloc(HASHLEN + MAGIC_SIZE); unsigned char *key = NULL; d2pv_datum *dedupd2pv = NULL, *d2pv = NULL; D2P_tuple_t *d2p = NULL; #ifdef STRICT_NO_HASH_COLLISION unsigned char *oldbuf = NULL; //savemem unsigned char debugkey[HASHLEN + MAGIC_SIZE]; unsigned char* debugkey = malloc(HASHLEN + MAGIC_SIZE); #endif #if defined(DEDUPING_DEBUG_SSS) fprintf(stdout, "In %s\n", __FUNCTION__); #endif /* Buf will always have "len" == BLKSIZE. No leftovers. 
*/ #ifdef DEBUG_SS assert(len == BLKSIZE || lastblk_flag == ZEROBLK_FLAG); assert(initflag == INIT_STAGE || initflag == NOINIT_STAGE); #endif if (len != 0) /* Not a zero blk */ { assert(buf != NULL); memset(dig, 0, HASHLEN); stime = gettime(); /* START IODEDUP map-update-get-hash time */ if (getHashKey(buf, len, dig)) RET_ERR("getHashKey() returned error\n"); etime = gettime(); /* END IODEDUP map-update-get-hash */ ACCESSTIME_PRINT("iodedmap-map-update-component-get-hash time: %llu\n", etime - stime); key = (unsigned char*)dig; #if defined(SIMREPLAY_DEBUG_SS_DONE) printf("Content metadata update: buf=%s\n", (char*)buf); printf("Content metadata update: md5="); puts((char*)dig); #endif if (cmaphit_flag && !disksimflag) { assert(REALDISK_SCANNING_NOCOLLECTFORMAT_VMBUNCHREPLAY);//assert(0); //no need to update metadata if this is read request with //metadata hit, and disk is real, not simulated! return 0; } else if (cmaphit_flag) /* read with metadata hit but disk simulated */ { //ideally, i.e. if traces are perfect, then //no need to update metadata here, just return, but.... d2pv_datum *curr_d2pv = NULL; //but if the traces dont have consistent read/write requests, //in some cases of metadata-hit followed by cccahe-miss, the //following assert fails! work-around below... 
curr_d2pv = (d2pv_datum*) hashtab_search(deduptab.table, key); #ifndef INCONSISTENT_TRACES assert(curr_d2pv != NULL); #endif //work-around: //since we have already updated the content-cache with //the "offending content", we have to update the existing //metadata of this ioblk with new dhashkey, and also add //that new d2pv entry into deduptab (or update existing //dedupd2pv for the new dhashkey, if that is the case) d2pv_datum *cmaphit_d2pv = NULL; //found during metadata hit d2pv_datum *trace_d2pv = NULL; //due to inconsistent trace, //if disk is being simulated #ifdef INCONSISTENT_TRACES D2P_tuple_t *trace_d2p = NULL; #endif cmaphit_d2pv = getDedupMap(cmaphit_iodedupID); assert(cmaphit_d2pv != NULL); trace_d2pv = (d2pv_datum*) hashtab_search(deduptab.table, key); if (cmaphit_d2pv == trace_d2pv) curr_d2pv = cmaphit_d2pv; //true if trace is consistent #ifndef INCONSISTENT_TRACES else RET_ERR("inconsistence why?\n"); #else else //begin: fix for inconsistent trace { //trace is inconsistent, so we have to do some //impromptu metadata updates:- if (trace_d2pv == NULL) { __u32 iodedupID; trace_d2pv = (d2pv_datum*) calloc(1, sizeof(d2pv_datum)); INIT_LIST_HEAD(&trace_d2pv->d2pmaps); iodedupID = getNextDedupNum(initflag); trace_d2p = calloc (1, sizeof(D2P_tuple_t)); /*************************************************************** if (ccache_already_had_flag) { note_dedup_attrs(trace_d2pv, key, iodedupID, ccache_already_had_obj_ioblkID); note_d2p_tuple(trace_d2p, ccache_already_had_obj_ioblkID); } else { note_dedup_attrs(trace_d2pv, key, iodedupID, ioblkID); note_d2p_tuple(trace_d2p, ioblkID); } ***************************************************************/ /* We are here only upon a ccache miss after metadata hit, * so ccache_already_had_flag == 0 mandatory! 
*/ //assert(ccache_already_had_flag == 0);//not true for wif note_dedup_attrs(trace_d2pv, key, iodedupID, ioblkID); note_d2p_tuple(trace_d2p, ioblkID); add_d2p_tuple_to_map(trace_d2p, trace_d2pv); ret = hashtab_insert(deduptab.table, trace_d2pv->dhashkey, trace_d2pv); setDedupMap(trace_d2pv->iodedupID, trace_d2pv); ret = updateBlockio(ioblkID, lastblk_flag, iodedupID); if (ret) RET_ERR("updateBlockio() error'ed\n"); } else { __u32 iodedupID; iodedupID = trace_d2pv->iodedupID; if (NULL == get_nondeduped_d2p(trace_d2pv, ioblkID)) { trace_d2p = calloc (1, sizeof(D2P_tuple_t)); //assert(ccache_already_had_flag == 0); //not true note_d2p_tuple(trace_d2p, ioblkID); add_d2p_tuple_to_map(trace_d2p, trace_d2pv); } ret = updateBlockio(ioblkID, lastblk_flag, iodedupID); if (ret) RET_ERR("updateBlockio() error'ed\n"); } //if current ioblk is the only one in old sector-list, //then delete cmaphit_d2pv from hash-table, else let //it stay there. if (slist_len(&cmaphit_d2pv->d2pmaps) > 1) { del_d2p_from_d2pmaps(cmaphit_d2pv, ioblkID); } else { del_d2p_from_d2pmaps(cmaphit_d2pv, ioblkID); hashtab_remove(deduptab.table, cmaphit_d2pv->dhashkey); setDedupMap(cmaphit_iodedupID, NULL); //d2pv freed } //mark trace_d2pv as curr_d2pv for self-hits/misses below! curr_d2pv = trace_d2pv; }//end: fix for inconsistent trace #endif //update the ioblkID for counts of self-hits/misses //copied from iodeduping.c, if change in 1 place, change both if (!ccache_already_had_flag) //set in __arc_add curr_d2pv->ioblkID = ioblkID; //ensure counts line up -- self else curr_d2pv->ioblkID = ccache_already_had_obj_ioblkID; return 0; }
/**
 * Reports whether a value has been stored for grid location (x, y),
 * i.e. whether indiciesHash contains the key derived from the coordinates.
 */
bool ReducedGrid<T>::locationExists(int x, int y)
{
    const int key = getHashKey(x, y);
    const bool known = indiciesHash.contains(key);
    return known;
}
/*
 * Clears the slot that `key` maps to by storing 0 in it.
 * Always returns TRUE.
 */
int deleteHash(hashTable *h, int key)
{
    const int slot = getHashKey(key);

    h->array[slot] = 0;
    return TRUE;
}
// 如果不存在该key 则返回0 int findHash(hashTable *h, int key){ int index = getHashKey(key); return *(h->array + index); }
/*
 * Stores `value` in the slot that `key` maps to, overwriting whatever was
 * there before. Returns the stored value.
 */
int addHash(hashTable *h, int key, int value)
{
    const int slot = getHashKey(key);

    h->array[slot] = value;
    return value;
}
/**
 * Registers point `a` in pointMap under the key getHashKey(a).
 *
 * Only the address of `a` is stored, so the caller must keep the referenced
 * point alive for as long as the map entry is used. An existing entry for
 * the same key is overwritten.
 */
void Hash2D::hashPoint( Point2D& a)
{
    Point2D* const entry = &a;
    pointMap[getHashKey(a)] = entry;
}
int simdisk_trap(__u16 volID, __u32 blockID, unsigned char *simcontent, unsigned char *md5string, int rw, __u32 len, int consistencycheck) { int ret; char skey[256]; blkid_datum *item=NULL, *dedupitem=NULL; __u32 simdisk_blkid_dedup; if (consistencycheck == 1) assert(rw == 1); if (rw) //read assert(simcontent != NULL && md5string == NULL); else //write assert(simcontent == NULL && md5string != NULL); /* construct key into blkid hash-table */ construct_key_volid_blkid(volID, blockID, skey); #if defined(SIMREPLAY_DEBUG_SS_DONE) if (blockID == 10 || blockID == 33414267 || blockID == 34600770 || blockID == 10100928) { //savemem unsigned char debugkey[HASHLEN+1]; unsigned char *debugkey = malloc(HASHLEN+1); if (rw && consistencycheck) { getHashKey(simcontent, BLKSIZE, debugkey); debugkey[HASHLEN]='\0'; printf("In %s, searching (rw=%u) skey = %s, debugkey=%s\n", __FUNCTION__, rw, skey, debugkey); } else if (!rw) { getHashKey(md5string, BLKSIZE, debugkey); debugkey[HASHLEN]='\0'; printf("In %s, searching (rw=%u) skey = %s, debugkey=%s\n", __FUNCTION__, rw, skey, debugkey); } free(debugkey); //savemem } #endif /* check whether block already encountered */ dedupitem = (blkid_datum*)hashtab_search(blkidtab.table, (unsigned char*)skey); #if defined(SIMREPLAY_DEBUG_SS_DONE) if (blockID == 10 || blockID == 33414267 || blockID == 34600770 || blockID == 10100928) printf("In %s, hashtab_search done\n", __FUNCTION__); #endif if (dedupitem) /* seen block */ { simdisk_blkid_dedup = *((__u32*)dedupitem->data); /* If consistencycheck requested, and block ID found in hashtab, * read-up the block from simdisk and memcmp with incoming data! 
*/ if (consistencycheck) { #if defined(SIMREPLAY_DEBUG_SS_DONE) if (blockID == 10 || blockID == 33414267 || blockID == 34600770 || blockID == 10100928) printf("consistency check for duplicate: %u ", blockID); #endif //savemem unsigned char tempbuf[BLKSIZE]; unsigned char *tempbuf = malloc(BLKSIZE); if (len == BLKSIZE) { #if defined(SIMREPLAY_DEBUG_SS_DONE) if (blockID == 10 || blockID == 33414267 || blockID == 34600770 || blockID == 10100928) printf(", simdisk_blkid_dedup=%u ", simdisk_blkid_dedup); #endif if (read_simdisk(simdisk_blkid_dedup, tempbuf)) RET_ERR("read_simdisk failed\n"); #if defined(SIMREPLAY_DEBUG_SS_DONE) if (blockID == 10 || blockID == 33414267 || blockID == 34600770 || blockID == 10100928) { //savemem unsigned char debugkey[HASHLEN+1]; unsigned char *debugkey = malloc(HASHLEN+1); getHashKey(tempbuf, BLKSIZE, debugkey); debugkey[HASHLEN]='\0'; printf(", retrieveddebugkey=%s ", debugkey); free(debugkey); //savemem } #endif } else if (len == MD5HASHLEN_STR-1) { assert(!(DISKSIM_RUNTIMEMAP_NOCOLLECTFORMAT_PIOEVENTSREPLAY) && !(DISKSIM_VANILLA_NOCOLLECTFORMAT_PIOEVENTSREPLAY)); get_simcontent(simdisk_blkid_dedup, tempbuf, 0);//no gen!! } else assert(0); //not expected for now if (memcmp(simcontent, tempbuf, len) == 0) { #if defined(SIMREPLAY_DEBUG_SS_DONE) if (blockID == 10 || blockID == 33414267 || blockID == 34600770 || blockID == 10100928) printf("succeeded\n"); #endif free(tempbuf); //savemem return 1; //success in consistencycheck } else { #if defined(SIMREPLAY_DEBUG_SS) // if (blockID == 10 || blockID == 33414267 || // blockID == 34600770 || blockID == 10100928) //printf("ccf "); //printf("c"); #endif /* Additionally, copy consistent data into simcontent! */ memcpy(simcontent, tempbuf, len); free(tempbuf); //savemem return 0; //failure in consistencycheck, but fixed } } /* If we are here, consistencycheck was not requested and block ID * was found in hash-tab. 
*/ if (rw) { /* in case of read request, read block from simulated disk */ if (len == BLKSIZE) { if (blockID == 10 || blockID == 33414267 || blockID == 34600770 || blockID == 10100928) printf("%s: simdisk_blkid_dedup=%u ", __FUNCTION__, simdisk_blkid_dedup); if (read_simdisk(simdisk_blkid_dedup, simcontent)) RET_ERR("read_simdisk failed here\n"); } else if (len == MD5HASHLEN_STR-1) { assert(!(DISKSIM_RUNTIMEMAP_NOCOLLECTFORMAT_PIOEVENTSREPLAY) && !(DISKSIM_VANILLA_NOCOLLECTFORMAT_PIOEVENTSREPLAY)); get_simcontent(simdisk_blkid_dedup, simcontent, 0); } else assert(0); //not expected for now #if defined(SIMREPLAY_DEBUG_SS) if (blockID == 10 || blockID == 33414267 || blockID == 34600770 || blockID == 10100928) { //savemem unsigned char debugkey[HASHLEN+1]; unsigned char *debugkey = malloc(HASHLEN + 1); getHashKey(simcontent, BLKSIZE, debugkey); debugkey[HASHLEN]='\0'; printf("%s: read simcontent (rw=%u) skey = %s, debugkey=%s\n", __FUNCTION__, rw, skey, debugkey); free(debugkey); //savemem } #endif } else { /* in case of write request, write block to simulated disk */ if (len == BLKSIZE) { if (blockID == 10 || blockID == 33414267 || blockID == 34600770 || blockID == 10100928) printf("write simdisk_blkid_dedup=%u\n", simdisk_blkid_dedup); if (write_simdisk(simdisk_blkid_dedup, md5string)) { RET_ERR("write_simdisk failed for existing block\n"); } } else if (len == MD5HASHLEN_STR-1) { assert(!(DISKSIM_RUNTIMEMAP_NOCOLLECTFORMAT_PIOEVENTSREPLAY) && !(DISKSIM_VANILLA_NOCOLLECTFORMAT_PIOEVENTSREPLAY)); if (append_simcontent(simdisk_blkid_dedup, md5string)) { RET_ERR("append_simcontent failed for existing block\n"); } } else assert(0); } } else /* new block */ { unsigned char *bufptr = NULL; /* If we are here, consistencycheck was not requested, and * block ID not found in hash-tab for read request */ if (rw && !consistencycheck) { RET_ERR("read block (%u) would have been created already\n", blockID); } /* If we are here, (!rw || consistencycheck) is true. 
* If consistencycheck requested, and block ID not found in hashtab, * no issue of consistency! Create it now. */ if (rw) bufptr = simcontent; else bufptr = md5string; #if defined(SIMREPLAY_DEBUG_SS) if (consistencycheck && (blockID == 10 || blockID == 33414267 || blockID == 34600770 || blockID == 10100928)) printf("requested consistency check 1st time: %u\n", blockID); #endif if (len == BLKSIZE) { if (blockID == 10 || blockID == 33414267 || blockID == 34600770 || blockID == 10100928) printf("overwrite (read/write) simdisk_blkid=%u\n", simdisk_blkid); if (write_simdisk(simdisk_blkid, bufptr)) { RET_ERR("write_simdisk failed for new block\n"); } } else if (len == MD5HASHLEN_STR-1) { assert(!(DISKSIM_RUNTIMEMAP_NOCOLLECTFORMAT_PIOEVENTSREPLAY) && !(DISKSIM_VANILLA_NOCOLLECTFORMAT_PIOEVENTSREPLAY)); if (append_simcontent(simdisk_blkid, bufptr)) { RET_ERR("append_simcontent failed for new block\n"); } } else assert(0); #if defined(SIMREPLAY_DEBUG_SS) if (!rw && (blockID == 10 || blockID == 33414267 || blockID == 34600770 || blockID == 10100928)) printf("write request for 1st time: %u\n", blockID); #endif /* also add to blkid hash-table */ item = (blkid_datum*) calloc(1, sizeof(blkid_datum)); item->data = (__u32*)malloc(sizeof(__u32)); //free in blkidrem() *(__u32*)(item->data) = simdisk_blkid; item->blkidkey = strdup(skey); //free in blkidrem() #if defined(SIMREPLAY_DEBUG_SS_DONE) if (blockID == 10 || blockID == 33414267 || blockID == 34600770 || blockID == 10100928) printf("In %s, inserting skey = %s, item->blkidkey=%s\n", __FUNCTION__, skey, (unsigned char*)item->blkidkey); #endif ret = hashtab_insert(blkidtab.table, (unsigned char*)item->blkidkey, item); if (ret == -EEXIST) { RET_ERR("block already exists in blkidtab\n"); } else if (ret == -ENOMEM) { RET_ERR("out of memory for blkidtab\n"); } /* increment for next block in file */ simdisk_blkid++; if (consistencycheck) return 1; } return 0; }