/**
 * Repair the chain of hash entries of one bucket over a root dirent file and
 * its collision dirent files.
 *
 * The root entry and every collision entry flagged busy in the root collision
 * bitmap are handed to dirent_cache_repair_cache_entry_for_bucket_idx(). The
 * root is always recorded in dirent_cache_repair_cache_entry_list_tab; a
 * collision entry is recorded only when a last hash entry is reported for it.
 * The recorded entries are then chained one to the next, the last one is
 * terminated with MDIRENTS_HASH_PTR_EOF, and every touched dirent file is
 * written with write_mdirents_file().
 *
 * @param dir_fd       : directory file descriptor passed to the repair and write helpers
 * @param root_entry_p : root dirent cache entry
 * @param bucket_idx   : index of the hash bucket to repair
 * @param cause        : reason of the repair, used as index of the statistics counter
 */
void dirent_file_repair(int dir_fd, mdirents_cache_entry_t *root_entry_p,
        int bucket_idx, dirent_file_repair_cause_e cause) {
    mdirents_cache_entry_t *cache_entry_p;
    mdirents_hash_entry_t *hash_entry_p = NULL;
    mdirents_hash_ptr_t *hash_bucket_p;
    mdirent_sector0_not_aligned_t *sect0_p;
    uint8_t *coll_bitmap_p;
    int cache_entry_tb_idx = 0;
    int coll_idx = 0;
    int loop_cnt = 0;
    int next_coll_idx = 0;
    int first_index = 0;
    int bit_idx;
    int chunk_u8_idx;
    int i;

    dirent_repair_file_stats[cause]++;
    /*
     ** Clear the whole rescue table: one slot for the root plus one slot per
     ** collision index (the multiplication must cover the "+ 1" slot).
     ** NOTE(review): assumes the table is declared with
     ** MDIRENTS_MAX_COLLS_IDX + 1 slots -- confirm against its definition.
     */
    memset(dirent_cache_repair_cache_entry_list_tab, 0,
            sizeof(dirent_cache_repair_cache_entry_list_t) * (MDIRENTS_MAX_COLLS_IDX + 1));
    info("dirent_file_repair bucket %d cause %s",bucket_idx,dirent_file_repair_cause_e2String(cause));

    /*
     ** get the pointer to the collision file bitmap
     */
    sect0_p = DIRENT_VIRT_TO_PHY_OFF(root_entry_p,sect0_p);
    coll_bitmap_p = (uint8_t*) &sect0_p->coll_bitmap;

    /*
     **_____________________________________________________
     ** First of all do the recovery for the root entry
     **_____________________________________________________
     */
    dirent_cache_repair_cache_entry_for_bucket_idx(dir_fd, root_entry_p,
            root_entry_p, bucket_idx, &hash_entry_p, &first_index);
    /*
     ** The root is always inserted in the rescue table, even when it has no
     ** hash entry for that bucket: the bucket pointer of the root may have to
     ** reference the first collision file that owns hash entries of the list.
     */
    dirent_cache_repair_cache_entry_list_tab[cache_entry_tb_idx].coll_idx = -1; /* indicate root */
    dirent_cache_repair_cache_entry_list_tab[cache_entry_tb_idx].first_index = first_index;
    dirent_cache_repair_cache_entry_list_tab[cache_entry_tb_idx].cache_entry_p = root_entry_p;
    dirent_cache_repair_cache_entry_list_tab[cache_entry_tb_idx].hash_entry_last_p = hash_entry_p;
    cache_entry_tb_idx++;

    /*
     **_______________________________________________________________________
     ** case of the collision files: go through the bitmap of the dirent root
     ** file (0: busy, 1: free)
     **_______________________________________________________________________
     */
    cache_entry_p = NULL;
    while (coll_idx < MDIRENTS_MAX_COLLS_IDX) {
        chunk_u8_idx = coll_idx / 8;
        bit_idx = coll_idx % 8;
        if ((coll_bitmap_p[chunk_u8_idx] & (1 << bit_idx)) != 0) {
            /*
             ** That entry is free. On a byte boundary, let check_bytes_val()
             ** jump to the next candidate index.
             */
            if (coll_idx % 8 == 0) {
                next_coll_idx = check_bytes_val(coll_bitmap_p, coll_idx,
                        MDIRENTS_MAX_COLLS_IDX, &loop_cnt, 1);
                if (next_coll_idx < 0) {
                    break;
                }
                /*
                 ** Always make progress, even when the helper returns the
                 ** current index
                 */
                if (next_coll_idx == coll_idx) {
                    coll_idx++;
                } else {
                    coll_idx = next_coll_idx;
                }
                continue;
            }
            coll_idx++;
            continue;
        }
        /*
         ** one collision idx has been found:
         ** get the entry associated with the collision index
         */
        cache_entry_p = dirent_cache_get_collision_ptr(root_entry_p, coll_idx);
        if (cache_entry_p == NULL) {
            /*
             ** do not break the analysis, skip that collision entry and try
             ** the next one if any
             */
            coll_idx++;
            continue;
        }
        /*
         ** try to repair that dirent cache entry for that bucket idx
         */
        dirent_cache_repair_cache_entry_for_bucket_idx(dir_fd, root_entry_p,
                cache_entry_p, bucket_idx, &hash_entry_p, &first_index);
        /*
         ** insert the result in the rescue table if there is at least one
         ** valid entry for that bucket idx
         */
        if (hash_entry_p != NULL) {
            dirent_cache_repair_cache_entry_list_tab[cache_entry_tb_idx].coll_idx = coll_idx;
            dirent_cache_repair_cache_entry_list_tab[cache_entry_tb_idx].first_index = first_index;
            dirent_cache_repair_cache_entry_list_tab[cache_entry_tb_idx].cache_entry_p = cache_entry_p;
            dirent_cache_repair_cache_entry_list_tab[cache_entry_tb_idx].hash_entry_last_p = hash_entry_p;
            cache_entry_tb_idx++;
        }
        coll_idx++;
    }

    /*
     **_______________________________________________________________________
     ** Go through the rescue table in order and make the end of each local
     ** list point to the next recorded dirent file. The end of the table is
     ** detected from the number of recorded entries rather than from the
     ** content of the slot that follows the last one.
     **_______________________________________________________________________
     */
    for (i = 0; i < cache_entry_tb_idx; i++) {
        cache_entry_p = dirent_cache_repair_cache_entry_list_tab[i].cache_entry_p;
        hash_entry_p = dirent_cache_repair_cache_entry_list_tab[i].hash_entry_last_p;

        if (hash_entry_p == NULL) {
            if (i != 0) {
                /*
                 ** Only the root may be recorded without hash entry: give up
                 ** rather than dereferencing a NULL hash entry below.
                 */
                DIRENT_SEVERE("dirent_file_repair: hash_bucket_p is null for bucket_idx %d (line %d)\n",bucket_idx,__LINE__);
                severe("memory corruption!!");
                return;
            }
            /*
             ** case of the root cache entry: get the pointer to hash bucket table
             */
            hash_bucket_p = DIRENT_CACHE_GET_BUCKET_PTR(cache_entry_p,bucket_idx);
            if (hash_bucket_p == NULL) {
                /*
                 ** Must not occur since the entry has already been scanned:
                 ** create the memory array for the range of bucket_idx.
                 */
                DIRENT_SEVERE("dirent_file_repair: hash_bucket_p is null for bucket_idx %d in root dirent file ",bucket_idx);
                hash_bucket_p = DIRENT_CACHE_ALLOCATE_BUCKET_ARRAY(cache_entry_p,bucket_idx);
                if (hash_bucket_p == NULL) {
                    fatal("dirent_file_repair: system error, out of memory!!");
                    return;
                }
            }
            /*
             ** end of list case: the root is the only recorded entry
             */
            if (i + 1 >= cache_entry_tb_idx) {
                break;
            }
            hash_bucket_p->type = MDIRENTS_HASH_PTR_COLL;
            hash_bucket_p->idx = dirent_cache_repair_cache_entry_list_tab[i + 1].coll_idx;
            dirent_cache_repair_printf_cache_entry_for_bucket_idx(cache_entry_p, bucket_idx, -1);
            /*
             ** re-write on disk the corresponding image of the dirent file
             */
            write_mdirents_file(dir_fd, cache_entry_p);
            continue;
        }
        /*
         ** end of list check: the last recorded entry is handled after the loop
         */
        if (i + 1 >= cache_entry_tb_idx) {
            break;
        }
        /*
         ** there is a next entry: make our current last hash entry reference
         ** the collision file of that next entry
         */
        hash_entry_p->next.type = MDIRENTS_HASH_PTR_COLL;
        hash_entry_p->next.idx = dirent_cache_repair_cache_entry_list_tab[i + 1].coll_idx;
        dirent_cache_repair_printf_cache_entry_for_bucket_idx(cache_entry_p,
                bucket_idx, dirent_cache_repair_cache_entry_list_tab[i].coll_idx);
        /*
         ** re-write on disk the corresponding image of the dirent file
         */
        write_mdirents_file(dir_fd, cache_entry_p);
    }

    /*
     ** ____________________________________________________________
     ** All the last hash entries of the dirent files have been updated
     ** except the last one, so now update the last one
     ** ____________________________________________________________
     */
    if (cache_entry_p == NULL) {
        /*
         ** We must have at least the root !!
         */
        DIRENT_SEVERE("dirent_file_repair: empty list for bucket %d (line %d)\n",bucket_idx,__LINE__);
        severe("memory corruption!!");
        return;
    }
    if (hash_entry_p == NULL) {
        /*
         ** this is possible for the case of the root only
         */
        if (cache_entry_p != root_entry_p) {
            DIRENT_SEVERE("dirent_file_repair: collision entry empty for bucket %d (line %d)\n",bucket_idx,__LINE__);
            severe("memory corruption!!");
            return;
        }
        hash_bucket_p = DIRENT_CACHE_GET_BUCKET_PTR(cache_entry_p,bucket_idx);
        if (hash_bucket_p == NULL) {
            /*
             ** Must not occur since the entry has already been scanned
             */
            DIRENT_SEVERE("dirent_file_repair: hash_bucket_p is null for bucket_idx %d (line %d)\n",bucket_idx,__LINE__);
            severe("memory corruption!!");
            return;
        }
        hash_bucket_p->type = MDIRENTS_HASH_PTR_EOF;
        hash_bucket_p->idx = 0;
        /*
         ** re-write the dirent root file on disk
         */
        write_mdirents_file(dir_fd, cache_entry_p);
    } else {
        /*
         ** terminate the list and re-write the last file on disk
         */
        hash_entry_p->next.type = MDIRENTS_HASH_PTR_EOF;
        hash_entry_p->next.idx = 0;
        write_mdirents_file(dir_fd, cache_entry_p);
    }
    /*
     ** re-write the root dirent file on disk
     */
    write_mdirents_file(dir_fd, root_entry_p);
    /*
     ** i is the index of the last recorded entry here (the loop above always
     ** leaves through a break on that entry)
     */
    dirent_cache_repair_printf_cache_entry_for_bucket_idx(cache_entry_p,
            bucket_idx, dirent_cache_repair_cache_entry_list_tab[i].coll_idx);
}
/**
 * Build the "invalidate sections" message of one cache server from its
 * inactive dirty bitmaps.
 *
 * Each child flagged in the inactive parent bitmap is cleared from that
 * bitmap and appended to gw_dirty_section_buffer as one section: a
 * rozofs_section_header_u followed by the non-zero bytes of the child bitmap.
 * The header and payload of gw_invalidate_sections_msg are updated in place.
 *
 * @param ctx_p    : dirty management context
 * @param srv_rank : rank of the cache server the message is built for
 *
 * @retval exp_invalidate_error   : srv_rank is out of range or the server does not exist
 * @retval exp_invalidate_nothing : nothing to send (no update recorded, or no dirty child found)
 * @retval exp_invalidate_too_big : the sections do not fit in the section buffer
 * @retval exp_invalidate_ready   : the message contains at least one section
 */
exp_invalidate_type_e exp_cache_build_invalidate_sections_msg(exp_cache_dirty_ctx_t * ctx_p, int srv_rank) {
    exp_cache_srv_front_end_t *front_end_p;
    exp_dirty_dirty_parent_t *pParent;
    exp_dirty_dirty_child_t *pChild;
    rozofs_section_header_u *pSection;
    char *pChar;
    int inactive_idx;
    int idx;
    int loop_cnt = 0;
    int chunk_u8_idx; /* int: a uint8_t would wrap for bitmaps larger than 2048 bits */
    int bit_idx;
    int section_size;
    int i;

    /*
     ** Retrieve front end context of this server
     */
    if ((srv_rank < 0) || (srv_rank >= EXP_MAX_CACHE_SRV)) {
        severe("server rank out of range %d",srv_rank);
        return exp_invalidate_error;
    }
    front_end_p = ctx_p->srv_rank[srv_rank];
    if (front_end_p == NULL) {
        severe("server %d do not exist",srv_rank);
        return exp_invalidate_error;
    }

    /*
     ** Check whether there has been any modification in the non active bitmaps
     */
    inactive_idx = 1 - front_end_p->active_idx;
    pParent = front_end_p->parent[inactive_idx];
    if (pParent->parent_update_count == 0) {
        return exp_invalidate_nothing;
    }

    /*
     ** Initialize the message header
     */
    gw_invalidate_sections_msg.hdr.export_id = exp_cache_cnf.export_id;
    gw_invalidate_sections_msg.hdr.gateway_rank = srv_rank;
    gw_invalidate_sections_msg.hdr.nb_gateways = ctx_p->nb_cache_servers;
    gw_invalidate_sections_msg.section.section_len = 0;
    gw_invalidate_sections_msg.section.section_val = gw_dirty_section_buffer;
    pSection = (rozofs_section_header_u *) gw_dirty_section_buffer;

    /*
     ** Loop on the parent bit map to find out the significant childs
     */
    idx = 0;
    while ((idx < EXP_PARENT_BITMAP_BIT_SZ) && (pParent->parent_update_count != 0)) {
        if (idx % 8 == 0) {
            /*
             ** skip the entries that have not been modified
             */
            idx = check_bytes_val(pParent->parent_bitmap, idx,
                    EXP_PARENT_BITMAP_BIT_SZ, &loop_cnt, 0);
            if (idx < 0) {
                break;
            }
        }
        chunk_u8_idx = idx / 8;
        bit_idx = idx % 8;

        /*
         ** Current child is not dirty
         */
        if ((pParent->parent_bitmap[chunk_u8_idx] & (1 << bit_idx)) == 0) {
            idx++;
            continue;
        }

        /*
         ** This child is dirty: clear it in the parent bitmap
         */
        pChild = &(front_end_p->child[inactive_idx][idx]);
        pParent->parent_bitmap[chunk_u8_idx] &= ~(1 << bit_idx);
        pParent->parent_update_count--;

        /*
         ** The section header itself must fit in the buffer before it is
         ** written, not only the bytes that follow it.
         ** NOTE(review): pSection follows a variable number of payload bytes,
         ** so the header may be written at an unaligned address -- confirm
         ** this is acceptable on the supported targets.
         */
        if (((char *) pSection - gw_dirty_section_buffer) + (long) sizeof(*pSection)
                > EXP_MAX_SECTION_BUFFER_SIZE) {
            return exp_invalidate_too_big;
        }

        /*
         ** Let's add this section in the message
         */
        pSection->u64 = 0;
        pSection->field.absolute_idx = idx * EXP_CHILD_BITMAP_BYTE_SZ;
        pSection->field.byte_bitmap = pChild->bitmap;

        /*
         ** Copy the valid (non-zero) bytes right after the header
         */
        section_size = 0;
        pChar = (char *) (pSection + 1);
        for (i = 0; i < EXP_CHILD_BITMAP_BYTE_SZ; i++) {
            if (pChild->child_bitmap[i] == 0) {
                continue;
            }
            if ((pChar - gw_dirty_section_buffer) >= EXP_MAX_SECTION_BUFFER_SIZE) {
                return exp_invalidate_too_big;
            }
            *pChar++ = pChild->child_bitmap[i];
            section_size++;
        }
        pSection->field.section_size = section_size;
        gw_invalidate_sections_msg.section.section_len += (pChar - (char *) pSection);

        /*
         ** The next section starts right after the bytes just copied
         */
        pSection = (rozofs_section_header_u *) pChar;
        idx++;
    }

    /*
     ** Check that the message actually contains some sections
     */
    if ((char *) pSection == gw_dirty_section_buffer) {
        severe("srv %d parent_update_count %d inconsistent with parent_bitmap",srv_rank,pParent->parent_update_count);
        pParent->parent_update_count = 0;
        return exp_invalidate_nothing;
    }
    return exp_invalidate_ready;
}
/**
 * Display the content of one hash bucket for a root dirent file and for each
 * of its collision dirent files.
 *
 * dirent_cache_repair_printf_cache_entry_for_bucket_idx() is called for the
 * root entry (collision index -1), then for every collision entry that is
 * flagged busy in the root collision bitmap and for which
 * dirent_cache_get_collision_ptr() returns a cache entry.
 *
 * @param dir_fd       : directory file descriptor (not used by this function)
 * @param root_entry_p : root dirent cache entry
 * @param bucket_idx   : index of the hash bucket to display
 */
void dirent_file_check(int dir_fd, mdirents_cache_entry_t *root_entry_p,
        int bucket_idx) {
    mdirents_cache_entry_t *cache_entry_p;
    mdirent_sector0_not_aligned_t *sect0_p;
    uint8_t *coll_bitmap_p;
    int coll_idx = 0;
    int loop_cnt = 0;
    int next_coll_idx = 0;
    int bit_idx;
    int chunk_u8_idx;

    /*
     ** get the pointer to the collision file bitmap
     */
    sect0_p = DIRENT_VIRT_TO_PHY_OFF(root_entry_p,sect0_p);
    coll_bitmap_p = (uint8_t*) &sect0_p->coll_bitmap;

    /*
     ** start with the root dirent file
     */
    dirent_cache_repair_printf_cache_entry_for_bucket_idx(root_entry_p, bucket_idx, -1);

    /*
     ** case of the collision files: go through the bitmap of the dirent root
     ** file (0: busy, 1: free)
     */
    while (coll_idx < MDIRENTS_MAX_COLLS_IDX) {
        chunk_u8_idx = coll_idx / 8;
        bit_idx = coll_idx % 8;
        if ((coll_bitmap_p[chunk_u8_idx] & (1 << bit_idx)) != 0) {
            /*
             ** That entry is free. On a byte boundary, let check_bytes_val()
             ** jump to the next candidate index.
             */
            if (coll_idx % 8 == 0) {
                next_coll_idx = check_bytes_val(coll_bitmap_p, coll_idx,
                        MDIRENTS_MAX_COLLS_IDX, &loop_cnt, 1);
                if (next_coll_idx < 0) {
                    break;
                }
                /*
                 ** Always make progress, even when the helper returns the
                 ** current index
                 */
                if (next_coll_idx == coll_idx) {
                    coll_idx++;
                } else {
                    coll_idx = next_coll_idx;
                }
                continue;
            }
            coll_idx++;
            continue;
        }
        /*
         ** one collision idx has been found:
         ** get the entry associated with the collision index
         */
        cache_entry_p = dirent_cache_get_collision_ptr(root_entry_p, coll_idx);
        if (cache_entry_p == NULL) {
            /*
             ** the collision dirent cache entry does not exist: do not break
             ** the analysis, skip it and try the next one if any
             */
            coll_idx++;
            continue;
        }
        dirent_cache_repair_printf_cache_entry_for_bucket_idx(cache_entry_p, bucket_idx, coll_idx);
        coll_idx++;
    }
}
/** * Insert a root dirent file reference in the cache * Note : the bitmap is aligned on a 8 byte boundary, so we can perform control by using a uint64_t @param cache : pointer to the main cache structure @param index : index of the root dirent file @param key : pointer of the key of the write back buffer @retval !NULL -> success @retval NULL -> na available buffer . */ mdirents_file_t *writebck_cache_bucket_get_entry(uint64_t *key_ext,uint16_t index) { uint8_t *bitmap_p; writebck_cache_bucket_t *bucket_p; int coll_idx; int next_coll_idx ; uint8_t chunk_u8_idx ; int bit_idx; int loop_cnt; if (writebck_cache_enable == 0) return NULL; uint16_t hash_bucket; writebck_cache_key_t *key = (writebck_cache_key_t*)key_ext; writebck_cache_main_t *cache = &writebck_cache_level0; //reloop: coll_idx = 0; next_coll_idx = 0 ; /* ** Get the index where application can get a write back buffer */ hash_bucket = (uint16_t)(index&WRITEBCK_BUCKET_DEPTH_MASK); bucket_p = &cache->htable[hash_bucket]; writebck_cache_access_tb[hash_bucket]++; /* ** set the pointer to the bucket and load up the pointer to the bitmap */ bitmap_p = bucket_p->bucket_free_bitmap; /* ** Check if the application request a free writeback buffer or need to get the one that ** has been previously returned */ // printf("root_idx %d timestamp %d local_id %d\n",index,key->timestamp,key->local_id); if (key->timestamp != 0) { /* ** the application has to re-use the previously allocated writeback buffer ** Check if the application is still the owner */ if (bucket_p->entry[key->local_id].timestamp == key->timestamp) { /* ** that's OK-> increment just the usage counter */ dirent_clear_chunk_bit(key->local_id,bitmap_p); writebck_cache_hit_counter++; return &cache->mdirents_file_p[(hash_bucket*WRITEBCK_BUCKET_MAX_COLLISIONS) +key->local_id]; } /* ** unlucky, check if we can allocate another write back buffer */ } while(coll_idx < WRITEBCK_BUCKET_MAX_COLLISIONS) { if (coll_idx%8 == 0) { /* ** skip the entries that are alreadt 
allocated */ next_coll_idx = check_bytes_val(bitmap_p,coll_idx,WRITEBCK_BUCKET_MAX_COLLISIONS,&loop_cnt,0); if (next_coll_idx < 0) break; coll_idx = next_coll_idx; } /* ** check if the return bit is free */ chunk_u8_idx = coll_idx/8; bit_idx = coll_idx%8; if ((bitmap_p[chunk_u8_idx] & (1<<bit_idx)) == 0) { /* ** the entry is busy, check the next one */ coll_idx++; continue; } #if 1 if (coll_idx > writebck_cache_max_level0_collisions) { writebck_cache_max_level0_collisions = coll_idx; } #endif /* ** allocate the entry by clearing the associated bit */ dirent_clear_chunk_bit(coll_idx,bitmap_p); // if (coll_idx > 32) printf("FDL_DEBUG coll_idx %d\n",coll_idx); /* ** OK we found one, check if the memory has been allocated to store the entry ** this will depend on the value of the coll_idx */ // printf("coll_idx %d\n",coll_idx); bucket_p->entry[coll_idx].timestamp += 1; if (bucket_p->entry[coll_idx].timestamp == 0) bucket_p->entry[coll_idx].timestamp = 1; key->timestamp = bucket_p->entry[coll_idx].timestamp; key->local_id = coll_idx; // printf("FDL_DEBUG allocated idx %d\n",coll_idx); /* ** OK, now insert the entry */ return &cache->mdirents_file_p[(hash_bucket*WRITEBCK_BUCKET_MAX_COLLISIONS) +key->local_id]; ; } /* ** Out of entries-> need to go through LRU-> TODO */ #if DIRENT_DEBUG_WRITEBACK { int k; writebck_cache_bucket_entry_t *bucket_entry = bucket_p->entry; for(k = 0; k < WRITEBCK_BUCKET_MAX_COLLISIONS; k++,bucket_entry++) { if (bucket_entry->counter!= 0) continue; printf("Free entry at idx %d\n",k); goto reloop; break; } } #endif writebck_cache_miss_counter++; return NULL; }