/*
 * update_replicas_linkage -- (internal) update uuids linking replicas
 *
 * Rewrites the prev/next replica uuids in every header of replica 'repn'
 * and of its two neighbours, then re-checksums and persistently stores
 * each modified header.  Always returns 0 (no failure paths).
 */
static int
update_replicas_linkage(struct pool_set *set, unsigned repn)
{
	LOG(3, "set %p, repn %u", set, repn);
	struct pool_replica *rep = REP(set, repn);
	/* NOTE(review): REPP/REPN presumably wrap around the replica list */
	struct pool_replica *prev_r = REPP(set, repn);
	struct pool_replica *next_r = REPN(set, repn);

	ASSERT(rep->nparts > 0);
	ASSERT(prev_r->nparts > 0);
	ASSERT(next_r->nparts > 0);

	/* set uuids in the current replica */
	for (unsigned p = 0; p < rep->nhdrs; ++p) {
		struct pool_hdr *hdrp = HDR(rep, p);

		/* replica links point at part 0 of the neighbouring replicas */
		memcpy(hdrp->prev_repl_uuid, PART(prev_r, 0).uuid,
				POOL_HDR_UUID_LEN);
		memcpy(hdrp->next_repl_uuid, PART(next_r, 0).uuid,
				POOL_HDR_UUID_LEN);

		/* re-insert the header checksum after the uuid update */
		util_checksum(hdrp, sizeof(*hdrp), &hdrp->checksum, 1,
				POOL_HDR_CSUM_END_OFF);

		/* store pool's header */
		util_persist(PART(rep, p).is_dev_dax, hdrp, sizeof(*hdrp));
	}

	/* set uuids in the previous replica */
	for (unsigned p = 0; p < prev_r->nhdrs; ++p) {
		struct pool_hdr *prev_hdrp = HDR(prev_r, p);
		memcpy(prev_hdrp->next_repl_uuid, PART(rep, 0).uuid,
				POOL_HDR_UUID_LEN);
		util_checksum(prev_hdrp, sizeof(*prev_hdrp),
				&prev_hdrp->checksum, 1, POOL_HDR_CSUM_END_OFF);

		/* store pool's header */
		util_persist(PART(prev_r, p).is_dev_dax, prev_hdrp,
				sizeof(*prev_hdrp));
	}

	/* set uuids in the next replica */
	for (unsigned p = 0; p < next_r->nhdrs; ++p) {
		struct pool_hdr *next_hdrp = HDR(next_r, p);
		memcpy(next_hdrp->prev_repl_uuid, PART(rep, 0).uuid,
				POOL_HDR_UUID_LEN);
		util_checksum(next_hdrp, sizeof(*next_hdrp),
				&next_hdrp->checksum, 1, POOL_HDR_CSUM_END_OFF);

		/* store pool's header */
		util_persist(PART(next_r, p).is_dev_dax, next_hdrp,
				sizeof(*next_hdrp));
	}

	return 0;
}
/*
 * update_replicas_linkage -- (internal) update uuids linking replicas
 *
 * Older variant: links replica 'repn' with its two neighbours by rewriting
 * the prev/next replica uuids in every part header, re-inserting the header
 * checksum and msync-ing each modified header.  Always returns 0.
 */
static int
update_replicas_linkage(struct pool_set *set, unsigned repn)
{
	struct pool_replica *rep = REP(set, repn);
	/*
	 * NOTE(review): repn - 1 underflows for repn == 0; REP() presumably
	 * reduces its index modulo the replica count -- confirm.
	 */
	struct pool_replica *prev_r = REP(set, repn - 1);
	struct pool_replica *next_r = REP(set, repn + 1);

	/* set uuids in the current replica */
	for (unsigned p = 0; p < rep->nparts; ++p) {
		struct pool_hdr *hdrp = HDR(rep, p);

		/* replica links point at part 0 of the neighbouring replicas */
		memcpy(hdrp->prev_repl_uuid, PART(prev_r, 0).uuid,
				POOL_HDR_UUID_LEN);
		memcpy(hdrp->next_repl_uuid, PART(next_r, 0).uuid,
				POOL_HDR_UUID_LEN);
		util_checksum(hdrp, sizeof(*hdrp), &hdrp->checksum, 1);

		/* store pool's header */
		pmem_msync(hdrp, sizeof(*hdrp));
	}

	/* set uuids in the previous replica */
	for (unsigned p = 0; p < prev_r->nparts; ++p) {
		struct pool_hdr *prev_hdrp = HDR(prev_r, p);
		memcpy(prev_hdrp->next_repl_uuid, PART(rep, 0).uuid,
				POOL_HDR_UUID_LEN);
		util_checksum(prev_hdrp, sizeof(*prev_hdrp),
				&prev_hdrp->checksum, 1);

		/* store pool's header */
		pmem_msync(prev_hdrp, sizeof(*prev_hdrp));
	}

	/* set uuids in the next replica */
	for (unsigned p = 0; p < next_r->nparts; ++p) {
		struct pool_hdr *next_hdrp = HDR(next_r, p);
		memcpy(next_hdrp->prev_repl_uuid, PART(rep, 0).uuid,
				POOL_HDR_UUID_LEN);
		util_checksum(next_hdrp, sizeof(*next_hdrp),
				&next_hdrp->checksum, 1);

		/* store pool's header */
		pmem_msync(next_hdrp, sizeof(*next_hdrp));
	}

	return 0;
}
/*
 * pool_hdr_valid -- (internal) return true if pool header is valid
 *
 * A header is valid only when it is not all zeros AND its checksum
 * verifies (checksum covers the header up to POOL_HDR_CSUM_END_OFF).
 */
static int
pool_hdr_valid(struct pool_hdr *hdrp)
{
	/* an all-zero header is never considered valid */
	if (util_is_zeroed((void *)hdrp, sizeof(*hdrp)))
		return 0;

	/* verify (not insert) the stored checksum */
	return util_checksum(hdrp, sizeof(*hdrp), &hdrp->checksum, 0,
			POOL_HDR_CSUM_END_OFF);
}
/*
 * convert_v1_v2 -- convert a pool from layout major version 1 to 2
 *
 * Bumps the on-media major version, re-checksums the header, replays
 * unfinished lane operations and finally zeroes and persists the lanes.
 * Returns 0 on success, -1 if the pool is not at SOURCE_MAJOR_VERSION.
 */
int
convert_v1_v2(void *psf, void *addr)
{
	/* stash conversion context in file-scope globals used by helpers */
	poolset = psf;
	pop = addr;
	heap = (struct heap_layout *)((char *)addr + pop->heap_offset);

	/* refuse to convert anything but the expected source version */
	if (le32toh(pop->hdr.major) != SOURCE_MAJOR_VERSION)
		return -1;

	/* bump the on-media major version and re-insert the checksum */
	pop->hdr.major = htole32(TARGET_MAJOR_VERSION);
	util_checksum(&pop->hdr, sizeof(pop->hdr), &pop->hdr.checksum, 1);

	struct lane_layout *lanes =
		(struct lane_layout *)((char *)addr + pop->lanes_offset);

	/* replay any operations left unfinished in each lane section */
	for (uint64_t i = 0; i < pop->nlanes; ++i) {
		lane_alloc_recover((struct allocator_lane_section *)
			&lanes[i].sections[LANE_SECTION_ALLOCATOR]);
		lane_list_recover((struct lane_list_section *)
			&lanes[i].sections[LANE_SECTION_LIST]);
		lane_tx_recover((struct lane_tx_layout *)
			&lanes[i].sections[LANE_SECTION_TRANSACTION]);
	}

	/* lanes are clean now -- zero them and persist the result */
	memset(lanes, 0, pop->nlanes * sizeof(struct lane_layout));
	pmempool_convert_persist(poolset, lanes,
		pop->nlanes * sizeof(struct lane_layout));

	return 0;
}
/* * check_checksums -- (internal) check if checksums are correct for parts in * a given replica */ static int check_checksums(struct pool_set *set, struct poolset_health_status *set_hs) { LOG(3, "set %p, set_hs %p", set, set_hs); for (unsigned r = 0; r < set->nreplicas; ++r) { struct pool_replica *rep = REP(set, r); struct replica_health_status *rep_hs = REP(set_hs, r); if (rep->remote) continue; for (unsigned p = 0; p < rep->nparts; ++p) { /* skip broken parts */ if (replica_is_part_broken(r, p, set_hs)) continue; /* check part's checksum */ LOG(4, "checking checksum for part %u, replica %u", p, r); struct pool_hdr *hdrp = HDR(rep, p); if (!util_checksum(hdrp, sizeof(*hdrp), &hdrp->checksum, 0)) {; ERR("invalid checksum of pool header"); rep_hs->part[p] |= IS_BROKEN; } else if (util_is_zeroed(hdrp, sizeof(*hdrp))) { rep_hs->part[p] |= IS_BROKEN; } } } return 0; }
/*
 * pmemobj_descr_create -- (internal) create obj pool descriptor
 *
 * Lays out the persistent descriptor (layout name, lane/store/heap offsets),
 * initializes the heap and only then inserts and stores the descriptor
 * checksum, so an interrupted create never leaves a valid-looking pool.
 * Returns 0 on success, -1 (with errno set) on heap_init failure.
 */
static int
pmemobj_descr_create(PMEMobjpool *pop, const char *layout, size_t poolsize)
{
	LOG(3, "pop %p layout %s poolsize %zu", pop, layout, poolsize);

	ASSERTeq(poolsize % Pagesize, 0);

	/* opaque info lives at the beginning of mapped memory pool */
	void *dscp = (void *)((uintptr_t)(&pop->hdr) +
		sizeof (struct pool_hdr));

	/* create the persistent part of pool's descriptor */
	memset(dscp, 0, OBJ_DSC_P_SIZE);
	if (layout)
		/* descriptor was just zeroed, so the copy stays terminated */
		strncpy(pop->layout, layout, PMEMOBJ_MAX_LAYOUT - 1);

	/* initialize run_id, it will be incremented later */
	pop->run_id = 0;
	pmem_msync(&pop->run_id, sizeof (pop->run_id));

	pop->lanes_offset = OBJ_LANES_OFFSET;
	pop->nlanes = OBJ_NLANES;

	/* zero all lanes */
	void *lanes_layout = (void *)((uintptr_t)pop + pop->lanes_offset);
	memset(lanes_layout, 0, pop->nlanes * sizeof (struct lane_layout));
	pmem_msync(lanes_layout, pop->nlanes * sizeof (struct lane_layout));

	/* initialization of the obj_store */
	pop->obj_store_offset = pop->lanes_offset +
		pop->nlanes * sizeof (struct lane_layout);
	pop->obj_store_size = (PMEMOBJ_NUM_OID_TYPES + 1) *
		sizeof (struct object_store_item);
		/* + 1 - for root object */
	void *store = (void *)((uintptr_t)pop + pop->obj_store_offset);
	memset(store, 0, pop->obj_store_size);
	pmem_msync(store, pop->obj_store_size);

	/* the heap occupies everything past the store, page-aligned up */
	pop->heap_offset = pop->obj_store_offset + pop->obj_store_size;
	pop->heap_offset = (pop->heap_offset + Pagesize - 1) & ~(Pagesize - 1);
	pop->heap_size = poolsize - pop->heap_offset;

	/* initialize heap prior to storing the checksum */
	if ((errno = heap_init(pop)) != 0) {
		ERR("!heap_init");
		return -1;
	}

	util_checksum(dscp, OBJ_DSC_P_SIZE, &pop->checksum, 1);

	/* store the persistent part of pool's descriptor (2kB) */
	pmem_msync(dscp, OBJ_DSC_P_SIZE);

	return 0;
}
/*
 * pool_btt_info_valid -- check consistency of BTT Info header
 *
 * Returns nonzero only when the signature matches and the stored
 * checksum verifies.
 */
int
pool_btt_info_valid(struct btt_info *infop)
{
	int sig_ok = memcmp(infop->sig, BTTINFO_SIG, BTTINFO_SIG_LEN) == 0;

	return sig_ok &&
		util_checksum(infop, sizeof(*infop), &infop->checksum, 0);
}
/*
 * shutdown_state_checksum -- (internal) counts SDS checksum and flush it
 *
 * Inserts the checksum over the whole shutdown-state struct (no skip
 * offset), then flushes the struct to the replica's persistence domain.
 */
static void
shutdown_state_checksum(struct shutdown_state *sds, struct pool_replica *rep)
{
	LOG(3, "sds %p", sds);

	/* 1 = insert mode; trailing 0 = no skipped region */
	util_checksum(sds, sizeof(*sds), &sds->checksum, 1, 0);
	FLUSH_SDS(sds, rep);
}
/*
 * update_parts_linkage -- (internal) set uuids linking recreated parts within
 *                         a replica
 *
 * For each header, rewrites its prev/next part uuids and the back-links in
 * the neighbouring headers, re-checksums all three and persists them.
 * Always returns 0.
 */
static int
update_parts_linkage(struct pool_set *set, unsigned repn,
		struct poolset_health_status *set_hs)
{
	LOG(3, "set %p, repn %u, set_hs %p", set, repn, set_hs);
	struct pool_replica *rep = REP(set, repn);

	for (unsigned p = 0; p < rep->nhdrs; ++p) {
		struct pool_hdr *hdrp = HDR(rep, p);
		/* NOTE(review): HDRP/HDRN/PARTP/PARTN presumably wrap around */
		struct pool_hdr *prev_hdrp = HDRP(rep, p);
		struct pool_hdr *next_hdrp = HDRN(rep, p);

		/* set uuids in the current part */
		memcpy(hdrp->prev_part_uuid, PARTP(rep, p).uuid,
				POOL_HDR_UUID_LEN);
		memcpy(hdrp->next_part_uuid, PARTN(rep, p).uuid,
				POOL_HDR_UUID_LEN);
		util_checksum(hdrp, sizeof(*hdrp), &hdrp->checksum, 1,
				POOL_HDR_CSUM_END_OFF);

		/* set uuids in the previous part */
		memcpy(prev_hdrp->next_part_uuid, PART(rep, p).uuid,
				POOL_HDR_UUID_LEN);
		util_checksum(prev_hdrp, sizeof(*prev_hdrp),
				&prev_hdrp->checksum, 1, POOL_HDR_CSUM_END_OFF);

		/* set uuids in the next part */
		memcpy(next_hdrp->prev_part_uuid, PART(rep, p).uuid,
				POOL_HDR_UUID_LEN);
		util_checksum(next_hdrp, sizeof(*next_hdrp),
				&next_hdrp->checksum, 1, POOL_HDR_CSUM_END_OFF);

		/* store pool's header */
		util_persist(PART(rep, p).is_dev_dax, hdrp, sizeof(*hdrp));
		util_persist(PARTP(rep, p).is_dev_dax, prev_hdrp,
				sizeof(*prev_hdrp));
		util_persist(PARTN(rep, p).is_dev_dax, next_hdrp,
				sizeof(*next_hdrp));
	}

	return 0;
}
/*
 * fill_data_s -- fill a data_s record with a signature, a formatted record
 * number, random filler, and an inserted checksum (skipping SKIP_OFFSET).
 */
static void
fill_data_s(struct data_s *rec, uint64_t number)
{
	memcpy(rec->signature, Signature, sizeof(rec->signature));

	/*
	 * FIX: "%09lu" is wrong for uint64_t on platforms where
	 * unsigned long is 32-bit (Windows LLP64, ILP32); cast to
	 * unsigned long long and use %llu so the conversion is portable.
	 */
	snprintf(rec->number_str, NUMBER_LEN, "%09llu",
			(unsigned long long)number);

	rec->number = number;

	/* random filler so consecutive records differ */
	for (int i = 0; i < FILL_SIZE; i++)
		rec->fill[i] = (uint32_t)rand();

	util_checksum(rec, sizeof(*rec), &rec->checksum, 1 /* insert */,
			SKIP_OFFSET);
}
/*
 * shutdown_state_check -- compares and fixes shutdown state
 *
 * Returns 0 when the pool is safe to use (possibly after reinitializing
 * the SDS), and 1 when an ADR failure coincides with a dirty pool, i.e.
 * the pool might be corrupted.
 */
int
shutdown_state_check(struct shutdown_state *curr_sds,
	struct shutdown_state *pool_sds, struct pool_replica *rep)
{
	LOG(3, "curr_sds %p, pool_sds %p", curr_sds, pool_sds);

	/* pool SDS is all zeros while current state isn't -- adopt it */
	if (util_is_zeroed(pool_sds, sizeof(*pool_sds)) &&
			!util_is_zeroed(curr_sds, sizeof(*curr_sds))) {
		shutdown_state_reinit(curr_sds, pool_sds, rep);
		return 0;
	}

	/* do the stored device uuid and unsafe-shutdown count still match? */
	bool is_uuid_usc_correct =
		le64toh(pool_sds->usc) == le64toh(curr_sds->usc) &&
		le64toh(pool_sds->uuid) == le64toh(curr_sds->uuid);

	bool is_checksum_correct = util_checksum(pool_sds,
		sizeof(*pool_sds), &pool_sds->checksum, 0, 0);

	int dirty = pool_sds->dirty;

	if (!is_checksum_correct) {
		/* the program was killed during opening or closing the pool */
		LOG(2, "incorrect checksum - SDS will be reinitialized");
		shutdown_state_reinit(curr_sds, pool_sds, rep);
		return 0;
	}

	if (is_uuid_usc_correct) {
		if (dirty == 0)
			return 0;
		/*
		 * the program was killed when the pool was opened
		 * but there wasn't an ADR failure
		 */
		LOG(2, "the pool was not closed - SDS will be reinitialized");
		shutdown_state_reinit(curr_sds, pool_sds, rep);
		return 0;
	}

	if (dirty == 0) {
		/* an ADR failure but the pool was closed */
		LOG(2, "an ADR failure was detected but the pool was closed - SDS will be reinitialized");
		shutdown_state_reinit(curr_sds, pool_sds, rep);
		return 0;
	}

	/* an ADR failure - the pool might be corrupted */
	ERR("an ADR failure was detected, the pool might be corrupted");
	return 1;
}
/*
 * update_parts_linkage -- (internal) set uuids linking recreated parts within
 *                         a replica
 *
 * Older variant: rewrites prev/next part uuids in each header and the
 * back-links in the neighbouring headers, re-checksums and msyncs all three.
 * Always returns 0.
 */
static int
update_parts_linkage(struct pool_set *set, unsigned repn,
		struct poolset_health_status *set_hs)
{
	struct pool_replica *rep = REP(set, repn);

	for (unsigned p = 0; p < rep->nparts; ++p) {
		struct pool_hdr *hdrp = HDR(rep, p);
		/*
		 * NOTE(review): p - 1 underflows for p == 0; HDR()/PART()
		 * presumably reduce their index modulo nparts -- confirm.
		 */
		struct pool_hdr *prev_hdrp = HDR(rep, p - 1);
		struct pool_hdr *next_hdrp = HDR(rep, p + 1);

		/* set uuids in the current part */
		memcpy(hdrp->prev_part_uuid, PART(rep, p - 1).uuid,
				POOL_HDR_UUID_LEN);
		memcpy(hdrp->next_part_uuid, PART(rep, p + 1).uuid,
				POOL_HDR_UUID_LEN);
		util_checksum(hdrp, sizeof(*hdrp), &hdrp->checksum, 1);

		/* set uuids in the previous part */
		memcpy(prev_hdrp->next_part_uuid, PART(rep, p).uuid,
				POOL_HDR_UUID_LEN);
		util_checksum(prev_hdrp, sizeof(*prev_hdrp),
				&prev_hdrp->checksum, 1);

		/* set uuids in the next part */
		memcpy(next_hdrp->prev_part_uuid, PART(rep, p).uuid,
				POOL_HDR_UUID_LEN);
		util_checksum(next_hdrp, sizeof(*next_hdrp),
				&next_hdrp->checksum, 1);

		/* store pool's header */
		pmem_msync(hdrp, sizeof(*hdrp));
		pmem_msync(prev_hdrp, sizeof(*prev_hdrp));
		pmem_msync(next_hdrp, sizeof(*next_hdrp));
	}

	return 0;
}
/*
 * heap_verify_header -- (internal) verifies if the heap header is consistent
 *
 * Returns 0 when both the checksum and the signature check out,
 * -1 otherwise (after logging which check failed).
 */
static int
heap_verify_header(struct heap_header *hdr)
{
	int csum_ok = util_checksum(hdr, sizeof(*hdr), &hdr->checksum, 0);
	if (csum_ok != 1) {
		ERR("heap: invalid header's checksum");
		return -1;
	}

	int sig_ok = memcmp(hdr->signature, HEAP_SIGNATURE,
			HEAP_SIGNATURE_LEN) == 0;
	if (!sig_ok) {
		ERR("heap: invalid signature");
		return -1;
	}

	return 0;
}
/* * update_poolset_uuids -- (internal) update poolset uuid in recreated parts */ static int update_poolset_uuids(struct pool_set *set, unsigned repn, struct poolset_health_status *set_hs) { struct pool_replica *rep = REP(set, repn); for (unsigned p = 0; p < rep->nparts; ++p) { struct pool_hdr *hdrp = HDR(rep, p); memcpy(hdrp->poolset_uuid, set->uuid, POOL_HDR_UUID_LEN); util_checksum(hdrp, sizeof(*hdrp), &hdrp->checksum, 1); /* store pool's header */ pmem_msync(hdrp, sizeof(*hdrp)); } return 0; }
/* * update_poolset_uuids -- (internal) update poolset uuid in recreated parts */ static int update_poolset_uuids(struct pool_set *set, unsigned repn, struct poolset_health_status *set_hs) { LOG(3, "set %p, repn %u, set_hs %p", set, repn, set_hs); struct pool_replica *rep = REP(set, repn); for (unsigned p = 0; p < rep->nparts; ++p) { struct pool_hdr *hdrp = HDR(rep, p); memcpy(hdrp->poolset_uuid, set->uuid, POOL_HDR_UUID_LEN); util_checksum(hdrp, sizeof(*hdrp), &hdrp->checksum, 1); /* store pool's header */ util_persist(PART(rep, p).is_dev_dax, hdrp, sizeof(*hdrp)); } return 0; }
/*
 * pmemobj_descr_check -- (internal) validate obj pool descriptor
 *
 * Verifies, in order: descriptor checksum, layout name, replica size,
 * heap-covers-pool invariant, and heap page alignment.  Returns 0 when
 * all checks pass, -1 with errno = EINVAL on the first failure.
 */
static int
pmemobj_descr_check(PMEMobjpool *pop, const char *layout, size_t poolsize)
{
	LOG(3, "pop %p layout %s poolsize %zu", pop, layout, poolsize);

	/* the persistent descriptor starts right after the pool header */
	void *descp = (void *)((uintptr_t)(&pop->hdr) +
			sizeof (struct pool_hdr));

	/* verify (not insert) the descriptor checksum */
	if (!util_checksum(descp, OBJ_DSC_P_SIZE, &pop->checksum, 0)) {
		ERR("invalid checksum of pool descriptor");
		errno = EINVAL;
		return -1;
	}

	if (layout && strncmp(pop->layout, layout, PMEMOBJ_MAX_LAYOUT) != 0) {
		ERR("wrong layout (\"%s\"), "
			"pool created with layout \"%s\"", layout, pop->layout);
		errno = EINVAL;
		return -1;
	}

	if (pop->size < poolsize) {
		ERR("replica size smaller than pool size: %zu < %zu",
			pop->size, poolsize);
		errno = EINVAL;
		return -1;
	}

	if (pop->heap_offset + pop->heap_size != poolsize) {
		ERR("heap size does not match pool size: %zu != %zu",
			pop->heap_offset + pop->heap_size, poolsize);
		errno = EINVAL;
		return -1;
	}

	if (pop->heap_offset % Pagesize || pop->heap_size % Pagesize) {
		ERR("unaligned heap: off %ju, size %zu",
			pop->heap_offset, pop->heap_size);
		errno = EINVAL;
		return -1;
	}

	return 0;
}
/*
 * heap_write_header -- (internal) creates a clean header
 *
 * Builds a fully-initialized header in a local copy, inserts the checksum
 * into it, and only then publishes the whole struct to *hdr.
 */
static void
heap_write_header(struct heap_header *hdr, size_t size)
{
	struct heap_header fresh = {
		.signature = HEAP_SIGNATURE,
		.major = HEAP_MAJOR,
		.minor = HEAP_MINOR,
		.size = size,
		.chunksize = CHUNKSIZE,
		.chunks_per_zone = MAX_CHUNK,
		.reserved = {0},
		.checksum = 0
	};

	/* checksum is computed over the local copy with .checksum == 0 */
	util_checksum(&fresh, sizeof(fresh), &fresh.checksum, 1);

	*hdr = fresh;
}
/*
 * shutdown_state_add_part -- adds file uuid and usc to shutdown_state struct
 *
 * if path does not exist it will fail which does NOT mean shutdown failure
 *
 * Accumulates the part's unsafe-shutdown count into sds->usc and a 64-bit
 * fold (via util_checksum) of the part's DIMM uid into sds->uuid, then
 * flushes and re-checksums the SDS.  Returns 0 on success, 1 on any error.
 */
int
shutdown_state_add_part(struct shutdown_state *sds, const char *path,
	struct pool_replica *rep)
{
	LOG(3, "sds %p, path %s", sds, path);

	size_t len = 0;
	char *uid;
	uint64_t usc;

	/* unsafe shutdown counter of the device backing 'path' */
	if (os_dimm_usc(path, &usc)) {
		ERR("cannot read unsafe shutdown count of %s", path);
		return 1;
	}

	/* first call only queries the required uid buffer length */
	if (os_dimm_uid(path, NULL, &len)) {
		ERR("cannot read uuid of %s", path);
		return 1;
	}

	/*
	 * NOTE(review): rounds len up to a multiple of 4, adding a full 4
	 * bytes when len is already aligned -- presumably intentional
	 * padding for the checksum fold below; confirm.
	 */
	len += 4 - len % 4;

	uid = Zalloc(len);

	if (uid == NULL) {
		ERR("!Zalloc");
		return 1;
	}

	if (os_dimm_uid(path, uid, &len)) {
		ERR("cannot read uuid of %s", path);
		Free(uid);
		return 1;
	}

	/* accumulate usc; the field is stored little-endian */
	sds->usc = htole64(le64toh(sds->usc) + usc);

	/* fold the uid string into a 64-bit value and accumulate it too */
	uint64_t tmp;
	util_checksum(uid, len, &tmp, 1, 0);

	sds->uuid = htole64(le64toh(sds->uuid) + tmp);

	FLUSH_SDS(sds, rep);
	Free(uid);
	/* recompute the SDS checksum over the updated fields and flush */
	shutdown_state_checksum(sds, rep);
	return 0;
}
/* * update_replica_header -- (internal) update field values in the first header * in the replica */ static void update_replica_header(struct pool_set *set, unsigned repn) { LOG(3, "set %p, repn %u", set, repn); struct pool_replica *rep = REP(set, repn); struct pool_set_part *part = PART(REP(set, repn), 0); struct pool_hdr *hdr = (struct pool_hdr *)part->hdr; if (set->options & OPTION_SINGLEHDR) { hdr->incompat_features |= POOL_FEAT_SINGLEHDR; memcpy(hdr->next_part_uuid, hdr->uuid, POOL_HDR_UUID_LEN); memcpy(hdr->prev_part_uuid, hdr->uuid, POOL_HDR_UUID_LEN); } else { hdr->incompat_features &= (uint32_t)(~POOL_FEAT_SINGLEHDR); } util_checksum(hdr, sizeof(*hdr), &hdr->checksum, 1, POOL_HDR_CSUM_END_OFF); util_persist_auto(rep->is_pmem, hdr, sizeof(*hdr)); }
/*
 * pool_hdr_checksum_fix -- (internal) fix checksum
 *
 * Answers the Q_CHECKSUM repair question by re-inserting the pool header
 * checksum; any other question id is reported as unimplemented.
 * Always returns 0.
 */
static int
pool_hdr_checksum_fix(PMEMpoolcheck *ppc, location *loc, uint32_t question,
	void *context)
{
	LOG(3, NULL);
	ASSERTne(loc, NULL);

	if (question == Q_CHECKSUM) {
		util_checksum(&loc->hdr, sizeof(loc->hdr), &loc->hdr.checksum,
			1, POOL_HDR_CSUM_END_OFF);
		CHECK_INFO(ppc, "%ssetting pool_hdr.checksum to 0x%jx",
			loc->prefix, le64toh(loc->hdr.checksum));
	} else {
		ERR("not implemented question id: %u", question);
	}

	return 0;
}
/* * util_convert_hdr -- convert header to host byte order & validate * * Returns true if header is valid, and all the integer fields are * converted to host byte order. If the header is not valid, this * routine returns false and the header passed in is left in an * unknown state. */ int util_convert_hdr(struct pool_hdr *hdrp) { LOG(3, "hdrp %p", hdrp); util_convert2h_hdr_nocheck(hdrp); /* to be valid, a header must have a major version of at least 1 */ if (hdrp->major == 0) { ERR("invalid major version (0)"); return 0; } /* and to be valid, the fields must checksum correctly */ if (!util_checksum(hdrp, sizeof(*hdrp), &hdrp->checksum, 0)) { ERR("invalid checksum of pool header"); return 0; } LOG(3, "valid header, signature \"%.8s\"", hdrp->signature); return 1; }
/*
 * replica_check_store_size -- (internal) store size from pool descriptor for
 *                             replica
 *
 * Reads the pool descriptor (remotely or via a temporary local mapping),
 * verifies its checksum, and records heap_offset + heap_size as the
 * replica's pool size in set_hs.  A bad checksum marks the replica broken
 * (still returns 0); only read/map failures return -1.
 */
static int
replica_check_store_size(struct pool_set *set,
	struct poolset_health_status *set_hs, unsigned repn)
{
	LOG(3, "set %p, set_hs %p, repn %u", set, set_hs, repn);
	struct pool_replica *rep = set->replica[repn];
	struct pmemobjpool pop;

	if (rep->remote) {
		/* local copy of the header already exists; fetch the rest */
		memcpy(&pop.hdr, rep->part[0].hdr, sizeof(pop.hdr));
		void *descr = (void *)((uintptr_t)&pop + POOL_HDR_SIZE);
		if (Rpmem_read(rep->remote->rpp, descr, POOL_HDR_SIZE,
				sizeof(pop) - POOL_HDR_SIZE, 0)) {
			return -1;
		}
	} else {
		/* round up map size to Mmap align size */
		if (util_map_part(&rep->part[0], NULL,
				MMAP_ALIGN_UP(sizeof(pop)), 0, MAP_SHARED, 1)) {
			return -1;
		}

		memcpy(&pop, rep->part[0].addr, sizeof(pop));

		util_unmap_part(&rep->part[0]);
	}

	/* the descriptor follows the header; verify its checksum */
	void *dscp = (void *)((uintptr_t)&pop + sizeof(pop.hdr));

	if (!util_checksum(dscp, OBJ_DSC_P_SIZE, &pop.checksum, 0, 0)) {
		set_hs->replica[repn]->flags |= IS_BROKEN;
		return 0;
	}

	set_hs->replica[repn]->pool_size = pop.heap_offset + pop.heap_size;
	return 0;
}
/* * read_info -- (internal) convert btt_info to host byte order & validate * * Returns true if info block is valid, and all the integer fields are * converted to host byte order. If the info block is not valid, this * routine returns false and the info block passed in is left in an * unknown state. */ static int read_info(struct btt_info *infop) { LOG(3, "infop %p", infop); if (memcmp(infop->sig, Sig, BTTINFO_SIG_LEN)) { LOG(3, "signature invalid"); return 0; } /* to be valid, info block must have a major version of at least 1 */ if ((infop->major = le16toh(infop->major)) == 0) { LOG(3, "invalid major version (0)"); return 0; } infop->flags = le32toh(infop->flags); infop->minor = le16toh(infop->minor); infop->external_lbasize = le32toh(infop->external_lbasize); infop->external_nlba = le32toh(infop->external_nlba); infop->internal_lbasize = le32toh(infop->internal_lbasize); infop->internal_nlba = le32toh(infop->internal_nlba); infop->nfree = le32toh(infop->nfree); infop->infosize = le32toh(infop->infosize); infop->nextoff = le64toh(infop->nextoff); infop->dataoff = le64toh(infop->dataoff); infop->mapoff = le64toh(infop->mapoff); infop->flogoff = le64toh(infop->flogoff); infop->infooff = le64toh(infop->infooff); infop->checksum = le64toh(infop->checksum); /* and to be valid, the fields must checksum correctly */ if (!util_checksum(infop, sizeof (*infop), &infop->checksum, 0)) { LOG(3, "invalid checksum"); return 0; } return 1; }
/* * update_uuids -- (internal) update uuids in all headers in the replica */ static void update_uuids(struct pool_set *set, unsigned repn) { LOG(3, "set %p, repn %u", set, repn); struct pool_replica *rep = REP(set, repn); struct pool_hdr *hdr0 = HDR(rep, 0); for (unsigned p = 0; p < rep->nhdrs; ++p) { struct pool_hdr *hdrp = HDR(rep, p); memcpy(hdrp->next_part_uuid, PARTN(rep, p)->uuid, POOL_HDR_UUID_LEN); memcpy(hdrp->prev_part_uuid, PARTP(rep, p)->uuid, POOL_HDR_UUID_LEN); memcpy(hdrp->next_repl_uuid, hdr0->next_repl_uuid, POOL_HDR_UUID_LEN); memcpy(hdrp->prev_repl_uuid, hdr0->prev_repl_uuid, POOL_HDR_UUID_LEN); memcpy(hdrp->poolset_uuid, hdr0->poolset_uuid, POOL_HDR_UUID_LEN); util_checksum(hdrp, sizeof(*hdrp), &hdrp->checksum, 1, POOL_HDR_CSUM_END_OFF); util_persist(PART(rep, p)->is_dev_dax, hdrp, sizeof(*hdrp)); } }
/*
 * replica_check_store_size -- (internal) store size from pool descriptor for
 *                             replica
 *
 * Older variant: reads the pool descriptor (remotely or via a temporary
 * private mapping), verifies its checksum, and records heap_offset +
 * heap_size as the replica's pool size in set_hs.  A bad checksum marks
 * the replica broken (still returns 0); only read/map failures return -1.
 */
static int
replica_check_store_size(struct pool_set *set,
	struct poolset_health_status *set_hs, unsigned r)
{
	struct pool_replica *rep = set->replica[r];
	struct pmemobjpool pop;

	if (rep->remote) {
		/* local copy of the header already exists; fetch the rest */
		memcpy(&pop.hdr, rep->part[0].hdr, sizeof(pop.hdr));
		void *descr = (void *)((uintptr_t)&pop + POOL_HDR_SIZE);
		if (Rpmem_read(rep->remote->rpp, descr, 0,
				sizeof(pop) - POOL_HDR_SIZE)) {
			return -1;
		}
	} else {
		/* map just enough of part 0 to copy out the descriptor */
		if (util_map_part(&rep->part[0], NULL, sizeof(pop), 0,
				MAP_PRIVATE|MAP_NORESERVE)) {
			return -1;
		}

		memcpy(&pop, rep->part[0].addr, sizeof(pop));

		util_unmap_part(&rep->part[0]);
	}

	/* the descriptor follows the header; verify its checksum */
	void *dscp = (void *)((uintptr_t)&pop + sizeof(pop.hdr));

	if (!util_checksum(dscp, OBJ_DSC_P_SIZE, &pop.checksum, 0)) {
		set_hs->replica[r]->flags |= IS_BROKEN;
		return 0;
	}

	set_hs->replica[r]->pool_size = pop.heap_offset + pop.heap_size;
	return 0;
}
/* * pmempool_convert_func -- main function for convert command */ int pmempool_convert_func(char *appname, int argc, char *argv[]) { if (argc != 2) { print_usage(appname); return -1; } int ret = 0; const char *f = argv[1]; struct pmem_pool_params params; if (pmem_pool_parse_params(f, ¶ms, 1)) { fprintf(stderr, "Cannot determine type of pool.\n"); return -1; } if (params.is_part) { fprintf(stderr, "Conversion cannot be performed on " "a poolset part.\n"); return -1; } if (params.type != PMEM_POOL_TYPE_OBJ) { fprintf(stderr, "Conversion is currently supported only for " "pmemobj pools.\n"); return -1; } struct pool_set_file *psf = pool_set_file_open(f, 0, 1); if (psf == NULL) { perror(f); return -1; } if (psf->poolset->remote) { fprintf(stderr, "Conversion of remotely replicated pools is " "currently not supported. Remove the replica first\n"); pool_set_file_close(psf); return -1; } void *addr = pool_set_file_map(psf, 0); if (addr == NULL) { perror(f); ret = -1; goto out; } struct pool_hdr *phdr = addr; uint32_t m = le32toh(phdr->major); if (m >= COUNT_OF(version_convert) || !version_convert[m]) { fprintf(stderr, "There's no conversion method for the pool.\n" "Please make sure the pmempool utility " "is up-to-date.\n"); ret = -1; goto out; } printf("This tool will update the pool to the latest available " "layout version.\nThis process is NOT fail-safe.\n" "Proceed only if the pool has been backed up or\n" "the risks are fully understood and acceptable.\n"); if (ask_Yn('?', "convert the pool '%s' ?", f) != 'y') { ret = 0; goto out; } PMEMobjpool *pop = addr; for (unsigned r = 0; r < psf->poolset->nreplicas; ++r) { struct pool_replica *rep = psf->poolset->replica[r]; for (unsigned p = 0; p < rep->nparts; ++p) { struct pool_set_part *part = &rep->part[p]; if (util_map_hdr(part, MAP_SHARED, 0) != 0) { fprintf(stderr, "Failed to map headers.\n" "Conversion did not start.\n"); ret = -1; goto out; } } } uint32_t i; for (i = m; i < COUNT_OF(version_convert); ++i) { if 
(version_convert[i](psf, pop) != 0) { fprintf(stderr, "Failed to convert the pool\n"); break; } else { /* need to update every header of every part */ uint32_t target_m = i + 1; for (unsigned r = 0; r < psf->poolset->nreplicas; ++r) { struct pool_replica *rep = psf->poolset->replica[r]; for (unsigned p = 0; p < rep->nparts; ++p) { struct pool_set_part *part = &rep->part[p]; struct pool_hdr *hdr = part->hdr; hdr->major = htole32(target_m); util_checksum(hdr, sizeof(*hdr), &hdr->checksum, 1); PERSIST_GENERIC_AUTO(hdr, sizeof(struct pool_hdr)); } } } } if (i != m) /* at least one step has been performed */ printf("The pool has been converted to version %d\n.", i); PERSIST_GENERIC_AUTO(pop, psf->size); out: for (unsigned r = 0; r < psf->poolset->nreplicas; ++r) { struct pool_replica *rep = psf->poolset->replica[r]; for (unsigned p = 0; p < rep->nparts; ++p) { struct pool_set_part *part = &rep->part[p]; if (part->hdr != NULL) util_unmap_hdr(part); } } pool_set_file_close(psf); return ret; }
/*
 * main -- checksum unit test: cross-checks util_checksum() against the
 * local gold-standard fletcher64() for every possible checksum location
 * in each input file, then exercises the skip-offset variant using a
 * second mapping of the same file.
 */
int
main(int argc, char *argv[])
{
	START(argc, argv, "checksum");

	if (argc < 2)
		UT_FATAL("usage: %s files...", argv[0]);

	for (int arg = 1; arg < argc; arg++) {
		int fd = OPEN(argv[arg], O_RDONLY);

		os_stat_t stbuf;
		FSTAT(fd, &stbuf);

		size_t size = (size_t)stbuf.st_size;
		void *addr = MMAP(NULL, size, PROT_READ|PROT_WRITE,
				MAP_PRIVATE, fd, 0);

		uint64_t *ptr = addr;

		/*
		 * Loop through, selecting successive locations
		 * where the checksum lives in this block, and
		 * let util_checksum() insert it so it can be
		 * verified against the gold standard fletcher64
		 * routine in this file.
		 */
		while ((char *)(ptr + 1) < (char *)addr + size) {
			/* save whatever was at *ptr */
			uint64_t oldval = *ptr;

			/* mess with it */
			*ptr = htole64(0x123);

			/*
			 * calculate a checksum and have it installed
			 */
			util_checksum(addr, size, ptr, 1, 0);

			uint64_t csum = *ptr;

			/*
			 * verify inserted checksum checks out
			 */
			UT_ASSERT(util_checksum(addr, size, ptr, 0, 0));

			/* put a zero where the checksum was installed */
			*ptr = 0;

			/* calculate a checksum */
			uint64_t gold_csum = fletcher64(addr, size);

			/* put the old value back */
			*ptr = oldval;

			/*
			 * verify checksum now fails
			 */
			UT_ASSERT(!util_checksum(addr, size, ptr, 0, 0));

			/*
			 * verify the checksum matched the gold version
			 */
			UT_ASSERTeq(csum, gold_csum);

			/*
			 * NOTE(review): the %PRIu64 argument is a pointer
			 * difference (ptrdiff_t), which matches uint64_t only
			 * on LP64 -- confirm an explicit cast isn't needed.
			 */
			UT_OUT("%s:%" PRIu64 " 0x%" PRIx64, argv[arg],
				(char *)ptr - (char *)addr, csum);

			ptr++;
		}

		/* second mapping used as the reference for skip-offset mode */
		uint64_t *addr2 = MMAP(NULL, size, PROT_READ|PROT_WRITE,
				MAP_PRIVATE, fd, 0);

		uint64_t *csum = (uint64_t *)addr;

		/*
		 * put a zero where the checksum will be installed
		 * in the second map
		 */
		*addr2 = 0;
		for (size_t i = size / 8 - 1; i > 0; i -= 1) {
			/* calculate a checksum and have it installed */
			util_checksum(addr, size, csum, 1, i * 8);

			/*
			 * put a zero in the second map where an ignored part is
			 */
			*(addr2 + i) = 0;

			/* calculate a checksum */
			uint64_t gold_csum = fletcher64(addr2, size);

			/*
			 * verify the checksum matched the gold version
			 */
			UT_ASSERTeq(*csum, gold_csum);
		}

		CLOSE(fd);
		MUNMAP(addr, size);
		MUNMAP(addr2, size);
	}

	DONE(NULL);
}
/*
 * util_header_create -- (internal) create header of a single pool set file
 *
 * Requires the target header region to be all zeros.  Zeroes the descriptor
 * first (crash safety), fills in signature/versions/uuids/links/arch flags,
 * and only then inserts the checksum and msyncs the header.
 * Returns 0 on success, -1 with errno = EINVAL on failure.
 */
static int
util_header_create(struct pool_set *set, unsigned repidx, unsigned partidx,
	size_t hdrsize, const char *sig, uint32_t major, uint32_t compat,
	uint32_t incompat, uint32_t ro_compat)
{
	LOG(3, "set %p repidx %u partidx %u hdrsize %zu sig %s major %u "
		"compat %#x incompat %#x ro_comapt %#x",
		set, repidx, partidx, hdrsize, sig, major, compat, incompat,
		ro_compat);

	struct pool_replica *rep = set->replica[repidx];

	/* opaque info lives at the beginning of mapped memory pool */
	struct pool_hdr *hdrp = rep->part[partidx].hdr;

	/* check if the pool header is all zeros */
	if (!util_is_zeroed(hdrp, sizeof (*hdrp))) {
		ERR("Non-empty file detected");
		errno = EINVAL;
		return -1;
	}

	/*
	 * Zero out the pool descriptor - just in case we fail right after
	 * header checksum is stored.
	 */
	void *descp = (void *)((uintptr_t)hdrp + sizeof (*hdrp));
	memset(descp, 0, hdrsize - sizeof (*hdrp));
	pmem_msync(descp, hdrsize - sizeof (*hdrp));

	/* create pool's header */
	strncpy(hdrp->signature, sig, POOL_HDR_SIG_LEN);
	hdrp->major = htole32(major);
	hdrp->compat_features = htole32(compat);
	hdrp->incompat_features = htole32(incompat);
	hdrp->ro_compat_features = htole32(ro_compat);

	memcpy(hdrp->poolset_uuid, set->uuid, POOL_HDR_UUID_LEN);
	memcpy(hdrp->uuid, PART(rep, partidx).uuid, POOL_HDR_UUID_LEN);

	/*
	 * link parts
	 * NOTE(review): partidx - 1 underflows for partidx == 0; PART()/REP()
	 * presumably reduce their index modulo the count -- confirm.
	 */
	memcpy(hdrp->prev_part_uuid, PART(rep, partidx - 1).uuid,
		POOL_HDR_UUID_LEN);
	memcpy(hdrp->next_part_uuid, PART(rep, partidx + 1).uuid,
		POOL_HDR_UUID_LEN);

	/* link replicas */
	memcpy(hdrp->prev_repl_uuid, PART(REP(set, repidx - 1), 0).uuid,
		POOL_HDR_UUID_LEN);
	memcpy(hdrp->next_repl_uuid, PART(REP(set, repidx + 1), 0).uuid,
		POOL_HDR_UUID_LEN);

	hdrp->crtime = htole64((uint64_t)time(NULL));

	if (util_get_arch_flags(&hdrp->arch_flags)) {
		ERR("Reading architecture flags failed\n");
		errno = EINVAL;
		return -1;
	}

	/* byte-swap the multi-byte arch flag fields in place */
	hdrp->arch_flags.alignment_desc =
		htole64(hdrp->arch_flags.alignment_desc);
	hdrp->arch_flags.e_machine =
		htole16(hdrp->arch_flags.e_machine);

	/* insert the checksum last, over the fully populated header */
	util_checksum(hdrp, sizeof (*hdrp), &hdrp->checksum, 1);

	/* store pool's header */
	pmem_msync(hdrp, sizeof (*hdrp));

	return 0;
}
/*
 * pmemlog_map_common -- (internal) map a log memory pool
 *
 * This routine does all the work, but takes a rdonly flag so internal
 * calls can map a read-only pool if required.
 *
 * If empty flag is set, the file is assumed to be a new memory pool, and
 * a new pool header is created. Otherwise, a valid header must exist.
 *
 * Returns the run-time pool handle on success; NULL with errno set on
 * failure (the mapping is torn down on every error path).
 */
static PMEMlogpool *
pmemlog_map_common(int fd, size_t poolsize, int rdonly, int empty)
{
	LOG(3, "fd %d poolsize %zu rdonly %d empty %d",
			fd, poolsize, rdonly, empty);

	void *addr;
	if ((addr = util_map(fd, poolsize, rdonly)) == NULL) {
		(void) close(fd);
		return NULL;	/* util_map() set errno, called LOG */
	}

	VALGRIND_REGISTER_PMEM_MAPPING(addr, poolsize);
	VALGRIND_REGISTER_PMEM_FILE(fd, addr, poolsize, 0);

	/* fd is no longer needed once the pool is mapped */
	(void) close(fd);

	/* check if the mapped region is located in persistent memory */
	int is_pmem = pmem_is_pmem(addr, poolsize);

	/* opaque info lives at the beginning of mapped memory pool */
	struct pmemlog *plp = addr;

	if (!empty) {
		/* existing pool: validate the header and the log offsets */
		struct pool_hdr hdr;

		memcpy(&hdr, &plp->hdr, sizeof (hdr));

		if (!util_convert_hdr(&hdr)) {
			errno = EINVAL;
			goto err;
		}

		/*
		 * valid header found
		 */
		if (strncmp(hdr.signature, LOG_HDR_SIG, POOL_HDR_SIG_LEN)) {
			ERR("wrong pool type: \"%s\"", hdr.signature);
			errno = EINVAL;
			goto err;
		}

		if (hdr.major != LOG_FORMAT_MAJOR) {
			ERR("log pool version %d (library expects %d)",
				hdr.major, LOG_FORMAT_MAJOR);
			errno = EINVAL;
			goto err;
		}

		/* XXX - pools sets / replicas */
		if (memcmp(hdr.uuid, hdr.prev_part_uuid, POOL_HDR_UUID_LEN) ||
		    memcmp(hdr.uuid, hdr.next_part_uuid, POOL_HDR_UUID_LEN) ||
		    memcmp(hdr.uuid, hdr.prev_repl_uuid, POOL_HDR_UUID_LEN) ||
		    memcmp(hdr.uuid, hdr.next_repl_uuid, POOL_HDR_UUID_LEN)) {
			ERR("wrong UUID");
			errno = EINVAL;
			goto err;
		}

		uint64_t hdr_start = le64toh(plp->start_offset);
		uint64_t hdr_end = le64toh(plp->end_offset);
		uint64_t hdr_write = le64toh(plp->write_offset);

		/* sanity-check the persistent log offsets */
		if ((hdr_start != roundup(sizeof (*plp),
					LOG_FORMAT_DATA_ALIGN)) ||
			(hdr_end != poolsize) || (hdr_start > hdr_end)) {
			ERR("wrong start/end offsets (start: %ju end: %ju), "
				"pool size %zu",
				hdr_start, hdr_end, poolsize);
			errno = EINVAL;
			goto err;
		}

		if ((hdr_write > hdr_end) || (hdr_write < hdr_start)) {
			ERR("wrong write offset "
				"(start: %ju end: %ju write: %ju)",
				hdr_start, hdr_end, hdr_write);
			errno = EINVAL;
			goto err;
		}

		LOG(3, "start: %ju, end: %ju, write: %ju",
			hdr_start, hdr_end, hdr_write);

		/* unknown features may force read-only mode or a failure */
		int retval = util_feature_check(&hdr, LOG_FORMAT_INCOMPAT,
				LOG_FORMAT_RO_COMPAT, LOG_FORMAT_COMPAT);
		if (retval < 0)
			goto err;
		else if (retval == 0)
			rdonly = 1;
	} else {
		LOG(3, "creating new log memory pool");

		ASSERTeq(rdonly, 0);

		struct pool_hdr *hdrp = &plp->hdr;

		/* check if the pool header is all zero */
		if (!util_is_zeroed(hdrp, sizeof (*hdrp))) {
			ERR("Non-empty file detected");
			errno = EINVAL;
			goto err;
		}

		/* create required metadata first */
		plp->start_offset = htole64(roundup(sizeof (*plp),
						LOG_FORMAT_DATA_ALIGN));
		plp->end_offset = htole64(poolsize);
		plp->write_offset = plp->start_offset;

		/* store non-volatile part of pool's descriptor */
		pmem_msync(&plp->start_offset, 3 * sizeof (uint64_t));

		/* create pool header */
		strncpy(hdrp->signature, LOG_HDR_SIG, POOL_HDR_SIG_LEN);
		hdrp->major = htole32(LOG_FORMAT_MAJOR);
		hdrp->compat_features = htole32(LOG_FORMAT_COMPAT);
		hdrp->incompat_features = htole32(LOG_FORMAT_INCOMPAT);
		hdrp->ro_compat_features = htole32(LOG_FORMAT_RO_COMPAT);
		uuid_generate(hdrp->uuid);
		/* XXX - pools sets / replicas */
		uuid_generate(hdrp->poolset_uuid);
		/* single-part pool: every link points at this header */
		memcpy(hdrp->prev_part_uuid, hdrp->uuid, POOL_HDR_UUID_LEN);
		memcpy(hdrp->next_part_uuid, hdrp->uuid, POOL_HDR_UUID_LEN);
		memcpy(hdrp->prev_repl_uuid, hdrp->uuid, POOL_HDR_UUID_LEN);
		memcpy(hdrp->next_repl_uuid, hdrp->uuid, POOL_HDR_UUID_LEN);
		hdrp->crtime = htole64((uint64_t)time(NULL));

		if (util_get_arch_flags(&hdrp->arch_flags)) {
			ERR("Reading architecture flags failed\n");
			errno = EINVAL;
			goto err;
		}

		hdrp->arch_flags.alignment_desc =
			htole64(hdrp->arch_flags.alignment_desc);
		hdrp->arch_flags.e_machine =
			htole16(hdrp->arch_flags.e_machine);

		/* checksum goes in last, over the fully populated header */
		util_checksum(hdrp, sizeof (*hdrp), &hdrp->checksum, 1);

		/* store pool's header */
		pmem_msync(hdrp, sizeof (*hdrp));
	}

	/* remove volatile part of header */
	VALGRIND_REMOVE_PMEM_MAPPING(&plp->addr,
			sizeof (struct pmemlog) -
			sizeof (struct pool_hdr) -
			3 * sizeof (uint64_t));

	/*
	 * Use some of the memory pool area for run-time info. This
	 * run-time state is never loaded from the file, it is always
	 * created here, so no need to worry about byte-order.
	 */
	plp->addr = addr;
	plp->size = poolsize;
	plp->rdonly = rdonly;
	plp->is_pmem = is_pmem;

	if ((plp->rwlockp = Malloc(sizeof (*plp->rwlockp))) == NULL) {
		ERR("!Malloc for a RW lock");
		goto err;
	}

	if ((errno = pthread_rwlock_init(plp->rwlockp, NULL))) {
		ERR("!pthread_rwlock_init");
		goto err_free;
	}

	/*
	 * If possible, turn off all permissions on the pool header page.
	 *
	 * The prototype PMFS doesn't allow this when large pages are in
	 * use. It is not considered an error if this fails.
	 */
	util_range_none(addr, sizeof (struct pool_hdr));

	/* the rest should be kept read-only (debug version only) */
	RANGE_RO(addr + sizeof (struct pool_hdr),
			poolsize - sizeof (struct pool_hdr));

	LOG(3, "plp %p", plp);
	return plp;

err_free:
	Free((void *)plp->rwlockp);
err:
	LOG(4, "error clean up");
	int oerrno = errno;
	VALGRIND_REMOVE_PMEM_MAPPING(addr, poolsize);
	util_unmap(addr, poolsize);
	errno = oerrno;
	return NULL;
}
/*
 * main -- checksum unit test: cross-checks util_checksum() against the
 * local gold-standard fletcher64() for every possible checksum location
 * in each input file.
 *
 * Fixes over the previous version:
 *  - pointer arithmetic on void * (a GNU extension, not standard C)
 *    replaced with char * arithmetic;
 *  - "%lu"/"%lx" used for uint64_t and a pointer difference replaced with
 *    "%llu"/"%llx" plus explicit unsigned long long casts (unsigned long
 *    is only 32-bit on LLP64/ILP32 targets).
 */
int
main(int argc, char *argv[])
{
	START(argc, argv, "checksum");

	if (argc < 2)
		FATAL("usage: %s files...", argv[0]);

	for (int arg = 1; arg < argc; arg++) {
		int fd = OPEN(argv[arg], O_RDONLY);

		struct stat stbuf;
		FSTAT(fd, &stbuf);
		size_t size = (size_t)stbuf.st_size;

		void *addr = MMAP(0, size, PROT_READ|PROT_WRITE,
				MAP_PRIVATE, fd, 0);

		close(fd);

		uint64_t *ptr = addr;

		/*
		 * Loop through, selecting successive locations
		 * where the checksum lives in this block, and
		 * let util_checksum() insert it so it can be
		 * verified against the gold standard fletcher64
		 * routine in this file.
		 */
		while ((char *)(ptr + 1) < (char *)addr + size) {
			/* save whatever was at *ptr */
			uint64_t oldval = *ptr;

			/* mess with it */
			*ptr = htole64(0x123);

			/*
			 * calc a checksum and have it installed
			 */
			util_checksum(addr, size, ptr, 1);

			uint64_t csum = *ptr;

			/*
			 * verify inserted checksum checks out
			 */
			ASSERT(util_checksum(addr, size, ptr, 0));

			/* put a zero where the checksum was installed */
			*ptr = 0;

			/* calculate a checksum */
			uint64_t gold_csum = fletcher64(addr, size);

			/* put the old value back */
			*ptr = oldval;

			/*
			 * verify checksum now fails
			 */
			ASSERT(!util_checksum(addr, size, ptr, 0));

			/*
			 * verify the checksum matched the gold version
			 */
			ASSERTeq(csum, gold_csum);

			OUT("%s:%llu 0x%llx", argv[arg],
				(unsigned long long)
					((char *)ptr - (char *)addr),
				(unsigned long long)csum);

			ptr++;
		}
	}

	DONE(NULL);
}