/* * update_replicas_linkage -- (internal) update uuids linking replicas */ static int update_replicas_linkage(struct pool_set *set, unsigned repn) { LOG(3, "set %p, repn %u", set, repn); struct pool_replica *rep = REP(set, repn); struct pool_replica *prev_r = REPP(set, repn); struct pool_replica *next_r = REPN(set, repn); ASSERT(rep->nparts > 0); ASSERT(prev_r->nparts > 0); ASSERT(next_r->nparts > 0); /* set uuids in the current replica */ for (unsigned p = 0; p < rep->nhdrs; ++p) { struct pool_hdr *hdrp = HDR(rep, p); memcpy(hdrp->prev_repl_uuid, PART(prev_r, 0).uuid, POOL_HDR_UUID_LEN); memcpy(hdrp->next_repl_uuid, PART(next_r, 0).uuid, POOL_HDR_UUID_LEN); util_checksum(hdrp, sizeof(*hdrp), &hdrp->checksum, 1, POOL_HDR_CSUM_END_OFF); /* store pool's header */ util_persist(PART(rep, p).is_dev_dax, hdrp, sizeof(*hdrp)); } /* set uuids in the previous replica */ for (unsigned p = 0; p < prev_r->nhdrs; ++p) { struct pool_hdr *prev_hdrp = HDR(prev_r, p); memcpy(prev_hdrp->next_repl_uuid, PART(rep, 0).uuid, POOL_HDR_UUID_LEN); util_checksum(prev_hdrp, sizeof(*prev_hdrp), &prev_hdrp->checksum, 1, POOL_HDR_CSUM_END_OFF); /* store pool's header */ util_persist(PART(prev_r, p).is_dev_dax, prev_hdrp, sizeof(*prev_hdrp)); } /* set uuids in the next replica */ for (unsigned p = 0; p < next_r->nhdrs; ++p) { struct pool_hdr *next_hdrp = HDR(next_r, p); memcpy(next_hdrp->prev_repl_uuid, PART(rep, 0).uuid, POOL_HDR_UUID_LEN); util_checksum(next_hdrp, sizeof(*next_hdrp), &next_hdrp->checksum, 1, POOL_HDR_CSUM_END_OFF); /* store pool's header */ util_persist(PART(next_r, p).is_dev_dax, next_hdrp, sizeof(*next_hdrp)); } return 0; }
/*
 * blk_descr_create -- (internal) create block memory pool descriptor
 *
 * Stores the block size (converted with htole32) and the "zeroed" flag in the
 * pool descriptor. Each field is persisted right after it is written.
 */
static void
blk_descr_create(PMEMblkpool *pbp, uint32_t bsize, int zeroed)
{
	LOG(3, "pbp %p bsize %u zeroed %d", pbp, bsize, zeroed);

	/* block size is kept in the descriptor in little-endian form */
	const uint32_t bsize_le = htole32(bsize);

	pbp->bsize = bsize_le;
	util_persist(pbp->is_pmem, &pbp->bsize, sizeof(uint32_t));

	/* remember whether the pool was reported as zeroed */
	pbp->is_zeroed = zeroed;
	util_persist(pbp->is_pmem, &pbp->is_zeroed, sizeof(pbp->is_zeroed));
}
/* * update_parts_linkage -- (internal) set uuids linking recreated parts within * a replica */ static int update_parts_linkage(struct pool_set *set, unsigned repn, struct poolset_health_status *set_hs) { LOG(3, "set %p, repn %u, set_hs %p", set, repn, set_hs); struct pool_replica *rep = REP(set, repn); for (unsigned p = 0; p < rep->nhdrs; ++p) { struct pool_hdr *hdrp = HDR(rep, p); struct pool_hdr *prev_hdrp = HDRP(rep, p); struct pool_hdr *next_hdrp = HDRN(rep, p); /* set uuids in the current part */ memcpy(hdrp->prev_part_uuid, PARTP(rep, p).uuid, POOL_HDR_UUID_LEN); memcpy(hdrp->next_part_uuid, PARTN(rep, p).uuid, POOL_HDR_UUID_LEN); util_checksum(hdrp, sizeof(*hdrp), &hdrp->checksum, 1, POOL_HDR_CSUM_END_OFF); /* set uuids in the previous part */ memcpy(prev_hdrp->next_part_uuid, PART(rep, p).uuid, POOL_HDR_UUID_LEN); util_checksum(prev_hdrp, sizeof(*prev_hdrp), &prev_hdrp->checksum, 1, POOL_HDR_CSUM_END_OFF); /* set uuids in the next part */ memcpy(next_hdrp->prev_part_uuid, PART(rep, p).uuid, POOL_HDR_UUID_LEN); util_checksum(next_hdrp, sizeof(*next_hdrp), &next_hdrp->checksum, 1, POOL_HDR_CSUM_END_OFF); /* store pool's header */ util_persist(PART(rep, p).is_dev_dax, hdrp, sizeof(*hdrp)); util_persist(PARTP(rep, p).is_dev_dax, prev_hdrp, sizeof(*prev_hdrp)); util_persist(PARTN(rep, p).is_dev_dax, next_hdrp, sizeof(*next_hdrp)); } return 0; }
/* * update_poolset_uuids -- (internal) update poolset uuid in recreated parts */ static int update_poolset_uuids(struct pool_set *set, unsigned repn, struct poolset_health_status *set_hs) { LOG(3, "set %p, repn %u, set_hs %p", set, repn, set_hs); struct pool_replica *rep = REP(set, repn); for (unsigned p = 0; p < rep->nparts; ++p) { struct pool_hdr *hdrp = HDR(rep, p); memcpy(hdrp->poolset_uuid, set->uuid, POOL_HDR_UUID_LEN); util_checksum(hdrp, sizeof(*hdrp), &hdrp->checksum, 1); /* store pool's header */ util_persist(PART(rep, p).is_dev_dax, hdrp, sizeof(*hdrp)); } return 0; }
/* * update_uuids -- (internal) update uuids in all headers in the replica */ static void update_uuids(struct pool_set *set, unsigned repn) { LOG(3, "set %p, repn %u", set, repn); struct pool_replica *rep = REP(set, repn); struct pool_hdr *hdr0 = HDR(rep, 0); for (unsigned p = 0; p < rep->nhdrs; ++p) { struct pool_hdr *hdrp = HDR(rep, p); memcpy(hdrp->next_part_uuid, PARTN(rep, p)->uuid, POOL_HDR_UUID_LEN); memcpy(hdrp->prev_part_uuid, PARTP(rep, p)->uuid, POOL_HDR_UUID_LEN); memcpy(hdrp->next_repl_uuid, hdr0->next_repl_uuid, POOL_HDR_UUID_LEN); memcpy(hdrp->prev_repl_uuid, hdr0->prev_repl_uuid, POOL_HDR_UUID_LEN); memcpy(hdrp->poolset_uuid, hdr0->poolset_uuid, POOL_HDR_UUID_LEN); util_checksum(hdrp, sizeof(*hdrp), &hdrp->checksum, 1, POOL_HDR_CSUM_END_OFF); util_persist(PART(rep, p)->is_dev_dax, hdrp, sizeof(*hdrp)); } }
/* * copy_data_to_broken_parts -- (internal) copy data to all parts created * in place of the broken ones */ static int copy_data_to_broken_parts(struct pool_set *set, unsigned healthy_replica, unsigned flags, struct poolset_health_status *set_hs) { LOG(3, "set %p, healthy_replica %u, flags %u, set_hs %p", set, healthy_replica, flags, set_hs); /* get pool size from healthy replica */ size_t poolsize = set->poolsize; for (unsigned r = 0; r < set_hs->nreplicas; ++r) { /* skip unbroken and consistent replicas */ if (replica_is_replica_healthy(r, set_hs)) continue; struct pool_replica *rep = REP(set, r); struct pool_replica *rep_h = REP(set, healthy_replica); for (unsigned p = 0; p < rep->nparts; ++p) { /* skip unbroken parts from consistent replicas */ if (!replica_is_part_broken(r, p, set_hs) && replica_is_replica_consistent(r, set_hs)) continue; const struct pool_set_part *part = &rep->part[p]; size_t off = replica_get_part_data_offset(set, r, p); size_t len = replica_get_part_data_len(set, r, p); /* do not allow copying too much data */ if (off >= poolsize) continue; if (off + len > poolsize || rep->remote) len = poolsize - off; /* * First part of replica is mapped * with header */ size_t fpoff = (p == 0) ? POOL_HDR_SIZE : 0; void *dst_addr = ADDR_SUM(part->addr, fpoff); if (rep->remote) { int ret = Rpmem_persist(rep->remote->rpp, off - POOL_HDR_SIZE, len, 0); if (ret) { LOG(1, "Copying data to remote node " "failed -- '%s' on '%s'", rep->remote->pool_desc, rep->remote->node_addr); return -1; } } else if (rep_h->remote) { int ret = Rpmem_read(rep_h->remote->rpp, dst_addr, off - POOL_HDR_SIZE, len); if (ret) { LOG(1, "Reading data from remote node " "failed -- '%s' on '%s'", rep_h->remote->pool_desc, rep_h->remote->node_addr); return -1; } } else { if (off + len > poolsize) len = poolsize - off; void *src_addr = ADDR_SUM(rep_h->part[0].addr, off); /* copy all data */ memcpy(dst_addr, src_addr, len); util_persist(part->is_dev_dax, dst_addr, len); } } } return 0; }