/*
 * Strict lookup in the global alias list.
 *
 * Scans alias_list twice: the first pass requires node_eq() on the
 * alias node, the second pass retries with the relaxed node_np_eq()
 * comparison.  Within each pass, an alias matches when its username
 * or fullname equals the given name (case-insensitive stricmp); a
 * NULL username/fullname disables that comparison.  Returns the first
 * matching alias, or NULL when nothing matches.
 */
Alias *alias_lookup_strict(Node *node, char *username, char *fullname)
{
    Alias *cur;

    /* Pass 1: exact node comparison */
    for (cur = alias_list; cur != NULL; cur = cur->next) {
	if (username && !stricmp(cur->username, username) &&
	    node_eq(node, &cur->node))
	    return cur;
	if (fullname && !stricmp(cur->fullname, fullname) &&
	    node_eq(node, &cur->node))
	    return cur;
    }

    /* Pass 2: relaxed node comparison (node_np_eq) */
    for (cur = alias_list; cur != NULL; cur = cur->next) {
	if (username && !stricmp(cur->username, username) &&
	    node_np_eq(node, &cur->node))
	    return cur;
	if (fullname && !stricmp(cur->fullname, fullname) &&
	    node_np_eq(node, &cur->node))
	    return cur;
    }

    return NULL;
}
/*
 * Return FILE for output packet
 *
 * Reuses an already-known packet from the outpkt_first list when one
 * matches (from, to, grade, type, flav, bad); otherwise creates a new
 * packet entry, opens its temp file, and writes the packet header.
 * Returns NULL on open/write failure (error is logged).
 */
FILE *outpkt_open(Node *from, Node *to, int grade, int type, int flav, int bad)
{
    Packet pkt;
    OutPkt *p;
    Passwd *pwd;

    /*
     * Search for existing packet with identical routing/attributes
     */
    for(p=outpkt_first; p; p=p->next)
	if(node_eq(from, &p->from) && node_eq(to, &p->to) &&
	   flav==p->flav && type==p->type && grade==p->grade && bad==p->bad)
	{
	    p->n++;  /* bump reuse count */
	    /* If file exists, but isn't currently opened, reopen it */
	    if(!p->fp)
		p->fp = outpkt_fopen(p->tmpname, A_MODE);
	    /* NOTE(review): may be NULL if the reopen failed — caller must check */
	    return p->fp;
	}

    /*
     * Doesn't exist, create a new one
     */
    p = outpkt_new(from, to);
    outpkt_names(p, grade, type, flav, bad);

    /* Open file and write header */
    p->fp = outpkt_fopen(p->tmpname, W_MODE);
    if(p->fp == NULL)
    {
	fglog("$failed to open packet %s", p->tmpname);
	return NULL;
    }
    debug(2, "New packet %s (%s): %s -> %s",
	  p->outname, p->tmpname, znfp1(&p->from), znfp2(&p->to) );

    pkt.from = p->from;
    pkt.to   = p->to;
    pkt.time = time(NULL);
    /* Password: looked up per destination node; empty when none configured */
    pwd = passwd_lookup("packet", to);
    BUF_COPY(pkt.passwd, pwd ? pwd->passwd : "");
    /* Rest is filled in by pkt_put_hdr() */

    if(pkt_put_hdr(p->fp, &pkt) == ERROR)
    {
	fglog("$Can't write to packet file %s", p->tmpname);
	fclose(p->fp);
	p->fp = NULL;
	return NULL;
    }

    /* o.k., return the FILE */
    return p->fp;
}
/*
 * Returns true if an event is processed
 *
 * Peeks at the head of the shared-memory event queue and dispatches on
 * the event type.  Returning false means the caller must retry the same
 * event later (it was not consumed); only handlers that shm_queue_pop()
 * actually consume the event and return true.
 */
static bool local_process_event(void)
{
    struct local_event *ev;
    enum cluster_join_result res;

    /* peek, don't pop: unconsumed events must stay at the queue head */
    ev = shm_queue_peek();
    if (!ev)
	return false;

    switch (ev->type) {
    case EVENT_JOIN_REQUEST:
	/* only the addressed node answers a join request */
	if (!node_eq(&ev->nodes[0], &this_node))
	    return false;

	res = sd_check_join_cb(&ev->sender, ev->buf);
	/* rewrite the event in place into the response */
	ev->join_result = res;
	ev->type = EVENT_JOIN_RESPONSE;
	/* event lives in shared memory: flush before notifying peers */
	msync(ev, sizeof(*ev), MS_SYNC);

	shm_queue_notify();

	if (res == CJ_RES_MASTER_TRANSFER) {
	    eprintf("failed to join sheepdog cluster: "
		    "please retry when master is up\n");
	    shm_queue_unlock();
	    exit(1);
	}
	return false;
    case EVENT_JOIN_RESPONSE:
	if (ev->join_result == CJ_RES_MASTER_TRANSFER) {
	    /* FIXME: This code is tricky, but Sheepdog assumes that */
	    /* nr_nodes = 1 when join_result = MASTER_TRANSFER... */
	    ev->nr_nodes = 1;
	    ev->nodes[0] = this_node;
	    ev->pids[0] = getpid();
	    shm_queue_set_chksum();
	}

	sd_join_handler(&ev->sender, ev->nodes, ev->nr_nodes,
			ev->join_result, ev->buf);
	shm_queue_pop();
	break;
    case EVENT_LEAVE:
	sd_leave_handler(&ev->sender, ev->nodes, ev->nr_nodes);
	shm_queue_pop();
	break;
    case EVENT_BLOCK:
	/* block events are re-delivered until unblocked: not consumed */
	sd_block_handler(&ev->sender);
	return false;
    case EVENT_NOTIFY:
	sd_notify_handler(&ev->sender, ev->buf, ev->buf_len);
	shm_queue_pop();
	break;
    }

    return true;
}
/*
 * Complete this node's join: adopt the cluster parameters carried by
 * the join message, merge the member list (minus ourselves), rebuild
 * the leave list when the cluster is not OK, and initialize the store
 * driver named in the message.
 */
static void finish_join(struct join_message *msg, struct sd_node *joined,
		struct sd_node *nodes, size_t nr_nodes)
{
    int i;

    sys->nr_copies = msg->nr_copies;
    sys->epoch = msg->epoch;

    /* add nodes except for the newly joined one */
    for (i = 0; i < nr_nodes; i++) {
	if (node_eq(nodes + i, joined))
	    continue;

	sys->nodes[sys->nr_nodes++] = nodes[i];
    }
    /* keep the member array in canonical order */
    qsort(sys->nodes, sys->nr_nodes, sizeof(*sys->nodes), node_cmp);

    if (msg->cluster_status != SD_STATUS_OK) {
	int nr_leave_nodes;
	uint32_t le;

	nr_leave_nodes = msg->nr_leave_nodes;
	le = get_latest_epoch();
	for (i = 0; i < nr_leave_nodes; i++) {
	    struct node *n;

	    /* skip entries already tracked, or absent from the latest epoch */
	    if (find_entry_list(&msg->leave_nodes[i], &sys->leave_list)
		|| !find_entry_epoch(&msg->leave_nodes[i], le)) {
		continue;
	    }

	    n = zalloc(sizeof(*n));
	    if (!n)
		panic("failed to allocate memory\n");
	    n->ent = msg->leave_nodes[i];
	    list_add_tail(&n->list, &sys->leave_list);
	}
    }

    sys->join_finished = 1;

    if ((msg->cluster_status == SD_STATUS_OK ||
	 msg->cluster_status == SD_STATUS_HALT) && msg->inc_epoch)
	update_epoch_log(sys->epoch);

    /* adopt the backend store advertised by the cluster, if not set yet */
    if (!sd_store && strlen((char *)msg->store)) {
	sd_store = find_store_driver((char *)msg->store);
	if (sd_store) {
	    sd_store->init(obj_path);
	    if (set_cluster_store(sd_store->name) != SD_RES_SUCCESS)
		panic("failed to store into config file\n");
	} else
	    panic("backend store %s not supported\n", msg->store);
    }
}
/*
 * Lookup area/newsgroup in area_list
 *
 * Parameters:
 *     area, NULL    --- lookup by area
 *     NULL, group   --- lookup by group
 *
 * Entries flagged AREA_HIERARCHY match by prefix and are expanded via
 * area_build(); plain entries match by full case-insensitive compare.
 * Entries flagged AREA_NO act as negative matches and return NULL.
 */
Area *areas_lookup(char *area, char *group, Node *aka)
{
    Area *p;

    /*
     * Inefficient search, but order is important!
     */
    for(p=area_list; p; p=p->next)
    {
	if(area)
	{
	    if(p->flags & AREA_HIERARCHY &&
	       (!aka || node_eq(&p->addr, aka)))
	    {
		/* empty pattern matches everything; else prefix match */
		if( 0 == strlen( p->area ) ||
		    !strnicmp(area, p->area, strlen(p->area)))
		    return p->flags & AREA_NO
			? NULL
			: area_build(p, area, group);
	    }
	    else
	    {
		/* NOTE(review): the aka test is inverted here (!node_eq)
		 * relative to the hierarchy branch above — looks
		 * intentional given the "order is important" warning,
		 * but confirm against the config semantics. */
		if(!stricmp(area, p->area) &&
		   (!aka || !node_eq(&p->addr, aka)))
		    return p->flags & AREA_NO ? NULL : p;
	    }
	}
	/* cheap first-character filter before the string compares */
	if(group && group[0]==p->group[0])
	{
	    if(p->flags & AREA_HIERARCHY)
	    {
		if(!strnicmp(group, p->group, strlen(p->group)))
		    return p->flags & AREA_NO
			? NULL
			: area_build(p, area, group);
	    }
	    else
	    {
		if(!stricmp(group, p->group))
		    return p->flags & AREA_NO ? NULL : p;
	    }
	}
    }

    return NULL;
}
static struct sd_node *find_entry_list(struct sd_node *entry, struct list_head *head) { struct node *n; list_for_each_entry(n, head, list) if (node_eq(&n->ent, entry)) return entry; return NULL; }
/*
 * Build vnode info describing the membership *before* @joined arrived:
 * copy every node except the newly added one and hand the filtered
 * array to alloc_vnode_info().
 */
static struct vnode_info *alloc_old_vnode_info(struct sd_node *joined,
		struct sd_node *nodes, size_t nr_nodes)
{
	struct sd_node prev_members[SD_MAX_NODES];
	size_t nr_prev = 0;
	size_t idx;

	/* exclude the newly added one */
	for (idx = 0; idx < nr_nodes; idx++) {
		if (node_eq(nodes + idx, joined))
			continue;
		prev_members[nr_prev++] = nodes[idx];
	}

	return alloc_vnode_info(prev_members, nr_prev);
}
/*
 * Check whether @entry was a member at @epoch according to the epoch
 * log.  Returns @entry when found (callers treat the result as a
 * boolean), NULL otherwise.
 */
static struct sd_node *find_entry_epoch(struct sd_node *entry, uint32_t epoch)
{
	struct sd_node members[SD_MAX_NODES];
	int count;
	int idx;

	count = epoch_log_read_nr(epoch, (char *)members, sizeof(members));

	for (idx = 0; idx < count; idx++) {
		if (node_eq(&members[idx], entry))
			return entry;
	}

	return NULL;
}
/*
 * Record state needed for recovery after @joined entered the cluster:
 * append the newcomer to joining_nodes, snapshot the pre-join member
 * set into all_nodes (only on the first call, while nr_all_nodes is
 * still zero), and lazily build current_vnode_info from that snapshot.
 */
static void prepare_recovery(struct sd_node *joined,
			     struct sd_node *nodes, size_t nr_nodes)
{
	int idx;

	joining_nodes[nr_joining_nodes++] = *joined;

	if (!nr_all_nodes) {
		/* exclude the newly added one */
		for (idx = 0; idx < nr_nodes; idx++) {
			if (node_eq(nodes + idx, joined))
				continue;
			all_nodes[nr_all_nodes++] = nodes[idx];
		}
	}

	if (!current_vnode_info)
		current_vnode_info = alloc_vnode_info(all_nodes, nr_all_nodes);
}
/* * Perform a blocked cluster operation if we were the node requesting it * and do not have any other operation pending. * * If this method returns false the caller must call the method again for * the same event once it gets notified again. * * Must run in the main thread as it accesses unlocked state like * sys->pending_list. */ bool sd_block_handler(struct sd_node *sender) { struct request *req; if (!node_eq(sender, &sys->this_node)) return false; if (cluster_op_running) return false; cluster_op_running = true; req = list_first_entry(&sys->pending_list, struct request, pending_list); req->work.fn = do_process_work; req->work.done = cluster_op_done; queue_work(sys->block_wqueue, &req->work); return true; }
/*
 * Work-queue callback: fetch the VDI bitmap from cluster members,
 * skipping this node itself.
 */
static void do_get_vdi_bitmap(struct work *work)
{
	struct vdi_bitmap_work *bw;
	int idx;

	bw = container_of(work, struct vdi_bitmap_work, work);

	for (idx = 0; idx < bw->nr_members; idx++) {
		/* never fetch the bitmap from ourselves */
		if (node_eq(&bw->members[idx], &sys->this_node))
			continue;

		get_vdi_bitmap_from(&bw->members[idx]);

		/*
		 * A newcomer joining a running cluster only needs one
		 * copy of the bitmap from a single existing member.
		 */
		if (sys->status == SD_STATUS_WAIT_FOR_FORMAT)
			break;
	}
}
static void do_get_vdis(struct work *work) { struct get_vdis_work *w = container_of(work, struct get_vdis_work, work); int i, ret; for (i = 0; i < w->nr_members; i++) { /* We should not fetch vdi_bitmap and copy list from myself */ if (node_eq(&w->members[i], &sys->this_node)) continue; ret = get_vdis_from(&w->members[i]); if (ret != SD_RES_SUCCESS) /* try to read from another node */ continue; /* * If a new comer try to join the running cluster, it only * need read one copy of bitmap from one of other members. */ if (sys->status == SD_STATUS_WAIT_FOR_FORMAT) break; } }
/* Two local nodes are equal iff their embedded cluster nodes are equal. */
static bool lnode_eq(const struct local_node *lhs,
		     const struct local_node *rhs)
{
	return node_eq(&lhs->node, &rhs->node);
}
/*
 * Decide what cluster status a joining node (@from) should see, and
 * whether the epoch must be incremented.
 *
 * Returns an SD_RES_* code; on success *status holds the resulting
 * cluster status and *inc_epoch (when non-NULL) is set to 1 if the
 * join requires a new epoch.
 */
static int get_cluster_status(struct sd_node *from,
			      struct sd_node *entries,
			      int nr_entries, uint64_t ctime, uint32_t epoch,
			      uint32_t *status, uint8_t *inc_epoch)
{
	int i, j, ret = SD_RES_SUCCESS;
	int nr, nr_local_entries, nr_leave_entries;
	struct sd_node local_entries[SD_MAX_NODES];
	char str[256];
	uint32_t sys_stat = sys_stat_get();

	/* default: report current status, no epoch bump */
	*status = sys_stat;
	if (inc_epoch)
		*inc_epoch = 0;

	ret = cluster_sanity_check(entries, nr_entries, ctime, epoch);
	if (ret)
		goto out;

	switch (sys_stat) {
	case SD_STATUS_HALT:
	case SD_STATUS_OK:
		/* a running cluster always bumps the epoch on join */
		if (inc_epoch)
			*inc_epoch = 1;
		break;
	case SD_STATUS_WAIT_FOR_FORMAT:
		/* an unformatted cluster only admits fresh nodes */
		if (nr_entries != 0)
			ret = SD_RES_NOT_FORMATTED;
		break;
	case SD_STATUS_WAIT_FOR_JOIN:
		/* member count including the joining node */
		nr = sys->nr_nodes + 1;
		nr_local_entries = epoch_log_read_nr(epoch,
						     (char *)local_entries,
						     sizeof(local_entries));
		if (nr != nr_local_entries) {
			nr_leave_entries = get_nodes_nr_from(&sys->leave_list);
			if (nr_local_entries == nr + nr_leave_entries) {
				/* Even though some nodes have left, we can make do without them.
				 * Order cluster to do recovery right now. */
				if (inc_epoch)
					*inc_epoch = 1;
				*status = SD_STATUS_OK;
			}
			break;
		}

		/* verify every node from the epoch log is accounted for:
		 * either it is the joiner or a current member */
		for (i = 0; i < nr_local_entries; i++) {
			if (node_eq(local_entries + i, from))
				goto next;
			for (j = 0; j < sys->nr_nodes; j++) {
				if (node_eq(local_entries + i, sys->nodes + j))
					goto next;
			}
			/* entry matched nothing: stop scanning */
			break;
		next:
			;
		}

		/* NOTE(review): *status becomes OK even when the scan above
		 * broke out on an unmatched entry — confirm this is the
		 * intended behavior and not a missing (i == nr_local_entries)
		 * guard. */
		*status = SD_STATUS_OK;
		break;
	case SD_STATUS_SHUTDOWN:
		ret = SD_RES_SHUTDOWN;
		break;
	default:
		break;
	}
out:
	if (ret)
		eprintf("%x, %s\n", ret,
			addr_to_str(str, sizeof(str), from->addr, from->port));

	return ret;
}
/*
 * Event-loop callback for the accord cluster driver: drain one event
 * from the accord queue (signalled through the eventfd) and dispatch
 * it to the sd_* handlers.  Events that are not for us are pushed back
 * to the queue so another node can consume them.
 */
static void acrd_handler(int listen_fd, int events, void *data)
{
	int ret;
	eventfd_t value;
	struct acrd_event ev;
	enum cluster_join_result res;

	if (events & EPOLLHUP) {
		eprintf("accord driver received EPOLLHUP event, exiting.\n");
		log_close();
		exit(1);
	}

	dprintf("read event\n");
	/* consume the eventfd wakeup; nothing queued if the read fails */
	ret = eventfd_read(efd, &value);
	if (ret < 0)
		return;

	pthread_mutex_lock(&queue_lock);

	ret = acrd_queue_pop(ahandle, &ev);
	if (ret < 0)
		goto out;

	switch (ev.type) {
	case EVENT_JOIN_REQUEST:
		/* not addressed to us: requeue it untouched */
		if (!node_eq(&ev.nodes[0], &this_node)) {
			acrd_queue_push_back(ahandle, NULL);
			break;
		}

		res = sd_check_join_cb(&ev.sender, ev.buf);
		/* rewrite the request into a response and requeue it */
		ev.join_result = res;
		ev.type = EVENT_JOIN_RESPONSE;

		acrd_queue_push_back(ahandle, &ev);
		if (res == CJ_RES_MASTER_TRANSFER) {
			eprintf("failed to join sheepdog cluster: "
				"please retry when master is up\n");
			exit(1);
		}
		break;
	case EVENT_JOIN_RESPONSE:
		if (ev.join_result == CJ_RES_MASTER_TRANSFER) {
			/* FIXME: This code is tricky, but Sheepdog assumes that */
			/* nr_nodes = 1 when join_result = MASTER_TRANSFER... */
			ev.nr_nodes = 1;
			ev.nodes[0] = this_node;
			ev.ids[0] = this_id;
			/* replace the queued event with the rewritten one */
			acrd_queue_push_back(ahandle, &ev);
			acrd_queue_pop(ahandle, &ev);
		}

		sd_join_handler(&ev.sender, ev.nodes, ev.nr_nodes,
				ev.join_result, ev.buf);
		break;
	case EVENT_LEAVE:
		sd_leave_handler(&ev.sender, ev.nodes, ev.nr_nodes);
		break;
	case EVENT_BLOCK:
		/* blocked events stay queued until unblocked */
		acrd_queue_push_back(ahandle, NULL);
		sd_block_handler(&ev.sender);
		break;
	case EVENT_NOTIFY:
		sd_notify_handler(&ev.sender, ev.buf, ev.buf_len);
		break;
	}
out:
	pthread_mutex_unlock(&queue_lock);
}
/*
 * Returns true if an event is processed
 *
 * Peeks at the head of the shared-memory queue and dispatches it.
 * Returning false leaves the event at the queue head for a retry;
 * events that reach "out" are removed from the queue.
 */
static bool local_process_event(void)
{
	struct local_event *ev;
	int i;
	struct sd_node nodes[SD_MAX_NODES];
	size_t nr_nodes;

	ev = shm_queue_peek();
	if (!ev)
		return false;

	sd_debug("type = %d, sender = %s", ev->type, lnode_to_str(&ev->sender));
	sd_debug("callbacked = %d, removed = %d", ev->callbacked, ev->removed);

	/* collect the non-gateway members carried by the event */
	nr_nodes = 0;
	for (i = 0; i < ev->nr_lnodes; i++) {
		sd_debug("%d: %s", i, lnode_to_str(ev->lnodes + i));
		if (!ev->lnodes[i].gateway)
			nodes[nr_nodes++] = ev->lnodes[i].node;
	}

	if (ev->removed)
		goto out;

	if (ev->callbacked)
		return false; /* wait for unblock event */

	if (!joined) {
		/* before we join, only our own events are of interest */
		if (!lnode_eq(&this_node, &ev->sender))
			goto out;

		switch (ev->type) {
		case EVENT_JOIN:
			break;
		case EVENT_ACCEPT:
			sd_debug("join Sheepdog");
			joined = true;
			break;
		default:
			goto out;
		}
	}

	switch (ev->type) {
	case EVENT_JOIN:
		/* nodes[nr_nodes - 1] is a sender, so don't include it */
		assert(node_eq(&ev->sender.node, &nodes[nr_nodes - 1]));
		if (sd_join_handler(&ev->sender.node, nodes, nr_nodes - 1,
				    ev->buf)) {
			/* accepted: morph the event in place and announce it */
			ev->type = EVENT_ACCEPT;
			msync(ev, sizeof(*ev), MS_SYNC);

			shm_queue_notify();
		}

		return false;
	case EVENT_ACCEPT:
		sd_accept_handler(&ev->sender.node, nodes, nr_nodes, ev->buf);
		break;
	case EVENT_LEAVE:
		if (ev->sender.gateway) {
			sd_debug("gateway %s left sheepdog",
				 lnode_to_str(&ev->sender));
			break;
		}
		/* fall through */
	case EVENT_GATEWAY:
		sd_leave_handler(&ev->sender.node, nodes, nr_nodes);
		break;
	case EVENT_BLOCK:
		/* remember whether the block was handled; not consumed yet */
		ev->callbacked = sd_block_handler(&ev->sender.node);
		msync(ev, sizeof(*ev), MS_SYNC);
		return false;
	case EVENT_NOTIFY:
		sd_notify_handler(&ev->sender.node, ev->buf, ev->buf_len);
		break;
	case EVENT_UPDATE_NODE:
		if (lnode_eq(&ev->sender, &this_node))
			this_node = ev->sender;

		sd_update_node_handler(&ev->sender.node);
		break;
	}
out:
	shm_queue_remove(ev);
	return true;
}