/**
 * Waits for this server to finish sending pending items from its queue.
 * Blocks until the cluster (this node plus its secondaries) has either
 * drained all queues or every secondary is marked failed, then joins
 * the server thread.
 */
void
server_shutdown(server *s)
{
	int i;
	size_t failures;  /* number of secondaries currently in failure mode */
	size_t inqueue;   /* total metrics still queued on running secondaries */
	int err;

	/* this function should only be called once for each server */
	assert(s->tid != 0);

	if (s->secondariescnt > 0) {
		/* if we have a working connection, or we still have stuff in
		 * our queue, wait for our secondaries, as they might need us,
		 * or we need them */
		do {
			failures = 0;
			inqueue = 0;
			for (i = 0; i < s->secondariescnt; i++) {
				if (s->secondaries[i]->failure)
					failures++;
				if (s->secondaries[i]->running)
					inqueue += queue_len(s->secondaries[i]->queue);
			}
			/* loop until we all failed, or nothing is in the queues.
			 * NOTE: the logout() and usleep() calls live inside the
			 * condition purely for their side effects (log progress,
			 * then back off 200-300ms); their comparisons are always
			 * true, so they never terminate the loop by themselves */
		} while (failures != s->secondariescnt &&
				inqueue != 0 &&
				logout("any_of cluster pending %zu metrics "
					"(with %zu failed nodes)\n",
					inqueue, failures) >= -1 &&
				usleep((200 + (rand() % 100)) * 1000) <= 0);
		/* shut down entire cluster */
		for (i = 0; i < s->secondariescnt; i++)
			s->secondaries[i]->keep_running = 0;
		/* to pretend to be dead for above loop (just in case) */
		if (inqueue != 0)
			for (i = 0; i < s->secondariescnt; i++)
				s->secondaries[i]->failure = 1;
	}
	s->keep_running = 0;

	if ((err = pthread_join(s->tid, NULL)) != 0)
		logerr("%s:%u: failed to join server thread: %s\n",
				s->ip, s->port, strerror(err));
	s->tid = 0;

	/* anything still queued on a TCP server at this point is lost */
	if (s->ctype == CON_TCP) {
		size_t qlen = queue_len(s->queue);
		if (qlen > 0)
			logerr("dropping %zu metrics for %s:%u\n",
					qlen, s->ip, s->port);
	}
}
/* 查询userid的邻居,最多查找1024个,按照距离升序排列,并最多显示topn个。 * 由于子进程执行完该函数很快退出,因此就不一一释放内存了。 */ static void query_neighbour(int flag, char *userid, int topn, int distance) { int i; UserNode *punode; QueryCondt qc; FILE *fp; char tmpf[256], fpath[256]; if(topn <= 0 || distance <= 0) return; sprintf(tmpf, FRIENDS_TMPDIR "/%s.%d.tmp", userid, flag); if((fp = fopen(tmpf, "w+")) == NULL) return; punode = ght_get(fidx, strlen(userid), userid); if(punode == NULL) { fprintf(fp, "Query return null result."); goto END_WITH_RESULT; } qc.max = flag ? queue_len(punode->to) : queue_len(punode->from); qc.list = malloc(sizeof(Neighbour)*(qc.max+1)); if(qc.list == NULL) goto END_WITH_ERROR; qc.count = 0; qc.distance = distance; if(flag) apply_queue(punode->to, get_neighbour, &qc); else apply_queue(punode->from, get_neighbour, &qc); if(qc.count == 0) fprintf(fp, "Query return null result."); qsort(qc.list, qc.count, sizeof(Neighbour), (void*)cmp_dist); for(i = 0; i < qc.count && i < topn; i++) { fprintf(fp, "%s%c", qc.list[i].userid, (i % 6 == 5) ? '\n' : '\t'); } END_WITH_RESULT: fclose(fp); sprintf(fpath, FRIENDS_TMPDIR "/%s.%d", userid, flag); rename(tmpf, fpath); return; END_WITH_ERROR: fclose(fp); unlink(tmpf); return; }
int main(int argc, char **argv) { struct Queue *q; int64_t l = 0; int opt = 0; char *cq = NULL; while((opt = getopt(argc, argv, "hq:")) != -1) switch(opt) { case 'q': cq = strdup(optarg); break; default: puts("Usage: qlen [-h] [-q queue-name] [--]"); return EXIT_FAILURE; } q = queue_open(SELECTQUEUE(cq)); if(0 == queue_is_opened(q)) { fprintf(stderr,"Failed to open the queue:%s\n", queue_get_last_error(q)); closequeue(q); return EXIT_FAILURE; } if(queue_len(q, &l) != LIBQUEUE_SUCCESS) { puts("Failed to get the queue length."); closequeue(q); return EXIT_FAILURE; } printf("%lld\n", (long long)l); if(cq != NULL) free(cq); return closequeue(q); }
/**
 * Returns the (approximate) number of metrics waiting to be sent.
 * A NULL server is treated as an empty queue.
 */
inline size_t
server_get_queue_len(server *s)
{
	return s == NULL ? 0 : queue_len(s->queue);
}
/**
 * Frees this server and associated resources.  This includes joining
 * the server thread.  Passing NULL is a no-op (consistent with
 * server_get_queue_len()).
 */
void
server_free(server *s)
{
	int err;

	if (s == NULL)
		return;

	if (s->tid != 0 && (err = pthread_join(s->tid, NULL)) != 0)
		logerr("%s:%u: failed to join server thread: %s\n",
				s->ip, s->port, strerror(err));
	s->tid = 0;

	/* anything still queued on a TCP server at this point is lost */
	if (s->ctype == CON_TCP) {
		size_t qlen = queue_len(s->queue);
		if (qlen > 0)
			logerr("dropping %zu metrics for %s:%u\n",
					qlen, s->ip, s->port);
	}

	queue_destroy(s->queue);
	free(s->batch);
	free(s->instance);  /* free(NULL) is a no-op, no guard needed */
	if (s->saddr != NULL)
		freeaddrinfo(s->saddr);
	free((char *)s->ip);
	s->ip = NULL;
	free(s);
}
/**
 * Take the first phit in a queue.
 *
 * Removes the head phit from queue & returns it via "i".
 * Requires a non-empty queue; panics otherwise.
 *
 * @param q A queue.
 * @param i The removed phit is returned here.
 */
void
rem_queue (queue *q, phit *i)
{
	if (queue_len(q) != 0) {
		q->head = (q->head + 1) % tr_ql;
		*i = q->pos[q->head];
	} else {
		panic("Removing the head of an empty queue");
	}
}
/**
 * Inserts a phit in a queue.
 *
 * Requires a buffer with room for the phit; panics otherwise.
 *
 * @param q A queue.
 * @param i The phit to be inserted.
 */
void
ins_queue (queue *q, phit *i)
{
	if (queue_len(q) != (tr_ql - 1)) {
		q->tail = (q->tail + 1) % tr_ql;
		q->pos[q->tail] = *i;
	} else {
		panic("Inserting a phit in a full queue");
	}
}
/**
 * Looks at the first phit of a queue.
 *
 * Requires a non-empty queue.  Otherwise, panics.
 *
 * @param q A queue.
 * @return A pointer to the first phit of the queue.
 */
phit *
head_queue (queue *q)
{
	long aux;

	if (queue_len(q) == 0) {
		panic("Asking for the head of an empty queue");
		/* the original fell off the end of this non-void function when
		 * the queue was empty (undefined behavior if panic() returns);
		 * this return is unreachable if panic() aborts */
		return NULL;
	}
	aux = (q->head + 1) % tr_ql;
	return &((q->pos)[aux]);
}
/**
 * Inserts many (identical) copies of a phit "i" in queue "q".
 *
 * Requires enough space; panics otherwise.
 *
 * @param q A queue.
 * @param i The phit to be cloned & inserted.
 * @param copies Number of clones of i.
 */
void
ins_mult_queue (queue *q, phit *i, long copies)
{
	long n;

	for (n = 0; n < copies; n++) {
		/* fullness is re-checked before every single insertion */
		if (queue_len(q) == (tr_ql - 1)) {
			panic("Inserting multiple phits in a full queue");
		} else {
			q->tail = (q->tail + 1) % tr_ql;
			q->pos[q->tail] = *i;
		}
	}
}
int main(int argc, char **argv) { struct Queue * q; struct QueueData d; int64_t l = 0; int64_t j = 0; char *cq = NULL; int opt = 0; while((opt = getopt(argc, argv, "hq:")) != -1) switch(opt) { case 'q': cq = strdup(optarg); break; default: case 'h': puts("Usage: qpeek [-h] [-q queue-name] [--] <args>"); return EXIT_FAILURE; } int i = optind-1; q = queue_open(SELECTQUEUE(cq)); if(0 == queue_is_opened(q)) { fprintf(stderr,"Failed to open the queue:%s\n", queue_get_last_error(q)); closequeue(q); return EXIT_FAILURE; } if(queue_len(q, &l) != LIBQUEUE_SUCCESS) { puts("Failed to read the queue length."); closequeue(q); return EXIT_FAILURE; } while(argv[++i]) { if((j = (int64_t)atoi((const char*)argv[i])) < 0 || (j+1)>l) { printf("Index out of bounds: %lld (%lld)\n", (long long)j, (long long)l); continue; } if(queue_peek(q, j-1, &d) != LIBQUEUE_SUCCESS) { printf("Failed to peek at element #%lld\n",(long long) j); } else printf("%s\n", (const char*)d.v); if(cq != NULL) free(cq); } return closequeue(q); }
/** * Performs consumption of all phits that have arrived to a node. * * For "multiple" consumption, all input ports can access to the consumption port. * @param i The node in which the consumption is performed. */ void arbitrate_cons_multiple(long i) { port_type s_p; for (s_p=0; s_p<last_port_arb_con; s_p++) { if (network[i].p[p_con].req[s_p]) { // Has input port "s_p" requested consumption port? if (!queue_len(&network[i].p[s_p].q)) { printf("node %d, p_con %d, s_p %d\n",i,p_con,s_p); panic("Trying to assign consumption port to empty input queue - multiple"); } network[i].p[s_p].aop = p_con; network[i].p[s_p].bet = B_TRIAL_0; // Success reserving!! Reset my next bet -- Only for adaptive continue; } } }
/**
 * Tells this server to finish sending pending items from its queue.
 * Unlike the joining variant, this only flags the server (and its whole
 * cluster of secondaries) to stop; it does not join the thread.
 */
void
server_shutdown(server *s)
{
	int i;
	size_t failures;  /* number of secondaries currently in failure mode */
	size_t inqueue;   /* total metrics still queued on running secondaries */

	/* this function should only be called on a running server */
	if (s->keep_running == 0)
		return;

	if (s->secondariescnt > 0) {
		/* if we have a working connection, or we still have stuff in
		 * our queue, wait for our secondaries, as they might need us,
		 * or we need them */
		do {
			failures = 0;
			inqueue = 0;
			for (i = 0; i < s->secondariescnt; i++) {
				if (s->secondaries[i]->failure)
					failures++;
				if (s->secondaries[i]->running)
					inqueue += queue_len(s->secondaries[i]->queue);
			}
			/* loop until we all failed, or nothing is in the queues.
			 * NOTE: logout() and usleep() live inside the condition
			 * purely for their side effects (log progress, back off
			 * 200-300ms); their comparisons are always true and never
			 * terminate the loop by themselves */
		} while (failures != s->secondariescnt &&
				inqueue != 0 &&
				logout("any_of cluster pending %zu metrics "
					"(with %zu failed nodes)\n",
					inqueue, failures) >= -1 &&
				usleep((200 + (rand() % 100)) * 1000) <= 0);
		/* shut down entire cluster */
		for (i = 0; i < s->secondariescnt; i++)
			s->secondaries[i]->keep_running = 0;
		/* to pretend to be dead for above loop (just in case) */
		if (inqueue != 0)
			for (i = 0; i < s->secondariescnt; i++)
				s->secondaries[i]->failure = 1;
	}
	s->keep_running = 0;
}
/* Handler for the "Dequeue" button: removes the selected file from both
 * the GUI transfer list and the internal queue, disabling the Send
 * button once the queue is empty. */
G_MODULE_EXPORT gboolean
on_btnDequeue_clicked(void)
{
	char *sel;

	sel = glist_selected(GTK_TREE_VIEW(treeTransfers));
	if (sel == NULL) {
		status("Uh... select a queue'd file if you please");
		return FALSE;
	}

	QUEUE_REM(sel);               /* gui list */
	queue_rem(&file_queue, sel);  /* internal linked list */

	if (queue_len(file_queue) == 0)
		gtk_widget_set_sensitive(btnSend, FALSE);

	return FALSE;
}
/* Small demonstration / smoke test of the queue API. */
int main(int argc, const char *argv[])
{
	/* allocate a new empty queue.
	 * NOTE: the local must not itself be named "queue" -- in C a
	 * declarator is in scope within its own initializer, so
	 *     struct queue *queue = queue();
	 * tries to call the (pointer) variable rather than the function
	 * and does not compile */
	struct queue *q = queue();

	/* push data to queue */
	char *val1 = "val1";
	char *val2 = "val2";
	char *val3 = "val3";
	assert(queue_push(q, val1) == QUEUE_OK);
	assert(queue_push(q, val2) == QUEUE_OK);
	assert(queue_push(q, val3) == QUEUE_OK);

	/* get queue len */
	printf("current queue length: %zu\n", queue_len(q));

	/* pop data from queue, the order is the same with push */
	void *data;
	while ((data = queue_pop(q)) != NULL)
		printf("pop data: %s\n", (char *)data);

	/* free queue */
	queue_free(q);
	return 0;
}
static int save_friends() { ght_iterator_t iterator; void *key; UserNode *punode; FILE *fp; char *filename = MY_BBS_HOME "/bbstmpfs/tmp/friends.dump.tmp"; fp = fopen(filename, "w+"); if(fp == NULL) return -1; for (punode = ght_first(fidx, &iterator, &key); punode; punode = ght_next(fidx, &iterator, &key)) { if(queue_len(punode->to) > 0) { fprintf(fp, "#%s\n", punode->userid); apply_queue(punode->to, dump_user, fp); } } fclose(fp); crossfs_rename(filename, MY_BBS_HOME "/friends.dump"); return 0; }
/**
 * Select the port with the biggest queue length.
 *
 * Given a range of input-injection ports, select the one whose queue occupation is longer.
 * We take a look to the queues in a round-robin fashion, starting with the last lucky
 * input port (network[i].p[d_p].ri), if several ports have the same occupation, the first
 * visited will be the selected one.
 *
 * @param i The node in which the arbitration is performed.
 * @param d_p The destination port for wich the arbitration is performed.
 * @param first The first port for looking to.
 * @param last The next port from the last to looking to. This port is not included.
 * @return The selected port, or NULL_PORT if there isnt anyone.
 *
 * @see arbitrate
 * @see arbitrate_select
 */
port_type arbitrate_select_longest(long i, port_type d_p, port_type first, port_type last)
{
	port_type s_p, selected_port, visited;
	long len_of_selected, pl;
	long dif = last - first;

	/* start one past the last lucky port, wrapping within [first, last) */
	s_p = first + ((network[i].p[d_p].ri + 1) % dif);
	if (s_p >= last)
		s_p = first;

	selected_port = NULL_PORT;  /* avoid returning an indeterminate value */
	len_of_selected = -1;
	for (visited = first; visited < last; visited++) {
		if (network[i].p[d_p].req[s_p]) {
			pl = queue_len(&network[i].p[s_p].q);
			if (pl > len_of_selected) {
				len_of_selected = pl;
				selected_port = s_p;
			}
		}
		/* advance round-robin within [first, last).  The original used
		 * first + ((s_p + 1) % dif), which cycles over the whole range
		 * only when first == 0 (e.g. first=3, last=5 got stuck on port
		 * 3 forever and never examined port 4) */
		s_p = first + ((s_p + 1 - first) % dif);
		if (s_p >= last)
			s_p = first;
	}

	if (len_of_selected != -1)
		return(selected_port);
	else
		return(NULL_PORT);
}
/* maybe, 1 element in the queue cannot be used..
 * Reports whether the command queue of the given bus has reached its
 * usable capacity (one slot is kept free). */
static int queue_isfull(bus_t busnumber)
{
	if (queue_len(busnumber) >= QUEUELEN - 1)
		return 1;
	return 0;
}
/* Computes the "top 10" friendship charts and publishes each one as a
 * file under FRIENDS_TMPDIR (written to a .tmp file first, then
 * rename()d into place so readers never see partial output).
 * Charts: idolA (most admirers), fansA (follows the most people),
 * idolB (largest average incoming distance), fansB (closest outgoing
 * relation), bigdiv (largest community). */
static void friends_top10()
{
	ght_iterator_t it;
	void *key;
	UserNode *tmp;
	int i, j;     /* i: number of users collected; j: chart row index */
	TopSort *ts;  /* per-user aggregates, reused for all charts */
	FILE *fp;

	ts = calloc(MAXUSERS, sizeof(TopSort));
	if(ts == NULL)
		return;
	/* first pass: aggregate per-user statistics from both queues */
	for(i = 0, tmp = ght_first(fidx, &it, &key); tmp;
			tmp = ght_next(fidx, &it, &key)) {
		strcpy(ts[i].userid, tmp->userid);
		ts[i].to_num = queue_len(tmp->to);
		ts[i].from_num = queue_len(tmp->from);
		ts[i].to_min = INITIAL_PATH;
		apply_queue(tmp->to, min_of_queue, &ts[i].to_min);
		apply_queue(tmp->from, sum_of_queue, &ts[i].from_dist);
		/* average incoming distance; guard against dividing by zero */
		ts[i].from_dist /= (ts[i].from_num ? ts[i].from_num : 1);
		i++;
	}
	/* top-10 "mass idols": ranked by length of the "from" queue */
	qsort(ts, i, sizeof(TopSort), (void *)cmp_fromnum);
	fp = fopen(FRIENDS_TMPDIR "/top10.idolA.tmp", "w+");
	if(fp == NULL)
		goto END_FREE_TS;
	for(j = 0; j < 10 && j < i; j++) {
		if(ts[j].from_num == 0)
			break;
		fprintf(fp, "\t\t%12s\t%4d\n", ts[j].userid, ts[j].from_num);
	}
	fclose(fp);
	rename(FRIENDS_TMPDIR "/top10.idolA.tmp", FRIENDS_TMPDIR "/top10.idolA");
	/* top-10 "devoted fans": ranked by length of the "to" queue */
	qsort(ts, i, sizeof(TopSort), (void *)cmp_tonum);
	fp = fopen(FRIENDS_TMPDIR "/top10.fansA.tmp", "w+");
	if(fp == NULL)
		goto END_FREE_TS;
	for(j = 0; j < 10 && j < i; j++) {
		if(ts[j].to_num == 0)
			break;
		fprintf(fp, "\t\t%12s\t%4d\n", ts[j].userid, ts[j].to_num);
	}
	fclose(fp);
	rename(FRIENDS_TMPDIR "/top10.fansA.tmp", FRIENDS_TMPDIR "/top10.fansA");
	/* top-10 "strong idols": ranked by average distance over the
	 * "from" queue values */
	qsort(ts, i, sizeof(TopSort), (void *)cmp_fromdist);
	fp = fopen(FRIENDS_TMPDIR "/top10.idolB.tmp", "w+");
	if(fp == NULL)
		goto END_FREE_TS;
	for(j = 0; j < 10 && j < i; j++) {
		if(ts[j].from_dist == 0)
			break;
		fprintf(fp, "\t\t%12s\t%4d\n", ts[j].userid, ts[j].from_dist);
	}
	fclose(fp);
	rename(FRIENDS_TMPDIR "/top10.idolB.tmp", FRIENDS_TMPDIR "/top10.idolB");
	/* top-10 "loyal fans": ranked by the minimum value of the "to"
	 * queue (smaller distance = closer relation) */
	qsort(ts, i, sizeof(TopSort), (void *)cmp_tomin);
	fp = fopen(FRIENDS_TMPDIR "/top10.fansB.tmp", "w+");
	if(fp == NULL)
		goto END_FREE_TS;
	for(j = 0; j < 10 && j < i; j++) {
		if(INITIAL_PATH-ts[j].to_min == 0)
			break;
		fprintf(fp, "\t\t%12s\t%4d\n", ts[j].userid,
				INITIAL_PATH-ts[j].to_min);
	}
	fclose(fp);
	rename(FRIENDS_TMPDIR "/top10.fansB.tmp", FRIENDS_TMPDIR "/top10.fansB");
	/* top-10 "popular circles": count members per community (div),
	 * reusing ts indexed by community id this time */
	bzero(ts, sizeof(TopSort)*MAXUSERS);
	for(tmp = ght_first(fidx, &it, &key); tmp;
			tmp = ght_next(fidx, &it, &key)) {
		if(tmp->div > 0 && tmp->div < MAXUSERS) {
			if(ts[tmp->div].userid[0] == '\0') /* first user as representative */
				strcpy(ts[tmp->div].userid, tmp->userid);
			ts[tmp->div].div_num++;
		}
	}
	qsort(ts, MAXUSERS, sizeof(TopSort), (void *)cmp_divnum);
	fp = fopen(FRIENDS_TMPDIR "/top10.bigdiv.tmp", "w+");
	if(fp == NULL)
		goto END_FREE_TS;
	for(j = 0; j < 10 && ts[j].div_num > 0; j++)
		fprintf(fp, "\t\t%12s\t%4d\n", ts[j].userid, ts[j].div_num);
	fclose(fp);
	rename(FRIENDS_TMPDIR "/top10.bigdiv.tmp", FRIENDS_TMPDIR "/top10.bigdiv");

END_FREE_TS:
	free(ts);
	return;
}
/**
 * Removes the head of queue.
 *
 * Does not return anything.  Requires a non-empty queue; panics
 * otherwise.
 *
 * @param q A queue.
 */
void
rem_head_queue (queue *q)
{
	if (queue_len(q) != 0) {
		q->head = (q->head + 1) % tr_ql;
	} else {
		panic("Removing the head of an empty queue");
	}
}
/**
 * Reads from the queue and sends items to the remote server.  This
 * function is designed to be a thread.  Data sending is attempted to be
 * batched, but sent one by one to reduce loss on sending failure.
 * A connection with the server is maintained for as long as there is
 * data to be written.  As soon as there is none, the connection is
 * dropped if a timeout of DISCONNECT_WAIT_TIME exceeds.
 */
static void *
server_queuereader(void *d)
{
	server *self = (server *)d;
	size_t len;
	ssize_t slen;
	const char **metric = self->batch;
	struct timeval start, stop;
	struct timeval timeout;
	queue *squeue;
	char idle = 0;           /* consecutive empty-queue iterations */
	size_t *secpos = NULL;   /* lazily built shuffle of secondary indices */

	*metric = NULL;
	self->metrics = 0;
	self->ticks = 0;

#define FAIL_WAIT_TIME 6  /* 6 * 250ms = 1.5s */
#define DISCONNECT_WAIT_TIME 12  /* 12 * 250ms = 3s */
#define LEN_CRITICAL(Q) (queue_free(Q) < self->bsize)
	self->running = 1;
	while (1) {
		if (queue_len(self->queue) == 0) {
			/* if we're idling, close the TCP connection, this allows us
			 * to reduce connections, while keeping the connection alive
			 * if we're writing a lot */
			gettimeofday(&start, NULL);
			if (self->ctype == CON_TCP && self->fd >= 0 &&
					idle++ > DISCONNECT_WAIT_TIME)
			{
				close(self->fd);
				self->fd = -1;
			}
			gettimeofday(&stop, NULL);
			self->ticks += timediff(start, stop);
			if (!self->keep_running)
				break;
			/* nothing to do, so slow down for a bit */
			usleep((200 + (rand() % 100)) * 1000);  /* 200ms - 300ms */
			/* if we are in failure mode, keep checking if we can
			 * connect, this avoids unnecessary queue moves */
			if (!self->failure)
				/* it makes no sense to try and do something, so skip */
				continue;
		} else if (self->secondariescnt > 0 &&
				(self->failure >= FAIL_WAIT_TIME ||
				 (!self->failover && LEN_CRITICAL(self->queue))))
		{
			/* we are failing (or critically full): try to offload our
			 * queue onto the secondaries */
			size_t i;

			gettimeofday(&start, NULL);
			if (self->secondariescnt > 0) {
				if (secpos == NULL) {
					secpos = malloc(sizeof(size_t) * self->secondariescnt);
					if (secpos == NULL) {
						logerr("server: failed to allocate memory "
								"for secpos\n");
						gettimeofday(&stop, NULL);
						self->ticks += timediff(start, stop);
						continue;
					}
					for (i = 0; i < self->secondariescnt; i++)
						secpos[i] = i;
				}
				if (!self->failover) {
					/* randomise the failover list such that in the
					 * grand scheme of things we don't punish the first
					 * working server in the list to deal with all
					 * traffic meant for a now failing server */
					for (i = 0; i < self->secondariescnt; i++) {
						size_t n = rand() % (self->secondariescnt - i);
						if (n != i) {
							size_t t = secpos[n];
							secpos[n] = secpos[i];
							secpos[i] = t;
						}
					}
				}
			}
			/* offload data from our queue to our secondaries
			 * when doing so, observe the following:
			 * - avoid nodes that are in failure mode
			 * - avoid nodes which queues are >= critical_len
			 * when no nodes remain given the above
			 * - send to nodes which queue size < critical_len
			 * where there are no such nodes
			 * - do nothing (we will overflow, since we can't send
			 *   anywhere) */
			*metric = NULL;
			squeue = NULL;
			for (i = 0; i < self->secondariescnt; i++) {
				/* both conditions below make sure we skip ourself */
				if (self->secondaries[secpos[i]]->failure)
					continue;
				squeue = self->secondaries[secpos[i]]->queue;
				if (!self->failover && LEN_CRITICAL(squeue)) {
					squeue = NULL;
					continue;
				}
				if (*metric == NULL) {
					/* send up to batch size of our queue to this queue */
					len = queue_dequeue_vector(
							self->batch, self->queue, self->bsize);
					self->batch[len] = NULL;
					metric = self->batch;
				}
				for (; *metric != NULL; metric++)
					if (!queue_putback(squeue, *metric))
						break;
				/* try to put back stuff that didn't fit */
				for (; *metric != NULL; metric++)
					if (!queue_putback(self->queue, *metric))
						break;
			}
			/* whatever remains could be stored nowhere: drop it */
			for (; *metric != NULL; metric++) {
				if (mode & MODE_DEBUG)
					logerr("dropping metric: %s", *metric);
				free((char *)*metric);
				self->dropped++;
			}
			gettimeofday(&stop, NULL);
			self->ticks += timediff(start, stop);
			if (squeue == NULL) {
				/* we couldn't do anything, take it easy for a bit */
				if (self->failure)
					self->failure = 1;
				if (!self->keep_running)
					break;
				usleep((200 + (rand() % 100)) * 1000);  /* 200ms - 300ms */
			}
		} else if (self->failure) {
			if (!self->keep_running)
				break;
			usleep((200 + (rand() % 100)) * 1000);  /* 200ms - 300ms */
		}

		/* at this point we've got work to do, if we're instructed to
		 * shut down, however, try to get everything out of the door
		 * (until we fail, see top of this loop) */

		gettimeofday(&start, NULL);

		/* try to connect */
		if (self->fd < 0) {
			if (self->ctype == CON_PIPE) {
				int intconn[2];
				if (pipe(intconn) < 0) {
					if (!self->failure)
						logerr("failed to create pipe: %s\n",
								strerror(errno));
					/* failure is saturated at FAIL_WAIT_TIME (same
					 * pattern everywhere below) */
					self->failure += self->failure >= FAIL_WAIT_TIME ? 0 : 1;
					continue;
				}
				dispatch_addconnection(intconn[0]);
				self->fd = intconn[1];
			} else if (self->ctype == CON_UDP) {
				if ((self->fd = socket(self->saddr->ai_family,
								self->saddr->ai_socktype,
								self->saddr->ai_protocol)) < 0)
				{
					if (!self->failure)
						logerr("failed to create udp socket: %s\n",
								strerror(errno));
					self->failure += self->failure >= FAIL_WAIT_TIME ? 0 : 1;
					continue;
				}
				if (connect(self->fd,
						self->saddr->ai_addr, self->saddr->ai_addrlen) < 0)
				{
					if (!self->failure)
						logerr("failed to connect udp socket: %s\n",
								strerror(errno));
					close(self->fd);
					self->fd = -1;
					self->failure += self->failure >= FAIL_WAIT_TIME ? 0 : 1;
					continue;
				}
			} else if (self->ctype == CON_FILE) {
				if ((self->fd = open(self->ip,
								O_WRONLY | O_APPEND | O_CREAT,
								S_IRUSR | S_IWUSR | S_IRGRP | S_IROTH)) < 0)
				{
					if (!self->failure)
						logerr("failed to open file '%s': %s\n",
								self->ip, strerror(errno));
					self->failure += self->failure >= FAIL_WAIT_TIME ? 0 : 1;
					continue;
				}
			} else {
				/* CON_TCP: non-blocking connect with poll() timeout */
				int ret;
				int args;

				if ((self->fd = socket(self->saddr->ai_family,
								self->saddr->ai_socktype,
								self->saddr->ai_protocol)) < 0)
				{
					if (!self->failure)
						logerr("failed to create socket: %s\n",
								strerror(errno));
					self->failure += self->failure >= FAIL_WAIT_TIME ? 0 : 1;
					continue;
				}

				/* put socket in non-blocking mode such that we can
				 * poll() (time-out) on the connect() call */
				args = fcntl(self->fd, F_GETFL, NULL);
				(void) fcntl(self->fd, F_SETFL, args | O_NONBLOCK);
				ret = connect(self->fd,
						self->saddr->ai_addr, self->saddr->ai_addrlen);

				if (ret < 0 && errno == EINPROGRESS) {
					/* wait for connection to succeed if the OS thinks
					 * it can succeed */
					struct pollfd ufds[1];
					ufds[0].fd = self->fd;
					ufds[0].events = POLLIN | POLLOUT;
					ret = poll(ufds, 1, self->iotimeout + (rand() % 100));
					if (ret == 0) {
						/* time limit expired */
						if (!self->failure)
							logerr("failed to connect() to "
									"%s:%u: Operation timed out\n",
									self->ip, self->port);
						close(self->fd);
						self->fd = -1;
						self->failure += self->failure >= FAIL_WAIT_TIME ? 0 : 1;
						continue;
					} else if (ret < 0) {
						/* some select error occurred */
						if (!self->failure)
							logerr("failed to poll() for %s:%u: %s\n",
									self->ip, self->port, strerror(errno));
						close(self->fd);
						self->fd = -1;
						self->failure += self->failure >= FAIL_WAIT_TIME ? 0 : 1;
						continue;
					} else {
						if (ufds[0].revents & POLLHUP) {
							if (!self->failure)
								logerr("failed to connect() for %s:%u: "
										"Hangup\n",
										self->ip, self->port);
							close(self->fd);
							self->fd = -1;
							self->failure += self->failure >= FAIL_WAIT_TIME ? 0 : 1;
							continue;
						}
					}
				} else if (ret < 0) {
					if (!self->failure) {
						logerr("failed to connect() to %s:%u: %s\n",
								self->ip, self->port, strerror(errno));
						dispatch_check_rlimit_and_warn();
					}
					close(self->fd);
					self->fd = -1;
					self->failure += self->failure >= FAIL_WAIT_TIME ? 0 : 1;
					continue;
				}

				/* make socket blocking again */
				(void) fcntl(self->fd, F_SETFL, args);
			}

			/* ensure we will break out of connections being stuck more
			 * quickly than the kernel would give up */
			timeout.tv_sec = 10;
			timeout.tv_usec = (rand() % 300) * 1000;
			setsockopt(self->fd, SOL_SOCKET, SO_SNDTIMEO,
					&timeout, sizeof(timeout));
#ifdef SO_NOSIGPIPE
			setsockopt(self->fd, SOL_SOCKET, SO_NOSIGPIPE, NULL, 0);
#endif
		}

		/* send up to batch size */
		len = queue_dequeue_vector(self->batch, self->queue, self->bsize);
		self->batch[len] = NULL;
		metric = self->batch;

		if (len != 0 && !self->keep_running) {
			/* be noisy during shutdown so we can track any slowing down
			 * servers, possibly preventing us to shut down */
			logerr("shutting down %s:%u: waiting for %zu metrics\n",
					self->ip, self->port, len + queue_len(self->queue));
		}

		if (len == 0 && self->failure) {
			/* if we don't have anything to send, we have at least a
			 * connection succeed, so assume the server is up again,
			 * this is in particular important for recovering this
			 * node by probes, to avoid starvation of this server since
			 * its queue is possibly being offloaded to secondaries */
			if (self->ctype != CON_UDP)
				logerr("server %s:%u: OK after probe\n",
						self->ip, self->port);
			self->failure = 0;
		}

		for (; *metric != NULL; metric++) {
			len = strlen(*metric);
			if ((slen = write(self->fd, *metric, len)) != len) {
				/* not fully sent, or failure, close connection
				 * regardless so we don't get synchonisation problems,
				 * partially sent data is an error for us, since we use
				 * blocking sockets, and hence partial sent is
				 * indication of a failure */
				if (self->ctype != CON_UDP && !self->failure)
					logerr("failed to write() to %s:%u: %s\n",
							self->ip, self->port,
							(slen < 0 ? strerror(errno) : "uncomplete write"));
				close(self->fd);
				self->fd = -1;
				self->failure += self->failure >= FAIL_WAIT_TIME ? 0 : 1;
				/* put back stuff we couldn't process */
				for (; *metric != NULL; metric++) {
					if (!queue_putback(self->queue, *metric)) {
						if (mode & MODE_DEBUG)
							logerr("server %s:%u: dropping metric: %s",
									self->ip, self->port, *metric);
						free((char *)*metric);
						self->dropped++;
					}
				}
				break;
			} else if (self->failure) {
				if (self->ctype != CON_UDP)
					logerr("server %s:%u: OK\n", self->ip, self->port);
				self->failure = 0;
			}
			free((char *)*metric);
			self->metrics++;
		}

		gettimeofday(&stop, NULL);
		self->ticks += timediff(start, stop);

		idle = 0;  /* we did real work, reset the disconnect countdown */
	}
	self->running = 0;

	/* close the connection on exit */
	if (self->fd >= 0)
		close(self->fd);

	return NULL;
}