/*
 * Run jlog_inspect_datafile() on data file `logid` of `ctx`.  If it reports
 * a positive result, print a notice on stderr and, when the file-scope
 * `repair_datafiles` flag is set, call jlog_repair_datafile() and then
 * unlink the corresponding index file (data file name + INDEX_EXT).
 */
static void analyze_datafile(jlog_ctx *ctx, u_int32_t logid) {
  char idxfile[MAXPATHLEN];
  size_t len;

  if (jlog_inspect_datafile(ctx, logid) <= 0) return;

  fprintf(stderr, "One or more errors were found.\n");
  if (!repair_datafiles) return;

  jlog_repair_datafile(ctx, logid);
  fprintf(stderr,
          "Log file reconstructed, deleting the corresponding idx file.\n");

  STRSETDATAFILE(ctx, idxfile, logid);
  len = strlen(idxfile);
  /* sizeof(INDEX_EXT) counts the terminating NUL, so this guarantees the
   * extension fits; an unbounded strcat() could run past idxfile[]. */
  if (len + sizeof(INDEX_EXT) > sizeof(idxfile)) {
    fprintf(stderr, "Index file name is too long, not deleting it.\n");
    return;
  }
  memcpy(idxfile + len, INDEX_EXT, sizeof(INDEX_EXT));
  unlink(idxfile);
}
/*
 * Return the handle for data file `log`, opening it read-only style
 * (flags 0) with ctx->file_mode when no handle is cached in ctx->data.
 *
 * When `log` differs from ctx->current_log, __jlog_close_reader() and
 * __jlog_close_indexer() are called first.  The result is whatever
 * jlog_file_open() returned (or the already cached ctx->data).
 */
static jlog_file *__jlog_open_reader(jlog_ctx *ctx, u_int32_t log) {
  char path[MAXPATHLEN];

  /* Switching logs: drop whatever was open for the previous one. */
  if(ctx->current_log != log) {
    __jlog_close_reader(ctx);
    __jlog_close_indexer(ctx);
  }

  if(!ctx->data) {
    STRSETDATAFILE(ctx, path, log);
#ifdef DEBUG
    fprintf(stderr, "opening log file[ro]: '%s'\n", path);
#endif
    ctx->data = jlog_file_open(path, 0, ctx->file_mode);
    ctx->current_log = log;
  }
  return ctx->data;
}
/*
 * Return the writable data file handle for the log named by
 * ctx->storage.log, opening (and creating, via O_CREAT) it if ctx->data is
 * not already set.
 *
 * The metastore is locked while the metastore is restored and the file is
 * opened, and unlocked at `finish`.  NOTE(review): SYS_FAIL is defined
 * elsewhere; it evidently transfers control to the `finish` label -- confirm
 * what error state it records.
 */
static jlog_file *__jlog_open_writer(jlog_ctx *ctx) {
  char path[MAXPATHLEN];

  /* Still open */
  if(ctx->data != NULL) return ctx->data;

  if(!jlog_file_lock(ctx->metastore))
    SYS_FAIL(JLOG_ERR_LOCK);
  if(__jlog_restore_metastore(ctx, 1))
    SYS_FAIL(JLOG_ERR_META_OPEN);

  STRSETDATAFILE(ctx, path, ctx->storage.log);
#ifdef DEBUG
  fprintf(stderr, "opening log file[rw]: '%s'\n", path);
#endif
  ctx->data = jlog_file_open(path, O_CREAT, ctx->file_mode);

 finish:
  jlog_file_unlock(ctx->metastore);
  return ctx->data;
}
/*
 * Return the handle for the index file of data file `log` (data file name
 * with INDEX_EXT appended), opening it with O_CREAT and ctx->file_mode when
 * ctx->index is not already set.
 *
 * When `log` differs from ctx->current_log, __jlog_close_reader() and
 * __jlog_close_indexer() are called first.  Returns NULL if the index file
 * name would not fit in MAXPATHLEN bytes; otherwise returns ctx->index.
 */
static jlog_file *__jlog_open_indexer(jlog_ctx *ctx, u_int32_t log) {
  char file[MAXPATHLEN];
  size_t len;

  if(ctx->current_log != log) {
    __jlog_close_reader(ctx);
    __jlog_close_indexer(ctx);
  }
  if(ctx->index) {
    return ctx->index;
  }

  STRSETDATAFILE(ctx, file, log);
  len = strlen(file);
  /* sizeof(INDEX_EXT) includes the NUL terminator copied below. */
  if((len + sizeof(INDEX_EXT)) > sizeof(file)) return NULL;
  memcpy(file + len, INDEX_EXT, sizeof(INDEX_EXT));
#ifdef DEBUG
  /* Previously referenced an undeclared `idx`, breaking DEBUG builds. */
  fprintf(stderr, "opening index file: '%s'\n", file);
#endif
  ctx->index = jlog_file_open(file, O_CREAT, ctx->file_mode);
  ctx->current_log = log;
  return ctx->index;
}
/*
 * Remove data file `log` and then its index file (same name + INDEX_EXT).
 *
 * If `log` is ctx->current_log, __jlog_close_reader() and
 * __jlog_close_indexer() are called before unlinking.  The unlink() results
 * are not examined.  Returns -1 when the index file name would not fit in
 * the path buffer (the data file has already been unlinked at that point),
 * 0 otherwise.
 */
static int __jlog_unlink_datafile(jlog_ctx *ctx, u_int32_t log) {
  char path[MAXPATHLEN];
  int baselen;

  if(ctx->current_log == log) {
    __jlog_close_reader(ctx);
    __jlog_close_indexer(ctx);
  }

  /* First the data file itself... */
  STRSETDATAFILE(ctx, path, log);
#ifdef DEBUG
  fprintf(stderr, "unlinking %s\n", path);
#endif
  unlink(path);

  /* ...then the index, whose name is the data file name plus INDEX_EXT. */
  baselen = strlen(path);
  if(sizeof(path) < (baselen + sizeof(INDEX_EXT)))
    return -1;
  memcpy(path + baselen, INDEX_EXT, sizeof(INDEX_EXT));
#ifdef DEBUG
  fprintf(stderr, "unlinking %s\n", path);
#endif
  unlink(path);

  return 0;
}
static fq_msg *queue_jlog_dequeue(fqd_queue_impl_data f) { struct queue_jlog *d = (struct queue_jlog *)f; jlog_message msg; fq_msg *m; if(d->count == 0 && d->last_seen_nenqueued == d->nenqueued) return NULL; retry: if(d->count <= 0) { d->count = jlog_ctx_read_interval(d->reader, &d->start, &d->finish); fq_debug(FQ_DEBUG_IO, "jlog read batch count -> %d\n", d->count); if(d->count < 0) { char idxfile[PATH_MAX]; fq_debug(FQ_DEBUG_IO, "jlog_ctx_read_interval: %s\n", jlog_ctx_err_string(d->reader)); switch (jlog_ctx_err(d->reader)) { case JLOG_ERR_FILE_CORRUPT: case JLOG_ERR_IDX_CORRUPT: jlog_repair_datafile(d->reader, d->start.log); jlog_repair_datafile(d->reader, d->start.log + 1); fq_debug(FQ_DEBUG_IO, "jlog reconstructed, deleting corresponding index.\n"); STRSETDATAFILE(d->reader, idxfile, d->start.log); strncpy(idxfile + strlen(idxfile), INDEX_EXT, sizeof(idxfile) - strlen(idxfile)); unlink(idxfile); STRSETDATAFILE(d->reader, idxfile, d->start.log + 1); strncpy(idxfile + strlen(idxfile), INDEX_EXT, sizeof(idxfile) - strlen(idxfile)); unlink(idxfile); break; default: break; } } if(d->count <= 0) return NULL; } if(jlog_ctx_read_message(d->reader, &d->start, &msg) == -1) { d->count = 0; return NULL; } if(d->last_dequeued.log > d->start.log || (d->last_dequeued.log == d->start.log && d->last_dequeued.marker > d->start.marker)) { d->count--; JLOG_ID_ADVANCE(&d->start); goto retry; } if(msg.mess_len < sizeof(fq_msg)-1) m = NULL; else { off_t expected_len; uint32_t payload_len; m = (fq_msg *)msg.mess; memcpy(&payload_len, &m->payload_len, sizeof(m->payload_len)); expected_len = offsetof(fq_msg, payload) + payload_len; if(expected_len != msg.mess_len) m = NULL; else { m = malloc(expected_len); memcpy(m, msg.mess, expected_len); m->sender_msgid.id.u32.p3 = d->start.log; m->sender_msgid.id.u32.p4 = d->start.marker; } } d->count--; fq_debug(FQ_DEBUG_IO, "jlog batch count -> %d\n", d->count); if(d->count == 0) { if(d->auto_chkpt) { jlog_ctx_read_checkpoint(d->reader, 
&d->start); } } d->last_dequeued = d->start; JLOG_ID_ADVANCE(&d->start); ck_pr_inc_uint(&d->last_seen_nenqueued); return m; }
void * noit_jlog_thread_main(void *e_vptr) { int mask, bytes_read; eventer_t e = e_vptr; acceptor_closure_t *ac = e->closure; noit_jlog_closure_t *jcl = ac->service_ctx; char inbuff[sizeof(jlog_id)]; eventer_set_fd_blocking(e->fd); while(1) { jlog_id client_chkpt; int sleeptime = (ac->cmd == NOIT_JLOG_DATA_TEMP_FEED) ? 1 : DEFAULT_SECONDS_BETWEEN_BATCHES; jlog_get_checkpoint(jcl->jlog, ac->remote_cn, &jcl->chkpt); jcl->count = jlog_ctx_read_interval(jcl->jlog, &jcl->start, &jcl->finish); if(jcl->count < 0) { char idxfile[PATH_MAX]; noitL(noit_error, "jlog_ctx_read_interval: %s\n", jlog_ctx_err_string(jcl->jlog)); switch (jlog_ctx_err(jcl->jlog)) { case JLOG_ERR_FILE_CORRUPT: case JLOG_ERR_IDX_CORRUPT: jlog_repair_datafile(jcl->jlog, jcl->start.log); jlog_repair_datafile(jcl->jlog, jcl->start.log + 1); noitL(noit_error, "jlog reconstructed, deleting corresponding index.\n"); STRSETDATAFILE(jcl->jlog, idxfile, jcl->start.log); strlcat(idxfile, INDEX_EXT, sizeof(idxfile)); unlink(idxfile); STRSETDATAFILE(jcl->jlog, idxfile, jcl->start.log + 1); strlcat(idxfile, INDEX_EXT, sizeof(idxfile)); unlink(idxfile); goto alldone; break; default: goto alldone; } } if(jcl->count > MAX_ROWS_AT_ONCE) { /* Artificially set down the range to make the batches a bit easier * to handle on the stratcond/postgres end. 
* However, we must have more data, so drop the sleeptime to 0 */ jcl->count = MAX_ROWS_AT_ONCE; jcl->finish.marker = jcl->start.marker + jcl->count; sleeptime = 0; } if(jcl->count > 0) { if(noit_jlog_push(e, jcl)) { goto alldone; } /* Read our jlog_id accounting for possibly short reads */ bytes_read = 0; while(bytes_read < sizeof(jlog_id)) { int len; if((len = e->opset->read(e->fd, inbuff + bytes_read, sizeof(jlog_id) - bytes_read, &mask, e)) <= 0) goto alldone; bytes_read += len; } memcpy(&client_chkpt, inbuff, sizeof(jlog_id)); /* Fix the endian */ client_chkpt.log = ntohl(client_chkpt.log); client_chkpt.marker = ntohl(client_chkpt.marker); if(memcmp(&jcl->chkpt, &client_chkpt, sizeof(jlog_id))) { noitL(noit_error, "client %s submitted invalid checkpoint %u:%u expected %u:%u\n", ac->remote_cn, client_chkpt.log, client_chkpt.marker, jcl->chkpt.log, jcl->chkpt.marker); goto alldone; } gettimeofday(&jcl->feed_stats->last_checkpoint, NULL); jlog_ctx_read_checkpoint(jcl->jlog, &jcl->chkpt); } else { /* we have nothing to write -- maybe we have no checks configured... * If this is the case "forever", the remote might disconnect and * we would never know. Do the painful work of detecting a * disconnected client. */ struct pollfd pfd; pfd.fd = e->fd; pfd.events = POLLIN | POLLHUP | POLLRDNORM; pfd.revents = 0; if(poll(&pfd, 1, 0) != 0) { /* normally, we'd recv PEEK|DONTWAIT. However, the client should * not be writing to us. So, we know we can't have any legitimate * data on this socket (true even though this is SSL). So, if we're * here then "shit went wrong" */ noitL(noit_error, "jlog client %s disconnected while idle\n", ac->remote_cn); goto alldone; } } if(sleeptime) sleep(sleeptime); } alldone: e->opset->close(e->fd, &mask, e); noit_atomic_dec32(&jcl->feed_stats->connections); noit_jlog_closure_free(jcl); acceptor_closure_free(ac); return NULL; }
/*
 * Try to move `start` from its current log to the beginning of the next
 * one (start->log + 1).
 *
 * The move happens only if start->log is below ctx->storage.log, the next
 * data file exists and is non-empty, __jlog_resync_index() succeeds for it
 * (filling *last / *closed), and its index file exists and is non-empty.
 *
 * Returns  1 when start was advanced (marker reset to 0, log incremented),
 *          0 when it was not; *finish is then set to *start,
 *         -1 when the index file name does not fit in the path buffer.
 */
static int __jlog_step_to_next_log(jlog_ctx *ctx, jlog_id *start,
                                   jlog_id *finish, jlog_id *last,
                                   int *closed) {
  char path[MAXPATHLEN];
  struct stat sb = {0};
  int rc, baselen;

  STRSETDATAFILE(ctx, path, start->log + 1);
  do {
    rc = stat(path, &sb);
  } while(rc == -1 && errno == EINTR);

  /* We don't advance past where people are writing */
  if(start->log >= ctx->storage.log || rc != 0 || sb.st_size == 0)
    goto stay;
  if(__jlog_resync_index(ctx, start->log + 1, last, closed) != 0)
    goto stay;

  baselen = strlen(path);
  if((baselen + sizeof(INDEX_EXT)) > sizeof(path)) return -1;
  memcpy(path + baselen, INDEX_EXT, sizeof(INDEX_EXT));
  do {
    rc = stat(path, &sb);
  } while(rc == -1 && errno == EINTR);
  if(rc != 0 || sb.st_size == 0)
    goto stay;

  start->marker = 0;
  start->log++;
  return 1;

 stay:
  memcpy(finish, start, sizeof(*start));
  return 0;
}

/*
 * Starting from checkpoint `chkpt`, compute the range [*start, *finish]
 * by resyncing the index of start->log and, where appropriate, stepping
 * forward to following logs.
 *
 * *start is initialised from *chkpt.  For each candidate log:
 *   - if resyncing its index fails with JLOG_ERR_FILE_OPEN / ENOENT, the
 *     error is cleared and an advance to the next log is attempted; any
 *     other resync failure returns -1 with resync's error state intact;
 *   - if the checkpoint lies beyond the last id in the same log, *start is
 *     pulled back to that last id;
 *   - if *start equals the last id and the log is closed, an advance to
 *     the next log is attempted; otherwise *finish is set to the last id.
 *
 * Returns 0 on success and -1 on error.
 */
static int __jlog_find_first_log_after(jlog_ctx *ctx, jlog_id *chkpt,
                                       jlog_id *start, jlog_id *finish) {
  jlog_id last;
  int closed, rv;

  memcpy(start, chkpt, sizeof(*chkpt));

  for(;;) {
    if(__jlog_resync_index(ctx, start->log, &last, &closed) != 0) {
      if(ctx->last_error != JLOG_ERR_FILE_OPEN || ctx->last_errno != ENOENT)
        return -1; /* Just persist resync's error state */
      /* That file doesn't exist... bad, but we can fake a recovery by
         advancing to the next file that does exist */
      ctx->last_error = JLOG_ERR_SUCCESS;
    }
    else {
      /* If someone checkpoints off the end, be nice */
      if(last.log == start->log && last.marker < start->marker)
        memcpy(start, &last, sizeof(*start));

      if(memcmp(start, &last, sizeof(last)) != 0 || !closed) {
        memcpy(finish, &last, sizeof(last));
        return 0;
      }
    }

    /* BE SMARTER! */
    rv = __jlog_step_to_next_log(ctx, start, finish, &last, &closed);
    if(rv <= 0) return rv;
  }
}