Пример #1
0
STRBUF *wrap(STRBUF *buf, int width)
{
	const char *lf = "\n";
	const size_t lflen = strlen(lf);
	const char *bufp;
	const char *last;
	const char *lastspace = 0;
	size_t linelen = 0;
	STRBUF *out = strbuf_new();

	bufp = strbuf_get(buf);
	last = bufp;

	if (width == -1) {
		strbuf_append_n(out, strbuf_get(buf), strbuf_len(buf));
		return out;
	}

	strbuf_append_n(out, lf, lflen);
	while(bufp - strbuf_get(buf) < (ptrdiff_t)strbuf_len(buf)) {
		if (*bufp == ' ')
			lastspace = bufp;
		else if (*bufp == '\n') {
			strbuf_append_n(out, last, (size_t)(bufp - last));
			do {
				strbuf_append_n(out, lf, lflen);
			} while (*++bufp == '\n');
			lastspace = NULL;

			while(*bufp == ' ') {
				bufp++;
			}
			last = bufp;
			linelen = 0;
		}

		if (NULL != lastspace && (int)linelen > width) {
			strbuf_append_n(out, last, (size_t)(lastspace - last));
			strbuf_append_n(out, lf, lflen);
			last = lastspace;
			lastspace = NULL;
			linelen = (size_t)(bufp - last);

			while(*last == ' ') {
				last++;
			}
			if(last > bufp)
				bufp = last;
		}

		bufp++;
		linelen++;
		if ((unsigned char)*bufp > 0x80)
			bufp += utf8_length[(unsigned char)*bufp - 0x80];
	}
	strbuf_append_n(out, "\n", 1);
	return out;
}
Пример #2
0
static void _flush_log_file()
{
  if (_log_file && _log_file != NO_FILE) {
    fprintf(_log_file, "%s%s%s",
	strbuf_len(&_log_file_strbuf) ? strbuf_str(&_log_file_strbuf) : "",
	strbuf_len(&_log_file_strbuf) ? "\n" : "",
	strbuf_overrun(&_log_file_strbuf) ? "LOG OVERRUN\n" : ""
      );
    strbuf_reset(&_log_file_strbuf);
  }
}
Пример #3
0
static int rhizome_server_set_response(rhizome_http_request *r, const struct http_response *h)
{
  strbuf b = strbuf_local((char *) r->buffer, r->buffer_size);
  strbuf_build_http_response(b, h);
  if (r->buffer == NULL || strbuf_overrun(b)) {
    // Need a bigger buffer
    if (r->buffer)
      free(r->buffer);
    r->buffer_size = strbuf_count(b) + 1;
    r->buffer = malloc(r->buffer_size);
    if (r->buffer == NULL) {
      WHYF_perror("malloc(%u)", r->buffer_size);
      r->buffer_size = 0;
      return WHY("Cannot send response, out of memory");
    }
    strbuf_init(b, (char *) r->buffer, r->buffer_size);
    strbuf_build_http_response(b, h);
    if (strbuf_overrun(b))
      return WHYF("Bug! Cannot send response, buffer not big enough");
  }
  r->buffer_length = strbuf_len(b);
  r->buffer_offset = 0;
  r->request_type |= RHIZOME_HTTP_REQUEST_FROMBUFFER;
  if (debug & DEBUG_RHIZOME_TX)
    DEBUGF("Sending HTTP response: %s", alloca_toprint(120, (const char *)r->buffer, r->buffer_length));
  return 0;
}
Пример #4
0
int
dna_helper_enqueue(struct subscriber *source, mdp_port_t source_port, const char *did)
{
  if (config.debug.dnahelper)
    DEBUGF("DNAHELPER request did=%s sid=%s", did, alloca_tohex_sid_t(source->sid));
  if (dna_helper_pid == 0)
    return 0;
  // Only try to restart a DNA helper process if the previous one is well and truly gone.
  if (dna_helper_pid == -1 && dna_helper_stdin == -1 && dna_helper_stdout == -1 && dna_helper_stderr == -1) {
    if (dna_helper_start() == -1) {
      /* Something broke, bail out */
      return WHY("DNAHELPER start failed");
    }
  }
  /* Write request to dna helper.
     Request takes form:  SID-of-Requestor|DID|\n
     By passing the requestor's SID to the helper, we don't need to maintain
     any state, as all we have to do is wait for responses from the helper,
     which will include the requestor's SID.
  */
  if (dna_helper_stdin == -1)
    return 0;
  if (request_bufptr && request_bufptr != request_buffer) {
    WARNF("DNAHELPER currently sending request %s -- dropping new request", request_buffer);
    return 0;
  }
  if (awaiting_reply) {
    WARN("DNAHELPER currently awaiting reply -- dropping new request");
    return 0;
  }
  char buffer[sizeof request_buffer];
  strbuf b = strbuf_local(request_bufptr == request_buffer ? buffer : request_buffer, sizeof buffer);
  strbuf_tohex(b, SID_STRLEN, source->sid.binary);
  strbuf_putc(b, '|');
  strbuf_puts(b, did);
  strbuf_putc(b, '|');
  strbuf_putc(b, '\n');
  if (strbuf_overrun(b)) {
    WHYF("DNAHELPER request buffer overrun: %s -- request not sent", strbuf_str(b));
    request_bufptr = request_bufend = NULL;
  } else {
    if (strbuf_str(b) != request_buffer) {
      if (strcmp(strbuf_str(b), request_buffer) != 0)
	WARNF("DNAHELPER overwriting unsent request %s", request_buffer);
      strcpy(request_buffer, strbuf_str(b));
    }
    request_bufptr = request_buffer;
    request_bufend = request_buffer + strbuf_len(b);
    request_source = source;
    request_port = source_port;
    strncpy(request_did, did, sizeof request_did);
    request_did[sizeof request_did - 1] = '\0';
  }
  if (dna_helper_started) {
    sched_requests.poll.fd = dna_helper_stdin;
    watch(&sched_requests);
  }
  return 1;
}
Пример #5
0
static STRBUF *conv(iconv_t ic, STRBUF *buf) {
	STRBUF *output;

	output = strbuf_new();
	strbuf_append_n(output, strbuf_get(buf), strbuf_len(buf));

	return output;
}
Пример #6
0
int overlay_mdp_dnalookup_reply(const sockaddr_mdp *dstaddr, const unsigned char *resolved_sid, const char *uri, const char *did, const char *name)
{
  overlay_mdp_frame mdpreply;
  bzero(&mdpreply, sizeof mdpreply);
  mdpreply.packetTypeAndFlags = MDP_TX; // outgoing MDP message
  memcpy(mdpreply.out.src.sid, resolved_sid, SID_SIZE);
  mdpreply.out.src.port = MDP_PORT_DNALOOKUP;
  bcopy(dstaddr, &mdpreply.out.dst, sizeof(sockaddr_mdp));
  /* build reply as TOKEN|URI|DID|NAME|<NUL> */
  strbuf b = strbuf_local((char *)mdpreply.out.payload, sizeof mdpreply.out.payload);
  strbuf_tohex(b, resolved_sid, SID_SIZE);
  strbuf_sprintf(b, "|%s|%s|%s|", uri, did, name);
  if (strbuf_overrun(b))
    return WHY("MDP payload overrun");
  mdpreply.out.payload_length = strbuf_len(b) + 1;
  /* deliver reply */
  return overlay_mdp_dispatch(&mdpreply, 0 /* system generated */, NULL, 0);
}
Пример #7
0
static void write_to_file(STRBUF *outbuf, const char *filename)
{
	int fd;
	ssize_t len;

	fd = open(filename, O_WRONLY | O_CREAT | O_TRUNC, 0644);
	if (fd == -1) {
		fprintf(stderr, "Can't open %s: %s\n", filename, strerror(errno));
		exit(EXIT_FAILURE);
	}

	len = write(fd, strbuf_get(outbuf), strbuf_len(outbuf));
	if (len == -1) {
		fprintf(stderr, "Can't write to %s: %s\n", filename, strerror(errno));
		exit(EXIT_FAILURE);
	}

	close(fd);
}
Пример #8
0
static void _log_prefix(_log_iterator *it, int level)
{
  if (it->config == &config_file) {
    if (strbuf_is_empty(&_log_file_strbuf))
      strbuf_init(&_log_file_strbuf, _log_file_buf, sizeof _log_file_buf);
    else if (strbuf_len(&_log_file_strbuf))
      strbuf_putc(&_log_file_strbuf, '\n');
    it->xpf = XPRINTF_STRBUF(&_log_file_strbuf);
    _log_prefix_level(it, level);
  }
#ifdef ANDROID
  else if (it->config == &config.log.android) {
    strbuf_init(&_android_strbuf, _log_android_buf, sizeof _log_android_buf);
    it->xpf = XPRINTF_STRBUF(&_android_strbuf);
  }
#endif // ANDROID
  else if (it->config == &config.log.console) {
    it->xpf = XPRINTF_STDIO(logfile_stderr);
    _log_prefix_level(it, level);
  }
  else
    abort();
  if (it->config->show_pid)
    xprintf(it->xpf, "[%5u] ", getpid());
  if (it->config->show_time) {
    if (it->tv.tv_sec == 0) {
      xputs("NOTIME______ ", it->xpf);
    } else {
      char buf[50];
      if (strftime(buf, sizeof buf, "%T", &it->tm) == 0)
	xputs("EMPTYTIME___ ", it->xpf);
      else
	xprintf(it->xpf, "%s.%03u ", buf, (unsigned int)it->tv.tv_usec / 1000);
    }
  }
}
Пример #9
0
int rhizome_direct_parse_http_request(rhizome_http_request *r)
{
  const char *submitBareFileURI=confValueGet("rhizome.api.addfile.uri", NULL);
  DEBUGF("uri=%s", submitBareFileURI ? alloca_str_toprint(submitBareFileURI) : "NULL");
  
  /* Switching to writing, so update the call-back */
  r->alarm.poll.events=POLLOUT;
  watch(&r->alarm);
  // Parse the HTTP request into verb, path, protocol, headers and content.
  char *const request_end = r->request + r->request_length;
  char *verb = r->request;
  char *path = NULL;
  char *proto = NULL;
  size_t pathlen = 0;
  char *headers = NULL;
  int headerlen = 0;
  char *content = NULL;
  int contentlen = 0;
  char *p;
  if ((str_startswith(verb, "GET", &p) || str_startswith(verb, "POST", &p)) && isspace(*p)) {
    *p++ = '\0';
    path = p;
    while (p < request_end && !isspace(*p))
      ++p;
    if (p < request_end) {
      pathlen = p - path;
      *p++ = '\0';
      proto = p;
      if ( str_startswith(p, "HTTP/1.", &p)
	&& (str_startswith(p, "0", &p) || str_startswith(p, "1", &p))
	&& (str_startswith(p, "\r\n", &headers) || str_startswith(p, "\n", &headers))
      ) {
	*p = '\0';
	char *eoh = str_str(headers, "\r\n\r\n", request_end - p);
	if (eoh) {
	  content = eoh + 4;
	  headerlen = content - headers;
	  contentlen = request_end - content;
	}
      }
    }
  }
  if (content == NULL) {
    if (debug & DEBUG_RHIZOME_TX)
      DEBUGF("Received malformed HTTP request %s", alloca_toprint(160, (const char *)r->request, r->request_length));
    return rhizome_server_simple_http_response(r, 400, "<html><h1>Malformed request</h1></html>\r\n");
  }
  INFOF("RHIZOME HTTP SERVER, %s %s %s", verb, alloca_toprint(-1, path, pathlen), proto);
  if (debug & DEBUG_RHIZOME_TX)
    DEBUGF("headers %s", alloca_toprint(-1, headers, headerlen));
  if (strcmp(verb, "GET") == 0 && strcmp(path, "/favicon.ico") == 0) {
    r->request_type = RHIZOME_HTTP_REQUEST_FAVICON;
    rhizome_server_http_response_header(r, 200, "image/vnd.microsoft.icon", favicon_len);
  } else if (strcmp(verb, "POST") == 0
      && (   strcmp(path, "/rhizome/import") == 0 
	  || strcmp(path, "/rhizome/enquiry") == 0
	  || (submitBareFileURI && strcmp(path, submitBareFileURI) == 0)
	 )
  ) {
    const char *cl_str=str_str(headers,"Content-Length: ",headerlen);
    const char *ct_str=str_str(headers,"Content-Type: multipart/form-data; boundary=",headerlen);
    if (!cl_str)
      return rhizome_server_simple_http_response(r,400,"<html><h1>Missing Content-Length header</h1></html>\r\n");
    if (!ct_str)
      return rhizome_server_simple_http_response(r,400,"<html><h1>Missing or unsupported Content-Type header</h1></html>\r\n");
    /* ok, we have content-type and content-length, now make sure they are well formed. */
    long long content_length;
    if (sscanf(cl_str,"Content-Length: %lld",&content_length)!=1)
      return rhizome_server_simple_http_response(r,400,"<html><h1>Malformed Content-Length header</h1></html>\r\n");
    char boundary_string[1024];
    int i;
    ct_str+=strlen("Content-Type: multipart/form-data; boundary=");
    for(i=0;i<1023&&*ct_str&&*ct_str!='\n'&&*ct_str!='\r';i++,ct_str++)
      boundary_string[i]=*ct_str;
    boundary_string[i] = '\0';
    if (i<4||i>128)
      return rhizome_server_simple_http_response(r,400,"<html><h1>Malformed Content-Type header</h1></html>\r\n");

    DEBUGF("content_length=%lld, boundary_string=%s contentlen=%d", (long long) content_length, alloca_str_toprint(boundary_string), contentlen);

    /* Now start receiving and parsing multi-part data.  If we already received some of the
	post-header data, process that first.  Tell the HTTP request that it has moved to multipart
	form data parsing, and what the actual requested action is.
    */

    /* Remember boundary string and source path.
	Put the preceeding -- on the front to make our life easier when
	parsing the rest later. */
    strbuf bs = strbuf_local(r->boundary_string, sizeof r->boundary_string);
    strbuf_puts(bs, "--");
    strbuf_puts(bs, boundary_string);
    if (strbuf_overrun(bs))
      return rhizome_server_simple_http_response(r,500,"<html><h1>Internal server error: Multipart boundary string too long</h1></html>\r\n");
    strbuf ps = strbuf_local(r->path, sizeof r->path);
    strbuf_puts(ps, path);
    if (strbuf_overrun(ps))
      return rhizome_server_simple_http_response(r,500,"<html><h1>Internal server error: Path too long</h1></html>\r\n");
    r->boundary_string_length = strbuf_len(bs);
    r->source_index = 0;
    r->source_count = content_length;
    r->request_type = RHIZOME_HTTP_REQUEST_RECEIVING_MULTIPART;
    r->request_length = 0;
    r->source_flags = 0;

    /* Find the end of the headers and start of any body bytes that we have read
	so far. Copy the bytes to a separate buffer, because r->request and 
	r->request_length get used internally in the parser.
      */
    if (contentlen) {
      char buffer[contentlen];
      bcopy(content, buffer, contentlen);
      rhizome_direct_process_post_multipart_bytes(r, buffer, contentlen);
    }

    /* Handle the rest of the transfer asynchronously. */
    return 0;
  } else {
    rhizome_server_simple_http_response(r, 404, "<html><h1>Not found (OTHER)</h1></html>\r\n");
  }
  
  /* Try sending data immediately. */
  rhizome_server_http_send_bytes(r);

  return 0;
}
Пример #10
0
int main(int argc, const char **argv)
{
	struct stat st;
	iconv_t ic;
	STRBUF *wbuf;
	STRBUF *docbuf;
	STRBUF *outbuf;
	int i = 1;

	(void)setlocale(LC_ALL, "");

	while (argv[i]) {
		if (!strcmp(argv[i], "--raw")) {
			opt_raw = 1;
			i++; continue;
		} else if (!strcmp(argv[i], "--raw-input")) {
			opt_raw_input = 1;
			i++; continue;
		} else if (!strncmp(argv[i], "--encoding=", 11)) {
			size_t arglen = strlen(argv[i]) - 10;
#ifdef iconvlist
			if (!strcmp(argv[i] + 11, "list")) {
				show_iconvlist();
			}
#endif
			opt_encoding = ymalloc(arglen);
			memcpy(opt_encoding, argv[i] + 11, arglen);
			i++; continue;
		} else if (!strncmp(argv[i], "--width=", 8)) {
			opt_width = atoi(argv[i] + 8);
			if(opt_width < 3 && opt_width != -1) {
				fprintf(stderr, "Invalid value for width: %s\n",
					argv[i] + 8);
				exit(EXIT_FAILURE);
			}
			i++; continue;
		} else if (!strcmp(argv[i], "--force")) {
			// ignore this setting
			i++; continue;
		} else if (!strncmp(argv[i], "--output=", 9)) {
			if (*(argv[i] + 9) != '-') {
				size_t arglen = strlen(argv[i]) - 8;
				opt_output = ymalloc(arglen);
				memcpy(opt_output, argv[i] + 9, arglen);
			}
			i++; continue;
		} else if (!strncmp(argv[i], "--subst=", 8)) {
			if (!strcmp(argv[i] + 8, "none"))
				opt_subst = SUBST_NONE;
			else if (!strcmp(argv[i] + 8, "some"))
				opt_subst = SUBST_SOME;
			else if (!strcmp(argv[i] + 8, "all"))
				opt_subst = SUBST_ALL;
			else {
				fprintf(stderr, "Invalid value for --subst: %s\n",
					argv[i] + 8);
				exit(EXIT_FAILURE);
			}
			i++; continue;
		} else if (!strcmp(argv[i], "--help")) {
			usage();
		} else if (!strcmp(argv[i], "--version")
			   || !strcmp(argv[i], "-v")) {
			version_info();
		} else if (!strcmp(argv[i], "-")) {
			usage();
		} else {
			if(opt_filename)
				usage();
			opt_filename = argv[i];
			i++; continue;
		}
	}

	if(opt_encoding && !strcmp("show", opt_encoding)) {
		yfree(opt_encoding);
		opt_encoding = guess_encoding();
		printf("%s\n", opt_encoding);
		yfree(opt_encoding);
		exit(EXIT_SUCCESS);
	}

	if(opt_raw)
		opt_width = -1;

	if(!opt_filename)
		usage();

	if(!opt_encoding) {
		opt_encoding = guess_encoding();
	}

	ic = init_conv("UTF-8", opt_encoding);

	if (0 != stat(opt_filename, &st)) {
		fprintf(stderr, "%s: %s\n",
			opt_filename, strerror(errno));
		exit(EXIT_FAILURE);
	}

	/* read content.xml */
	docbuf = opt_raw_input ?
		read_from_xml(opt_filename, "content.xml") :
		read_from_zip(opt_filename, "content.xml");

	if (!opt_raw) {
		subst_doc(ic, docbuf);
		format_doc(docbuf, opt_raw_input);
	}

	wbuf = wrap(docbuf, opt_width);

	/* remove all trailing whitespace */
	(void) regex_subst(wbuf, " +\n", _REG_GLOBAL, "\n");

	outbuf = conv(ic, wbuf);

	if (opt_output)
		write_to_file(outbuf, opt_output);
	else
		fwrite(strbuf_get(outbuf), strbuf_len(outbuf), 1, stdout);

	finish_conv(ic);
	strbuf_free(wbuf);
	strbuf_free(docbuf);
	strbuf_free(outbuf);
#ifndef NO_ICONV
	yfree(opt_encoding);
#endif
	if (opt_output)
		yfree(opt_output);

	return EXIT_SUCCESS;
}
Пример #11
0
static STRBUF *conv(iconv_t ic, STRBUF *buf)
{
	/* FIXME: This functionality belongs into strbuf.c */
	ICONV_CHAR *doc;
	char *out, *outbuf;
	size_t inleft, outleft = 0;
	size_t r;
	size_t outlen = 0;
	const size_t alloc_step = 4096;
	STRBUF *output;

	inleft = strbuf_len(buf);
	doc = (ICONV_CHAR*)strbuf_get(buf);
	outlen = alloc_step; outleft = alloc_step;
	outbuf = ymalloc(alloc_step);
	out = outbuf;
	outleft = alloc_step;

	do {
		if (!outleft) {
			outlen += alloc_step; outleft += alloc_step;
			yrealloc_buf(&outbuf, &out, outlen);
		}
		r = iconv(ic, &doc, &inleft, &out, &outleft);
		if (r == (size_t)-1) {
			if(errno == E2BIG) {
				outlen += alloc_step; outleft += alloc_step;
				if (outlen > (strbuf_len(buf) << 3)) {
					fprintf(stderr, "Buffer grew to much. "
						"Corrupted document?\n");
					exit(EXIT_FAILURE);
				}
				yrealloc_buf(&outbuf, &out, outlen);
				continue;
			} else if ((errno == EILSEQ) || (errno == EINVAL)) {
				char skip = 1;

				/* advance in source buffer */
				if ((unsigned char)*doc > 0x80)
					skip += utf8_length[(unsigned char)*doc - 0x80];
				doc += skip;
				inleft -= skip;

				/* advance in output buffer */
				*out = '?';
				out++;
				outleft--;

				continue;
			}
			fprintf(stderr, "iconv returned: %s\n", strerror(errno));
			exit(EXIT_FAILURE);
		}
	} while(inleft != 0);

	if (!outleft) {
		outbuf = yrealloc(outbuf, outlen + 1);
	}
	*out = '\0';

	output = strbuf_slurp_n(outbuf, (size_t)(out - outbuf));
	strbuf_setopt(output, STRBUF_NULLOK);
	return output;
}
Пример #12
0
void cmark_process_line(cmark_doc_parser *parser, const unsigned char *buffer,
		 size_t bytes)
{
	cmark_node* last_matched_container;
	int offset = 0;
	int matched = 0;
	int lev = 0;
	int i;
	cmark_list *data = NULL;
	bool all_matched = true;
	cmark_node* container;
	cmark_node* cur = parser->current;
	bool blank = false;
	int first_nonspace;
	int indent;
	chunk input;

	utf8proc_detab(parser->curline, buffer, bytes);

	// Add a newline to the end if not present:
	// TODO this breaks abstraction:
	if (parser->curline->ptr[parser->curline->size - 1] != '\n') {
		strbuf_putc(parser->curline, '\n');
	}
	input.data = parser->curline->ptr;
	input.len = parser->curline->size;

	// container starts at the document root.
	container = parser->root;

	parser->line_number++;

	// for each containing cmark_node, try to parse the associated line start.
	// bail out on failure:  container will point to the last matching cmark_node.

	while (container->last_child && container->last_child->open) {
		container = container->last_child;

		first_nonspace = offset;
		while (peek_at(&input, first_nonspace) == ' ') {
			first_nonspace++;
		}

		indent = first_nonspace - offset;
		blank = peek_at(&input, first_nonspace) == '\n';

		if (container->type == NODE_BQUOTE) {
			matched = indent <= 3 && peek_at(&input, first_nonspace) == '>';
			if (matched) {
				offset = first_nonspace + 1;
				if (peek_at(&input, offset) == ' ')
					offset++;
			} else {
				all_matched = false;
			}

		} else if (container->type == NODE_LIST_ITEM) {

			if (indent >= container->as.list.marker_offset +
					container->as.list.padding) {
				offset += container->as.list.marker_offset +
					container->as.list.padding;
			} else if (blank) {
				offset = first_nonspace;
			} else {
				all_matched = false;
			}

		} else if (container->type == NODE_INDENTED_CODE) {

			if (indent >= CODE_INDENT) {
				offset += CODE_INDENT;
			} else if (blank) {
				offset = first_nonspace;
			} else {
				all_matched = false;
			}

		} else if (container->type == NODE_ATX_HEADER ||
				container->type == NODE_SETEXT_HEADER) {

			// a header can never contain more than one line
			all_matched = false;

		} else if (container->type == NODE_FENCED_CODE) {

			// skip optional spaces of fence offset
			i = container->as.code.fence_offset;
			while (i > 0 && peek_at(&input, offset) == ' ') {
				offset++;
				i--;
			}

		} else if (container->type == NODE_HTML) {

			if (blank) {
				all_matched = false;
			}

		} else if (container->type == NODE_PARAGRAPH) {

			if (blank) {
				container->last_line_blank = true;
				all_matched = false;
			}

		}

		if (!all_matched) {
			container = container->parent;  // back up to last matching cmark_node
			break;
		}
	}

	last_matched_container = container;

	// check to see if we've hit 2nd blank line, break out of list:
	if (blank && container->last_line_blank) {
		break_out_of_lists(parser, &container, parser->line_number);
	}

	// unless last matched container is code cmark_node, try new container starts:
	while (container->type != NODE_FENCED_CODE && container->type != NODE_INDENTED_CODE &&
			container->type != NODE_HTML) {

		first_nonspace = offset;
		while (peek_at(&input, first_nonspace) == ' ')
			first_nonspace++;

		indent = first_nonspace - offset;
		blank = peek_at(&input, first_nonspace) == '\n';

		if (indent >= CODE_INDENT) {
			if (cur->type != NODE_PARAGRAPH && !blank) {
				offset += CODE_INDENT;
				container = add_child(parser, container, NODE_INDENTED_CODE, parser->line_number, offset + 1);
			} else { // indent > 4 in lazy line
				break;
			}

		} else if (peek_at(&input, first_nonspace) == '>') {

			offset = first_nonspace + 1;
			// optional following character
			if (peek_at(&input, offset) == ' ')
				offset++;
			container = add_child(parser, container, NODE_BQUOTE, parser->line_number, offset + 1);

		} else if ((matched = scan_atx_header_start(&input, first_nonspace))) {

			offset = first_nonspace + matched;
			container = add_child(parser, container, NODE_ATX_HEADER, parser->line_number, offset + 1);

			int hashpos = chunk_strchr(&input, '#', first_nonspace);
			int level = 0;

			while (peek_at(&input, hashpos) == '#') {
				level++;
				hashpos++;
			}
			container->as.header.level = level;

		} else if ((matched = scan_open_code_fence(&input, first_nonspace))) {

			container = add_child(parser, container, NODE_FENCED_CODE, parser->line_number, first_nonspace + 1);
			container->as.code.fence_char = peek_at(&input, first_nonspace);
			container->as.code.fence_length = matched;
			container->as.code.fence_offset = first_nonspace - offset;
			offset = first_nonspace + matched;

		} else if ((matched = scan_html_block_tag(&input, first_nonspace))) {

			container = add_child(parser, container, NODE_HTML, parser->line_number, first_nonspace + 1);
			// note, we don't adjust offset because the tag is part of the text

		} else if (container->type == NODE_PARAGRAPH &&
				(lev = scan_setext_header_line(&input, first_nonspace)) &&
				// check that there is only one line in the paragraph:
				strbuf_strrchr(&container->string_content, '\n',
					strbuf_len(&container->string_content) - 2) < 0) {

			container->type = NODE_SETEXT_HEADER;
			container->as.header.level = lev;
			offset = input.len - 1;

		} else if (!(container->type == NODE_PARAGRAPH && !all_matched) &&
				(matched = scan_hrule(&input, first_nonspace))) {

			// it's only now that we know the line is not part of a setext header:
			container = add_child(parser, container, NODE_HRULE, parser->line_number, first_nonspace + 1);
			finalize(parser, container, parser->line_number);
			container = container->parent;
			offset = input.len - 1;

		} else if ((matched = parse_list_marker(&input, first_nonspace, &data))) {

			// compute padding:
			offset = first_nonspace + matched;
			i = 0;
			while (i <= 5 && peek_at(&input, offset + i) == ' ') {
				i++;
			}
			// i = number of spaces after marker, up to 5
			if (i >= 5 || i < 1 || peek_at(&input, offset) == '\n') {
				data->padding = matched + 1;
				if (i > 0) {
					offset += 1;
				}
			} else {
				data->padding = matched + i;
				offset += i;
			}

			// check container; if it's a list, see if this list item
			// can continue the list; otherwise, create a list container.

			data->marker_offset = indent;

			if (container->type != NODE_LIST ||
					!lists_match(&container->as.list, data)) {
				container = add_child(parser, container, NODE_LIST, parser->line_number,
						first_nonspace + 1);

				memcpy(&container->as.list, data, sizeof(*data));
			}

			// add the list item
			container = add_child(parser, container, NODE_LIST_ITEM, parser->line_number,
					first_nonspace + 1);
			/* TODO: static */
			memcpy(&container->as.list, data, sizeof(*data));
			free(data);
		} else {
			break;
		}

		if (accepts_lines(container->type)) {
			// if it's a line container, it can't contain other containers
			break;
		}
	}

	// what remains at offset is a text line.  add the text to the
	// appropriate container.

	first_nonspace = offset;
	while (peek_at(&input, first_nonspace) == ' ')
		first_nonspace++;

	indent = first_nonspace - offset;
	blank = peek_at(&input, first_nonspace) == '\n';

	// cmark_node quote lines are never blank as they start with >
	// and we don't count blanks in fenced code for purposes of tight/loose
	// lists or breaking out of lists.  we also don't set last_line_blank
	// on an empty list item.
	container->last_line_blank = (blank &&
			container->type != NODE_BQUOTE &&
			container->type != NODE_FENCED_CODE &&
			!(container->type == NODE_LIST_ITEM &&
				container->first_child == NULL &&
				container->start_line == parser->line_number));

	cmark_node *cont = container;
	while (cont->parent) {
		cont->parent->last_line_blank = false;
		cont = cont->parent;
	}

	if (cur != last_matched_container &&
			container == last_matched_container &&
			!blank &&
			cur->type == NODE_PARAGRAPH &&
			strbuf_len(&cur->string_content) > 0) {

		add_line(cur, &input, offset);

	} else { // not a lazy continuation

		// finalize any blocks that were not matched and set cur to container:
		while (cur != last_matched_container) {
			finalize(parser, cur, parser->line_number);
			cur = cur->parent;
			assert(cur != NULL);
		}

		if (container->type == NODE_INDENTED_CODE) {

			add_line(container, &input, offset);

		} else if (container->type == NODE_FENCED_CODE) {
			matched = 0;

			if (indent <= 3 &&
					peek_at(&input, first_nonspace) == container->as.code.fence_char) {
				int fence_len = scan_close_code_fence(&input, first_nonspace);
				if (fence_len > container->as.code.fence_length)
					matched = 1;
			}

			if (matched) {
				// if closing fence, don't add line to container; instead, close it:
				finalize(parser, container, parser->line_number);
				container = container->parent; // back up to parent
			} else {
				add_line(container, &input, offset);
			}

		} else if (container->type == NODE_HTML) {

			add_line(container, &input, offset);

		} else if (blank) {

			// ??? do nothing

		} else if (container->type == NODE_ATX_HEADER) {

			chop_trailing_hashtags(&input);
			add_line(container, &input, first_nonspace);
			finalize(parser, container, parser->line_number);
			container = container->parent;

		} else if (accepts_lines(container->type)) {

			add_line(container, &input, first_nonspace);

		} else if (container->type != NODE_HRULE && container->type != NODE_SETEXT_HEADER) {

			// create paragraph container for line
			container = add_child(parser, container, NODE_PARAGRAPH, parser->line_number, first_nonspace + 1);
			add_line(container, &input, first_nonspace);

		} else {
			assert(false);
		}

		parser->current = container;
	}
	strbuf_clear(parser->curline);

}
Пример #13
0
void rhizome_direct_http_dispatch(rhizome_direct_sync_request *r)
{
  DEBUGF("Dispatch size_high=%lld",r->cursor->size_high);
  rhizome_direct_transport_state_http *state = r->transport_specific_state;

  int sock=socket(AF_INET, SOCK_STREAM, 0);
  if (sock==-1) {
    WHY_perror("socket");    
    goto end;
  } 

  struct hostent *hostent;
  hostent = gethostbyname(state->host);
  if (!hostent) {
    DEBUGF("could not resolve hostname");
    goto end;
  }

  struct sockaddr_in addr;  
  addr.sin_family = AF_INET;     
  addr.sin_port = htons(state->port);   
  addr.sin_addr = *((struct in_addr *)hostent->h_addr);
  bzero(&(addr.sin_zero),8);     

  if (connect(sock,(struct sockaddr *)&addr,sizeof(struct sockaddr)) == -1) {
    WHY_perror("connect");
    close(sock);
    goto end;
  }
 
  char boundary[20];
  char buffer[8192];

  strbuf bb = strbuf_local(boundary, sizeof boundary);
  strbuf_sprintf(bb, "%08lx%08lx", random(), random());
  assert(!strbuf_overrun(bb));
  strbuf content_preamble = strbuf_alloca(200);
  strbuf content_postamble = strbuf_alloca(40);
  strbuf_sprintf(content_preamble,
      "--%s\r\n"
      "Content-Disposition: form-data; name=\"data\"; filename=\"IHAVEs\"\r\n"
      "Content-Type: application/octet-stream\r\n"
      "\r\n",
      boundary
    );
  strbuf_sprintf(content_postamble, "\r\n--%s--\r\n", boundary);
  assert(!strbuf_overrun(content_preamble));
  assert(!strbuf_overrun(content_postamble));
  int content_length = strbuf_len(content_preamble)
		     + r->cursor->buffer_offset_bytes
		     + r->cursor->buffer_used
		     + strbuf_len(content_postamble);
  strbuf request = strbuf_local(buffer, sizeof buffer);
  strbuf_sprintf(request,
      "POST /rhizome/enquiry HTTP/1.0\r\n"
      "Content-Length: %d\r\n"
      "Content-Type: multipart/form-data; boundary=%s\r\n"
      "\r\n%s",
      content_length, boundary, strbuf_str(content_preamble)
    );
  assert(!strbuf_overrun(request));

  /* TODO: Refactor this code so that it uses our asynchronous framework.
   */
  int len = strbuf_len(request);
  int sent=0;
  while(sent<len) {
    DEBUGF("write(%d, %s, %d)", sock, alloca_toprint(-1, &buffer[sent], len-sent), len-sent);
    int count=write(sock,&buffer[sent],len-sent);
    if (count == -1) {
      if (errno==EPIPE) goto rx;
      WHYF_perror("write(%d)", len - sent);
      close(sock);
      goto end;
    }
    sent+=count;
  }

  len=r->cursor->buffer_offset_bytes+r->cursor->buffer_used;
  sent=0;
  while(sent<len) {
    int count=write(sock,&r->cursor->buffer[sent],len-sent);
    if (count == -1) {
      if (errno == EPIPE)
	goto rx;
      WHYF_perror("write(%d)", count);
      close(sock);
      goto end;
    }
    sent+=count;
  }

  strbuf_reset(request);
  strbuf_puts(request, strbuf_str(content_postamble));
  len = strbuf_len(request);
  sent=0;
  while(sent<len) {
    DEBUGF("write(%d, %s, %d)", sock, alloca_toprint(-1, &buffer[sent], len-sent), len-sent);
    int count=write(sock,&buffer[sent],len-sent);
    if (count == -1) {
      if (errno==EPIPE) goto rx;
      WHYF_perror("write(%d)", len - sent);
      close(sock);
      goto end;
    }
    sent+=count;
  }

  struct http_response_parts parts;
 rx:
  /* request sent, now get response back. */
  if (receive_http_response(sock, buffer, sizeof buffer, &parts) == -1) {
    close(sock);
    goto end;
  }

  /* For some reason the response data gets overwritten during a push,
     so we need to copy it, and use the copy instead. */
  unsigned char *actionlist=alloca(parts.content_length);
  bcopy(parts.content_start, actionlist, parts.content_length);
  dump("response", actionlist, parts.content_length);

  /* We now have the list of (1+RHIZOME_BAR_PREFIX_BYTES)-byte records that indicate
     the list of BAR prefixes that differ between the two nodes.  We can now action
     those which are relevant, i.e., based on whether we are pushing, pulling or 
     synchronising (both).

     I am currently undecided as to whether it is cleaner to have some general
     rhizome direct function for doing that, or whether it just adds unnecessary
     complication, and the responses should just be handled in here.

     For now, I am just going to implement it in here, and we can generalise later.
  */
  int i;
  for(i=10;i<content_length;i+=(1+RHIZOME_BAR_PREFIX_BYTES))
    {
      int type=actionlist[i];
      unsigned long long 
	bid_prefix_ll=rhizome_bar_bidprefix_ll((unsigned char *)&actionlist[i+1]);
      DEBUGF("%s %016llx* @ 0x%x",type==1?"push":"pull",bid_prefix_ll,i);
      if (type==2&&r->pullP) {
	/* Need to fetch manifest.  Once we have the manifest, then we can
	   use our normal bundle fetch routines from rhizome_fetch.c	 

	   Generate a request like: GET /rhizome/manifestbybar/<hex of bar>
	   and add it to our list of HTTP fetch requests, then watch
	   until the request is finished.  That will give us the manifest.
	   Then as noted above, we can use that to pull the file down using
	   existing routines.
	*/
	if (!rhizome_fetch_request_manifest_by_prefix
	    (&addr,&actionlist[i+1],RHIZOME_BAR_PREFIX_BYTES,
	     1 /* import, getting file if needed */))
	  {
	    /* Fetching the manifest, and then using it to see if we want to 
	       fetch the file for import is all handled asynchronously, so just
	       wait for it to finish. */
	    while(rhizome_file_fetch_queue_count) fd_poll();
	  }
	
      } else if (type==1&&r->pushP) {
	/* Form up the POST request to submit the appropriate bundle. */

	/* Start by getting the manifest, which is the main thing we need, and also
	   gives us the information we need for sending any associated file. */
	rhizome_manifest 
	  *m=rhizome_direct_get_manifest(&actionlist[i+1],
					 RHIZOME_BAR_PREFIX_BYTES);
	if (!m) {
	  WHY("This should never happen.  The manifest exists, but when I went looking for it, it doesn't appear to be there.");
	  goto next_item;
	}

	/* Get filehash and size from manifest if present */
	const char *id = rhizome_manifest_get(m, "id", NULL, 0);
	DEBUGF("bundle id = '%s'",id);
	const char *hash = rhizome_manifest_get(m, "filehash", NULL, 0);
	DEBUGF("bundle file hash = '%s'",hash);
	long long filesize = rhizome_manifest_get_ll(m, "filesize");
	DEBUGF("file size = %lld",filesize);

	/* We now have everything we need to compose the POST request and send it.
	 */
	char *template="POST /rhizome/import HTTP/1.0\r\n"
Пример #14
0
int regex_subst(STRBUF *buf,
		const char *regex, int regopt,
		const void *subst)
{
	int r;
	const char *bufp;
	size_t off = 0;
	const int i = 0;
	int match_count = 0;

	regex_t rx;
	const size_t nmatches = 10;
	regmatch_t matches[10];

	r = regcomp(&rx, regex, REG_EXTENDED);
	if (r) {
		print_regexp_err(r, &rx);
		exit(EXIT_FAILURE);
	}

	do {
		if (off > strbuf_len(buf))
			break;

		bufp = strbuf_get(buf) + off;

#ifdef REG_STARTEND
		matches[0].rm_so = 0;
		matches[0].rm_eo = strbuf_len(buf) - off;

		if (0 != regexec(&rx, bufp, nmatches, matches, REG_STARTEND))
#else
		if (0 != regexec(&rx, bufp, nmatches, matches, 0))
#endif
			break;

		if (matches[i].rm_so != -1) {
			char *s;
			int subst_len;

			if (regopt & _REG_EXEC) {
				s = (*(char *(*)
				       (const char *buf, regmatch_t matches[],
					size_t nmatch, size_t off))subst)
					(strbuf_get(buf), matches, nmatches, off);
			} else
				s = (char*)subst;

			subst_len = strbuf_subst(buf,
						 matches[i].rm_so + off,
						 matches[i].rm_eo + off,
						 s);
			match_count++;

			if (regopt & _REG_EXEC)
				yfree(s);

			off += matches[i].rm_so;
			if (subst_len >= 0)
				off += subst_len + 1;
		}
	} while (regopt & _REG_GLOBAL);

	regfree(&rx);
	return match_count;
}
Пример #15
0
// Convert a node_block list to HTML.  Returns 0 on success, and sets result.
static void blocks_to_html(strbuf *html, node_block *b)
{
	struct ListData *data;
	render_stack* rstack = NULL;
	bool visit_children = false;
	bool tight = false;

	while(b != NULL) {
		visit_children = false;
		switch(b->tag) {
		case BLOCK_DOCUMENT:
			rstack = push_block(rstack, b->next, "", false, false);
			visit_children = true;
			break;

		case BLOCK_PARAGRAPH:
			if (tight) {
				inlines_to_html(html, b->inline_content);
			} else {
				cr(html);
				strbuf_puts(html, "<p>");
				inlines_to_html(html, b->inline_content);
				strbuf_puts(html, "</p>\n");
			}
			break;

		case BLOCK_BQUOTE:
			cr(html);
			strbuf_puts(html, "<blockquote>\n");
			rstack = push_block(rstack, b->next, "</blockquote>\n", tight, false);
			tight = false;
			visit_children = true;
			break;

		case BLOCK_LIST_ITEM:
			cr(html);
			strbuf_puts(html, "<li>");
			rstack = push_block(rstack, b->next, "</li>\n", tight, true);
			visit_children = true;
			break;

		case BLOCK_LIST:
			// make sure a list starts at the beginning of the line:
			cr(html);
			data = &(b->as.list);

			if (data->start > 1) {
				strbuf_printf(html, "<%s start=\"%d\">\n",
					      data->list_type == bullet ? "ul" : "ol",
					      data->start);
			} else {
				strbuf_puts(html, data->list_type == bullet ? "<ul>\n" : "<ol>\n");
			}

			rstack = push_block(rstack, b->next,
					    data->list_type == bullet ?
					    "\n</ul>\n" : "\n</ol>\n", tight, false);
			tight = data->tight;
			visit_children = true;
			break;

		case BLOCK_ATX_HEADER:
		case BLOCK_SETEXT_HEADER:
			cr(html);
			strbuf_printf(html, "<h%d>", b->as.header.level);
			inlines_to_html(html, b->inline_content);
			strbuf_printf(html, "</h%d>\n", b->as.header.level);
			break;

		case BLOCK_INDENTED_CODE:
		case BLOCK_FENCED_CODE:
			cr(html);

			strbuf_puts(html, "<pre><code");

			if (b->tag == BLOCK_FENCED_CODE) {
				strbuf *info = &b->as.code.info;

				if (strbuf_len(info) > 0) {
					int first_tag = strbuf_strchr(info, ' ', 0);
					if (first_tag < 0)
						first_tag = strbuf_len(info);

					strbuf_puts(html, " class=\"language-");
					escape_html(html, info->ptr, first_tag);
					strbuf_putc(html, '"');
				}
			}

			strbuf_putc(html, '>');
			escape_html(html, b->string_content.ptr, b->string_content.size);
			strbuf_puts(html, "</code></pre>\n");
			break;

		case BLOCK_HTML:
			strbuf_put(html, b->string_content.ptr, b->string_content.size);
			break;

		case BLOCK_HRULE:
			strbuf_puts(html, "<hr />\n");
			break;

		case BLOCK_REFERENCE_DEF:
			break;

		default:
			assert(false);
		}
		if (visit_children) {
			b = b->children;
		} else {
			b = b->next;
		}
		while (b == NULL && rstack != NULL) {
			strbuf_puts(html, rstack->literal);
			if (rstack->trim) {
				strbuf_rtrim(html);
			}
			tight = rstack->tight;
			b = rstack->next_sibling.block;
			rstack = pop_render_stack(rstack);
		}
	}

	free_render_stack(rstack);
}
Пример #16
0
//outputs fastQ unless add_greedy_bases_for_better_bwt_compression==true, in which case is for 1000genomes, and they want fastA
inline void error_correct_file_against_graph(char* fastq_file, char quality_cutoff, char ascii_qual_offset,
        dBGraphEc *db_graph, char* outfile,
        uint64_t *bases_modified_count_array,//distribution across reads; how many of the read_length bases are fixed
        uint64_t *posn_modified_count_array,//where in the read are we making corrections?
        int bases_modified_count_array_size,
        int min_read_len,
        HandleLowQualUncorrectable policy,
        boolean add_greedy_bases_for_better_bwt_compression,
        int num_greedy_bases,
        boolean rev_comp_read_if_on_reverse_strand)
{
    int max_read_len = bases_modified_count_array_size - 2;
    int read_len_upper_bound = max_read_len + num_greedy_bases;
    int read_len_lower_bound = min_read_len + num_greedy_bases;
    int read_len_final = 0;

    //reset the stats arrays, we get stats per input file
    set_uint64_t_array(bases_modified_count_array,bases_modified_count_array_size, (uint64_t) 0);
    set_uint64_t_array(posn_modified_count_array, bases_modified_count_array_size, (uint64_t) 0);


    //set some variables, quality etc
    quality_cutoff+=ascii_qual_offset;
    short kmer_size = db_graph->kmer_size;

    StrBuf* uncorrectedGoodQual_file = strbuf_create(outfile);
    StrBuf* uncorrectedLowQual_file = strbuf_create(outfile);
    StrBuf* corrected_file = strbuf_create(outfile);
    StrBuf* discarded_undefined_file = strbuf_create(outfile);
    StrBuf* discarded_uncorrectable_file = strbuf_create(outfile);
    StrBuf* discarded_shortread_file = strbuf_create(outfile);


    strbuf_append_str(uncorrectedGoodQual_file, ".printuncorrectedgoodqual");
    strbuf_append_str(uncorrectedLowQual_file, ".printuncorrectedlowqual");
    strbuf_append_str(corrected_file, ".printcorrected");
    strbuf_append_str(discarded_undefined_file, ".discardundefinedbase");
    strbuf_append_str(discarded_uncorrectable_file, ".discarduncorrectable");
    strbuf_append_str(discarded_shortread_file, ".discardshortread");

    FILE* uncorrectedGoodQual_fp = fopen(uncorrectedGoodQual_file->buff, "w");
    FILE* uncorrectedLowQual_fp = fopen(uncorrectedLowQual_file->buff, "w");
    FILE* corrected_fp = fopen(corrected_file->buff, "w");
    FILE* discarded_undefined_fp = fopen(discarded_undefined_file->buff, "w");
    FILE* discarded_uncorrectable_fp = fopen(discarded_uncorrectable_file->buff, "w");
    FILE* discarded_shortread_fp = fopen(discarded_shortread_file->buff, "w");

    char* suff1 = ".distrib_num_modified_bases";
    char* suff2 = ".distrib_posn_modified_bases";
    char* suff3 =".read_stats";
    char* stat1 = (char*) malloc(sizeof(char)*(strlen(outfile)+strlen(suff1)+1));
    char* stat2 = (char*) malloc(sizeof(char)*(strlen(outfile)+strlen(suff2)+1));
    char* stat3 = (char*) malloc(sizeof(char)*(strlen(outfile)+strlen(suff3)+1));

    if ( (stat1==NULL) || (stat2==NULL) || (stat3==NULL) )
    {
        die("Unable to malloc FILENAME strings. Something badly wrong with your server\n");
    }
    set_string_to_null(stat1, strlen(outfile)+strlen(suff1)+1);
    set_string_to_null(stat2, strlen(outfile)+strlen(suff2)+1);
    set_string_to_null(stat3, strlen(outfile)+strlen(suff3)+1);
    strcpy(stat1, outfile);
    strcat(stat1, suff1);
    strcat(stat2, outfile);
    strcat(stat2, suff2);
    strcat(stat3, outfile);
    strcat(stat3, suff3);

    FILE* out_stat1 = fopen(stat1, "w");
    FILE* out_stat2 = fopen(stat2, "w");
    FILE* out_stat3 = fopen(stat3, "w");
    if ( (out_stat1==NULL)|| (out_stat2==NULL) || (out_stat3==NULL) )
    {
        die("Unable to open %s or %s or %s to write to - permissions issue?\n", stat1, stat2, stat3);
    }

    SeqFile *sf = seq_file_open(fastq_file);
    if(sf == NULL)
    {
        // Error opening file
        fprintf(stderr, "Error: cannot read seq file '%s'\n", fastq_file);
        exit(EXIT_FAILURE);
    }
    char is_fastq = seq_has_quality_scores(sf);
    if (is_fastq==0)
    {
        die("Error correction is only meant to work on FASTQ and this file: %s is not\n", fastq_file);
    }

    StrBuf* buf_seq  = strbuf_new();
    StrBuf* buf_qual = strbuf_new();
    StrBuf* working_buf=strbuf_new();
    dBNodeEc* last_node_in_read;
    Orientation last_or_in_read;
    int num_original_reads=0, num_final_reads=0, num_corrected_reads=0, num_discarded_reads=0;
    int num_discarded_undefined = 0;
    int num_discarded_uncorrectable = 0;
    int num_discarded_short_read = 0;
    int num_print_uncorrected_lowqual = 0;
    int num_print_uncorrected_goodqual = 0;
    int num_print_corrected = 0;

    StrBuf* buf_dashes = strbuf_create("---------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------");

    while(seq_next_read(sf))
    {
        int count_corrected_bases=0;
        //NOTE - uses modified version fo Isaacs code - new func
        seq_read_all_bases_and_quals(sf, buf_seq, buf_qual);
        StrBuf* buf_seq_debug  = strbuf_clone(buf_seq);
        StrBuf* buf_seq_origin  = strbuf_clone(buf_seq);
        StrBuf* buf_seq_fixed = strbuf_clone(buf_dashes);
        strbuf_resize(buf_seq_fixed, strbuf_len(buf_seq)+1);

        int read_len = seq_get_length(sf);
        int num_kmers = read_len-kmer_size+1;
        int quality_good[read_len];
        set_int_array(quality_good, read_len, 1);

        int first_good=0;//index of first kmer in graph

        //populate the qual array showing which bases have qual >threshold
        //if all quals are high, will Print uncorrected
        //else, if all kmers NOT in graph, will discard or print uncorrected depending on policy
        //else print corrected.
        Orientation strand_first_good_kmer;
        ReadCorrectionDecison dec =
            get_first_good_kmer_and_populate_qual_array(seq_get_read_name(sf), buf_seq, buf_qual, num_kmers, read_len,
                    quality_good, quality_cutoff,
                    &first_good, &strand_first_good_kmer,
                    db_graph, policy, rev_comp_read_if_on_reverse_strand);


        //*** start of local functions

        //if going right, keep going to right hand end. if going left, keep going to left hand end
        boolean condition(WhichEndOfKmer direction, int pos)
        {
            if ((direction==Right) && (pos<num_kmers))
            {
                return true;
            }
            if ((direction==Left) && (pos>=0))
            {
                return true;
            }
            return false;
        }
        boolean kmer_is_in_graph(char* kmer, dBGraphEc* db_g)
        {
            BinaryKmer curr_kmer;
            if (seq_to_binary_kmer(kmer, kmer_size, &curr_kmer)==NULL)
            {
                //is an N
                return false;
            }

            BinaryKmer temp_key;
            element_ec_get_key(&curr_kmer, kmer_size, &temp_key);
            dBNodeEc* node = hash_table_ec_find(&temp_key, db_g);
            if (node==NULL)
            {
                return false;
            }
            else
            {
                return true;
            }
        }
        int increment(int i, WhichEndOfKmer direction)
        {
            if (direction==Right)
            {
                return i+1;
            }
            else
            {
                return i-1;
            }
        }
        char working_str[kmer_size+1];

        // start_pos is in kmer units
        boolean check_bases_to_end_of_read(int start_pos, ReadCorrectionDecison* decision,
                                           WhichEndOfKmer direction,
                                           int* num_corrected_bases_in_this_read_debug)

        {
            boolean any_correction_done=false;
            if ((start_pos<0) || (start_pos>=num_kmers))
            {
                return any_correction_done;
            }
            int pos=start_pos;
            int offset=0;
            if (direction==Right)
            {
                offset= kmer_size-1;
            }
            char local_kmer[kmer_size+1];
            local_kmer[kmer_size]='\0';

            while ( (*decision==PrintCorrected) && (condition(direction,pos)==true) )
            {
                strncpy(local_kmer, buf_seq->buff+pos, kmer_size);

                if (quality_good[pos+offset]==1)
                {
                    //nothing to do
                }
                else if (kmer_is_in_graph(local_kmer, db_graph)==true)
                {
                    //nothing to do - don't correct if kmer is in graph
                }
                else//kmer not in graph and quality bad
                {
                    boolean fixed = fix_end_if_unambiguous(direction, buf_seq, buf_seq_fixed, buf_qual, quality_cutoff, pos,
                                                           working_buf, working_str, db_graph);
                    if ( (policy==DiscardReadIfLowQualBaseUnCorrectable)
                            &&
                            (fixed==false) )
                    {
                        *decision=DiscardUncorrectable;
                    }
                    else if (fixed==true)
                    {
                        any_correction_done=true;
                        count_corrected_bases++;
                        *num_corrected_bases_in_this_read_debug=*num_corrected_bases_in_this_read_debug+1;
                        if (offset+pos<bases_modified_count_array_size)
                        {
                            posn_modified_count_array[offset+pos]++;
                        }
                        else
                        {
                            posn_modified_count_array[bases_modified_count_array_size-1]++;
                        }
                    }
                }
                pos = increment(pos, direction);
            }
            return any_correction_done;
        }
Пример #17
0
void error_correct_list_of_files(StrBuf* list_fastq,char quality_cutoff, char ascii_qual_offset,
                                 dBGraphEc *db_graph, HandleLowQualUncorrectable policy,
                                 int max_read_len, int min_read_len, StrBuf* suffix, char* outdir,
                                 boolean add_greedy_bases_for_better_bwt_compression,
                                 int num_greedy_bases, boolean rev_comp_read_if_on_reverse_strand)

{
    printf("error correct list of files\n");
    fflush(stdout);
    int len = max_read_len+2;
    uint64_t* distrib_num_bases_corrected     =(uint64_t*) malloc(sizeof(uint64_t)*len);
    uint64_t* distrib_position_bases_corrected=(uint64_t*) malloc(sizeof(uint64_t)*len);
    if ( (distrib_num_bases_corrected==NULL)|| (distrib_position_bases_corrected==NULL))
    {
        die("Unable to alloc arrays for keeping stats. Your machine must have hardly any spare memory\n");
    }
    set_uint64_t_array(distrib_num_bases_corrected,      len, (uint64_t) 0);
    set_uint64_t_array(distrib_position_bases_corrected, len, (uint64_t) 0);

    FILE* list_fastq_fp = fopen(list_fastq->buff, "r");
    if (list_fastq_fp==NULL)
    {
        printf("Cannot open file %s\n", list_fastq->buff);
    }
    StrBuf *next_fastq     = strbuf_new();
    StrBuf* corrected_file = strbuf_new();
    StrBuf* corrected_file_newpath = strbuf_new();

    while(strbuf_reset_readline(next_fastq, list_fastq_fp))
    {
        strbuf_chomp(next_fastq);
        if(strbuf_len(next_fastq) > 0)
        {
            strbuf_reset(corrected_file);
            strbuf_reset(corrected_file_newpath);
            strbuf_copy(corrected_file, 0,//dest
                        next_fastq,0,strbuf_len(next_fastq));
            strbuf_append_str(corrected_file, suffix->buff);
            char* corrected_file_basename = basename(corrected_file->buff);
            strbuf_append_str(corrected_file_newpath, outdir);
            strbuf_append_str(corrected_file_newpath, corrected_file_basename);

            error_correct_file_against_graph(next_fastq->buff, quality_cutoff, ascii_qual_offset,
                                             db_graph, corrected_file_newpath->buff,
                                             distrib_num_bases_corrected,
                                             distrib_position_bases_corrected,
                                             len,
                                             min_read_len,
                                             policy,
                                             add_greedy_bases_for_better_bwt_compression,
                                             num_greedy_bases, rev_comp_read_if_on_reverse_strand);


        }
    }
    fclose(list_fastq_fp);
    strbuf_free(next_fastq);
    strbuf_free(corrected_file);
    strbuf_free(corrected_file_newpath);
    free(distrib_num_bases_corrected);
    free(distrib_position_bases_corrected);
}