/*
 * Feed a run of characters through the XML/HTML tokenizer state kept in 's'.
 *
 * Two input modes are supported, selected by 'buf_start':
 *  - buf_start != NULL: characters are decoded from the memory range
 *    [buf_start, buf_start + buf_len) with charset_decode().
 *  - buf_start == NULL: characters are read from EditBuffer 'b' with
 *    eb_nextc(), from 'offset_start' up to 'offset_start + buf_len'.
 *
 * The parser state (s->state, s->str, s->line_num, ...) lives in 's', so it
 * carries over from one call to the next.
 *
 * Returns -1 if s->abort_func() returns non-zero (it is polled on every
 * '\n'); otherwise returns 'buf - buf_start'.
 * NOTE(review): in EditBuffer mode both 'buf' and 'buf_start' are NULL, so
 * the return value is a NULL - NULL difference (and 'buf_end' is computed as
 * NULL + buf_len) -- confirm callers do not rely on it in that mode.
 */
static int xml_parse_internal(XMLState *s, const char *buf_start, int buf_len,
                              EditBuffer *b, int offset_start)
{
    int ch, offset, offset0, text_offset_start, ret, offset_end;
    const char *buf_end, *buf;

    buf = buf_start;
    buf_end = buf + buf_len;
    offset = offset_start;
    offset_end = offset_start + buf_len;
    /* offset0 and text_offset_start are only read in EditBuffer mode
       (buf == NULL), where they are passed to flush_text_buffer() */
    offset0 = 0; /* offset of the character currently being processed */
    /* NOTE(review): starts at 0 rather than offset_start; if the state is
       already XML_STATE_TEXT on entry in EditBuffer mode, the first flush
       uses 0 as the start offset -- confirm this is intended */
    text_offset_start = 0; /* offset where the current text run began */
    for (;;) {
        /* fetch the next character from whichever source is active */
        if (buf) {
            if (buf >= buf_end)
                break;
            ch = charset_decode(&s->charset_state, &buf);
        } else {
            if (offset >= offset_end)
                break;
            /* remember where this character started; 'offset' is advanced
               past it by eb_nextc() */
            offset0 = offset;
            ch = eb_nextc(b, offset, &offset);
        }
        /* increment line number to signal errors */
        if (ch == '\n') {
            /* well, should add counter, but we test abort here */
            if (s->abort_func(s->abort_opaque))
                return -1;
            s->line_num++;
        }
        switch (s->state) {
        case XML_STATE_TAG:
            /* accumulating the contents of a tag, i.e. what follows '<' */
            if (ch == '>') {
                strbuf_addch(&s->str, '\0');
                /* the value returned by parse_tag() selects the next state */
                ret = parse_tag(s, (char *)s->str.buf);
                switch (ret) {
                default:
                case XML_STATE_TEXT:
                /* also the jump target used when a comment ends
                   (XML_STATE_COMMENT2) and when the '>' of a closing
                   pre-tag is reached (XML_STATE_WAIT_EOT) */
                xml_text:
                    strbuf_reset(&s->str);
                    s->state = XML_STATE_TEXT;
                    text_offset_start = offset;
                    break;
                case XML_STATE_PRETAG:
                    strbuf_reset(&s->str);
                    s->state = XML_STATE_PRETAG;
                    text_offset_start = offset;
                    break;
                }
            } else {
                strbuf_addch(&s->str, ch);
                /* a tag whose first three characters are "!--" starts a
                   comment */
                if (s->str.size == 3 &&
                    s->str.buf[0] == '!'
                    && s->str.buf[1] == '-' && s->str.buf[2] == '-') {
                    s->state = XML_STATE_COMMENT;
                }
            }
            break;
        case XML_STATE_TEXT:
            if (ch == '<') {
                /* XXX: not strictly correct with comments : should
                   not flush if comment */
                if (buf) {
                    /* memory mode: the text was accumulated in s->str */
                    strbuf_addch(&s->str, '\0');
                    flush_text(s, (char *)s->str.buf);
                    strbuf_reset(&s->str);
                } else {
                    /* EditBuffer mode: the text is designated by the offset
                       range [text_offset_start, offset0), which ends just
                       before this '<' */
                    flush_text_buffer(s, text_offset_start, offset0);
                }
                s->state = XML_STATE_TAG;
            } else {
                /* characters are only accumulated in memory mode; in
                   EditBuffer mode nothing is stored here */
                if (buf) {
                    /* evaluate entities */
                    if (ch == '&') {
                        /* step back one byte -- presumably so that
                           parse_entity() starts on the '&' itself; it
                           advances 'buf' as it consumes input */
                        buf--;
                        ch = parse_entity(&buf);
                    }
                    strbuf_addch(&s->str, ch);
                }
            }
            break;
        case XML_STATE_COMMENT:
            /* inside a comment: look for the first '-' of the terminator */
            if (ch == '-')
                s->state = XML_STATE_COMMENT1;
            break;
        case XML_STATE_COMMENT1:
            /* one '-' seen: a second one is needed, otherwise start over */
            if (ch == '-')
                s->state = XML_STATE_COMMENT2;
            else
                s->state = XML_STATE_COMMENT;
            break;
        case XML_STATE_COMMENT2:
            /* "--" seen: '>' ends the comment, another '-' keeps us in this
               state, anything else goes back to scanning the comment */
            if (ch == '>') {
                goto xml_text;
            } else if (ch != '-') {
                s->state = XML_STATE_COMMENT;
            }
            break;
        case XML_STATE_PRETAG:
            /* raw content following the tag named s->pretag: accumulate
               everything until the tail of s->str is "</" followed by that
               tag name */
            {
                int len, taglen;

                strbuf_addch(&s->str, ch);
                /* length of "</" plus the tag name */
                taglen = s->pretaglen + 2;
                /* index in s->str where the closing tag would start */
                len = s->str.size - taglen;
                if (len >= 0 &&
                    s->str.buf[len] == '<' &&
                    s->str.buf[len + 1] == '/' &&
                    !xml_tagcmp((char *)s->str.buf + len + 2, s->pretag)) {
                    /* cut the closing tag off the accumulated content */
                    s->str.buf[len] = '\0';

                    if (!xml_tagcmp(s->pretag, "style")) {
                        /* hand the content to the CSS parser, if a style
                           sheet is attached */
                        if (s->style_sheet) {
                            CSSParseState b1, *b = &b1;
                            b->ptr = (char *)s->str.buf;
                            b->line_num = s->line_num; /* XXX: incorrect */
                            b->filename = s->filename;
                            b->ignore_case = s->ignore_case;
                            css_parse_style_sheet(s->style_sheet, b);
                        }
                    } else if (!xml_tagcmp(s->pretag, "script")) {
                        /* XXX: handle script */
                    } else {
                        /* just add the content */
                        if (buf) {
                            flush_text(s, (char *)s->str.buf);
                        } else {
                            /* XXX: would be incorrect if non ascii chars */
                            flush_text_buffer(s, text_offset_start, offset - taglen);
                        }
                        strbuf_reset(&s->str);
                        /* move back up to the parent box */
                        if (s->box)
                            s->box = s->box->parent;
                    }
                    /* s->str is not reset in the "style" and "script"
                       branches; the reset happens at xml_text once the
                       closing '>' is seen */
                    s->state = XML_STATE_WAIT_EOT;
                }
            }
            break;
        case XML_STATE_WAIT_EOT:
            /* wait end of tag */
            if (ch == '>')
                goto xml_text;
            break;
        }
    }
    return buf - buf_start;
}
static void read_file (struct buffer *buffer) { int fd; void *map = MAP_FAILED; struct line line_buffer; size_t line_length; const char *line_begin, *line_end, *end; char newline[8]; size_t newline_length = 0; off_t size = 0; uint32_t digest = 0; if (-1 == (fd = open (buffer->path + 7, O_RDONLY))) { if (errno != ENOENT) { set_error (_("Failed to open '%s' for reading: %s"), buffer->path, strerror (errno)); goto failed; } buffer->charset = CHARSET_UTF8; buffer->line_ending = LINE_ENDING_LF; buffer->log = log_open (buffer->undo_path, buffer, 0); if (!buffer->log) goto failed; return; } if (-1 == (size = lseek (fd, 0, SEEK_END))) { static const size_t readbuf_size = 65536; char *readbuf; const char *begin, *end; int ret; struct line encoded_buffer; readbuf = malloc (readbuf_size); ARRAY_INIT (&encoded_buffer); while (0 < (ret = read (fd, readbuf, readbuf_size))) { digest = crc32 (digest, readbuf, ret); if (!buffer->charset) { charset_detect (&buffer->charset, &buffer->line_ending, readbuf, ret); newline_length = charset_encode_line_ending (newline, buffer->charset, buffer->line_ending); } begin = readbuf; end = readbuf + ret; while (begin != end) { line_end = memmem (begin, end - begin, newline, newline_length); if (!line_end) { ARRAY_ADD_SEVERAL (&encoded_buffer, begin, end - begin); break; } ARRAY_ADD_SEVERAL (&encoded_buffer, begin, line_end - begin); begin = line_end + newline_length; line_length = charset_decode (0, buffer->charset, &ARRAY_GET (&encoded_buffer, 0), ARRAY_COUNT (&encoded_buffer)); ARRAY_INIT (&line_buffer); ARRAY_RESERVE (&line_buffer, line_length); charset_decode (&ARRAY_GET (&line_buffer, 0), buffer->charset, &ARRAY_GET (&encoded_buffer, 0), ARRAY_COUNT (&encoded_buffer)); ARRAY_COUNT (&line_buffer) = line_length; pthread_mutex_lock (&buffer->lock); ARRAY_ADD (&buffer->lines, line_buffer); pthread_mutex_unlock (&buffer->lock); ARRAY_RESET (&encoded_buffer); } } } else if (size > 0) { if (MAP_FAILED == (map = mmap (0, size, PROT_READ, MAP_SHARED, 
fd, 0))) { set_error (_("Failed to mmap '%s': %s"), buffer->path, strerror (errno)); goto failed; } digest = crc32 (0, map, size); charset_detect (&buffer->charset, &buffer->line_ending, map, size); newline_length = charset_encode_line_ending (newline, buffer->charset, buffer->line_ending); end = (char *) map + size; for (line_begin = map; line_begin < end; line_begin = line_end + newline_length) { line_end = memmem (line_begin, end - line_begin, newline, newline_length); if (!line_end) line_end = end; line_length = charset_decode (0, buffer->charset, line_begin, line_end - line_begin); ARRAY_INIT (&line_buffer); ARRAY_RESERVE (&line_buffer, line_length); charset_decode (&ARRAY_GET (&line_buffer, 0), buffer->charset, line_begin, line_end - line_begin); ARRAY_COUNT (&line_buffer) = line_length; pthread_mutex_lock (&buffer->lock); ARRAY_ADD (&buffer->lines, line_buffer); pthread_mutex_unlock (&buffer->lock); /* XXX: Avoid issuing full repaints when nothing really happens onscreen */ gui_repaint (); } } else { buffer->charset = CHARSET_UTF8; buffer->line_ending = LINE_ENDING_LF; } buffer->log = log_open (buffer->undo_path, buffer, digest); failed: if (fd != -1) close (fd); if (map != MAP_FAILED) munmap (map, size); }