/*
 * Parse the textual data of a NAPTR record.
 *
 * Fields are consumed from `s` in this order: order, preference, flags,
 * services, regexp, replacement.
 *
 * name, ttl, type - passed through unchanged to store_record().
 * s               - record data text; advanced by the extract_* helpers.
 *
 * Returns the result of store_record() on success, NULL when one of the
 * extract_* helpers reports failure, and the result of bitch() when a value
 * is rejected by the checks performed here.
 */
static struct rr *naptr_parse(char *name, long ttl, int type, char *s)
{
	struct rr_naptr *rr = getmem(sizeof(*rr));
	int i;
	struct binary_data text;

	i = extract_integer(&s, "order");
	if (i < 0)
		return NULL;
	if (i >= 65536)
		return bitch("order range is not valid");
	rr->order = i;

	i = extract_integer(&s, "preference");
	if (i < 0)
		return NULL;
	if (i >= 65536)
		return bitch("preference range is not valid");
	rr->preference = i;

	text = extract_text(&s, "flags");
	if (text.length < 0)
		return NULL;
	for (i = 0; i < text.length; i++) {
		/* <ctype.h> functions require a value representable as
		 * unsigned char (or EOF); a negative plain char is undefined
		 * behaviour, so convert before classifying. */
		if (!isalnum((unsigned char)text.data[i])) {
			return bitch("flags contains illegal characters");
		}
	}
	rr->flags = text;

	text = extract_text(&s, "services");
	if (text.length < 0)
		return NULL;
	rr->services = text;

	text = extract_text(&s, "regexp");
	if (text.length < 0)
		return NULL;
	rr->regexp = text;

	rr->replacement = extract_name(&s, "replacement");
	if (!rr->replacement)
		return NULL;

	/* Anything left over after the last field is an error. */
	if (*s) {
		return bitch("garbage after valid NAPTR data");
	}

	return store_record(type, name, ttl, rr);
}
/* Add a variable to a linked list */ void create (struct node *start, int vartype, char string[], char comment[]) { struct node *end = start, *temp; if (find_node (start, string) == NULL) { /* Create a new node and add it to the end of the linked list */ while (end->next != NULL) end = end->next; end->next = malloc (sizeof(struct node)); if (end->next == NULL) memory_error (); end = end->next; end->vartype = vartype; end->action = NOP; end->calltype = current_calltype; end->line_number = current_line; end->name = calloc (extract_text(string)+1, sizeof(char)); if (end->name == NULL) memory_error (); copy (end->name, string, extract_text(string)); if ((find_char (string, '(') < find_char (string, ',')) && (find_char (string, '(') < find_char (string, '!'))) { /* Variable is an array */ if (start == common_start) { /* Array is defined in a common block, so print a warning */ eprint_newline (); eprint ("Warning - Array size defined in common block:"); eprint_newline (); eprint_line (current_line); eprintn (string, find_char(string, ')') - string); eprint_newline (); temp = find_node (register_start, string); if (temp != NULL) { if (temp->vartype > 0) array_flags[temp->calltype][temp->vartype]=1; free (temp->size); temp->size = parse_array_size (string); end->size = NULL; } } else end->size = parse_array_size (string); } else (end->size = NULL); if (comment != NULL) { end->title = calloc (strlen(comment), sizeof(char)); if (end->title == NULL) memory_error (); copy (end->title, comment, strlen(comment)-1); } else end->title = NULL; end->next = NULL; } }
/**
 * Walk a box and its descendants, handing any text found to the save buffer.
 *
 * The list marker (if present) is visited first, then the box itself, then
 * each child in sibling order.
 *
 * \param box     box to visit; must not be NULL
 * \param first   passed to save_text_solve_whitespace(); set to false once
 *                text from a box has been added
 * \param before  passed to save_text_solve_whitespace(); reset to
 *                WHITESPACE_NONE once text from a box has been added
 * \param save    state handed to save_text_add_to_buffer()
 */
void extract_text(struct box *box, bool *first, save_text_whitespace *before,
		struct save_text_state *save)
{
	const char *ws_text = "";
	size_t ws_length = 0;
	struct box *kid;
	bool textless_float;
	bool exportable;

	assert(box);

	/* The marker box of a list element is handled ahead of the
	 * element's own content. */
	if (box->list_marker) {
		extract_text(box->list_marker, first, before, save);
	}

	/* Capture the child pointer before calling the handler in case it
	 * modifies the tree. */
	kid = box->children;

	save_text_solve_whitespace(box, first, before, &ws_text, &ws_length);

	textless_float = (box->type == BOX_FLOAT_LEFT ||
			box->type == BOX_FLOAT_RIGHT) && !box->text;
	exportable = box->type != BOX_BR && !textless_float &&
			box->length > 0 && box->text;

	if (exportable) {
		/* Box meets criteria for export; add text to buffer */
		save_text_add_to_buffer(box->text, box->length, box,
				ws_text, ws_length, save);
		*first = false;
		*before = WHITESPACE_NONE;
	}

	/* Descend into each child in turn. */
	for (; kid; kid = kid->next) {
		extract_text(kid, first, before, save);
	}
}
/* Return a pointer to the node which has "string" as its name field, or
   NULL when no node after "start" matches.  The "start" node itself is
   never examined; the search begins at start->next. */
struct node *find_node (struct node *start, char string[])
{
  struct node *candidate;
  int length;

  for (candidate = start->next; candidate != NULL; candidate = candidate->next)
    {
      /* Compare over the longer of the two lengths */
      length = max (extract_text(string), strlen(candidate->name));
      if (strncmp_i (candidate->name, string, length) == 0)
        return (candidate);
    }

  return (NULL);
}
/**
 * Extract the text of an HTML content and write it to a file.
 *
 * The box tree of the content is walked with extract_text(), the collected
 * block is converted with the utf8_to_local() operation, and the result is
 * written to \a path followed by a newline.
 *
 * Nothing is written when \a c is NULL, is not of type CONTENT_HTML, yields
 * no text, or the conversion fails.  Failures to open, write or close the
 * file are logged; the function has no return value.
 *
 * \param c     content to export
 * \param path  file to write; opened with mode "w"
 */
void save_as_text(hlcache_handle *c, char *path)
{
	FILE *out;
	struct save_text_state save = { NULL, 0, 0 };
	save_text_whitespace before = WHITESPACE_NONE;
	bool first = true;
	nserror ret;
	char *result;

	if (!c || content_get_type(c) != CONTENT_HTML) {
		return;
	}

	extract_text(html_get_box_tree(c), &first, &before, &save);
	if (!save.block)
		return;

	ret = guit->utf8->utf8_to_local(save.block, save.length, &result);
	free(save.block);

	if (ret != NSERROR_OK) {
		LOG(("failed to convert to local encoding, return %d", ret));
		return;
	}

	out = fopen(path, "w");
	if (out == NULL) {
		/* Previously an unopenable file was skipped without any
		 * diagnostic. */
		LOG(("Warning: failed to open %s for writing", path));
		free(result);
		return;
	}

	if (fputs(result, out) < 0) {
		LOG(("Warning: write failed"));
	}

	if (fputs("\n", out) < 0) {
		LOG(("Warning: failed writing trailing newline"));
	}

	/* fclose flushes buffered output, so a failure here means the data
	 * may not have reached the file. */
	if (fclose(out) != 0) {
		LOG(("Warning: failed closing output file"));
	}

	free(result);
}
int determine_type (char string[]) { int i, length; for (i=0; i<NUM_TYPES; i++) { length = max (strlen(types[i]), skip_nonblanks(string)-string); if (strncmp_i(string, types[i], length) == 0) return (i/2); } if (strncmp_i(string, "character", 9) == 0) return (4); if (strncmp_i(string, "integer*2", 9) == 0) { eprint_newline (); eprint ("Warning - Type integer*2:"); eprint_newline (); eprint_line (current_line); eprintn (string, strlen(string) - 1); eprint_newline (); return (NOP); } for (i=0; i<3; i++) { length = max (strlen(keywords[i]), extract_text(string)); if (strncmp_i(string, keywords[i], length) == 0) return (-i-1); } return (NOP); }
/*
 * Parse the textual data of an X25 record.
 *
 * A single text field, the PSDN address, is read from `s`.  It must be
 * between 4 and 255 characters long and consist of decimal digits only.
 *
 * name, ttl, type - passed through unchanged to store_record().
 * s               - record data text; advanced by extract_text().
 *
 * Returns the result of store_record() on success, NULL when extract_text()
 * reports failure, and the result of bitch() when the address is rejected
 * or trailing data remains.
 */
static struct rr *x25_parse(char *name, long ttl, int type, char *s)
{
	struct rr_x25 *rr = getmem(sizeof(*rr));
	int i;

	rr->psdn_address = extract_text(&s, "PSDN-address");
	if (rr->psdn_address.length < 0)
		return NULL;
	if (rr->psdn_address.length > 255)
		return bitch("PSDN-address too long");
	if (rr->psdn_address.length < 4)
		return bitch("PSDN-address too short");

	for (i = 0; i < rr->psdn_address.length; i++) {
		/* <ctype.h> functions require a value representable as
		 * unsigned char (or EOF); a negative plain char is undefined
		 * behaviour, so convert before classifying. */
		if (!isdigit((unsigned char)rr->psdn_address.data[i]))
			return bitch("PSDN-address contains non-digits");
	}

	/* Anything left over after the address is an error. */
	if (*s) {
		return bitch("garbage after valid X25 data");
	}

	return store_record(type, name, ttl, rr);
}
static struct rr *txt_parse(char *name, long ttl, int type, char *s) { struct rr_txt *rr; struct binary_data txt; struct rr_txt_segment *first = NULL; struct rr_txt_segment *last = NULL; struct rr_txt_segment *cur = NULL; int i; i = 0; while (*s) { freeall_temp(); txt = extract_text(&s, "text segment"); if (txt.length < 0) return NULL; if (txt.length > 255) return bitch("TXT segment too long"); i++; cur = getmem(sizeof(*cur)); cur->txt = txt; cur->next = NULL; if (!first) first = cur; if (last) last->next = cur; last = cur; } if (i == 0) return bitch("empty text record"); rr = getmem(sizeof(*rr)); rr->count = i; rr->txt = first; return store_record(type, name, ttl, rr); }
/* Open a named physical or virtual file, extract the text from it, search
 * for document or page attachments, and process these recursively.
 * Either filename must be supplied for physical files, or data+length
 * from which a virtual file will be created.
 * The caller cannot create the PVF file since we create a new TET object
 * here in case an exception happens with the embedded document - the
 * caller can happily continue with his TET object even in case of an
 * exception here.
 *
 * Parameters:
 *   outfp     stream that receives the extracted text and the
 *             "File attachment" separator lines
 *   filename  name of the file to open, or null to build a PVF file
 *             from data/length instead
 *   realname  name used only in the error messages written to stderr
 *   data      contents for the PVF file (used when filename is null)
 *   length    number of bytes in data
 *
 * Returns 4 if no TET object could be created, 5 if the document could
 * not be opened, and 0 otherwise. Note that 0 is also returned after an
 * exception was caught and reported in the TET_CATCH clause.
 */
static int process_document(FILE *outfp, const char *filename, const char *realname, const unsigned char *data, int length)
{
    TET *tet;

    /* Each invocation (including recursive ones) works on its own TET
     * object.
     */
    if ((tet = TET_new()) == (TET *) 0)
    {
        fprintf(stderr, "extractor: out of memory\n");
        return(4);
    }

    TET_TRY (tet)
    {
        const char *pvfname = "/pvf/attachment";
        int doc;
        int file, filecount;
        int page, pagecount;
        const unsigned char *attdata;
        int attlength;
        int objtype;

        /* Construct a PVF file if data instead of a filename was provided */
        if (!filename)
        {
            TET_create_pvf(tet, pvfname, 0, data, length, "");
            filename = pvfname;
        }

        TET_set_option(tet, globaloptlist);

        doc = TET_open_document(tet, filename, 0, docoptlist);

        if (doc == -1)
        {
            fprintf(stderr, "Error %d in %s() (source: attachment '%s'): %s\n", TET_get_errnum(tet), TET_get_apiname(tet), realname, TET_get_errmsg(tet));

            /* Leave the try clause explicitly before returning from
             * inside it.
             */
            TET_EXIT_TRY(tet);
            TET_delete(tet);
            return(5);
        }

        /* -------------------- Extract the document's own page contents */
        extract_text(tet, doc, outfp);

        /* -------------------- Process all document-level file attachments */

        /* Get the number of document-level file attachments. */
        filecount = (int) TET_pcos_get_number(tet, doc, "length:names/EmbeddedFiles");

        for (file = 0; file < filecount; file++)
        {
            const char *attname;

            /* fetch the name of the file attachment; check for Unicode file
             * name (a PDF 1.7 feature)
             */
            objtype = (int) TET_pcos_get_number(tet, doc, "type:names/EmbeddedFiles[%d]/UF", file);

            if (objtype == pcos_ot_string)
            {
                attname = TET_pcos_get_string(tet, doc, "names/EmbeddedFiles[%d]/UF", file);
            }
            else
            {
                /* fetch the name of the file attachment */
                objtype = (int) TET_pcos_get_number(tet, doc, "type:names/EmbeddedFiles[%d]/F", file);

                if (objtype == pcos_ot_string)
                {
                    attname = TET_pcos_get_string(tet, doc, "names/EmbeddedFiles[%d]/F", file);
                }
                else
                {
                    /* neither UF nor F is a string: use a placeholder */
                    attname = "(unnamed)";
                }
            }

            fprintf(outfp, "\n----- File attachment '%s':\n", attname);

            /* fetch the contents of the file attachment and process it */
            objtype = (int) TET_pcos_get_number(tet, doc, "type:names/EmbeddedFiles[%d]/EF/F", file);

            if (objtype == pcos_ot_stream)
            {
                attdata = TET_pcos_get_stream(tet, doc, &attlength, "", "names/EmbeddedFiles[%d]/EF/F", file);

                /* Recurse with no filename so that the attachment data is
                 * opened through a PVF file; the result is deliberately
                 * discarded.
                 */
                (void) process_document(outfp, 0, attname, attdata, attlength);
            }
        }

        /* -------------------- Process all page-level file attachments */

        pagecount = (int) TET_pcos_get_number(tet, doc, "length:pages");

        /* Check all pages for annotations of type FileAttachment */
        for (page = 0; page < pagecount; page++)
        {
            int annot, annotcount;

            annotcount = (int) TET_pcos_get_number(tet, doc, "length:pages[%d]/Annots", page);

            for (annot = 0; annot < annotcount; annot++)
            {
                const char *val;
                char attname[128];

                val = TET_pcos_get_string(tet, doc, "pages[%d]/Annots[%d]/Subtype", page, annot);

                /* Label used as realname in the recursive call; page and
                 * annotation numbers are shown 1-based.
                 */
                sprintf(attname, "page %d, annotation %d", page+1, annot+1);

                if (!strcmp(val, "FileAttachment"))
                {
                    /* fetch the contents of the attachment and process it */
                    objtype = (int) TET_pcos_get_number(tet, doc, "type:pages[%d]/Annots[%d]/FS/EF/F", page, annot);

                    if (objtype == pcos_ot_stream)
                    {
                        attdata = TET_pcos_get_stream(tet, doc, &attlength, "",
                            "pages[%d]/Annots[%d]/FS/EF/F", page, annot);

                        /* Same recursion as for document-level
                         * attachments; the result is discarded.
                         */
                        (void) process_document(outfp, 0, attname, attdata, attlength);
                    }
                }
            }
        }

        TET_close_document(tet, doc);

        /* If there was no PVF file deleting it won't do any harm */
        TET_delete_pvf(tet, pvfname, 0);
    }

    TET_CATCH (tet)
    {
        /* Report the exception; execution then continues below, so the
         * TET object is still deleted and 0 is returned.
         */
        fprintf(stderr, "Error %d in %s() (source: attachment '%s'): %s\n", TET_get_errnum(tet), TET_get_apiname(tet), realname, TET_get_errmsg(tet));
    }

    TET_delete(tet);

    return(0);
}