int html_parser::parse_init() { outer_ = recode_new_outer(true); page_ = (char *) malloc(max_page_len + 1); buffer_conv_ = (char *) malloc(max_page_len + 1); title_ = (char *) malloc(max_title_len + 1); content_ = (char *) malloc(max_content_len + 1); links_ = (link_t *) malloc(sizeof(link_t) * (max_links_num + 1)); tree_ = new html_tree(max_page_len + 1); if (page_ == NULL || title_ == NULL || buffer_conv_ == NULL || content_ == NULL || links_ == NULL || tree_ == NULL) goto fail; page_[0] = '\0'; content_[0] = '\0'; title_[0] = '\0'; buffer_conv_len_ = max_page_len + 1; return 0; fail: parse_destroy(); return -1; }
BibtexField * bibtex_reverse_field (BibtexField * field, gboolean use_braces, gboolean do_quote) { BibtexStruct * s = NULL; gchar * string, * tmp; gboolean is_upper, has_space, is_command, was_command; gint i; BibtexAuthor * author; static GString * st = NULL; static RECODE_OUTER outer = NULL; static RECODE_REQUEST request = NULL; g_return_val_if_fail (field != NULL, NULL); if (st == NULL) st = g_string_sized_new (16); if (outer == NULL) { outer = recode_new_outer (false); g_assert (outer != NULL); } if (request == NULL) { request = recode_new_request (outer); g_assert (request != NULL); if (! recode_scan_request (request, "latin1..latex")) { g_error ("can't create recoder"); } } if (field->structure) { bibtex_struct_destroy (field->structure, TRUE); field->structure = NULL; } field->loss = FALSE; switch (field->type) { case BIBTEX_OTHER: case BIBTEX_VERBATIM: g_return_val_if_fail (field->text != NULL, NULL); g_string_truncate (st, 0); if (! use_braces) { if (strchr (field->text, '"')) { use_braces = TRUE; } } if (use_braces) { g_string_append (st, "@preamble{{"); } else { g_string_append (st, "@preamble{\""); } if (do_quote) { tmp = recode_string (request, field->text); g_string_append (st, tmp); g_free (tmp); } else { g_string_append (st, field->text); } if (use_braces) { g_string_append (st, "}}"); } else { g_string_append (st, "\"}"); } s = text_to_struct (st->str); break; case BIBTEX_TITLE: g_return_val_if_fail (field->text != NULL, NULL); g_string_truncate (st, 0); if (! 
use_braces) { if (strchr (field->text, '"')) { use_braces = TRUE; } } tmp = recode_string (request, field->text); if (use_braces) { g_string_append (st, "@preamble{{"); } else { g_string_append (st, "@preamble{\""); } /* Put the first lower case between {} */ string = tmp; if (* tmp >= 'a' && * tmp <= 'z') { /* Put the beginning in lower cases */ g_string_append_c (st, '{'); g_string_append_c (st, * tmp); g_string_append_c (st, '}'); } else { /* The first character is done */ g_string_append_c (st, * tmp); } tmp ++; /* check for upper cases afterward */ is_upper = false; is_command = false; was_command = false; while (* tmp) { /* start a latex command */ if (* tmp == '\\') { /* eventually closes the bracket */ if (is_upper) { is_upper = false; g_string_append_c (st, '}'); } is_command = true; was_command = false; g_string_append_c (st, * tmp); tmp ++; continue; } if (is_command) { if (! ((* tmp >= 'a' && * tmp <= 'z') || (* tmp >= 'A' && * tmp <= 'Z'))) { is_command = false; was_command = true; } g_string_append_c (st, * tmp); tmp ++; continue; } if (* tmp >= 'A' && * tmp <= 'Z') { if (! is_upper) { g_string_append_c (st, '{'); g_string_append_c (st, * tmp); if (was_command) { g_string_append_c (st, '}'); } else { is_upper = true; } } else { g_string_append_c (st, * tmp); } } else { if (is_upper) { g_string_append_c (st, '}'); is_upper = false; } g_string_append_c (st, * tmp); } was_command = false; tmp ++; } /* eventually close the brackets */ if (is_upper) { g_string_append_c (st, '}'); is_upper = false; } g_free (string); if (use_braces) { g_string_append (st, "}}"); } else { g_string_append (st, "\"}"); } s = text_to_struct (st->str); break; case BIBTEX_AUTHOR: g_return_val_if_fail (field->field.author != NULL, NULL); g_string_truncate (st, 0); /* Create a simple preamble to parse */ if (! 
use_braces) { for (i = 0 ; i < field->field.author->len; i ++) { author = & g_array_index (field->field.author, BibtexAuthor, i); if (author->last && strchr (author->last, '"')) { use_braces = TRUE; break; } if (author->lineage && strchr (author->lineage, '"')) { use_braces = TRUE; break; } if (author->first && strchr (author->first, '"')) { use_braces = TRUE; break; } } } if (use_braces) { g_string_append (st, "@preamble{{"); } else { g_string_append (st, "@preamble{\""); } for (i = 0 ; i < field->field.author->len; i ++) { author = & g_array_index (field->field.author, BibtexAuthor, i); if (i != 0) { g_string_append (st, " and "); } if (author->last) { /* quotes if there is no first name */ has_space = author_needs_quotes (author->last) || (author->first == NULL && strpbrk (author->last, " \t") != NULL); if (has_space) { g_string_append_c (st, '{'); } tmp = recode_string (request, author->last); g_string_append (st, tmp); g_free (tmp); if (has_space) { g_string_append_c (st, '}'); } } if (author->lineage) { g_string_append (st, ", "); has_space = author_needs_quotes (author->lineage); if (has_space) { g_string_append_c (st, '{'); } tmp = recode_string (request, author->lineage); g_string_append (st, tmp); g_free (tmp); if (has_space) { g_string_append_c (st, '}'); } } if (author->first) { g_string_append (st, ", "); has_space = author_needs_quotes (author->first); if (has_space) { g_string_append_c (st, '{'); } tmp = recode_string (request, author->first); g_string_append (st, tmp); g_free (tmp); if (has_space) { g_string_append_c (st, '}'); } } } if (use_braces) { g_string_append (st, "}}"); } else { g_string_append (st, "\"}"); } s = text_to_struct (st->str); break; case BIBTEX_DATE: s = bibtex_struct_new (BIBTEX_STRUCT_TEXT); s->value.text = g_strdup_printf ("%d", field->field.date.year); break; default: g_assert_not_reached (); } field->structure = s; /* remove text field */ if (field->text) { g_free (field->text); field->text = NULL; field->converted = 
FALSE; } return field; }
/* convert file using GNU recode library returns 0 on success, nonzero error code otherwise */ int convert_recode(File *file, EncaEncoding from_enc) { RECODE_REQUEST request; RECODE_TASK task; File *tempfile = NULL; bool success; const char *encreq; /* Allocate librecode outer if we are called first time. */ if (outer == NULL) { if ((outer = recode_new_outer(false)) == NULL) { fprintf(stderr, "%s: recode library doesn't like us\n", program_name); return ERR_LIBCOM; } } /* Construct recode request string, try to mimic surfaceless converter now. */ { EncaEncoding enc; enc.charset = from_enc.charset; enc.surface = from_enc.surface | ENCA_SURFACE_REMOVE; encreq = format_request_string(enc, options.target_enc, ENCA_SURFACE_EOL_LF); } /* Create a recode request from it. */ request = get_recode_request(encreq); if (request == NULL) return ERR_CANNOT; /* Now we have to distinguish between file and stdin, namely because * in case of stdin, it's first part is already loaded in the buffer. */ if (file->name != NULL) { /* File is a regular file. Since recode doesn't recode files in place, we make a temporary file and copy contents of file fname to it. */ if (file_seek(file, 0, SEEK_SET) != 0) return ERR_IOFAIL; file->buffer->pos = 0; if ((tempfile = file_temporary(file->buffer, 1)) == NULL || copy_and_convert(file, tempfile, NULL) != 0 || file_seek(file, 0, SEEK_SET) != 0 || file_seek(tempfile, 0, SEEK_SET) != 0 || file_truncate(file, 0) != 0) { file_free(tempfile); return ERR_IOFAIL; } /* Create a task from the request. */ task = recode_new_task(request); task->fail_level = enca_recode_fail_level; task->abort_level = RECODE_SYSTEM_ERROR; task->input.name = NULL; task->input.file = tempfile->stream; task->output.name = NULL; task->output.file = file->stream; /* Now run conversion temporary file -> original. */ success = recode_perform_task(task); /* If conversion wasn't successfull, original file is probably damaged (damned librecode!) 
try to restore it from the temporary copy. */ if (!success) { if (task->error_so_far >= RECODE_SYSTEM_ERROR) { fprintf(stderr, "%s: librecode probably damaged file `%s'. " "Trying to recover... ", program_name, file->name); tempfile->buffer->pos = 0; if (file_seek(tempfile, 0, SEEK_SET) != -1 && file_seek(file, 0, SEEK_SET) != -1 && file_truncate(file, file->size) == 0 && copy_and_convert(tempfile, file, NULL) == 0) fprintf(stderr, "succeeded.\n"); else fprintf(stderr, "failed\n"); } else print_recode_warning(task->error_so_far, file->name); } recode_delete_task(task); file_free(tempfile); } else { /* File is stdin. First recode begining saved in io_buffer, then append rest of stdin. */ enum recode_error errmax = RECODE_NO_ERROR; /* Create a task from the request. * Set it up for buffer -> stdout conversion */ task = recode_new_task(request); task->fail_level = enca_recode_fail_level; task->abort_level = RECODE_SYSTEM_ERROR; task->input.name = NULL; task->input.file = NULL; task->input.buffer = (char*)file->buffer->data; task->input.cursor = (char*)file->buffer->data; task->input.limit = (char*)file->buffer->data + file->buffer->pos; task->output.name = NULL; task->output.file = stdout; success = recode_perform_task(task); if (!success) { if (task->error_so_far >= RECODE_SYSTEM_ERROR) { fprintf(stderr, "%s: librecode probably damaged `%s'. " "No way to recover in a pipe.\n", program_name, ffname_r(NULL)); recode_delete_task(task); return ERR_IOFAIL; } else errmax = task->error_so_far; } recode_delete_task(task); /* Create a task from the request. * Set it up for stdin -> stdout conversion */ task = recode_new_task(request); task->fail_level = enca_recode_fail_level; task->abort_level = RECODE_SYSTEM_ERROR; task->input.name = NULL; task->input.file = stdin; task->output.name = NULL; task->output.file = stdout; success = recode_perform_task(task); if (!success) { if (task->error_so_far >= RECODE_SYSTEM_ERROR) { fprintf(stderr, "%s: librecode probably damaged `%s'. 
" "No way to recover in a pipe.\n", program_name, ffname_r(NULL)); recode_delete_task(task); return ERR_IOFAIL; } else { if (errmax < task->error_so_far) errmax = task->error_so_far; } } if (errmax >= enca_recode_fail_level) print_recode_warning(errmax, ffname_r(NULL)); recode_delete_task(task); } /* return ERR_IOFAIL on failure since it means file-related problems */ return success ? ERR_OK : ERR_IOFAIL; }
/**
 * Default constructor: obtains a new librecode outer handle by calling
 * recode_new_outer(false) and stores it in mOuter.
 *
 * The result is stored as returned; no check for a failed call is made here.
 */
RecodeOuter::RecodeOuter()
    : mOuter(recode_new_outer(false))
{
    // Nothing else to set up: the handle is acquired in the initialiser list.
}