예제 #1
0
int html_parser::parse_init()
{

    outer_ = recode_new_outer(true);

    page_ = (char *) malloc(max_page_len + 1);
    buffer_conv_ = (char *) malloc(max_page_len + 1);

    title_ = (char *) malloc(max_title_len + 1);
    content_ = (char *) malloc(max_content_len + 1);
    links_ = (link_t *) malloc(sizeof(link_t) * (max_links_num + 1));


    tree_ = new html_tree(max_page_len + 1);

    if (page_ == NULL || title_ == NULL || buffer_conv_ == NULL ||
        content_ == NULL || links_ == NULL || tree_ == NULL)
        goto fail;

    page_[0] = '\0';
    content_[0] = '\0';
    title_[0] = '\0';

    buffer_conv_len_ = max_page_len + 1;

    return 0;

  fail:
    parse_destroy();
    return -1;
}
예제 #2
0
BibtexField * 
bibtex_reverse_field (BibtexField * field,
		      gboolean use_braces,
		      gboolean do_quote) {
    BibtexStruct * s = NULL;
    gchar * string, * tmp;
    gboolean is_upper, has_space, is_command, was_command;
    gint i;
    BibtexAuthor * author;

    static GString *      st      = NULL;
    static RECODE_OUTER   outer   = NULL;
    static RECODE_REQUEST request = NULL;

    g_return_val_if_fail (field != NULL, NULL);

    if (st == NULL) st = g_string_sized_new (16);
	
    if (outer == NULL) {
	outer = recode_new_outer (false);
	g_assert (outer != NULL);
    }

    if (request == NULL) {
	request = recode_new_request (outer);
	g_assert (request != NULL);
	if (! recode_scan_request (request, "latin1..latex")) {
	    g_error ("can't create recoder");
	}
    }

    if (field->structure) {
	bibtex_struct_destroy (field->structure, TRUE);
	field->structure = NULL;
    }

    field->loss = FALSE;

    switch (field->type) {
    case BIBTEX_OTHER:
    case BIBTEX_VERBATIM:
	g_return_val_if_fail (field->text != NULL, NULL);

	g_string_truncate (st, 0);

	if (! use_braces) {
	    if (strchr (field->text, '"')) {
	        use_braces = TRUE;
	    }
	}

	if (use_braces) {
	    g_string_append (st, "@preamble{{");
	}
	else {
	    g_string_append (st, "@preamble{\"");
	}

	if (do_quote) {
	  tmp = recode_string (request, field->text);
	  g_string_append (st, tmp);
	  g_free (tmp);
	}
	else {
	  g_string_append (st, field->text);
	}

	if (use_braces) {
	    g_string_append (st, "}}");
	}
	else {
	    g_string_append (st, "\"}");
	}

	s = text_to_struct (st->str);
	break;

    case BIBTEX_TITLE:
	g_return_val_if_fail (field->text != NULL, NULL);

	g_string_truncate (st, 0);
	
	if (! use_braces) {
	    if (strchr (field->text, '"')) {
	        use_braces = TRUE;
	    }
	}

	tmp = recode_string (request, field->text);

	if (use_braces) {
	    g_string_append (st, "@preamble{{");
	}
	else {
	    g_string_append (st, "@preamble{\"");
	}

	/* Put the first lower case between {} */
	string = tmp;
	if (* tmp >= 'a' && * tmp <= 'z') {
	    /* Put the beginning in lower cases */
	    g_string_append_c (st, '{');
	    g_string_append_c (st, * tmp);
	    g_string_append_c (st, '}');
	}
	else {
	    /* The first character is done */
	    g_string_append_c (st, * tmp);
	}
	
	tmp ++;

	/* check for upper cases afterward */
	is_upper    = false;
	is_command  = false;
	was_command = false;

	while (* tmp) {
	    /* start a latex command */
	    if (* tmp == '\\') {

		/* eventually closes the bracket */
		if (is_upper) {
		    is_upper = false;
		    g_string_append_c (st, '}');
		}

		is_command  = true;
		was_command = false;
		g_string_append_c (st, * tmp);
		tmp ++;

		continue;
	    }
	    if (is_command) {
		if (! ((* tmp >= 'a' && * tmp <= 'z') ||
		       (* tmp >= 'A' && * tmp <= 'Z'))) {
		    is_command  = false;
		    was_command = true;
		}
		g_string_append_c (st, * tmp);
		tmp ++;

		continue;
	    }

	    if (* tmp >= 'A' && * tmp <= 'Z') {
		if (! is_upper) {
		    g_string_append_c (st, '{');
		    g_string_append_c (st, * tmp);
		    if (was_command) {
			g_string_append_c (st, '}');
		    } else {
			is_upper = true;
		    }
		} else {
		    g_string_append_c (st, * tmp);
		}
	    }
	    else {
		if (is_upper) {
		    g_string_append_c (st, '}');
		    is_upper = false;
		}

		g_string_append_c (st, * tmp);
	    }
	    was_command = false;
	    tmp ++;
	}

	/* eventually close the brackets */
	if (is_upper) {
	    g_string_append_c (st, '}');
	    is_upper = false;
	}
	g_free (string);

	if (use_braces) {
	    g_string_append (st, "}}");
	}
	else {
	    g_string_append (st, "\"}");
	}

	s = text_to_struct (st->str);
	break;

    case BIBTEX_AUTHOR:
	g_return_val_if_fail (field->field.author != NULL, NULL);

	g_string_truncate (st, 0);

	/* Create a simple preamble to parse */
	if (! use_braces) {
	    for (i = 0 ; i < field->field.author->len; i ++) {
		author = & g_array_index (field->field.author, BibtexAuthor, i);
		
		if (author->last && strchr (author->last, '"')) {
		    use_braces = TRUE;
		    break;
		}
		if (author->lineage && strchr (author->lineage, '"')) {
		    use_braces = TRUE;
		    break;
		}
		if (author->first && strchr (author->first, '"')) {
		    use_braces = TRUE;
		    break;
		}
	    }
	}
	
	if (use_braces) {
	    g_string_append (st, "@preamble{{");
	}
	else {
	    g_string_append (st, "@preamble{\"");
	}

	for (i = 0 ; i < field->field.author->len; i ++) {
	    author = & g_array_index (field->field.author, BibtexAuthor, i);

	    if (i != 0) {
		g_string_append (st, " and ");
	    }

	    if (author->last) {
	        /* quotes if there is no first name */
	        has_space = author_needs_quotes (author->last) ||
		  (author->first == NULL && 
		   strpbrk (author->last, " \t") != NULL);

		if (has_space) {
		    g_string_append_c (st, '{');
		}

		tmp = recode_string (request, author->last);
		g_string_append (st, tmp);
		g_free (tmp);

		if (has_space) {
		    g_string_append_c (st, '}');
		}
	    }

	    if (author->lineage) {
		g_string_append (st, ", ");

	        has_space = author_needs_quotes (author->lineage);

		if (has_space) {
		    g_string_append_c (st, '{');
		}

		tmp = recode_string (request, author->lineage);
		g_string_append (st, tmp);
		g_free (tmp);

		if (has_space) {
		    g_string_append_c (st, '}');
		}
	    }


	    if (author->first) {
		g_string_append (st, ", ");

	        has_space = author_needs_quotes (author->first);

		if (has_space) {
		    g_string_append_c (st, '{');
		}

		tmp = recode_string (request, author->first);
		g_string_append (st, tmp);
		g_free (tmp);

		if (has_space) {
		    g_string_append_c (st, '}');
		}
	    }
	}

	if (use_braces) {
	    g_string_append (st, "}}");
	}
	else {
	    g_string_append (st, "\"}");
	}

	s = text_to_struct (st->str);
	break;

    case BIBTEX_DATE:
	s = bibtex_struct_new (BIBTEX_STRUCT_TEXT);
	s->value.text = g_strdup_printf ("%d", field->field.date.year);
	break;

    default:
	g_assert_not_reached ();
    }

    field->structure = s;

    /* remove text field */
    if (field->text) {
	g_free (field->text);

	field->text = NULL;
	field->converted = FALSE;
    }

    return field;
}
예제 #3
0
/* convert file using GNU recode library
   returns 0 on success, nonzero error code otherwise */
int
convert_recode(File *file,
               EncaEncoding from_enc)
{
  RECODE_REQUEST request;
  RECODE_TASK task;
  File *tempfile = NULL;
  bool success;
  const char *encreq;

  /* Allocate librecode outer if we are called first time. */
  if (outer == NULL) {
    if ((outer = recode_new_outer(false)) == NULL) {
      fprintf(stderr, "%s: recode library doesn't like us\n",
                      program_name);
      return ERR_LIBCOM;
    }
  }

  /* Construct recode request string,
     try to mimic surfaceless converter now. */
  {
    EncaEncoding enc;

    enc.charset = from_enc.charset;
    enc.surface = from_enc.surface | ENCA_SURFACE_REMOVE;
    encreq = format_request_string(enc, options.target_enc,
                                   ENCA_SURFACE_EOL_LF);
  }
  /* Create a recode request from it. */
  request = get_recode_request(encreq);
  if (request == NULL)
    return ERR_CANNOT;

  /* Now we have to distinguish between file and stdin, namely because
   * in case of stdin, it's first part is already loaded in the buffer. */
  if (file->name != NULL) {
    /* File is a regular file.
       Since recode doesn't recode files in place, we make a temporary file
       and copy contents of file fname to it. */
    if (file_seek(file, 0, SEEK_SET) != 0)
      return ERR_IOFAIL;
    file->buffer->pos = 0;

    if ((tempfile = file_temporary(file->buffer, 1)) == NULL
        || copy_and_convert(file, tempfile, NULL) != 0
        || file_seek(file, 0, SEEK_SET) != 0
        || file_seek(tempfile, 0, SEEK_SET) != 0
        || file_truncate(file, 0) != 0) {
      file_free(tempfile);
      return ERR_IOFAIL;
    }

    /* Create a task from the request. */
    task = recode_new_task(request);
    task->fail_level = enca_recode_fail_level;
    task->abort_level = RECODE_SYSTEM_ERROR;
    task->input.name = NULL;
    task->input.file = tempfile->stream;
    task->output.name = NULL;
    task->output.file = file->stream;

    /* Now run conversion temporary file -> original. */
    success = recode_perform_task(task);

    /* If conversion wasn't successfull, original file is probably damaged
       (damned librecode!) try to restore it from the temporary copy. */
    if (!success) {
      if (task->error_so_far >= RECODE_SYSTEM_ERROR) {
        fprintf(stderr, "%s: librecode probably damaged file `%s'. "
                        "Trying to recover... ",
                        program_name,
                        file->name);
        tempfile->buffer->pos = 0;
        if (file_seek(tempfile, 0, SEEK_SET) != -1
            && file_seek(file, 0, SEEK_SET) != -1
            && file_truncate(file, file->size) == 0
            && copy_and_convert(tempfile, file, NULL) == 0)
          fprintf(stderr, "succeeded.\n");
        else
          fprintf(stderr, "failed\n");
      }
      else
        print_recode_warning(task->error_so_far, file->name);
    }

    recode_delete_task(task);
    file_free(tempfile);
  }
  else {
    /* File is stdin.
       First recode begining saved in io_buffer, then append rest of stdin. */
    enum recode_error errmax = RECODE_NO_ERROR;

    /* Create a task from the request.
     * Set it up for buffer -> stdout conversion */
    task = recode_new_task(request);
    task->fail_level = enca_recode_fail_level;
    task->abort_level = RECODE_SYSTEM_ERROR;
    task->input.name = NULL;
    task->input.file = NULL;
    task->input.buffer = (char*)file->buffer->data;
    task->input.cursor = (char*)file->buffer->data;
    task->input.limit = (char*)file->buffer->data + file->buffer->pos;
    task->output.name = NULL;
    task->output.file = stdout;

    success = recode_perform_task(task);
    if (!success) {
      if (task->error_so_far >= RECODE_SYSTEM_ERROR) {
        fprintf(stderr, "%s: librecode probably damaged `%s'. "
                        "No way to recover in a pipe.\n",
                        program_name,
                        ffname_r(NULL));
        recode_delete_task(task);
        return ERR_IOFAIL;
      }
      else
        errmax = task->error_so_far;
    }
    recode_delete_task(task);

    /* Create a task from the request.
     * Set it up for stdin -> stdout conversion */
    task = recode_new_task(request);
    task->fail_level = enca_recode_fail_level;
    task->abort_level = RECODE_SYSTEM_ERROR;
    task->input.name = NULL;
    task->input.file = stdin;
    task->output.name = NULL;
    task->output.file = stdout;

    success = recode_perform_task(task);
    if (!success) {
      if (task->error_so_far >= RECODE_SYSTEM_ERROR) {
        fprintf(stderr, "%s: librecode probably damaged `%s'. "
                        "No way to recover in a pipe.\n",
                        program_name,
                        ffname_r(NULL));
        recode_delete_task(task);
        return ERR_IOFAIL;
      }
      else {
        if (errmax < task->error_so_far)
          errmax = task->error_so_far;
      }
    }
    if (errmax >= enca_recode_fail_level)
      print_recode_warning(errmax, ffname_r(NULL));

    recode_delete_task(task);
  }

  /* return ERR_IOFAIL on failure since it means file-related problems */
  return success ? ERR_OK : ERR_IOFAIL;
}
예제 #4
0
	// Default constructor: initialises mOuter with the result of
	// recode_new_outer(false).
	// NOTE(review): the result is stored unchecked; presumably callers
	// or other members deal with a null outer -- confirm.
	RecodeOuter::RecodeOuter() : mOuter(recode_new_outer(false)) {}