コード例 #1
0
ファイル: xmlreader.c プロジェクト: anhk/lua-xmlreader
/*
 * reader:bytes_consumed()
 *
 * Lua binding around xmlTextReaderByteConsumed() for the reader found at
 * stack index 1.
 *
 * Returns 1 result (the integer reported by libxml2) on success. When
 * libxml2 reports -1, pushes nil followed by whatever
 * xmlreader_pusherror() pushes and returns 2.
 */
static int xmlreader_bytes_consumed(lua_State *L) {
  xmlreader reader = check_xmlreader(L, 1);
  long consumed = xmlTextReaderByteConsumed(reader);

  /* Failure path first: nil plus the error pushed by the helper. */
  if (consumed == -1) {
    lua_pushnil(L);
    xmlreader_pusherror(L);
    return 2;
  }

  lua_pushinteger(L, consumed);
  return 1;
}
コード例 #2
0
ファイル: ruby_xml_reader.c プロジェクト: astro/libxml-ruby
/*
 * call-seq:
 *    reader.byte_consumed -> value
 *
 * This method provides the current index of the parser used by the reader,
 * relative to the start of the current entity.
 */
static VALUE
rxml_reader_byte_consumed(VALUE self)
{
  /* xmlTextReaderByteConsumed() returns a C long. Convert it with LONG2NUM
   * so the value is not narrowed to int before becoming a Ruby Integer. */
  long consumed = xmlTextReaderByteConsumed(rxml_text_reader_get(self));

  return LONG2NUM(consumed);
}
コード例 #3
0
ファイル: ruby_xml_reader.c プロジェクト: GREENMASK/mgr
/*
 * call-seq:
 *    reader.byte_consumed -> value
 *
 * This method provides the current index of the parser used by the reader,
 * relative to the start of the current entity.
 */
static VALUE
rxml_reader_byte_consumed(VALUE self)
{
  xmlTextReaderPtr xreader = rxml_text_reader_get(self);

  /* The libxml2 call yields a C long; LONG2NUM keeps the full range,
   * whereas INT2NUM would narrow it to int first. */
  long consumed = xmlTextReaderByteConsumed(xreader);

  return LONG2NUM(consumed);
}
コード例 #4
0
ファイル: indexWiki_old_v2.c プロジェクト: ahabeger/indexWiki
/**
 * Filters, transforms, and indexes a wiki XML file into the index using
 * ngrams.
 *
 * inFileName   - name of the file to process
 * wikiIndex    - the judy arrays to store the index of the wiki in
 * articleCount - in/out counter: its current value is used as the number of
 *                the next article and it is incremented once per article read
 *
 * Exits the process with status 1 if the title file (when writeFiles is set)
 * or the XML reader cannot be opened.
 */
void indexWiki(char* inFileName, Pvoid_t *wikiIndex, int* articleCount) {
	
	//-------------------- initialization --------------------//
	bool articleIndex[lastNgram] = {0}; // boolean array of what trigrams are in an article
	struct stemmer * currentStemmer = create_stemmer();
	
	// file for writing the titles to (only opened when writeFiles is set)
	FILE* titleFile = NULL;
	if (writeFiles) {
		titleFile = fopen("title_file", "w");
		if (NULL == titleFile) {
			fprintf(stderr, "Error open title file: %m\n");
			exit(1);
		}
	}
	
	// initializes the libxml library
	LIBXML_TEST_VERSION
	xmlTextReaderPtr wikiReader; //the reader for the document
	// parser options are independent bit flags, so combine them with bitwise OR
	wikiReader = xmlReaderForFile(inFileName, NULL, XML_PARSE_RECOVER | XML_PARSE_NOWARNING | XML_PARSE_NOERROR | XML_PARSE_HUGE);
	if (NULL == wikiReader) {
		fprintf(stderr, "Error opening XML wiki: %m\n");
		exit(1);
	}

	// for progress bar
	int percent = 0;
	long fileSize = getFileSize(inFileName);
	
	// initialization for currentArticle and its components
	article currentArticle;	
	currentArticle.title = g_string_sized_new(256);
	currentArticle.body  = g_string_sized_new(786432); //768*1024

	//-------------------- index the wiki --------------------//
	optionalPrint ("%s", "Adding collection to index.\n");
	optionalPrint ("%d", (int)(fileSize / 1048576));
	optionalPrint (" MB in file\n");
	displayProgressBar (xmlTextReaderByteConsumed(wikiReader), fileSize, &percent);
	 
	// prime the loop
	// NOTE(review): the length fields are reset directly; the stored
	// characters are left untouched, so ->str is not re-terminated here.
	currentArticle.title->len = 0;
	currentArticle.body->len  = 0;
	// advance the reader twice before the first getArticle call
	// (original note: at a <page> tag, drop in)
	xmlTextReaderRead(wikiReader);
	xmlTextReaderRead(wikiReader);
	 
	// reads from xml file until file is finished, adds articles to index, and writes titles to file
	// processes one article per iteration
	while (getArticle (wikiReader, &currentArticle)) {
		currentArticle.articleNumber = *articleCount;
		*articleCount = *articleCount + 1;
		// filter / transform text
		removeMarkup(currentArticle.body);
		stemText(currentArticle.body, currentStemmer); //ngramming.h
		// index the text
		indexText(currentArticle.body, articleIndex); //ngramming.h
		addToIndex(articleIndex, wikiIndex, currentArticle.articleNumber);
		// adds titles to title file
		if (writeFiles) {fprintf(titleFile, "%s\n", currentArticle.title->str);}
		// re-prime the loop
		currentArticle.title->len = 0;
		currentArticle.body->len  = 0;
		displayProgressBar (xmlTextReaderByteConsumed(wikiReader), fileSize, &percent);
	}
	optionalPrint ("\n%s", "Finished indexing. \n");
	// the argument is a signed long, so the conversion must be %ld (not %lu)
	optionalPrint ("%ld", (long)(xmlTextReaderByteConsumed(wikiReader)/1048576));
	optionalPrint ("MB consumed\n");
	
	
	optionalPrint ("%d %s %d %s", *articleCount, "articles found", (int) currentArticle.body->allocated_len, "length allocated for article body\n");
	// clean up of structures needed to process wiki
	if (writeFiles) {fclose (titleFile);}
	free_stemmer(currentStemmer);
	xmlFreeTextReader(wikiReader);
	xmlCleanupParser();
	// NOTE(review): currentArticle.title and currentArticle.body are not
	// released here. The original author observed a malloc failure when
	// calling g_string_free() on them, so they are deliberately left alone
	// until the cause is understood.
}
コード例 #5
0
/*
 * Parses the children of a <service> element into @service.
 *
 * If service->name is not yet set, it is taken from the "id" attribute of
 * the node the reader is currently positioned on. The function then walks
 * the following nodes until the closing </service> tag, filling in the
 * fields of @service from the recognised child elements; unknown elements
 * are skipped.
 *
 * Parsing stops early when a <type_data> element is found: the byte offset
 * of its opening tag inside service->file_data is looked up and, when
 * found, stored in service->type_data_offset.
 *
 * Returns FALSE if a node has no name or a sub-parser reports failure;
 * TRUE otherwise (including when the reader stops returning nodes).
 */
static gboolean
parse_service (xmlTextReaderPtr reader, AgService *service)
{
    const gchar *name;
    int ret, type;

    if (!service->name)
    {
        xmlChar *_name = xmlTextReaderGetAttribute (reader,
                                                    (xmlChar *) "id");
        service->name = g_strdup ((const gchar *)_name);
        if (_name) xmlFree(_name);
    }

    ret = xmlTextReaderRead (reader);
    while (ret == 1)
    {
        name = (const gchar *)xmlTextReaderConstName (reader);
        if (G_UNLIKELY (!name)) return FALSE;

        type = xmlTextReaderNodeType (reader);
        if (type == XML_READER_TYPE_END_ELEMENT &&
            strcmp (name, "service") == 0)
            break;

        if (type == XML_READER_TYPE_ELEMENT)
        {
            gboolean ok;

            /* Fields guarded by "!service->..." keep a value that was
             * already set and fall through to the final else branch. */
            if (strcmp (name, "type") == 0 && !service->type)
            {
                ok = _ag_xml_dup_element_data (reader, &service->type);
            }
            else if (strcmp (name, "name") == 0 && !service->display_name)
            {
                ok = _ag_xml_dup_element_data (reader, &service->display_name);
            }
            else if (strcmp (name, "description") == 0)
            {
                ok = _ag_xml_dup_element_data (reader, &service->description);
            }
            else if (strcmp (name, "provider") == 0 && !service->provider)
            {
                ok = _ag_xml_dup_element_data (reader, &service->provider);
            }
            else if (strcmp (name, "icon") == 0)
            {
                ok = _ag_xml_dup_element_data (reader, &service->icon_name);
            }
            else if (strcmp (name, "translations") == 0)
            {
                ok = _ag_xml_dup_element_data (reader, &service->i18n_domain);
            }

            else if (strcmp (name, "template") == 0)
            {
                ok = parse_template (reader, service);
            }
            else if (strcmp (name, "preview") == 0)
            {
                ok = parse_preview (reader, service);
            }
            else if (strcmp (name, "type_data") == 0)
            {
                static const gchar element[] = "<type_data";
                long consumed;
                gsize offset;

                /* find the offset in the file where this element begins */
                consumed = xmlTextReaderByteConsumed(reader);

                /* The reader reports -1 when the position cannot be
                 * computed; never turn that into a huge unsigned offset. */
                offset = (consumed > 0) ? (gsize) consumed : 0;

                /* Scan backwards from the reader position for the opening
                 * tag. Compare only the visible characters of the literal:
                 * including its terminating NUL would require the file data
                 * to end right after "<type_data" and so never match. */
                while (offset > 0)
                {
                    if (strncmp (service->file_data + offset, element,
                                 sizeof (element) - 1) == 0)
                    {
                        service->type_data_offset = offset;
                        break;
                    }
                    offset--;
                }

                /* this element is placed after all the elements we are
                 * interested in: we can stop the parsing now */
                return TRUE;
            }
            else if (strcmp (name, "tags") == 0)
            {
                ok = _ag_xml_parse_element_list (reader, "tag",
                                                 &service->tags);
            }
            else
                ok = TRUE;

            if (G_UNLIKELY (!ok)) return FALSE;
        }

        /* Move to the next node, skipping the current node's subtree. */
        ret = xmlTextReaderNext (reader);
    }
    return TRUE;
}