/*
 * raptor_rdfxmla_emit_resource_uri:
 * @serializer: #raptor_serializer object
 * @element: XML Element
 * @uri: URI object
 * @depth: depth into tree (not used by this function)
 *
 * Write @element as a start/end element pair carrying one attribute,
 * "resource" in the context's rdf namespace, whose value is @uri.
 *
 * When RAPTOR_OPTION_RELATIVE_URIS is set the attribute value is @uri
 * made relative to the serializer base URI; otherwise it is the full
 * URI string.
 *
 * Return value: non-0 on failure
 **/
static int
raptor_rdfxmla_emit_resource_uri(raptor_serializer *serializer,
                                 raptor_xml_element *element,
                                 raptor_uri* uri,
                                 int depth)
{
  raptor_rdfxmla_context* context = (raptor_rdfxmla_context*)serializer->context;
  raptor_xml_writer *xml_writer = context->xml_writer;
  raptor_qname **attr_list;
  unsigned char *value;
  int use_relative;

  RAPTOR_DEBUG2("Emitting resource predicate URI %s\n",
                raptor_uri_as_string(uri));

  /* one-slot attribute array, handed to the element below */
  attr_list = RAPTOR_CALLOC(raptor_qname**, 1, sizeof(raptor_qname*));
  if(!attr_list)
    return 1;

  use_relative = RAPTOR_OPTIONS_GET_NUMERIC(serializer,
                                            RAPTOR_OPTION_RELATIVE_URIS);
  if(use_relative) {
    /* newly allocated string - released once the qname is built */
    value = raptor_uri_to_relative_uri_string(serializer->base_uri, uri);
  } else {
    /* string from raptor_uri_as_string() - not freed here */
    value = raptor_uri_as_string(uri);
  }

  attr_list[0] = raptor_new_qname_from_namespace_local_name(serializer->world,
                                                            context->rdf_nspace,
                                                            (unsigned char *)"resource",
                                                            value);

  if(use_relative)
    RAPTOR_FREE(char*, value);

  if(!attr_list[0]) {
    RAPTOR_FREE(qnamearray, attr_list);
    return 1;
  }

  raptor_xml_element_set_attributes(element, attr_list, 1);

  /* the element has no content: open and close it immediately */
  raptor_xml_writer_start_element(xml_writer, element);
  raptor_xml_writer_end_element(xml_writer, element);

  RAPTOR_DEBUG2("Emitted resource predicate URI %s\n",
                raptor_uri_as_string(uri));

  return 0;
}
/**
 * raptor_www_set_http_accept:
 * @www: #raptor_www class
 * @value: Accept: header value or NULL to have an empty one.
 *
 * Set HTTP Accept header.
 *
 * Builds the whole header line - "Accept:" optionally followed by a
 * space and @value - in a newly allocated string and stores it in
 * @www->http_accept.
 *
 * A header string stored by an earlier call is freed before being
 * replaced, in the same way raptor_www_set_http_cache_control() treats
 * its field, so calling this repeatedly does not leak.  If the
 * allocation fails the function returns silently and the previous
 * setting is left in place.
 **/
void
raptor_www_set_http_accept(raptor_www* www, const char *value)
{
  char *header_line;
  char *dest;
  size_t len = 8; /* strlen("Accept:")+1 */
  size_t value_len = 0;

  if(value) {
    value_len = strlen(value);
    len += 1 + value_len; /* " "+value */
  }

  header_line = (char*)RAPTOR_MALLOC(cstring, len);
  if(!header_line)
    return;

  /* copy header name */
  memcpy(header_line, "Accept:", 7); /* Do not copy NUL */
  dest = header_line + 7;

  /* copy header value */
  if(value) {
    *dest++ = ' ';
    memcpy(dest, value, value_len + 1); /* Copy NUL */
  } else {
    /* Ensure value is NUL terminated */
    *dest = '\0';
  }

  /* Swap in the new header only once it is complete, releasing any
   * string left by a previous call */
  if(www->http_accept)
    RAPTOR_FREE(cstring, www->http_accept);
  www->http_accept = header_line;

#if RAPTOR_DEBUG > 1
  RAPTOR_DEBUG2("Using Accept header: '%s'\n", www->http_accept);
#endif
}
/*
 * raptor_rss_model_add_item:
 * @rss_model: RSS model to extend
 *
 * Create a new item with raptor_new_rss_item() and append it to the
 * model's singly linked item list, updating the head (items), the
 * tail (last) and the item counter.
 *
 * Return value: non-0 on failure (the item could not be created)
 */
int
raptor_rss_model_add_item(raptor_rss_model* rss_model)
{
  raptor_rss_item* new_item = raptor_new_rss_item(rss_model->world);

  if(!new_item)
    return 1;

  /* an empty list gets the new item as its head */
  if(!rss_model->items)
    rss_model->items = new_item;

  /* otherwise chain it after the current tail */
  if(rss_model->last)
    rss_model->last->next = new_item;

  /* either way the new item becomes the tail */
  rss_model->last = new_item;
  rss_model->items_count++;

  RAPTOR_DEBUG2("Added item %d\n", rss_model->items_count);

  return 0;
}
/**
 * raptor_new_uri_from_id:
 * @base_uri: existing base URI
 * @id: RDF ID
 *
 * Constructor - create a new URI from a base URI and RDF ID.
 *
 * This creates a URI equivalent to concatenating @base_uri with
 * ## and @id.
 *
 * A temporary "#id" string is built, resolved against @base_uri with
 * raptor_new_uri_relative_to_base() and then freed.
 *
 * Return value: a new #raptor_uri object or NULL on failure
 * (including when either argument is NULL).
 **/
raptor_uri*
raptor_new_uri_from_id(raptor_uri *base_uri, const unsigned char *id)
{
  raptor_uri *new_uri;
  unsigned char *local_name;
  size_t id_len;

  if(!base_uri || !id)
    return NULL;

#if defined(RAPTOR_DEBUG) && RAPTOR_DEBUG > 1
  RAPTOR_DEBUG2("Using ID %s\n", id);
#endif

  id_len = strlen((const char*)id);

  /* "#id\0": one byte for '#', id_len for the ID, one for the NUL */
  local_name = (unsigned char*)RAPTOR_MALLOC(cstring, id_len + 2);
  if(!local_name)
    return NULL;

  local_name[0] = '#';
  memcpy(local_name + 1, id, id_len + 1); /* includes the NUL */

  new_uri = raptor_new_uri_relative_to_base(base_uri, local_name);
  RAPTOR_FREE(cstring, local_name);

  return new_uri;
}
/*
 * raptor_turtle_emit_resource:
 * @serializer: #raptor_serializer object
 * @node: resource node
 * @depth: depth into tree (not used by this function)
 *
 * Write a resource node to the Turtle writer: as a qname when the
 * namespace stack can abbreviate the node's URI, otherwise as a full
 * URI reference.
 *
 * Return value: non-0 on failure (@node is not a resource node)
 **/
static int
raptor_turtle_emit_resource(raptor_serializer *serializer,
                            raptor_abbrev_node* node,
                            int depth)
{
  raptor_turtle_context* context=(raptor_turtle_context*)serializer->context;
  raptor_turtle_writer *turtle_writer = context->turtle_writer;
  raptor_qname* abbreviated;

  RAPTOR_DEBUG5("Emitting resource node %p refcount %d subject %d object %d\n",
                node, node->ref_count, node->count_as_subject,
                node->count_as_object);

  if(node->type != RAPTOR_IDENTIFIER_TYPE_RESOURCE)
    return 1;

  abbreviated = raptor_namespaces_qname_from_uri(context->nstack,
                                                 node->value.resource.uri,
                                                 10);
  if(!abbreviated) {
    /* no namespace fits: write the URI reference itself */
    raptor_turtle_writer_reference(turtle_writer, node->value.resource.uri);
  } else {
    raptor_turtle_writer_qname(turtle_writer, abbreviated);
    raptor_free_qname(abbreviated);
  }

  RAPTOR_DEBUG2("Emitted %p\n", node);

  return 0;
}
/*
 * raptor_turtle_emit_literal:
 * @serializer: #raptor_serializer object
 * @node: literal node
 * @depth: depth into tree (not used by this function)
 *
 * Write a literal node (string, language and datatype) via
 * raptor_turtle_writer_literal().
 *
 * Return value: non-0 on failure - 1 if @node is not a literal,
 * otherwise the result of raptor_turtle_writer_literal()
 **/
static int
raptor_turtle_emit_literal(raptor_serializer *serializer,
                           raptor_abbrev_node* node,
                           int depth)
{
  raptor_turtle_context* context=(raptor_turtle_context*)serializer->context;
  int status;

  RAPTOR_DEBUG5("Emitting literal node %p refcount %d subject %d object %d\n",
                node, node->ref_count, node->count_as_subject,
                node->count_as_object);

  if(node->type != RAPTOR_IDENTIFIER_TYPE_LITERAL)
    return 1;

  status = raptor_turtle_writer_literal(context->turtle_writer,
                                        context->nstack,
                                        node->value.literal.string,
                                        node->value.literal.language,
                                        node->value.literal.datatype);

  RAPTOR_DEBUG2("Emitted %p\n", node);

  return status;
}
/**
 * raptor_www_set_http_cache_control:
 * @www: WWW object
 * @cache_control: Cache-Control header value (or NULL to disable)
 *
 * Set HTTP Cache-Control:header (default none)
 *
 * The @cache_control value can be a string to set it, "" to send
 * a blank header or NULL to not set the header at all.
 *
 * Any header stored by an earlier call is always released first.  The
 * stored string is the complete header line: "Cache-Control:" followed
 * by a space and the value when the value is non-empty.
 *
 * Return value: non-0 on failure
 **/
int
raptor_www_set_http_cache_control(raptor_www* www, const char* cache_control)
{
  const char* const header="Cache-Control:";
  const size_t header_len = 14; /* strlen("Cache-Control:") */
  char *line;
  char *dest;
  size_t value_len;

  /* RAPTOR_ASSERT is handed the failure condition */
  RAPTOR_ASSERT((strlen(header) != header_len), "Cache-Control header length is wrong");

  /* drop whatever was set before; the field is NULL from here on */
  if(www->cache_control) {
    RAPTOR_FREE(cstring, www->cache_control);
    www->cache_control = NULL;
  }

  /* NULL means "send no header": nothing more to do */
  if(!cache_control)
    return 0;

  value_len = strlen(cache_control);

  /* header + " " + cache_control + "\0" */
  line = (char*)RAPTOR_MALLOC(cstring, header_len + 1 + value_len + 1);
  if(!line)
    return 1;

  www->cache_control = line;

  /* header name, without its NUL */
  memcpy(line, header, header_len);
  dest = line + header_len;

  if(*cache_control) {
    /* non-empty value: separator then value including its NUL */
    *dest++ = ' ';
    memcpy(dest, cache_control, value_len + 1);
  } else {
    /* blank header: just terminate after the name */
    *dest = '\0';
  }

#if RAPTOR_DEBUG > 1
  RAPTOR_DEBUG2("Using Cache-Control header: '%s'\n", www->cache_control);
#endif

  return 0;
}
/*
 * raptor_ntriples_term_valid:
 * @c: character to test
 * @position: index of @c within the term (0 for the first character)
 * @term_class: kind of term being scanned
 *
 * Decide whether @c may appear at @position of a term of class
 * @term_class.  Callers scan a term character by character and stop
 * at the first character for which this returns 0.
 *
 * Return value: non-0 if the character is allowed, 0 otherwise (also 0
 * for an unknown @term_class)
 */
static int
raptor_ntriples_term_valid(unsigned char c, int position,
                           raptor_ntriples_term_class term_class)
{
  switch(term_class) {
    case RAPTOR_TERM_CLASS_URI:
      /* everything up to the closing > */
      return (c != '>');

    case RAPTOR_TERM_CLASS_BNODEID:
      /* [A-Za-z0-9_:][-.A-Za-z0-9]* */
      if(IS_ASCII_ALPHA(c) || IS_ASCII_DIGIT(c) || c == '_' || c == ':')
        return 1;
      /* FIXME
       * This isn't correct; '.' is allowed in positions 1..N-1 but
       * this calling convention of character-by-character cannot
       * check this.
       */
      return (position && (c == '-' || c == '.'));

    case RAPTOR_TERM_CLASS_STRING:
      /* everything up to the closing " */
      return (c != '"');

    case RAPTOR_TERM_CLASS_LANGUAGE:
      /* [a-zA-Z]+ ('-' [a-zA-Z0-9]+ )? */
      if(!position)
        return IS_ASCII_ALPHA(c);
      return (IS_ASCII_ALPHA(c) || IS_ASCII_DIGIT(c) || c == '-');

    default:
      RAPTOR_DEBUG2("Unknown N-Triples term class %d", term_class);
      break;
  }

  return 0;
}
/*
 * raptor_rss_item_add:
 * @rss_parser: RSS parser context
 *
 * Allocate a zeroed item and append it to the parser's item list,
 * updating the head (items), the tail (last) and the item counter.
 *
 * The function has no way to report failure: if the allocation fails
 * the list is left exactly as it was and nothing is added.
 */
static void
raptor_rss_item_add(raptor_rss_parser_context *rss_parser)
{
  raptor_rss_item* item;

  item=(raptor_rss_item*)RAPTOR_CALLOC(raptor_rss_item, 1, sizeof(raptor_rss_item));
  if(!item)
    return; /* out of memory - do not touch the list */

  item->next=NULL;

  /* new list */
  if(!rss_parser->items)
    rss_parser->items=item;

  /* join last item to this one */
  if(rss_parser->last)
    rss_parser->last->next=item;

  /* this is now the last item */
  rss_parser->last=item;
  rss_parser->items_count++;

  RAPTOR_DEBUG2("Added item %d\n", rss_parser->items_count);
}
/*
 * raptor_turtle_emit_blank:
 * @serializer: #raptor_serializer object
 * @node: blank node
 * @depth: depth into tree
 *
 * Emit a description of a blank node.
 *
 * A node used exactly once as a subject and exactly once as an object
 * is written inline by emitting its subject description (if one is
 * found in the context's blanks) one level deeper; that subject is
 * then invalidated.  Any other blank node is written by name as
 * "_:" followed by its identifier.
 *
 * Return value: non-0 on failure
 **/
static int
raptor_turtle_emit_blank(raptor_serializer *serializer,
                         raptor_abbrev_node* node,
                         int depth)
{
  raptor_turtle_context* context=(raptor_turtle_context*)serializer->context;
  int status = 0;
  int needs_name;

  RAPTOR_DEBUG5("Emitting blank node %p refcount %d subject %d object %d\n",
                node, node->ref_count, node->count_as_subject,
                node->count_as_object);

  if(node->type != RAPTOR_IDENTIFIER_TYPE_ANONYMOUS)
    return 1;

  needs_name = !(node->count_as_subject == 1 && node->count_as_object == 1);

  if(needs_name) {
    /* Blank node that needs an explicit name */
    raptor_turtle_writer_raw(context->turtle_writer,
                             (const unsigned char*)"_:");
    raptor_turtle_writer_raw(context->turtle_writer,
                             node->value.blank.string);
  } else {
    /* One use as subject and one as object: no explicit name needed,
     * so nest the subject description here */
    raptor_abbrev_subject* nested;

    nested = raptor_abbrev_subject_find(context->blanks, node->type,
                                        node->value.blank.string);
    if(nested) {
      status = raptor_turtle_emit_subject(serializer, nested, depth+1);
      raptor_abbrev_subject_invalidate(nested);
    }
  }

  RAPTOR_DEBUG2("Emitted %p\n", node);

  return status;
}
/*
 * raptor_serializer_register_factory:
 * @world: raptor_world object
 * @factory: pointer to function to call to register the factory
 *
 * INTERNAL - Register a syntax that can be generated by a serializer factory
 *
 * Allocates a zeroed factory object, pushes it onto @world->serializers,
 * lets @factory fill it in and finally validates its syntax description.
 *
 * NOTE(review): when validation fails the factory is freed here even
 * though it was already pushed onto the serializers sequence, which
 * (per the comments below) owns it - confirm this cannot leave a
 * dangling entry in the sequence.
 *
 * Return value: the new factory object or NULL on failure
 **/
RAPTOR_EXTERN_C
raptor_serializer_factory*
raptor_serializer_register_factory(raptor_world* world,
                                   int (*factory) (raptor_serializer_factory*))
{
  raptor_serializer_factory *new_factory;

  new_factory = RAPTOR_CALLOC(raptor_serializer_factory*, 1,
                              sizeof(*new_factory));
  if(!new_factory)
    return NULL;

  new_factory->world = world;
  new_factory->desc.mime_types = NULL;

  /* on error, the factory is already freed by the sequence */
  if(raptor_sequence_push(world->serializers, new_factory))
    return NULL;

  /* Call the serializer registration function on the new object;
   * on failure the factory stays owned (and is later freed) by the
   * serializers sequence */
  if(factory(new_factory))
    return NULL;

  if(raptor_syntax_description_validate(&new_factory->desc)) {
    raptor_log_error(world, RAPTOR_LOG_LEVEL_ERROR, NULL,
                     "Serializer description failed to validate\n");
    /* Clean up on failure */
    raptor_free_serializer_factory(new_factory);
    return NULL;
  }

#if defined(RAPTOR_DEBUG) && RAPTOR_DEBUG > 1
  RAPTOR_DEBUG2("Registered serializer %s\n", new_factory->desc.names[0]);
#endif

  return new_factory;
}
/*
 * raptor_guess_parse_content_type_handler:
 * @rdf_parser: guess parser
 * @content_type: content type header value (or NULL)
 *
 * Record the content type for later use by the guess parser.
 *
 * Only the part of @content_type before the first ';' is kept (the
 * whole string when there is no ';').  A NULL @content_type is
 * ignored.  If the copy cannot be allocated the stored content type
 * is left NULL.
 *
 * NOTE(review): a content type stored by an earlier call is
 * overwritten without being freed here - confirm whether this handler
 * can be invoked more than once per parse.
 */
static void
raptor_guess_parse_content_type_handler(raptor_parser* rdf_parser,
                                        const char* content_type)
{
  raptor_guess_parser_context* guess_parser = (raptor_guess_parser_context*)rdf_parser->context;

  if(content_type) {
    const char *p;
    size_t len;

    /* stop at any parameters such as ";charset=..." */
    if((p = strchr(content_type,';')))
      len = p-content_type;
    else
      len = strlen(content_type);

    guess_parser->content_type = RAPTOR_MALLOC(char*, len + 1);
    if(guess_parser->content_type) {
      memcpy(guess_parser->content_type, content_type, len);
      guess_parser->content_type[len]='\0';

      RAPTOR_DEBUG2("Got content type '%s'\n", guess_parser->content_type);
    }
  }
}
static size_t raptor_www_curl_write_callback(void *ptr, size_t size, size_t nmemb, void *userdata) { raptor_www* www = (raptor_www*)userdata; size_t bytes = size * nmemb; /* If WWW has been aborted, return nothing so that * libcurl will abort the transfer */ if(www->failed) return 0; raptor_www_curl_update_status(www); #if defined(RAPTOR_DEBUG) && RAPTOR_DEBUG > 2 RAPTOR_DEBUG2("Got %d bytes\n", bytes); #endif if(www->write_bytes) www->write_bytes(www, www->write_bytes_userdata, ptr, size, nmemb); www->total_bytes += bytes; return bytes; }
/*
 * raptor_turtle_emit_resource:
 * @serializer: #raptor_serializer object
 * @node: resource node
 * @depth: depth into tree (not used by this function)
 *
 * Write a URI node to the Turtle writer: as a qname when the namespace
 * stack yields one that is also a legal Turtle qname, otherwise as a
 * full URI reference.
 *
 * Return value: non-0 on failure (@node's term is not a URI)
 **/
static int
raptor_turtle_emit_resource(raptor_serializer *serializer,
                            raptor_abbrev_node* node,
                            int depth)
{
  raptor_turtle_context* context = (raptor_turtle_context*)serializer->context;
  raptor_turtle_writer *turtle_writer = context->turtle_writer;
  raptor_qname* candidate;

  RAPTOR_DEBUG5("Emitting resource node %p refcount %d subject %d object %d\n",
                node, node->ref_count, node->count_as_subject,
                node->count_as_object);

  if(node->term->type != RAPTOR_TERM_TYPE_URI)
    return 1;

  candidate = raptor_new_qname_from_namespace_uri(context->nstack,
                                                  node->term->value.uri, 10);

  if(candidate && raptor_turtle_is_legal_turtle_qname(candidate)) {
    raptor_turtle_writer_qname(turtle_writer, candidate);
    raptor_free_qname(candidate);
  } else {
    /* XML Names allow leading '_' and '.' anywhere but Turtle does not,
     * so a qname that exists but is not legal Turtle is discarded */
    if(candidate)
      raptor_free_qname(candidate);

    raptor_turtle_writer_reference(turtle_writer, node->term->value.uri);
  }

  RAPTOR_DEBUG2("Emitted %p\n", node);

  return 0;
}
/*
 * raptor_turtle_emit_subject:
 * @serializer: #raptor_serializer object
 * @subject: subject node
 * @depth: depth into tree
 *
 * Emit a subject node
 *
 * Nothing is written (and 0 is returned) when @subject is not valid,
 * when it has no properties, or when it is a top level (@depth 0)
 * blank node used exactly once as a subject and once as an object.
 *
 * Otherwise the subject term itself is written first - a resource, a
 * "[]" / "_:id" blank node form, or an ordinal written as the rdf
 * namespace URI followed by "_N" - and then either its collection
 * items inside "( )" (when its first two properties are rdf:first and
 * rdf:rest in either order) or its properties, wrapped in "[ ]" for a
 * nested blank node.  At @depth 0 the statement is terminated with
 * " ." and a blank line.
 *
 * NOTE(review): the result of raptor_turtle_emit_subject_properties()
 * is not checked.
 *
 * Return value: non-0 on failure
 **/
static int
raptor_turtle_emit_subject(raptor_serializer *serializer,
                           raptor_abbrev_subject* subject,
                           int depth)
{
  raptor_turtle_context* context=(raptor_turtle_context*)serializer->context;
  raptor_turtle_writer* turtle_writer=context->turtle_writer;
  int blank = 1;
  int collection = 0;
  int rc = 0;

  if(!raptor_abbrev_subject_valid(subject))
    return 0;

  RAPTOR_DEBUG5("Emitting subject node %p refcount %d subject %d object %d\n",
                subject->node,
                subject->node->ref_count,
                subject->node->count_as_subject,
                subject->node->count_as_object);

  if(!depth &&
     subject->node->type == RAPTOR_IDENTIFIER_TYPE_ANONYMOUS &&
     subject->node->count_as_subject == 1 &&
     subject->node->count_as_object == 1) {
    RAPTOR_DEBUG2("Skipping subject node %p\n", subject->node);
    return 0;
  }

  if(raptor_avltree_size(subject->properties) == 0) {
    RAPTOR_DEBUG2("Skipping subject node %p\n", subject->node);
    return 0;
  }

  /* check if we can do collection abbreviation */
  if(raptor_avltree_size(subject->properties) >= 2) {
    raptor_avltree_iterator* iter=NULL;
    raptor_abbrev_node* pred1;
    raptor_abbrev_node* pred2;

    iter=raptor_new_avltree_iterator(subject->properties, NULL, NULL, 1);
    if(!iter)
      return 1;

    /* look at the predicates of the first two properties */
    pred1=((raptor_abbrev_node**)raptor_avltree_iterator_get(iter))[0];
    raptor_avltree_iterator_next(iter);
    pred2=((raptor_abbrev_node**)raptor_avltree_iterator_get(iter))[0];
    raptor_free_avltree_iterator(iter);

    if(pred1->type == RAPTOR_IDENTIFIER_TYPE_RESOURCE &&
       pred2->type == RAPTOR_IDENTIFIER_TYPE_RESOURCE &&
       (
        (raptor_uri_equals_v2(serializer->world, pred1->value.resource.uri, context->rdf_first_uri) &&
         raptor_uri_equals_v2(serializer->world, pred2->value.resource.uri, context->rdf_rest_uri))
        ||
        (raptor_uri_equals_v2(serializer->world, pred2->value.resource.uri, context->rdf_first_uri) &&
         raptor_uri_equals_v2(serializer->world, pred1->value.resource.uri, context->rdf_rest_uri))
        )
       ) {
      collection = 1;
    }
  }

  /* emit the subject node */
  if(subject->node->type == RAPTOR_IDENTIFIER_TYPE_RESOURCE) {
    rc= raptor_turtle_emit_resource(serializer, subject->node, depth+1);
    if(rc)
      return rc;
    blank = 0;

  } else if(subject->node->type == RAPTOR_IDENTIFIER_TYPE_ANONYMOUS) {
    if((subject->node->count_as_subject == 1 &&
        subject->node->count_as_object == 0) && depth > 1) {
      blank = 1;
    } else if(subject->node->count_as_object == 0) {
      raptor_turtle_writer_raw(turtle_writer, (const unsigned char*)"[]");
      blank = 0;
    } else if(!collection && subject->node->count_as_object > 1) {
      /* Referred to (used as an object), so needs a nodeID */
      const unsigned char* genid = subject->node->value.blank.string;
      size_t len = strlen((const char*)genid);
      unsigned char* subject_str;

      /* "_:" + genid + NUL */
      subject_str= (unsigned char *)RAPTOR_MALLOC(cstring, len+3);
      if(!subject_str)
        return 1;

      subject_str[0]='_';
      subject_str[1]=':';
      memcpy(&subject_str[2], genid, len+1); /* includes the NUL */
      raptor_turtle_writer_raw(turtle_writer, subject_str);
      RAPTOR_FREE(cstring, subject_str);
    }
  } else if(subject->node->type == RAPTOR_IDENTIFIER_TYPE_ORDINAL) {
    unsigned char* subject_str;

    /* rdf namespace URI + "_" + decimal ordinal + NUL */
    subject_str = (unsigned char *)RAPTOR_MALLOC(cstring,
                                                 raptor_rdf_namespace_uri_len + MAX_ASCII_INT_SIZE + 2);
    if(!subject_str)
      return 1;

    /* format into the freshly allocated buffer, never into the
     * subject structure itself */
    sprintf((char*)subject_str, "%s_%d", raptor_rdf_namespace_uri,
            subject->node->value.ordinal.ordinal);

    raptor_turtle_writer_raw(turtle_writer, subject_str);
    RAPTOR_FREE(cstring, subject_str);

    /* not a blank node; carry on to emit its properties below */
    blank = 0;
  }

  if(collection) {
    raptor_turtle_writer_raw(turtle_writer, (const unsigned char*)"(");
    raptor_turtle_writer_increase_indent(turtle_writer);

    rc=raptor_turtle_emit_subject_collection_items(serializer, subject, depth+1);

    raptor_turtle_writer_decrease_indent(turtle_writer);
    raptor_turtle_writer_newline(turtle_writer);
    raptor_turtle_writer_raw(turtle_writer, (const unsigned char*)")");

  } else {
    if(blank && depth > 1)
      raptor_turtle_writer_raw(turtle_writer, (const unsigned char*)"[");

    raptor_turtle_writer_increase_indent(turtle_writer);
    raptor_turtle_writer_newline(turtle_writer);

    raptor_turtle_emit_subject_properties(serializer, subject, depth+1);

    raptor_turtle_writer_decrease_indent(turtle_writer);

    if(blank && depth > 1) {
      raptor_turtle_writer_newline(turtle_writer);
      raptor_turtle_writer_raw(turtle_writer, (const unsigned char*)"]");
    }
  }

  if(depth == 0) {
    /* NOTE: the space before the . here MUST be there or statements
     * that end in a numeric literal will be interpreted incorrectly
     * (the "." will be parsed as part of the literal and statement
     * left unterminated)
     */
    raptor_turtle_writer_raw(turtle_writer, (const unsigned char*)" .");
    raptor_turtle_writer_newline(turtle_writer);
    raptor_turtle_writer_newline(turtle_writer);
  }

  return rc;
}
void raptor_libxml_xmlStructuredErrorFunc(void *user_data, xmlErrorPtr err) { raptor_error_handlers* error_handlers=(raptor_error_handlers*)user_data; raptor_stringbuffer* sb; char *nmsg; raptor_message_handler handler=NULL; void* handler_data=NULL; raptor_log_level level=RAPTOR_LOG_LEVEL_ERROR; if(err == NULL || err->code == XML_ERR_OK || err->level == XML_ERR_NONE) return; /* Do not warn about things with no location */ if(err->level == XML_ERR_WARNING && !err->file) return; /* XML fatal errors never cause an abort */ if(err->level == XML_ERR_FATAL) err->level= XML_ERR_ERROR; sb=raptor_new_stringbuffer(); if(err->domain != XML_FROM_HTML) raptor_stringbuffer_append_counted_string(sb, (const unsigned char*)"XML ", 4, 1); if(err->domain != XML_FROM_NONE && err->domain < XML_LAST_DL) { const unsigned char* label; label=(const unsigned char*)raptor_libxml_domain_labels[(int)err->domain]; raptor_stringbuffer_append_string(sb, label, 1); raptor_stringbuffer_append_counted_string(sb, (const unsigned char*)" ", 1, 1); } if(err->level == XML_ERR_WARNING) raptor_stringbuffer_append_counted_string(sb, (const unsigned char*)"warning: ", 9, 1); else /* XML_ERR_ERROR or XML_ERR_FATAL */ raptor_stringbuffer_append_counted_string(sb, (const unsigned char*)"error: ", 7, 1); if(err->message) { unsigned char* msg; size_t len; msg=(unsigned char*)err->message; len= strlen((const char*)msg); if(len && msg[len-1] == '\n') msg[--len]='\0'; raptor_stringbuffer_append_counted_string(sb, msg, len, 1); } #if LIBXML_VERSION >= 20618 /* 2005-02-13 - v2.6.18 */ /* str1 has the detailed HTTP error */ if(err->domain == XML_FROM_HTTP && err->str1) { unsigned char* msg; size_t len; msg=(unsigned char*)err->str1; len= strlen((const char*)msg); if(len && msg[len-1] == '\n') msg[--len]='\0'; raptor_stringbuffer_append_counted_string(sb, (const unsigned char*)" - ", 3, 1); raptor_stringbuffer_append_counted_string(sb, msg, len, 1); } #endif /* When err->domain == XML_FROM_XPATH then err->int1 is * the 
offset into err->str1, the line with the error */ if(err->domain == XML_FROM_XPATH && err->str1) { raptor_stringbuffer_append_counted_string(sb, (const unsigned char*)" in ", 4, 1); raptor_stringbuffer_append_string(sb, (const unsigned char*)err->str1, 1); } if(error_handlers) { if(error_handlers->magic != RAPTOR_ERROR_HANDLER_MAGIC) { #ifdef RAPTOR_DEBUG if(1) /* FIXME */ RAPTOR_DEBUG2("Received bogus error_handlers pointer %p\n", error_handlers); else RAPTOR_FATAL2("Received bogus error_handlers pointer %p\n", error_handlers); #endif error_handlers=NULL; } } nmsg=(char*)raptor_stringbuffer_as_string(sb); if(err->level == XML_ERR_FATAL) level=RAPTOR_LOG_LEVEL_FATAL; else if(err->level == XML_ERR_ERROR) level=RAPTOR_LOG_LEVEL_ERROR; else level=RAPTOR_LOG_LEVEL_WARNING; if(error_handlers && level <= error_handlers->last_log_level) { handler=error_handlers->handlers[level].handler; handler_data=error_handlers->handlers[level].user_data; } raptor_log_error(level, handler, handler_data, (error_handlers ? error_handlers->locator : NULL), nmsg); raptor_free_stringbuffer(sb); }
/*
 * raptor_ntriples_parse_chunk:
 * @rdf_parser: parser
 * @s: new content
 * @len: number of bytes in @s
 * @is_end: non-0 if this is the last chunk
 *
 * Append @s to the parser's pending line buffer and hand every
 * complete line (terminated by \n, \r or \r\n) to
 * raptor_ntriples_parse_line().  Any trailing partial line is kept,
 * moved to the front of a fresh buffer, for the next call.
 *
 * A call with @len 0 returns immediately.  When @is_end is set and
 * unparsed content remains, an error is reported.
 *
 * Return value: non-0 on failure
 */
static int
raptor_ntriples_parse_chunk(raptor_parser* rdf_parser,
                            const unsigned char *s, size_t len,
                            int is_end)
{
  raptor_ntriples_parser_context *ntriples_parser=(raptor_ntriples_parser_context*)rdf_parser->context;
  unsigned char *merged;    /* old pending content + new chunk */
  unsigned char *cursor;    /* scan position inside merged */
  unsigned char *unparsed;  /* start of content not yet parsed */
  size_t remaining;

#if defined(RAPTOR_DEBUG) && RAPTOR_DEBUG > 1
  RAPTOR_DEBUG2("adding %d bytes to buffer\n", (unsigned int)len);
#endif

  /* No data? It's the end */
  if(!len)
    return 0;

  merged=(unsigned char*)RAPTOR_MALLOC(cstring, ntriples_parser->line_length + len + 1);
  if(!merged) {
    raptor_parser_fatal_error(rdf_parser, "Out of memory");
    return 1;
  }

  /* carry over what was left from earlier chunks */
  if(ntriples_parser->line_length) {
    strncpy((char*)merged, (const char*)ntriples_parser->line,
            ntriples_parser->line_length);
    RAPTOR_FREE(cstring, ntriples_parser->line);
  }

  ntriples_parser->line=merged;

  /* the new chunk goes right after the old content */
  cursor=merged+ntriples_parser->line_length;

  /* adjust stored length */
  ntriples_parser->line_length += len;

  strncpy((char*)cursor, (const char*)s, len);
  cursor[len] = '\0';

#if defined(RAPTOR_DEBUG) && RAPTOR_DEBUG > 1
  RAPTOR_DEBUG2("buffer now %d bytes\n", ntriples_parser->line_length);
#endif

  cursor=merged+ntriples_parser->offset;
  while(*(unparsed=cursor)) {
    unsigned char *line_start=cursor;
    size_t line_len;

#if defined(RAPTOR_DEBUG) && RAPTOR_DEBUG > 1
    RAPTOR_DEBUG3("line buffer now '%s' (offset %d)\n", cursor, cursor-(merged+ntriples_parser->offset));
#endif

    /* skip \n when just seen \r - i.e. \r\n or CR LF */
    if(ntriples_parser->last_char == '\r' && *cursor == '\n') {
#if defined(RAPTOR_DEBUG) && RAPTOR_DEBUG > 1
      RAPTOR_DEBUG1("skipping a \\n\n");
#endif
      cursor++;
      rdf_parser->locator.byte++;
      rdf_parser->locator.column=0;
      unparsed=line_start=cursor;
    }

    /* find the end of this line */
    while(*cursor && *cursor != '\n' && *cursor != '\r')
      cursor++;

    /* ran out of data before a newline: wait for the next chunk */
    if(!*cursor)
      break;

#if defined(RAPTOR_DEBUG) && RAPTOR_DEBUG > 1
    RAPTOR_DEBUG3("found newline \\x%02x at offset %d\n", *cursor,
                  cursor-line_start);
#endif
    ntriples_parser->last_char=*cursor;

    line_len=cursor-line_start;
    rdf_parser->locator.column=0;

    /* terminate the line in place and parse it */
    *cursor='\0';
    if(raptor_ntriples_parse_line(rdf_parser,line_start,line_len))
      return 1;

    rdf_parser->locator.line++;

    /* go past newline */
    cursor++;
    rdf_parser->locator.byte++;

#if defined(RAPTOR_DEBUG) && RAPTOR_DEBUG > 1
    /* Do not peek if too far */
    if(cursor-merged < ntriples_parser->line_length)
      RAPTOR_DEBUG2("next char is \\x%02x\n", *cursor);
    else
      RAPTOR_DEBUG1("next char unknown - end of buffer\n");
#endif
  }

  ntriples_parser->offset=unparsed-merged;

  remaining=ntriples_parser->line_length - ntriples_parser->offset;

  if(remaining) {
    /* collapse buffer: keep only the unparsed tail */
    unsigned char *tail;

#if defined(RAPTOR_DEBUG) && RAPTOR_DEBUG > 1
    RAPTOR_DEBUG3("collapsing buffer from %d to %d bytes\n", ntriples_parser->line_length, (unsigned int)remaining);
#endif
    tail=(unsigned char*)RAPTOR_MALLOC(cstring, remaining + 1);
    if(!tail) {
      raptor_parser_fatal_error(rdf_parser, "Out of memory");
      return 1;
    }

    strncpy((char*)tail,
            (const char*)ntriples_parser->line+ntriples_parser->line_length-remaining,
            remaining);
    tail[remaining]='\0';

    RAPTOR_FREE(cstring, ntriples_parser->line);

    ntriples_parser->line=tail;
    ntriples_parser->line_length -= ntriples_parser->offset;
    ntriples_parser->offset=0;

#if defined(RAPTOR_DEBUG) && RAPTOR_DEBUG > 1
    RAPTOR_DEBUG3("buffer now '%s' (%d bytes)\n", ntriples_parser->line, ntriples_parser->line_length);
#endif
  }

  /* exit now, no more input: anything still unparsed is an error */
  if(is_end && ntriples_parser->offset != ntriples_parser->line_length) {
    raptor_parser_error(rdf_parser, "Junk at end of input.\"");
    return 1;
  }

  return 0;
}
/*
 * raptor_rss_parser_processNode:
 * @rdf_parser: RSS parser
 *
 * Handle the node the libxml text reader is currently positioned on.
 *
 * Start elements (type 1) select the current RSS type (adding a new
 * item for "item") or, inside a known type, the current field; their
 * attributes are then walked.  End elements (type 15, also reached for
 * empty elements) close the current field or return to the previous
 * type.  Text nodes (type 3) are stored as the value of the current
 * field of the current item or common type; text outside a known
 * type/field is ignored.  Other node types are ignored.
 *
 * The node name and value obtained from the reader are freed before
 * returning, except for a text value that was stored in a field.
 */
static void
raptor_rss_parser_processNode(raptor_parser *rdf_parser)
{
  raptor_rss_parser_context* rss_parser=(raptor_rss_parser_context*)rdf_parser->context;
  xmlTextReaderPtr reader=rss_parser->reader;
  xmlChar *name, *value;
  int type;

  name = xmlTextReaderName(reader);
  if (name == NULL)
    name = xmlStrdup(BAD_CAST "--");
  value = xmlTextReaderValue(reader);

  /* Without a name nothing below can be matched safely */
  if(name == NULL) {
    if(value)
      xmlFree(value);
    return;
  }

  type=xmlTextReaderNodeType(reader);

  switch(type) {
    case 1: /* start element */

      if(rss_parser->current_type==RAPTOR_RSS_NONE) {
        if(!strcmp((const char*)name, "rss") ||
           !strcmp((const char*)name, "rdf") ||
           !strcmp((const char*)name, "RDF")) {
          break;
        }

        if(!strcmp((const char*)name, "item")) {
          raptor_rss_item_add(rss_parser);
          rss_parser->current_type=RAPTOR_RSS_ITEM;
        } else {
          int i;
          rss_parser->current_type=RAPTOR_RSS_UNKNOWN;
          for(i=0; i<RAPTOR_RSS_COMMON_SIZE; i++)
            if(!strcmp((const char*)name, raptor_rss_types_info[i].name)) {
              rss_parser->current_type=(raptor_rss_type)i;
              break;
            }
        }

        if(rss_parser->current_type==RAPTOR_RSS_UNKNOWN) {
          RAPTOR_DEBUG2("Unknown start element named %s\n", name);
        } else {
          RAPTOR_DEBUG3("FOUND type %d - %s\n", rss_parser->current_type, raptor_rss_types_info[rss_parser->current_type].name);
        }
      } else { /* have current_type, this is an element inside */
        int i;
        raptor_rss_type old_type=rss_parser->current_type;

        /* check it is not a type here */
        if(!strcmp((const char*)name, "item")) {
          raptor_rss_item_add(rss_parser);
          rss_parser->current_type=RAPTOR_RSS_ITEM;
        } else {
          for(i=0; i<RAPTOR_RSS_COMMON_SIZE; i++)
            if(!strcmp((const char*)name, raptor_rss_types_info[i].name)) {
              rss_parser->current_type=(raptor_rss_type)i;
              break;
            }
        }

        if(rss_parser->current_type != old_type) {
          RAPTOR_DEBUG6("FOUND element %s for type %d - %s INSIDE current type %d - %s\n", name, rss_parser->current_type, raptor_rss_types_info[rss_parser->current_type].name, old_type, raptor_rss_types_info[old_type].name);
          rss_parser->prev_type=old_type;
          break;
        }

        rss_parser->current_field=RAPTOR_RSS_FIELD_UNKNOWN;
        for(i=0; i<RAPTOR_RSS_FIELDS_SIZE; i++)
          if(!strcmp((const char*)name, raptor_rss_fields_info[i].name)) {
            rss_parser->current_field=(raptor_rss_fields_type)i;
            break;
          }

        if(rss_parser->current_field==RAPTOR_RSS_FIELD_UNKNOWN) {
          RAPTOR_DEBUG3("Unknown field element named %s inside type %s\n", name, raptor_rss_types_info[rss_parser->current_type].name);
        } else {
          RAPTOR_DEBUG4("FOUND field %d - %s inside type %s\n", rss_parser->current_field, raptor_rss_fields_info[rss_parser->current_field].name, raptor_rss_types_info[rss_parser->current_type].name);
        }
      }

      /* Now check for attributes.  Only a return of exactly 1 means
       * the reader moved to another attribute; an error return must
       * end the loop rather than be treated as success. */
      while(xmlTextReaderMoveToNextAttribute(reader) == 1) {
        xmlChar *attrName = xmlTextReaderName(reader);
        xmlChar *attrValue = xmlTextReaderValue(reader);

        /* cannot match an attribute whose name is unavailable */
        if(!attrName) {
          if(attrValue)
            xmlFree(attrValue);
          continue;
        }

        RAPTOR_DEBUG3("  attribute %s=%s\n", attrName, attrValue);

        /* Pick a few attributes to care about */
        if(!strcmp((const char*)attrName, "isPermaLink")) {
          if(!strcmp((const char*)name, "guid")) {
            /* <guid isPermaLink="..."> */
            if(rss_parser->last) {
              /* rss_parser->last->guid_is_url=!strcmp(attrValue, "true"); */
            }
          }
        } else if(!strcmp((const char*)attrName, "url")) {
          if(!strcmp((const char*)name, "source")) {
            /* <source url="...">foo</source> */
            if(rss_parser->last) {
              /*
                rss_parser->last->source_url=attrValue;
                attrValue=NULL;
               */
            }
          }
        } else if(!strcmp((const char*)attrName, "domain")) {
          if(!strcmp((const char*)name, "category")) {
            /* <category domain="URL">foo</source> */
            if(rss_parser->last) {
              /*
                rss_parser->last->category_url=attrValue;
                attrValue=NULL;
               */
            }
          }
        }

        xmlFree(attrName);
        if(attrValue)
          xmlFree(attrValue);
      }

      if(!xmlTextReaderIsEmptyElement(reader))
        break;

      /* FALLTHROUGH if is empty element */

    case 15: /* end element */
      if(rss_parser->current_type != RAPTOR_RSS_NONE) {
        if(rss_parser->current_field != RAPTOR_RSS_FIELD_NONE) {
          RAPTOR_DEBUG3("Ending element %s field %s\n", name, raptor_rss_fields_info[rss_parser->current_field].name);
          rss_parser->current_field= RAPTOR_RSS_FIELD_NONE;
        } else {
          RAPTOR_DEBUG3("Ending element %s type %s\n", name, raptor_rss_types_info[rss_parser->current_type].name);
          if(rss_parser->prev_type != RAPTOR_RSS_NONE) {
            rss_parser->current_type=rss_parser->prev_type;
            rss_parser->prev_type=RAPTOR_RSS_NONE;
            RAPTOR_DEBUG3("Returning to type %d - %s\n", rss_parser->current_type, raptor_rss_types_info[rss_parser->current_type].name);
          } else
            rss_parser->current_type= RAPTOR_RSS_NONE;
        }
      }

      break;

    case 3: /* text */
      /* a text node without a value has nothing to scan or store */
      if(!value)
        break;

      if((rss_parser->current_type==RAPTOR_RSS_NONE ||
          rss_parser->current_type==RAPTOR_RSS_UNKNOWN) ||
         (rss_parser->current_field==RAPTOR_RSS_FIELD_NONE ||
          rss_parser->current_field==RAPTOR_RSS_FIELD_UNKNOWN)) {
        char *p=(char*)value;

        /* isspace() needs an unsigned char value; text bytes above
         * 0x7f would otherwise be passed as negative numbers */
        while(*p) {
          if(!isspace((unsigned char)*p))
            break;
          p++;
        }
        if(*p) {
          RAPTOR_DEBUG2("IGNORING non-whitespace text node '%s'\n", value);
        }
        break;
      }

      if(rss_parser->current_type != RAPTOR_RSS_ITEM &&
         rss_parser->current_type >= RAPTOR_RSS_COMMON_IGNORED) {
        /* skipHours, skipDays common but IGNORED */
      } else {
        raptor_rss_item* update_item;

        if(rss_parser->current_type == RAPTOR_RSS_ITEM)
          update_item=rss_parser->last;
        else
          update_item=&rss_parser->common[rss_parser->current_type];

        /* there may be no last item if adding one failed earlier */
        if(update_item) {
          RAPTOR_DEBUG4("Added text '%s' to field %s of type %s\n", value, raptor_rss_fields_info[rss_parser->current_field].name, raptor_rss_types_info[rss_parser->current_type].name);
          if(!update_item->fields[rss_parser->current_field])
            update_item->fields_count++;

          /* the field takes over the value string */
          update_item->fields[rss_parser->current_field]=(char*)value;
          value=NULL;
        }
      }

      break;

    case 4: /* CData sections */
    case 5: /* entity references */
    case 6: /* entity declarations */
    case 7: /* PIs */
    case 8: /* comments */
    case 9: /* document nodes */
    case 10: /* DTD/Doctype nodes */
    case 11: /* document fragment */
    case 12: /* notation nodes */
      break;

    default:
#if defined(RAPTOR_DEBUG)
      RAPTOR_DEBUG3("depth %d type %d", xmlTextReaderDepth(reader), type);
      fprintf(stderr," name %s %s", name,
              xmlTextReaderIsEmptyElement(reader) ? "Empty" : "");
      if (value == NULL)
        fprintf(stderr, "\n");
      else {
        fprintf(stderr, " '%s'\n", value);
      }
#endif
      RAPTOR_DEBUG2("Ignoring type %d\n", type);
  }

  xmlFree(name);
  if(value)
    xmlFree(value);
}
/*
 * raptor_guess_parse_chunk:
 * @rdf_parser: guess parser
 * @buffer: content to parse
 * @len: number of bytes in @buffer
 * @is_end: non-0 if this is the last chunk
 *
 * On the first chunk (while do_guess is set) pick a real parser by
 * name from the stored content type, the chunk content and the base
 * URI string, create (or reuse) that parser, copy the user state to it
 * and start it.  Every chunk, including the first, is then passed on
 * to that parser.
 *
 * Return value: non-0 on failure
 */
static int
raptor_guess_parse_chunk(raptor_parser* rdf_parser,
                         const unsigned char *buffer, size_t len,
                         int is_end)
{
  raptor_guess_parser_context* guess_parser = (raptor_guess_parser_context*)rdf_parser->context;

  if(guess_parser->do_guess) {
    const unsigned char *base_string = NULL;
    const char *parser_name;

    /* only ever guess once */
    guess_parser->do_guess = 0;

    if(rdf_parser->base_uri)
      base_string = raptor_uri_as_string(rdf_parser->base_uri);

    parser_name = raptor_world_guess_parser_name(rdf_parser->world,
                                                 NULL,
                                                 guess_parser->content_type,
                                                 buffer, len, base_string);

    if(!parser_name) {
      /* no parser fits: report, abort and drop any earlier parser */
      raptor_parser_error(rdf_parser,
                          "Failed to guess parser from content type '%s'",
                          guess_parser->content_type ?
                          guess_parser->content_type : "(none)");
      raptor_parser_parse_abort(rdf_parser);
      if(guess_parser->parser) {
        raptor_free_parser(guess_parser->parser);
        guess_parser->parser = NULL;
      }
      return 1;
    }

#if defined(RAPTOR_DEBUG) && RAPTOR_DEBUG > 1
    RAPTOR_DEBUG2("Guessed parser name '%s'\n", parser_name);
#endif

    /* If there is an existing guessed parser factory present and
     * it's different from the wanted parser, free it
     */
    if(guess_parser->parser) {
      raptor_parser_factory* wanted = raptor_world_get_parser_factory(rdf_parser->world, parser_name);

      if(guess_parser->parser->factory != wanted) {
        raptor_free_parser(guess_parser->parser);
        guess_parser->parser = NULL;
      }
    }

    if(!guess_parser->parser) {
      guess_parser->parser = raptor_new_parser(rdf_parser->world,
                                               parser_name);
      if(!guess_parser->parser)
        return 1;
    }

    /* copy any user data to the inner parser */
    if(raptor_parser_copy_user_state(guess_parser->parser, rdf_parser))
      return 1;

    if(raptor_parser_parse_start(guess_parser->parser,
                                 rdf_parser->base_uri))
      return 1;
  }

  /* now we can pass on calls to internal guess_parser */
  return raptor_parser_parse_chunk(guess_parser->parser, buffer, len, is_end);
}
/* grow a tree by sprouting with a new node
 *
 * If *node_pp is empty a new node holding p_data is created there,
 * linked to parent, and the rebalancing flag is raised.  Otherwise
 * p_data is compared with the node's data and the work is passed on
 * to the left or right sprout routine, or - for an equivalent key -
 * the item either replaces the existing one or is discarded, as
 * selected by RAPTOR_AVLTREE_FLAG_REPLACE_DUPLICATES.  Whichever item
 * is dropped is released with the tree's free handler, if any.
 *
 * Return values:
 *   0 on success
 *   >0 if equivalent item exists (and the old element remains in the tree)
 *   <0 if memory is exhausted.
 */
static int
raptor_avltree_sprout(raptor_avltree* tree, raptor_avltree_node* parent,
                      raptor_avltree_node** node_pp, void* p_data,
                      int *rebalancing_p)
{
  int order;

#if RAPTOR_DEBUG > 1
  RAPTOR_AVLTREE_DEBUG1("Enter\n");
  if ( *node_pp) {
    raptor_avltree_print_node(*node_pp);
    RAPTOR_AVLTREE_DEBUG1("\n");
  } else {
    RAPTOR_AVLTREE_DEBUG1("Nil node\n");
  }
#endif

  /* If grounded, add the node here, set the rebalance flag and return */
  if(!*node_pp) {
    raptor_avltree_node* fresh;

    RAPTOR_AVLTREE_DEBUG1("grounded. adding new node, setting rebalancing flag true\n");
    fresh = (raptor_avltree_node*)RAPTOR_MALLOC(raptor_avltree_node,
                                                sizeof(*fresh));
    *node_pp = fresh;

    if(!fresh) {
      /* the item cannot be stored, so release it */
      if(tree->free_handler)
        tree->free_handler(p_data);
      return RAPTOR_AVLTREE_ENOMEM;
    }

#if RAPTOR_DEBUG > 1
    RAPTOR_DEBUG2("Creating new node %p\n", *node_pp);
#endif

    fresh->parent = parent;
    fresh->left = NULL;
    fresh->right = NULL;
    fresh->balance = 0;
    fresh->data= p_data;
    *rebalancing_p = TRUE;

    tree->size++;

#if RAPTOR_DEBUG > 1
    raptor_avltree_check_node(tree, *node_pp, 0, 0);
    RAPTOR_AVLTREE_DEBUG1("Tree now looks this way\n");
    raptor_avltree_dump(tree,stderr);
#endif

    return FALSE;
  }

  /* check node */
#if RAPTOR_DEBUG > 1
  raptor_avltree_check_node(tree, *node_pp, 0, 0);
#endif

  /* compare the new data with the data already at this node */
  order = tree->compare_handler(p_data, (*node_pp)->data);

  /* if LESS, prepare to move to the left. */
  if(order < 0)
    return raptor_avltree_sprout_left(tree, node_pp, p_data, rebalancing_p);

  /* if MORE, prepare to move to the right. */
  if(order > 0)
    return raptor_avltree_sprout_right(tree, node_pp, p_data, rebalancing_p);

  /* otherwise equivalent key: the tree shape does not change */
  *rebalancing_p = FALSE;

  if(!(tree->flags & RAPTOR_AVLTREE_FLAG_REPLACE_DUPLICATES)) {
    /* ignore item with equivalent key */
    if(tree->free_handler)
      tree->free_handler(p_data);
    return RAPTOR_AVLTREE_EXISTS;
  }

  /* replace item with equivalent key */
  if(tree->free_handler)
    tree->free_handler((*node_pp)->data);
  (*node_pp)->data= p_data;

  return FALSE;
}