/**
 * Sets the value stored for a key in a flat key/value string mapping.
 *
 * The mapping is walked as consecutive (key, value) slots until a NULL key
 * slot is reached. Every pair whose key equals the given key has its value
 * replaced. If no key matched, the key and value are written into the two
 * slots that start at the terminating NULL.
 *
 * No bounds are checked here, so the caller has to guarantee that the array
 * has room for one more pair.
 * NOTE(review): presumably the slots after the terminator are NULL and a
 * terminator remains afterwards - confirm against the code that allocates
 * the mapping.
 *
 * @param mapping the key/value array to update.
 * @param key the key to look up or to add.
 * @param value the value to store for the key.
 */
void rdfa_update_mapping(char** mapping, const char* key, const char* value)
{
   char** entry;
   int key_seen = 0;

   /* step through the array one (key, value) pair at a time; the scan does
    * not stop at the first match, so duplicate keys are all updated */
   for(entry = mapping; *entry != NULL; entry += 2)
   {
      if(strcmp(entry[0], key) == 0)
      {
         entry[1] = rdfa_replace_string(entry[1], value);
         key_seen = 1;
      }
   }

   if(key_seen)
   {
      return;
   }

   /* the key was never matched: entry now points at the terminating slot,
    * which becomes the new pair */
   entry[0] = rdfa_replace_string(entry[0], key);
   entry[1] = rdfa_replace_string(entry[1], value);
}
char* rdfa_canonicalize_string(const char* str) { char* rval = (char*)malloc(sizeof(char) * (strlen(str) + 2)); char* working_string = NULL; char* token = NULL; char* wptr = NULL; char* offset = rval; working_string = rdfa_replace_string(working_string, str); // split on any whitespace character that we may find token = strtok_r(working_string, RDFA_WHITESPACE_CHARACTERS, &wptr); while(token != NULL) { size_t token_length = strlen(token); memcpy(offset, token, token_length); offset += token_length; *offset++ = ' '; *offset = '\0'; token = strtok_r(NULL, RDFA_WHITESPACE_CHARACTERS, &wptr); } if(offset != rval) { offset--; *offset = '\0'; } free(working_string); return rval; }
/**
 * Appends an item to the end of a list, growing the item array when full.
 *
 * When RDFALIST_FLAG_CONTEXT is set in flags, the data pointer is stored as
 * given. Otherwise the data is treated as a C string and a copy made with
 * rdfa_replace_string() is stored instead.
 *
 * If memory for the item or for the grown array cannot be obtained, the list
 * is left unchanged and the item is not added.
 *
 * @param list the list to append to.
 * @param data the data to store (or to copy, for non-context items).
 * @param flags the flags to record on the new item.
 */
void rdfa_add_item(rdfalist* list, void* data, liflag_t flags)
{
   rdfalistitem* item = (rdfalistitem*)malloc(sizeof(rdfalistitem));

   if(item == NULL)
   {
      return;
   }

   item->data = NULL;
   if(flags & RDFALIST_FLAG_CONTEXT)
   {
      item->data = data;
   }
   else
   {
      item->data = (char*)rdfa_replace_string(
         (char*)item->data, (const char*)data);
   }
   item->flags = flags;

   if(list->num_items == list->max_items)
   {
      /* the array holds pointers to items, so it is sized by the pointer
       * type; the result is kept in a temporary so that a failed realloc()
       * does not lose the existing array */
      rdfalistitem** grown = (rdfalistitem**)realloc(
         list->items, sizeof(rdfalistitem*) * (1 + (list->max_items * 2)));

      if(grown == NULL)
      {
         /* only the private string copy belongs to the item; context data
          * stays owned by the caller */
         if(!(flags & RDFALIST_FLAG_CONTEXT))
         {
            free(item->data);
         }
         free(item);
         return;
      }

      list->items = grown;
      list->max_items = 1 + (list->max_items * 2);
   }

   list->items[list->num_items] = item;
   ++list->num_items;
}
static void character_data( void *parser_context, const xmlChar *s, int len) { /*xmlParserCtxtPtr parser = (xmlParserCtxtPtr)parser_context;*/ rdfalist* context_stack = (rdfalist*)((rdfacontext*)parser_context)->context_stack; rdfacontext* context = (rdfacontext*) context_stack->items[context_stack->num_items - 1]->data; char *buffer = (char*)malloc(len + 1); memset(buffer, 0, len + 1); memcpy(buffer, s, len); /* append the text to the current context's plain literal */ if(context->plain_literal == NULL) { context->plain_literal = rdfa_replace_string(context->plain_literal, buffer); context->plain_literal_size = len; } else { context->plain_literal = rdfa_n_append_string( context->plain_literal, &context->plain_literal_size, buffer, len); } /* append the text to the current context's XML literal */ if(context->xml_literal == NULL) { context->xml_literal = rdfa_replace_string(context->xml_literal, buffer); context->xml_literal_size = len; } else { context->xml_literal = rdfa_n_append_string( context->xml_literal, &context->xml_literal_size, buffer, len); } /*printf("plain_literal: %s\n", context->plain_literal);*/ /*printf("xml_literal: %s\n", context->xml_literal);*/ free(buffer); }
/**
 * Allocates a triple and fills it with copies of the given strings.
 *
 * The object type is always recorded. The string fields are only populated
 * when subject, predicate and object are all non-NULL; otherwise every
 * string field of the returned triple stays NULL.
 *
 * @param subject the subject of the triple, or NULL.
 * @param predicate the predicate of the triple, or NULL.
 * @param object the object of the triple, or NULL.
 * @param object_type the type recorded for the object.
 * @param datatype the datatype to copy, or NULL for none.
 * @param language the language to copy, or NULL for none.
 *
 * @return the newly allocated triple.
 */
rdftriple* rdfa_create_triple(const char* subject, const char* predicate,
   const char* object, rdfresource_t object_type, const char* datatype,
   const char* language)
{
   rdftriple* triple = (rdftriple*)malloc(sizeof(rdftriple));

   /* start from a fully cleared triple */
   triple->subject = NULL;
   triple->predicate = NULL;
   triple->object = NULL;
   triple->object_type = object_type;
   triple->datatype = NULL;
   triple->language = NULL;

#if 0
   printf("SUBJECT  : %s\n", subject);
   printf("PREDICATE: %s\n", predicate);
   printf("OBJECT   : %s\n", object);
   printf("DATATYPE : %s\n", datatype);
   printf("LANG     : %s\n", language);
#endif

   /* without a subject, predicate and object there is nothing to fill in */
   if((subject == NULL) || (predicate == NULL) || (object == NULL))
   {
      return triple;
   }

   triple->subject = rdfa_replace_string(triple->subject, subject);
   triple->predicate = rdfa_replace_string(triple->predicate, predicate);
   triple->object = rdfa_replace_string(triple->object, object);

   /* datatype and language are optional extras */
   if(datatype != NULL)
   {
      triple->datatype = rdfa_replace_string(triple->datatype, datatype);
   }
   if(language != NULL)
   {
      triple->language = rdfa_replace_string(triple->language, language);
   }

   return triple;
}
/**
 * Generates the next blank node name for an RDFa context.
 *
 * The name has the form "_:bnode<N>", where N is the context's bnode counter
 * before the call; the counter is incremented as a side effect.
 *
 * @param context the RDFa context whose bnode counter is used.
 *
 * @return a newly allocated string containing the bnode name. The caller
 *         owns the string and MUST free it.
 */
char* rdfa_create_bnode(rdfacontext* context)
{
   char name[64];
   int id = (int)context->bnode_count;

   /* consume the counter value that was just read */
   context->bnode_count++;

   sprintf(name, "_:bnode%i", id);

   return rdfa_replace_string(NULL, name);
}
/**
 * Establishes [new subject] for an element that carries no valid @rel or
 * @rev, following the RDFa 1.0 rules (processing step 4). The context's
 * new_subject is updated only if one of the rules below applies.
 *
 * The first match wins, in this order: @about, @src, @resource, @href, a
 * newly created bnode when @type_of is present, the [parent object].
 *
 * @param context the RDFa context.
 * @param name the name of the current element that is being processed
 *             (not used by the 1.0 rules).
 * @param about the full IRI for about, or NULL if there isn't one.
 * @param src the full IRI for src, or NULL if there isn't one.
 * @param resource the full IRI for resource, or NULL if there isn't one.
 * @param href the full IRI for href, or NULL if there isn't one.
 * @param type_of The list of IRIs for type_of, or NULL if there was
 *                no type_of specified.
 */
void rdfa_establish_new_1_0_subject(
   rdfacontext* context, const char* name, const char* about, const char* src,
   const char* resource, const char* href, const rdfalist* type_of)
{
   /* any attribute that can carry a resource may set [new subject]; pick
    * the first one present in priority order */
   const char* iri =
      (about != NULL) ? about :
      (src != NULL) ? src :
      (resource != NULL) ? resource : href;

   if(iri != NULL)
   {
      context->new_subject = rdfa_replace_string(context->new_subject, iri);
      return;
   }

   if((type_of != NULL) && (type_of->num_items > 0))
   {
      /* @type_of without any resource attribute: [new subject] becomes a
       * newly created bnode */
      char* bnode = rdfa_create_bnode(context);

      context->new_subject = rdfa_replace_string(context->new_subject, bnode);
      free(bnode);
      return;
   }

   if(context->parent_object != NULL)
   {
      /* fall back to [parent object].
       * TODO: the spec also sets the [skip element] flag here, but doing so
       * would set it even when a @property value is present, so it is left
       * unset. */
      context->new_subject =
         rdfa_replace_string(context->new_subject, context->parent_object);
   }
}
/**
 * Resolves a whitespace-separated list of CURIEs into a list of strings.
 *
 * Each token is resolved with rdfa_resolve_curie() for the instanceof/
 * datatype, about/resource and property modes, or with
 * rdfa_resolve_relrev_curie() for the rel/rev mode. Tokens that do not
 * resolve, and every token in any other mode, are skipped.
 *
 * @param rdfa_context the RDFa context used for resolution.
 * @param uris the whitespace-separated CURIEs.
 * @param mode the parse mode that selects the resolver.
 *
 * @return a newly created list holding the resolved values.
 */
rdfalist* rdfa_resolve_curie_list(
   rdfacontext* rdfa_context, const char* uris, curieparse_t mode)
{
   rdfalist* resolved_list = rdfa_create_list(3);
   char* scratch = rdfa_replace_string(NULL, uris);
   char* save = NULL;
   char* tok = NULL;

   /* the mode is fixed for the whole list, so classify it once */
   const int generic_mode =
      (mode == CURIE_PARSE_INSTANCEOF_DATATYPE) ||
      (mode == CURIE_PARSE_ABOUT_RESOURCE) ||
      (mode == CURIE_PARSE_PROPERTY);
   const int relrev_mode = !generic_mode && (mode == CURIE_PARSE_RELREV);

   /* strtok_r() modifies its input, hence the private copy in scratch */
   for(tok = strtok_r(scratch, RDFA_WHITESPACE, &save);
       tok != NULL;
       tok = strtok_r(NULL, RDFA_WHITESPACE, &save))
   {
      char* resolved = NULL;

      if(generic_mode)
      {
         resolved = rdfa_resolve_curie(rdfa_context, tok, mode);
      }
      else if(relrev_mode)
      {
         resolved = rdfa_resolve_relrev_curie(rdfa_context, tok);
      }

      if(resolved == NULL)
      {
         /* not a valid CURIE for this mode */
         continue;
      }

      /* text items are copied when added, so the local string is released */
      rdfa_add_item(resolved_list, resolved, RDFALIST_FLAG_TEXT);
      free(resolved);
   }

   free(scratch);

   return resolved_list;
}
/**
 * Creates a copy of a list.
 *
 * The copy has the same capacity and item count as the original. For every
 * used slot a new item is allocated that carries the original's flags; the
 * data is duplicated only for items flagged RDFALIST_FLAG_TEXT and is left
 * NULL for all other items (copying context items is not implemented).
 * Unused slots are set to NULL.
 *
 * @param list the list to copy.
 *
 * @return the newly allocated copy.
 */
rdfalist* rdfa_copy_list(rdfalist* list)
{
   rdfalist* copy = (rdfalist*)malloc(sizeof(rdfalist));
   unsigned int i;

   copy->max_items = list->max_items;
   copy->num_items = list->num_items;
   copy->items = (rdfalistitem**)malloc(sizeof(void*) * copy->max_items);

   for(i = 0; i < list->max_items; i++)
   {
      rdfalistitem* source;
      rdfalistitem* clone;

      /* slots beyond the used part of the list stay empty */
      if(i >= list->num_items)
      {
         copy->items[i] = NULL;
         continue;
      }

      source = list->items[i];
      clone = (rdfalistitem*)malloc(sizeof(rdfalistitem));
      clone->data = NULL;
      clone->flags = source->flags;

      /* only text payloads are duplicated */
      if(source->flags & RDFALIST_FLAG_TEXT)
      {
         clone->data = (char*)rdfa_replace_string(
            NULL, (const char*)source->data);
      }

      copy->items[i] = clone;
   }

   return copy;
}
/**
 * Handles the end of an element.
 *
 * The element's context is popped from the context stack, the closing tag
 * is appended to its XML literal, object literal / property value triples
 * are completed when the element carries a property, the accumulated
 * literals and bnode state are handed to the parent context, incomplete
 * triples and (for RDFa 1.1) list triples are completed, and finally the
 * popped context is freed.
 *
 * @param parser_context the root rdfacontext, passed as an opaque pointer.
 * @param name the name of the element that is being closed.
 * @param prefix not used by this function.
 * @param URI not used by this function.
 */
static void end_element(void* parser_context, const char* name,
   const char* prefix,const xmlChar* URI)
{
   /*xmlParserCtxtPtr parser = (xmlParserCtxtPtr)parser_context;*/
   rdfalist* context_stack =
      (rdfalist*)((rdfacontext*)parser_context)->context_stack;
   rdfacontext* context = (rdfacontext*)rdfa_pop_item(context_stack);
   /* NOTE(review): the parent is read from the top of the stack after the
    * pop without checking num_items; the parent_context != NULL tests
    * further down do not protect this lookup - confirm the stack can never
    * be empty here. */
   rdfacontext* parent_context = (rdfacontext*)
      context_stack->items[context_stack->num_items - 1]->data;

   /* append the text to the current context's XML literal */
   /* strlen(name) + 4 covers "</", the name, ">" and the terminating NUL */
   char* buffer = (char*)malloc(strlen(name) + 4);

#if defined(DEBUG) && DEBUG > 0
   printf("DEBUG: </%s>\n", name);
   printf("context->local_list_mappings (start of end_element): ");
   rdfa_print_mapping(context->local_list_mappings,
         (print_mapping_value_fp)rdfa_print_triple_list);
#endif

   sprintf(buffer, "</%s>", name);
   if(context->xml_literal == NULL)
   {
      context->xml_literal =
         rdfa_replace_string(context->xml_literal, buffer);
      context->xml_literal_size = strlen(buffer);
   }
   else
   {
      context->xml_literal = rdfa_n_append_string(
         context->xml_literal, &context->xml_literal_size,
         buffer, strlen(buffer));
   }
   free(buffer);

   /* 9. The next step of the iteration is to establish any
    *    [current object literal]; */

   /* generate the complete object literal triples */
   if(context->property != NULL)
   {
      /* save the current xml literal */
      char* saved_xml_literal = context->xml_literal;
      char* content_start = NULL;
      char* content_end = NULL;

      /* ensure to mark only the inner-content of the XML node for
       * processing the object literal. */
      buffer = NULL;

      if(context->xml_literal != NULL)
      {
         /* get the data between the first tag and the last tag */
         content_start = strchr(context->xml_literal, '>');
         content_end = strrchr(context->xml_literal, '<');

         if((content_start != NULL) && (content_end != NULL))
         {
            /* set content end to null terminator */
            /* xml_literal temporarily points INTO the saved buffer, just
             * past the first '>', and the last '<' is overwritten with a
             * NUL; both changes are undone below once the triples have
             * been generated */
            context->xml_literal = ++content_start;
            *content_end = '\0';
         }
      }

      /* update the plain literal if the XML Literal is an empty string */
      if(context->xml_literal != NULL && strlen(context->xml_literal) == 0)
      {
         context->plain_literal =
            rdfa_replace_string(context->plain_literal, "");
      }

      /* process data between first tag and last tag
       * this needs the xml literal to be null terminated */
      if(context->rdfa_version == RDFA_VERSION_1_0)
      {
         rdfa_complete_object_literal_triples(context);
      }
      else
      {
         rdfa_complete_current_property_value_triples(context);
      }

      if(content_end != NULL)
      {
         /* set content end back */
         *content_end = '<';
      }

      if(saved_xml_literal != NULL)
      {
         /* restore xml literal */
         context->xml_literal = saved_xml_literal;
      }
   }

   /*printf(context->plain_literal);*/

   if(parent_context != NULL)
   {
      /* append the XML literal and plain text literals to the parent
       * literals */
      if(context->xml_literal != NULL)
      {
         if(parent_context->xml_literal == NULL)
         {
            parent_context->xml_literal =
               rdfa_replace_string(
                  parent_context->xml_literal, context->xml_literal);
            parent_context->xml_literal_size = context->xml_literal_size;
         }
         else
         {
            parent_context->xml_literal =
               rdfa_n_append_string(
                  parent_context->xml_literal,
                  &parent_context->xml_literal_size,
                  context->xml_literal, context->xml_literal_size);
         }

         /* if there is an XML literal, there is probably a plain literal */
         if(context->plain_literal != NULL)
         {
            if(parent_context->plain_literal == NULL)
            {
               parent_context->plain_literal =
                  rdfa_replace_string(
                     parent_context->plain_literal, context->plain_literal);
               parent_context->plain_literal_size =
                  context->plain_literal_size;
            }
            else
            {
               parent_context->plain_literal =
                  rdfa_n_append_string(
                     parent_context->plain_literal,
                     &parent_context->plain_literal_size,
                     context->plain_literal,
                     context->plain_literal_size);
            }
         }
      }

      /* preserve the bnode count by copying it to the parent_context */
      parent_context->bnode_count = context->bnode_count;
      parent_context->underscore_colon_bnode_name = \
         rdfa_replace_string(parent_context->underscore_colon_bnode_name,
                             context->underscore_colon_bnode_name);
   }

   /* 10. If the [ skip element ] flag is 'false', and [ new subject ]
    * was set to a non-null value, then any [ incomplete triple ]s
    * within the current context should be completed: */
   if((context->skip_element == 0) && (context->new_subject != NULL))
   {
      rdfa_complete_incomplete_triples(context);
   }

   /* 14. Once all the child elements have been traversed, list triples are
    * generated, if necessary. */
   if(context->rdfa_version == RDFA_VERSION_1_1 &&
      (context->new_subject != NULL))
   {
      rdfa_complete_list_triples(context);

      if(parent_context != NULL)
      {
         /* copy the current mapping to the parent mapping */
         /* the parent's old mapping is released first, then replaced by a
          * copy of this context's mapping */
         rdfa_free_mapping(parent_context->local_list_mappings,
            (free_mapping_value_fp)rdfa_free_list);
         parent_context->local_list_mappings = rdfa_copy_mapping(
            (void**)context->local_list_mappings,
            (copy_mapping_value_fp)rdfa_replace_list);

#if defined(DEBUG) && DEBUG > 0
         /* NOTE(review): the label says parent_context but the mapping that
          * is printed is context->local_list_mappings */
         printf("parent_context->local_list_mappings (after copy): ");
         rdfa_print_mapping(context->local_list_mappings,
               (print_mapping_value_fp)rdfa_print_triple_list);
#endif
         /* this context's own mapping is freed and cleared here, before
          * rdfa_free_context() runs below */
         rdfa_free_mapping(context->local_list_mappings,
            (free_mapping_value_fp)rdfa_free_list);
         context->local_list_mappings = NULL;
      }
   }

   /* free the context */
   rdfa_free_context(context);

#if defined(DEBUG) && DEBUG > 0
   printf("-------------------------------------------------------------\n");
#endif
}
/**
 * Establishes a new subject for the given context given the
 * attributes on the current element. The given context's new_subject
 * value is updated if a new subject is found.
 *
 * The first match wins, in this order: @about, @src, @resource, @href, the
 * document base for the head and body elements, a newly created bnode when
 * @type_of is present, the [parent object]. In every case the previous
 * new_subject string is released through rdfa_replace_string().
 *
 * @param context the RDFa context.
 * @param name the name of the current element that is being processed;
 *             must not be NULL.
 * @param about the full IRI for about, or NULL if there isn't one.
 * @param src the full IRI for src, or NULL if there isn't one.
 * @param resource the full IRI for resource, or NULL if there isn't one.
 * @param href the full IRI for href, or NULL if there isn't one.
 * @param type_of The list of IRIs for type_of, or NULL if there was
 *                no type_of specified.
 */
void rdfa_establish_new_subject(
   rdfacontext* context, const char* name, const char* about, const char* src,
   const char* resource, const char* href, const rdfalist* type_of)
{
   /* 4. If the [current element] contains no valid @rel or @rev
    * URI, obtained according to the section on CURIE and URI
    * Processing, then the next step is to establish a value for
    * [new subject]. Any of the attributes that can carry a
    * resource can set [new subject]; */
   if(about != NULL)
   {
      /* * by using the URI from @about, if present, obtained according
       *   to the section on CURIE and URI Processing; */
      context->new_subject =
         rdfa_replace_string(context->new_subject, about);
   }
   else if(src != NULL)
   {
      /* * otherwise, by using the URI from @src, if present, obtained
       *   according to the section on CURIE and URI Processing. */
      context->new_subject =
         rdfa_replace_string(context->new_subject, src);
   }
   else if(resource != NULL)
   {
      /* * otherwise, by using the URI from @resource, if present,
       *   obtained according to the section on CURIE and URI
       *   Processing; */
      context->new_subject =
         rdfa_replace_string(context->new_subject, resource);
   }
   else if(href != NULL)
   {
      /* * otherwise, by using the URI from @href, if present, obtained
       *   according to the section on CURIE and URI Processing. */
      context->new_subject =
         rdfa_replace_string(context->new_subject, href);
   }
   /* * If no URI is provided by a resource attribute, then the first
    *   match from the following rules will apply: */
   else if((strcmp(name, "head") == 0) || (strcmp(name, "body") == 0))
   {
      /* * if the element is the head or body element then act as if
       *   there is an empty @about present, and process it according to
       *   the rule for @about, above; */
      context->new_subject =
         rdfa_replace_string(context->new_subject, context->base);
   }
   else if((type_of != NULL) && (type_of->num_items > 0))
   {
      /* * if @type_of is present, obtained according to the
       *   section on CURIE and URI Processing, then [new subject] is
       *   set to be a newly created [bnode];
       *
       * the bnode goes through rdfa_replace_string() like every other
       * branch so that a previously set new_subject is released instead
       * of being overwritten and leaked */
      char* bnode = rdfa_create_bnode(context);

      context->new_subject =
         rdfa_replace_string(context->new_subject, bnode);
      free(bnode);
   }
   else if(context->parent_object != NULL)
   {
      /* * otherwise, if [parent object] is present, [new subject] is
       *   set to that and the [skip element] flag is set to 'true';
       *
       * TODO: The skip element flag will be set even if there is a
       * @property value, which is a bug, isn't it? It is therefore left
       * unset here. */
      context->new_subject =
         rdfa_replace_string(context->new_subject, context->parent_object);
   }
}
/**
 * Appends freshly read data to the working buffer, sniffs the host language
 * and RDFa version from the buffered document and, once the end of the
 * <head> element has been seen, extracts the base IRI from a <base> element.
 *
 * When a base is found, both context->base and
 * context->current_object_resource are set to it. Only href values
 * delimited by single or double quotes are accepted.
 *
 * @param context the current working context.
 * @param working_buffer the current working buffer; it may be reallocated,
 *                       in which case the pointer is updated.
 * @param working_buffer_size the number of bytes that have been allocated to
 *                            the working buffer, not counting the extra byte
 *                            reserved for the terminating NUL; updated when
 *                            the buffer grows.
 * @param temp_buffer the data that was just read.
 * @param bytes_read the number of bytes available in temp_buffer.
 *
 * @return bytes_read once the data has been appended, or 0 if the working
 *         buffer could not be grown; in that case nothing is appended and
 *         the buffer, its size and the context are left untouched.
 */
static size_t rdfa_init_base(
   rdfacontext* context, char** working_buffer, size_t* working_buffer_size,
   char* temp_buffer, size_t bytes_read)
{
   char* head_end = NULL;
   char* base_start = NULL;
   char* href_start = NULL;
   size_t offset = context->wb_position;
   size_t needed_size = 0;

   if((offset + bytes_read) > *working_buffer_size)
   {
      needed_size = (offset + bytes_read) - *working_buffer_size;
   }

   /* extend the working buffer size */
   if(needed_size > 0)
   {
      size_t growth = sizeof(char) * READ_BUFFER_SIZE;
      size_t new_size;
      char* grown;

      if(needed_size > growth)
      {
         growth += needed_size;
      }
      new_size = *working_buffer_size + growth;

      /* +1 for NUL at end, to allow strstr() etc. to work; the result is
       * checked before the old pointer is replaced so that a failure does
       * not lose the existing buffer */
      grown = (char*)realloc(*working_buffer, new_size + 1);
      if(grown == NULL)
      {
         return 0;
      }

      *working_buffer = grown;
      *working_buffer_size = new_size;
   }

   /* append to the working buffer */
   memmove(*working_buffer + offset, temp_buffer, bytes_read);
   /* ensure the buffer is a NUL-terminated string */
   *(*working_buffer + offset + bytes_read) = '\0';

   /* Sniff the beginning of the document for any document information */
   if(strstr(*working_buffer, "-//W3C//DTD XHTML+RDFa 1.0//EN") != NULL)
   {
      context->host_language = HOST_LANGUAGE_XHTML1;
      context->rdfa_version = RDFA_VERSION_1_0;
   }
   else if(strstr(*working_buffer, "-//W3C//DTD XHTML+RDFa 1.1//EN") != NULL)
   {
      context->host_language = HOST_LANGUAGE_XHTML1;
      context->rdfa_version = RDFA_VERSION_1_1;
   }
   else if(strstr(*working_buffer, "<html") != NULL)
   {
      context->host_language = HOST_LANGUAGE_HTML;
      context->rdfa_version = RDFA_VERSION_1_1;
   }
   else
   {
      context->host_language = HOST_LANGUAGE_XML1;
      context->rdfa_version = RDFA_VERSION_1_1;
   }

#ifdef LIBRDFA_IN_RAPTOR
   /* an RDFa version requested through raptor overrides the sniffed one */
   if(context->raptor_rdfa_version == 10)
   {
      context->host_language = HOST_LANGUAGE_XHTML1;
      context->rdfa_version = RDFA_VERSION_1_0;
   }
   else if(context->raptor_rdfa_version == 11)
   {
      context->rdfa_version = RDFA_VERSION_1_1;
   }
#endif

   /* search for the end of </head> in the buffered data */
   head_end = strstr(*working_buffer, "</head>");
   if(head_end == NULL)
   {
      head_end = strstr(*working_buffer, "</HEAD>");
   }

   context->wb_position += bytes_read;

   /* the base can only be determined once the whole head is buffered */
   if(head_end == NULL)
   {
      return bytes_read;
   }

   /* </head> was found, search for <base and extract the base URI */
   base_start = strstr(*working_buffer, "<base ");
   if(base_start == NULL)
   {
      base_start = strstr(*working_buffer, "<BASE ");
   }
   if(base_start != NULL)
   {
      href_start = strstr(base_start, "href=");
   }

   if(href_start != NULL)
   {
      /* the character right after "href=" delimits the value; anything
       * but a quote (including the end of the buffer) is rejected so that
       * the scan below never starts beyond the terminating NUL */
      char sep = href_start[5];

      if((sep == '"') || (sep == '\''))
      {
         char* uri_start = href_start + 6;
         char* uri_end = strchr(uri_start, sep);

         /* require a closing quote and a non-empty value */
         if((uri_end != NULL) && (*uri_start != sep))
         {
            size_t uri_size = uri_end - uri_start;
            char* temp_uri = (char*)malloc(sizeof(char) * uri_size + 1);

            if(temp_uri != NULL)
            {
               char* cleaned_base;

               memcpy(temp_uri, uri_start, uri_size);
               temp_uri[uri_size] = '\0';

               /* TODO: This isn't in the processing rules, should it
                *       be? Setting current_object_resource will make
                *       sure that the BASE element is inherited by all
                *       subcontexts. */
               cleaned_base = rdfa_iri_get_base(temp_uri);
               context->current_object_resource = rdfa_replace_string(
                  context->current_object_resource, cleaned_base);

               /* clean up the base context */
               context->base =
                  rdfa_replace_string(context->base, cleaned_base);
               free(cleaned_base);
               free(temp_uri);
            }
         }
      }
   }

   return bytes_read;
}
/**
 * Establishes a new subject for the given context when @rel or @rev
 * is present. The given context's new_subject and
 * current_object_resource values are updated if a new subject is found.
 *
 * [new subject] is taken from the first match of: @about, @src, the
 * document base for the head and body elements, a newly created bnode when
 * @type_of is present, the [parent object]. [current object resource] is
 * taken from @resource, then @href, and is otherwise reset to NULL.
 * Previously held strings are released in every case.
 *
 * @param context the RDFa context.
 * @param name the name of the current element that is being processed;
 *             must not be NULL.
 * @param about the full IRI for about, or NULL if there isn't one.
 * @param src the full IRI for src, or NULL if there isn't one.
 * @param resource the full IRI for resource, or NULL if there isn't one.
 * @param href the full IRI for href, or NULL if there isn't one.
 * @param type_of the list of IRIs for type_of, or NULL if type_of
 *                wasn't specified on the current element.
 */
void rdfa_establish_new_subject_with_relrev(
   rdfacontext* context, const char* name, const char* about, const char* src,
   const char* resource, const char* href, const rdfalist* type_of)
{
   /* 5. If the [current element] does contain a valid @rel or @rev
    * URI, obtained according to the section on CURIE and URI
    * Processing, then the next step is to establish both a value
    * for [new subject] and a value for [current object resource]:
    *
    * [new subject] is set to the URI obtained from the first match
    * from the following rules: */
   if(about != NULL)
   {
      /* * by using the URI from @about, if present, obtained
       *   according to the section on CURIE and URI Processing; */
      context->new_subject =
         rdfa_replace_string(context->new_subject, about);
   }
   else if(src != NULL)
   {
      /* * otherwise, by using the URI from @src, if present, obtained
       *   according to the section on CURIE and URI Processing. */
      context->new_subject =
         rdfa_replace_string(context->new_subject, src);
   }
   /* * If no URI is provided then the first match from the following
    *   rules will apply: */
   else if((strcmp(name, "head") == 0) || (strcmp(name, "body") == 0))
   {
      /* * if the element is the head or body element then act as if
       *   there is an empty @about present, and process it according to
       *   the rule for @about, above; */
      context->new_subject =
         rdfa_replace_string(context->new_subject, context->base);
   }
   else if((type_of != NULL) && (type_of->num_items > 0))
   {
      /* * if @type_of is present, obtained according to the
       *   section on CURIE and URI Processing, then [new subject] is
       *   set to be a newly created [bnode];
       *
       * routed through rdfa_replace_string() so that a previously set
       * new_subject is released rather than leaked */
      char* bnode = rdfa_create_bnode(context);

      context->new_subject =
         rdfa_replace_string(context->new_subject, bnode);
      free(bnode);
   }
   else if(context->parent_object != NULL)
   {
      /* * otherwise, if [parent object] is present, [new subject] is
       *   set to that; */
      context->new_subject =
         rdfa_replace_string(context->new_subject, context->parent_object);
   }

   /* Then the [current object resource] is set to the URI obtained
    * from the first match from the following rules: */
   if(resource != NULL)
   {
      /* * by using the URI from @resource, if present, obtained
       *   according to the section on CURIE and URI Processing; */
      context->current_object_resource =
         rdfa_replace_string(context->current_object_resource, resource);
   }
   else if(href != NULL)
   {
      /* * otherwise, by using the URI from @href, if present,
       *   obtained according to the section on CURIE and URI Processing. */
      context->current_object_resource =
         rdfa_replace_string(context->current_object_resource, href);
   }
   else
   {
      /* * otherwise, null. The old value is released first so that
       *   resetting the field does not leak it. */
      free(context->current_object_resource);
      context->current_object_resource = NULL;
   }

   /* Note that final value of the [current object resource] will
    * either be null, or a full URI. */
}
/**
 * Resolves a possibly relative URI against the base of the given context
 * and removes dot-segments from the path of the result.
 *
 * Resolution rules, first match wins:
 * - an empty URI resolves to the base;
 * - a URI containing ':' is taken as-is;
 * - a URI starting with '#' or '?' is appended to the base;
 * - a URI starting with '/' is appended to the base truncated at its third
 *   '/' (the end of the host part of a scheme://host/ base);
 * - anything else replaces the part of the base after its last '/'.
 *
 * Dot-segments ("." and "..") are only removed when the result contains
 * "://" followed by a path.
 *
 * @param context the RDFa context; context->base must not be NULL.
 * @param uri the URI to resolve; must not be NULL.
 *
 * @return a newly allocated string that the caller must free, or NULL if the
 *         URI could not be resolved (for example a relative path against a
 *         base without any '/') or memory could not be obtained.
 */
char* rdfa_resolve_uri(rdfacontext* context, const char* uri)
{
   char* rval = NULL;
   char* path_start = NULL;
   size_t base_length = strlen(context->base);

   if(strlen(uri) < 1)
   {
      /* if a blank URI is given, use the base context */
      rval = rdfa_replace_string(rval, context->base);
   }
   else if(strstr(uri, ":") != NULL)
   {
      /* if a IRI is given, don't concatenate */
      rval = rdfa_replace_string(rval, uri);
   }
   else if(uri[0] == '#' || uri[0] == '?')
   {
      /* if a fragment ID or start of a query parameter is given,
       * concatenate it with the base URI */
      rval = rdfa_join_string(context->base, uri);
   }
   else if(uri[0] == '/')
   {
      /* if a relative URI is given, but it starts with a '/', use the
       * host part concatenated to the given URI */
      char* tmp = NULL;
      char* end_index = NULL;

      /* work on a private copy of the base so that it can be truncated */
      tmp = rdfa_replace_string(tmp, context->base);
      if(tmp == NULL)
      {
         return NULL;
      }

      /* find the final '/' character after the host part of the context
       * base, i.e. the third '/' overall */
      end_index = strchr(tmp, '/');
      if(end_index != NULL)
      {
         end_index = strchr(end_index + 1, '/');
         if(end_index != NULL)
         {
            end_index = strchr(end_index + 1, '/');
         }
      }

      if(end_index != NULL)
      {
         /* keep only the host part */
         *end_index = '\0';
      }
      else
      {
         /* no path in the base: use it as-is, minus a trailing '/' so that
          * the join does not produce a doubled slash; the length check
          * keeps an empty base from indexing before the buffer */
         size_t tlen = strlen(tmp);

         if((tlen > 0) && (tmp[tlen - 1] == '/'))
         {
            tmp[tlen - 1] = '\0';
         }
      }

      rval = rdfa_join_string(tmp, uri);
      free(tmp);
   }
   else
   {
      if((base_length > 0) && (context->base[base_length - 1] == '/'))
      {
         /* if the base URI already ends in /, concatenate */
         rval = rdfa_join_string(context->base, uri);
      }
      else
      {
         /* if we have a relative URI, chop off the name of the file
          * and replace it with the relative pathname */
         char* end_index = strrchr(context->base, '/');

         if(end_index != NULL)
         {
            char* tmpstr = NULL;

            tmpstr = rdfa_replace_string(tmpstr, context->base);
            if(tmpstr != NULL)
            {
               char* end_index2 = strrchr(tmpstr, '/');

               if(end_index2 != NULL)
               {
                  /* terminate right after the last '/' */
                  end_index2[1] = '\0';
               }

               rval = rdfa_join_string(tmpstr, uri);
               free(tmpstr);
            }
         }
      }
   }

   /* It is possible that rval may be NULL here in OOM scenarios */
   if(!rval)
   {
      return NULL;
   }

   /* Find the start of a scheme-based URL path; the path is only looked
    * at when a "/." occurs somewhere after the scheme separator */
   path_start = (char*)strstr(rval, "://");
   if(path_start != NULL)
   {
      if(strstr(path_start, "/.") != NULL)
      {
         path_start += 3;
         path_start = strstr(path_start, "/");
      }
      else
      {
         path_start = NULL;
      }
   }

   /* remove any dot-segments that remain in the URL for URLs w/ schemes */
   if(path_start != NULL)
   {
      size_t rlen = strlen(rval) + 1;
      size_t hlen = path_start - rval;
      /* src carries 4 extra zero bytes so that the look-ahead tests on
       * sptr[1..3] never read past the allocation */
      char* src = (char*)malloc(rlen + 4);
      char* dest = (char*)malloc(rlen + 1);
      char* sptr;
      char* dptr;
      char* dfence;

      if((src == NULL) || (dest == NULL))
      {
         free(src);
         free(dest);
         free(rval);
         return NULL;
      }

      sptr = src + hlen;
      dptr = dest + hlen;
      /* dfence marks the start of the path in the output; segment removal
       * never backs up beyond it */
      dfence = dptr;

      memset(src, 0, rlen + 4);
      memcpy(src, rval, rlen);
      /* scheme and authority are copied through unchanged */
      memcpy(dest, rval, hlen);

      /* Process the path portion of the IRI */
      while(sptr[0] != '?' && sptr[0] != '\0')
      {
         if(sptr[0] == '.' && sptr[1] == '.' && sptr[2] == '/')
         {
            /* A. If the input buffer begins with a prefix of "../",
             * then remove that prefix from the input buffer; otherwise, */
            sptr += 3;
         }
         else if(sptr[0] == '.' && sptr[1] == '/')
         {
            /* A. If the input buffer begins with a prefix of "./",
             * then remove that prefix from the input buffer; otherwise, */
            sptr += 2;
         }
         else if(sptr[0] == '/' && sptr[1] == '.' && sptr[2] == '/')
         {
            /* B. if the input buffer begins with a prefix of "/./",
             * then replace that prefix with "/" in the input buffer;
             * otherwise, */
            sptr += 2;
         }
         else if(sptr[0] == '/' && sptr[1] == '.' && sptr[2] == '\0')
         {
            /* B. if the input buffer begins with a prefix of "/.",
             * where "." is a complete path segment, then replace that
             * prefix with "/" in the input buffer; otherwise, */
            sptr += 1;
            *sptr = '/';
         }
         else if(sptr[0] == '/' && sptr[1] == '.' && sptr[2] == '.' &&
            ((sptr[3] == '/') || (sptr[3] == '\0')))
         {
            /* C. if the input buffer begins with a prefix of "/../",
             * then replace that prefix with "/" in the input buffer and
             * remove the last segment and its preceding "/" (if any) from
             * the output buffer; otherwise, */
            if(sptr[3] == '/')
            {
               sptr += 3;
            }
            else if(sptr[3] == '\0')
            {
               sptr += 2;
               *sptr = '/';
            }

            /* remove the last segment and the preceding '/' */
            if(dptr > dfence)
            {
               dptr--;
               if(dptr[0] == '/')
               {
                  dptr--;
               }
            }
            while(dptr >= dfence && dptr[0] != '/')
            {
               dptr--;
            }
            if(dptr >= dfence)
            {
               dptr[0] = '\0';
            }
            else
            {
               dptr = dfence;
               dptr[0] = '\0';
            }
         }
         else if(sptr[0] == '.' && sptr[1] == '\0')
         {
            /* D. if the input buffer consists only of ".", then remove
             * that from the input buffer; otherwise, */
            sptr++;
         }
         else if(sptr[0] == '.' && sptr[1] == '.' && sptr[2] == '\0')
         {
            /* D. if the input buffer consists only of "..", then remove
             * that from the input buffer; otherwise,
             *
             * the terminator is tested at index 2: a test at index 1 can
             * never hold together with sptr[1] == '.' */
            sptr += 2;
         }
         else
         {
            /* E. Copy the path segment: the first character (a leading
             * '/', if any) plus everything up to the next '/', '?' or the
             * end of the input */
            do
            {
               *dptr++ = *sptr++;
               *dptr = '\0';
            } while(sptr[0] != '/' && sptr[0] != '?' && sptr[0] != '\0');
         }
      }

      /* Copy the remaining query parameters */
      if(sptr[0] == '?')
      {
         size_t rest_len = strlen(sptr);
         memcpy(dptr, sptr, rest_len + 1);
      }
      else
      {
         dptr[0] = '\0';
      }

      free(rval);
      free(src);
      rval = dest;
   }

   return rval;
}
/**
 * Resolves a CURIE (or, depending on the mode, an IRI) to a full IRI.
 *
 * - An invalid CURIE resolves to NULL.
 * - In the href/src and about/resource modes a value typed as "IRI or
 *   unsafe CURIE" is treated as an IRI and resolved against the base.
 * - A safe CURIE in any mode, or an unsafe CURIE in the instanceof/datatype,
 *   property and rel/rev modes, is split into prefix and reference; the
 *   prefix is expanded and joined with the reference.
 * - "_:" and "[_:]" resolve to the context's unnamed bnode, which is created
 *   on first use.
 *
 * @param context the RDFa context used for prefix lookup and bnode naming.
 * @param uri the CURIE or IRI to resolve; must not be NULL.
 * @param mode the attribute family the value was found in.
 *
 * @return a newly allocated string that the caller must free, or NULL if the
 *         value is not valid in this mode or memory could not be obtained.
 */
char* rdfa_resolve_curie(
   rdfacontext* context, const char* uri, curieparse_t mode)
{
   char* rval = NULL;
   curie_t ctype = rdfa_get_curie_type(uri);

   if(ctype == CURIE_TYPE_INVALID)
   {
      rval = NULL;
   }
   else if((ctype == CURIE_TYPE_IRI_OR_UNSAFE) &&
           ((mode == CURIE_PARSE_HREF_SRC) ||
            (mode == CURIE_PARSE_ABOUT_RESOURCE)))
   {
      /* If we are parsing something that can take either a CURIE or a
       * URI, and the type is either IRI or UNSAFE, assume that it is
       * an IRI */
      rval = rdfa_resolve_uri(context, uri);
   }

   /* if we are processing a safe CURIE OR
    * if we are parsing an unsafe CURIE that is an @type_of,
    * @datatype, @property, @rel, or @rev attribute, treat the curie
    * as not an IRI, but an unsafe CURIE */
   if((ctype == CURIE_TYPE_SAFE) ||
      ((ctype == CURIE_TYPE_IRI_OR_UNSAFE) &&
       ((mode == CURIE_PARSE_INSTANCEOF_DATATYPE) ||
        (mode == CURIE_PARSE_PROPERTY) ||
        (mode == CURIE_PARSE_RELREV))))
   {
      char* working_copy = NULL;
      char* wcptr = NULL;
      char* prefix = NULL;
      char* curie_reference = NULL;
      const char* expanded_prefix = NULL;

      /* strtok_r() writes into its input, so split a private copy */
      working_copy = (char*)malloc(strlen(uri) + 1);
      if(working_copy == NULL)
      {
         return rval;
      }
      strcpy(working_copy, uri);

      /* if this is a safe CURIE, chop off the beginning and the end */
      if(ctype == CURIE_TYPE_SAFE)
      {
         prefix = strtok_r(working_copy, "[:]", &wcptr);
         if(wcptr)
         {
            curie_reference = strtok_r(NULL, "[:]", &wcptr);
         }
      }
      else if(ctype == CURIE_TYPE_IRI_OR_UNSAFE)
      {
         prefix = strtok_r(working_copy, ":", &wcptr);
         if(wcptr)
         {
            curie_reference = strtok_r(NULL, ":", &wcptr);
         }
      }

      /* fully resolve the prefix.
       * If a colon was found, but no prefix, use the XHTML vocabulary URI
       * as the expanded prefix; in that case the first token is the
       * reference. (A second branch testing uri[0] == ':' used to follow
       * this one to map such CURIEs onto the document base; it could never
       * be reached and has been removed.) */
      if((uri[0] == ':') || (strcmp(uri, "[:]") == 0))
      {
         expanded_prefix = XHTML_VOCAB_URI;
         curie_reference = prefix;
         prefix = NULL;
      }
      else if(prefix != NULL)
      {
         if(strcmp(prefix, "_") == 0)
         {
            /* if the prefix specifies this as a blank node, then we
             * use the blank node prefix */
            expanded_prefix = "_";
         }
         else
         {
            /* if the prefix was defined, get it from the set of URI
             * mappings. */
#ifdef LIBRDFA_IN_RAPTOR
            raptor_namespace *nspace;
            raptor_uri* ns_uri;

            nspace = raptor_namespaces_find_namespace(
               &context->sax2->namespaces,
               (const unsigned char*)prefix, strlen(prefix));
            if(nspace)
            {
               ns_uri = raptor_namespace_get_uri(nspace);
               if(ns_uri)
               {
                  expanded_prefix = (const char*)raptor_uri_as_string(ns_uri);
               }
            }
#else
            expanded_prefix =
               rdfa_get_mapping(context->uri_mappings, prefix);
#endif
         }
      }

      if((expanded_prefix != NULL) && (curie_reference != NULL))
      {
         /* if the expanded prefix and the reference exist, generate the
          * full IRI. */
         if(strcmp(expanded_prefix, "_") == 0)
         {
            rval = rdfa_join_string("_:", curie_reference);
         }
         else
         {
            rval = rdfa_join_string(expanded_prefix, curie_reference);
         }
      }
      else if((expanded_prefix != NULL) && (expanded_prefix[0] != '_') &&
              (curie_reference == NULL))
      {
         /* if the expanded prefix exists, but the reference is null,
          * generate the CURIE because a reference-less CURIE is still
          * valid */
         rval = rdfa_join_string(expanded_prefix, "");
      }

      free(working_copy);
   }

   /* if we're NULL at this point, the CURIE might be the special
    * unnamed bnode specified by _: */
   if((rval == NULL) &&
      ((strcmp(uri, "[_:]") == 0) || (strcmp(uri, "_:") == 0)))
   {
      if(context->underscore_colon_bnode_name == NULL)
      {
         context->underscore_colon_bnode_name = rdfa_create_bnode(context);
      }
      rval = rdfa_replace_string(rval, context->underscore_colon_bnode_name);
   }

   /* even though a reference-only CURIE is valid, it does not
    * generate a triple in XHTML+RDFa. If we're NULL at this point,
    * the given value wasn't valid in XHTML+RDFa. */

   return rval;
}
/**
 * Resolves a possibly relative URI against the base of the given context.
 *
 * Resolution rules, first match wins:
 * - an empty URI resolves to the base;
 * - a URI containing ':' is taken as-is;
 * - a URI starting with '#' or '?' is appended to the base;
 * - a URI starting with '/' is appended to the base truncated at its third
 *   '/' (the end of the host part of a scheme://host/ base);
 * - anything else replaces the part of the base after its last '/'.
 *
 * @param context the RDFa context; context->base must not be NULL.
 * @param uri the URI to resolve; must not be NULL.
 *
 * @return a newly allocated string that the caller must free, or NULL if the
 *         URI could not be resolved (for example a relative path against a
 *         base without any '/') or memory could not be obtained.
 */
char* rdfa_resolve_uri(rdfacontext* context, const char* uri)
{
   char* rval = NULL;
   size_t base_length = strlen(context->base);

   if(strlen(uri) < 1)
   {
      /* if a blank URI is given, use the base context */
      rval = rdfa_replace_string(rval, context->base);
   }
   else if(strstr(uri, ":") != NULL)
   {
      /* if a IRI is given, don't concatenate */
      rval = rdfa_replace_string(rval, uri);
   }
   else if(uri[0] == '#' || uri[0] == '?')
   {
      /* a fragment ID or a query keeps the complete base path, so it is
       * concatenated with the base URI instead of replacing the last
       * path segment */
      rval = rdfa_join_string(context->base, uri);
   }
   else if(uri[0] == '/')
   {
      /* if a relative URI is given, but it starts with a '/', use the
       * host part concatenated to the given URI */
      char* tmp = NULL;
      char* end_index = NULL;

      /* work on a private copy of the base so that it can be truncated */
      tmp = rdfa_replace_string(tmp, context->base);
      if(tmp == NULL)
      {
         return NULL;
      }

      /* find the final '/' character after the host part of the context
       * base, i.e. the third '/' overall */
      end_index = strchr(tmp, '/');
      if(end_index != NULL)
      {
         end_index = strchr(end_index + 1, '/');
         if(end_index != NULL)
         {
            end_index = strchr(end_index + 1, '/');
         }
      }

      if(end_index != NULL)
      {
         /* keep only the host part */
         *end_index = '\0';
      }
      else
      {
         /* no path in the base: use it as-is, minus a trailing '/' so that
          * the join does not produce a doubled slash; the length check
          * keeps an empty base from indexing before the buffer */
         size_t tlen = strlen(tmp);

         if((tlen > 0) && (tmp[tlen - 1] == '/'))
         {
            tmp[tlen - 1] = '\0';
         }
      }

      rval = rdfa_join_string(tmp, uri);
      free(tmp);
   }
   else
   {
      if((base_length > 0) && (context->base[base_length - 1] == '/'))
      {
         /* if the base URI already ends in /, concatenate */
         rval = rdfa_join_string(context->base, uri);
      }
      else
      {
         /* if we have a relative URI, chop off the name of the file
          * and replace it with the relative pathname */
         char* end_index = strrchr(context->base, '/');

         if(end_index != NULL)
         {
            char* tmpstr = NULL;

            tmpstr = rdfa_replace_string(tmpstr, context->base);
            if(tmpstr != NULL)
            {
               char* end_index2 = strrchr(tmpstr, '/');

               if(end_index2 != NULL)
               {
                  /* terminate right after the last '/' */
                  end_index2[1] = '\0';
               }

               rval = rdfa_join_string(tmpstr, uri);
               free(tmpstr);
            }
         }
      }
   }

   return rval;
}
/** * Handles the start_element call */ static void start_element(void *parser_context, const char* name, const char* prefix, const char* URI, int nb_namespaces, const char** namespaces, int nb_attributes, int nb_defaulted, const char** attributes) { rdfacontext* root_context = (rdfacontext*)parser_context; rdfalist* context_stack = (rdfalist*)root_context->context_stack; rdfacontext* context = rdfa_create_new_element_context(context_stack); char* xml_lang = NULL; const char* about_curie = NULL; char* about = NULL; const char* src_curie = NULL; char* src = NULL; const char* type_of_curie = NULL; rdfalist* type_of = NULL; const char* rel_curie = NULL; rdfalist* rel = NULL; const char* rev_curie = NULL; rdfalist* rev = NULL; const char* property_curie = NULL; rdfalist* property = NULL; const char* resource_curie = NULL; char* resource = NULL; const char* href_curie = NULL; char* href = NULL; char* content = NULL; const char* datatype_curie = NULL; char* datatype = NULL; #ifdef LIBRDFA_IN_RAPTOR if(1) { raptor_parser* rdf_parser = (raptor_parser*)context->callback_data; raptor_sax2_update_document_locator(context->sax2, &rdf_parser->locator); } #endif rdfa_push_item(context_stack, context, RDFALIST_FLAG_CONTEXT); #if defined(DEBUG) && DEBUG > 0 if(1) { int i; /* dump all arguments sent to this callback */ fprintf(stdout, "DEBUG: SAX.startElementNs(%s", (char *) name); if (prefix == NULL) fprintf(stdout, ", NULL"); else fprintf(stdout, ", %s", (char *) prefix); if (URI == NULL) fprintf(stdout, ", NULL"); else fprintf(stdout, ", '%s'", (char *) URI); fprintf(stdout, ", %d", nb_namespaces); /* dump all namespaces */ if (namespaces != NULL) { for (i = 0;i < nb_namespaces * 2;i++) { fprintf(stdout, ", xmlns"); if (namespaces[i] != NULL) fprintf(stdout, ":%s", namespaces[i]); i++; fprintf(stdout, "='%s'", namespaces[i]); } } /* dump all attributes */ fprintf(stdout, ", %d, %d", nb_attributes, nb_defaulted); if (attributes != NULL) { for (i = 0;i < nb_attributes * 5;i += 5) 
{ if (attributes[i + 1] != NULL) fprintf( stdout, ", %s:%s='", attributes[i + 1], attributes[i]); else fprintf(stdout, ", %s='", attributes[i]); fprintf(stdout, "%.4s...', %d", attributes[i + 3], (int)(attributes[i + 4] - attributes[i + 3])); } } fprintf(stdout, ")\n"); } #endif /* start the XML Literal text */ if(context->xml_literal == NULL) { context->xml_literal = rdfa_replace_string(context->xml_literal, "<"); context->xml_literal_size = 1; } else { context->xml_literal = rdfa_n_append_string( context->xml_literal, &context->xml_literal_size, "<", 1); } context->xml_literal = rdfa_n_append_string( context->xml_literal, &context->xml_literal_size, name, strlen(name)); if(!context->xml_literal_namespaces_defined) { /* append namespaces to XML Literal */ #ifdef LIBRDFA_IN_RAPTOR raptor_namespace_stack* nstack = &context->sax2->namespaces; raptor_namespace* ns; raptor_namespace** ns_list = NULL; size_t ns_size; #else void** umap = context->uri_mappings; #endif char* umap_key = NULL; void* umap_value = NULL; /* if the namespaces are not defined, then neither is the xml:lang */ context->xml_literal_xml_lang_defined = 0; #ifdef LIBRDFA_IN_RAPTOR ns_size = 0; ns_list = raptor_namespace_stack_to_array(nstack, &ns_size); qsort((void*)ns_list, ns_size, sizeof(raptor_namespace*), raptor_nspace_compare); while(ns_size > 0) #else while(*umap != NULL) #endif { unsigned char insert_xmlns_definition = 1; const char* attr = NULL; /* get the next mapping to process */ #ifdef LIBRDFA_IN_RAPTOR ns=ns_list[--ns_size]; umap_key = (char*)raptor_namespace_get_prefix(ns); if(!umap_key) umap_key=(char*)XMLNS_DEFAULT_MAPPING; umap_value = (char*)raptor_uri_as_string(raptor_namespace_get_uri(ns)); #else rdfa_next_mapping(umap++, &umap_key, &umap_value); umap++; #endif /* check to make sure that the namespace isn't already * defined in the current element. 
*/ if(attributes != NULL) { const char** attrs = attributes; while((*attrs != NULL) && insert_xmlns_definition) { attr = *attrs++; /* if the attribute is a umap_key, skip the definition * of the attribute. */ if(strcmp(attr, umap_key) == 0) { insert_xmlns_definition = 0; } } } /* if the namespace isn't already defined on the element, * copy it to the XML Literal string. */ if(insert_xmlns_definition) { /* append the namespace attribute to the XML Literal */ context->xml_literal = rdfa_n_append_string( context->xml_literal, &context->xml_literal_size, " xmlns", strlen(" xmlns")); /* check to see if we're dumping the standard XHTML namespace or * a user-defined XML namespace */ if(strcmp(umap_key, XMLNS_DEFAULT_MAPPING) != 0) { context->xml_literal = rdfa_n_append_string( context->xml_literal, &context->xml_literal_size, ":", 1); context->xml_literal = rdfa_n_append_string( context->xml_literal, &context->xml_literal_size, umap_key, strlen(umap_key)); } /* append the namespace value */ context->xml_literal = rdfa_n_append_string( context->xml_literal, &context->xml_literal_size, "=\"", 2); context->xml_literal = rdfa_n_append_string( context->xml_literal, &context->xml_literal_size, (const char*)umap_value, strlen((char*)umap_value)); context->xml_literal = rdfa_n_append_string( context->xml_literal, &context->xml_literal_size, "\"", 1); } } /* end while umap not NULL */ context->xml_literal_namespaces_defined = 1; #ifdef LIBRDFA_IN_RAPTOR if(ns_list) raptor_free_memory(ns_list); #endif } /* end if namespaces inserted */ #ifdef LIBRDFA_IN_RAPTOR /* Raptor namespace code does this already */ #else /* 3. For backward compatibility, RDFa Processors should also permit the * definition of mappings via @xmlns. In this case, the value to be mapped * is set by the XML namespace prefix, and the value to map is the value of * the attribute - an IRI. (Note that prefix mapping via @xmlns is * deprecated, and may be removed in a future version of this * specification.) 
When xmlns is supported, such mappings must be processed * before processing any mappings from @prefix on the same element. */ if(namespaces != NULL) { int ni; for(ni = 0; ni < nb_namespaces * 2; ni += 2) { const char* ns = namespaces[ni]; const char* value = namespaces[ni + 1]; /* Regardless of how the mapping is declared, the value to be mapped * must be converted to lower case, and the IRI is not processed in * any way; in particular if it is a relative path it must not be * resolved against the current base. */ char* lcns = NULL; if(ns != NULL) { /* convert the namespace string to lowercase */ unsigned int i; size_t ns_length = strlen(ns); lcns = (char*)malloc(ns_length + 1); for(i = 0; i <= ns_length; i++) { lcns[i] = tolower(ns[i]); } } /* update the URI mappings */ rdfa_update_uri_mappings(context, lcns, value); if(lcns != NULL) { free(lcns); } } } #endif /* detect the RDFa version of the document, if specified */ if(attributes != NULL) { int ci; /* search for a version attribute */ for(ci = 0; ci < nb_attributes * 5; ci += 5) { const char* attr; char* value; size_t value_length = 0; attr = attributes[ci]; value_length = attributes[ci + 4] - attributes[ci + 3] + 1; if(strcmp(attr, "version") == 0) { /* append the attribute-value pair to the XML literal */ value = (char*)malloc(value_length + 1); snprintf(value, value_length, "%s", attributes[ci + 3]); if(strstr(value, "RDFa 1.0") != NULL) { context->rdfa_version = RDFA_VERSION_1_0; } else if(strstr(value, "RDFa 1.1") != NULL) { context->rdfa_version = RDFA_VERSION_1_1; } free(value); } } } #ifdef LIBRDFA_IN_RAPTOR if(context->sax2) { /* Raptor handles xml:lang itself but not 'lang' */ xml_lang = (char*)raptor_sax2_inscope_xml_language(context->sax2); xml_lang = rdfa_replace_string(NULL, xml_lang); } #endif /* prepare all of the RDFa-specific attributes we are looking for. 
* scan all of the attributes for the RDFa-specific attributes */ if(attributes != NULL) { int ci; if(context->rdfa_version == RDFA_VERSION_1_1) { /* process all vocab and prefix attributes */ for(ci = 0; ci < nb_attributes * 5; ci += 5) { const char* attr; char* value; size_t value_length = 0; attr = attributes[ci]; value_length = attributes[ci + 4] - attributes[ci + 3] + 1; /* append the attribute-value pair to the XML literal */ value = (char*)malloc(value_length + 1); snprintf(value, value_length, "%s", attributes[ci + 3]); /* 2. Next the current element is examined for any change to the * default vocabulary via @vocab. */ if(strcmp(attr, "vocab") == 0) { if(strlen(value) < 1) { /* If the value is empty, then the local default vocabulary * must be reset to the Host Language defined default * (if any). */ free(context->default_vocabulary); context->default_vocabulary = NULL; } else { char* resolved_uri; rdftriple* triple; /* If @vocab is present and contains a value, the local * default vocabulary is updated according to the * section on CURIE and IRI Processing. */ resolved_uri = rdfa_resolve_uri(context, value); context->default_vocabulary = rdfa_replace_string( context->default_vocabulary, resolved_uri); /* The value of @vocab is used to generate a triple */ triple = rdfa_create_triple( context->base, "http://www.w3.org/ns/rdfa#usesVocabulary", resolved_uri, RDF_TYPE_IRI, NULL, NULL); context->default_graph_triple_callback( triple, context->callback_data); free(resolved_uri); } } else if(strcmp(attr, "prefix") == 0) { /* Mappings are defined via @prefix. */ char* working_string = NULL; char* atprefix = NULL; char* iri = NULL; char* saveptr = NULL; working_string = rdfa_replace_string(working_string, value); /* Values in this attribute are evaluated from beginning to * end (e.g., left to right in typical documents). 
*/ atprefix = strtok_r(working_string, ":", &saveptr); while(atprefix != NULL) { /* find the prefix and IRI mappings while skipping whitespace */ while((*saveptr == ' ' || *saveptr == '\n' || *saveptr == '\r' || *saveptr == '\t' || *saveptr == '\f' || *saveptr == '\v') && *saveptr != '\0') { saveptr++; } iri = strtok_r(NULL, RDFA_WHITESPACE, &saveptr); /* update the prefix mappings */ rdfa_update_uri_mappings(context, atprefix, iri); if(!saveptr) break; while((*saveptr == ' ' || *saveptr == '\n' || *saveptr == '\r' || *saveptr == '\t' || *saveptr == '\f' || *saveptr == '\v') && *saveptr != '\0') { saveptr++; } /* get the next prefix to process */ atprefix = strtok_r(NULL, ":", &saveptr); } free(working_string); } else if(strcmp(attr, "inlist") == 0) { context->inlist_present = 1; } free(value); } } /* resolve all of the other RDFa values */ for(ci = 0; ci < nb_attributes * 5; ci += 5) { const char* attr; char* value; char* attrns; char* literal_text; size_t value_length = 0; attr = attributes[ci]; attrns = (char*)attributes[ci + 1]; value_length = attributes[ci + 4] - attributes[ci + 3] + 1; /* append the attribute-value pair to the XML literal */ value = (char*)malloc(value_length + 1); literal_text = (char*)malloc(strlen(attr) + value_length + 5); snprintf(value, value_length, "%s", attributes[ci + 3]); sprintf(literal_text, " %s=\"%s\"", attr, value); context->xml_literal = rdfa_n_append_string( context->xml_literal, &context->xml_literal_size, literal_text, strlen(literal_text)); free(literal_text); /* if xml:lang is defined, ensure that it is not overwritten */ if(attrns != NULL && strcmp(attrns, "xml") == 0 && strcmp(attr, "lang") == 0) { context->xml_literal_xml_lang_defined = 1; } /* process all of the RDFa attributes */ if(strcmp(attr, "about") == 0) { about_curie = value; about = rdfa_resolve_curie( context, about_curie, CURIE_PARSE_ABOUT_RESOURCE); } else if(strcmp(attr, "src") == 0) { src_curie = value; src = rdfa_resolve_curie(context, src_curie, 
CURIE_PARSE_HREF_SRC); } else if(strcmp(attr, "typeof") == 0) { type_of_curie = value; type_of = rdfa_resolve_curie_list( context, type_of_curie, CURIE_PARSE_INSTANCEOF_DATATYPE); } else if(strcmp(attr, "rel") == 0) { context->rel_present = 1; rel_curie = value; rel = rdfa_resolve_curie_list( context, rel_curie, CURIE_PARSE_RELREV); } else if(strcmp(attr, "rev") == 0) { context->rev_present = 1; rev_curie = value; rev = rdfa_resolve_curie_list( context, rev_curie, CURIE_PARSE_RELREV); } else if(strcmp(attr, "property") == 0) { property_curie = value; property = rdfa_resolve_curie_list( context, property_curie, CURIE_PARSE_PROPERTY); } else if(strcmp(attr, "resource") == 0) { resource_curie = value; resource = rdfa_resolve_curie( context, resource_curie, CURIE_PARSE_ABOUT_RESOURCE); } else if(strcmp(attr, "href") == 0) { href_curie = value; href = rdfa_resolve_curie(context, href_curie, CURIE_PARSE_HREF_SRC); } else if(strcmp(attr, "content") == 0) { content = rdfa_replace_string(content, value); } else if(strcmp(attr, "datatype") == 0) { datatype_curie = value; if(strlen(datatype_curie) == 0) { datatype = rdfa_replace_string(datatype, ""); } else { datatype = rdfa_resolve_curie(context, datatype_curie, CURIE_PARSE_INSTANCEOF_DATATYPE); } } else if((attrns == NULL && strcmp(attr, "lang") == 0) || (attrns != NULL && strcmp(attrns, "xml") == 0 && strcmp(attr, "lang") == 0)) { xml_lang = rdfa_replace_string(xml_lang, value); } free(value); } } /* The root element has an implicit @about declaration */ if(context->depth == 1 && about == NULL && resource == NULL && href == NULL && src == NULL) { about_curie = ""; about = rdfa_resolve_curie( context, about_curie, CURIE_PARSE_ABOUT_RESOURCE); } /* The HEAD and BODY element in XHTML and HTML has an implicit * about="" on it. 
*/ if(about == NULL && resource == NULL && href == NULL && src == NULL && (context->parent_subject == NULL || type_of != NULL) && ((context->host_language == HOST_LANGUAGE_XHTML1 || context->host_language == HOST_LANGUAGE_HTML) && (strcasecmp(name, "head") == 0 || strcasecmp(name, "body") == 0))) { about_curie = ""; about = rdfa_resolve_curie( context, about_curie, CURIE_PARSE_ABOUT_RESOURCE); } /* check to see if we should append an xml:lang to the XML Literal * if one is defined in the context and does not exist on the * element. */ if((xml_lang == NULL) && (context->language != NULL) && !context->xml_literal_xml_lang_defined) { context->xml_literal = rdfa_n_append_string( context->xml_literal, &context->xml_literal_size, " xml:lang=\"", strlen(" xml:lang=\"")); context->xml_literal = rdfa_n_append_string( context->xml_literal, &context->xml_literal_size, context->language, strlen(context->language)); context->xml_literal = rdfa_n_append_string( context->xml_literal, &context->xml_literal_size, "\"", 1); /* ensure that the lang isn't set in a subtree (unless it's overwritten) */ context->xml_literal_xml_lang_defined = 1; } /* close the XML Literal value */ context->xml_literal = rdfa_n_append_string( context->xml_literal, &context->xml_literal_size, ">", 1); /* 3. 
The [current element] is also parsed for any language * information, and [language] is set in the [current * evaluation context]; */ rdfa_update_language(context, xml_lang); /***************** FOR DEBUGGING PURPOSES ONLY ******************/ #if defined(DEBUG) && DEBUG > 0 printf("DEBUG: depth = %u\n", context->depth); if(about != NULL) { printf("DEBUG: @about = %s\n", about); } if(src != NULL) { printf("DEBUG: @src = %s\n", src); } if(type_of != NULL) { printf("DEBUG: @type_of = "); rdfa_print_list(type_of); } if(context->inlist_present) { printf("DEBUG: @inlist = true\n"); } if(rel != NULL) { printf("DEBUG: @rel = "); rdfa_print_list(rel); } if(rev != NULL) { printf("DEBUG: @rev = "); rdfa_print_list(rev); } if(property != NULL) { printf("DEBUG: @property = "); rdfa_print_list(property); } if(resource != NULL) { printf("DEBUG: @resource = %s\n", resource); } if(href != NULL) { printf("DEBUG: @href = %s\n", href); } if(content != NULL) { printf("DEBUG: @content = %s\n", content); } if(datatype != NULL) { printf("DEBUG: @datatype = %s\n", datatype); } if(xml_lang != NULL) { printf("DEBUG: @xml:lang = %s\n", xml_lang); } #endif /* TODO: This isn't part of the processing model, it needs to be * included and is a correction for the last item in step #4. */ if((about == NULL) && (src == NULL) && (type_of == NULL) && (rel == NULL) && (rev == NULL) && (property == NULL) && (resource == NULL) && (href == NULL) && (context->default_vocabulary == NULL) && (prefix == NULL)) { context->skip_element = 1; } if((rel == NULL) && (rev == NULL)) { if(context->rdfa_version == RDFA_VERSION_1_0) { /* 4. If the [current element] contains no valid @rel or @rev * URI, obtained according to the section on CURIE and URI * Processing, then the next step is to establish a value for * [new subject]. 
Any of the attributes that can carry a * resource can set [new subject]; */ rdfa_establish_new_1_0_subject( context, name, about, src, resource, href, type_of); } else { rdfa_establish_new_1_1_subject( context, name, about, src, resource, href, type_of, property, content, datatype); } } else { if(context->rdfa_version == RDFA_VERSION_1_0) { /* 5. If the [current element] does contain a valid @rel or @rev * URI, obtained according to the section on CURIE and URI * Processing, then the next step is to establish both a value * for [new subject] and a value for [current object resource]: */ rdfa_establish_new_1_0_subject_with_relrev( context, name, about, src, resource, href, type_of); } else { rdfa_establish_new_1_1_subject_with_relrev( context, name, about, src, resource, href, type_of); } } if(context->new_subject != NULL) { #if defined(DEBUG) && DEBUG > 0 printf("DEBUG: new_subject = %s\n", context->new_subject); #endif /* RDFa 1.0: 6. If in any of the previous steps a [new subject] was set * to a non-null value, it is now used to provide a subject for * type values; */ /* RDFa 1.1: 7. If in any of the previous steps a typed resource was set * to a non-null value, it is now used to provide a subject for type * values; */ if(type_of != NULL) { rdfa_complete_type_triples(context, type_of); } /* Note that none of this block is executed if there is no * [new subject] value, i.e., [new subject] remains null. 
*/ } if(context->current_object_resource != NULL) { /* If the element contains both the @inlist and the @rel attributes: * the @rel may contain one or more resources, obtained according to * the section on CURIE and IRI Processing each of which is used to * add an entry to the list mapping as follows: * if the local list mapping does not contain a list associated with * the IRI, instantiate a new list and add to local list mappings * add the current object resource to the list associated with the * resource in the local list mapping */ if(context->rdfa_version == RDFA_VERSION_1_1 && (rel != NULL) && context->inlist_present) { rdfresource_t object_type = RDF_TYPE_IRI; if((property != NULL) || (content != NULL)) { object_type = RDF_TYPE_PLAIN_LITERAL; if(datatype != NULL) { object_type = RDF_TYPE_TYPED_LITERAL; } } rdfa_establish_new_inlist_triples( context, rel, context->current_object_resource, object_type); } /* 7. If in any of the previous steps a [current object resource] * was set to a non-null value, it is now used to generate triples */ rdfa_complete_relrev_triples(context, rel, rev); } if((context->current_object_resource == NULL) && context->rdfa_version == RDFA_VERSION_1_1 && (rel != NULL) && context->inlist_present) { rdfa_save_incomplete_list_triples(context, rel); } else if((context->current_object_resource == NULL) && ((rel != NULL) || (rev != NULL))) { /* 8. If however [current object resource] was set to null, but * there are predicates present, then they must be stored as * [incomplete triple]s, pending the discovery of a subject that * can be used as the object. Also, [current object resource] * should be set to a newly created [bnode] */ rdfa_save_incomplete_triples(context, rel, rev); } /* Ensure to re-insert XML Literal namespace information from this * point on... 
*/ if(property != NULL) { context->xml_literal_namespaces_defined = 0; } /* save these for processing steps #9 and #10 */ context->about = rdfa_replace_string(context->about, about); context->resource = rdfa_replace_string(context->resource, resource); context->href = rdfa_replace_string(context->href, href); context->src = rdfa_replace_string(context->src, src); context->content = rdfa_replace_string(context->content, content); context->datatype = rdfa_replace_string(context->datatype, datatype); context->property = property; /* free the resolved CURIEs */ free(about); free(src); rdfa_free_list(type_of); rdfa_free_list(rel); rdfa_free_list(rev); free(xml_lang); free(content); free(resource); free(href); free(datatype); }
/**
 * Applies the RDFa 1.1 rules for an element that carries @rel or @rev:
 * both the new subject and the current object resource of the given
 * context are established, and the typed resource is set when @typeof is
 * present.
 *
 * @param context the RDFa context to update.
 * @param name the name of the current element; not consulted here.
 * @param about the full IRI for about, or NULL if there isn't one.
 * @param src the full IRI for src, or NULL if there isn't one.
 * @param resource the full IRI for resource, or NULL if there isn't one.
 * @param href the full IRI for href, or NULL if there isn't one.
 * @param type_of the list of IRIs for type_of, or NULL if type_of
 *                wasn't specified on the current element.
 */
void rdfa_establish_new_1_1_subject_with_relrev(
   rdfacontext* context, const char* name, const char* about, const char* src,
   const char* resource, const char* href, const rdfalist* type_of)
{
   /* @typeof given without @about: the typed resource then follows the
    * current object resource rather than the new subject */
   const int typed_without_about = (type_of != NULL) && (about == NULL);

   /* the object resource candidates in priority order:
    * @resource, then @href, then @src */
   const char* object_candidate =
      (resource != NULL) ? resource : ((href != NULL) ? href : src);

   /* --- new subject --- */

   /* @about wins when present. The caller supplies an empty @about for the
    * root element, so that rule of the processing model is covered too. */
   if(about != NULL)
   {
      context->new_subject =
         rdfa_replace_string(context->new_subject, about);
   }

   /* with @typeof present, the typed resource starts out as the new
    * subject */
   if(type_of != NULL)
   {
      context->typed_resource = rdfa_replace_string(
         context->typed_resource, context->new_subject);
   }

   /* without any subject so far, fall back to the parent object */
   if(context->new_subject == NULL && context->parent_object != NULL)
   {
      context->new_subject = rdfa_replace_string(
         context->new_subject, context->parent_object);
   }

   /* --- current object resource --- */

   if(object_candidate != NULL)
   {
      context->current_object_resource = rdfa_replace_string(
         context->current_object_resource, object_candidate);
   }
   else if(typed_without_about)
   {
      /* no resource attribute at all: use a freshly created bnode */
      char* fresh_bnode = rdfa_create_bnode(context);

      context->current_object_resource = rdfa_replace_string(
         context->current_object_resource, fresh_bnode);
      free(fresh_bnode);
   }

   if(typed_without_about)
   {
      context->typed_resource = rdfa_replace_string(
         context->typed_resource, context->current_object_resource);
   }

   /* At this point the current object resource is either still its
    * initial value or a full IRI or bnode. */
}
/**
 * Applies step 5 of the RDFa 1.0 processing rules for an element that
 * carries a valid @rel or @rev: both the new subject and the current
 * object resource of the given context are established.
 *
 * @param context the RDFa context to update.
 * @param name the name of the current element; not consulted here.
 * @param about the full IRI for about, or NULL if there isn't one.
 * @param src the full IRI for src, or NULL if there isn't one.
 * @param resource the full IRI for resource, or NULL if there isn't one.
 * @param href the full IRI for href, or NULL if there isn't one.
 * @param type_of the list of IRIs for type_of, or NULL if type_of
 *                wasn't specified on the current element.
 */
void rdfa_establish_new_1_0_subject_with_relrev(
   rdfacontext* context, const char* name, const char* about, const char* src,
   const char* resource, const char* href, const rdfalist* type_of)
{
   const char* subject_source = NULL;
   const char* object_source = (resource != NULL) ? resource : href;
   char* fresh_bnode = NULL;
   int subject_rule_matched = 1;

   /* [new subject] comes from the first rule that matches */
   if(about != NULL)
   {
      /* the URI from @about */
      subject_source = about;
   }
   else if(context->rdfa_version == RDFA_VERSION_1_0 && src != NULL)
   {
      /* otherwise the URI from @src */
      subject_source = src;
   }
   else if((type_of != NULL) && (type_of->num_items > 0))
   {
      /* otherwise, with @typeof present, a newly created [bnode] */
      fresh_bnode = rdfa_create_bnode(context);
      subject_source = fresh_bnode;
   }
   else if(context->parent_object != NULL)
   {
      /* otherwise the [parent object], when there is one */
      subject_source = context->parent_object;
   }
   else
   {
      /* no rule applies: [new subject] is left untouched */
      subject_rule_matched = 0;
   }

   if(subject_rule_matched)
   {
      context->new_subject =
         rdfa_replace_string(context->new_subject, subject_source);
   }
   free(fresh_bnode);

   /* [current object resource] is the URI from @resource, otherwise the
    * URI from @href, otherwise null */
   if(object_source != NULL)
   {
      context->current_object_resource = rdfa_replace_string(
         context->current_object_resource, object_source);
   }
   else
   {
      context->current_object_resource = NULL;
   }

   /* The final value of [current object resource] is therefore either
    * null or a full URI. */
}
/**
 * Applies the RDFa 1.1 rules for an element without @rel and @rev: the
 * new subject of the given context is established from the attributes on
 * the current element, together with the typed resource when @typeof is
 * present.
 *
 * @param context the RDFa context to update.
 * @param name the name of the current element; not consulted here.
 * @param about the full IRI for about, or NULL if there isn't one.
 * @param src the full IRI for src, or NULL if there isn't one.
 * @param resource the full IRI for resource, or NULL if there isn't one.
 * @param href the full IRI for href, or NULL if there isn't one.
 * @param type_of the list of IRIs for type_of, or NULL if there was
 *                no type_of specified.
 * @param property the list of properties detected on the element, or NULL.
 * @param content the value of @content, or NULL if there isn't one.
 * @param datatype the value of @datatype, or NULL if there isn't one.
 */
void rdfa_establish_new_1_1_subject(
   rdfacontext* context, const char* name, const char* about, const char* src,
   const char* resource, const char* href, const rdfalist* type_of,
   const rdfalist* property, const char* content, const char* datatype)
{
   const char* candidate;

   if(property == NULL || content != NULL || datatype != NULL)
   {
      /* General case: the element does not have @property on its own.
       * A resource attribute sets the new subject, taken in the order
       * @about, @resource, @href, @src. */
      candidate = (about != NULL) ? about :
         ((resource != NULL) ? resource :
         ((href != NULL) ? href : src));

      if(candidate != NULL)
      {
         context->new_subject =
            rdfa_replace_string(context->new_subject, candidate);
      }
      else if(type_of != NULL)
      {
         /* no resource attribute, but @typeof: use a new bnode. The root
          * element rule is covered by the caller, which supplies an
          * empty @about in that case. */
         char* fresh_bnode = rdfa_create_bnode(context);

         context->new_subject =
            rdfa_replace_string(context->new_subject, fresh_bnode);
         free(fresh_bnode);
      }
      else if(context->parent_object != NULL)
      {
         /* otherwise inherit the parent object ... */
         context->new_subject = rdfa_replace_string(
            context->new_subject, context->parent_object);

         /* ... and skip the element unless it carries @property */
         if(property == NULL)
         {
            context->skip_element = 1;
         }
      }

      /* finally, @typeof makes the new subject the typed resource */
      if(type_of != NULL)
      {
         context->typed_resource = rdfa_replace_string(
            context->typed_resource, context->new_subject);
      }
      return;
   }

   /* From here on: @property is present without @content and @datatype. */

   /* The new subject is @about when present (the caller supplies an empty
    * @about for the root element), otherwise the parent object. */
   if(about != NULL)
   {
      context->new_subject =
         rdfa_replace_string(context->new_subject, about);
   }
   else if(context->parent_object != NULL)
   {
      context->new_subject = rdfa_replace_string(
         context->new_subject, context->parent_object);
   }

   /* the typed resource only matters when @typeof is present */
   if(type_of == NULL)
   {
      return;
   }

   if(about != NULL)
   {
      /* with @about, it is also the typed resource and the current
       * object resource is left alone */
      context->typed_resource =
         rdfa_replace_string(context->typed_resource, about);
      return;
   }

   /* without @about the typed resource is @resource, then @href, then
    * @src, and as a last resort a newly created bnode */
   candidate = (resource != NULL) ? resource :
      ((href != NULL) ? href : src);

   if(candidate != NULL)
   {
      context->typed_resource =
         rdfa_replace_string(context->typed_resource, candidate);
   }
   else
   {
      char* fresh_bnode = rdfa_create_bnode(context);

      context->typed_resource =
         rdfa_replace_string(context->typed_resource, fresh_bnode);
      free(fresh_bnode);
   }

   /* the current object resource then takes the value of the typed
    * resource */
   context->current_object_resource = rdfa_replace_string(
      context->current_object_resource, context->typed_resource);
}