Beispiel #1
0
/**
 * Expands a value taken from an @rel or @rev attribute into a full IRI.
 *
 * The value is first compared, ignoring case and one optional leading
 * ':' (an empty CURIE prefix), against the table of XHTML reserved
 * words. On a match the reserved word, spelled as it appears in the
 * table, is appended to the XHTML vocabulary URI. When no reserved word
 * produced a result, the original, unmodified value is passed to
 * rdfa_resolve_curie() in CURIE_PARSE_RELREV mode.
 *
 * @param context the current processing context.
 * @param uri the attribute value to resolve.
 *
 * @return the resolved IRI, or NULL if the value could not be resolved.
 *         The memory returned from this function MUST be freed.
 */
char* rdfa_resolve_relrev_curie(rdfacontext* context, const char* uri)
{
   /* skip an empty prefix (a leading ':') for the reserved-word lookup */
   const char* word = (uri[0] == ':') ? (uri + 1) : uri;
   char* resolved = NULL;
   int idx;

   for(idx = 0; idx < XHTML_RELREV_RESERVED_WORDS_SIZE; idx++)
   {
      if(strcasecmp(g_relrev_reserved_words[idx], word) != 0)
      {
         continue;
      }

      /* only the first matching reserved word is used */
      resolved =
         rdfa_join_string(XHTML_VOCAB_URI, g_relrev_reserved_words[idx]);
      break;
   }

   /* not a reserved word: treat the value as an ordinary CURIE */
   if(resolved == NULL)
   {
      resolved = rdfa_resolve_curie(context, uri, CURIE_PARSE_RELREV);
   }

   return resolved;
}
Beispiel #2
0
/**
 * Parses the XHTML+RDFa file named by the first command line argument.
 *
 * The base URI handed to the parser is BASE_URI followed by the final
 * path component of the file name. The open file is installed as the
 * context's callback data and the fill_buffer and process_triple
 * callbacks are registered before parsing.
 *
 * @param argc the number of command line arguments.
 * @param argv the command line arguments; argv[1] is the input file.
 *
 * @return always 0.
 */
int main(int argc, char** argv)
{
   if(argc < 2)
   {
      printf("%s usage:\n\n"
             "%s <input.xhtml>\n", argv[0], argv[0]);
   }
   else
   {
      FILE* xhtml_file = fopen(argv[1], "r");

      if(xhtml_file != NULL)
      {
         char* base_uri;
         rdfacontext* context;

         /* rindex() returns NULL when the path has no '/', in which
          * case the whole argument already is the file name */
         char* filename = rindex(argv[1], '/');
         if(filename == NULL)
         {
            filename = argv[1];
         }
         else
         {
            filename++;
         }

         base_uri = rdfa_join_string(BASE_URI, filename);
         context = rdfa_create_context(base_uri);

         if(context != NULL)
         {
            context->callback_data = xhtml_file;

            rdfa_set_triple_handler(context, &process_triple);
            rdfa_set_buffer_filler(context, &fill_buffer);
            rdfa_parse(context);
            rdfa_free_context(context);
         }
         else
         {
            fprintf(stderr, "failed to create the RDFa context\n");
         }

         fclose(xhtml_file);
         free(base_uri);
      }
      else
      {
         perror("failed to open file:");
      }
   }

   return 0;
}
Beispiel #3
0
/**
 * Runs a set of unit tests given the RDFa context, base name of the test,
 * a set of CURIEs, a processing function, and a base IRI.
 *
 * For every CURIE the callback is invoked and its result is compared with
 * the expected value. When iri is non-NULL the expected value is iri with
 * the CURIE appended; when iri is NULL the callback is expected to return
 * NULL. The global counters g_test_num, g_test_passes and g_test_fails
 * are updated and one result line per CURIE is printed.
 *
 * @param context the RDFa context.
 * @param name the base name of the test.
 * @param curies the set of CURIEs to resolve.
 * @param curies_size the number of entries in curies.
 * @param cb the function callback to the CURIE resolution function.
 * @param iri the base value of what the resulting IRI should be, the
 *            value of each set member will be appended to the IRI. May
 *            be NULL to state that resolution is expected to fail.
 * @param mode not used by this function.
 */
void run_test_set(rdfacontext* context, const char* name, const char** curies,
   size_t curies_size, curie_func_two_arg cb, const char* iri, curieparse_t mode)
{
   size_t i;

   (void)mode;

   for(i = 0; i < curies_size; i++)
   {
      char* full_iri = NULL;
      char* result = cb(context, curies[i]);
      const char* expected_text;
      const char* result_text;
      int compare = -1;

      // check to see if we should check for NULL or if the strings
      // should match.
      if(iri != NULL)
      {
         // only build the expected IRI when there is a base to join with
         full_iri = rdfa_join_string(iri, curies[i]);

         // a NULL result can never match an expected IRI
         if((result != NULL) && (full_iri != NULL))
         {
            compare = strcmp(result, full_iri);
         }
      }
      else if(result == NULL)
      {
         compare = 0;
      }

      // never hand a NULL pointer to a %s conversion
      expected_text = (full_iri != NULL) ? full_iri : "(null)";
      result_text = (result != NULL) ? result : "(null)";

      printf("UT#%02i/%s \"%s\" ...", ++g_test_num, name, expected_text);

      // if the string compare shows identical values, pass the test,
      // otherwise, fail the test.
      if(compare == 0)
      {
         printf("PASS.\n");
         g_test_passes++;
      }
      else
      {
         printf("FAIL. Got \"%s\", but should have been \"%s\".\n",
            result_text, expected_text);
         g_test_fails++;
      }

      free(result);
      free(full_iri);
   }
}
Beispiel #4
0
/**
 * Reads the XHTML+RDFa file named by the first command line argument
 * into memory and parses it from that in-memory buffer.
 *
 * At most 65535 bytes of the file are read. The buffer is handed to the
 * parser through a buffer_status structure installed as the context's
 * callback data. The base URI is BASE_URI followed by the final path
 * component of the file name.
 *
 * @param argc the number of command line arguments.
 * @param argv the command line arguments; argv[1] is the input file.
 *
 * @return always 0.
 */
int main(int argc, char** argv)
{
#ifdef LIBRDFA_IN_RAPTOR
   raptor_init();
#endif

   if(argc < 2)
   {
      printf("%s usage:\n\n"
             "%s <input.xhtml>\n", argv[0], argv[0]);
   }
   else
   {
      char* filename;

      g_xhtml_file = fopen(argv[1], "r");
      filename = rindex(argv[1], '/');
      if(filename == NULL)
      {
         filename = argv[1];
      }
      else
      {
         filename++;
      }

      if(g_xhtml_file != NULL)
      {
         unsigned int buffer_length = 65535;
         /* one extra byte so the data can always be NUL-terminated */
         char* buffer = malloc(buffer_length + 1);
         char* base_uri = rdfa_join_string(BASE_URI, filename);
         buffer_status* status = malloc(sizeof(buffer_status));

         if((buffer == NULL) || (base_uri == NULL) || (status == NULL))
         {
            fprintf(stderr, "failed to allocate memory\n");
            fclose(g_xhtml_file);
         }
         else
         {
            rdfacontext* context;
            size_t bytes_read;

            // get all of the buffer text
            bytes_read =
               fread(buffer, sizeof(char), buffer_length, g_xhtml_file);
            fclose(g_xhtml_file);

            // fread() does not terminate the data, strlen() needs it
            buffer[bytes_read] = '\0';

            context = rdfa_create_context(base_uri);
            if(context != NULL)
            {
               // initialize the callback data
               status->buffer = buffer;
               status->current_offset = 0;
               status->total_length = strlen(buffer);
               context->callback_data = status;

               // setup the parser
               rdfa_set_default_graph_triple_handler(
                  context, &default_graph_triple);
               rdfa_set_processor_graph_triple_handler(
                  context, &processor_graph_triple);
               rdfa_set_buffer_filler(context, &fill_buffer);
               rdfa_parse(context);
               rdfa_free_context(context);
            }
            else
            {
               fprintf(stderr, "failed to create the RDFa context\n");
            }
         }

         // the callback data is allocated here, so it is released here
         free(status);
         free(buffer);
         free(base_uri);
      }
      else
      {
         perror("failed to open file:");
      }
   }

#ifdef LIBRDFA_IN_RAPTOR
   raptor_finish();
#endif
   return 0;
}
Beispiel #5
0
/**
 * Resolves a possibly relative URI against the base of the given context.
 *
 * The following cases are handled, in this order:
 *  - an empty URI yields a copy of the base,
 *  - a URI containing ':' is copied unchanged,
 *  - a URI starting with '#' or '?' is appended to the base,
 *  - a URI starting with '/' is appended to the part of the base that
 *    precedes its third '/' (the whole base, minus one trailing '/',
 *    when the base has fewer than three '/' characters),
 *  - anything else is appended to the base up to and including its last
 *    '/'; if the base contains no '/', NULL is returned.
 *
 * If the result contains "://" followed somewhere by "/.", the dot
 * segments ("." and "..") in its path are removed; text from the first
 * '?' onwards is copied unchanged.
 *
 * @param context the processing context, context->base is the base IRI.
 * @param uri the URI to resolve.
 *
 * @return a newly allocated string that the caller must free, or NULL if
 *         the URI could not be resolved or memory could not be allocated.
 */
char* rdfa_resolve_uri(rdfacontext* context, const char* uri)
{
   char* rval = NULL;
   char* path_start = NULL;
   size_t base_length = strlen(context->base);

   if(strlen(uri) < 1)
   {
      /* if a blank URI is given, use the base context */
      rval = rdfa_replace_string(rval, context->base);
   }
   else if(strstr(uri, ":") != NULL)
   {
      /* if a IRI is given, don't concatenate */
      rval = rdfa_replace_string(rval, uri);
   }
   else if(uri[0] == '#' || uri[0] == '?')
   {
      /* if a fragment ID or start of a query parameter is given,
       * concatenate it with the base URI */
      rval = rdfa_join_string(context->base, uri);
   }
   else if(uri[0] == '/')
   {
      /* if a relative URI is given, but it starts with a '/', use the
       * host part concatenated to the given URI */
      char* tmp = NULL;
      char* end_index = NULL;
      char* rval_copy = NULL;

      /* initialize the working-set data */
      tmp = rdfa_replace_string(tmp, context->base);
      if(tmp == NULL)
      {
         return NULL;
      }

      /* find the final '/' character after the host part of the context
       * base, which is the third '/' in the string */
      end_index = strchr(tmp, '/');
      if(end_index != NULL)
      {
         end_index = strchr(end_index + 1, '/');

         if(end_index != NULL)
         {
            end_index = strchr(end_index + 1, '/');
         }
      }

      if(end_index != NULL)
      {
         /* the '/' character after the host part was found, keep only
          * the host part */
         *end_index = '\0';
      }

      rval_copy = rdfa_replace_string(NULL, tmp);

      if(rval_copy != NULL)
      {
         if(end_index == NULL)
         {
            /* the base is used as-is; drop one trailing '/' because the
             * given URI already starts with one. The length check keeps
             * an empty base from indexing before the buffer. */
            size_t tlen = strlen(rval_copy);

            if(tlen > 0 && rval_copy[tlen - 1] == '/')
            {
               rval_copy[tlen - 1] = '\0';
            }
         }

         rval = rdfa_join_string(rval_copy, uri);
         free(rval_copy);
      }

      free(tmp);
   }
   else
   {
      if(base_length > 0 && (char)context->base[base_length - 1] == '/')
      {
         /* if the base URI already ends in /, concatenate */
         rval = rdfa_join_string(context->base, uri);
      }
      else
      {
         /* if we have a relative URI, chop off the name of the file
          * and replace it with the relative pathname */
         char* end_index = strrchr(context->base, '/');

         if(end_index != NULL)
         {
            char* tmpstr = NULL;
            char* end_index2;

            tmpstr = rdfa_replace_string(tmpstr, context->base);
            if(tmpstr != NULL)
            {
               end_index2 = strrchr(tmpstr, '/');
               if(end_index2 != NULL)
               {
                  end_index2++;
                  *end_index2 = '\0';
               }

               rval = rdfa_join_string(tmpstr, uri);
               free(tmpstr);
            }
         }
      }
   }

   /* It is possible that rval may be NULL here in OOM scenarios */
   if(!rval)
   {
      return NULL;
   }

   /* Find the start of a scheme-based URL path */
   path_start = (char*)strstr(rval, "://");
   if(path_start != NULL)
   {
      if(strstr(path_start, "/.") != NULL)
      {
         /* skip "://" and locate the first '/' after the authority */
         path_start += 3;
         path_start = strstr(path_start, "/");
      }
      else
      {
         /* no "/." anywhere: there are no dot-segments to remove */
         path_start = NULL;
      }
   }

   /* remove any dot-segments that remain in the URL for URLs w/ schemes */
   if(path_start != NULL)
   {
      size_t rlen = strlen(rval) + 1;
      size_t hlen = path_start - rval;
      /* src is a zero-padded copy of rval; the padding lets the code
       * below look up to three characters ahead and rewrite the
       * terminator without leaving the buffer */
      char* src = (char*)malloc(rlen + 4);
      char* dest = (char*)malloc(rlen + 1);
      char* sptr;
      char* dptr;
      char* dfence;

      if(src == NULL || dest == NULL)
      {
         free(src);
         free(dest);
         free(rval);
         return NULL;
      }

      sptr = src + hlen;
      dptr = dest + hlen;
      /* dfence marks the start of the path in dest; segments are never
       * removed in front of it */
      dfence = dptr;

      memset(src, 0, rlen + 4);
      memcpy(src, rval, rlen);
      /* copy the scheme and authority, which are left untouched */
      memcpy(dest, rval, hlen);
      dest[hlen] = '\0';

      /* Process the path portion of the IRI */
      while(sptr[0] != '?' && sptr[0] != '\0')
      {
         if(sptr[0] == '.' && sptr[1] == '.' && sptr[2] == '/')
         {
            /* A.  If the input buffer begins with a prefix of "../",
             * then remove that prefix from the input buffer; otherwise,
             */
            sptr += 3;
         }
         else if(sptr[0] == '.' && sptr[1] == '/')
         {
            /* A.  If the input buffer begins with a prefix of "./",
             * then remove that prefix from the input buffer; otherwise,
             */
            sptr += 2;
         }
         else if(sptr[0] == '/' && sptr[1] == '.' && sptr[2] == '/')
         {
            /* B.  if the input buffer begins with a prefix of "/./",
             * then replace that prefix with "/" in the input buffer;
             * otherwise,
             */
            sptr += 2;
         }
         else if(sptr[0] == '/' && sptr[1] == '.' && sptr[2] == '\0')
         {
            /* B.  if the input buffer begins with a prefix of "/.",
             * where "." is a complete path segment, then replace that
             * prefix with "/" in the input buffer; otherwise,
             */
            sptr += 1;
            *sptr = '/';
         }
         else if(sptr[0] == '/' && sptr[1] == '.' && sptr[2] == '.' &&
            ((sptr[3] == '/') || (sptr[3] == '\0')))
         {
            /* C.  if the input buffer begins with a prefix of "/../",
             * then replace that prefix with "/" in the input buffer and
             * remove the last segment and its preceding "/" (if any) from
             * the output buffer; otherwise,
             */
            if(sptr[3] == '/')
            {
               sptr += 3;
            }
            else if(sptr[3] == '\0')
            {
               sptr += 2;
               *sptr = '/';
            }

            /* remove the last segment and the preceding '/' */
            if(dptr > dfence)
            {
               dptr--;
               if(dptr[0] == '/')
               {
                  dptr--;
               }
            }
            while(dptr >= dfence && dptr[0] != '/')
            {
               dptr--;
            }
            if(dptr >= dfence)
            {
               dptr[0] = '\0';
            }
            else
            {
               dptr = dfence;
               dptr[0] = '\0';
            }
         }
         else if(sptr[0] == '.' && sptr[1] == '\0')
         {
            /* D. if the input buffer consists only of ".", then remove
             * that from the input buffer; otherwise,
             */
            sptr++;
         }
         else if(sptr[0] == '.' && sptr[1] == '.' && sptr[2] == '\0')
         {
            /* D. if the input buffer consists only of "..", then remove
             * that from the input buffer; otherwise,
             */
            sptr += 2;
         }
         else
         {
            /* Copy the path segment */
            do
            {
               *dptr++ = *sptr++;
               *dptr = '\0';
            } while(sptr[0] != '/' && sptr[0] != '?' && sptr[0] != '\0');
         }
      }

      /* Copy the remaining query parameters */
      if(sptr[0] == '?')
      {
         size_t rest_len = strlen(sptr);
         memcpy(dptr, sptr, rest_len + 1);
      }
      else
      {
         dptr[0] = '\0';
      }

      free(rval);
      free(src);
      rval = dest;
   }

   return rval;
}
Beispiel #6
0
/**
 * Resolves a possibly relative URI against the base of the given context.
 *
 * The following cases are handled, in this order:
 *  - an empty URI yields a copy of the base,
 *  - a URI containing ':' is copied unchanged,
 *  - a URI starting with '#' is appended to the base,
 *  - a URI starting with '/' is appended to the part of the base that
 *    precedes its third '/' (the whole base, minus one trailing '/',
 *    when the base has fewer than three '/' characters),
 *  - anything else is appended to the base up to and including its last
 *    '/'; if the base contains no '/', NULL is returned.
 *
 * @param context the processing context, context->base is the base IRI.
 * @param uri the URI to resolve.
 *
 * @return a newly allocated string that the caller must free, or NULL if
 *         the URI could not be resolved.
 */
char* rdfa_resolve_uri(rdfacontext* context, const char* uri)
{
   char* rval = NULL;
   size_t base_length = strlen(context->base);

   if(strlen(uri) < 1)
   {
      // if a blank URI is given, use the base context
      rval = rdfa_replace_string(rval, context->base);
   }
   else if(strstr(uri, ":") != NULL)
   {
      // if a IRI is given, don't concatenate
      rval = rdfa_replace_string(rval, uri);
   }
   else if(uri[0] == '#')
   {
      // if a fragment ID is given, concatenate it with the base URI
      rval = rdfa_join_string(context->base, uri);
   }
   else if(uri[0] == '/')
   {
      // if a relative URI is given, but it starts with a '/', use the
      // host part concatenated to the given URI
      char* tmp = NULL;
      char* end_index = NULL;
      char* rval_copy = NULL;

      // initialize the working-set data
      tmp = rdfa_replace_string(tmp, context->base);
      if(tmp == NULL)
      {
         return NULL;
      }

      // find the final '/' character after the host part of the context
      // base, which is the third '/' in the string
      end_index = strchr(tmp, '/');
      if(end_index != NULL)
      {
         end_index = strchr(end_index + 1, '/');

         if(end_index != NULL)
         {
            end_index = strchr(end_index + 1, '/');
         }
      }

      if(end_index != NULL)
      {
         // the '/' character after the host part was found, keep only
         // the host part
         *end_index = '\0';
      }

      rval_copy = rdfa_replace_string(NULL, tmp);

      if(rval_copy != NULL)
      {
         if(end_index == NULL)
         {
            // the base is used as-is; drop one trailing '/' because the
            // given URI already starts with one. The length check keeps
            // an empty base from indexing before the buffer.
            size_t tlen = strlen(rval_copy);

            if(tlen > 0 && rval_copy[tlen - 1] == '/')
            {
               rval_copy[tlen - 1] = '\0';
            }
         }

         rval = rdfa_join_string(rval_copy, uri);
         free(rval_copy);
      }

      free(tmp);
   }
   else
   {
      if(base_length > 0 && (char)context->base[base_length - 1] == '/')
      {
         // if the base URI already ends in /, concatenate
         rval = rdfa_join_string(context->base, uri);
      }
      else
      {
         // if we have a relative URI, chop off the name of the file
         // and replace it with the relative pathname
         char* end_index = strrchr(context->base, '/');

         if(end_index != NULL)
         {
            char* tmpstr = NULL;
            char* end_index2;

            tmpstr = rdfa_replace_string(tmpstr, context->base);
            if(tmpstr != NULL)
            {
               end_index2 = strrchr(tmpstr, '/');
               if(end_index2 != NULL)
               {
                  end_index2++;
                  *end_index2 = '\0';
               }

               rval = rdfa_join_string(tmpstr, uri);
               free(tmpstr);
            }
         }
      }
   }

   return rval;
}
Beispiel #7
0
/**
 * Resolves a CURIE, safe CURIE or IRI into a full IRI, depending on the
 * attribute that is being parsed.
 *
 * - A value classified as CURIE_TYPE_INVALID is not expanded.
 * - A value classified as CURIE_TYPE_IRI_OR_UNSAFE is resolved as an IRI
 *   through rdfa_resolve_uri() when mode is CURIE_PARSE_HREF_SRC or
 *   CURIE_PARSE_ABOUT_RESOURCE.
 * - A safe CURIE, or an unsafe one when mode is
 *   CURIE_PARSE_INSTANCEOF_DATATYPE, CURIE_PARSE_PROPERTY or
 *   CURIE_PARSE_RELREV, is split into prefix and reference and the
 *   prefix is expanded: an empty prefix maps to XHTML_VOCAB_URI, "_"
 *   denotes a blank node, anything else is looked up in the prefix
 *   mappings.
 * - If nothing was produced and the value is "_:" or "[_:]", the
 *   context's unnamed blank node is used, creating it on first use.
 *
 * @param context the current processing context.
 * @param uri the value to resolve.
 * @param mode the kind of attribute the value was taken from.
 *
 * @return a newly allocated string that the caller must free, or NULL if
 *         the value could not be resolved.
 */
char* rdfa_resolve_curie(
   rdfacontext* context, const char* uri, curieparse_t mode)
{
   char* rval = NULL;
   curie_t ctype = rdfa_get_curie_type(uri);

   if(ctype == CURIE_TYPE_INVALID)
   {
      rval = NULL;
   }
   else if((ctype == CURIE_TYPE_IRI_OR_UNSAFE) &&
           ((mode == CURIE_PARSE_HREF_SRC) ||
            (mode == CURIE_PARSE_ABOUT_RESOURCE)))
   {
      // If we are parsing something that can take either a CURIE or a
      // URI, and the type is either IRI or UNSAFE, assume that it is
      // an IRI
      rval = rdfa_resolve_uri(context, uri);
   }

   // if we are processing a safe CURIE OR
   // if we are parsing an unsafe CURIE that is an @type_of,
   // @datatype, @property, @rel, or @rev attribute, treat the curie
   // as not an IRI, but an unsafe CURIE
   if((ctype == CURIE_TYPE_SAFE) ||
         ((ctype == CURIE_TYPE_IRI_OR_UNSAFE) &&
          ((mode == CURIE_PARSE_INSTANCEOF_DATATYPE) ||
           (mode == CURIE_PARSE_PROPERTY) ||
           (mode == CURIE_PARSE_RELREV))))
   {
      char* working_copy = NULL;
      char* wcptr = NULL;
      char* prefix = NULL;
      char* curie_reference = NULL;
      const char* expanded_prefix = NULL;

      // strtok_r() modifies its input, so tokenize a private copy
      working_copy = (char*)malloc(strlen(uri) + 1);
      if(working_copy == NULL)
      {
         // rval is still NULL here: the IRI branch above and this
         // branch are mutually exclusive
         return NULL;
      }
      strcpy(working_copy, uri);

      // if this is a safe CURIE, chop off the beginning and the end
      if(ctype == CURIE_TYPE_SAFE)
      {
         prefix = strtok_r(working_copy, "[:]", &wcptr);
         if(wcptr)
         {
            curie_reference = strtok_r(NULL, "[:]", &wcptr);
         }
      }
      else if(ctype == CURIE_TYPE_IRI_OR_UNSAFE)
      {
         prefix = strtok_r(working_copy, ":", &wcptr);
         if(wcptr)
         {
            curie_reference = strtok_r(NULL, ":", &wcptr);
         }
      }

      // fully resolve the prefix and get it's length

      // if a colon was found, but no prefix, use the XHTML vocabulary URI
      // as the expanded prefix. The first token found above is then the
      // reference rather than the prefix.
      //
      // NOTE: the original code had a second branch testing
      // uri[0] == ':' that would have used context->base instead; it
      // could never run because this condition already covers it, so it
      // has been removed.
      if((uri[0] == ':') || (strcmp(uri, "[:]") == 0))
      {
         expanded_prefix = XHTML_VOCAB_URI;
         curie_reference = prefix;
         prefix = NULL;
      }
      else if(prefix != NULL)
      {
         if(strcmp(prefix, "_") == 0)
         {
            // if the prefix specifies this as a blank node, then we
            // use the blank node prefix
            expanded_prefix = "_";
         }
         else
         {
            // if the prefix was defined, get it from the set of URI mappings.
#ifdef LIBRDFA_IN_RAPTOR
            raptor_namespace *nspace;
            raptor_uri* ns_uri;
            nspace = raptor_namespaces_find_namespace(&context->sax2->namespaces,
                                                      (const unsigned char*)prefix, strlen(prefix));
            if(nspace) {
               ns_uri = raptor_namespace_get_uri(nspace);
               if(ns_uri)
                  expanded_prefix = (const char*)raptor_uri_as_string(ns_uri);
            }
#else
            expanded_prefix =
               rdfa_get_mapping(context->uri_mappings, prefix);
#endif
         }
      }

      if((expanded_prefix != NULL) && (curie_reference != NULL))
      {
         // if the expanded prefix and the reference exist, generate the
         // full IRI.
         if(strcmp(expanded_prefix, "_") == 0)
         {
            rval = rdfa_join_string("_:", curie_reference);
         }
         else
         {
            rval = rdfa_join_string(expanded_prefix, curie_reference);
         }
      }
      else if((expanded_prefix != NULL) && (expanded_prefix[0] != '_') &&
         (curie_reference == NULL))
      {
         // if the expanded prefix exists, but the reference is null,
         // generate the CURIE because a reference-less CURIE is still
         // valid
         rval = rdfa_join_string(expanded_prefix, "");
      }

      free(working_copy);
   }

   // if we're NULL at this point, the CURIE might be the special
   // unnamed bnode specified by _:
   if((rval == NULL) &&
      ((strcmp(uri, "[_:]") == 0) ||
       (strcmp(uri, "_:") == 0)))
   {
      if(context->underscore_colon_bnode_name == NULL)
      {
         context->underscore_colon_bnode_name = rdfa_create_bnode(context);
      }
      rval = rdfa_replace_string(rval, context->underscore_colon_bnode_name);
   }

   // even though a reference-only CURIE is valid, it does not
   // generate a triple in XHTML+RDFa. If we're NULL at this point,
   // the given value wasn't valid in XHTML+RDFa.

   return rval;
}