Esempio n. 1
0
/* start of an element */
void 
raptor_sax2_start_element(void* user_data, const unsigned char *name,
                          const unsigned char **atts)
{
  raptor_sax2* sax2=(raptor_sax2*)user_data;
  raptor_qname* el_name;
  unsigned char **xml_atts_copy=NULL;
  size_t xml_atts_size=0;
  int all_atts_count=0;
  int ns_attributes_count=0;
  raptor_qname** named_attrs=NULL;
  raptor_xml_element* xml_element=NULL;
  unsigned char *xml_language=NULL;
  raptor_uri *xml_base=NULL;

  if(sax2->failed)
    return;

#ifdef RAPTOR_XML_EXPAT
#ifdef EXPAT_UTF8_BOM_CRASH
  sax2->tokens_count++;
#endif
#endif

#ifdef RAPTOR_XML_LIBXML
  if(atts) {
    int i;
    
    /* Do XML attribute value normalization */
    for (i = 0; atts[i]; i+=2) {
      unsigned char *value=(unsigned char*)atts[i+1];
      unsigned char *src = value;
      unsigned char *dst = xmlStrdup(value);

      if(!dst) {
        raptor_log_error_to_handlers(sax2->world,
                                     sax2->error_handlers, 
                                     RAPTOR_LOG_LEVEL_FATAL,
                                     sax2->locator, "Out of memory");
        return;
      }

      atts[i+1]=dst;

      while (*src == 0x20 || *src == 0x0d || *src == 0x0a || *src == 0x09) 
        src++;
      while (*src) {
        if (*src == 0x20 || *src == 0x0d || *src == 0x0a || *src == 0x09) {
          while (*src == 0x20 || *src == 0x0d || *src == 0x0a || *src == 0x09)
            src++;
          if (*src)
            *dst++ = 0x20;
        } else {
          *dst++ = *src++;
        }
      }
      *dst = '\0';
      xmlFree(value);
    }
  }
#endif

  raptor_sax2_inc_depth(sax2);

  if(atts) {
    int i;

    /* Save passed in XML attributes pointers so we can 
     * NULL the pointers when they get handled below (various atts[i]=NULL)
     */
    for (i = 0; atts[i]; i++) ;
    xml_atts_size=sizeof(unsigned char*) * i;
    if(xml_atts_size) {
      xml_atts_copy=(unsigned char**)RAPTOR_MALLOC(cstringpointer,xml_atts_size);
      if(!xml_atts_copy)
        goto fail;
      memcpy(xml_atts_copy, atts, xml_atts_size);
    }

    /* XML attributes processing:
     *   xmlns*   - XML namespaces (Namespaces in XML REC)
     *     Deleted and used to synthesise namespaces declarations
     *   xml:lang - XML language (XML REC)
     *     Deleted and optionally normalised to lowercase
     *   xml:base - XML Base (XML Base REC)
     *     Deleted and used to set the in-scope base URI for this XML element
     */
    for (i = 0; atts[i]; i+= 2) {
      all_atts_count++;

      if(strncmp((char*)atts[i], "xml", 3)) {
        /* count and skip non xml* attributes */
        ns_attributes_count++;
        continue;
      }

      /* synthesise the XML namespace events */
      if(!memcmp((const char*)atts[i], "xmlns", 5)) {
        const unsigned char *prefix=atts[i][5] ? &atts[i][6] : NULL;
        const unsigned char *namespace_name=atts[i+1];

        raptor_namespace* nspace;
        nspace=raptor_new_namespace(&sax2->namespaces,
                                    prefix, namespace_name,
                                    raptor_sax2_get_depth(sax2));

        if(nspace) {
          raptor_namespaces_start_namespace(&sax2->namespaces, nspace);

          if(sax2->namespace_handler)
            (*sax2->namespace_handler)(sax2->user_data, nspace);
        }
      } else if(!strcmp((char*)atts[i], "xml:lang")) {
        xml_language=(unsigned char*)RAPTOR_MALLOC(cstring, strlen((char*)atts[i+1])+1);
        if(!xml_language) {
          raptor_log_error_to_handlers(sax2->world,
                                       sax2->error_handlers, 
                                       RAPTOR_LOG_LEVEL_FATAL,
                                       sax2->locator, "Out of memory");
          goto fail;
        }

        /* optionally normalize language to lowercase */
        if(sax2->feature_normalize_language) {
          unsigned char *from=(unsigned char*)atts[i+1];
          unsigned char *to=xml_language;
          
          while(*from) {
            if(isupper(*from))
              *to++ =tolower(*from++);
            else
              *to++ =*from++;
          }
          *to='\0';
        } else
          strcpy((char*)xml_language, (char*)atts[i+1]);
      } else if(!strcmp((char*)atts[i], "xml:base")) {
        raptor_uri* base_uri;
        raptor_uri* xuri;
        base_uri=raptor_sax2_inscope_base_uri(sax2);
        xuri=raptor_new_uri_relative_to_base_v2(sax2->world, base_uri, atts[i+1]);
        xml_base=raptor_new_uri_for_xmlbase_v2(sax2->world, xuri);
        raptor_free_uri_v2(sax2->world, xuri);
      }

      /* delete all xml attributes whether processed above or not */
      atts[i]=NULL; 
    }
  }


  /* Create new element structure */
  el_name=raptor_new_qname(&sax2->namespaces, name, NULL,
                           (raptor_simple_message_handler)raptor_sax2_simple_error, sax2);
  if(!el_name)
    goto fail;

  xml_element=raptor_new_xml_element(el_name, xml_language, xml_base);
  if(!xml_element) {
    raptor_free_qname(el_name);
    goto fail;
  }
  /* xml_language,xml_base now owned by xml_element */
  xml_language = NULL;
  xml_base = NULL; 

  /* Turn string attributes into namespaced-attributes */
  if(ns_attributes_count) {
    int i;
    int offset = 0;

    /* Allocate new array to hold namespaced-attributes */
    named_attrs=(raptor_qname**)RAPTOR_CALLOC(raptor_qname_array, 
                                              ns_attributes_count, 
                                              sizeof(raptor_qname*));
    if(!named_attrs) {
      raptor_log_error_to_handlers(sax2->world,
                                   sax2->error_handlers, 
                                   RAPTOR_LOG_LEVEL_FATAL,
                                   sax2->locator, "Out of memory");
      goto fail;
    }

    for (i = 0; i < all_atts_count; i++) {
      raptor_qname* attr;

      /* Skip previously processed attributes */
      if(!atts[i<<1])
        continue;

      /* namespace-name[i] stored in named_attrs[i] */
      attr=raptor_new_qname(&sax2->namespaces,
                            atts[i<<1], atts[(i<<1)+1],
                            (raptor_simple_message_handler)raptor_sax2_simple_error, sax2);
      if(!attr) { /* failed - tidy up and return */
        int j;

        for (j=0; j < i; j++)
          RAPTOR_FREE(raptor_qname, named_attrs[j]);
        RAPTOR_FREE(raptor_qname_array, named_attrs);
        goto fail;
      }

      named_attrs[offset++]=attr;
    }
  } /* end if ns_attributes_count */


  if(named_attrs)
    raptor_xml_element_set_attributes(xml_element,
                                      named_attrs, ns_attributes_count);

  raptor_xml_element_push(sax2, xml_element);

  if(sax2->start_element_handler)
    sax2->start_element_handler(sax2->user_data, xml_element);

  if(xml_atts_copy) {
    /* Restore passed in XML attributes, free the copy */
    memcpy((void*)atts, xml_atts_copy, xml_atts_size);
    RAPTOR_FREE(cstringpointer, xml_atts_copy);
  }

  return;

  fail:
  if(xml_atts_copy)
    RAPTOR_FREE(cstringpointer, xml_atts_copy);
  if(xml_base)
    raptor_free_uri_v2(sax2->world, xml_base);
  if(xml_language)
    RAPTOR_FREE(cstring, xml_language);
  if(xml_element)
    raptor_free_xml_element(xml_element);
}
Esempio n. 2
0
int
rasqal_query_write_sparql_20060406(raptor_iostream *iostr, 
                                   rasqal_query* query, raptor_uri *base_uri)
{
  int i;
  sparql_writer_context wc;
  int limit, offset;
  rasqal_query_verb verb;

  wc.world = query->world;
  wc.base_uri = NULL;

  wc.type_uri = raptor_new_uri_for_rdf_concept(query->world->raptor_world_ptr,
                                               (const unsigned char*)"type");
  wc.nstack = raptor_new_namespaces(query->world->raptor_world_ptr, 1);

  if(base_uri) {
    raptor_iostream_counted_string_write("BASE ", 5, iostr);
    rasqal_query_write_sparql_uri(&wc, iostr, base_uri);
    raptor_iostream_write_byte('\n', iostr);

    /* from now on all URIs are relative to this */
    wc.base_uri = raptor_uri_copy(base_uri);
  }
  
  
  for(i = 0; 1 ; i++) {
    raptor_namespace *nspace;
    rasqal_prefix* p = rasqal_query_get_prefix(query, i);
    if(!p)
      break;
    
    raptor_iostream_counted_string_write("PREFIX ", 7, iostr);
    if(p->prefix)
      raptor_iostream_string_write(p->prefix, iostr);
    raptor_iostream_counted_string_write(": ", 2, iostr);
    rasqal_query_write_sparql_uri(&wc, iostr, p->uri);
    raptor_iostream_write_byte('\n', iostr);

    /* Use this constructor so we copy a URI directly */
    nspace = raptor_new_namespace_from_uri(wc.nstack, p->prefix, p->uri, i);
    raptor_namespaces_start_namespace(wc.nstack, nspace);
  }

  if(query->explain)
    raptor_iostream_counted_string_write("EXPLAIN ", 8, iostr);

  verb = query->verb;
  
  /* These terms are deprecated */
  if(query->verb == RASQAL_QUERY_VERB_INSERT ||
     query->verb == RASQAL_QUERY_VERB_DELETE) {
    verb = RASQAL_QUERY_VERB_UPDATE;
  }

  
  /* Write SPARQL 1.1 (Draft) Update forms */
  if(verb == RASQAL_QUERY_VERB_UPDATE) {
    rasqal_update_operation* update;
    /* Write SPARQL Update */

    for(i = 0; (update = rasqal_query_get_update_operation(query, i)); i++) {
      int is_always_2_args = (update->type >= RASQAL_UPDATE_TYPE_ADD &&
                              update->type <= RASQAL_UPDATE_TYPE_COPY);
      
      if(update->type == RASQAL_UPDATE_TYPE_UPDATE) {
        /* update operations:
         * WITH ... INSERT { template } DELETE { template } WHERE { template }
         * INSERT/DELETE { template } WHERE { template }
         * INSERT/DELETE DATA { triples } 
         */
        if(update->graph_uri) {
          raptor_iostream_counted_string_write("WITH ", 5, iostr);
          rasqal_query_write_sparql_uri(&wc, iostr, update->graph_uri);
          raptor_iostream_write_byte('\n', iostr);
        }
        if(update->delete_templates) {
          raptor_iostream_counted_string_write("DELETE ", 7, iostr);
          if(update->flags & RASQAL_UPDATE_FLAGS_DATA) 
            raptor_iostream_counted_string_write("DATA ", 5, iostr);
          rasqal_query_write_sparql_triple_data(&wc, iostr,
                                                update->delete_templates,
                                                0);
          raptor_iostream_write_byte('\n', iostr);
        }
        if(update->insert_templates) {
          raptor_iostream_counted_string_write("INSERT ", 7, iostr);
          if(update->flags & RASQAL_UPDATE_FLAGS_DATA) 
            raptor_iostream_counted_string_write("DATA ", 5, iostr);
          rasqal_query_write_sparql_triple_data(&wc, iostr,
                                                update->insert_templates,
                                                0);
          raptor_iostream_write_byte('\n', iostr);
        }
        if(update->where) {
          raptor_iostream_counted_string_write("WHERE ", 6, iostr);
          rasqal_query_write_sparql_graph_pattern(&wc, iostr,
                                                  update->where,
                                                  -1, 0);
          raptor_iostream_write_byte('\n', iostr);
        }
      } else {
        /* admin operations:
         * CLEAR GRAPH graph-uri | DEFAULT | NAMED | ALL
         * CREATE (SILENT) GRAPH graph-uri | DEFAULT | NAMED | ALL
         * DROP (SILENT) GRAPH graph-uri
         * LOAD (SILENT) doc-uri / LOAD (SILENT) doc-uri INTO GRAPH graph-uri
         * ADD (SILENT) GraphOrDefault TO GraphOrDefault
         * MOVE (SILENT) GraphOrDefault TO GraphOrDefault
         * COPY (SILENT) GraphOrDefault TO GraphOrDefault
         */
        raptor_iostream_string_write(rasqal_update_type_label(update->type),
                                     iostr);
        if(update->flags & RASQAL_UPDATE_FLAGS_SILENT)
          raptor_iostream_counted_string_write(" SILENT", 7, iostr);

        if(is_always_2_args) {
          /* ADD, MOVE, COPY are always 2-arg admin operations */
          rasqal_query_write_graphref(&wc, iostr, 
                                      update->graph_uri,
                                      RASQAL_UPDATE_GRAPH_ONE);

          raptor_iostream_counted_string_write(" TO", 3, iostr);
        
          rasqal_query_write_graphref(&wc, iostr, 
                                      update->document_uri,
                                      RASQAL_UPDATE_GRAPH_ONE);

        } else if(update->type == RASQAL_UPDATE_TYPE_LOAD) {
          /* LOAD is 1 or 2 URIs and first one never has a GRAPH prefix */

          raptor_iostream_write_byte(' ', iostr);

          rasqal_query_write_sparql_uri(&wc, iostr, update->document_uri);

          if(update->graph_uri) {
            raptor_iostream_counted_string_write(" INTO", 5, iostr);
            
            rasqal_query_write_graphref(&wc, iostr, 
                                        update->graph_uri,
                                        RASQAL_UPDATE_GRAPH_ONE);
          }
        } else {
          /* everything else is defined by update->applies; only
          * CLEAR and DROP may apply to >1 graph
          */
          rasqal_query_write_graphref(&wc, iostr, 
                                      update->graph_uri,
                                      update->applies);
        }
        
        raptor_iostream_write_byte('\n', iostr);

      }
    }

    goto tidy;
  }


  if(verb != RASQAL_QUERY_VERB_CONSTRUCT)
    raptor_iostream_string_write(rasqal_query_verb_as_string(query->verb),
                                 iostr);

  if(query->distinct) {
    if(query->distinct == 1)
      raptor_iostream_counted_string_write(" DISTINCT", 9, iostr);
    else
      raptor_iostream_counted_string_write(" REDUCED", 8, iostr);
  }

  if(query->wildcard)
    raptor_iostream_counted_string_write(" *", 2, iostr);
  else if(verb == RASQAL_QUERY_VERB_DESCRIBE) {
    raptor_sequence *lit_seq = query->describes;
    int count = raptor_sequence_size(lit_seq);

    for(i = 0; i < count; i++) {
      rasqal_literal* l = (rasqal_literal*)raptor_sequence_get_at(lit_seq, i);
      raptor_iostream_write_byte(' ', iostr);
      rasqal_query_write_sparql_literal(&wc, iostr, l);
    }
  } else if(verb == RASQAL_QUERY_VERB_SELECT) {
    rasqal_query_write_sparql_select(&wc, iostr, query->selects);
  }
  raptor_iostream_write_byte('\n', iostr);

  if(query->data_graphs) {
    for(i = 0; 1; i++) {
      rasqal_data_graph* dg = rasqal_query_get_data_graph(query, i);
      if(!dg)
        break;
      
      if(dg->flags & RASQAL_DATA_GRAPH_NAMED)
        continue;
      
      rasqal_query_write_data_format_comment(&wc, iostr, dg);
      raptor_iostream_counted_string_write("FROM ", 5, iostr);
      rasqal_query_write_sparql_uri(&wc, iostr, dg->uri);
      raptor_iostream_counted_string_write("\n", 1, iostr);
    }
    
    for(i = 0; 1; i++) {
      rasqal_data_graph* dg = rasqal_query_get_data_graph(query, i);
      if(!dg)
        break;

      if(!(dg->flags & RASQAL_DATA_GRAPH_NAMED))
        continue;
      
      rasqal_query_write_data_format_comment(&wc, iostr, dg);
      raptor_iostream_counted_string_write("FROM NAMED ", 11, iostr);
      rasqal_query_write_sparql_uri(&wc, iostr, dg->name_uri);
      raptor_iostream_write_byte('\n', iostr);
    }
    
  }

  if(query->constructs) {
    raptor_iostream_string_write("CONSTRUCT {\n", iostr);
    for(i = 0; 1; i++) {
      rasqal_triple* t = rasqal_query_get_construct_triple(query, i);
      if(!t)
        break;

      raptor_iostream_counted_string_write("  ", 2, iostr);
      rasqal_query_write_sparql_triple(&wc, iostr, t);
      raptor_iostream_write_byte('\n', iostr);
    }
    raptor_iostream_counted_string_write("}\n", 2, iostr);
  }
  if(query->query_graph_pattern) {
    raptor_iostream_counted_string_write("WHERE ", 6, iostr);
    rasqal_query_write_sparql_graph_pattern(&wc, iostr,
                                            query->query_graph_pattern, 
                                            -1, 0);
    raptor_iostream_write_byte('\n', iostr);
  }

  if(rasqal_query_get_group_conditions_sequence(query)) {
    raptor_iostream_counted_string_write("GROUP BY ", 9, iostr);
    for(i = 0; 1; i++) {
      rasqal_expression* expr = rasqal_query_get_group_condition(query, i);
      if(!expr)
        break;

      if(i > 0)
        raptor_iostream_write_byte(' ', iostr);
      rasqal_query_write_sparql_expression(&wc, iostr, expr);
    }
    raptor_iostream_write_byte('\n', iostr);
  }

  if(rasqal_query_get_having_conditions_sequence(query)) {
    raptor_iostream_counted_string_write("HAVING ", 7, iostr);
    for(i = 0; 1; i++) {
      rasqal_expression* expr = rasqal_query_get_having_condition(query, i);
      if(!expr)
        break;

      if(i > 0)
        raptor_iostream_write_byte(' ', iostr);
      rasqal_query_write_sparql_expression(&wc, iostr, expr);
    }
    raptor_iostream_write_byte('\n', iostr);
  }

  if(rasqal_query_get_order_conditions_sequence(query)) {
    raptor_iostream_counted_string_write("ORDER BY ", 9, iostr);
    for(i = 0; 1; i++) {
      rasqal_expression* expr = rasqal_query_get_order_condition(query, i);
      if(!expr)
        break;

      if(i > 0)
        raptor_iostream_write_byte(' ', iostr);
      rasqal_query_write_sparql_expression(&wc, iostr, expr);
    }
    raptor_iostream_write_byte('\n', iostr);
  }

  limit = rasqal_query_get_limit(query);
  offset = rasqal_query_get_offset(query);
  if(limit >= 0 || offset >= 0) {
    if(limit >= 0) {
      raptor_iostream_counted_string_write("LIMIT ", 7, iostr);
      raptor_iostream_decimal_write(limit, iostr);
    }
    if(offset >= 0) {
      if(limit)
        raptor_iostream_write_byte(' ', iostr);
      raptor_iostream_counted_string_write("OFFSET ", 8, iostr);
      raptor_iostream_decimal_write(offset, iostr);
    }
    raptor_iostream_write_byte('\n', iostr);
  }

  if(query->bindings)
    rasqal_write_sparql_bindings(&wc, iostr, query->bindings);

  tidy:
  raptor_free_uri(wc.type_uri);
  if(wc.base_uri)
    raptor_free_uri(wc.base_uri);
  raptor_free_namespaces(wc.nstack);

  return 0;
}