#include "RS_XML.h" #include <libxml/xpath.h> #include "Utils.h" static SEXP convertNodeSetToR(xmlNodeSetPtr obj, SEXP fun, int encoding, SEXP manageMemory) { SEXP ans, expr = NULL, arg = NULL, ref; int i; if(!obj) return(NULL_USER_OBJECT); PROTECT(ans = NEW_LIST(obj->nodeNr)); if(GET_LENGTH(fun) && (TYPEOF(fun) == CLOSXP || TYPEOF(fun) == BUILTINSXP)) { PROTECT(expr = allocVector(LANGSXP, 2)); SETCAR(expr, fun); arg = CDR(expr); } else if(TYPEOF(fun) == LANGSXP) { expr = fun; arg = CDR(expr); } for(i = 0; i < obj->nodeNr; i++) { xmlNodePtr el; el = obj->nodeTab[i]; if(el->type == XML_ATTRIBUTE_NODE) { #if 0 PROTECT(ref = mkString((el->children && el->children->content) ? XMLCHAR_TO_CHAR(el->children->content) : "")); SET_NAMES(ref, mkString(el->name)); #else PROTECT(ref = ScalarString(mkCharCE((el->children && el->children->content) ? XMLCHAR_TO_CHAR(el->children->content) : "", encoding))); SET_NAMES(ref, ScalarString(mkCharCE(el->name, encoding))); #endif SET_CLASS(ref, mkString("XMLAttributeValue")); UNPROTECT(1); } else if(el->type == XML_NAMESPACE_DECL) ref = R_createXMLNsRef((xmlNsPtr) el); else ref = R_createXMLNodeRef(el, manageMemory); if(expr) { PROTECT(ref); SETCAR(arg, ref); PROTECT(ref = Rf_eval(expr, R_GlobalEnv)); /*XXX do we want to catch errors here? Maybe to release the namespaces. */ SET_VECTOR_ELT(ans, i, ref); UNPROTECT(2); } else SET_VECTOR_ELT(ans, i, ref); } if(expr) { if(TYPEOF(fun) == CLOSXP || TYPEOF(fun) == BUILTINSXP) UNPROTECT(1); } else SET_CLASS(ans, mkString("XMLNodeSet")); UNPROTECT(1); return(ans); } SEXP convertXPathObjectToR(xmlXPathObjectPtr obj, SEXP fun, int encoding, SEXP manageMemory) { SEXP ans = NULL_USER_OBJECT; switch(obj->type) { case XPATH_NODESET: ans = convertNodeSetToR(obj->nodesetval, fun, encoding, manageMemory); break; case XPATH_BOOLEAN: ans = ScalarLogical(obj->boolval); break; case XPATH_NUMBER: ans = ScalarReal(obj->floatval); if(xmlXPathIsInf(obj->floatval)) REAL(ans)[0] = xmlXPathIsInf(obj->floatval) < 0 ? R_NegInf : R_PosInf; else if(xmlXPathIsNaN(obj->floatval)) REAL(ans)[0] = NA_REAL; break; case XPATH_STRING: ans = mkString(XMLCHAR_TO_CHAR(obj->stringval)); //XXX encoding break; case XPATH_POINT: case XPATH_RANGE: case XPATH_LOCATIONSET: case XPATH_USERS: PROBLEM "currently unsupported xmlXPathObject type %d in convertXPathObjectToR. Please send mail to maintainer.", obj->type WARN default: ans = R_NilValue; } return(ans); } #include <libxml/xpathInternals.h> /* For xmlXPathRegisterNs() */ xmlNsPtr * R_namespaceArray(SEXP namespaces, xmlXPathContextPtr ctxt) { int i, n; SEXP names = GET_NAMES(namespaces); xmlNsPtr *els; n = GET_LENGTH(namespaces); els = xmlMallocAtomic(sizeof(xmlNsPtr) * n); if(!els) { PROBLEM "Failed to allocated space for namespaces" ERROR; } for(i = 0; i < n; i++) { /*XXX who owns these strings. */ const xmlChar *prefix, *href; href = CHAR_TO_XMLCHAR(strdup(CHAR_DEREF(STRING_ELT(namespaces, i)))); prefix = names == NULL_USER_OBJECT ? CHAR_TO_XMLCHAR("") /* NULL */ : CHAR_TO_XMLCHAR(strdup(CHAR_DEREF(STRING_ELT(names, i)))); els[i] = xmlNewNs(NULL, href, prefix); if(ctxt) xmlXPathRegisterNs(ctxt, prefix, href); } return(els); }
static xmlEntityPtr do_getEntityHandler(void *userData, const xmlChar *name, const char * r_funName) { SEXP opArgs, r_ans; xmlEntityPtr ans = NULL; RS_XMLParserData *parserData = (RS_XMLParserData*) userData; DECL_ENCODING_FROM_EVENT_PARSER(parserData) PROTECT(opArgs = NEW_LIST(1)) ; SET_VECTOR_ELT(opArgs, 0, ScalarString(ENC_COPY_TO_USER_STRING(name))); /*XXX should we encode this? Done now! */ r_ans = RS_XML(callUserFunction)(r_funName, NULL, (RS_XMLParserData *) userData, opArgs); PROTECT(r_ans) ; if(r_ans != NULL_USER_OBJECT && GET_LENGTH(r_ans) > 0) { if(TYPEOF(r_ans) == STRSXP) { const char *value; value = CHAR_DEREF(STRING_ELT(r_ans, 0)); ans = (xmlEntityPtr) malloc(sizeof(xmlEntity)); memset(ans, 0, sizeof(xmlEntity)); ans->type = XML_ENTITY_DECL; ans->etype = XML_INTERNAL_GENERAL_ENTITY; ans->name = xmlStrdup(name); ans->orig = NULL; // xmlStrdup(CHAR_TO_XMLCHAR(value)); ans->content = xmlStrdup(CHAR_TO_XMLCHAR(value)); ans->length = strlen(value); #ifndef NO_CHECKED_ENTITY_FIELD ans->checked = 1; #endif } } UNPROTECT(2); return(ans); }
int RS_XML(libXMLEventParse)(const char *fileName, RS_XMLParserData *parserData, RS_XML_ContentSourceType asText, int saxVersion) { xmlSAXHandlerPtr xmlParserHandler; xmlParserCtxtPtr ctx; int status; switch(asText) { case RS_XML_TEXT: ctx = xmlCreateDocParserCtxt(CHAR_TO_XMLCHAR(fileName)); break; case RS_XML_FILENAME: ctx = xmlCreateFileParserCtxt(fileName); break; case RS_XML_CONNECTION: ctx = RS_XML_xmlCreateConnectionParserCtxt((USER_OBJECT_) fileName); break; default: ctx = NULL; } if(ctx == NULL) { PROBLEM "Can't parse %s", fileName ERROR; } xmlParserHandler = (xmlSAXHandlerPtr) S_alloc(sizeof(xmlSAXHandler), 1); /* Make certain this is initialized so that we don't have any references to unwanted routines! */ memset(xmlParserHandler, '\0', sizeof(xmlSAXHandler)); RS_XML(initXMLParserHandler)(xmlParserHandler, saxVersion); parserData->ctx = ctx; ctx->userData = parserData; ctx->sax = xmlParserHandler; status = xmlParseDocument(ctx); ctx->sax = NULL; xmlFreeParserCtxt(ctx); return(status); /* Free(xmlParserHandler); */ }
USER_OBJECT_ RS_XML_createDocFromNode(USER_OBJECT_ s_node) { xmlDocPtr doc; xmlNodePtr node, ptr; SEXP ans; node = (xmlNodePtr) R_ExternalPtrAddr(s_node); doc = xmlNewDoc(CHAR_TO_XMLCHAR("1.0")); R_numXMLDocs++; ptr = xmlDocCopyNode(node, doc, 1); node = (xmlNodePtr) doc; xmlAddChild(node, ptr); ans = R_createXMLDocRef(doc); return(ans); }
SEXP RS_XML_xpathEval(SEXP sdoc, SEXP r_node, SEXP path, SEXP namespaces, SEXP fun, SEXP charEncoding, SEXP manageMemory) { xmlXPathContextPtr ctxt = NULL; xmlXPathObjectPtr result; SEXP ans = NULL_USER_OBJECT; xmlDocPtr doc; if(TYPEOF(sdoc) != EXTPTRSXP || R_ExternalPtrTag(sdoc) != Rf_install("XMLInternalDocument")) { PROBLEM "xpathEval must be given an internal XML document object, 'XMLInternalDocument'" ERROR; } doc = (xmlDocPtr) R_ExternalPtrAddr(sdoc); ctxt = xmlXPathNewContext(doc); if(GET_LENGTH(r_node)) { ctxt->node = ctxt->origin = R_ExternalPtrAddr(r_node); } if(GET_LENGTH(namespaces)) { ctxt->namespaces = R_namespaceArray(namespaces, ctxt); /* xmlCopyNamespaceList(doc); */ ctxt->nsNr = GET_LENGTH(namespaces); } result = xmlXPathEvalExpression(CHAR_TO_XMLCHAR(CHAR_DEREF(STRING_ELT(path, 0))), ctxt); if(result) ans = convertXPathObjectToR(result, fun, INTEGER(charEncoding)[0], manageMemory); xmlXPathFreeObject(result); xmlXPathFreeContext(ctxt); if(!result) { PROBLEM "error evaluating xpath expression %s", CHAR_DEREF(STRING_ELT(path, 0)) ERROR; } return(ans); }
SEXP RS_XML_xpathNodeEval(SEXP s_node, SEXP path, SEXP namespaces, SEXP fun) { xmlXPathContextPtr ctxt = NULL; xmlXPathObjectPtr result; SEXP ans = NULL_USER_OBJECT; xmlDocPtr doc; if(TYPEOF(s_node) != EXTPTRSXP || R_ExternalPtrTag(s_node) != Rf_install("XMLInternalNode")) { PROBLEM "xpathEval must be given an internal XML document object, 'XMLInternalNode'" ERROR; } ctxt = xmlXPathNewContext(doc); if(GET_LENGTH(namespaces)) { ctxt->namespaces = R_namespaceArray(namespaces, ctxt); /* xmlCopyNamespaceList(doc); */ ctxt->nsNr = GET_LENGTH(namespaces); } result = xmlXPathEvalExpression(CHAR_TO_XMLCHAR(CHAR_DEREF(STRING_ELT(path, 0))), ctxt); if(result) ans = convertXPathObjectToR(result, fun); xmlXPathFreeObject(result); xmlXPathFreeContext(ctxt); if(!result) { PROBLEM "error evaluating xpath expression %s", CHAR_DEREF(STRING_ELT(path, 0)) ERROR; } return(ans); }