/**
 * Compute the 1-based position of a node among the same-named siblings
 * that precede it under its parent.
 *
 * @param n node to locate
 * @return 0 when n is NULL, 1 when n has no parent, otherwise 1 plus the
 *         number of earlier siblings whose name equals the name of n
 */
int ROXML_API roxml_get_node_position(node_t *n)
{
	char self_name[256];
	node_t *parent;
	node_t *cur;
	int position = 1;

	if (n == NULL) {
		return 0;
	}

	roxml_get_name(n, self_name, 256);

	parent = n->prnt;
	if (parent == NULL) {
		return 1;
	}

	/* Walk the siblings that come before n, counting name twins. */
	for (cur = parent->chld; cur != NULL && cur != n; cur = cur->sibl) {
		char sibling_name[256];

		roxml_get_name(cur, sibling_name, 256);
		if (strcmp(self_name, sibling_name) == 0) {
			position++;
		}
	}

	return position;
}
/**
 * Write the opening tag of an XML node to a stream.
 *
 * Emits "<name", then one " attr='value'" pair for every attribute whose
 * name and content could both be fetched, then ">". Nothing is written
 * when the node name cannot be obtained. Children and the closing tag are
 * not printed here.
 *
 * @param node node to print
 * @param out  destination stream
 */
void clish_xmlnode_print(clish_xmlnode_t *node, FILE *out)
{
	node_t *roxn = xmlnode_to_node(node);
	char *name = roxml_get_name(roxn, NULL, 0);

	if (!name) {
		return;
	}

	fprintf(out, "<%s", name);
	roxml_release(name);

	if (roxml_get_attr_nb(roxn)) {
		int attr_count = roxml_get_attr_nb(roxn);
		int attr_pos = 0;

		while (attr_pos < attr_count) {
			node_t *attr = roxml_get_attr(roxn, NULL, attr_pos);
			char *attr_name = roxml_get_name(attr, NULL, 0);
			char *attr_value = roxml_get_content(attr, NULL, 0, NULL);

			if (attr_name && attr_value) {
				fprintf(out, " %s='%s'", attr_name, attr_value);
			}
			/* Release whichever of the two buffers was obtained. */
			if (attr_value) {
				roxml_release(attr_value);
			}
			if (attr_name) {
				roxml_release(attr_name);
			}
			++attr_pos;
		}
	}

	fprintf(out, ">");
}
/*
 * Advance *node along its sibling chain until it points at a node whose
 * name equals `name`. On return *node is either that node or NULL when the
 * chain ended without a match. A NULL *node on entry is left untouched.
 *
 * Names are fetched with roxml_get_name(..., NULL, 0) and are not released
 * in this function.
 */
static inline void ots_get_xml_node(node_t** node, const char* name)
{
#if DEBUG > 1
  fprintf(stdout, "%d: %s\n", __LINE__, roxml_get_name(*node, NULL, 0));
#endif /* DEBUG > 1 */
  for (;;) {
    if (*node == NULL) {
      break;
    }
    if (g_strcmp0(roxml_get_name(*node, NULL, 0), (const char*)name) == 0) {
      break; /* found the requested element */
    }
    *node = roxml_get_next_sibling(*node);
#if DEBUG > 2
    fprintf(stdout, "%s\n", roxml_get_name(*node, NULL, 0));
#endif /* DEBUG > 2 */
  }
#if DEBUG > 0
  fprintf(stdout, "%d: %s\n", __LINE__, roxml_get_name(*node, NULL, 0));
#endif /* DEBUG > 0 */
}
/**
 * Find the root of the tree that contains a node.
 *
 * Climbs the parent links to the topmost node. If that node's first child
 * is a processing instruction named "xml" and exactly one of the following
 * siblings is an element node, that single element is returned instead of
 * the topmost node.
 *
 * @param n any node of the tree
 * @return the root node, or NULL when n is NULL
 */
node_t * ROXML_API roxml_get_root(node_t *n)
{
	node_t *top;
	node_t *first;
	node_t *only_elm = 0;
	node_t *it;
	int elm_count = 0;
	char pi_name[16];

	if (n == NULL) {
		return NULL;
	}

	top = n;
	while (top->prnt) {
		top = top->prnt;
	}

	first = top->chld;
	if (first == NULL || (first->type & ROXML_NODE_TYPES) != ROXML_PI_NODE) {
		return top;
	}

	if (strcmp(roxml_get_name(first, pi_name, 16), "xml") == 0) {
		/* Count element nodes that follow the <?xml ...?> declaration. */
		for (it = first->sibl; it; it = it->sibl) {
			if (it->type & ROXML_ELM_NODE) {
				only_elm = it;
				elm_count++;
			}
		}
	}

	if (elm_count == 1) {
		top = only_elm;
	}

	return top;
}
/**
 * Get a child of a node, either by index or by name.
 *
 * When name is NULL the nth child (0-based, in sibling order) is returned.
 * When name is given, the first child whose name equals it is returned and
 * nth is ignored.
 *
 * @param n    parent node
 * @param name child name to look for, or NULL to select by index
 * @param nth  index of the wanted child when name is NULL
 * @return the matching child, or NULL when n is NULL, n has no children,
 *         nth is past the last child, or no child carries the given name
 */
node_t * ROXML_API roxml_get_chld(node_t *n, char * name, int nth)
{
	node_t *ptr;

	if (n == NULL) {
		return NULL;
	}

	ptr = n->chld;

	if (name == NULL) {
		int count = 0;

		if (nth == 0) {
			return ptr;
		}
		/*
		 * ptr is NULL for a childless node; it must be tested before
		 * ptr->sibl is read, otherwise any nth != 0 dereferences NULL.
		 */
		while (ptr && ptr->sibl && (nth > count)) {
			count++;
			ptr = ptr->sibl;
		}
		if (nth > count) {
			return NULL;
		}
		return ptr;
	}

	while (ptr) {
		char *child_name = roxml_get_name(ptr, NULL, 0);

		/*
		 * Only compare and release when a name buffer was actually
		 * obtained: strcmp(NULL, ...) is undefined, and RELEASE_LAST
		 * without a fresh buffer would act on an unrelated one.
		 */
		if (child_name != NULL) {
			int ans = strcmp(child_name, name);

			roxml_release(RELEASE_LAST);
			if (ans == 0) {
				return ptr;
			}
		}
		ptr = ptr->sibl;
	}

	return NULL;
}
/*
 * Walk *node and all of its following siblings; for every node named
 * "rule", append a g_strdup() copy of its content to *list.
 *
 * *node is advanced as the walk proceeds and is NULL when the function
 * returns.
 */
static inline void ots_append_xml_node_contents(GList** list, node_t** node)
{
#if DEBUG > 1
  fprintf(stdout, "%d: %s\n", __LINE__, roxml_get_name(*node, NULL, 0));
  fprintf(stdout, "%d: %s\n", __LINE__, roxml_get_content(*node, NULL, 0, NULL));
#endif /* DEBUG > 1 */
  for (; *node != NULL; *node = roxml_get_next_sibling((*node))) {
    if (g_strcmp0(roxml_get_name(*node, NULL, 0), (const char*)"rule") != 0) {
      continue; /* not a rule element */
    }
    *list = g_list_append(*list, g_strdup(roxml_get_content(*node, NULL, 0, NULL)));
#if DEBUG > 2
    fprintf(stdout, "%s\n", roxml_get_content(*node, NULL, 0, NULL));
    fprintf(stdout, "%d: %s\n", __LINE__, roxml_get_name(*node, NULL, 0));
#endif /* DEBUG > 2 */
  }
}
/*
 * Copy the name of node n into the caller buffer v.
 *
 * On entry *vl is the capacity of v in bytes.
 * Returns 0 when the name (with its terminating NUL) fits and was copied.
 * Returns -E2BIG when it does not fit; *vl is then set to the required
 * size, terminator included.
 * Returns -ENOMEM when the name could not be obtained; *vl is then set to
 * (unsigned int)-1.
 */
static int i_get_name(node_t *n, char *v, unsigned int *vl)
{
	char *name = roxml_get_name(n, NULL, 0);
	int needed;
	int rc;

	if (!name) {
		*vl = (unsigned int)-1;
		return -ENOMEM;
	}

	needed = strlen(name) + 1;
	if (needed <= *vl) {
		sprintf(v, "%s", name);
		rc = 0;
	} else {
		*vl = needed;
		rc = -E2BIG;
	}

	roxml_release(name);
	return rc;
}
gboolean ots_load_xml_dictionary_buf(OtsArticle* Doc, const char* buffer) { char* head_name = NULL; node_t* stem = NULL; node_t* pre = NULL; node_t* post = NULL; node_t* syno = NULL; /* synonyms */ node_t* manual = NULL; /* manual */ node_t* step1_pre = NULL; /* step1 */ node_t* step1_post = NULL; /* step1 */ node_t* parse = NULL; /* parser rules */ node_t* pbreak = NULL; node_t* pdbreak = NULL; node_t* tc_words = NULL; /* term count dictionary */ node_t* tf_words = NULL; /* term frequency dictionary */ OtsStemRule* rule = Doc->stem; node_t* head = roxml_load_buf((char*)buffer); if (head == NULL) { fprintf(stderr, "empty document\n"); roxml_release(RELEASE_ALL); return (FALSE); } #if DEBUG > 1 fprintf(stdout, "%d: %s\n", __LINE__, roxml_get_name(head, NULL, 0)); #endif /* DEBUG > 1 */ if (g_strcmp0(roxml_get_name(head, head_name, 0), (const char*)"dictionary")) { fprintf(stderr, "%s\n", head_name); roxml_release(RELEASE_ALL); roxml_close(head); return (FALSE); } stem = roxml_get_chld(head, NULL, 0); if(stem == NULL) { roxml_release(RELEASE_ALL); roxml_close(head); return (FALSE); } ots_get_xml_node(&stem, (const char*)"stemmer"); parse = roxml_get_chld(head, NULL, 0); if(parse == NULL) { roxml_release(RELEASE_ALL); roxml_close(head); return (FALSE); } ots_get_xml_node(&parse, (const char*)"parser"); tc_words = roxml_get_chld(head, NULL, 0); if(tc_words == NULL) { roxml_release(RELEASE_ALL); roxml_close(head); return (FALSE); } ots_get_xml_node(&tc_words, (const char*)"grader-tc"); tf_words = roxml_get_chld(head, NULL, 0); if(tf_words == NULL) { roxml_release(RELEASE_ALL); roxml_close(head); return (FALSE); } ots_get_xml_node(&tf_words, (const char*)"grader-tf"); pre = roxml_get_chld(stem, NULL, 0); if(pre == NULL) { roxml_release(RELEASE_ALL); roxml_close(head); return (FALSE); } ots_get_xml_node(&pre, (const char*)"pre"); post = roxml_get_chld(stem, NULL, 0); if(post == NULL) { roxml_release(RELEASE_ALL); roxml_close(head); return (FALSE); } 
ots_get_xml_node(&post, (const char*)"post"); syno = roxml_get_chld(stem, NULL, 0); if(syno == NULL) { roxml_release(RELEASE_ALL); roxml_close(head); return (FALSE); } ots_get_xml_node(&syno, (const char*)"synonyms"); manual = roxml_get_chld(stem, NULL, 0); if(manual == NULL) { roxml_release(RELEASE_ALL); roxml_close(head); return (FALSE); } ots_get_xml_node(&manual, (const char*)"manual"); step1_pre = roxml_get_chld(stem, NULL, 0); if(step1_pre == NULL) { roxml_release(RELEASE_ALL); roxml_close(head); return (FALSE); } ots_get_xml_node(&step1_pre, (const char*)"step1_pre"); step1_post = roxml_get_chld(stem, NULL, 0); if(step1_post == NULL) { roxml_release(RELEASE_ALL); roxml_close(head); return (FALSE); } ots_get_xml_node(&step1_post, (const char*)"step1_post"); pre = roxml_get_chld(pre, NULL, 0); /* point to first word */ if(pre == NULL) { roxml_release(RELEASE_ALL); roxml_close(head); return (FALSE); } ots_append_xml_node_contents(&rule->RemovePre, &pre); post = roxml_get_chld(post, NULL, 0); if(post == NULL) { roxml_release(RELEASE_ALL); roxml_close(head); return (FALSE); } ots_append_xml_node_contents(&rule->RemovePost, &post); syno = roxml_get_chld(syno, NULL, 0); if(syno == NULL) { roxml_release(RELEASE_ALL); roxml_close(head); return (FALSE); } ots_append_xml_node_contents(&rule->synonyms, &syno); manual = roxml_get_chld(manual, NULL, 0); if(manual == NULL) { roxml_release(RELEASE_ALL); roxml_close(head); return (FALSE); } ots_append_xml_node_contents(&rule->manual, &manual); step1_pre = roxml_get_chld(step1_pre, NULL, 0); if(step1_pre == NULL) { roxml_release(RELEASE_ALL); roxml_close(head); return (FALSE); } ots_append_xml_node_contents(&rule->step1_pre, &step1_pre); step1_post = roxml_get_chld(step1_post, NULL, 0); if(step1_post == NULL) { roxml_release(RELEASE_ALL); roxml_close(head); return (FALSE); } ots_append_xml_node_contents(&rule->step1_post, &step1_post); pbreak = roxml_get_chld(parse, NULL, 0); if(pbreak == NULL) { roxml_release(RELEASE_ALL); 
roxml_close(head); return (FALSE); } ots_get_xml_node(&pbreak, (const char*)"linebreak"); pdbreak = roxml_get_chld(parse, NULL, 0); if(pdbreak == NULL) { roxml_release(RELEASE_ALL); roxml_close(head); return (FALSE); } ots_get_xml_node(&pdbreak, (const char*)"linedontbreak"); /*Parser break*/ pbreak = roxml_get_chld(pbreak, NULL, 0); if(pbreak == NULL) { roxml_release(RELEASE_ALL); roxml_close(head); return (FALSE); } ots_append_xml_node_contents(&rule->ParserBreak, &pbreak); /*Parser Don't break*/ pdbreak = roxml_get_chld(pdbreak, NULL, 0); if(pdbreak == NULL) { roxml_release(RELEASE_ALL); roxml_close(head); return (FALSE); } ots_append_xml_node_contents(&rule->ParserDontBreak, &pdbreak); /* Term Count load dict */ tc_words = roxml_get_chld(tc_words, NULL, 0); if(tc_words == NULL) { roxml_release(RELEASE_ALL); roxml_close(head); return (FALSE); } #if DEBUG > 1 fprintf(stdout, "%d: %s\n", __LINE__, roxml_get_name(tc_words, NULL, 0)); fprintf(stdout, "%d: %s\n", __LINE__, roxml_get_content(tc_words, NULL, 0, NULL)); #endif /* DEBUG > 1 */ while (tc_words != NULL) { if (!g_strcmp0(roxml_get_name(tc_words, NULL, 0), (const char*)"word")) { Doc->dict = g_list_prepend(Doc->dict, (gpointer)ots_new_wordEntery( (unsigned const char*)roxml_get_content(tc_words, NULL, 0, NULL))); } tc_words = roxml_get_next_sibling(tc_words); #if DEBUG > 2 fprintf(stdout, "%s\n", roxml_get_content(tc_words, NULL, 0, NULL)); #endif /* DEBUG > 2 */ } /*Term Frequency load dict*/ tf_words = roxml_get_chld(tf_words, NULL, 0); if(tf_words == NULL) { roxml_release(RELEASE_ALL); roxml_close(head); return (FALSE); } #if DEBUG > 1 fprintf(stdout, "%d: %s\n", __LINE__, roxml_get_name(tf_words, NULL, 0)); fprintf(stdout, "%d: %s\n", __LINE__, roxml_get_content(tf_words, NULL, 0, NULL)); #endif /* DEBUG > 1 */ while (tf_words != NULL) { if (!g_strcmp0(roxml_get_name(tf_words, NULL, 0), (const char*)"word")) { const gchar* key = g_strdup(roxml_get_content(tf_words, NULL, 0, NULL)); node_t* idf_attr = 
roxml_get_attr(tf_words, "idf", 0); if (idf_attr != NULL) { const char* idf_val = roxml_get_content(idf_attr, NULL, 0, NULL); if (idf_val != NULL) { #if DEBUG > 3 fprintf(stdout, "%s\n", idf_val); #endif /* DEBUG > 3 */ Doc->tf_terms = g_list_append(Doc->tf_terms, ots_new_OtsWordTF((const unsigned char*)key, atof(idf_val))); } } g_free((gpointer)key); } tf_words = roxml_get_next_sibling(tf_words); #if DEBUG > 2 fprintf(stdout, "%s\n", roxml_get_content(tf_words, NULL, 0, NULL)); #endif /* DEBUG > 2 */ } roxml_release(RELEASE_ALL); roxml_close(head); return (TRUE); }
/**
 * Get a namespace, attribute or child node of n, filtered by a type mask
 * and selected either by index or by name.
 *
 * Returns NULL when n is NULL.
 *
 * name == NULL (selection by index nth):
 *  - if n has a namespace and type includes ROXML_NS_NODE, the namespace
 *    node is the candidate and is returned directly for nth == 0;
 *  - else if n has attributes and type includes ROXML_ATTR_NODE, the first
 *    attribute is returned for nth == 0; otherwise the attribute siblings
 *    are walked, incrementing the counter once per step, until the counter
 *    reaches nth or the attribute list ends;
 *  - otherwise the candidate is the first child whose type, masked with
 *    ROXML_NODE_TYPES, intersects `type`.
 *  If the counter is still below nth, the search restarts at the first
 *  matching child and walks the child siblings, incrementing the counter
 *  for each sibling whose masked type intersects `type`. The counter is
 *  not reset before this second walk, so it carries over whatever was
 *  counted in the attribute list.
 *  NULL is returned if nth is still greater than the counter afterwards.
 *
 * name != NULL (selection by name, nth is not used):
 *  - if n has attributes and type includes ROXML_ATTR_NODE, the first
 *    attribute whose name equals `name` is returned;
 *  - otherwise the first child whose masked type intersects `type` and
 *    whose name equals `name` is returned; NULL if there is none.
 *  Each name is fetched with roxml_get_name(ptr, NULL, 0) and dropped
 *  again with roxml_release(RELEASE_LAST) right after the comparison.
 */
node_t * ROXML_API roxml_get_nodes(node_t *n, int type, char * name, int nth) { node_t *ptr = NULL; if(n == NULL) { return NULL; } if(name == NULL) { int count = 0; if(n->ns && (type & ROXML_NS_NODE)) { ptr = n->ns; if(nth == 0) { return ptr; } } else if(n->attr && (type & ROXML_ATTR_NODE)) { ptr = n->attr; if(nth == 0) { return ptr; } while((ptr->sibl)&&(nth > count)) { ptr = ptr->sibl; count++; } } else { ptr = n->chld; while(ptr && !((ptr->type & ROXML_NODE_TYPES) & type)) { ptr = ptr->sibl; } } if(nth > count) { ptr = n->chld; while(ptr && !((ptr->type & ROXML_NODE_TYPES) & type)) { ptr = ptr->sibl; } while(ptr && (ptr->sibl) && (nth > count)) { ptr = ptr->sibl; if((ptr->type & ROXML_NODE_TYPES) & type) { count++; } } } if(nth > count) { return NULL; } } else { if(n->attr && (type & ROXML_ATTR_NODE)) { ptr = n->attr; while(ptr) { int ans = strcmp(roxml_get_name(ptr, NULL, 0), name); roxml_release(RELEASE_LAST); if(ans == 0) { return ptr; } ptr = ptr->sibl; } } ptr = n->chld; while(ptr) { if((ptr->type & ROXML_NODE_TYPES) & type) { int ans = strcmp(roxml_get_name(ptr, NULL, 0), name); roxml_release(RELEASE_LAST); if(ans == 0) { return ptr; } } ptr = ptr->sibl; } } return ptr; }
/**
 * Read the textual content of a node into a buffer.
 *
 * For an element node the content is the concatenation of all of its
 * direct text children. For doctype, text, comment, processing-instruction
 * and attribute nodes it is read from the node's own span (for an
 * attribute, from the span of its child node), skipping the leading
 * name/markup bytes where applicable.
 *
 * @param n       node to read; when NULL, *size is set to 0 and either an
 *                empty string is written to buffer or NULL is returned
 * @param buffer  destination, or NULL to have one obtained via roxml_malloc
 * @param bufsize capacity of buffer; at most bufsize-1 bytes are requested
 *                from roxml_read. Ignored when buffer is NULL.
 * @param size    optional; receives the number of bytes read plus one
 * @return the NUL-terminated content, or NULL when no buffer is available
 */
char * ROXML_API roxml_get_content(node_t *n, char * buffer, int bufsize, int *size)
{
	char *out = buffer;
	int length = 0;

	if (n == NULL) {
		if (size) {
			*size = 0;
		}
		if (buffer == NULL) {
			return NULL;
		}
		strcpy(buffer, "");
		return buffer;
	}

	if (n->type & ROXML_ELM_NODE) {
		node_t *kid;

		/* First pass: total length of all direct text children. */
		for (kid = n->chld; kid; kid = kid->sibl) {
			if ((kid->type & ROXML_NODE_TYPES) == ROXML_TXT_NODE) {
				length += kid->end - kid->pos;
			}
		}

		if (out == NULL) {
			out = roxml_malloc(sizeof(char), length+1, PTR_CHAR);
			bufsize = length+1;
		}
		if (out == NULL) {
			return NULL;
		}

		/* Second pass: copy each text child, clamped to the space left. */
		length = 0;
		for (kid = n->chld; kid; kid = kid->sibl) {
			int chunk;
			int got = 0;

			if ((kid->type & ROXML_NODE_TYPES) != ROXML_TXT_NODE) {
				continue;
			}
			chunk = kid->end - kid->pos;
			if (length+chunk > bufsize-1) {
				chunk = bufsize - length - 1;
			}
			got += roxml_read(kid->pos, chunk, out+length, kid);
			length += got;
		}
	} else {
		char name[ROXML_BASE_LEN];
		node_t *src = n;
		int offset = 0;
		int name_len = 0;
		int want = 0;

		roxml_get_name(n, name, ROXML_BASE_LEN);
		name_len = strlen(name);

		/* Work out where the payload starts and how long it is. */
		if (n->type & ROXML_DOCTYPE_NODE) {
			length = src->end - src->pos - name_len - 2;
			offset = src->pos + name_len + 2;
		} else if (n->type & ROXML_TXT_NODE) {
			length = src->end - src->pos;
			offset = src->pos;
		} else if (n->type & ROXML_CMT_NODE) {
			length = src->end - src->pos - 4;
			offset = src->pos + 4;
		} else if (n->type & ROXML_PI_NODE) {
			length = src->end - src->pos - name_len - 3;
			offset = src->pos + name_len + 3;
		} else if (n->type & ROXML_ATTR_NODE) {
			/* An attribute's value is held by its child node. */
			src = n->chld;
			if (src) {
				offset = src->pos;
				length = src->end - src->pos;
			} else {
				offset = 0;
				length = 0;
			}
		}

		if (out == NULL) {
			out = roxml_malloc(sizeof(char), length+1, PTR_CHAR);
			bufsize = length+1;
		}
		if (out == NULL) {
			return NULL;
		}

		want = (length > bufsize-1) ? bufsize-1 : length;
		length = roxml_read(offset, want, out, src);
	}

	out[length] = '\0';
	if (size) {
		*size = length+1;
	}
	return out;
}