/*
 * Step through the children of an XML node.
 *
 * parent   - node whose children are being walked; a NULL parent yields NULL.
 * curchild - child obtained from the previous step. When xmlnode_to_node()
 *            maps it to NULL the walk starts from the first child of parent.
 *
 * Returns node_to_xmlnode() applied to the sibling following curchild, or to
 * the first child of parent when starting; a NULL node is converted when
 * parent reports zero children.
 */
clish_xmlnode_t *clish_xmlnode_next_child(clish_xmlnode_t *parent, clish_xmlnode_t *curchild)
{
	node_t *current;
	node_t *owner;
	node_t *first = NULL;

	if (parent == NULL)
		return NULL;

	/* Continue an iteration already in progress. */
	current = xmlnode_to_node(curchild);
	if (current != NULL)
		return node_to_xmlnode(roxml_get_next_sibling(current));

	/* Start a new iteration: fetch the first child only if one exists. */
	owner = xmlnode_to_node(parent);
	if (roxml_get_chld_nb(owner) != 0)
		first = roxml_get_chld(owner, NULL, 0);

	return node_to_xmlnode(first);
}
/*
 * Advance *node along its sibling chain until it points at a node whose name
 * equals `name`.
 *
 * node - in/out cursor; on entry the node to start searching from, on return
 *        the first matching node, or NULL when the chain ends without a match.
 *        A NULL `node` pointer is ignored.
 * name - node name to look for (compared with g_strcmp0).
 *
 * The DEBUG traces never hand a NULL node to roxml_get_name(); a missing node
 * is reported as "(null)" instead.
 */
static inline void ots_get_xml_node(node_t **node, const char *name)
{
    if (node == NULL) {
        return;
    }

#if DEBUG > 1
    fprintf(stdout, "%d: %s\n", __LINE__,
            (*node != NULL) ? roxml_get_name(*node, NULL, 0) : "(null)");
#endif /* DEBUG > 1 */

    while ((*node != NULL) && g_strcmp0(roxml_get_name(*node, NULL, 0), name) != 0) {
        *node = roxml_get_next_sibling(*node);
#if DEBUG > 2
        /* The sibling just fetched may be the end of the chain. */
        fprintf(stdout, "%s\n",
                (*node != NULL) ? roxml_get_name(*node, NULL, 0) : "(null)");
#endif /* DEBUG > 2 */
    }

#if DEBUG > 0
    /* *node is NULL here whenever the requested name was not found. */
    fprintf(stdout, "%d: %s\n", __LINE__,
            (*node != NULL) ? roxml_get_name(*node, NULL, 0) : "(null)");
#endif /* DEBUG > 0 */
}
/*
 * Walk the sibling chain starting at *node and append a g_strdup()'ed copy of
 * the content of every node named "rule" to *list.
 *
 * list - in/out list head, updated with the result of each g_list_append().
 * node - in/out cursor; it is advanced sibling by sibling and is NULL on
 *        return.
 */
static inline void ots_append_xml_node_contents(GList** list, node_t** node)
{
#if DEBUG > 1
    fprintf(stdout, "%d: %s\n", __LINE__, roxml_get_name(*node, NULL, 0));
    fprintf(stdout, "%d: %s\n", __LINE__, roxml_get_content(*node, NULL, 0, NULL));
#endif /* DEBUG > 1 */

    for (; *node != NULL; *node = roxml_get_next_sibling(*node)) {
        const char *tag = roxml_get_name(*node, NULL, 0);
        gchar *text;

        /* Anything that is not a <rule> element is skipped. */
        if (g_strcmp0(tag, "rule") != 0) {
            continue;
        }

        text = g_strdup(roxml_get_content(*node, NULL, 0, NULL));
        *list = g_list_append(*list, text);
#if DEBUG > 2
        fprintf(stdout, "%s\n", roxml_get_content(*node, NULL, 0, NULL));
        fprintf(stdout, "%d: %s\n", __LINE__, roxml_get_name(*node, NULL, 0));
#endif /* DEBUG > 2 */
    }
}
gboolean ots_load_xml_dictionary_buf(OtsArticle* Doc, const char* buffer) { char* head_name = NULL; node_t* stem = NULL; node_t* pre = NULL; node_t* post = NULL; node_t* syno = NULL; /* synonyms */ node_t* manual = NULL; /* manual */ node_t* step1_pre = NULL; /* step1 */ node_t* step1_post = NULL; /* step1 */ node_t* parse = NULL; /* parser rules */ node_t* pbreak = NULL; node_t* pdbreak = NULL; node_t* tc_words = NULL; /* term count dictionary */ node_t* tf_words = NULL; /* term frequency dictionary */ OtsStemRule* rule = Doc->stem; node_t* head = roxml_load_buf((char*)buffer); if (head == NULL) { fprintf(stderr, "empty document\n"); roxml_release(RELEASE_ALL); return (FALSE); } #if DEBUG > 1 fprintf(stdout, "%d: %s\n", __LINE__, roxml_get_name(head, NULL, 0)); #endif /* DEBUG > 1 */ if (g_strcmp0(roxml_get_name(head, head_name, 0), (const char*)"dictionary")) { fprintf(stderr, "%s\n", head_name); roxml_release(RELEASE_ALL); roxml_close(head); return (FALSE); } stem = roxml_get_chld(head, NULL, 0); if(stem == NULL) { roxml_release(RELEASE_ALL); roxml_close(head); return (FALSE); } ots_get_xml_node(&stem, (const char*)"stemmer"); parse = roxml_get_chld(head, NULL, 0); if(parse == NULL) { roxml_release(RELEASE_ALL); roxml_close(head); return (FALSE); } ots_get_xml_node(&parse, (const char*)"parser"); tc_words = roxml_get_chld(head, NULL, 0); if(tc_words == NULL) { roxml_release(RELEASE_ALL); roxml_close(head); return (FALSE); } ots_get_xml_node(&tc_words, (const char*)"grader-tc"); tf_words = roxml_get_chld(head, NULL, 0); if(tf_words == NULL) { roxml_release(RELEASE_ALL); roxml_close(head); return (FALSE); } ots_get_xml_node(&tf_words, (const char*)"grader-tf"); pre = roxml_get_chld(stem, NULL, 0); if(pre == NULL) { roxml_release(RELEASE_ALL); roxml_close(head); return (FALSE); } ots_get_xml_node(&pre, (const char*)"pre"); post = roxml_get_chld(stem, NULL, 0); if(post == NULL) { roxml_release(RELEASE_ALL); roxml_close(head); return (FALSE); } 
ots_get_xml_node(&post, (const char*)"post"); syno = roxml_get_chld(stem, NULL, 0); if(syno == NULL) { roxml_release(RELEASE_ALL); roxml_close(head); return (FALSE); } ots_get_xml_node(&syno, (const char*)"synonyms"); manual = roxml_get_chld(stem, NULL, 0); if(manual == NULL) { roxml_release(RELEASE_ALL); roxml_close(head); return (FALSE); } ots_get_xml_node(&manual, (const char*)"manual"); step1_pre = roxml_get_chld(stem, NULL, 0); if(step1_pre == NULL) { roxml_release(RELEASE_ALL); roxml_close(head); return (FALSE); } ots_get_xml_node(&step1_pre, (const char*)"step1_pre"); step1_post = roxml_get_chld(stem, NULL, 0); if(step1_post == NULL) { roxml_release(RELEASE_ALL); roxml_close(head); return (FALSE); } ots_get_xml_node(&step1_post, (const char*)"step1_post"); pre = roxml_get_chld(pre, NULL, 0); /* point to first word */ if(pre == NULL) { roxml_release(RELEASE_ALL); roxml_close(head); return (FALSE); } ots_append_xml_node_contents(&rule->RemovePre, &pre); post = roxml_get_chld(post, NULL, 0); if(post == NULL) { roxml_release(RELEASE_ALL); roxml_close(head); return (FALSE); } ots_append_xml_node_contents(&rule->RemovePost, &post); syno = roxml_get_chld(syno, NULL, 0); if(syno == NULL) { roxml_release(RELEASE_ALL); roxml_close(head); return (FALSE); } ots_append_xml_node_contents(&rule->synonyms, &syno); manual = roxml_get_chld(manual, NULL, 0); if(manual == NULL) { roxml_release(RELEASE_ALL); roxml_close(head); return (FALSE); } ots_append_xml_node_contents(&rule->manual, &manual); step1_pre = roxml_get_chld(step1_pre, NULL, 0); if(step1_pre == NULL) { roxml_release(RELEASE_ALL); roxml_close(head); return (FALSE); } ots_append_xml_node_contents(&rule->step1_pre, &step1_pre); step1_post = roxml_get_chld(step1_post, NULL, 0); if(step1_post == NULL) { roxml_release(RELEASE_ALL); roxml_close(head); return (FALSE); } ots_append_xml_node_contents(&rule->step1_post, &step1_post); pbreak = roxml_get_chld(parse, NULL, 0); if(pbreak == NULL) { roxml_release(RELEASE_ALL); 
roxml_close(head); return (FALSE); } ots_get_xml_node(&pbreak, (const char*)"linebreak"); pdbreak = roxml_get_chld(parse, NULL, 0); if(pdbreak == NULL) { roxml_release(RELEASE_ALL); roxml_close(head); return (FALSE); } ots_get_xml_node(&pdbreak, (const char*)"linedontbreak"); /*Parser break*/ pbreak = roxml_get_chld(pbreak, NULL, 0); if(pbreak == NULL) { roxml_release(RELEASE_ALL); roxml_close(head); return (FALSE); } ots_append_xml_node_contents(&rule->ParserBreak, &pbreak); /*Parser Don't break*/ pdbreak = roxml_get_chld(pdbreak, NULL, 0); if(pdbreak == NULL) { roxml_release(RELEASE_ALL); roxml_close(head); return (FALSE); } ots_append_xml_node_contents(&rule->ParserDontBreak, &pdbreak); /* Term Count load dict */ tc_words = roxml_get_chld(tc_words, NULL, 0); if(tc_words == NULL) { roxml_release(RELEASE_ALL); roxml_close(head); return (FALSE); } #if DEBUG > 1 fprintf(stdout, "%d: %s\n", __LINE__, roxml_get_name(tc_words, NULL, 0)); fprintf(stdout, "%d: %s\n", __LINE__, roxml_get_content(tc_words, NULL, 0, NULL)); #endif /* DEBUG > 1 */ while (tc_words != NULL) { if (!g_strcmp0(roxml_get_name(tc_words, NULL, 0), (const char*)"word")) { Doc->dict = g_list_prepend(Doc->dict, (gpointer)ots_new_wordEntery( (unsigned const char*)roxml_get_content(tc_words, NULL, 0, NULL))); } tc_words = roxml_get_next_sibling(tc_words); #if DEBUG > 2 fprintf(stdout, "%s\n", roxml_get_content(tc_words, NULL, 0, NULL)); #endif /* DEBUG > 2 */ } /*Term Frequency load dict*/ tf_words = roxml_get_chld(tf_words, NULL, 0); if(tf_words == NULL) { roxml_release(RELEASE_ALL); roxml_close(head); return (FALSE); } #if DEBUG > 1 fprintf(stdout, "%d: %s\n", __LINE__, roxml_get_name(tf_words, NULL, 0)); fprintf(stdout, "%d: %s\n", __LINE__, roxml_get_content(tf_words, NULL, 0, NULL)); #endif /* DEBUG > 1 */ while (tf_words != NULL) { if (!g_strcmp0(roxml_get_name(tf_words, NULL, 0), (const char*)"word")) { const gchar* key = g_strdup(roxml_get_content(tf_words, NULL, 0, NULL)); node_t* idf_attr = 
roxml_get_attr(tf_words, "idf", 0); if (idf_attr != NULL) { const char* idf_val = roxml_get_content(idf_attr, NULL, 0, NULL); if (idf_val != NULL) { #if DEBUG > 3 fprintf(stdout, "%s\n", idf_val); #endif /* DEBUG > 3 */ Doc->tf_terms = g_list_append(Doc->tf_terms, ots_new_OtsWordTF((const unsigned char*)key, atof(idf_val))); } } g_free((gpointer)key); } tf_words = roxml_get_next_sibling(tf_words); #if DEBUG > 2 fprintf(stdout, "%s\n", roxml_get_content(tf_words, NULL, 0, NULL)); #endif /* DEBUG > 2 */ } roxml_release(RELEASE_ALL); roxml_close(head); return (TRUE); }