Beispiel #1
0
clish_xmlnode_t *clish_xmlnode_next_child(clish_xmlnode_t *parent, 
					  clish_xmlnode_t *curchild)
{
	node_t *roxc;

	if (!parent)
		return NULL;

	roxc = xmlnode_to_node(curchild);

	if (roxc) {
		return node_to_xmlnode(roxml_get_next_sibling(roxc));
	} else {
		node_t *roxp = xmlnode_to_node(parent);
		node_t *child = NULL;
		int count;

		count = roxml_get_chld_nb(roxp);
		if (count)
			child = roxml_get_chld(roxp, NULL, 0);

		return node_to_xmlnode(child);
	}

	return NULL;
}
Beispiel #2
0
static inline void ots_get_xml_node(node_t** node, const char* name) {
#if DEBUG > 1
  fprintf(stdout, "%d: %s\n", __LINE__, roxml_get_name(*node, NULL, 0));
#endif /* DEBUG > 1 */
  while ((*node != NULL) &&
         g_strcmp0(roxml_get_name(*node, NULL, 0), (const char*)name)) {
    *node = roxml_get_next_sibling(*node);
#if DEBUG > 2
    fprintf(stdout, "%s\n", roxml_get_name(*node, NULL, 0));
#endif /* DEBUG > 2 */
  }
#if DEBUG > 0
  fprintf(stdout, "%d: %s\n", __LINE__, roxml_get_name(*node, NULL, 0));
#endif /* DEBUG > 0 */
}
Beispiel #3
0
static inline void ots_append_xml_node_contents(GList** list, node_t** node) {
#if DEBUG > 1
  fprintf(stdout, "%d: %s\n", __LINE__, roxml_get_name(*node, NULL, 0));
  fprintf(stdout, "%d: %s\n", __LINE__, roxml_get_content(*node, NULL, 0, NULL));
#endif /* DEBUG > 1 */
  while (*node != NULL) {
    if (!g_strcmp0(roxml_get_name(*node, NULL, 0), (const char*)"rule")) {
      *list = g_list_append(*list,
        g_strdup(roxml_get_content(*node, NULL, 0, NULL)));
#if DEBUG > 2
  fprintf(stdout, "%s\n", roxml_get_content(*node, NULL, 0, NULL));
  fprintf(stdout, "%d: %s\n", __LINE__, roxml_get_name(*node, NULL, 0));
#endif /* DEBUG > 2 */
    }
    *node = roxml_get_next_sibling((*node));
  }
}
Beispiel #4
0
gboolean ots_load_xml_dictionary_buf(OtsArticle* Doc, const char* buffer) {
  char* head_name    = NULL;
  node_t* stem       = NULL;
  node_t* pre        = NULL;
  node_t* post       = NULL;
  node_t* syno       = NULL;        /* synonyms */
  node_t* manual     = NULL;        /* manual   */
  node_t* step1_pre  = NULL;        /* step1    */
  node_t* step1_post = NULL;        /* step1    */
  
  node_t* parse   = NULL;           /* parser rules */
  node_t* pbreak  = NULL;
  node_t* pdbreak = NULL;
  
  node_t* tc_words = NULL;          /* term count dictionary   */
  node_t* tf_words = NULL;          /* term frequency dictionary   */
  
  OtsStemRule* rule = Doc->stem;
  node_t* head = roxml_load_buf((char*)buffer);
  
  if (head == NULL) {
    fprintf(stderr, "empty document\n");
    roxml_release(RELEASE_ALL);
    return (FALSE);
  }
#if DEBUG > 1
  fprintf(stdout, "%d: %s\n", __LINE__, roxml_get_name(head, NULL, 0));
#endif /* DEBUG > 1 */
  if (g_strcmp0(roxml_get_name(head, head_name, 0), (const char*)"dictionary")) {
    fprintf(stderr, "%s\n", head_name);
    roxml_release(RELEASE_ALL);
    roxml_close(head);
    return (FALSE);
  }
  
  stem = roxml_get_chld(head, NULL, 0);
  if(stem == NULL) {
    roxml_release(RELEASE_ALL);
    roxml_close(head);
    return (FALSE);
  }
  ots_get_xml_node(&stem, (const char*)"stemmer");
  
  parse = roxml_get_chld(head, NULL, 0);
  if(parse == NULL) {
    roxml_release(RELEASE_ALL);
    roxml_close(head);
    return (FALSE);
  }
  ots_get_xml_node(&parse, (const char*)"parser");
  
  tc_words = roxml_get_chld(head, NULL, 0);
  if(tc_words == NULL) {
    roxml_release(RELEASE_ALL);
    roxml_close(head);
    return (FALSE);
  }
  ots_get_xml_node(&tc_words, (const char*)"grader-tc");
  
  tf_words = roxml_get_chld(head, NULL,  0);
  if(tf_words == NULL) {
    roxml_release(RELEASE_ALL);
    roxml_close(head);
    return (FALSE);
  }
  ots_get_xml_node(&tf_words, (const char*)"grader-tf");
  
  pre = roxml_get_chld(stem, NULL,  0);
  if(pre == NULL) {
    roxml_release(RELEASE_ALL);
    roxml_close(head);
    return (FALSE);
  }
  ots_get_xml_node(&pre, (const char*)"pre");
  
  post = roxml_get_chld(stem, NULL,  0);
  if(post == NULL) {
    roxml_release(RELEASE_ALL);
    roxml_close(head);
    return (FALSE);
  }
  ots_get_xml_node(&post, (const char*)"post");
  
  syno = roxml_get_chld(stem, NULL, 0);
  if(syno == NULL) {
    roxml_release(RELEASE_ALL);
    roxml_close(head);
    return (FALSE);
  }
  ots_get_xml_node(&syno, (const char*)"synonyms");
  
  manual = roxml_get_chld(stem, NULL, 0);
  if(manual == NULL) {
    roxml_release(RELEASE_ALL);
    roxml_close(head);
    return (FALSE);
  }
  ots_get_xml_node(&manual, (const char*)"manual");
  
  step1_pre = roxml_get_chld(stem, NULL, 0);
  if(step1_pre == NULL) {
    roxml_release(RELEASE_ALL);
    roxml_close(head);
    return (FALSE);
  }
  ots_get_xml_node(&step1_pre, (const char*)"step1_pre");
  
  step1_post = roxml_get_chld(stem, NULL, 0);
  if(step1_post == NULL) {
    roxml_release(RELEASE_ALL);
    roxml_close(head);
    return (FALSE);
  }
  ots_get_xml_node(&step1_post, (const char*)"step1_post");
  
  pre = roxml_get_chld(pre, NULL, 0); /* point to first word */
  if(pre == NULL) {
    roxml_release(RELEASE_ALL);
    roxml_close(head);
    return (FALSE);
  }
  ots_append_xml_node_contents(&rule->RemovePre, &pre);
  
  post = roxml_get_chld(post, NULL, 0);
  if(post == NULL) {
    roxml_release(RELEASE_ALL);
    roxml_close(head);
    return (FALSE);
  }
  ots_append_xml_node_contents(&rule->RemovePost, &post);
  
  syno = roxml_get_chld(syno, NULL, 0);
  if(syno == NULL) {
    roxml_release(RELEASE_ALL);
    roxml_close(head);
    return (FALSE);
  }
  ots_append_xml_node_contents(&rule->synonyms, &syno);
  
  manual = roxml_get_chld(manual, NULL, 0);
  if(manual == NULL) {
    roxml_release(RELEASE_ALL);
    roxml_close(head);
    return (FALSE);
  }
  ots_append_xml_node_contents(&rule->manual, &manual);
  
  step1_pre = roxml_get_chld(step1_pre, NULL, 0);
  if(step1_pre == NULL) {
    roxml_release(RELEASE_ALL);
    roxml_close(head);
    return (FALSE);
  }
  ots_append_xml_node_contents(&rule->step1_pre, &step1_pre);
  
  step1_post = roxml_get_chld(step1_post, NULL, 0);
  if(step1_post == NULL) {
    roxml_release(RELEASE_ALL);
    roxml_close(head);
    return (FALSE);
  }
  ots_append_xml_node_contents(&rule->step1_post, &step1_post);
  
  pbreak = roxml_get_chld(parse, NULL, 0);
  if(pbreak == NULL) {
    roxml_release(RELEASE_ALL);
    roxml_close(head);
    return (FALSE);
  }
  ots_get_xml_node(&pbreak, (const char*)"linebreak");
  
  pdbreak = roxml_get_chld(parse, NULL, 0);
  if(pdbreak == NULL) {
    roxml_release(RELEASE_ALL);
    roxml_close(head);
    return (FALSE);
  }
  ots_get_xml_node(&pdbreak, (const char*)"linedontbreak");
  
  /*Parser break*/
  pbreak = roxml_get_chld(pbreak, NULL, 0);
  if(pbreak == NULL) {
    roxml_release(RELEASE_ALL);
    roxml_close(head);
    return (FALSE);
  }
  ots_append_xml_node_contents(&rule->ParserBreak, &pbreak);
  
  /*Parser Don't break*/
  pdbreak = roxml_get_chld(pdbreak, NULL, 0);
  if(pdbreak == NULL) {
    roxml_release(RELEASE_ALL);
    roxml_close(head);
    return (FALSE);
  }
  ots_append_xml_node_contents(&rule->ParserDontBreak, &pdbreak);
  
  /* Term Count load dict */
  tc_words = roxml_get_chld(tc_words, NULL, 0);
  if(tc_words == NULL) {
    roxml_release(RELEASE_ALL);
    roxml_close(head);
    return (FALSE);
  }
#if DEBUG > 1
  fprintf(stdout, "%d: %s\n", __LINE__, roxml_get_name(tc_words, NULL, 0));
  fprintf(stdout, "%d: %s\n", __LINE__, roxml_get_content(tc_words, NULL, 0, NULL));
#endif /* DEBUG > 1 */
  while (tc_words != NULL) {
    if (!g_strcmp0(roxml_get_name(tc_words, NULL, 0), (const char*)"word")) {
      Doc->dict = g_list_prepend(Doc->dict,
        (gpointer)ots_new_wordEntery(
          (unsigned const char*)roxml_get_content(tc_words, NULL, 0, NULL)));
    }
    tc_words = roxml_get_next_sibling(tc_words);
#if DEBUG > 2
    fprintf(stdout, "%s\n", roxml_get_content(tc_words, NULL, 0, NULL));
#endif /* DEBUG > 2 */
  }
  
  /*Term Frequency load dict*/
  tf_words = roxml_get_chld(tf_words, NULL, 0);
  if(tf_words == NULL) {
    roxml_release(RELEASE_ALL);
    roxml_close(head);
    return (FALSE);
  }
#if DEBUG > 1
  fprintf(stdout, "%d: %s\n", __LINE__, roxml_get_name(tf_words, NULL, 0));
  fprintf(stdout, "%d: %s\n", __LINE__, roxml_get_content(tf_words, NULL, 0, NULL));
#endif /* DEBUG > 1 */
  while (tf_words != NULL) {
    if (!g_strcmp0(roxml_get_name(tf_words, NULL, 0), (const char*)"word")) {
      const gchar* key = g_strdup(roxml_get_content(tf_words, NULL, 0, NULL));
      node_t* idf_attr = roxml_get_attr(tf_words, "idf", 0);
      if (idf_attr != NULL) {
        const char* idf_val = roxml_get_content(idf_attr, NULL, 0, NULL);
        if (idf_val != NULL) {
#if DEBUG > 3
          fprintf(stdout, "%s\n", idf_val);
#endif /* DEBUG > 3 */
          Doc->tf_terms = g_list_append(Doc->tf_terms,
          ots_new_OtsWordTF((const unsigned char*)key, atof(idf_val)));
        }
      }
      g_free((gpointer)key);
    }
    tf_words = roxml_get_next_sibling(tf_words);
#if DEBUG > 2
    fprintf(stdout, "%s\n", roxml_get_content(tf_words, NULL, 0, NULL));
#endif /* DEBUG > 2 */
  }
  roxml_release(RELEASE_ALL);
  roxml_close(head);
  return (TRUE);
}