Esempio n. 1
0
/**
 * Compute the 1-based position of a node among its same-named siblings.
 *
 * The children of the node's parent are walked from the first child up to
 * (but not including) the node itself; each one whose name equals the
 * node's name bumps the position by one.
 *
 * @param n node to locate
 * @return 0 when n is NULL, 1 when n has no parent, otherwise one plus the
 *         number of preceding siblings that share n's name
 */
int ROXML_API roxml_get_node_position(node_t *n)
{
	char wanted[256];
	char current[256];
	node_t *parent;
	node_t *cursor;
	int position = 1;

	if (n == NULL) {
		return 0;
	}

	roxml_get_name(n, wanted, 256);

	parent = n->prnt;
	if (parent == NULL) {
		return 1;
	}

	/* Stop as soon as we reach n itself (or run out of siblings). */
	for (cursor = parent->chld; cursor != NULL && cursor != n; cursor = cursor->sibl) {
		roxml_get_name(cursor, current, 256);
		if (strcmp(wanted, current) == 0) {
			position++;
		}
	}

	return position;
}
Esempio n. 2
0
/**
 * Write the opening tag of an XML node, with its attributes, to a stream.
 *
 * Output has the form <name attr='value' ...>. Nothing is written when the
 * node's name cannot be obtained. An attribute is printed only when both
 * its name and its content were obtained.
 *
 * @param node node to print
 * @param out  destination stream
 */
void clish_xmlnode_print(clish_xmlnode_t *node, FILE *out)
{
	node_t *elem = xmlnode_to_node(node);
	char *tag = roxml_get_name(elem, NULL, 0);

	if (!tag)
		return;

	fprintf(out, "<%s", tag);
	roxml_release(tag);

	if (roxml_get_attr_nb(elem) != 0) {
		const int total = roxml_get_attr_nb(elem);
		int i = 0;

		while (i < total) {
			node_t *attr = roxml_get_attr(elem, NULL, i);
			char *key = roxml_get_name(attr, NULL, 0);
			char *val = roxml_get_content(attr, NULL, 0, NULL);

			if (key != NULL && val != NULL)
				fprintf(out, " %s='%s'", key, val);

			/* Release in the same order as before: value first, then name. */
			if (val != NULL)
				roxml_release(val);
			if (key != NULL)
				roxml_release(key);

			++i;
		}
	}

	fprintf(out, ">");
}
Esempio n. 3
0
/*
 * Advance *node along its sibling chain until it points at a node whose
 * name equals `name`. If no such sibling exists, *node ends up NULL.
 * A NULL *node on entry is left untouched.
 *
 * NOTE(review): the strings returned by roxml_get_name() are not released
 * here; presumably the caller reclaims them with roxml_release() - confirm.
 */
static inline void ots_get_xml_node(node_t** node, const char* name) {
#if DEBUG > 1
  fprintf(stdout, "%d: %s\n", __LINE__, roxml_get_name(*node, NULL, 0));
#endif /* DEBUG > 1 */
  for (;;) {
    if (*node == NULL) {
      break;                        /* ran off the end of the chain */
    }
    if (g_strcmp0(roxml_get_name(*node, NULL, 0), name) == 0) {
      break;                        /* found the requested node */
    }
    *node = roxml_get_next_sibling(*node);
#if DEBUG > 2
    fprintf(stdout, "%s\n", roxml_get_name(*node, NULL, 0));
#endif /* DEBUG > 2 */
  }
#if DEBUG > 0
  fprintf(stdout, "%d: %s\n", __LINE__, roxml_get_name(*node, NULL, 0));
#endif /* DEBUG > 0 */
}
Esempio n. 4
0
/**
 * Find the root of the tree a node belongs to.
 *
 * The parent chain is climbed to the topmost node. If that node's first
 * child is a processing instruction named "xml" and exactly one element
 * node follows it among its siblings, that lone element is returned
 * instead of the topmost node.
 *
 * @param n any node of the tree
 * @return the root node, or NULL when n is NULL
 */
node_t * ROXML_API roxml_get_root(node_t *n)
{
	node_t *top;
	node_t *first;
	node_t *sib;
	node_t *only_elm = NULL;
	int elm_count = 0;
	char pi_name[16];

	if (n == NULL) {
		return NULL;
	}

	top = n;
	while (top->prnt != NULL) {
		top = top->prnt;
	}

	first = top->chld;
	if (first == NULL || (first->type & ROXML_NODE_TYPES) != ROXML_PI_NODE) {
		return top;
	}

	if (strcmp(roxml_get_name(first, pi_name, 16), "xml") != 0) {
		return top;
	}

	/* Count the element nodes that follow the <?xml ...?> declaration. */
	for (sib = first->sibl; sib != NULL; sib = sib->sibl) {
		if (sib->type & ROXML_ELM_NODE) {
			only_elm = sib;
			elm_count++;
		}
	}

	return (elm_count == 1) ? only_elm : top;
}
Esempio n. 5
0
/**
 * Get a child of a node, either by index or by name.
 *
 * When name is NULL the nth child (0-based) is returned; a non-positive
 * nth yields the first child. When name is non-NULL, the first child
 * whose name equals it is returned and nth is ignored.
 *
 * @param n    parent node
 * @param name child name to search for, or NULL to select by index
 * @param nth  0-based child index, used only when name is NULL
 * @return the selected child, or NULL when n is NULL, when n has fewer
 *         than nth+1 children, or when no child carries the given name
 */
node_t * ROXML_API roxml_get_chld(node_t *n, char * name, int nth)
{
	node_t *ptr;

	if(n == NULL) {
		return NULL;
	}

	ptr = n->chld;
	if(name == NULL)	{
		int count = 0;

		/* ptr must be tested as well as ptr->sibl: a childless node has
		 * chld == NULL and would otherwise be dereferenced for nth > 0. */
		while((ptr != NULL)&&(ptr->sibl != NULL)&&(nth > count)) {
			count++;
			ptr = ptr->sibl;
		}
		if(nth > count)	{ return NULL; }
		return ptr;
	}

	while(ptr) {
		int ans = strcmp(roxml_get_name(ptr, NULL, 0), name);
		/* Drop the temporary name buffer obtained just above. */
		roxml_release(RELEASE_LAST);
		if(ans == 0)	{
			return ptr;
		}
		ptr = ptr->sibl;
	}
	return NULL;
}
Esempio n. 6
0
/*
 * Walk *node and all its following siblings; for every node named "rule",
 * append a g_strdup()'ed copy of its content to *list.
 * On return *node is NULL (the whole chain has been consumed).
 */
static inline void ots_append_xml_node_contents(GList** list, node_t** node) {
#if DEBUG > 1
  fprintf(stdout, "%d: %s\n", __LINE__, roxml_get_name(*node, NULL, 0));
  fprintf(stdout, "%d: %s\n", __LINE__, roxml_get_content(*node, NULL, 0, NULL));
#endif /* DEBUG > 1 */
  for (; *node != NULL; *node = roxml_get_next_sibling(*node)) {
    if (g_strcmp0(roxml_get_name(*node, NULL, 0), "rule") != 0) {
      continue;                     /* not a rule entry, skip it */
    }
    *list = g_list_append(*list,
      g_strdup(roxml_get_content(*node, NULL, 0, NULL)));
#if DEBUG > 2
    fprintf(stdout, "%s\n", roxml_get_content(*node, NULL, 0, NULL));
    fprintf(stdout, "%d: %s\n", __LINE__, roxml_get_name(*node, NULL, 0));
#endif /* DEBUG > 2 */
  }
}
Esempio n. 7
0
/*
 * Copy the name of node n into the caller's buffer v.
 *
 * vl is in/out: on entry it holds the capacity of v in bytes.
 *  - name fits (including the terminating NUL): it is copied to v,
 *    *vl is left unchanged and 0 is returned;
 *  - name does not fit: *vl is set to the required size and -E2BIG is
 *    returned;
 *  - name could not be obtained: *vl is set to (unsigned int)-1 and
 *    -ENOMEM is returned.
 */
static int i_get_name(node_t *n, char *v, unsigned int *vl)
{
	char *name = roxml_get_name(n, NULL, 0);
	int needed;
	int rc;

	if (!name) {
		*vl = (unsigned int)-1;
		return -ENOMEM;
	}

	needed = strlen(name) + 1;	/* bytes required, NUL included */
	if (needed <= *vl) {
		sprintf(v, "%s", name);
		rc = 0;
	} else {
		*vl = needed;
		rc = -E2BIG;
	}

	roxml_release(name);
	return rc;
}
Esempio n. 8
0
gboolean ots_load_xml_dictionary_buf(OtsArticle* Doc, const char* buffer) {
  char* head_name    = NULL;
  node_t* stem       = NULL;
  node_t* pre        = NULL;
  node_t* post       = NULL;
  node_t* syno       = NULL;        /* synonyms */
  node_t* manual     = NULL;        /* manual   */
  node_t* step1_pre  = NULL;        /* step1    */
  node_t* step1_post = NULL;        /* step1    */
  
  node_t* parse   = NULL;           /* parser rules */
  node_t* pbreak  = NULL;
  node_t* pdbreak = NULL;
  
  node_t* tc_words = NULL;          /* term count dictionary   */
  node_t* tf_words = NULL;          /* term frequency dictionary   */
  
  OtsStemRule* rule = Doc->stem;
  node_t* head = roxml_load_buf((char*)buffer);
  
  if (head == NULL) {
    fprintf(stderr, "empty document\n");
    roxml_release(RELEASE_ALL);
    return (FALSE);
  }
#if DEBUG > 1
  fprintf(stdout, "%d: %s\n", __LINE__, roxml_get_name(head, NULL, 0));
#endif /* DEBUG > 1 */
  if (g_strcmp0(roxml_get_name(head, head_name, 0), (const char*)"dictionary")) {
    fprintf(stderr, "%s\n", head_name);
    roxml_release(RELEASE_ALL);
    roxml_close(head);
    return (FALSE);
  }
  
  stem = roxml_get_chld(head, NULL, 0);
  if(stem == NULL) {
    roxml_release(RELEASE_ALL);
    roxml_close(head);
    return (FALSE);
  }
  ots_get_xml_node(&stem, (const char*)"stemmer");
  
  parse = roxml_get_chld(head, NULL, 0);
  if(parse == NULL) {
    roxml_release(RELEASE_ALL);
    roxml_close(head);
    return (FALSE);
  }
  ots_get_xml_node(&parse, (const char*)"parser");
  
  tc_words = roxml_get_chld(head, NULL, 0);
  if(tc_words == NULL) {
    roxml_release(RELEASE_ALL);
    roxml_close(head);
    return (FALSE);
  }
  ots_get_xml_node(&tc_words, (const char*)"grader-tc");
  
  tf_words = roxml_get_chld(head, NULL,  0);
  if(tf_words == NULL) {
    roxml_release(RELEASE_ALL);
    roxml_close(head);
    return (FALSE);
  }
  ots_get_xml_node(&tf_words, (const char*)"grader-tf");
  
  pre = roxml_get_chld(stem, NULL,  0);
  if(pre == NULL) {
    roxml_release(RELEASE_ALL);
    roxml_close(head);
    return (FALSE);
  }
  ots_get_xml_node(&pre, (const char*)"pre");
  
  post = roxml_get_chld(stem, NULL,  0);
  if(post == NULL) {
    roxml_release(RELEASE_ALL);
    roxml_close(head);
    return (FALSE);
  }
  ots_get_xml_node(&post, (const char*)"post");
  
  syno = roxml_get_chld(stem, NULL, 0);
  if(syno == NULL) {
    roxml_release(RELEASE_ALL);
    roxml_close(head);
    return (FALSE);
  }
  ots_get_xml_node(&syno, (const char*)"synonyms");
  
  manual = roxml_get_chld(stem, NULL, 0);
  if(manual == NULL) {
    roxml_release(RELEASE_ALL);
    roxml_close(head);
    return (FALSE);
  }
  ots_get_xml_node(&manual, (const char*)"manual");
  
  step1_pre = roxml_get_chld(stem, NULL, 0);
  if(step1_pre == NULL) {
    roxml_release(RELEASE_ALL);
    roxml_close(head);
    return (FALSE);
  }
  ots_get_xml_node(&step1_pre, (const char*)"step1_pre");
  
  step1_post = roxml_get_chld(stem, NULL, 0);
  if(step1_post == NULL) {
    roxml_release(RELEASE_ALL);
    roxml_close(head);
    return (FALSE);
  }
  ots_get_xml_node(&step1_post, (const char*)"step1_post");
  
  pre = roxml_get_chld(pre, NULL, 0); /* point to first word */
  if(pre == NULL) {
    roxml_release(RELEASE_ALL);
    roxml_close(head);
    return (FALSE);
  }
  ots_append_xml_node_contents(&rule->RemovePre, &pre);
  
  post = roxml_get_chld(post, NULL, 0);
  if(post == NULL) {
    roxml_release(RELEASE_ALL);
    roxml_close(head);
    return (FALSE);
  }
  ots_append_xml_node_contents(&rule->RemovePost, &post);
  
  syno = roxml_get_chld(syno, NULL, 0);
  if(syno == NULL) {
    roxml_release(RELEASE_ALL);
    roxml_close(head);
    return (FALSE);
  }
  ots_append_xml_node_contents(&rule->synonyms, &syno);
  
  manual = roxml_get_chld(manual, NULL, 0);
  if(manual == NULL) {
    roxml_release(RELEASE_ALL);
    roxml_close(head);
    return (FALSE);
  }
  ots_append_xml_node_contents(&rule->manual, &manual);
  
  step1_pre = roxml_get_chld(step1_pre, NULL, 0);
  if(step1_pre == NULL) {
    roxml_release(RELEASE_ALL);
    roxml_close(head);
    return (FALSE);
  }
  ots_append_xml_node_contents(&rule->step1_pre, &step1_pre);
  
  step1_post = roxml_get_chld(step1_post, NULL, 0);
  if(step1_post == NULL) {
    roxml_release(RELEASE_ALL);
    roxml_close(head);
    return (FALSE);
  }
  ots_append_xml_node_contents(&rule->step1_post, &step1_post);
  
  pbreak = roxml_get_chld(parse, NULL, 0);
  if(pbreak == NULL) {
    roxml_release(RELEASE_ALL);
    roxml_close(head);
    return (FALSE);
  }
  ots_get_xml_node(&pbreak, (const char*)"linebreak");
  
  pdbreak = roxml_get_chld(parse, NULL, 0);
  if(pdbreak == NULL) {
    roxml_release(RELEASE_ALL);
    roxml_close(head);
    return (FALSE);
  }
  ots_get_xml_node(&pdbreak, (const char*)"linedontbreak");
  
  /*Parser break*/
  pbreak = roxml_get_chld(pbreak, NULL, 0);
  if(pbreak == NULL) {
    roxml_release(RELEASE_ALL);
    roxml_close(head);
    return (FALSE);
  }
  ots_append_xml_node_contents(&rule->ParserBreak, &pbreak);
  
  /*Parser Don't break*/
  pdbreak = roxml_get_chld(pdbreak, NULL, 0);
  if(pdbreak == NULL) {
    roxml_release(RELEASE_ALL);
    roxml_close(head);
    return (FALSE);
  }
  ots_append_xml_node_contents(&rule->ParserDontBreak, &pdbreak);
  
  /* Term Count load dict */
  tc_words = roxml_get_chld(tc_words, NULL, 0);
  if(tc_words == NULL) {
    roxml_release(RELEASE_ALL);
    roxml_close(head);
    return (FALSE);
  }
#if DEBUG > 1
  fprintf(stdout, "%d: %s\n", __LINE__, roxml_get_name(tc_words, NULL, 0));
  fprintf(stdout, "%d: %s\n", __LINE__, roxml_get_content(tc_words, NULL, 0, NULL));
#endif /* DEBUG > 1 */
  while (tc_words != NULL) {
    if (!g_strcmp0(roxml_get_name(tc_words, NULL, 0), (const char*)"word")) {
      Doc->dict = g_list_prepend(Doc->dict,
        (gpointer)ots_new_wordEntery(
          (unsigned const char*)roxml_get_content(tc_words, NULL, 0, NULL)));
    }
    tc_words = roxml_get_next_sibling(tc_words);
#if DEBUG > 2
    fprintf(stdout, "%s\n", roxml_get_content(tc_words, NULL, 0, NULL));
#endif /* DEBUG > 2 */
  }
  
  /*Term Frequency load dict*/
  tf_words = roxml_get_chld(tf_words, NULL, 0);
  if(tf_words == NULL) {
    roxml_release(RELEASE_ALL);
    roxml_close(head);
    return (FALSE);
  }
#if DEBUG > 1
  fprintf(stdout, "%d: %s\n", __LINE__, roxml_get_name(tf_words, NULL, 0));
  fprintf(stdout, "%d: %s\n", __LINE__, roxml_get_content(tf_words, NULL, 0, NULL));
#endif /* DEBUG > 1 */
  while (tf_words != NULL) {
    if (!g_strcmp0(roxml_get_name(tf_words, NULL, 0), (const char*)"word")) {
      const gchar* key = g_strdup(roxml_get_content(tf_words, NULL, 0, NULL));
      node_t* idf_attr = roxml_get_attr(tf_words, "idf", 0);
      if (idf_attr != NULL) {
        const char* idf_val = roxml_get_content(idf_attr, NULL, 0, NULL);
        if (idf_val != NULL) {
#if DEBUG > 3
          fprintf(stdout, "%s\n", idf_val);
#endif /* DEBUG > 3 */
          Doc->tf_terms = g_list_append(Doc->tf_terms,
          ots_new_OtsWordTF((const unsigned char*)key, atof(idf_val)));
        }
      }
      g_free((gpointer)key);
    }
    tf_words = roxml_get_next_sibling(tf_words);
#if DEBUG > 2
    fprintf(stdout, "%s\n", roxml_get_content(tf_words, NULL, 0, NULL));
#endif /* DEBUG > 2 */
  }
  roxml_release(RELEASE_ALL);
  roxml_close(head);
  return (TRUE);
}
Esempio n. 9
0
/* Return the first child of n whose node type intersects `type`, or NULL. */
static node_t *roxml_nodes_first_typed_child(node_t *n, int type)
{
	node_t *kid = n->chld;

	while (kid && !((kid->type & ROXML_NODE_TYPES) & type)) {
		kid = kid->sibl;
	}
	return kid;
}

/**
 * Get a namespace, attribute or child node of n, by name or by index.
 *
 * By name (name != NULL): the attributes are searched first when n has
 * some and `type` includes ROXML_ATTR_NODE, then the children whose type
 * intersects `type`; the first node with an equal name is returned and nth
 * is ignored.
 *
 * By index (name == NULL): the namespace node is considered first when n
 * has one and `type` includes ROXML_NS_NODE, else the attributes when n
 * has some and `type` includes ROXML_ATTR_NODE, else the children of the
 * requested type. If the index has not been reached by then, counting
 * continues through the children of the requested type.
 *
 * @param n    node to inspect
 * @param type bitmask of node types to consider
 * @param name name to look for, or NULL to select by index
 * @param nth  0-based index, used only when name is NULL
 * @return the selected node, or NULL when n is NULL or nothing matches
 */
node_t * ROXML_API roxml_get_nodes(node_t *n, int type, char * name, int nth)
{
	node_t *cur = NULL;
	int seen = 0;

	if (n == NULL) {
		return NULL;
	}

	if (name != NULL) {
		if (n->attr && (type & ROXML_ATTR_NODE)) {
			for (cur = n->attr; cur != NULL; cur = cur->sibl) {
				int diff = strcmp(roxml_get_name(cur, NULL, 0), name);
				roxml_release(RELEASE_LAST);
				if (diff == 0) {
					return cur;
				}
			}
		}
		for (cur = n->chld; cur != NULL; cur = cur->sibl) {
			if ((cur->type & ROXML_NODE_TYPES) & type) {
				int diff = strcmp(roxml_get_name(cur, NULL, 0), name);
				roxml_release(RELEASE_LAST);
				if (diff == 0) {
					return cur;
				}
			}
		}
		return NULL;
	}

	if (n->ns && (type & ROXML_NS_NODE)) {
		cur = n->ns;
		if (nth == 0) {
			return cur;
		}
	} else if (n->attr && (type & ROXML_ATTR_NODE)) {
		cur = n->attr;
		if (nth == 0) {
			return cur;
		}
		while ((cur->sibl) && (seen < nth)) {
			cur = cur->sibl;
			seen++;
		}
	} else {
		cur = roxml_nodes_first_typed_child(n, type);
	}

	/* Index not reached yet: keep counting among the typed children. */
	if (seen < nth) {
		cur = roxml_nodes_first_typed_child(n, type);
		while (cur && (cur->sibl) && (seen < nth)) {
			cur = cur->sibl;
			if ((cur->type & ROXML_NODE_TYPES) & type) {
				seen++;
			}
		}
	}

	return (seen < nth) ? NULL : cur;
}
Esempio n. 10
0
/**
 * Read the textual content of a node into a NUL-terminated string.
 *
 * For an element node the contents of all its direct text children are
 * concatenated. For doctype, text, comment, processing-instruction and
 * attribute nodes the node's own span is read, skipping a type-specific
 * prefix (for attributes, the span of the attribute's child node is read).
 *
 * @param n       node to read; when NULL an empty string is produced in
 *                buffer (or NULL is returned if buffer is NULL)
 * @param buffer  destination, or NULL to have one obtained from
 *                roxml_malloc()
 * @param bufsize capacity of buffer in bytes; ignored when buffer is NULL
 * @param size    if non-NULL, receives the number of bytes stored including
 *                the terminating NUL (0 when n is NULL)
 * @return the destination string, or NULL when n is NULL with no buffer or
 *         when the allocation failed
 */
char * ROXML_API roxml_get_content(node_t *n, char * buffer, int bufsize, int *size)
{
	char *out = buffer;
	int len = 0;

	if (n == NULL) {
		if (size != NULL) {
			*size = 0;
		}
		if (buffer == NULL) {
			return NULL;
		}
		strcpy(buffer, "");
		return buffer;
	}

	if (n->type & ROXML_ELM_NODE) {
		node_t *kid;

		/* First pass: total length of all direct text children. */
		for (kid = n->chld; kid != NULL; kid = kid->sibl) {
			if ((kid->type & ROXML_NODE_TYPES) == ROXML_TXT_NODE) {
				len += kid->end - kid->pos;
			}
		}

		if (out == NULL) {
			out = roxml_malloc(sizeof(char), len + 1, PTR_CHAR);
			bufsize = len + 1;
		}
		if (out == NULL) {
			return NULL;
		}

		/* Second pass: copy each text child, clamped to the room left. */
		len = 0;
		for (kid = n->chld; kid != NULL; kid = kid->sibl) {
			if ((kid->type & ROXML_NODE_TYPES) == ROXML_TXT_NODE) {
				int chunk = kid->end - kid->pos;

				if (len + chunk > bufsize - 1) {
					chunk = bufsize - len - 1;
				}
				len += roxml_read(kid->pos, chunk, out + len, kid);
			}
		}
	} else {
		char tag[ROXML_BASE_LEN];
		node_t *src = n;
		int tag_len;
		int start = 0;
		int want;

		roxml_get_name(n, tag, ROXML_BASE_LEN);
		tag_len = strlen(tag);

		/* Work out where the payload starts and how long it is. */
		if (n->type & ROXML_DOCTYPE_NODE) {
			start = src->pos + tag_len + 2;
			len = src->end - src->pos - tag_len - 2;
		} else if (n->type & ROXML_TXT_NODE) {
			start = src->pos;
			len = src->end - src->pos;
		} else if (n->type & ROXML_CMT_NODE) {
			start = src->pos + 4;
			len = src->end - src->pos - 4;
		} else if (n->type & ROXML_PI_NODE) {
			start = src->pos + tag_len + 3;
			len = src->end - src->pos - tag_len - 3;
		} else if (n->type & ROXML_ATTR_NODE) {
			src = n->chld;
			if (src != NULL) {
				start = src->pos;
				len = src->end - src->pos;
			} else {
				start = 0;
				len = 0;
			}
		}

		if (out == NULL) {
			out = roxml_malloc(sizeof(char), len + 1, PTR_CHAR);
			bufsize = len + 1;
		}
		if (out == NULL) {
			return NULL;
		}

		want = (len > bufsize - 1) ? bufsize - 1 : len;
		len = roxml_read(start, want, out, src);
	}

	out[len] = '\0';
	if (size != NULL) {
		*size = len + 1;
	}
	return out;
}