예제 #1
0
파일: rule.c 프로젝트: dotSlashLu/nodescws
/*
 * Apply the members of a JSON attribute node to a single rule item.
 *
 * rules - rule set used to resolve the names listed under "include" and
 *         "exclude" (looked up with _rule_index_get)
 * rule  - rule item updated in place
 * attrs - cJSON node whose children are walked; recognised keys are
 *         "tf", "idf", "attr", "znum", "type", "include" and "exclude".
 *         Any other key is ignored.
 *
 * Members that have no key, or whose string payload is missing where one is
 * required ("attr", "type", entries of "include"/"exclude"), are skipped
 * instead of being handed to the string functions as NULL.
 */
static void scws_rule_json_set_attrs(rule_t rules, rule_item_t rule, cJSON *attrs)
{
        cJSON *json_attr;

        if (attrs == NULL || rule == NULL)
                return;

        for (json_attr = attrs->child; json_attr != NULL; json_attr = json_attr->next) {
                const char *attrname = json_attr->string;

                /* children of an array carry no key; strcmp(NULL, ...) is undefined */
                if (attrname == NULL)
                        continue;

                if (!strcmp(attrname, "tf"))
                        rule->tf = (float)json_attr->valuedouble;
                else if (!strcmp(attrname, "idf"))
                        rule->idf = (float)json_attr->valuedouble;
                else if (!strcmp(attrname, "attr")) {
                        /* copies at most two bytes, exactly as before */
                        if (json_attr->valuestring != NULL)
                                strncpy(rule->attr, json_attr->valuestring, 2);
                }
                else if (!strcmp(attrname, "znum")) {
                        // znum: {min: a, max: b} | a
                        if (json_attr->type == cJSON_Object) {
                                cJSON *bound;

                                if ((bound = cJSON_GetObjectItem(json_attr, "min")) != NULL)
                                        rule->zmin = (int)bound->valueint;
                                /*
                                 * NOTE(review): no range flag is raised here when "max"
                                 * is supplied - confirm whether consumers of zmax
                                 * expect one.
                                 */
                                if ((bound = cJSON_GetObjectItem(json_attr, "max")) != NULL)
                                        rule->zmax = (int)bound->valueint;
                        }
                        else if (json_attr->type == cJSON_Number)
                                rule->zmin = (int)json_attr->valueint;
                }
                else if (!strcmp(attrname, "type")) {
                        const char *kind = json_attr->valuestring;

                        if (kind == NULL)
                                continue;
                        /* only the first six bytes are compared */
                        if (!strncmp(kind, "prefix", 6))
                                rule->flag |= SCWS_ZRULE_PREFIX;
                        else if (!strncmp(kind, "suffix", 6))
                                rule->flag |= SCWS_ZRULE_SUFFIX;
                }
                else if (!strcmp(attrname, "include") || !strcmp(attrname, "exclude")) {
                        unsigned int *clude;
                        cJSON *entry;
                        int j;

                        if (!strcmp(attrname, "include")) {
                                clude = &rule->inc;
                                rule->flag |= SCWS_ZRULE_INCLUDE;
                        }
                        else {
                                clude = &rule->exc;
                                rule->flag |= SCWS_ZRULE_EXCLUDE;
                        }

                        /*
                         * Walk the child list once instead of asking cJSON for the
                         * size and the k-th item on every iteration.
                         */
                        for (entry = json_attr->child; entry != NULL; entry = entry->next) {
                                if (entry->valuestring == NULL)
                                        continue;
                                /* names that are not known rules are silently ignored */
                                if ((j = _rule_index_get(rules, entry->valuestring)) >= 0)
                                        *clude |= rules->items[j].bit;
                        }
                }
        }
}
예제 #2
0
파일: rule.c 프로젝트: liangxiao/scws
/*
 * Load a rule set from a text rule file.
 *
 * fpath - path of the rule file, opened in text mode for reading
 * mblen - table indexed by a byte value giving the length in bytes of the
 *         character that starts with that byte; used only for sections
 *         read per character (":line = no")
 *
 * File layout, as parsed below:
 *   [name]         starts a rule section (names of 1..15 bytes; "attrs" is
 *                  reserved for the attribute section)
 *   [attrs]        starts the attribute section, whose lines have the form
 *                  "a1(n1) + a2(n2) = ratio"
 *   ; ...          a comment line
 *   :key = value   section parameter: line, znum, include, exclude, type,
 *                  tf, idf or attr
 *   anything else  an entry of the current section, stored in the tree
 *                  either as a whole line or one character at a time
 *
 * Returns the new rule set, or NULL when fpath is NULL, the file cannot be
 * opened, or the rule set or its tree cannot be allocated.
 */
rule_t scws_rule_new(const char *fpath, unsigned char *mblen)
{
	FILE *fp;
	rule_t r;
	rule_item_t cr;
	int i, j, rbl, aflag;
	rule_attr_t a, rtail = NULL;
	unsigned char buf[512], *str, *ptr, *qtr;

	/* no path given or open file failed */
	if (fpath == NULL || (fp = fopen(fpath, "r")) == NULL)
		return NULL;

	/* alloc the memory */
	r = (rule_t) malloc(sizeof(rule_st));
	if (r == NULL)
	{
		fclose(fp);
		return NULL;
	}
	memset(r, 0, sizeof(rule_st));

	/* quick scan to add the name to list */
	i = j = rbl = aflag = 0;
	while (fgets(buf, sizeof(buf)-1, fp))
	{
		if (buf[0] != '[' || !(ptr = strchr(buf, ']')))
			continue;

		str = buf + 1;
		*ptr = '\0';
		/* skip empty names, over-long names and the reserved "attrs" section */
		if (ptr == str || (ptr-str) > 15 || !strcasecmp(str, "attrs"))
			continue;

		/* a section name that was already registered keeps its first slot */
		if (_rule_index_get(r, str) >= 0)
			continue;

		strcpy(r->items[i].name, str);
		r->items[i].tf = 5.0;
		r->items[i].idf = 3.5;
		strncpy(r->items[i].attr, "un", 2);
		if (!strcasecmp(str, "special"))
			r->items[i].bit = SCWS_RULE_SPECIAL;
		else if (!strcasecmp(str, "nostats"))
			r->items[i].bit = SCWS_RULE_NOSTATS;
		else
		{
			/* unsigned shift: a signed 1 shifted into the top bit is undefined */
			r->items[i].bit = (1U<<j);
			j++;
		}

		if (++i >= SCWS_RULE_MAX)
			break;
	}
	rewind(fp);

	/* load the tree data */
	if ((r->tree = xtree_new(0, 1)) == NULL)
	{
		fclose(fp);
		free(r);
		return NULL;
	}
	cr = NULL;
	while (fgets(buf, sizeof(buf)-1, fp))
	{
		if (buf[0] == ';')
			continue;

		if (buf[0] == '[')
		{
			cr = NULL;
			str = buf + 1;
			aflag = 0;
			if ((ptr = strchr(str, ']')) != NULL)
			{
				*ptr = '\0';
				if (!strcasecmp(str, "attrs"))
				{
					aflag = 1;
				}
				else if ((i = _rule_index_get(r, str)) >= 0)
				{
					rbl = 1;	/* default read by line = yes */
					cr = &r->items[i];
				}
			}
			continue;
		}

		/* attr flag open? */
		if (aflag == 1)
		{
			/* parse the attr line: split at '+' and '=' into attr1, attr2, ratio */
			str = buf;
			while (*str == ' ' || *str == '\t') str++;
			if ((ptr = strchr(str, '+')) == NULL) continue;
			*ptr++ = '\0';
			if ((qtr = strchr(ptr, '=')) == NULL) continue;
			*qtr++ = '\0';

			/* create new memory; the line is dropped when allocation fails */
			a = (rule_attr_t) malloc(sizeof(struct scws_rule_attr));
			if (a == NULL)
				continue;
			memset(a, 0, sizeof(struct scws_rule_attr));

			/* get ratio (never below 1) */
			while(*qtr == ' ' || *qtr == '\t') qtr++;
			a->ratio = (short) atoi(qtr);
			if (a->ratio < 1)
				a->ratio = 1;
			/* 0xff marks "no npath given" */
			a->npath[0] = a->npath[1] = 0xff;

			/* read attr1 & npath1? */
			a->attr1[0] = *str++;
			if (*str && *str != '(' && *str != ' ' && *str != '\t')
				a->attr1[1] = *str++;
			while (*str && *str != '(') str++;
			if (*str == '(')
			{
				str++;
				if ((qtr = strchr(str, ')')) != NULL)
				{
					*qtr = '\0';
					/* the file value is 1-based; stored 0-based, 0 means unset */
					a->npath[0] = (unsigned char) atoi(str);
					if (a->npath[0] > 0)
						a->npath[0]--;
					else
						a->npath[0] = 0xff;
				}
			}

			/* read attr2 & npath2? */
			str = ptr;
			while (*str == ' ' || *str == '\t') str++;
			a->attr2[0] = *str++;
			if (*str && *str != '(' && *str != ' ' && *str != '\t')
				a->attr2[1] = *str++;
			while (*str && *str != '(') str++;
			if (*str == '(')
			{
				str++;
				if ((qtr = strchr(str, ')')) != NULL)
				{
					*qtr = '\0';
					a->npath[1] = (unsigned char) atoi(str);
					if (a->npath[1] > 0)
						a->npath[1]--;
					else
						a->npath[1] = 0xff;
				}
			}

			/* append to the chain list */
			if (r->attr == NULL)
				r->attr = rtail = a;
			else
			{
				rtail->next = a;
				rtail = a;
			}

			continue;
		}

		/* lines outside of a known section are ignored */
		if (cr == NULL)
			continue;

		/* param set: line|znum|include|exclude|type|tf|idf|attr */
		if (buf[0] == ':')
		{
			str = buf + 1;
			if (!(ptr = strchr(str, '=')))
				continue;
			while (*str == ' ' || *str == '\t') str++;

			/* trim the key, then swap roles: ptr = key, str = value */
			qtr = ptr + 1;
			while (ptr > str && (ptr[-1] == ' ' || ptr[-1] == '\t')) ptr--;
			*ptr = '\0';
			ptr = str;
			str = qtr;
			while (*str == ' ' || *str == '\t') str++;

			if (!strcmp(ptr, "line"))
				rbl =  (*str == 'N' || *str == 'n') ? 0 : 1;
			else if (!strcmp(ptr, "tf"))
				cr->tf = (float) atof(str);
			else if (!strcmp(ptr, "idf"))
				cr->idf = (float) atof(str);
			else if (!strcmp(ptr, "attr"))
				strncpy(cr->attr, str, 2);
			else if (!strcmp(ptr, "znum"))
			{
				/* "min,max" enables the range flag; a lone number sets only zmin */
				if ((ptr = strchr(str, ',')) != NULL)
				{
					*ptr++ = '\0';
					while (*ptr == ' ' || *ptr == '\t') ptr++;
					cr->zmax = atoi(ptr);
					cr->flag |= SCWS_ZRULE_RANGE;
				}
				cr->zmin = atoi(str);
			}
			else if (!strcmp(ptr, "type"))
			{
				if (!strncmp(str, "prefix", 6))
					cr->flag |= SCWS_ZRULE_PREFIX;
				else if (!strncmp(str, "suffix", 6))
					cr->flag |= SCWS_ZRULE_SUFFIX;
			}
			else if (!strcmp(ptr, "include") || !strcmp(ptr, "exclude"))
			{
				unsigned int *clude;

				if (!strcmp(ptr, "include"))
				{
					clude = &cr->inc;
					cr->flag |= SCWS_ZRULE_INCLUDE;
				}
				else
				{
					clude = &cr->exc;
					cr->flag |= SCWS_ZRULE_EXCLUDE;
				}

				/* every comma-terminated name */
				while ((ptr = strchr(str, ',')) != NULL)
				{
					while (ptr > str && (ptr[-1] == '\t' || ptr[-1] == ' ')) ptr--;
					*ptr = '\0';
					if ((i = _rule_index_get(r, str)) >= 0)
						*clude |= r->items[i].bit;

					str = ptr + 1;
					while (*str == ' ' || *str == '\t' || *str == ',') str++;
				}

				/*
				 * The last name on the line.  The lookup result must be
				 * compared with >= 0 like the lookups above: a bare
				 * truth test accepts -1 (not found) and rejects index 0.
				 */
				ptr = strlen(str) + str;
				while (ptr > str && strchr(" \t\r\n", ptr[-1])) ptr--;
				*ptr = '\0';
				if (ptr > str && (i = _rule_index_get(r, str)) >= 0)
					*clude |= r->items[i].bit;
			}
			continue;
		}

		/* read the entries */
		str = buf;
		while (*str == ' ' || *str == '\t') str++;
		ptr = str + strlen(str);
		while (ptr > str && strchr(" \t\r\n", ptr[-1])) ptr--;
		*ptr = '\0';

		/* empty line */
		if (ptr == str)
			continue;

		if (rbl)
			xtree_nput(r->tree, cr, sizeof(struct scws_rule_item), str, ptr - str);
		else
		{
			/* one tree entry per character of the line */
			while (str < ptr)
			{
				j = mblen[(*str)];
				/* never stall on a zero width nor run past the trimmed line end */
				if (j < 1)
					j = 1;
				if (j > (int) (ptr - str))
					j = (int) (ptr - str);

#ifdef DEBUG
				/* try to check repeat; keep the result as a pointer, not an int */
				{
					rule_item_t dup = (rule_item_t) xtree_nget(r->tree, str, j, NULL);
					if (dup != NULL)
						fprintf(stderr, "Reapeat word on %s|%s: %.*s\n", cr->name, dup->name, j, str);
				}
#endif

				xtree_nput(r->tree, cr, sizeof(struct scws_rule_item), str, j);
				str += j;
			}
		}
	}
	fclose(fp);

	/* optimize the tree */
	xtree_optimize(r->tree);
	return r;
}