/**
 * Apply the members of a JSON object to a rule item.
 *
 * Walks the children of `attrs` and updates `rule` according to each
 * child's key:
 *   "tf", "idf"           - stored as float from the child's numeric value
 *   "attr"                - string; at most 2 bytes are copied to rule->attr
 *   "znum"                - a number (sets zmin) or an object with optional
 *                           "min" / "max" members (set zmin / zmax)
 *   "type"                - string beginning with "prefix" or "suffix";
 *                           sets SCWS_ZRULE_PREFIX / SCWS_ZRULE_SUFFIX
 *   "include", "exclude"  - list of rule names; sets SCWS_ZRULE_INCLUDE /
 *                           SCWS_ZRULE_EXCLUDE and ORs the bit of every name
 *                           found in `rules` into rule->inc / rule->exc
 * Any other key is ignored.  Values of an unexpected JSON type are skipped
 * instead of being dereferenced.
 *
 * @param rules  rule set used to resolve names listed in include/exclude
 * @param rule   rule item to update in place
 * @param attrs  JSON object holding the attributes; NULL is a no-op
 */
static void scws_rule_json_set_attrs(rule_t rules, rule_item_t rule, cJSON *attrs)
{
    cJSON *json_attr;

    if (attrs == NULL)
        return;

    for (json_attr = attrs->child; json_attr != NULL; json_attr = json_attr->next) {
        const char *attrname = json_attr->string;

        /* children of a JSON array carry no key; nothing to match against */
        if (attrname == NULL)
            continue;

        if (!strcmp(attrname, "tf")) {
            rule->tf = (float) json_attr->valuedouble;
        } else if (!strcmp(attrname, "idf")) {
            rule->idf = (float) json_attr->valuedouble;
        } else if (!strcmp(attrname, "attr")) {
            /* valuestring is NULL unless the value is a JSON string */
            if (json_attr->valuestring != NULL)
                strncpy(rule->attr, json_attr->valuestring, 2);
        } else if (!strcmp(attrname, "znum")) {
            /* znum: {min: a, max: b} | a */
            if (json_attr->type == cJSON_Object) {
                cJSON *zmin = cJSON_GetObjectItem(json_attr, "min");
                cJSON *zmax = cJSON_GetObjectItem(json_attr, "max");

                if (zmin != NULL)
                    rule->zmin = (int) zmin->valueint;
                /*
                 * NOTE(review): the text-file loader (scws_rule_new) also
                 * raises SCWS_ZRULE_RANGE when it reads a max value; this
                 * path does not touch rule->flag.  Confirm whether that
                 * difference is intended.
                 */
                if (zmax != NULL)
                    rule->zmax = (int) zmax->valueint;
            } else if (json_attr->type == cJSON_Number) {
                rule->zmin = (int) json_attr->valueint;
            }
        } else if (!strcmp(attrname, "type")) {
            const char *kind = json_attr->valuestring;

            if (kind == NULL)
                continue;
            if (!strncmp(kind, "prefix", 6))
                rule->flag |= SCWS_ZRULE_PREFIX;
            else if (!strncmp(kind, "suffix", 6))
                rule->flag |= SCWS_ZRULE_SUFFIX;
        } else if (!strcmp(attrname, "include") || !strcmp(attrname, "exclude")) {
            unsigned int *clude;
            cJSON *item;

            if (!strcmp(attrname, "include")) {
                clude = &rule->inc;
                rule->flag |= SCWS_ZRULE_INCLUDE;
            } else {
                clude = &rule->exc;
                rule->flag |= SCWS_ZRULE_EXCLUDE;
            }

            /*
             * Walk the child list directly: a single pass instead of the
             * repeated size/index lookups, each of which rescans the list.
             */
            for (item = json_attr->child; item != NULL; item = item->next) {
                int idx;

                /* skip elements that are not strings */
                if (item->valuestring == NULL)
                    continue;
                idx = _rule_index_get(rules, item->valuestring);
                if (idx >= 0)
                    *clude |= rules->items[idx].bit;
            }
        }
    }
}
rule_t scws_rule_new(const char *fpath, unsigned char *mblen) { FILE *fp; rule_t r; rule_item_t cr; int i, j, rbl, aflag; rule_attr_t a,rtail; unsigned char buf[512], *str, *ptr, *qtr; /* loaded or open file failed */ if ((fp = fopen(fpath, "r")) == NULL) return NULL; /* alloc the memory */ r = (rule_t) malloc(sizeof(rule_st)); memset(r, 0, sizeof(rule_st)); /* quick scan to add the name to list */ i = j = rbl = aflag = 0; while (fgets(buf, sizeof(buf)-1, fp)) { if (buf[0] != '[' || !(ptr = strchr(buf, ']'))) continue; str = buf + 1; *ptr = '\0'; if (ptr == str || (ptr-str) > 15 || !strcasecmp(str, "attrs")) continue; if (_rule_index_get(r, str) >= 0) continue; strcpy(r->items[i].name, str); r->items[i].tf = 5.0; r->items[i].idf = 3.5; strncpy(r->items[i].attr, "un", 2); if (!strcasecmp(str, "special")) r->items[i].bit = SCWS_RULE_SPECIAL; else if (!strcasecmp(str, "nostats")) r->items[i].bit = SCWS_RULE_NOSTATS; else { r->items[i].bit = (1<<j); j++; } if (++i >= SCWS_RULE_MAX) break; } rewind(fp); /* load the tree data */ if ((r->tree = xtree_new(0, 1)) == NULL) { free(r); return NULL; } cr = NULL; while (fgets(buf, sizeof(buf)-1, fp)) { if (buf[0] == ';') continue; if (buf[0] == '[') { cr = NULL; str = buf + 1; aflag = 0; if ((ptr = strchr(str, ']')) != NULL) { *ptr = '\0'; if (!strcasecmp(str, "attrs")) { aflag = 1; } else if ((i = _rule_index_get(r, str)) >= 0) { rbl = 1; /* default read by line = yes */ cr = &r->items[i]; } } continue; } /* attr flag open? 
*/ if (aflag == 1) { /* parse the attr line */ str = buf; while (*str == ' ' || *str == '\t') str++; if ((ptr = strchr(str, '+')) == NULL) continue; *ptr++ = '\0'; if ((qtr = strchr(ptr, '=')) == NULL) continue; *qtr++ = '\0'; /* create new memory */ a = (rule_attr_t) malloc(sizeof(struct scws_rule_attr)); memset(a, 0, sizeof(struct scws_rule_attr)); /* get ratio */ while(*qtr == ' ' || *qtr == '\t') qtr++; a->ratio = (short) atoi(qtr); if (a->ratio < 1) a->ratio = 1; a->npath[0] = a->npath[1] = 0xff; /* read attr1 & npath1? */ a->attr1[0] = *str++; if (*str && *str != '(' && *str != ' ' && *str != '\t') a->attr1[1] = *str++; while (*str && *str != '(') str++; if (*str == '(') { str++; if ((qtr = strchr(str, ')')) != NULL) { *qtr = '\0'; a->npath[0] = (unsigned char) atoi(str); if (a->npath[0] > 0) a->npath[0]--; else a->npath[0] = 0xff; } } /* read attr1 & npath2? */ str = ptr; while (*str == ' ' || *str == '\t') str++; a->attr2[0] = *str++; if (*str && *str != '(' && *str != ' ' && *str != '\t') a->attr2[1] = *str++; while (*str && *str != '(') str++; if (*str == '(') { str++; if ((qtr = strchr(str, ')')) != NULL) { *qtr = '\0'; a->npath[1] = (unsigned char) atoi(str); if (a->npath[1] > 0) a->npath[1]--; else a->npath[1] = 0xff; } } //printf("%c%c(%d)+%c%c(%d)=%d\n", a->attr1[0], a->attr1[1] ? a->attr1[1] : ' ', a->npath[0], // a->attr2[0], a->attr2[1] ? a->attr2[1] : ' ', a->npath[1], a->ratio); /* append to the chain list */ if (r->attr == NULL) r->attr = rtail = a; else { rtail->next = a; rtail = a; } continue; } if (cr == NULL) continue; /* param set: line|znum|include|exclude|type|tf|idf|attr */ if (buf[0] == ':') { str = buf + 1; if (!(ptr = strchr(str, '='))) continue; while (*str == ' ' || *str == '\t') str++; qtr = ptr + 1; while (ptr > str && (ptr[-1] == ' ' || ptr[-1] == '\t')) ptr--; *ptr = '\0'; ptr = str; str = qtr; while (*str == ' ' || *str == '\t') str++; if (!strcmp(ptr, "line")) rbl = (*str == 'N' || *str == 'n') ? 
0 : 1; else if (!strcmp(ptr, "tf")) cr->tf = (float) atof(str); else if (!strcmp(ptr, "idf")) cr->idf = (float) atof(str); else if (!strcmp(ptr, "attr")) strncpy(cr->attr, str, 2); else if (!strcmp(ptr, "znum")) { if ((ptr = strchr(str, ',')) != NULL) { *ptr++ = '\0'; while (*ptr == ' ' || *ptr == '\t') ptr++; cr->zmax = atoi(ptr); cr->flag |= SCWS_ZRULE_RANGE; } cr->zmin = atoi(str); } else if (!strcmp(ptr, "type")) { if (!strncmp(str, "prefix", 6)) cr->flag |= SCWS_ZRULE_PREFIX; else if (!strncmp(str, "suffix", 6)) cr->flag |= SCWS_ZRULE_SUFFIX; } else if (!strcmp(ptr, "include") || !strcmp(ptr, "exclude")) { unsigned int *clude; if (!strcmp(ptr, "include")) { clude = &cr->inc; cr->flag |= SCWS_ZRULE_INCLUDE; } else { clude = &cr->exc; cr->flag |= SCWS_ZRULE_EXCLUDE; } while ((ptr = strchr(str, ',')) != NULL) { while (ptr > str && (ptr[-1] == '\t' || ptr[-1] == ' ')) ptr--; *ptr = '\0'; if ((i = _rule_index_get(r, str)) >= 0) *clude |= r->items[i].bit; str = ptr + 1; while (*str == ' ' || *str == '\t' || *str == ',') str++; } ptr = strlen(str) + str; while (ptr > str && strchr(" \t\r\n", ptr[-1])) ptr--; *ptr = '\0'; if (ptr > str && (i = _rule_index_get(r, str))) *clude |= r->items[i].bit; } continue; } /* read the entries */ str = buf; while (*str == ' ' || *str == '\t') str++; ptr = str + strlen(str); while (ptr > str && strchr(" \t\r\n", ptr[-1])) ptr--; *ptr = '\0'; /* emptry line */ if (ptr == str) continue; if (rbl) xtree_nput(r->tree, cr, sizeof(struct scws_rule_item), str, ptr - str); else { while (str < ptr) { j = mblen[(*str)]; #ifdef DEBUG /* try to check repeat */ if ((i = (int) xtree_nget(r->tree, str, j, NULL)) != 0) fprintf(stderr, "Reapeat word on %s|%s: %.*s\n", cr->name, ((rule_item_t) i)->name, j, str); #endif xtree_nput(r->tree, cr, sizeof(struct scws_rule_item), str, j); str += j; } } } fclose(fp); /* optimize the tree */ xtree_optimize(r->tree); return r; }