static void *_php_create_scws(TSRMLS_D) { struct php_scws *ps; char *ini_cs; scws_t s; s = scws_new(); if (s == NULL) { return NULL; } ps = (struct php_scws *)emalloc(sizeof(struct php_scws)); ps->s = s; ps->zt = NULL; ps->charset[0] = '\0'; ps->rsrc_id = ZEND_REGISTER_RESOURCE(NULL, ps, le_scws); ini_cs = INI_STR("scws.default.charset"); if (ini_cs != NULL && *ini_cs) { memset(ps->charset, 0, sizeof(ps->charset)); strncpy(ps->charset, ini_cs, sizeof(ps->charset)-1); scws_set_charset(s, ps->charset); } return ((void *)ps); }
int init_split(char *dict, char *rule) { if (!(s = scws_new())) { wlog(rlog, LOG_ERROR, "[%s]:[%d] scws_new failed [%s]\n", ID, LN, strerror(errno)); return -1; } if (!(ss = scws_new())) { wlog(rlog, LOG_ERROR, "[%s]:[%d] scws_new failed [%s]\n", ID, LN, strerror(errno)); return -1; } scws_set_charset(s, "gbk"); scws_set_charset(ss, "gbk"); scws_set_dict(s, dict, SCWS_XDICT_XDB); scws_set_rule(s, rule); wlog(rlog, LOG_DEBUG, "[%s]:[%d] scws_new ok\n", ID, LN); return 0; }
static void zhprs_init() { zhprs_scws = scws_new(); if (!zhprs_scws) ereport(ERROR, (errcode(ERRCODE_INTERNAL_ERROR), errmsg("Failed to init Chinese Parser Lib SCWS!\"%s\"", ""))); }
void initialize_SCWS_AS3() { if (!(s = scws_new())) { printf("ERROR: cann't init the scws!\n"); //exit(-1); } scws_set_charset(s, "utf8"); scws_set_dict(s, "dict.utf8.xdb", SCWS_XDICT_XDB); scws_set_rule(s, "rules.utf8.ini"); //scws_send_text(s, text, strlen(text)); }
/*
 * Allocate a new Scws object through the type's tp_alloc slot and attach a
 * freshly created SCWS handle to it.
 *
 * type: the type object whose tp_alloc is used for the allocation.
 * args, kwds: not used here.
 *
 * Returns the new object, or NULL on failure. If scws_new() fails, the
 * half-built object is released and MemoryError is set, so the caller never
 * sees a NULL result without an exception.
 */
static PyObject *Scws_new(PyTypeObject *type, PyObject *args, PyObject *kwds)
{
    Scws *self = (Scws *)type->tp_alloc(type, 0);

    if (self == NULL) {
        /* NOTE(review): tp_alloc is expected to have set the exception. */
        return NULL;
    }

    self->scws = scws_new();
    if (self->scws == NULL) {
        Py_DECREF(self);
        /* PyErr_NoMemory() sets MemoryError and returns NULL. */
        return PyErr_NoMemory();
    }

    return (PyObject *)self;
}
/* hightman.110320: fork scws */
/*
 * Create a new SCWS handle and, when a parent is supplied, copy the parent's
 * mblen and mode fields and fork its rule set and dictionary into the new
 * handle.
 *
 * p: parent handle; may be NULL, in which case the new handle is returned
 *    exactly as scws_new() produced it.
 *
 * Returns the new handle, or NULL when scws_new() fails.
 */
scws_t scws_fork(scws_t p)
{
    scws_t child = scws_new();

    if (p == NULL || child == NULL) {
        return child;
    }

    child->mblen = p->mblen;
    child->mode = p->mode;

    /* fork dict/rules */
    child->r = scws_rule_fork(p->r);
    child->d = xdict_fork(p->d);

    return child;
}
// Construct the wrapper: create the SCWS handle `s`, then apply the basic
// settings (utf8 charset, XDB dictionary and rule file under
// /usr/local/scws/etc/). A failed scws_new() is handed to
// PrintErrorAndExit("scws_new").
Scws::Scws()
{
    s = scws_new();
    if (!s) {
        PrintErrorAndExit("scws_new");
    }

    // Basic settings.
    scws_set_charset(s, "utf8");
    scws_set_dict(s,
                  "/usr/local/scws/etc/" "dict.utf8.xdb",
                  SCWS_XDICT_XDB);
    scws_set_rule(s,
                  "/usr/local/scws/etc/" "rules.utf8.ini");
}
void updateUniverse() { char *text = "Hello, 我名字叫李那曲是一个中国人, 我有时买Q币来玩, 我还听说过C#语言"; if (!(s = scws_new())) { printf("ERROR: cann't init the scws!\n"); //exit(-1); } scws_set_charset(s, "utf8"); scws_set_dict(s, "dict.utf8.xdb", SCWS_XDICT_XDB); scws_set_rule(s, "rules.utf8.ini"); scws_send_text(s, text, strlen(text)); while (res = cur = scws_get_result(s)) { while (cur != NULL) { printf("WORD: %.*s/%s (IDF = %4.2f)\n", cur->len, text+cur->off, cur->attr, cur->idf); cur = cur->next; } scws_free_result(res); } scws_free(s); }
int main(int argc, char *argv[]) { int c, xmode, fsize, tlimit, bytes; FILE *fin, *fout; char *str, buf[2048], *attr; scws_t s; struct stat st; scws_res_t res, cur; struct timeval t1, t2, t3; fin = fout = (FILE *) NULL; str = attr = NULL; bytes = xmode = fsize = tlimit = 0; if ((program_name = strrchr(argv[0], '/')) != NULL) program_name++; else program_name = argv[0]; /* try to log the time */ gettimeofday(&t1, NULL); /* create the scws engine */ s = scws_new(); /* parse the arguments */ while ((c = getopt(argc, argv, "i:o:c:r:d:t:a:M:NDUEIAvh")) != -1) { switch (c) { case 'i' : if (fin != NULL) fclose(fin); if (stat(optarg, &st) || !S_ISREG(st.st_mode) || !(fin = fopen(optarg, "r"))) str = optarg; fsize = st.st_size; break; case 'o' : if (fout != NULL) break; if (!stat(optarg, &st) || !lstat(optarg, &st)) { fprintf(stderr, "ERROR: output file exists. '%s'\n", optarg); goto cws_end; } if (!(fout = fopen(optarg, "w"))) { fprintf(stderr, "ERROR: output file write failed. '%s'\n", optarg); goto cws_end; } break; case 'c' : scws_set_charset(s, optarg); break; case 'r' : scws_set_rule(s, optarg); if (s->r == NULL && !(xmode & XMODE_NO_TIME)) fprintf(stderr, "WARNING: input ruleset fpath load failed. '%s'\n", optarg); break; case 'd' : { char *d_str, *p_str, *q_str; int dmode; d_str = optarg; do { if ((p_str = strchr(d_str, ':')) != NULL) *p_str++ = '\0'; dmode = (xmode & XMODE_DICT_MEM) ? 
SCWS_XDICT_MEM : SCWS_XDICT_XDB; if ((q_str = strrchr(d_str, '.')) != NULL && !strcasecmp(q_str, ".txt")) dmode |= SCWS_XDICT_TXT; dmode = scws_add_dict(s, d_str, dmode); if (dmode < 0 && !(xmode & XMODE_NO_TIME)) fprintf(stderr, "WARNING: failed to add dict file: %s\n", d_str); } while ((d_str = p_str) != NULL); } break; case 'M' : scws_set_multi(s, (atoi(optarg)<<12)); break; case 'I' : scws_set_ignore(s, SCWS_YEA); break; case 'A' : xmode |= XMODE_SHOW_ATTR; break; case 'E' : xmode |= XMODE_DICT_MEM; break; case 'N' : xmode |= XMODE_NO_TIME; break; case 'D' : scws_set_debug(s, SCWS_YEA); break; case 'U' : scws_set_duality(s, SCWS_YEA); break; case 't' : xmode |= XMODE_DO_STAT; tlimit = atoi(optarg); break; case 'a' : attr = optarg; break; case 'v' : printf("%s (%s/%s: Simpled Chinese Words Segment - Command line usage)\n", program_name, PACKAGE_NAME, PACKAGE_VERSION); exit(0); break; case 'h' : show_usage(0, NULL); break; case '?' : default : exit(-1); } } /* other arguments */ argc -= optind; if (argc > 0 && fin == NULL && str == NULL) { optarg = argv[optind++]; if (*optarg != '-') { if (stat(optarg, &st) || !S_ISREG(st.st_mode) || !(fin = fopen(optarg, "r"))) str = optarg; fsize = st.st_size; argc--; } } if (argc > 0 && fout == NULL) { optarg = argv[optind]; if (*optarg != '-' && !(fout = fopen(optarg, "w"))) { fprintf(stderr, "ERROR: output file write failed. '%s'\n", optarg); goto cws_end; } } if (fout == NULL) fout = stdout; if (!(xmode & XMODE_NO_TIME)) gettimeofday(&t2, NULL); if (xmode & XMODE_DO_STAT) { /* do the stats only */ if (str == NULL && fin == NULL) fprintf(stderr, "ERROR: top stats require input string or file\n"); else { scws_top_t top, xtop; if (str == NULL) { int b; c = b = 0; str = (char *) malloc(fsize); while (fsize > 0) { b = fread(str + c, 1, fsize, fin); fsize -= b; c += b; } xmode |= XMODE_STAT_FILE; } else { c = strlen(str); } scws_send_text(s, str, c); bytes = c; fprintf(fout, "No. 
WordString Attr Weight(times)\n"); fprintf(fout, "-------------------------------------------------\n"); if ((top = xtop = scws_get_tops(s, tlimit, attr)) != NULL) { tlimit = 1; while (xtop != NULL) { #if 0 fprintf(fout, "%02d. %-24.24s %-4.2s %.2f(%d)\n", tlimit, xtop->word, xtop->attr, xtop->weight, xtop->times); #endif fprintf(fout, "%-24.24s %(%d)\n", xtop->word, xtop->times); xtop = xtop->next; tlimit++; } scws_free_tops(top); } else { fprintf(fout, "EMPTY records!\n"); } if (xmode & XMODE_STAT_FILE) free(str); } } else if (str == NULL) { str = buf; if (fin == NULL) fin = stdin; while (fgets(buf, sizeof(buf)-1, fin) != NULL) { ___DOSEGMENT___ } }
#include"getfeature.h"
#include <assert.h>

// Segmenter handle shared by every Getfeature instance (static member).
scws_t Getfeature::s=scws_new();

// Fetch the Environment singleton and prepare the shared segmenter.
Getfeature::Getfeature(){
    env=Environment::getInstance();
    assert(env!=0);
    scws_init();
}

// Release the shared segmenter handle.
// The handle is cleared after freeing so that destroying a second instance
// does not free it twice.
// NOTE(review): the handle is class-wide, so instances still alive after this
// point are left without a segmenter; proper ownership needs a wider change.
Getfeature::~Getfeature(){
    if(s!=0){
        scws_free(s);
        s=0;
    }
}

// Make sure the shared handle exists, then apply the configured policy.
// The handle is re-created when an earlier destructor released it. If it
// still cannot be created, the error is reported and configuration is
// skipped rather than run on a null handle.
void Getfeature::scws_init(){
    if(s==0)
        s=scws_new();
    if(s==0){
        cerr<<"can't init scws_t";
        return;
    }
    policy_set();
}

// Configure the shared handle from the "policy" section of the environment
// configuration.
void Getfeature::policy_set(){
    scws_set_charset(Getfeature::s, "utf8");
    /// Load the dictionary
    scws_set_dict(Getfeature::s, (*(env->conf))["policy"]["dict"].c_str(),SCWS_XDICT_XDB);
    /// Load the special rules
    scws_set_rule(Getfeature::s,(*(env->conf))["policy"]["dict_rule"].c_str());
    /// Ignore all special characters
    scws_set_ignore(s,1);
}

map<string,int> Getfeature::feature_get(string&text){
    map<string,int> features;
    string fe;
/*
 * Create the SCWS handle `s` (declared outside this function).
 *
 * self, args: not used here.
 *
 * Returns the Python integer 1 on success. If scws_new() fails, MemoryError
 * is set and NULL is returned, so the interpreter never sees a NULL result
 * without an exception.
 */
static PyObject *scws_scws_new(PyObject *self, PyObject *args)
{
    s = scws_new();
    if (s == NULL) {
        /* PyErr_NoMemory() sets MemoryError and returns NULL. */
        return PyErr_NoMemory();
    }

    return Py_BuildValue("i", 1);
}