/*
 * get_top_words(text, limit[, attr]) -> list
 *
 * Feeds `text` to the SCWS handle held by `self` and collects the entries
 * returned by scws_get_tops() into a Python list.
 *
 * Arguments (parsed from `args` with the format "si|s"):
 *   text  - string to analyse.
 *   limit - integer forwarded as the second argument of scws_get_tops().
 *   attr  - optional string forwarded as the third argument of
 *           scws_get_tops(); NULL is passed when it is omitted.
 *
 * Returns a new reference to a list with one 4-element list per entry:
 *   [word (str), times (int), weight (float), attr (str)]
 * The list is empty when scws_get_tops() returns NULL.
 *
 * On failure (bad arguments or an object could not be created) returns NULL
 * with the Python exception left by the failing call; the SCWS result list
 * and any partially built Python objects are released first.
 */
static PyObject* get_top_words(Scws* self, PyObject* args){
    char *text;
    char *attr = NULL;
    int limit;
    PyObject *result;
    scws_top_t res, cur;

    if(!PyArg_ParseTuple(args, "si|s", &text, &limit, &attr)){
        return NULL;
    }

    result = PyList_New(0);
    if(result == NULL){
        return NULL;
    }

    scws_send_text(self->scws, text, strlen(text));
    /* attr is still NULL when the optional argument was not supplied, so a
     * single call covers both the filtered and the unfiltered case. */
    res = scws_get_tops(self->scws, limit, attr);

    for(cur = res; cur != NULL; cur = cur->next){
        int append_rc;
        PyObject *aword = PyList_New(4);
        PyObject *word = PyString_FromString(cur->word);
        /* NOTE(review): assumes cur->attr is NUL-terminated -- confirm
         * against the scws_topword definition. */
        PyObject *word_attr = PyString_FromString(cur->attr);
        PyObject *weight = PyFloat_FromDouble(cur->weight);
        PyObject *times = PyInt_FromLong(cur->times);

        if(aword == NULL || word == NULL || word_attr == NULL
           || weight == NULL || times == NULL){
            /* Nothing has been handed over to aword yet, so every object
             * that was created is still owned here. */
            Py_XDECREF(aword);
            Py_XDECREF(word);
            Py_XDECREF(word_attr);
            Py_XDECREF(weight);
            Py_XDECREF(times);
            goto fail;
        }

        /* PyList_SetItem steals the item references; the indices are within
         * the freshly created 4-slot list, so these calls cannot fail. */
        PyList_SetItem(aword, 0, word);
        PyList_SetItem(aword, 1, times);
        PyList_SetItem(aword, 2, weight);
        PyList_SetItem(aword, 3, word_attr);

        /* PyList_Append takes its own reference, so ours is dropped whether
         * or not the append succeeded. */
        append_rc = PyList_Append(result, aword);
        Py_DECREF(aword);
        if(append_rc < 0){
            goto fail;
        }
    }

    scws_free_tops(res);
    return result;

fail:
    scws_free_tops(res);
    Py_DECREF(result);
    return NULL;
}
int main(int argc, char *argv[]) { int c, xmode, fsize, tlimit, bytes; FILE *fin, *fout; char *str, buf[2048], *attr; scws_t s; struct stat st; scws_res_t res, cur; struct timeval t1, t2, t3; fin = fout = (FILE *) NULL; str = attr = NULL; bytes = xmode = fsize = tlimit = 0; if ((program_name = strrchr(argv[0], '/')) != NULL) program_name++; else program_name = argv[0]; /* try to log the time */ gettimeofday(&t1, NULL); /* create the scws engine */ s = scws_new(); /* parse the arguments */ while ((c = getopt(argc, argv, "i:o:c:r:d:t:a:M:NDUEIAvh")) != -1) { switch (c) { case 'i' : if (fin != NULL) fclose(fin); if (stat(optarg, &st) || !S_ISREG(st.st_mode) || !(fin = fopen(optarg, "r"))) str = optarg; fsize = st.st_size; break; case 'o' : if (fout != NULL) break; if (!stat(optarg, &st) || !lstat(optarg, &st)) { fprintf(stderr, "ERROR: output file exists. '%s'\n", optarg); goto cws_end; } if (!(fout = fopen(optarg, "w"))) { fprintf(stderr, "ERROR: output file write failed. '%s'\n", optarg); goto cws_end; } break; case 'c' : scws_set_charset(s, optarg); break; case 'r' : scws_set_rule(s, optarg); if (s->r == NULL && !(xmode & XMODE_NO_TIME)) fprintf(stderr, "WARNING: input ruleset fpath load failed. '%s'\n", optarg); break; case 'd' : { char *d_str, *p_str, *q_str; int dmode; d_str = optarg; do { if ((p_str = strchr(d_str, ':')) != NULL) *p_str++ = '\0'; dmode = (xmode & XMODE_DICT_MEM) ? 
SCWS_XDICT_MEM : SCWS_XDICT_XDB; if ((q_str = strrchr(d_str, '.')) != NULL && !strcasecmp(q_str, ".txt")) dmode |= SCWS_XDICT_TXT; dmode = scws_add_dict(s, d_str, dmode); if (dmode < 0 && !(xmode & XMODE_NO_TIME)) fprintf(stderr, "WARNING: failed to add dict file: %s\n", d_str); } while ((d_str = p_str) != NULL); } break; case 'M' : scws_set_multi(s, (atoi(optarg)<<12)); break; case 'I' : scws_set_ignore(s, SCWS_YEA); break; case 'A' : xmode |= XMODE_SHOW_ATTR; break; case 'E' : xmode |= XMODE_DICT_MEM; break; case 'N' : xmode |= XMODE_NO_TIME; break; case 'D' : scws_set_debug(s, SCWS_YEA); break; case 'U' : scws_set_duality(s, SCWS_YEA); break; case 't' : xmode |= XMODE_DO_STAT; tlimit = atoi(optarg); break; case 'a' : attr = optarg; break; case 'v' : printf("%s (%s/%s: Simpled Chinese Words Segment - Command line usage)\n", program_name, PACKAGE_NAME, PACKAGE_VERSION); exit(0); break; case 'h' : show_usage(0, NULL); break; case '?' : default : exit(-1); } } /* other arguments */ argc -= optind; if (argc > 0 && fin == NULL && str == NULL) { optarg = argv[optind++]; if (*optarg != '-') { if (stat(optarg, &st) || !S_ISREG(st.st_mode) || !(fin = fopen(optarg, "r"))) str = optarg; fsize = st.st_size; argc--; } } if (argc > 0 && fout == NULL) { optarg = argv[optind]; if (*optarg != '-' && !(fout = fopen(optarg, "w"))) { fprintf(stderr, "ERROR: output file write failed. '%s'\n", optarg); goto cws_end; } } if (fout == NULL) fout = stdout; if (!(xmode & XMODE_NO_TIME)) gettimeofday(&t2, NULL); if (xmode & XMODE_DO_STAT) { /* do the stats only */ if (str == NULL && fin == NULL) fprintf(stderr, "ERROR: top stats require input string or file\n"); else { scws_top_t top, xtop; if (str == NULL) { int b; c = b = 0; str = (char *) malloc(fsize); while (fsize > 0) { b = fread(str + c, 1, fsize, fin); fsize -= b; c += b; } xmode |= XMODE_STAT_FILE; } else { c = strlen(str); } scws_send_text(s, str, c); bytes = c; fprintf(fout, "No. 
WordString Attr Weight(times)\n"); fprintf(fout, "-------------------------------------------------\n"); if ((top = xtop = scws_get_tops(s, tlimit, attr)) != NULL) { tlimit = 1; while (xtop != NULL) { #if 0 fprintf(fout, "%02d. %-24.24s %-4.2s %.2f(%d)\n", tlimit, xtop->word, xtop->attr, xtop->weight, xtop->times); #endif fprintf(fout, "%-24.24s %(%d)\n", xtop->word, xtop->times); xtop = xtop->next; tlimit++; } scws_free_tops(top); } else { fprintf(fout, "EMPTY records!\n"); } if (xmode & XMODE_STAT_FILE) free(str); } } else if (str == NULL) { str = buf; if (fin == NULL) fin = stdin; while (fgets(buf, sizeof(buf)-1, fin) != NULL) { ___DOSEGMENT___ } }