Example #1
0
/**
 * Segments `content` with the SCWS handle `s` and appends one SCWSRESULT
 * per token to `v`.
 *
 * @param content  Text to segment; token offsets/lengths reported by SCWS
 *                 are applied to this string to extract each word.
 * @param v        Output vector; results are appended, existing entries
 *                 are left untouched.
 */
void Scws::GetResult(string content
	,vector<SCWSRESULT> &v)
{
	scws_send_text(s, content.c_str(), content.length());

	// scws_get_result() is called repeatedly until it returns NULL; each
	// call yields a linked list that has to be released with
	// scws_free_result() once it has been walked.
	scws_res_t res;
	while ((res = scws_get_result(s)) != NULL)
	{
		for (scws_res_t cur = res; cur != NULL; cur = cur->next)
		{
			SCWSRESULT result;
			result.word = string(content, cur->off, cur->len);
			// idf is truncated to an integer weight, as before.
			result.weight = (int)cur->idf;
			// std::vector has no push(); push_back() is the append operation.
			v.push_back(result);
		}
		scws_free_result(res);
	}

	// NOTE(review): this releases the handle itself, so `s` is no longer
	// usable after the first call unless it is re-created elsewhere.
	// Kept as-is because callers may rely on it - confirm against the
	// class's constructor/destructor.
	scws_free(s);
}
Example #2
0
/*
 * get_top_words(text, limit[, attr]) -> list of [word, times, weight, attr]
 *
 * Sends `text` to the SCWS handle stored in `self`, asks for the top
 * `limit` words (optionally filtered by the `attr` string, NULL when the
 * argument is omitted) and returns them as a list of 4-element lists.
 *
 * Returns a new reference, or NULL with a Python exception set when
 * argument parsing or any object allocation fails.
 */
static PyObject* get_top_words(Scws* self, PyObject* args){
    char *text;
    char *attr = NULL;
    int limit;
    if(!PyArg_ParseTuple(args, "si|s", &text, &limit, &attr)){
        return NULL;
    }
    PyObject* result = PyList_New(0);
    if(result == NULL){
        return NULL;
    }
    scws_send_text(self->scws, text, strlen(text));

    /* attr is already NULL when no filter was supplied, so it can be
     * forwarded unconditionally. */
    scws_top_t res = scws_get_tops(self->scws, limit, attr);
    scws_top_t cur;
    int failed = 0;

    for(cur = res; cur != NULL; cur = cur->next){
        /* NOTE(review): the attr field looks like a small fixed-size char
         * array that is not guaranteed to be NUL-terminated when full
         * (the SCWS command-line tool prints it with a "%.2s" precision),
         * so its length is bounded by the field size - confirm in scws.h. */
        size_t attr_len = 0;
        while(attr_len < sizeof(cur->attr) && cur->attr[attr_len] != '\0'){
            attr_len++;
        }

        PyObject* word = PyString_FromString(cur->word);
        PyObject* word_attr = PyString_FromStringAndSize(cur->attr, (Py_ssize_t)attr_len);
        PyObject* weight = PyFloat_FromDouble(cur->weight);
        PyObject* times = PyInt_FromLong(cur->times);
        PyObject* aword = PyList_New(4);
        if(!word || !word_attr || !weight || !times || !aword){
            Py_XDECREF(word);
            Py_XDECREF(word_attr);
            Py_XDECREF(weight);
            Py_XDECREF(times);
            Py_XDECREF(aword);
            failed = 1;
            break;
        }

        /* PyList_SetItem steals the item references. */
        PyList_SetItem(aword, 0, word);
        PyList_SetItem(aword, 1, times);
        PyList_SetItem(aword, 2, weight);
        PyList_SetItem(aword, 3, word_attr);

        /* PyList_Append takes its own reference, so ours is dropped
         * regardless of the outcome. */
        if(PyList_Append(result, aword) < 0){
            failed = 1;
        }
        Py_DECREF(aword);
        if(failed){
            break;
        }
    }

    /* The SCWS list is released on both the success and the error path. */
    scws_free_tops(res);

    if(failed){
        Py_DECREF(result);
        return NULL;
    }
    return result;
}
Example #3
0
/*
 * scws_get_res(text) -> list of (word, attr, idf) tuples
 *
 * Segments `text` with the file-level SCWS handle `s` and returns one
 * tuple per token: the token bytes sliced out of `text`, the token's attr
 * string, and its idf as a float.
 *
 * Returns a new reference, or NULL with a Python exception set when
 * argument parsing or an allocation fails.
 */
static PyObject * scws_get_res(PyObject * self,PyObject * args){
    const char *text;

    if (!PyArg_ParseTuple(args, "s",&text))
        return NULL;

    PyObject * v = PyList_New(0);
    if (v == NULL)
        return NULL;

    scws_send_text(s, text, strlen(text));

    scws_res_t res, cur;
    int failed = 0;
    while (!failed && (res = scws_get_result(s)) != NULL)
    {
        for (cur = res; cur != NULL; cur = cur->next){
            /* "N" hands the freshly created strings over to the tuple
             * instead of adding a second reference to them (which "O"
             * did, leaking both strings for every token). */
            PyObject * item = Py_BuildValue("(NNd)",
                PyString_FromStringAndSize(text+cur->off,cur->len),
                PyString_FromString(cur->attr),
                cur->idf);
            if (item == NULL){
                failed = 1;
                break;
            }
            /* PyList_Append takes its own reference; release ours. */
            if (PyList_Append(v, item) < 0)
                failed = 1;
            Py_DECREF(item);
            if (failed)
                break;
        }
        /* Each batch must be freed inside the loop: once the loop ends
         * `res` is NULL, so freeing afterwards released nothing. */
        scws_free_result(res);
    }

    if (failed){
        Py_DECREF(v);
        return NULL;
    }
    /* Return the list itself; wrapping it in Py_BuildValue("O", ...) added
     * a reference that was never released. */
    return v;
}
Example #4
0
/**
 * Extracts word features from `text`.
 *
 * Sends `text` to the SCWS handle `Getfeature::s`, fetches the words
 * selected by the "policy"/"fe_policy" configuration entry and returns a
 * map from each sufficiently long word to its `times` count.
 *
 * @param text  Text to analyse.
 * @return      Map of word -> times; empty when SCWS returns no words.
 */
map<string,int> Getfeature::feature_get(string&text){
     map<string,int> features;
     scws_send_text(Getfeature::s,text.c_str(),text.length());

     // Fetch the results filtered by part of speech. The head pointer is
     // kept so the whole list can be freed after it has been walked.
     scws_top_t head=scws_get_words(Getfeature::s,const_cast<char*>((*(env->conf))["policy"]["fe_policy"].c_str()));

     for(scws_top_t cur=head;cur!=0;cur=cur->next){
          string fe=cur->word;
          // Keep only words of at least 6 bytes (written as 2*3 -
          // presumably two 3-byte UTF-8 characters; confirm the charset).
          if(fe.length()>=2*3)
               features.insert(pair<string,int> (fe,cur->times));
     }

     // Free from the head: the traversal cursor is null by the time the
     // loop finishes, so freeing it released nothing.
     if(head!=0)
          scws_free_tops(head);
     return features;
}
Example #5
0
/*
 * Segments the AS3-side `inputString` with the SCWS handle `s`, prints
 * every token to stdout, and hands the tokens back to AS3 joined into one
 * string with a single space after each token.
 *
 * `s`, `res`, `cur` and `inputString` are not declared in this function;
 * they are expected to exist in the enclosing scope.
 */
void scws_send_text_AS3()
{
	char *text = NULL;
	AS3_MallocString(text, inputString);
	scws_send_text(s, text, strlen(text));
	AS3_DeclareVar(myString, String);

	/* Starts out empty (the buffer used to be seeded with the string "0",
	 * which then prefixed the returned text). */
	char result[5000] = {'\0'};
	size_t used = 0;      /* bytes of result in use, excluding the NUL */
	int truncated = 0;    /* set once a token no longer fits */

	while ((res = cur = scws_get_result(s)) != NULL)
	{
		while (cur != NULL)
		{
			printf("WORD: %.*s/%s (IDF = %4.2f)\n", cur->len, text+cur->off, cur->attr, cur->idf);

			/* Append "<token> " only while token + space + NUL still fit;
			 * once one token is dropped, stop appending so the output is
			 * a clean prefix rather than a string with holes in it. */
			size_t word_len = (size_t) cur->len;
			if (!truncated && used + word_len + 2 <= sizeof(result))
			{
				memcpy(result + used, text + cur->off, word_len);
				used += word_len;
				result[used++] = ' ';
				result[used] = '\0';
			}
			else
			{
				truncated = 1;
			}

			cur = cur->next;
		}
		scws_free_result(res);
	}

	printf("%s",result);
	AS3_CopyCStringToVar(myString, result, strlen(result));
	/* NOTE(review): this releases the shared handle, so `s` must be
	 * re-created before the next call - confirm that is intended. */
	scws_free(s);
	/* NOTE(review): `text` comes from AS3_MallocString and is never
	 * released here - check whether the caller is expected to free it. */
	AS3_Trace(myString);

	AS3_Return("212");
}
Example #6
0
/*
 * has_word(text[, attr]) -> bool
 *
 * Sends `text` to the SCWS handle stored in `self` and reports whether
 * scws_has_word() returns non-zero for the optional `attr` string.
 * Returns NULL (exception set by PyArg_ParseTuple) on bad arguments.
 */
static PyObject* has_word(Scws* self, PyObject* args){
    char *input;
    char *filter = NULL;   /* stays NULL when the second argument is omitted */

    if(!PyArg_ParseTuple(args, "s|s", &input, &filter)){
        return NULL;
    }

    scws_send_text(self->scws, input, strlen(input));

    /* filter is NULL exactly when no attr was given, so a single call
     * covers both the filtered and the unfiltered case. */
    int found = scws_has_word(self->scws, filter);
    if(!found){
        Py_RETURN_FALSE;
    }
    Py_RETURN_TRUE;
}
Example #7
0
/*
 * functions
 */
/*
 * Parser start hook: allocates a zeroed ParserState, records the input
 * buffer (argument 0) and its length (argument 1), attaches a handle
 * obtained from scws_fork(zhprs_scws), applies the zhprs_* settings to
 * that handle and feeds it the text. Returns the state as a pointer Datum.
 */
Datum
zhprs_start(PG_FUNCTION_ARGS)
{
	ParserState *state = (ParserState *) palloc0(sizeof(ParserState));
	scws_t engine = scws_fork(zhprs_scws);

	/* Input text handed over by the caller. */
	state->buffer = (char *) PG_GETARG_POINTER(0);
	state->len = PG_GETARG_INT32(1);
	state->pos = 0;

	/* No segmentation results have been fetched yet. */
	state->res = NULL;
	state->curr = NULL;
	state->scws = engine;

	/* Apply the current settings to this handle before sending the text. */
	scws_set_ignore(engine, (int) zhprs_punctuation_ignore);
	scws_set_duality(engine, (int) zhprs_seg_with_duality);
	scws_set_multi(engine, zhprs_multi_mode);

	scws_send_text(engine, state->buffer, state->len);

	PG_RETURN_POINTER(state);
}
Example #8
0
/*
 * participle(text) -> list of [word, attr]
 *
 * Segments `text` with the SCWS handle stored in `self` and returns one
 * 2-element list per token: the token bytes sliced out of `text` and the
 * token's attr string.
 *
 * Returns a new reference, or NULL with a Python exception set when
 * argument parsing or any object allocation fails.
 */
static PyObject* participle(Scws* self, PyObject* args){
    char *text;
    if(!PyArg_ParseTuple(args, "s", &text)){
        return NULL;
    }
    PyObject* result = PyList_New(0);
    if(result == NULL){
        return NULL;
    }
    scws_send_text(self->scws, text, strlen(text));

    scws_res_t res, cur;
    int failed = 0;
    while (!failed && (res = scws_get_result(self->scws)) != NULL){
        for (cur = res; cur != NULL; cur = cur->next){
            PyObject* word_text = PyString_FromStringAndSize(text + cur->off, cur->len);
            PyObject* word_attr = PyString_FromString(cur->attr);
            PyObject* aword = PyList_New(2);
            if(!word_text || !word_attr || !aword){
                Py_XDECREF(word_text);
                Py_XDECREF(word_attr);
                Py_XDECREF(aword);
                failed = 1;
                break;
            }

            /* PyList_SetItem steals the item references. */
            PyList_SetItem(aword, 0, word_text);
            PyList_SetItem(aword, 1, word_attr);

            /* PyList_Append takes its own reference, so ours is dropped
             * whether or not the append succeeded. */
            if(PyList_Append(result, aword) < 0){
                failed = 1;
            }
            Py_DECREF(aword);
            if(failed){
                break;
            }
        }
        /* The current batch is released on the error path as well. */
        scws_free_result(res);
    }

    if(failed){
        Py_DECREF(result);
        return NULL;
    }
    return result;
}
Example #9
0
/*
 * Demo routine: creates an SCWS handle in `s`, configures it for UTF-8
 * with the dict.utf8.xdb dictionary and rules.utf8.ini rule set, segments
 * a fixed sample sentence and prints every token with its attr and idf.
 *
 * `s`, `res` and `cur` are not declared in this function; they are
 * expected to exist in the enclosing scope.
 */
void updateUniverse()
{
  /* The sample is a string literal, so it is held through a const pointer. */
  const char *text = "Hello, 我名字叫李那曲是一个中国人, 我有时买Q币来玩, 我还听说过C#语言";

  if (!(s = scws_new())) {
    printf("ERROR: cann't init the scws!\n");
    /* Without a handle every call below would operate on NULL. */
    return;
  }
  scws_set_charset(s, "utf8");
  scws_set_dict(s, "dict.utf8.xdb", SCWS_XDICT_XDB);
  scws_set_rule(s, "rules.utf8.ini");

  scws_send_text(s, text, strlen(text));
  /* Results are fetched batch by batch until scws_get_result() returns
   * NULL; each batch is freed once it has been printed. */
  while ((res = cur = scws_get_result(s)) != NULL)
  {
    while (cur != NULL)
    {
      printf("WORD: %.*s/%s (IDF = %4.2f)\n", cur->len, text+cur->off, cur->attr, cur->idf);
      cur = cur->next;
    }
    scws_free_result(res);
  }
  scws_free(s);
}
Example #10
0
int main(int argc, char *argv[])
{	
	int c, xmode, fsize, tlimit, bytes;
	FILE *fin, *fout;
	char *str, buf[2048], *attr;
	scws_t s;
	struct stat st;
	scws_res_t res, cur;
	struct timeval t1, t2, t3;

	fin = fout = (FILE *) NULL;
	str = attr = NULL;
	bytes = xmode = fsize = tlimit = 0;
	if ((program_name = strrchr(argv[0], '/')) != NULL)
		program_name++;
	else
		program_name = argv[0];	

	/* try to log the time */
	gettimeofday(&t1, NULL);

	/* create the scws engine */
	s = scws_new();

	/* parse the arguments */
	while ((c = getopt(argc, argv, "i:o:c:r:d:t:a:M:NDUEIAvh")) != -1)
	{
		switch (c)
		{
			case 'i' :
				if (fin != NULL)
					fclose(fin);
				if (stat(optarg, &st) || !S_ISREG(st.st_mode) || !(fin = fopen(optarg, "r")))
					str = optarg;
				fsize = st.st_size;
				break;
			case 'o' :
				if (fout != NULL)
					break;
				if (!stat(optarg, &st) || !lstat(optarg, &st))
				{
					fprintf(stderr, "ERROR: output file exists. '%s'\n", optarg);
					goto cws_end;
				}
				if (!(fout = fopen(optarg, "w")))
				{
					fprintf(stderr, "ERROR: output file write failed. '%s'\n", optarg);
					goto cws_end;
				}
				break;
			case 'c' :
				scws_set_charset(s, optarg);
				break;
			case 'r' :
				scws_set_rule(s, optarg);
				if (s->r == NULL && !(xmode & XMODE_NO_TIME))
					fprintf(stderr, "WARNING: input ruleset fpath load failed. '%s'\n", optarg);
				break;
			case 'd' :
				{
					char *d_str, *p_str, *q_str;
					int dmode;
					d_str = optarg;
					do
					{
						if ((p_str = strchr(d_str, ':')) != NULL) *p_str++ = '\0';
						
						dmode = (xmode & XMODE_DICT_MEM) ? SCWS_XDICT_MEM : SCWS_XDICT_XDB;
						if ((q_str = strrchr(d_str, '.')) != NULL && !strcasecmp(q_str, ".txt")) 
							dmode |= SCWS_XDICT_TXT;
						dmode = scws_add_dict(s, d_str, dmode);
						if (dmode < 0 && !(xmode & XMODE_NO_TIME))
							fprintf(stderr, "WARNING: failed to add dict file: %s\n", d_str);
					}
					while ((d_str = p_str) != NULL);
				}
				break;
			case 'M' :
				scws_set_multi(s, (atoi(optarg)<<12));
				break;
			case 'I' :
				scws_set_ignore(s, SCWS_YEA);
				break;
			case 'A' :
				xmode |= XMODE_SHOW_ATTR;
				break;
			case 'E' :
				xmode |= XMODE_DICT_MEM;
				break;
			case 'N' :
				xmode |= XMODE_NO_TIME;
				break;
			case 'D' :
				scws_set_debug(s, SCWS_YEA);
				break;
			case 'U' :
				scws_set_duality(s, SCWS_YEA);
				break;
			case 't' :
				xmode |= XMODE_DO_STAT;
				tlimit = atoi(optarg);
				break;
			case 'a' :
				attr = optarg;
				break;
			case 'v' :
				printf("%s (%s/%s: Simpled Chinese Words Segment - Command line usage)\n",
							program_name, PACKAGE_NAME, PACKAGE_VERSION);
				exit(0);			
				break;
			case 'h' :
				show_usage(0, NULL);
				break;
			case '?' :
			default :
				exit(-1);
		}
	}

	/* other arguments */
	argc -= optind;
	if (argc > 0 && fin == NULL && str == NULL)
	{
		optarg = argv[optind++];
		if (*optarg != '-')
		{		
			if (stat(optarg, &st) || !S_ISREG(st.st_mode) || !(fin = fopen(optarg, "r")))
				str = optarg;
			fsize = st.st_size;
			argc--;
		}
	}
	if (argc > 0 && fout == NULL)
	{
		optarg = argv[optind];
		if (*optarg != '-' && !(fout = fopen(optarg, "w")))
		{
			fprintf(stderr, "ERROR: output file write failed. '%s'\n", optarg);
			goto cws_end;
		}
	}

	if (fout == NULL)
		fout = stdout;

	if (!(xmode & XMODE_NO_TIME))
		gettimeofday(&t2, NULL);

	if (xmode & XMODE_DO_STAT)
	{
		/* do the stats only */		
		if (str == NULL && fin == NULL)		
			fprintf(stderr, "ERROR: top stats require input string or file\n");			
		else
		{
			scws_top_t top, xtop;

			if (str == NULL)
			{
				int b;

				c = b = 0;
				str = (char *) malloc(fsize);
				while (fsize > 0)
				{
					b = fread(str + c, 1, fsize, fin);
					fsize -= b;
					c += b;
				}
				xmode |= XMODE_STAT_FILE;
			}
			else
			{
				c = strlen(str);
			}
			
			scws_send_text(s, str, c);
			bytes = c;
			fprintf(fout, "No. WordString               Attr  Weight(times)\n");
			fprintf(fout, "-------------------------------------------------\n");
			if ((top = xtop = scws_get_tops(s, tlimit, attr)) != NULL)
			{
				tlimit = 1;
				while (xtop != NULL)
				{
#if 0
					fprintf(fout, "%02d. %-24.24s %-4.2s  %.2f(%d)\n",
						tlimit, xtop->word, xtop->attr, xtop->weight, xtop->times);
#endif
					fprintf(fout, "%-24.24s %(%d)\n",
						xtop->word, xtop->times);
					xtop = xtop->next;
					tlimit++;
				}
				scws_free_tops(top);
			}
			else
			{
				fprintf(fout, "EMPTY records!\n");
			}

			if (xmode & XMODE_STAT_FILE)
				free(str);	
		}
	}
	else if (str == NULL)
	{
		str = buf;
		if (fin == NULL)
			fin = stdin;		
		while (fgets(buf, sizeof(buf)-1, fin) != NULL)
		{
			___DOSEGMENT___
		}
	}