コード例 #1
0
ファイル: test_thbrk.c プロジェクト: AnadoluPanteri/libthai
/*
 * Self test / interactive driver for th_brk() and th_brk_line().
 *
 * With "-i" as the first argument, lines are read from stdin until EOF or a
 * read error; for each line the cut points and the "<WBR>"-marked output are
 * printed.  Without it, a fixed sample sentence is broken and the results are
 * compared against the expected values (6 cut points, output length 62).
 *
 * Returns 0 on success; the self test calls exit(-1) on a mismatch.
 */
int main (int argc, char* argv[])
{
  thchar_t str[MAXLINELENGTH];
  thchar_t out[MAXLINELENGTH*6+1];
  int pos[MAXLINELENGTH];
  int outputLength;
  int numCut, i;
  int interactive = 0;

  if (argc >= 2 && 0 == strcmp (argv[1], "-i"))
    interactive = 1;

  if (interactive) {
    for (;;) {
      size_t lineLen;
      int hasNewline;

      printf ("Please enter thai words/sentences: ");
      /* Let fgets() itself end the loop: testing feof() beforehand would
       * drop a last line that lacks '\n' and never terminates on a read
       * error. */
      if (!fgets ((char *)str, (int) sizeof str, stdin))
        break;

      lineLen = strlen ((const char *)str);
      hasNewline = (lineLen > 0 && str[lineLen - 1] == '\n');

      numCut = th_brk (str, pos, MAXLINELENGTH);
      printf ("Total %d cut points.", numCut);
      if (numCut > 0) {
        printf ("Cut points list: %d", pos[0]);
        for (i = 1; i < numCut; i++) {
          printf(", %d", pos[i]);
        }
      }
      printf("\n");
      outputLength = th_brk_line (str, out, sizeof out, "<WBR>");
      /* The reported length excludes the trailing '\n' kept by fgets(),
       * when there is one. */
      printf ("Output string length is %d\n",
              hasNewline ? outputLength-1 : outputLength);
      printf ("Output string is %s", out);
      if (!hasNewline)
        printf ("\n");
      printf("***********************************************************************\n");
    }
  } else {
    /* Sample sentence spelled as explicit byte escapes (32 bytes, read as
     * TIS-620) so the test data does not depend on the encoding this source
     * file happens to be saved in. */
    strcpy ((char *)str,
            "\xca\xc7\xd1\xca\xb4\xd5\xa4\xc3\xd1\xba"
            " "
            "\xb9\xd5\xe8\xe0\xbb\xe7\xb9\xa1\xd2\xc3\xb7\xb4"
            "\xca\xcd\xba\xb5\xd1\xc7\xe0\xcd\xa7");
    printf ("Testing with string: %s\n", str);
    numCut = th_brk (str, pos, MAXLINELENGTH);
    printf ("Total %d cut points.", numCut);
    if (numCut != 6) {
      printf("Error! should be 6.. test th_brk() failed...\n");
      exit (-1);
    }

    printf("Cut points list: %d", pos[0]);
    for (i = 1; i < numCut; i++) {
      printf(", %d", pos[i]);
    }
    printf("\n");
    outputLength = th_brk_line (str, out, sizeof out, "<WBR>");
    printf ("Output string is %s\n", out);
    printf ("Output string length is %d\n", outputLength);
    if (outputLength != 62) {
      printf ("Error! should be 62.. test th_brk_line() failed...\n");
      exit (-1);
    }
    printf ("*** End of thbrk self test ******\n");
  }
  return 0;
}
コード例 #2
0
ファイル: libthai.c プロジェクト: Godlil2e/pythai
static PyObject*
th_brk_(PyObject *self, PyObject *args) 
{
    PyObject *result = NULL;
    Py_UNICODE *s1;
    int s1_len;

    if (!PyArg_ParseTuple(args, "u#", &s1, &s1_len)) {
        PyErr_SetString(PyExc_TypeError, "parameter must be unicode");
        return NULL;
    }

    if(s1_len == 0) {
        PyErr_SetString(PyExc_ValueError, "parameter must not be empty string");
        return NULL;
    }

    PyObject *txt_cp874 = PyUnicode_Encode(s1, s1_len, "CP874", NULL);
    if(txt_cp874 == NULL) {
        return NULL;
    }

    Py_ssize_t len = PyString_Size(txt_cp874);
    char *c_txt_cp874 = PyString_AsString(txt_cp874);
    int *pos = (int *)malloc(sizeof(int) * (s1_len + 1)); 
    int n = th_brk((unsigned char *)c_txt_cp874, pos, len);
    int i, s = 0;
    char *buffer;
    result = PyList_New(0);
    for(i = 0; i < n; i++) {
        PyObject *tok;
        PyObject *tok_cp874 = PySequence_GetSlice(txt_cp874, s, pos[i]);
        Py_ssize_t tok_len;
        PyString_AsStringAndSize(tok_cp874, &buffer, &tok_len);
        tok = PyUnicode_Decode(buffer, tok_len, "CP874", NULL);
        s = pos[i];
        PyList_Append(result, tok); 
        Py_XDECREF(tok_cp874);
        Py_XDECREF(tok);
    } 
    if(s < len) {
        PyObject *tok_cp874 = PySequence_GetSlice(txt_cp874, s, len);
        Py_ssize_t tok_len;
        PyObject *tok;
        PyString_AsStringAndSize(tok_cp874, &buffer, &tok_len);
        tok = PyUnicode_Decode(buffer, tok_len, "CP874", NULL);
        PyList_Append(result, tok);
        Py_XDECREF(tok_cp874);
        Py_XDECREF(tok);
    } 
    Py_XDECREF(txt_cp874);

    free(pos); 
    return result;
}
コード例 #3
0
ファイル: libthai.c プロジェクト: IFNS1981/libthai4r
/*
 * Returns an Array holding the break positions th_brk() reports for v_str.
 *
 * v_str is converted with StringValueCStr(), so a non-String argument
 * raises TypeError (instead of being read as a String) and th_brk() always
 * receives a NUL-terminated buffer; a String containing an embedded NUL
 * raises ArgumentError.
 */
static VALUE
f_th_brk(VALUE obj,VALUE v_str)
{
  char *text = StringValueCStr(v_str);
  long capacity = RSTRING_LEN(v_str);
  int *pos = ALLOC_N(int, capacity);
  int n, i;
  VALUE a;

  n = th_brk((thchar_t *)text, pos, capacity);
  if (n < 0)
    n = 0;  /* never hand a negative capacity to rb_ary_new2() */

  a = rb_ary_new2(n);
  for (i = 0; i < n; i++)
    {
      rb_ary_push(a, INT2FIX(pos[i]));
    }

  /* ALLOC_N memory comes from Ruby's allocator and must go back through
   * xfree(), not the C library's free(). */
  xfree(pos);
  return a;
}
コード例 #4
0
    /**
     * Reports whether a line break is allowed at index @p pos of @p string.
     *
     * The text is converted with the codec looked up by MIB 2259 and handed
     * to libthai's th_brk(); the resulting break positions are expanded into
     * a per-character table kept in the shared cache, keyed by the @p string
     * pointer, so repeated queries on the same string do not call libthai
     * again.
     *
     * @param string  characters to analyse
     * @param pos     index being queried (expected to be below @p len)
     * @param len     number of characters in @p string
     * @return the cached breakability of @p pos; @c true when libthai or the
     *         codec is unavailable, or when the tables cannot be allocated
     */
    bool isBreakableThai( const QChar *string, const int pos, const int len)
    {
        static QTextCodec *thaiCodec = QTextCodec::codecForMib(2259);

#ifndef HAVE_LIBTHAI

        KLibrary *lib = 0;

        /* load libthai dynamically */
        if (( !th_brk ) && thaiCodec  ) {
            printf("Try to load libthai dynamically...\n");
            KLibLoader *loader = KLibLoader::self();
            lib = loader->library("libthai");
            if (lib && lib->hasSymbol("th_brk")) {
                th_brk = (th_brk_def) lib->symbol("th_brk");
            } else {
                // indication that loading failed and we shouldn't try to load again
                printf("Error, can't load libthai...\n");
                thaiCodec = 0;
                if (lib)
                    lib->unload();
            }
        }

        if (!th_brk ) {
            return true;
        }
#endif

        // The codec lookup may have yielded no codec; without it the text
        // cannot be converted, so fall back to "breakable" as the
        // missing-library path above does.
        if (!thaiCodec) {
            return true;
        }

        if (!cache ) {
            cache = new ThaiCache;
#ifndef HAVE_LIBTHAI
            cache->library = lib;
#endif
        }

        // Rebuild the tables only when a different string is queried.
        if ( string != cache->string ) {
            QCString cstr = thaiCodec->fromUnicode( QConstString(string,len).string());

            cache->numwbrpos = th_brk((const unsigned char*) cstr.data(), cache->wbrpos, cache->allocated);
            if (cache->numwbrpos > cache->allocated) {
                // Grow through a temporary so a failed realloc neither leaks
                // the old buffer nor leaves a null pointer in the cache.
                int *grownPos = (int *)realloc(cache->wbrpos, cache->numwbrpos*sizeof(int));
                if (!grownPos) {
                    return true;
                }
                cache->wbrpos = grownPos;
                cache->allocated = cache->numwbrpos;
                cache->numwbrpos = th_brk((const unsigned char*) cstr.data(), cache->wbrpos, cache->allocated);
            }
            if ( len > cache->numisbreakable ) {
                int *grownFlags = (int *)realloc(cache->isbreakable, len*sizeof(int));
                if (!grownFlags) {
                    return true;
                }
                cache->isbreakable = grownFlags;
                cache->numisbreakable = len;
            }
            for (int i = 0 ; i < len ; ++i) {
                cache->isbreakable[i] = 0;
            }
            // Flag each reported break position, skipping entries beyond the
            // position buffer and positions outside the flag table.
            for (int i = 0; i < cache->numwbrpos && i < cache->allocated; ++i) {
                const int breakAt = cache->wbrpos[i];
                if (breakAt >= 0 && breakAt < len) {
                    cache->isbreakable[breakAt] = 1;
                }
            }
            cache->string = string;
        }
        return cache->isbreakable[pos];
    }