/*
 * Driver for th_brk() and th_brk_line().
 *
 * With "-i" as the first argument, lines are read from stdin until EOF or a
 * read error; for each line the cut points and the line with "<WBR>"
 * inserted are printed.
 *
 * Without "-i", a fixed Thai sentence is broken and the results are compared
 * with the expected values (6 cut points, output length 62).  On mismatch an
 * error is printed and the process exits with status -1.
 *
 * Returns 0 on success.
 */
int main (int argc, char* argv[])
{
  thchar_t str[MAXLINELENGTH];
  thchar_t out[MAXLINELENGTH*6+1];
  int pos[MAXLINELENGTH];
  int outputLength;
  int numCut, i;
  int interactive = 0;

  if (argc >= 2 && 0 == strcmp (argv[1], "-i"))
    interactive = 1;

  if (interactive) {
    for (;;) {
      size_t inLen;
      int hasNewline;

      printf ("Please enter thai words/sentences: ");
      /* Stop on EOF or read error.  Testing the fgets() result (instead of
       * feof() afterwards) keeps a final line that lacks a trailing newline
       * and never processes a stale buffer. */
      if (fgets ((char *)str, sizeof str, stdin) == NULL)
        break;

      inLen = strlen ((const char *)str);
      hasNewline = (inLen > 0 && str[inLen - 1] == '\n');

      numCut = th_brk (str, pos, MAXLINELENGTH);
      printf ("Total %d cut points.", numCut);
      if (numCut > 0) {
        printf ("Cut points list: %d", pos[0]);
        for (i = 1; i < numCut; i++) {
          printf(", %d", pos[i]);
        }
      }
      printf("\n");

      outputLength = th_brk_line (str, out, sizeof out, "<WBR>");
      /* Do not count the newline kept by fgets(), when there is one. */
      printf ("Output string length is %d\n",
              hasNewline ? outputLength - 1 : outputLength);
      printf ("Output string is %s", out);
      if (!hasNewline)
        printf ("\n");
      printf("***********************************************************************\n");
    }
  } else {
    /* The Thai test sentence as single-byte (TIS-620) codes, 32 bytes long.
     * Written with escapes so the literal does not depend on the encoding
     * the source file is saved or read in. */
    strcpy ((char *)str,
            "\xCA\xC7\xD1\xCA\xB4\xD5\xA4\xC3\xD1\xBA"
            " "
            "\xB9\xD5\xE8\xE0\xBB\xE7\xB9\xA1\xD2\xC3\xB7"
            "\xB4\xCA\xCD\xBA\xB5\xD1\xC7\xE0\xCD\xA7");
    printf ("Testing with string: %s\n", str);

    numCut = th_brk (str, pos, MAXLINELENGTH);
    printf ("Total %d cut points.", numCut);
    if (numCut != 6) {
      printf("Error! should be 6.. test th_brk() failed...\n");
      exit (-1);
    }

    /* numCut is 6 here, so pos[0] is valid. */
    printf("Cut points list: %d", pos[0]);
    for (i = 1; i < numCut; i++) {
      printf(", %d", pos[i]);
    }
    printf("\n");

    outputLength = th_brk_line (str, out, sizeof out, "<WBR>");
    printf ("Output string is %s\n", out);
    printf ("Output string length is %d\n", outputLength);
    /* 32 input bytes + 6 cut points * strlen("<WBR>") = 62. */
    if (outputLength != 62) {
      printf ("Error! should be 62.. test th_brk_line() failed...\n");
      exit (-1);
    }
    printf ("*** End of thbrk self test ******\n");
  }

  return 0;
}
static PyObject* th_brk_(PyObject *self, PyObject *args) { PyObject *result = NULL; Py_UNICODE *s1; int s1_len; if (!PyArg_ParseTuple(args, "u#", &s1, &s1_len)) { PyErr_SetString(PyExc_TypeError, "parameter must be unicode"); return NULL; } if(s1_len == 0) { PyErr_SetString(PyExc_ValueError, "parameter must not be empty string"); return NULL; } PyObject *txt_cp874 = PyUnicode_Encode(s1, s1_len, "CP874", NULL); if(txt_cp874 == NULL) { return NULL; } Py_ssize_t len = PyString_Size(txt_cp874); char *c_txt_cp874 = PyString_AsString(txt_cp874); int *pos = (int *)malloc(sizeof(int) * (s1_len + 1)); int n = th_brk((unsigned char *)c_txt_cp874, pos, len); int i, s = 0; char *buffer; result = PyList_New(0); for(i = 0; i < n; i++) { PyObject *tok; PyObject *tok_cp874 = PySequence_GetSlice(txt_cp874, s, pos[i]); Py_ssize_t tok_len; PyString_AsStringAndSize(tok_cp874, &buffer, &tok_len); tok = PyUnicode_Decode(buffer, tok_len, "CP874", NULL); s = pos[i]; PyList_Append(result, tok); Py_XDECREF(tok_cp874); Py_XDECREF(tok); } if(s < len) { PyObject *tok_cp874 = PySequence_GetSlice(txt_cp874, s, len); Py_ssize_t tok_len; PyObject *tok; PyString_AsStringAndSize(tok_cp874, &buffer, &tok_len); tok = PyUnicode_Decode(buffer, tok_len, "CP874", NULL); PyList_Append(result, tok); Py_XDECREF(tok_cp874); Py_XDECREF(tok); } Py_XDECREF(txt_cp874); free(pos); return result; }
/*
 * Ruby binding for th_brk().
 *
 * v_str: the text to analyse.  It is converted with StringValueCStr, so a
 * non-String argument is converted via to_str or raises TypeError, and a
 * string containing a NUL byte raises ArgumentError instead of being
 * silently cut short by the C API.
 *
 * Returns a new Array holding the positions written by th_brk(), as
 * Fixnums.
 */
static VALUE f_th_brk(VALUE obj, VALUE v_str)
{
    /* Guarantees a String and a NUL-terminated buffer before calling into C. */
    const char *text = StringValueCStr(v_str);
    long len = RSTRING_LEN(v_str);
    int *pos = ALLOC_N(int, len);
    int n, i;
    VALUE a;

    n = th_brk((thchar_t *)text, pos, len);

    a = rb_ary_new2(n);
    for (i = 0; i < n; i++) {
        rb_ary_push(a, INT2FIX(pos[i]));
    }

    /* Memory obtained with ALLOC_N must be released with xfree, not free. */
    xfree(pos);
    return a;
}
bool isBreakableThai( const QChar *string, const int pos, const int len) { static QTextCodec *thaiCodec = QTextCodec::codecForMib(2259); //printf("Entering isBreakableThai with pos = %d\n", pos); #ifndef HAVE_LIBTHAI KLibrary *lib = 0; /* load libthai dynamically */ if (( !th_brk ) && thaiCodec ) { printf("Try to load libthai dynamically...\n"); KLibLoader *loader = KLibLoader::self(); lib = loader->library("libthai"); if (lib && lib->hasSymbol("th_brk")) { th_brk = (th_brk_def) lib->symbol("th_brk"); } else { // indication that loading failed and we shouldn't try to load again printf("Error, can't load libthai...\n"); thaiCodec = 0; if (lib) lib->unload(); } } if (!th_brk ) { return true; } #endif if (!cache ) { cache = new ThaiCache; #ifndef HAVE_LIBTHAI cache->library = lib; #endif } // build up string of thai chars if ( string != cache->string ) { //fprintf(stderr,"new string found (not in cache), calling libthai\n"); QCString cstr = thaiCodec->fromUnicode( QConstString(string,len).string()); //printf("About to call libthai::th_brk with str: %s",cstr.data()); cache->numwbrpos = th_brk((const unsigned char*) cstr.data(), cache->wbrpos, cache->allocated); //fprintf(stderr,"libthai returns with value %d\n",cache->numwbrpos); if (cache->numwbrpos > cache->allocated) { cache->allocated = cache->numwbrpos; cache->wbrpos = (int *)realloc(cache->wbrpos, cache->allocated*sizeof(int)); cache->numwbrpos = th_brk((const unsigned char*) cstr.data(), cache->wbrpos, cache->allocated); } if ( len > cache->numisbreakable ) { cache->numisbreakable=len; cache->isbreakable = (int *)realloc(cache->isbreakable, cache->numisbreakable*sizeof(int)); } for (int i = 0 ; i < len ; ++i) { cache->isbreakable[i] = 0; } if ( cache->numwbrpos > 0 ) { for (int i = cache->numwbrpos-1; i >= 0; --i) { cache->isbreakable[cache->wbrpos[i]] = 1; } } cache->string = string; } //printf("Returning %d\n", cache->isbreakable[pos]); return cache->isbreakable[pos]; }