Exemplo n.º 1
0
Arquivo: string.c Projeto: matz/streem
/*
 * Split a string into an array of one-character strings.
 *
 * Fetches one string argument (format "s") as a pointer plus length,
 * walks it in steps of utf8len(), and stores in *ret an array holding
 * one string per step.  Returns STRM_OK.
 */
static int
str_chars(strm_stream* strm, int argc, strm_value* args, strm_value* ret)
{
  const char* str;
  const char* end;
  const char* s;
  strm_int slen;
  strm_array ary;
  strm_value* sps;
  strm_int n = 0;
  strm_int i = 0;

  strm_get_args(strm, argc, args, "s", &str, &slen);

  /* The limit handed to utf8len() must be the real end of the string.
     Using `s + slen` would move the limit along with the cursor, and
     testing `*s` would depend on a terminator instead of slen. */
  end = str + slen;

  /* first pass: count the characters so the array can be sized up front */
  for (s = str; s < end; n++) {
    s += utf8len(s, end);
  }

  ary = strm_ary_new(NULL, n);
  sps = strm_ary_ptr(ary);

  /* second pass: emit one string per character; i < n keeps the writes
     inside the array sized by the first pass */
  for (s = str; s < end && i < n; i++) {
    const char* prev = s;

    s += utf8len(s, end);
    sps[i] = strm_str_new(prev, s - prev);
  }

  *ret = strm_ary_value(ary);
  return STRM_OK;
}
Exemplo n.º 2
0
Arquivo: utf8.c Projeto: vigna/ne
/* Counts the characters found in the first len bytes of s, advancing by
   utf8len() of the byte at the current position on every step. */
int64_t utf8strlen(const char * const s, const int64_t len) {
	int64_t chars = 0;
	for (int64_t pos = 0; pos < len; chars++) {
		/* a negative length would walk backwards */
		assert(utf8len(s[pos]) >= 0);
		pos += utf8len(s[pos]);
	}
	return chars;
}
Exemplo n.º 3
0
/*
 * Reverse the characters of str in place and return str.
 *
 * Strings for which mrb_utf8_strlen() reports at most one character are
 * returned untouched.  Otherwise the bytes are copied to a scratch buffer
 * and written back character by character, starting from the end of the
 * string, so the bytes inside each character keep their order.
 */
static mrb_value
mrb_str_reverse_bang(mrb_state *mrb, mrb_value str)
{
  mrb_int utf8_len = mrb_utf8_strlen(str, -1);
  if (utf8_len > 1) {
    mrb_int len;
    char *buf;
    unsigned char *p, *e, *r;

    mrb_str_modify(mrb, mrb_str_ptr(str));
    len = RSTRING_LEN(str);
    buf = (char *)mrb_malloc(mrb, (size_t)len);
    p = (unsigned char*)buf;
    e = (unsigned char*)buf + len;

    memcpy(buf, RSTRING_PTR(str), len);
    r = (unsigned char*)RSTRING_PTR(str) + len;

    while (p<e) {
      mrb_int clen = utf8len(p);

      /* utf8len() is not told where the buffer ends.  A length that is
         not positive, or that exceeds the bytes left, would stall the
         loop or push r in front of the string, so it is clamped. */
      if (clen < 1) {
        clen = 1;
      }
      if (clen > (mrb_int)(e - p)) {
        clen = (mrb_int)(e - p);
      }
      r -= clen;
      memcpy(r, p, (size_t)clen);
      p += clen;
    }
    mrb_free(mrb, buf);
  }

  return str;
}
PERF_TEST_F(GreekSeeking, Begin)
{
	// Seek from the start of the text (SEEK_SET) by utf8len(text) - 1
	// and expect to land on the last byte of the contents.
	const char* const text = m_contents.c_str();
	const off_t offset = (off_t)utf8len(text) - 1;

	const char* found = utf8seek(text, m_contents.length(), text, offset, SEEK_SET);
	PERF_ASSERT(found == text + m_contents.length() - 1);
}
PERF_TEST_F(GreekSeeking, CurrentForwards)
{
	// Seek forwards from the current position (the start of the text,
	// SEEK_CUR) by utf8len(text) - 1 and expect the last byte.
	const char* const text = m_contents.c_str();
	const off_t offset = (off_t)utf8len(text) - 1;

	const char* found = utf8seek(text, m_contents.length(), text, offset, SEEK_CUR);
	PERF_ASSERT(found == text + m_contents.length() - 1);
}
Exemplo n.º 6
0
/*
 * Decode the NUL-terminated UTF-8 string s into a 0-terminated array of
 * code points.
 *
 * The array comes from memman_alloc_4k() and has room for
 * utf8len(s) + 1 ints.  Returns the array, or 0 when the allocation
 * yields 0.
 *
 * Lead bytes select the number of continuation bytes: 0xxxxxxx none,
 * 11110xxx three, other 111xxxxx two, anything else one.  A sequence cut
 * short by the terminator stops there and yields the bits decoded so
 * far; continuation bytes are never read past the end of the string.
 */
int* utf8str(const char* s)
{
    const unsigned char *utf8 = (const unsigned char*)s;
    int index = 0, out_index = 0;

    memman_t* memman = (memman_t*)MEMMAN_ADDR;
    int* unicode = (int*)memman_alloc_4k(memman, (utf8len(s) + 1) * sizeof(int));

    if (unicode == 0) {
        return 0;
    }

    while (utf8[index] != 0) {
        unsigned char c = utf8[index++];
        int codepoint;
        int trailing;

        if ((c & 0x80) == 0) {
            codepoint = c;
            trailing = 0;
        } else if ((c & 0xf8) == 0xf0) {
            /* 11110xxx: previously mis-handled as a 3-byte lead */
            codepoint = c & 0x07;
            trailing = 3;
        } else if ((c & 0xe0) == 0xe0) {
            codepoint = c & 0x1F;
            trailing = 2;
        } else {
            codepoint = c & 0x3F;
            trailing = 1;
        }

        /* Stop at the terminator even in the middle of a sequence. */
        while (trailing-- > 0 && utf8[index] != 0) {
            codepoint = (codepoint << 6) | (utf8[index++] & 0x3F);
        }

        unicode[out_index++] = codepoint;
    }
    unicode[out_index] = 0;
    return unicode;
}
Exemplo n.º 7
0
/*
 * Check that the bytes of v form well-shaped UTF-8 sequences.
 *
 * With K_USING_UTF8 defined: every lead byte must have a utf8len() of
 * 1..4, the whole sequence must fit inside the buffer, and every byte
 * after the lead must satisfy utf8_isTrail().  Returns 1 when the scan
 * ends exactly at the end of the buffer, 0 otherwise.
 * Without K_USING_UTF8 the function always returns 1.
 */
kbool_t knh_bytes_checkENCODING(kbytes_t v)
{
#ifdef K_USING_UTF8
	const unsigned char *s = v.utext;
	const unsigned char *e = s + v.len;
	while (s < e) {
		size_t ulen = utf8len(s[0]);
		size_t k;

		/* lengths 0, 5, 6 and anything else are rejected */
		if(ulen < 1 || ulen > 4) return 0;
		/* a sequence running past the end is invalid; checking here
		   keeps the trail-byte reads below inside the buffer */
		if((size_t)(e - s) < ulen) return 0;
		for(k = 1; k < ulen; k++) {
			if(!utf8_isTrail(s[k])) return 0;
		}
		s += ulen;
	}
	return (s == e);
#else
	return 1;
#endif
}
Exemplo n.º 8
0
/*
 * Build a new string from a character range of str: skip up to beg
 * characters, then take up to len characters, each step advancing by
 * utf8len().  Both walks stop once the end of the string is reached.
 */
static mrb_value
str_subseq(mrb_state *mrb, mrb_value str, mrb_int beg, mrb_int len)
{
  unsigned char *head = (unsigned char*) RSTRING_PTR(str);
  unsigned char *limit = head + RSTRING_LEN(str);
  unsigned char *tail;
  mrb_int skipped = 0;
  mrb_int taken = 0;

  /* move head to the first requested character */
  while (skipped < beg && head < limit) {
    head += utf8len(head);
    skipped++;
  }

  /* move tail past the last requested character */
  tail = head;
  while (taken < len && tail < limit) {
    tail += utf8len(tail);
    taken++;
  }

  return mrb_str_new(mrb, (const char*)head, (size_t)(tail - head));
}
Exemplo n.º 9
0
// Converts a UTF-8 encoded std::string to a std::wstring using
// MultiByteToWideChar(CP_UTF8).
//
// The required number of wide characters is queried from the API first,
// so the destination is always large enough (code points outside the BMP
// need two wchar_t each) and the capacity passed to the API matches the
// real buffer size.  Returns an empty string for empty input or when the
// conversion fails.
std::wstring Str2WStr(const std::string &str)
{
    std::wstring dest;
    if (str.empty())
        return dest;

    const int srcLen = (int)str.length();

    // First call: cchWideChar == 0 asks for the required size only.
    const int needed = MultiByteToWideChar(CP_UTF8, 0, str.c_str(), srcLen, NULL, 0);
    if (needed <= 0)
        return dest;

    dest.resize(needed);

    // Second call: convert into the writable storage of dest.
    const int written = MultiByteToWideChar(CP_UTF8, 0, str.c_str(), srcLen, &dest[0], needed);
    if (written <= 0)
        dest.clear();
    else
        dest.resize(written);
    return dest;
}
Exemplo n.º 10
0
/*
 * Read all of standard input into memory, then print utf8len() of the
 * data and the results of utf8slice() for the ranges [2:3] and [3:4].
 * Aborts after perror() when reading or allocating fails.
 */
int main(int argc, char **argv)
{
    enum { CHUNK = 0x10000 };
    char *p = NULL;
    size_t len = 0;

    (void)argc;
    (void)argv;

    while (true) {
        /* Keep the old pointer until realloc is known to have succeeded. */
        char *grown = realloc(p, len + CHUNK);
        if (grown == NULL) {
            perror("realloc");
            abort();
        }
        p = grown;

        ssize_t cnt = read(STDIN_FILENO, p + len, CHUNK);
        if (cnt == -1) {
            perror("read");
            abort();
        } else if (cnt == 0) {
            break;
        } else {
            len += cnt;
        }
    }

    /* The buffer always has CHUNK spare bytes beyond len, so p[len] is
       valid; the data must be terminated before it is used as a string. */
    p[len] = '\0';

    printf("utf8len=%zu\n", utf8len(p));
    ssize_t start = 2, end = 3;
    utf8slice(p, &start, &end);
    /* "%.*s" takes its precision as an int */
    printf("utf8slice[2:3]=%.*s\n", (int)(end - start), p + start);
    start = 3;
    end = 4;
    utf8slice(p, &start, &end);
    printf("utf8slice[3:4]=%.*s\n", (int)(end - start), p + start);

    free(p);
    return 0;
}
Exemplo n.º 11
0
/*
 * Draw msg into the GDI memory DC of the font.
 *
 * video_info supplies the frame width/height and the default message
 * position; data is the gdi_raster_t font; userdata optionally points to
 * a struct font_params overriding position, scale and alignment.
 * Nothing is drawn when the font is missing, the message is empty, or
 * the font has no GDI state.
 */
static void gdi_render_msg(
      video_frame_info_t *video_info,
      void *data, const char *msg,
      const void *userdata)
{
   float x, y, scale;
   gdi_raster_t *font = (gdi_raster_t*)data;
   unsigned newX, newY, len;
   unsigned align;
   const struct font_params *params = (const struct font_params*)userdata;
   unsigned width                   = video_info->width;
   unsigned height                  = video_info->height;

   if (!font || string_is_empty(msg))
      return;

   if (params)
   {
      x = params->x;
      y = params->y;
      scale = params->scale;
      align = params->text_align;
   }
   else
   {
      x = video_info->font_msg_pos_x;
      y = video_info->font_msg_pos_y;
      scale = 1.0f;
      align = TEXT_ALIGN_LEFT;
   }

   if (!font->gdi)
      return;

   /* NOTE(review): len is used both as a horizontal offset for the
      alignment below and as the count passed to TextOut - confirm that
      both uses are intended. */
   len  = utf8len(msg);

   switch (align)
   {
      case TEXT_ALIGN_LEFT:
         newX = x * width * scale;
         break;
      case TEXT_ALIGN_RIGHT:
         newX = (x * width * scale) - len;
         break;
      case TEXT_ALIGN_CENTER:
         newX = (x * width * scale) - (len / 2);
         break;
      default:
         /* unknown alignment: newX used to stay uninitialized here */
         newX = 0;
         break;
   }

   /* y is flipped against the frame height */
   newY = height - (y * height * scale);

   /* select the font bitmap, draw white text on a transparent
      background, then restore the previously selected bitmap */
   font->gdi->bmp_old = (HBITMAP)SelectObject(font->gdi->memDC, font->gdi->bmp);
   SetBkMode(font->gdi->memDC, TRANSPARENT);
   SetTextColor(font->gdi->memDC, RGB(255,255,255));
   TextOut(font->gdi->memDC, newX, newY, msg, len);
   SelectObject(font->gdi->memDC, font->gdi->bmp_old);
}
PERF_TEST_F(GreekSeeking, End)
{
	// Seek relative to the end of the text (SEEK_END) by
	// utf8len(text) - 1 and expect to land one byte after the start.
	const char* const text = m_contents.c_str();
	const char* const textEnd = text + m_contents.length();
	const off_t offset = (off_t)utf8len(text) - 1;

	const char* found = utf8seek(textEnd, m_contents.length(), text, offset, SEEK_END);
	PERF_ASSERT(found == text + 1);
}
PERF_TEST_F(GreekSeeking, CurrentBackwards)
{
	// Seek backwards from the current position (the end of the text,
	// SEEK_CUR) by utf8len(text) - 1 and expect one byte after the start.
	const char* const text = m_contents.c_str();
	const char* const textEnd = text + m_contents.length();
	const off_t offset = -(off_t)utf8len(text) + 1;

	const char* found = utf8seek(textEnd, m_contents.length(), text, offset, SEEK_CUR);
	PERF_ASSERT(found == text + 1);
}
Exemplo n.º 14
0
/*
 * Returns number of UTF8 characters in char array.
 *
 * str must be NUL-terminated.  The walk advances by utf8len() of each
 * character and is bounded by strlen(str), so a final character whose
 * reported length runs past the terminator still ends the loop.
 */
int utf8strlen(char *str) {
	int pos, count, len;
	len = strlen(str);
	/* Only the index is tested: every byte below len is non-NUL, and
	   testing str[pos] first would read past the terminator whenever a
	   step overshoots len. */
	for (pos = 0, count = 0; pos < len; count++) {
		pos = pos + utf8len(str + pos);
	}
	return count;
}
Exemplo n.º 15
0
/*
 * Return the next character of the iterated string as a new string and
 * advance the iterator position by that character's byte length
 * (utf8len() of its first byte).  One-byte characters are created with
 * StringPolicy_ASCII, all others with StringPolicy_UTF8.
 */
static void String_setNextResult(KonohaContext *kctx, KonohaStack* sfp)
{
	kIterator *iter = sfp[0].asIterator;
	kString *source = (kString *)iter->source;
	const char *cursor = S_text(source) + iter->current_pos;
	size_t width = utf8len(cursor[0]);

	iter->current_pos += width;
	if (width == 1) {
		KReturn(KLIB new_kString(kctx, OnStack, cursor, width, StringPolicy_ASCII));
	}
	else {
		KReturn(KLIB new_kString(kctx, OnStack, cursor, width, StringPolicy_UTF8));
	}
}
Exemplo n.º 16
0
/*
 * Convert the strings in and out to arrays of symbol numbers terminated
 * by -1 and append them as a new pair to the g_stringpairs list.
 *
 * INPUT_FORMAT_L2P:  every UTF-8 character is one symbol.
 * INPUT_FORMAT_NEWS: symbols are separated by single spaces; in and out
 *                    are modified by strtok().
 * Any other format yields two empty (immediately terminated) arrays.
 *
 * NOTE(review): the results of malloc()/strndup() are not checked here.
 */
void add_string_pair(char *in, char *out) {
    int *int_in, *int_out;
    int i, j;
    char *token;
    char *piece;
    struct stringpair *newpair;
    /* Get int array */
    int_in  = malloc(sizeof(int) * (utf8strlen(in) + 1));
    int_out = malloc(sizeof(int) * (utf8strlen(out) + 1));
	if (g_input_format == INPUT_FORMAT_L2P) {
		for (i = 0, j = 0; in[i] != '\0'; i += utf8len(&in[i]), j++) {
			/* get_set_char_num() stores its own copy of new symbols,
			   so the temporary copy is released right away */
			piece = strndup(&in[i], utf8len(&in[i]));
			int_in[j] = get_set_char_num(piece);
			free(piece);
		}
		int_in[j] = -1;
		for (i = 0, j = 0; out[i] != '\0'; i += utf8len(&out[i]), j++) {
			piece = strndup(&out[i], utf8len(&out[i]));
			int_out[j] = get_set_char_num(piece);
			free(piece);
		}
		int_out[j] = -1;
	} else if (g_input_format == INPUT_FORMAT_NEWS) {
		token = strtok(in, " ");
		for (j = 0; token != NULL; j++) {
			int_in[j] = get_set_char_num(token);
			token = strtok(NULL, " ");
		}
		int_in[j] = -1;
		token = strtok(out, " ");
		for (j = 0; token != NULL; j++) {
			int_out[j] = get_set_char_num(token);
			token = strtok(NULL, " ");
		}
		int_out[j] = -1;
	} else {
		/* unknown format: never hand out arrays without a terminator */
		int_in[0] = -1;
		int_out[0] = -1;
	}

	newpair = malloc(sizeof(struct stringpair));
	newpair->in = int_in;
	newpair->out = int_out;
	newpair->next = NULL;
	if (g_stringpairs == NULL) {
		g_stringpairs = newpair;
		g_stringpairs_tail = newpair;
	} else {
		g_stringpairs_tail->next = newpair;
		g_stringpairs_tail = newpair;
	}
}
Exemplo n.º 17
0
// Returns the length of the string as an int: utf8len() of the buffer,
// or strlen() of the buffer when SCRATCH_NO_UTF8 is defined.
int String::Length() const
{
	MUTEX_LOCK(str_mutex);

#ifndef SCRATCH_NO_UTF8
	return (int)utf8len(this->str_szBuffer);
#else
	return (int)strlen(this->str_szBuffer);
#endif
}
Exemplo n.º 18
0
/*
 * Return the part of v that starts moff characters in and spans mlen
 * characters.
 *
 * With K_USING_UTF8 defined, characters are stepped over using utf8len()
 * of their first byte; both walks stop at the end of the buffer, so an
 * offset or length beyond the text yields a shorter (possibly empty)
 * result instead of reading past the end.
 * Without K_USING_UTF8 the call is forwarded to knh_bytes_subbytes().
 */
kbytes_t knh_bytes_mofflen(kbytes_t v, size_t moff, size_t mlen)
{
#ifdef K_USING_UTF8
	size_t i;
	const unsigned char *s = v.utext;
	const unsigned char *e = s + v.len;
	for(i = 0; i < moff && s < e; i++) {
		s += utf8len(s[0]);
	}
	v.ubuf = (kchar_t*)s;
	for(i = 0; i < mlen && s < e; i++) {
		s += utf8len(s[0]);
	}
	/* still catches a final character whose length overshoots the end */
	KNH_ASSERT(s <= e);
	v.len = (const char*)s - v.text;
	return v;
#else
	/* was `m`, a name that is not declared in this function */
	return knh_bytes_subbytes(v, moff, mlen); /* if K_ENCODING is not set */
#endif
}
Exemplo n.º 19
0
/*
 * Count the characters of str by walking its bytes from start to end,
 * advancing by utf8len() on every step.
 */
static mrb_int
mrb_utf8_strlen(mrb_value str)
{
  unsigned char *cur = (unsigned char*) RSTRING_PTR(str);
  unsigned char *stop = cur + RSTRING_LEN(str);
  mrb_int count;

  for (count = 0; cur < stop; count++) {
    cur += utf8len(cur);
  }
  return count;
}
Exemplo n.º 20
0
/*
 * Count the characters in the first len bytes of text; each step
 * advances by utf8len() of the byte at the current position.
 */
static size_t utf8_strlen(const char *text, size_t len)
{
	const unsigned char *cur = (const unsigned char *)text;
	const unsigned char *stop = cur + len;
	size_t count;

	for(count = 0; cur < stop; count++) {
		size_t step = utf8len(cur[0]);
		cur += step;
	}
	return count;
}
Exemplo n.º 21
0
/**
 * Creates a new filter.
 */
static void filter_new(void)
{
	struct filter *f = filter_create();
	if (f)
	{
		DoMethod(filter_list, MUIM_NList_InsertSingle, (ULONG)f, MUIV_NList_Insert_Bottom);
		filter_dispose(f);
		set(filter_list, MUIA_NList_Active, MUIV_NList_Active_Bottom);

		if (filter_last_selected) set(filter_name_string, MUIA_BetterString_SelectSize, -utf8len(filter_last_selected->name));
		set(filter_wnd, MUIA_Window_ActiveObject, filter_name_string);
	}
}
Exemplo n.º 22
0
/*
 * Return the symbol number of utfstring.
 *
 * Entries 1..g_maxsymbol of g_symboltable are searched with strcmp().
 * When the string is not present, g_maxsymbol is incremented, a
 * strdup() copy is stored at that index, and the new index is returned.
 */
int get_set_char_num(char *utfstring) {
	int idx = 1;

	debug("Finding symbol %s with len %i... ", utfstring, utf8len(utfstring));
	while (idx <= g_maxsymbol) {
		if (strcmp(utfstring, g_symboltable[idx]) != 0) {
			idx++;
			continue;
		}
		debug("Found at %i\n", idx);
		return idx;
	}

	/* not known yet: append a private copy to the table */
	g_maxsymbol++;
	debug("Not found, adding at %i\n", g_maxsymbol);
	g_symboltable[g_maxsymbol] = strdup(utfstring);
	return g_maxsymbol;
}
Exemplo n.º 23
0
/*
 * Serialize a load option into buf: attributes (32 bit), device path
 * length (16 bit), the description converted by utf8_to_ucs2(), the
 * device path bytes, and the optional data.
 *
 * Returns the number of bytes required.  With size == 0 nothing is
 * written and only the required size is returned.  On error -1 is
 * returned and errno is set: EINVAL for a missing description, buffer or
 * device path, a negative dp_size, or optional_data_size without
 * optional_data; ENOSPC when size is smaller than the required size.
 */
efi_loadopt_create(uint8_t *buf, ssize_t size, uint32_t attributes,
		   efidp dp, ssize_t dp_size, unsigned char *description,
		   uint8_t *optional_data, size_t optional_data_size)
{
	if (!description) {
		errno = EINVAL;
		return -1;
	}

	/* two bytes per character plus a two-byte terminator */
	ssize_t desc_len = utf8len((uint8_t *)description, 1024) * 2 + 2;
	ssize_t sz = sizeof (attributes)
		     + sizeof (uint16_t) + desc_len
		     + dp_size + optional_data_size;
	if (size == 0)
		return sz;
	if (size < sz) {
		errno = ENOSPC;
		return -1;
	}

	/* from here on data is written, so every source and the
	   destination must be usable */
	if (!buf) {
		errno = EINVAL;
		return -1;
	}

	if (!optional_data && optional_data_size != 0) {
		errno = EINVAL;
		return -1;
	}

	/* A NULL device path used to be rejected only when dp_size was 0,
	   which let NULL with a non-zero size reach memcpy() below. */
	if (!dp || dp_size < 0) {
		errno = EINVAL;
		return -1;
	}

	uint8_t *pos = buf;

	/* header fields are copied bytewise so buf needs no alignment */
	memcpy(pos, &attributes, sizeof (attributes));
	pos += sizeof (attributes);

	uint16_t dp_size16 = (uint16_t)dp_size;
	memcpy(pos, &dp_size16, sizeof (dp_size16));
	pos += sizeof (uint16_t);

	utf8_to_ucs2((uint16_t *)pos, desc_len, 1, (uint8_t *)description);
	pos += desc_len;

	memcpy(pos, dp, dp_size);
	pos += dp_size;

	if (optional_data && optional_data_size > 0)
		memcpy(pos, optional_data, optional_data_size);

	return sz;
}
Exemplo n.º 24
0
/*
 * Build a media/file device path node for filepath in buf.
 *
 * The required size is the node structure plus one 16-bit unit per
 * character reported by utf8len(), plus one for the terminator.  The
 * generic header is produced by efidp_make_generic(); when a buffer size
 * was given and the header call reports exactly the required size, the
 * bytes after the first four are zeroed and the name is written by
 * utf8_to_ucs2().  Returns the result of efidp_make_generic().
 */
efidp_make_file(uint8_t *buf, ssize_t size, char *filepath)
{
	unsigned char *path = (unsigned char *)filepath;
	efidp_file *node = (efidp_file *)buf;
	ssize_t units = utf8len(path, -1) + 1;
	ssize_t needed = sizeof (*node) + units * sizeof (uint16_t);
	ssize_t made;

	made = efidp_make_generic(buf, size, EFIDP_MEDIA_TYPE, EFIDP_MEDIA_FILE,
				  needed);
	if (!size || made != needed)
		return made;

	/* NOTE(review): 4 is presumably the size of the generic header
	   written by efidp_make_generic() - confirm. */
	memset(buf+4, 0, needed-4);
	utf8_to_ucs2(node->name, needed-4, 1, path);
	return made;
}
Exemplo n.º 25
0
/*
 * Length of v in characters.  With K_USING_UTF8 defined the bytes are
 * walked in steps of utf8len() of each first byte and the steps are
 * counted; otherwise the byte length v.len is returned.
 */
size_t knh_bytes_mlen(kbytes_t v)
{
#ifdef K_USING_UTF8
	const unsigned char *cur = v.utext;
	const unsigned char *stop = cur + v.len;
	size_t count;

	for (count = 0; cur < stop; count++) {
		size_t step = utf8len(cur[0]);
		cur += step;
	}
	return count;
#else
	return v.len;
#endif
}
Exemplo n.º 26
0
// Returns the part of the string that starts at character index iStart
// and is at most iLen characters long.
//
// A negative iStart or a non-positive iLen yields an empty string; so
// does an iStart at or beyond the end of the string.  When iLen is
// larger than what remains, everything from iStart on is returned.
// With SCRATCH_NO_UTF8 defined, indices are bytes; otherwise they are
// code points as stepped by utf8codepoint().
String String::SubString(int iStart, int iLen) const
{
	MUTEX_LOCK(str_mutex);

	// Empty strings
	if (iStart < 0 || iLen <= 0) {
		return "";
	}

	// Get the first offset
#ifdef SCRATCH_NO_UTF8
	// Never point past the terminator.
	if ((size_t)iStart > strlen(this->str_szBuffer)) {
		return "";
	}
	String strRet(this->str_szBuffer + iStart);
#else
	void* subFirst = this->str_szBuffer;
	s_char codepoint;
	// Skip exactly iStart code points.  The terminator is tested before
	// each step, so iStart == 0 skips nothing and the walk never moves
	// beyond the end of the buffer.
	while (iStart-- > 0 && *(const char*)subFirst != '\0') {
		subFirst = utf8codepoint(subFirst, &codepoint);
	}
	String strRet((const char*)subFirst);
#endif

	// Requested length exceeds what is left: return the whole tail
#ifdef SCRATCH_NO_UTF8
	if ((uint32_t)iLen > strlen(strRet)) {
		return strRet;
	}
#else
	if ((uint32_t)iLen > utf8len(strRet)) {
		return strRet;
	}
#endif

	// Then set the null terminator at the length the user wants
#ifdef SCRATCH_NO_UTF8
	strRet.str_szBuffer[iLen] = '\0';
#else
	void* sz = strRet.str_szBuffer;
	while (iLen-- > 0 && *(const char*)sz != '\0') {
		sz = utf8codepoint(sz, &codepoint);
	}
	*(char*)sz = '\0';
#endif

	// Return
	return strRet;
}
Exemplo n.º 27
0
/*
 * Decode the UTF-8 sequence starting at utf8 into its code point.
 *
 * With K_USING_UTF8 defined: bytes accepted by utf8_isSingleton() are
 * returned unchanged; sequences whose utf8len() is 2, 3 or 4 are decoded
 * from the payload bits of the lead byte and of each following byte; any
 * other length yields 0.  The caller must provide as many bytes as the
 * lead byte announces.
 * Without K_USING_UTF8 the first byte is returned as is.
 */
kint_t kchar_toucs4(kutext_t *utf8)   /* utf8 -> ucs4 */
{
#if defined(K_USING_UTF8)
	kint_t ucs4 = 0;
	if (utf8_isSingleton(utf8[0])) {
		/* ASCII passes through unchanged */
		ucs4 = utf8[0];
		return ucs4;
	}

	switch(utf8len(utf8[0])) {
	case 2:
		/* 110xxxxx 10xxxxxx */
		ucs4 = utf8[0] & 0x1F;
		ucs4 = (ucs4 << 6) | (utf8[1] & 0x3F);
		break;
	case 3:
		/* 1110xxxx 10xxxxxx 10xxxxxx */
		ucs4 = utf8[0] & 0x0F;
		ucs4 = (ucs4 << 6) | (utf8[1] & 0x3F);
		ucs4 = (ucs4 << 6) | (utf8[2] & 0x3F);
		break;
	case 4:
		/* 11110xxx 10xxxxxx 10xxxxxx 10xxxxxx */
		ucs4 = utf8[0] & 0x07;
		ucs4 = (ucs4 << 6) | (utf8[1] & 0x3F);
		ucs4 = (ucs4 << 6) | (utf8[2] & 0x3F);
		ucs4 = (ucs4 << 6) | (utf8[3] & 0x3F);
		break;
	default:
		/* not a decodable lead byte: result stays 0 */
		break;
	}
	return ucs4;
#else
	return (kint_t)utf8[0];
#endif
}
Exemplo n.º 28
0
// Draws aLength bytes of aString at (aX, aY) with the current color and
// font.  Without aSpacing the whole run is drawn in one call at the
// transformed position.  With aSpacing the text is drawn one character
// at a time (character size taken from utf8len()), advancing the
// untransformed x position by the next aSpacing entry after each one;
// the loop ends early when utf8len() reports a non-positive size.
// Always returns NS_OK.
NS_IMETHODIMP nsRenderingContextPh::DrawString(const char *aString, PRUint32 aLength,
												nscoord aX, nscoord aY,
												const nscoord* aSpacing)
{
	if( !aLength )
		return NS_OK;

	UpdateGC();
	PgSetTextColorCx( mGC, mCurrentColor );

	PgSetFontCx( mGC, mPhotonFontName );
	PgSetExtendedTextFlagsCx( mGC, Pg_TEXT_SIMPLE_METRICS );

	if( aSpacing ) {
		const char* const stop = aString + aLength;
		nscoord penX = aX;

		for( const char *glyph = aString; glyph < stop; ) {
			int nbytes = utf8len( glyph, aLength );
			if( nbytes <= 0 )
				break;

			aLength -= nbytes;

			// transform a copy so the pen position stays in app units
			nscoord devX = penX;
			nscoord devY = aY;
			mTranMatrix->TransformCoord( &devX, &devY );
			PhPoint_t pos = { devX, devY };
			PgDrawTextCx( mSurfaceDC, glyph, nbytes, &pos, Pg_TEXT_LEFT);

			glyph += nbytes;
			penX += *aSpacing++;
			}
		}
	else {
		mTranMatrix->TransformCoord( &aX, &aY );
		PhPoint_t pos = { aX, aY };
		PgDrawTextCharsCx( mSurfaceDC, aString, aLength, &pos, Pg_TEXT_LEFT);
		}

	// restore the default text flags
	PgSetExtendedTextFlagsCx( mGC, 0 );

	return NS_OK;
}
Exemplo n.º 29
0
/*
 * Convert the UTF-8 string utf8 into buf via utf8_to_ucs2().
 *
 * The required size is utf8len(utf8, -1) 16-bit units.  With size == 0
 * only that required size is returned.  Returns -1 with errno EINVAL
 * when utf8 is missing or when a size is given without a buffer, and -1
 * with errno ENOSPC when size is below the required size; otherwise the
 * result of utf8_to_ucs2() is returned.
 */
efi_loadopt_args_as_ucs2(uint16_t *buf, ssize_t size, uint8_t *utf8)
{
	ssize_t needed;

	if (!utf8 || (size > 0 && !buf)) {
		errno = EINVAL;
		return -1;
	}

	needed = utf8len(utf8, -1) * sizeof(uint16_t);

	/* size query only */
	if (size == 0)
		return needed;

	if (size >= needed)
		return utf8_to_ucs2(buf, size, 0, utf8);

	errno = ENOSPC;
	return -1;
}
Exemplo n.º 30
0
/*
 * Split str and append the pieces to resultArray.
 *
 * With a non-null regex whose pattern is not empty, the text before each
 * match is appended and scanning continues after the match, until no
 * match is found or the array would reach limit entries; whatever text
 * remains is appended last.  An empty match ends the scan, as the
 * position could not advance.
 *
 * Otherwise the string is split into single characters: one byte each
 * for ASCII strings, utf8len() of the lead byte for all others.
 */
static void kArray_split(KonohaContext *kctx, kArray *resultArray, kString *str, kRegExp *regex, size_t limit)
{
	int stringPolicy = kString_is(ASCII, str) ? StringPolicy_ASCII : 0;
	if(IS_NOTNULL(regex) && S_size(regex->pattern) > 0) {
		const char *s = S_text(str);  // necessary
		const char *eos = s + S_size(str);
		kregmatch_t pmatch[2];
		int res = 0;
		while(s < eos && res == 0) {
			res = pcre_regexec(kctx, regex->reg, s, 1, pmatch, regex->eflags);
			if(res != 0) break;
			size_t len = pmatch[0].rm_eo;
			if(len > 0) {
				KLIB new_kString(kctx, resultArray, s, pmatch[0].rm_so, stringPolicy);
				s += len;
			}
			if(!(kArray_size(resultArray) + 1 < limit)) {
				return;
			}
			if(len == 0) {
				/* the same empty match would be found forever */
				break;
			}
		}
		if(s < eos) {
			KLIB new_kString(kctx, resultArray, s, eos - s, stringPolicy); // append remaining string to array
		}
	}
	else {
		const unsigned char *s = (const unsigned char *)S_text(str);
		size_t i, n = S_size(str);
		if(kString_is(ASCII, str)) {
			for(i = 0; i < n; i++) {
				KLIB new_kString(kctx, resultArray, (const char *)s + i, 1, StringPolicy_ASCII);
			}
		}
		else {
			/* The index advances by the character length only; adding
			   a loop increment on top skipped one byte per character. */
			i = 0;
			while(i < n) {
				int len = utf8len(s[i]);
				int policy = (len == 1) ? StringPolicy_ASCII : StringPolicy_UTF8;
				/* always make progress and never run past the text */
				if(len < 1) len = 1;
				if((size_t)len > n - i) len = (int)(n - i);
				KLIB new_kString(kctx, resultArray, (const char *)s + i, len, policy);
				i += len;
			}
		}
	}
}