/******************************************************************************
 *
 * _Dt_PrevChar(start, s)
 *	Return a pointer to the multi-byte character preceding the
 *	character pointed to by "s".
 *
 *	start	a character boundary located before "s" in the same string.
 *	s	the character whose predecessor is wanted.
 *
 *	Falls back to "s - 1" (retreat one byte) when the locale is
 *	single-byte, when "*s" or "*start" is not a valid multi-byte
 *	character, or when "start" is not strictly before "s".
 *
 ******************************************************************************/
char *
_Dt_PrevChar(const char *start, char *s)
{
    char *p;
    int   len;

    if ( !_DtNl_is_multibyte || (MB_CUR_MAX == 1) )
	return (s - 1);

    /*
     * Check if "*s" is a valid multi-byte character.
     * If not just return the previous byte.
     */
    if ( mblen(s, MB_CUR_MAX) < 0 )
	return (s - 1);

    /*
     * "start" must be less than "s"; if not return (s-1).
     */
    if ( start >= s )
	return (s - 1);

    /*
     * Check that "start" points to a valid multi-byte character,
     * otherwise return "s-1".
     */
    if ( mblen(start, MB_CUR_MAX) < 0 )
	return (s - 1);

    /*
     * Starting from "start" walk forward one character at a time until
     * the next step would reach or pass "s"; "p" is then the character
     * preceding "s".
     *
     * mblen() is called once per character.  Both failure results are
     * mapped to a one byte step: -1 (invalid sequence) and 0 (NUL byte),
     * the latter of which would otherwise leave "p" stuck forever.
     */
    p = (char *)start;
    for (;;) {
	len = mblen(p, MB_CUR_MAX);
	if (len <= 0)
	    len = 1;
	if (p + len >= s)
	    break;
	p += len;
    }

    return p;
}
// This function is called from within fcitx. It is called when the // list of input candidates has changed. It extracts the candidates // from the recognition engine and puts them into the format required // by fcitx for displaying in its popup window INPUT_RETURN_VALUE FcitxTabletGetCandWords(void* arg) { FcitxTablet* tablet = (FcitxTablet*) arg; FcitxInputState *input = FcitxInstanceGetInputState(tablet->fcitx); FcitxInstanceCleanInputWindow(tablet->fcitx); char* c = tablet->engineInstance->GetCandidates(tablet->engineData); int len = strlen(c); int i = 0; do { int n = mblen(&c[i], len); if(n <= 0) break; FcitxCandidateWord cw; cw.callback = FcitxTabletGetCandWord; cw.strExtra = NULL; cw.priv = NULL; cw.owner = tablet; cw.wordType = MSG_OTHER; // TODO does fcitx free this? cw.strWord = (char*) malloc(n+1); memcpy(cw.strWord, &c[i], n); cw.strWord[n] = 0; FcitxCandidateWordAppend(FcitxInputStateGetCandidateList(input), &cw); i += n; } while(1); return IRV_DISPLAY_CANDWORDS; }
/* Case-insensitive substring search for multibyte strings: the multibyte
 * counterpart of strcasestr().  Returns a pointer to the first match of
 * needle inside haystack, or NULL when there is none.  An empty needle
 * matches at the start of haystack. */
char *mbstrcasestr(const char *haystack, const char *needle)
{
#ifdef ENABLE_UTF8
    if (use_utf8) {
	size_t chars_left, needle_chars;
	const char *pos;

	assert(haystack != NULL && needle != NULL);

	if (*needle == '\0')
	    return (char *)haystack;

	/* Lengths are counted in characters, not bytes. */
	chars_left = mbstrlen(haystack);
	needle_chars = mbstrlen(needle);

	pos = haystack;
	while (*pos != '\0' && chars_left >= needle_chars) {
	    /* Accept a match only where a valid character starts. */
	    if (mbstrncasecmp(pos, needle, needle_chars) == 0 &&
			mblen(pos, MB_CUR_MAX) > 0)
		return (char *)pos;

	    /* Step one whole character to the right. */
	    pos += move_mbright(pos, 0);
	    chars_left--;
	}

	return NULL;
    }
#endif
    /* Single-byte build or locale: the C library routine is enough. */
    return (char *) strcasestr(haystack, needle);
}
/*
 * Return the current cursor position of `text` as the next index point
 * and move the cursor past one character of the current locale.
 *
 * Returns NULL once the text is at EOF.  A byte sequence that mblen()
 * rejects is reported with g_warning() and skipped as a single byte.
 */
gchar *
sary_ipoint_locale (SaryText *text)
{
    gchar *cursor, *eof;
    SaryInt maxlen, len;

    if (sary_text_is_eof(text)) {
	return NULL;
    }

    eof    = sary_text_get_eof(text);
    cursor = sary_text_get_cursor(text);
    maxlen = eof - cursor;	/* bytes left before EOF */

    len = mblen(cursor, maxlen);
    if (len == -1) {		/* invalid character */
	gchar *bof = sary_text_get_bof(text);
	/* The pointer difference is a ptrdiff_t; convert it explicitly so
	 * the argument matches the conversion specifier. */
	g_warning("invalid character at %ld", (long)(cursor - bof));
	len = 1;
    } else if (len == 0) {
	/* mblen() reports 0 for a NUL byte; it still occupies one byte,
	 * so step over it rather than advancing by nothing. */
	len = 1;
    }
    sary_text_forward_cursor(text, len);

    return cursor;
}
/*
 * Convert the multibyte character at `address` (examining at most `number`
 * bytes) into a wide character stored through `charptr`.
 *
 * Returns 0 when `address` is null, otherwise whatever mblen() reports for
 * the character: its length in bytes, 0 for the null character, or a
 * negative value on error.  Only characters of 0, 1 or 2 bytes are stored;
 * a two-byte character is packed with its first byte in the high-order
 * position.  `charptr` may be null, in which case nothing is stored.
 */
int
mbtowc (wchar_t *charptr, const char *address, size_t number)
{
  const unsigned char *src = (const unsigned char *) address;
  int bytes;

  if (address == 0)
    return 0;

  bytes = mblen (address, number);
  if (bytes < 0 || charptr == 0)
    return bytes;

  if (bytes == 0)
    {
      if (number > 0)
	*charptr = (wchar_t) '\0';
    }
  else if (bytes == 1)
    {
      *charptr = (wchar_t) src[0];
    }
  else if (bytes == 2)
    {
      *charptr = (wchar_t) ((src[0] << 8) | src[1]);
    }

  return bytes;
}
/*
 * _DtWmParseToLower (string)
 *
 * Lower-case `string` in place.
 *
 * With MULTIBYTE defined the string is walked one multibyte character at a
 * time and only single-byte upper-case characters are converted; the walk
 * ends at the terminating NUL or at the first sequence mblen() rejects.
 * Without MULTIBYTE every byte up to the terminating NUL is examined.
 */
static void _DtWmParseToLower (unsigned char  *string)
{
    unsigned char *pch = string;
#ifdef MULTIBYTE
    int            chlen;

    while ((chlen = mblen ((char *)pch, MB_CUR_MAX)) > 0)
    {
	if ((chlen == 1) && (isupper (*pch)))
	{
	    *pch = tolower(*pch);
	}
	pch += chlen;
    }
#else
    /* Compare against the NUL character, not the NULL pointer constant. */
    while (*pch != '\0')
    {
	if (isupper (*pch))
	{
	    *pch = tolower(*pch);
	}
	pch++;
    }
#endif

} /* END OF FUNCTION _DtWmParseToLower */
/*
 * Dt_charCount(s)
 *
 * Return the number of characters (not bytes) in `s`.  A NULL string
 * counts as 0.  When multibyte handling is off the answer is strlen(s).
 * A byte sequence mblen() rejects is counted as one character of one byte.
 */
int
Dt_charCount(
        char *s )
{
    int chars;
    int width;

    if (s == NULL)
        return(0);

    if (!_DtNl_is_multibyte)
        return(strlen(s));

    /* One loop iteration per character. */
    for (chars = 0; *s != '\0'; chars++)
    {
        width = mblen(s, MB_CUR_MAX);
        s += (width == -1) ? 1 : width;
    }

    return(chars);
}
static void num_arg(char *arg, int md) { offset_t repeat, toline; char rep[21]; char *ptr; int len; ptr = rep; for (++arg; *arg != '}'; arg += len) { if (*arg == NULL) fatal("%s: missing '}'\n", targ); if ((len = mblen(arg, MB_LEN_MAX)) <= 0) len = 1; if ((ptr + len) >= &rep[20]) fatal("%s: Repeat count too large\n", targ); (void) memcpy(ptr, arg, len); ptr += len; } *ptr = NULL; if ((asc_to_ll(rep, &repeat) == ERR) || repeat < 0L) fatal("Illegal repeat count: %s\n", targ); if (md == LINMODE) { toline = offset = curline; for (; repeat > 0LL; repeat--) { toline += offset; to_line(toline); } } else if (md == EXPMODE) for (; repeat > 0LL; repeat--) to_line(findline(expbuf, offset)); else fatal("No operation for %s\n", targ); }
/******************************************************************************
 * Function: int _DtHelpFmtFindBreak (char *ptr, int mb_len, int *num_chars)
 *
 * Parameters:
 *		ptr		Specifies the string to check.
 *		mb_len		1 to measure a run of single-byte characters;
 *				any other value to measure a run of
 *				multi-byte characters.
 *		num_chars	Returns the number of characters in the run.
 *
 * Returns	the number of bytes in the run.
 *
 * Purpose:	Measure the leading run of 'ptr' made up entirely of
 *		single-byte or entirely of multi-byte characters.  The run
 *		ends at the end of the string, at the first character of
 *		the other kind, or at the first sequence mblen() rejects.
 *
 *****************************************************************************/
int
_DtHelpFmtFindBreak (
    char  *ptr,
    int    mb_len,
    int   *num_chars)
{
    const int wantSingle = (1 == mb_len);
    int       byteCount  = 0;
    int       charCount  = 0;
    int       charSize;

    while ('\0' != *ptr)
      {
	charSize = mblen(ptr, MB_CUR_MAX);

	/* stop on an error or on a character of the wrong kind */
	if (charSize <= 0 || wantSingle != (1 == charSize))
	    break;

	charCount++;
	ptr       += charSize;
	byteCount += charSize;
      }

    *num_chars = charCount;
    return byteCount;
}
/*
 * Cut based on byte positions, taking care not to split multibyte characters.
 * Although this function also handles the case where -n is not specified,
 * c_cut() ought to be much faster.
 *
 * fp is read one line at a time with fgets() into a static buffer of
 * MAX_LINE_LENGTH bytes; selected characters are written to stdout.
 * fname is used only in the warning issued for an undecodable sequence.
 *
 * NOTE(review): positions[], maxval and autostop are defined elsewhere;
 * positions[] is indexed with 1 + <zero-based byte column>, and maxval
 * appears to be the highest explicitly listed position -- confirm.
 */
void
b_n_cut(FILE *fp, const char *fname)
{
	size_t col, i, lbuflen;
	static char buf[MAX_LINE_LENGTH];
	char *lbuf;
	int canwrite, clen, warned;

	/* Warn at most once per call about invalid multibyte input. */
	warned = 0;
	while ((lbuf = fgets(buf, sizeof buf, fp)) != NULL) {
		lbuflen = strlen(lbuf);
		/* col is the byte column of the character at lbuf. */
		for (col = 0; lbuflen > 0; col += clen) {
			if ((clen = mblen(lbuf, lbuflen)) < 0) {
				if (!warned) {
					warn("%s", fname);
					warned = 1;
				}
				/* Treat the undecodable byte as one character. */
				clen = 1;
			}
			/* Stop at a NUL character or at the line's newline. */
			if (clen == 0 || *lbuf == '\n')
				break;
			if (col < maxval && !positions[1 + col]) {
				/*
				 * Print the character if (1) after an initial
				 * segment of un-selected bytes, the rest of
				 * it is selected, and (2) the last byte is
				 * selected.
				 */
				i = col;
				/* Skip the leading un-selected bytes. */
				while (i < col + clen && i < maxval &&
				    !positions[1 + i])
					i++;
				/* Something must remain after the skipped part. */
				canwrite = i < col + clen;
				/* Every remaining byte must be selected. */
				for (; i < col + clen && i < maxval; i++)
					canwrite &= positions[1 + i];
				if (canwrite)
					fwrite(lbuf, 1, clen, stdout);
			} else {
				/*
				 * Print the character if all of it has
				 * been selected.
				 */
				canwrite = 1;
				for (i = col; i < col + clen; i++)
					/*
					 * Bytes at or beyond maxval count as
					 * selected only when autostop is set.
					 */
					if ((i >= maxval && !autostop) ||
					    (i < maxval && !positions[1 + i])) {
						canwrite = 0;
						break;
					}
				if (canwrite)
					fwrite(lbuf, 1, clen, stdout);
			}
			lbuf += clen;
			lbuflen -= clen;
		}
		/*
		 * Bytes are left over only when the loop above stopped early
		 * (newline or NUL); terminate the output line in that case.
		 */
		if (lbuflen > 0)
			putchar('\n');
	}
}
/*
 * _DtGetNthChar(s, n)
 *
 * Return a pointer to the character at index `n` (counting from 0) of
 * `s`; n equal to the character count yields the terminating NUL.
 * Returns NULL when `s` is NULL, `n` is negative, or `n` exceeds the
 * number of characters reported by Dt_charCount().
 */
char *
_DtGetNthChar(
        char *s,
        int n )
{
    int seen;
    int width;

    if ((s == NULL) || (n < 0) || (n > Dt_charCount(s)))
        return(NULL);

    for (seen = 0; (seen < n) && (*s); seen++)
    {
        /* Width of the current character in bytes. */
        width = _DtNl_is_multibyte ? mblen(s, MB_CUR_MAX) : 1;

        /* mblen() returns -1 for an invalid sequence: skip one byte. */
        if (width == -1)
            width = 1;

        s += width;
    }

    return(s);
}
/*
 * Scan `len` bytes starting at `ptr` and return a pointer to where the
 * trailing line terminator begins: two bytes before the end of the scan
 * for "\r\n", one byte before it for a lone "\n" or "\r", otherwise the
 * end of the scan itself.
 *
 * The buffer is walked one multibyte character at a time so that only the
 * first byte of each character is compared against '\r' and '\n'.
 * Undecodable sequences are skipped a byte at a time and are not recorded
 * as characters.
 */
static const char *php_fgetcsv_lookup_trailing_spaces(const char *ptr, size_t len)
{
	unsigned char before_last = 0;	/* first byte of the character before the last one */
	unsigned char last = 0;		/* first byte of the last character seen */
	int step;

	for (; len > 0; ptr += step, len -= (size_t)step) {
		if (*ptr == '\0') {
			step = 1;
		} else {
			step = mblen(ptr, len);
		}

		if (step == -2 || step == -1) {
			/* invalid or incomplete sequence: skip one byte */
			step = 1;
		} else if (step == 0) {
			/* leave without advancing */
			break;
		} else {
			before_last = last;
			last = (unsigned char)*ptr;
		}
	}

	if (last == '\n') {
		return (before_last == '\r') ? ptr - 2 : ptr - 1;
	}
	if (last == '\r') {
		return ptr - 1;
	}
	return ptr;
}
/*
 * multibyte version of strpbrk().
 * Only cs can be multibyte.
 *
 * Returns a pointer to the first single-byte character of cs that also
 * occurs in ct, or NULL when there is none.  Multi-byte characters in cs
 * are stepped over as a unit and never match.  A sequence mblen() cannot
 * decode is treated as a single byte.
 */
char *
_dt_strpbrk(
        char *cs,
        char *ct)
{
    int len;

    if (MB_CUR_MAX == 1)
	return(strpbrk(cs, ct));

    while (*cs) {
	len = mblen(cs, MB_CUR_MAX);
	if (len < 1)
	    len = 1;

	/*
	 * *cs is never NUL here, so strchr() cannot match ct's
	 * terminator.  This replaces a hand-written scan that called
	 * strlen(ct) on every step.
	 */
	if (len == 1 && strchr(ct, *cs) != NULL)
	    return(cs);

	cs += len;
    }
    return(NULL);
}
/*
 * Compare d and s, ignoring the case of the ASCII letters a-z.
 *
 * The character width is taken from d only.  Multi-byte characters are
 * compared byte for byte; single bytes (and bytes mblen() rejects) are
 * compared after folding a-z to upper case.  The result is the difference
 * between the two bytes, taken as unsigned char and unfolded, at the first
 * mismatch or at the end of the shorter string: zero when equal.
 */
int xstricmp(char *d, char *s)
{
	while (*d && *s) {
		int width = mblen(d, MB_CUR_MAX);

		if (width == 0)
			break;

		if (width > 1) {
			/* multi-byte character: exact byte comparison */
			for (; width > 0; width--, d++, s++) {
				if (*d != *s)
					return ((unsigned char)*d) - ((unsigned char)*s);
			}
			continue;
		}

		/* single byte: fold lower-case ASCII before comparing */
		{
			int left = (unsigned char)*d;
			int right = (unsigned char)*s;

			if (left >= 'a' && left <= 'z')
				left -= 'a' - 'A';
			if (right >= 'a' && right <= 'z')
				right -= 'a' - 'A';
			if (left != right)
				return ((unsigned char)*d) - ((unsigned char)*s);
		}
		d++;
		s++;
	}
	return ((unsigned char)*d) - ((unsigned char)*s);
}
// Locates the character that starts at `cur` without moving `cur`.
//
// On success returns true with [first, last) spanning one character:
// `first` equals `cur` and `last` is one past the character's final element.
// Returns false when `cur` is beyond `end` (first/last untouched), when
// `cur` equals `end` (first = last = end), or when `cur` points at a NUL
// (first = last = cur).
//
// A sequence mblen() cannot decode is treated as a one-element character.
/* static */
bool cCharUtil::PeekNextChar( const TSTRING::const_iterator& cur,
                              const TSTRING::const_iterator& end,
                              TSTRING::const_iterator& first,
                              TSTRING::const_iterator& last )
{
    //
    // do we have a valid string here?
    //
    if( cur > end )
    {
        return false;
    }

    if( cur == end )
    {
        first = last = end;
        return false;
    }

    if( *cur == _T('\0') )
    {
        first = last = cur;
        return false;
    }

    // From here on *cur is known to be non-NUL and cur < end.
    first = cur;

#if !IS_AROS
    // Never let mblen() look at (or `last` move) beyond `end`: limit the
    // scan to the elements that remain in the range.
    size_t remaining = static_cast<size_t>( end - cur );
    size_t limit     = static_cast<size_t>( MB_CUR_MAX );
    if( remaining < limit )
        limit = remaining;

    mblen (NULL, 0);    // reset the conversion state
    int len = mblen(&*cur, limit);
    if (len < 1) // invalid or truncated multibyte sequence, but let's not blow up.
        len = 1;

    last = cur + len;
#else // AROS mblen() seems broken (as of 6/2016) so don't use it.
    last = cur + 1;
#endif

    return true;
}
/*
 * mblen() wrapper that always reports forward progress: the byte length of
 * the character at s (looking at no more than n bytes), or 1 whenever
 * mblen() returns zero or a negative value.
 */
static inline int Tmblen(const char* s, size_t n)
{
	const int width = mblen(s, n);

	return (width > 0) ? width : 1;
}
// Commits the first character of the engine's candidate string to the
// current input context. Does nothing when the engine returns no string,
// when the first byte sequence cannot be decoded, or when the character
// would not fit the local buffer.
static void CommitFirstCandidate(FcitxTablet* tablet) {
	char s[16]; // room for one multibyte character plus the terminator
	char* candidates = tablet->engineInstance->GetCandidates(tablet->engineData);
	if (candidates == NULL)
		return;

	int l = mblen(candidates, MB_CUR_MAX);
	// mblen() returns -1 for an invalid sequence; that value must never
	// reach memcpy() or be used as an index. Also refuse anything that
	// would overflow s.
	if (l < 0 || (size_t) l >= sizeof s)
		return;

	memcpy(s, candidates, (size_t) l);
	s[l] = '\0';
	FcitxInstanceCommitString(tablet->fcitx, FcitxInstanceGetCurrentIC(tablet->fcitx), s);
}
/*
 * Dt_strtok(s1, s2)
 *
 * Multi-byte aware strtok(): returns the next token of s1 delimited by
 * characters from s2, or NULL when no token is left.  Pass the string on
 * the first call and NULL afterwards; the scan position is kept in a
 * static variable between calls.  Falls back to strtok() when multibyte
 * handling is off.
 */
char *
Dt_strtok(
        char *s1,
        char *s2 )
{
    static char *ptr;
    char *token;
    int   span;
    int   sep_bytes;

    /* Use standard libc function, if no multibyte */
    if (!_DtNl_is_multibyte)
        return(strtok(s1, s2));

    /* First call for this string: remember it, minus leading separators. */
    if (s1 != NULL)
        ptr = s1 + Dt_strspn(s1, s2);

    /* A null string pointer has no tokens */
    if (ptr == NULL)
        return(NULL);

    /* Length of the token that starts at ptr */
    span = Dt_strcspn(ptr, s2);
    if (span <= 0)
        return(NULL);           /* no tokens left */

    token = ptr;

    /*
     * The terminator is about to be overwritten with a NUL.  Measure it
     * first so that it can be skipped as a whole; an invalid sequence
     * (mblen() == -1) is taken to be one byte long.
     */
    sep_bytes = mblen(token + span, MB_CUR_MAX);
    if (sep_bytes == -1)
        sep_bytes = 1;

    token[span] = '\0';
    ptr = token + span + sep_bytes;

    /* Skip any further separators that directly follow the first one. */
    ptr += Dt_strspn(ptr, s2);

    return(token);
}
/*
 * folds white space around and in between words.
 * " aa    bb  " becomes "aa bb".
 * returns NULL if it couldn't allocate memory. The caller must free
 * the result when done.
 *
 * Only single-byte characters are tested for white space; multi-byte
 * characters and byte sequences mblen() cannot decode are copied through
 * unchanged.
 */
static char *slp_fold_space(const char *s) {
	int len;
	char *folded, *f;

	/* the folded string is never longer than the original */
	if (!(folded = malloc(strlen(s) + 1))) {
		slp_err(LOG_CRIT, 0, "slp_fold_space", "out of memory");
		return (NULL);
	}

	f = folded;
	for (;;) {
		/* step 1: skip white space */
		for (; *s; s++) {
			len = mblen(s, MB_CUR_MAX);
			if (len != 1)
				break;
			/* ctype functions need an unsigned char value */
			if (!isspace((unsigned char)*s))
				break;
		}

		if (!*s) {
			/* end of string */
			*f = 0;
			return (folded);
		}
		/* if we are in between words, keep one space */
		if (f != folded)
			*f++ = ' ';

		/* step 2: copy into folded until we hit more white space */
		while (*s) {
			int i;
			len = mblen(s, MB_CUR_MAX);
			if (len == 1 && isspace((unsigned char)*s))
				break;

			/*
			 * An undecodable sequence yields -1; copy it as one
			 * byte so that the scan always moves forward.
			 */
			if (len < 1)
				len = 1;

			for (i = 0; i < len; i++)
				*f++ = *s++;
		}
		/* terminate here in case this was the last word */
		*f = *s;
		if (!*s++)
			return (folded);
	}
}
// Convert and map inconvertible Unicode characters. // We use it for extended ASCII names in Unix. void CharToWideMap(const char *Src,wchar *Dest,size_t DestSize,bool &Success) { // Map inconvertible characters to private use Unicode area 0xE000. // Mark such string by placing special non-character code before // first inconvertible character. Success=false; bool MarkAdded=false; uint SrcPos=0,DestPos=0; while (DestPos<DestSize) { if (Src[SrcPos]==0) { Dest[DestPos]=0; Success=true; break; } ignore_result( mbtowc(NULL,NULL,0) ); // Reset shift state. if (mbtowc(Dest+DestPos,Src+SrcPos,MB_CUR_MAX)==-1) { // For security reasons we do not want to map low ASCII characters, // so we do not have additional .. and path separator codes. if (byte(Src[SrcPos])>=0x80) { if (!MarkAdded) { Dest[DestPos++]=MappedStringMark; MarkAdded=true; if (DestPos>=DestSize) break; } Dest[DestPos++]=byte(Src[SrcPos++])+MapAreaStart; } else break; } else { ignore_result( mblen(NULL,0) ); // Reset shift state. int Length=mblen(Src+SrcPos,MB_CUR_MAX); SrcPos+=Max(Length,1); DestPos++; } } }
/*
 * Move the cursor of the current line one character to the right, unless
 * it already sits at the end of the line's text.  The display column
 * advances by 2 for a multi-byte character and by 1 otherwise.
 */
void do_right()
{
	if (curline->pdata_x < curline->data + strlen(curline->data)) {
		int bytelen = mblen(curline->pdata_x, MB_CUR_MAX);

		/*
		 * mblen() returns -1 for an invalid sequence; stepping by
		 * that would move the cursor backwards.  Treat such a byte
		 * as a one-byte character.
		 */
		if (bytelen < 1)
			bytelen = 1;

		curline->pdata_x += bytelen;
		curline->win_x += (bytelen > 1) ? 2 : 1;
	}
}
/*
 * Count the characters, in the current locale's encoding, contained in the
 * first num_bytes bytes at cstr_utf8.
 *
 * A NUL byte and a byte that does not start a valid character each count
 * as one character of one byte, so the scan always moves forward and never
 * reads past num_bytes.
 */
static inline size_t strlen_locale(const char* cstr_utf8, size_t num_bytes)
{
  size_t length = 0;
  const char* p = cstr_utf8;

  while (num_bytes > 0) {
    /* Keep the result signed: mblen() returns -1 on error and 0 for NUL. */
    int char_size = mblen(p, num_bytes);
    size_t step = (char_size > 0) ? (size_t)char_size : 1;

    ++length;
    p += step;
    num_bytes -= step;
  }

  return length;
}
/***************************************
 *
 * Multi-byte capable version of strtok(s1, s2).
 * Returns a pointer to the span of characters in s1 terminated by
 * one of the characters in s2.  Only s1 can be multibyte.
 *
 * Pass the string on the first call and NULL on later calls; the scan
 * position is kept in a static variable.  Returns NULL when no token is
 * left.  Without NLS16 this is simply strtok().
 */
char *
xpstrtok(
        char *s1,
        char *s2 )
{
#ifdef NLS16
   static char * ptr = NULL;
   char * return_ptr;
   int len;
   int offset;

   /*
    * If this is the first call, save the string pointer, and bypass
    * any leading separators.
    */
   if (s1)
      ptr = s1 + xpstrspn(s1, s2);

   /* A Null string pointer has no tokens */
   if (ptr == NULL)
      return(NULL);

   /* Find out where the first terminator is */
   if ((len = xpstrcspn(ptr, s2)) <= 0)
   {
      /* No tokens left */
      return(NULL);
   }

   /* Keep track of where the token started */
   return_ptr = ptr;

   /* Null out the terminator; we need to know how many bytes are
    * occupied by the terminator, so that we can skip over it to
    * the next character.
    *
    * mblen() returns -1 for an invalid sequence.  Skip a single byte in
    * that case; using -1 would step back onto the NUL written below and
    * the rest of the string would be lost.
    */
   offset = mblen(ptr + len, MB_CUR_MAX);
   if (offset < 0)
      offset = 1;
   *(ptr + len) = '\0';
   ptr += (len + offset);

   /*
    * In preparation for the next pass, skip any other occurrences of
    * the terminator characters which were joined with the terminator
    * we first encountered.
    */
   len = xpstrspn(ptr, s2);
   ptr += len;

   return(return_ptr);
#else
   return(strtok(s1, s2));
#endif /* NLS16 */
}
/*
 * Dt mult-byte equivalent of isdigit()
 *
 * Returns the result of isdigit() for the character at "s" when that
 * character is a single byte, and 0 when it is a multi-byte character or
 * cannot be decoded.
 */
int
_Dt_isdigit(char *s)
{
    if ( !_DtNl_is_multibyte || MB_CUR_MAX == 1 )
	return isdigit((unsigned char)*s);

    /*
     * The ctype functions are only defined for values representable as
     * unsigned char (or EOF); a plain char may be negative.
     */
    if ( mblen(s, MB_CUR_MAX) == 1 )
	return isdigit((unsigned char)*s);
    else
	return 0;
}
/*
 * Dt mult-byte equivalent of isspace()
 *
 * Only a single-byte character can be white space: a multi-byte or
 * undecodable character at "s" yields 0, anything else yields the
 * result of isspace() for that byte.
 */
int
_Dt_isspace(char *s)
{
    /* In a multi-byte locale, rule out everything but one-byte characters. */
    if ( _DtNl_is_multibyte && MB_CUR_MAX != 1 &&
	 mblen(s, MB_CUR_MAX) != 1 )
	return 0;

    return isspace((u_char)*s);
}
/*
 * Find the first occurrence of c in s, stepping over multi-byte
 * characters as a unit.  Returns a pointer to the match, or NULL when
 * there is none (including when c is the NUL character, since the scan
 * stops before the terminator).
 * Assumes that we start on a char boundary, and that c is a 7-bit
 * ASCII char.
 */
char *slp_utf_strchr(const char *s, char c) {
	int len;
	char *p;

	for (p = (char *)s; *p; p += len) {
		len = mblen(p, MB_CUR_MAX);
		if (len == 1 && *p == c)
			return (p);
		/*
		 * mblen() returns -1 for an invalid sequence; step over one
		 * byte so the pointer never moves backwards.
		 */
		if (len < 1)
			len = 1;
	}
	return (NULL);
}
/*
 * Return a pointer to the extension of filename, i.e. to the last '.'
 * that follows the last '/', '\\' or ':' separator.  When there is no
 * such dot the start of filename itself is returned.
 *
 * The name is scanned one multi-byte character at a time so that the
 * trailing bytes of a multi-byte character are never mistaken for a
 * separator or a dot.
 */
static char *GetExt(char *filename)
{
	char *p = filename, *p2 = 0;

	while (*p) {
		int l = mblen(p, MB_CUR_MAX);

		if (!l)
			break;
		/*
		 * An invalid sequence yields -1; treat it as one byte so
		 * the scan cannot run backwards.
		 */
		if (l < 0)
			l = 1;

		if (*p == '/' || *p == '\\' || *p == ':')
			p2 = 0;		/* a dot before a separator is not an extension */
		if (*p == '.')
			p2 = p;
		p += l;
	}
	if (p2)
		return p2;
	return filename;
}
/*
 * returns 1 if a character before s2 in s1 is single-byte,
 * returns 0 if it is multi-byte.
 *
 * s1 is scanned from its start one character at a time until s2 is
 * reached.  1 is also returned in a single-byte locale, when s2 is the
 * start of s1, and when s2 is never reached before the end of s1.
 */
int
_is_previous_single(
        char *s1,
        char *s2)
{
    int n = 1;		/* byte length of the character just passed */

    if (MB_CUR_MAX == 1)
	return(1);

    while (*s1) {
	if (s1 == s2)
	    return (n > 1) ? 0 : 1;

	/*
	 * Measure the character once.  An invalid sequence (-1) is
	 * stepped over as a single byte.
	 */
	n = mblen(s1, MB_CUR_MAX);
	if (n < 1)
	    n = 1;

	s1 += n;
    }
    return(1);
}
/*
 * Smoke test for the multibyte conversion routines: mbstowcs(), wcstombs()
 * and mblen().  Prints a line for every check that fails and a final
 * verdict; the exit status is 0 when everything passed and 1 otherwise.
 */
int
DEFUN_VOID(main)
{
  wchar_t w[10];
  char c[10];
  int i;
  int lose = 0;

  /* "bar" must convert to three wide characters. */
  i = mbstowcs (w, "bar", 4);
  if (!(i == 3 && w[1] == 'a'))
    {
      puts ("mbstowcs FAILED!");
      lose = 1;
    }

  /* Round trip: four wide characters back to four bytes. */
  mbstowcs (w, "blah", 5);
  i = wcstombs (c, w, 10);
  if (i != 4)
    {
      puts ("wcstombs FAILED!");
      lose = 1;
    }

  /* 'f' is a valid one-byte character, so its length is 1 (not -1). */
  if (mblen ("foobar", 7) != 1)
    {
      puts ("mblen 1 FAILED!");
      lose = 1;
    }

  /* The null character has length 0. */
  if (mblen ("", 1) != 0)
    {
      puts ("mblen 2 FAILED!");
      lose = 1;
    }

  puts (lose ? "Test FAILED!" : "Test succeeded.");
  return lose;
}
// Convert and restore mapped inconvertible Unicode characters. // We use it for extended ASCII names in Unix. bool WideToCharMap(const wchar *Src,char *Dest,size_t DestSize,bool &Success) { // String with inconvertible characters mapped to private use Unicode area // must have the mark code somewhere. if (wcschr(Src,(wchar)MappedStringMark)==NULL) return false; Success=true; uint SrcPos=0,DestPos=0; while (DestPos<DestSize-MB_CUR_MAX) { if (Src[SrcPos]==0) { Dest[DestPos]=0; break; } if (uint(Src[SrcPos])==MappedStringMark) { SrcPos++; continue; } // For security reasons do not retore low ASCII codes, so mapping cannot // be used to hide control codes like path separators. if (uint(Src[SrcPos])>=MapAreaStart+0x80 && uint(Src[SrcPos])<MapAreaStart+0x100) Dest[DestPos++]=char(uint(Src[SrcPos++])-MapAreaStart); else { ignore_result( wctomb(NULL,0) ); // Reset shift state. if (wctomb(Dest+DestPos,Src[SrcPos])==-1) Success=false; SrcPos++; ignore_result( mblen(NULL,0) ); // Reset shift state. int Length=mblen(Dest+DestPos,MB_CUR_MAX); DestPos+=Max(Length,1); } } return true; }