Esempi in C++ (Cpp) per is_ascii

Esempio n. 1

0

Mostra file

File: SkOSFile_stdio.cpp Progetto: android/platform_external_skia

// Opens a file whose path is given as UTF-8, going through the wide-character
// CRT entry point when the path contains non-ASCII bytes.
//
//   utf8path  NUL-terminated path.
//   perm      NUL-terminated fopen-style mode string.
//
// Returns the FILE* from fopen/_wfopen, or nullptr when the path is malformed
// UTF-8.
static FILE* fopen_win(const char* utf8path, const char* perm) {
    if (is_ascii(utf8path)) {
        return fopen(utf8path, perm);
    }

    const char* const end = utf8path + strlen(utf8path);

    // Pass 1: validate the input and count the UTF-16 code units required.
    size_t n = 0;
    for (const char* ptr = utf8path; ptr < end;) {
        SkUnichar u = SkUTF8_NextUnicharWithError(&ptr, end);
        if (u < 0) {
            return nullptr;  // malformed UTF-8
        }
        n += SkUTF16_FromUnichar(u);
    }

    // Pass 2: transcode into a buffer with room for the terminator.
    std::vector<uint16_t> wchars(n + 1);
    uint16_t* out = wchars.data();
    for (const char* ptr = utf8path; ptr < end;) {
        out += SkUTF16_FromUnichar(SkUTF8_NextUnicharWithError(&ptr, end), out);
    }
    SkASSERT(out == &wchars[n]);
    *out = 0; // final null

    // Widen the mode string up to and including its terminator. Reading a
    // fixed four bytes would run past the end of short modes such as "r" and
    // would leave longer modes without a terminator.
    std::vector<wchar_t> wperms(perm, perm + strlen(perm) + 1);
    return _wfopen((wchar_t*)wchars.data(), wperms.data());
}

Esempio n. 2

0

Mostra file

File: hcache.c Progetto: srg-imperial/mutt

/*
 * Appends a length-prefixed byte string to the dump buffer.
 *
 *   c        bytes to store; NULL is stored as a zero length with no payload.
 *   d        dump buffer (may be reallocated; the new pointer is returned).
 *   off      in/out write offset into d.
 *   size     number of bytes of c to store.
 *   convert  when non-zero and c is not pure ASCII, a copy of c is converted
 *            from Charset to "utf-8" and the converted bytes (including the
 *            terminating NUL) are stored instead.
 *
 * Returns the (possibly moved) dump buffer.
 */
static unsigned char *
dump_char_size(char *c, unsigned char *d, int *off, ssize_t size, int convert)
{
  /* p is what actually gets written; it differs from c only when a
   * temporary copy was made for charset conversion. */
  char *p = c;

  if (c == NULL)
  {
    size = 0;
    d = dump_int(size, d, off);
    return d;
  }

  if (convert && !is_ascii (c, size)) {
    p = mutt_substrdup (c, c + size);
    if (mutt_convert_string (&p, Charset, "utf-8", 0) == 0) {
      size = mutt_strlen (p) + 1;
    }
  }

  d = dump_int(size, d, off);
  lazy_realloc(&d, *off + size);
  memcpy(d + *off, p, size);
  *off += size;

  /* c is never reassigned, so this releases the temporary copy on both the
   * successful and the failed conversion path. */
  if (p != c)
    FREE(&p);

  return d;
}

Esempio n. 3

0

Mostra file

File: Encoding.cpp Progetto: Burgestrand/twitterskylt

	/*
	 * Takes a string of UTF8-encoded data and strips all characters we cannot use.
	 *
	 * - multibyte characters we can display are mapped over
	 * - all other multibyte characters are stripped
	 * - characters we cannot handle are turned into nulls
	 *
	 * The result is a buffer obtained from ALLOC_STR(strlen(dirty)); every byte
	 * written to it comes from the charmap table.
	 *
	 * Also see: http://en.wikipedia.org/wiki/Utf8
	 */
	char *utf8_strip(const char *dirty)
	{
		// optimistic result: we end up with the same string.
		// Lengths and indices are size_t: an 8-bit counter silently wraps for
		// inputs of 256 bytes or more and drops most of the string.
		const size_t length = strlen(dirty);
		size_t ci = 0;
		unsigned char bytes = 0;
		unsigned char current = 0x00, next = 0x00;
		char *cleaned = ALLOC_STR(length);

		// iterate character by character and replace it
		for (size_t di = 0; di < length; di++)
		{
			current = dirty[di];

			if ( ! valid_utf8(current)) // invalid byte
			{
				continue;
			}
			else if ( ! is_ascii(current)) // multibyte
			{
				if (current == 0xC3 && (di + 1) < length) // might be åäöÅÄÖ, they all are 0xC3xx
				{
					next = dirty[++di]; // we consume the next character

					if (is_ascii(next)) // somehow, next byte is ascii (invalid utf8), so abort
					{
						// we cannot safely map the next byte in our charmap as it'll collide
						// with the ascii characters which might be bad!
						continue;
					}
					else
					{
						current = next;
					}
				}
				else // skip all the additional bytes
				{
					// Each leading 1 bit after the first in the lead byte's high
					// nibble stands for one continuation byte: shift them out one
					// at a time and advance the read index accordingly.
					bytes = (current & 0xF0); // 1111 xxxx
					while (bytes <<= 1) di += 1;
					current = '\0'; // let charmap handle it
				}
			}

			cleaned[ci++] = charmap[current];
		}

		return cleaned;
	}

Esempio n. 4

0

Mostra file

File: str_operate.cpp Progetto: pkxpp/Study

/*
 * Walks one tab-separated line and hands every column that contains a
 * non-ASCII lead byte to string_process(), unless that column is listed in
 * g_ignoreCol[g_ignoreIdx].
 *
 * g_nCurPos tracks the offset of the column currently being scanned.
 * Always returns 1.
 */
int
filter_tab_buffer(char* pBuffer, int nStrLen){
	char*	pCur = pBuffer;
	int	nCol = 1;

	(void)nStrLen;	/* the incoming length is never consulted */

	g_nCurPos = 0;
	for (;;){
		/* the last byte of the line is never inspected on its own */
		if ( *pCur == '\0' || *(pCur+1) == '\0' )
			break;

		if ( is_ascii((unsigned char)(*pCur)) ){
			/* plain byte: a tab starts the next column */
			if ( is_tab(*pCur) ){
				g_nCurPos = (int)(pCur - pBuffer) + 1;
				nCol ++;
			}
			pCur++;
			continue;
		}

		/* non-ASCII byte: the column runs up to the next tab (if any) */
		char* pTab = strchr(pCur, '\t');

		/* is the current column on the ignore list? */
		int bSkip = 0;
		if ( g_ignoreIdx > -1 ){
			for ( int k = 0; k < MAX_IGNORE_COL_NUM; k++ ){
				if ( g_ignoreCol[g_ignoreIdx][k] == 0 )
					break;
				if ( g_ignoreCol[g_ignoreIdx][k] == nCol ) {
					bSkip = 1;
					break;
				}
			}
		}

		if ( pTab == NULL ){
			/* last column: process the remainder of the line and stop */
			int nRest = (int)strlen(pBuffer) - g_nCurPos;
			if ( bSkip == 0 )
				string_process(&pBuffer[g_nCurPos], nRest, 0);
			break;
		}

		int nSpan = (int)(pTab - pBuffer) - g_nCurPos;
		if ( bSkip == 0 )
			nSpan = string_process(&pBuffer[g_nCurPos], nSpan, 0);
		else
			nSpan++;

		pCur = pBuffer + g_nCurPos + nSpan;
		g_nCurPos = (int)(pCur - pBuffer);
		nCol ++;
	}

	return 1;
}

Esempio n. 5

0

Mostra file

File: str_operate.cpp Progetto: pkxpp/Study

/*
 * Filters one line of an INI file.
 *
 * - remark lines are left alone (returns 1);
 * - for a section line (is_ini_section() == 1) the text between '[' and the
 *   last ']' is examined, and the ']' is put back afterwards;
 * - when is_ini_section() returns 2, or the line has no '=', the whole line is
 *   passed to string_process() with its last argument set to 1 and 0 is
 *   returned;
 * - otherwise the text after the first '=' is examined.
 *
 * "Examined" means: if a non-ASCII byte is found, the text is handed to
 * string_process(). nStrLen is not consulted.
 */
int
filter_ini_buffer(char* pBuffer, int nStrLen){
	int 		nRet;
	char* 		pStr = NULL;
	char*		pStr2 = NULL;
	char*		pTail = NULL;	/* where the section's ']' was cut off */
	char		cTail = '\0';

	g_nCurPos = 0;
	/* if the line is a remark, not process */
	if( is_ini_remark((unsigned char)pBuffer[0], (unsigned char)pBuffer[1]) )
		return 1;

	/* if the line is a section, not process */
	nRet = is_ini_section(pBuffer); 
	if( nRet == 1 ){
		pTail = strrchr(pBuffer, ']');
		/* defensive: never dereference a missing ']' */
		if ( pTail == NULL )
			return 1;
		*pTail = '\0';
		pStr = pBuffer;
		cTail = ']';
		goto PROCESS;
	}
	else  if( nRet == 2){
		string_process(pBuffer, (int)strlen(pBuffer), 1);
		return 0;
	}

	/* if the line is not a key,then it's error! */
	pStr = strchr(pBuffer, '='); 
	if ( pStr == NULL ){
		string_process(pBuffer, (int)strlen(pBuffer), 1);
		return 0;
	}

	/* if the line is a key,judge whether there is chinese in key value */
PROCESS:
	pStr += 1;
	pStr2 = pStr;
	if ( *pStr == '\0' ){
		/* nothing to examine; undo the temporary cut so an empty
		   section header keeps its closing bracket */
		if ( pTail != NULL )
			*pTail = cTail;
		return 1;
	}

	/* default: length of the untouched text plus one, so the tail below
	   lands directly behind it when string_process() is never called */
	nRet = (int)strlen(pStr2)+1;
	while( (*(pStr) != '\0') && (*(pStr+1) != '\0') ){
		if ( !is_ascii((unsigned char)(*pStr)) ){
			nRet = string_process(pStr2, (int)strlen(pStr2), 0);
			break;
		}
		pStr ++;
	}

	if ( cTail != '\0' ){
		pStr2[nRet-1] = cTail;
		pStr2[nRet] = '\0';
	}

	return 1;
}

Esempio n. 6

0

Mostra file

File: packet-pmproxy.c Progetto: acaceres2176/wireshark

/*
 * Heuristic for a proxy exchange packet: everything captured from
 * PMPROXY_START_OF_PACKET onwards must be ASCII (e.g. "localhost 44321") and
 * the final byte must be a newline. Returns non-zero on a match, 0 otherwise.
 */
static int looks_like_proxy_exchange(tvbuff_t *tvb) {
    const gint remaining = tvb_ensure_captured_length_remaining(tvb, PMPROXY_START_OF_PACKET);
    const guchar *bytes = tvb_get_ptr(tvb, PMPROXY_START_OF_PACKET, remaining);

    if (!is_ascii(bytes, remaining)) {
        return 0;
    }
    return bytes[remaining - 1] == '\n';
}

Esempio n. 7

0

Mostra file

File: load.c Progetto: Matrixbirds/siege

/**
 * Reads the whole of `file` into a temporary heap buffer and attaches it to
 * U as post data, together with the content type derived from the file name.
 *
 * The file is opened in text mode when is_ascii(filename) holds and in binary
 * mode otherwise; in the text case the contents are trimmed before posting.
 * Nothing is attached when the file cannot be opened, sized or read in full,
 * or when it is empty.
 */
void
load_file(URL U, char *file)
{
  FILE     *fp;
  long     fsize;
  size_t   len;
  char     *buf;
  char     *filename;
  char     mode[8];

  filename = trim(file);

  memset(mode, '\0', sizeof(mode));
  snprintf(mode, sizeof(mode), "%s", (is_ascii(filename))?"r":"rb");
  fp = fopen(filename, mode);
  if (! fp) {
    NOTIFY(ERROR, "unable to open file: %s", filename );
    return;
  }
 
  /* Size the file. ftell() reports failure as -1, which must not be turned
   * into a huge size_t and fed to the allocator. */
  fsize = -1;
  if (fseek(fp, 0, SEEK_END) == 0) {
    fsize = ftell(fp);
  }
  if (fsize < 0 || fseek(fp, 0, SEEK_SET) != 0) {
    NOTIFY(ERROR, "unable to read file: %s", filename );
    fclose(fp);
    return;
  }
  len = (size_t)fsize;
  buf = (char *)xmalloc(len+1);

  if ((fread(buf, 1, len, fp )) == len) {
    if (is_ascii(filename)) {
      buf[len] = '\0';
      trim(buf);
      len = strlen(buf);
    }
  } else {
    NOTIFY(ERROR, "unable to read file: %s", filename );
    len = 0; /* never post a partially filled buffer */
  }
  fclose(fp); 

  if (len > 0) {
    url_set_conttype(U, get_content_type(filename));
    url_set_postdata(U, buf, len);
  } 

  xfree(buf);
  return;
}

Esempio n. 8

0

Mostra file

File: iconv_hook_eucjp.c Progetto: ystk/debian-libapache-mod-encoding

/*
 * Returns how many bytes the character introduced by lead byte c occupies:
 * 1 for ASCII, 2 for kanji or half-width kana, 3 for the supplementary
 * (hojyo) set, and 0 when c matches none of those classes.
 */
static size_t skip_bytes(char c)
{
  size_t width = 0;

  if (is_ascii(c))
    width = 1;
  else if (is_kanji(c) || is_hankana(c))
    width = 2;
  else if (is_hojyo(c))
    width = 3;

  return width;
}

Esempio n. 9

0

Mostra file

File: iconv_hook_eucjp.c Progetto: ystk/debian-libapache-mod-encoding

/*
 * iconv-style hook: rewrites the EUC-JP text in *srcbuf as Shift_JIS in a
 * scratch buffer and passes that buffer on to mssjis_iconv().
 *
 * Returns whatever mssjis_iconv() returns, 0 when any pointer argument is
 * NULL, or (size_t)-1 with errno set:
 *   ENOMEM  the scratch buffer could not be allocated;
 *   EILSEQ  a byte sequence that cannot be translated (bad half-width kana
 *           trail byte, JIS X 0212, or a lead byte with no trail byte).
 *           *srcbuf is moved to the start of the offending sequence.
 */
static size_t
eucjp_iconv(iconv_t cd,
	    char **srcbuf, size_t *srclen, char **outbuf, size_t *outlen) {
  unsigned char *tmpbuf, *tmp;
  unsigned char *src;
  unsigned char ch, cl;
  char *sjis;
  size_t ret;

  if (! (srcbuf && srclen && outbuf && outlen))
    return 0;

  /* translate EUC-JP into SJIS */
  src = (unsigned char *)*srcbuf;
  tmp = tmpbuf = malloc(*srclen+2);
  if (tmpbuf == NULL) {
    errno = ENOMEM;
    return (size_t)-1;
  }
  while (*src && ((size_t)(tmp - tmpbuf) < *srclen)) {
    ch = *src++;
    if (is_ascii(ch)) {
      *tmp++ = ch;
    } else {
      cl = *src++;
      /* a lead byte directly followed by the terminator is truncated
         input; stop before reading past the end of the string */
      if (cl == '\0')
        goto illegal;
      if (is_kanji(ch)) {
        *tmp++ = ((ch-0x5f)/2) ^ 0xA0;
        if (!(ch&1))
          *tmp++ = cl - 0x02;
        else if (cl < 0xE0)
          *tmp++ = cl - 0x61;
        else
          *tmp++ = cl - 0x60;
      } else if (is_hankana(ch)) {
        if (cl < 0xA0 || cl > 0xDF)
          goto illegal;
        *tmp++ = cl;
      } else {
        /* We don't support JIS X 0212 */
        goto illegal;
      }
    }
  }
  *tmp='\0';

  /* Give the callee its own cursor so tmpbuf still points at the start of
     the allocation when it is freed. */
  sjis = (char *)tmpbuf;
  ret = mssjis_iconv(cd, &sjis, srclen, outbuf, outlen);
  free(tmpbuf);
  /* NOTE(review): this advances *srcbuf by the VALUE of the byte at src, not
     by the number of bytes consumed -- looks unintended; left unchanged
     pending confirmation against the callers. */
  *srcbuf += *src;
  *srclen = 0;
  return ret;

illegal:
  *srcbuf=(char *)(src-2);
  free(tmpbuf);
  errno=EILSEQ;
  return (size_t)-1;
}

Esempio n. 10

0

Mostra file

File: fctypes.cpp Progetto: privacore/open-source-search-engine

//Note: there is a safer version in GbUtil.* that writes to a SafeBuf.
// . convert "-->%22 , &-->%26, +-->%2b, space-->+, ?-->%3f is that it?
// . convert so we can display as a cgi PARAMETER within a url
// . used by HttPage2 (cached web page) to encode the query into a url
// . used by PageRoot to do likewise
// . "d"/"dlen" is the destination buffer and its size, "s"/"slen" the source
// . if "requestPath" is true then '?' is passed through unencoded and \0
//   bytes in the source are copied verbatim
// . output is silently truncated when "d" is too small
// . returns bytes written into "d" not including terminating \0
// . if dlen <= 0 nothing is written, not even the \0, and 0 is returned
int32_t urlEncode ( char *d , int32_t dlen , const char *s , int32_t slen, bool requestPath ) {
	// without this guard the terminating \0 below would be stored
	// outside of a zero-length buffer
	if ( dlen <= 0 ) return 0;
	static const char s_hex[] = "0123456789ABCDEF";
	char *dstart = d;
	// subtract 1 to make room for a terminating \0
	char *dend = d + dlen - 1;
	const char *send = s + slen;
	for ( ; s < send && d < dend ; s++ ) {
		if ( *s == '\0' && requestPath ) {
			*d++ = *s;
			continue;
		}
		// encode if not fit for display
		bool mustEncode = ! is_ascii ( *s );
		if ( ! mustEncode ) {
			switch ( *s ) {
			case ' ':
			case '&':
			case '"':
			case '+':
			case '%':
			case '#':
			// encoding < and > are more for displaying on an
			// html page than sending to an http server
			case '>':
			case '<':
				mustEncode = true;
				break;
			case '?':
				mustEncode = ! requestPath;
				break;
			}
		}
		// otherwise, no need to encode
		if ( ! mustEncode ) {
			*d++ = *s;
			continue;
		}
		// space to +
		if ( *s == ' ' && d + 1 < dend ) { *d++ = '+'; continue; }
		// break out if no room to encode
		if ( d + 2 >= dend ) break;
		const unsigned char c = (unsigned char)*s;
		*d++ = '%';
		// store first hex digit
		*d++ = s_hex[ c / 16 ];
		// store second hex digit
		*d++ = s_hex[ c & 0x0f ];
	}
	// NULL terminate it
	*d = '\0';
	// and return the length
	return (int32_t)(d - dstart);
}

Esempio n. 11

0

Mostra file

File: http_helper.cpp Progetto: xuxiandi/LiquidFL

// Percent-encodes *str in place: every byte that is not ASCII, or that
// is_special_character() flags, is replaced by '%' followed by exactly two
// hexadecimal digits; all other bytes are copied through unchanged.
void url_encode(lfl_string* str)
{
	lfl_string out;
	for (int i = 0; i < str->length(); i++) {
		char c = (*str)[i];
		if( !is_ascii( c ) || is_special_character(c) )
		{   
			// Format the byte as unsigned and zero-padded: a plain
			// (possibly signed) char would be sign-extended to eight
			// hex digits for bytes >= 0x80, and "%2x" pads values
			// below 0x10 with a space instead of a zero.
			out += string_printf( "%%%02x", (unsigned char)c );
		} else {
			// Pass this character straight through.
			out += c;
		}
	}

	*str = out;
}

Esempio n. 12

0

Mostra file

File: x.c Progetto: raincoats/x

/*
 * Decides whether character c has to be escaped.
 *
 * The whitespace/control characters listed below are governed by their own
 * esc_* setting; for every other character the answer is false when
 * is_ascii(c) holds and true otherwise.
 */
int needs_escaping(int c)
{
	if (c == '\r') return esc_cr;
	if (c == '\n') return esc_lf;
	if (c == '\t') return esc_tabs;
	if (c == '\v') return esc_vtab;
	if (c == '\e') return esc_ansi;
	if (c == ' ')  return esc_space;

	return is_ascii(c) ? false : true;
}

Esempio n. 13

0

Mostra file

File: crack.c Progetto: yeshwantsingh/computer_science_50

/*
 * Entry point: expects exactly one argument, a 13-character ASCII encrypted
 * password, and runs encryption_match() against the system word list.
 * Returns 1 on a usage error or when the word list cannot be opened,
 * 0 otherwise.
 */
int main(int argc, char *argv[])
{
    bool is_ascii(char str[]);
    void encryption_match(char word[], char slt[], char encrypted_pwd[],
                          FILE *fptr_words);

    /* The single command-line argument must be present, consist entirely of
       ASCII characters and be exactly 13 characters long. The checks are
       evaluated left to right, so argv[1] is only touched when it exists. */
    bool usage_ok = (argc == 2) && is_ascii(argv[1]) && (strlen(argv[1]) == 13);
    if ( !usage_ok )
    {
        printf ("\n*** An ERROR occured and the program has closed.***\n"
                "Input: ./crack <encrypted password>. \nWhere <encrypted "
                "password> must be 13 ASCII characters in length.\n");
        return 1;
    }

    char hash [strlen(argv[1])];
    char salt[2], candidate[81];

    cpy_n_pst(argv[1], 0, 13, hash, 0);    /* the encrypted password */
    cpy_n_pst(argv[1], 0, 2, salt, 0);     /* its first two characters: the salt */

    /* An alternative, exhaustive dictionary with 4,160,636 entries lives at
       "/home/jharvard/Dropbox/wiki.txt". It is slow to load; it contains
       commonly used passwords, numbers, user names and some words
       associated with Harvard. */

    fptr_wiki = fopen("/usr/share/dict/words", "r");
    if ( !fptr_wiki )
    {
        printf("\nError opening the file requested.\n");
        return 1;
    }

    /* encryption_match() runs a dictionary attack and, if that finds
       nothing, a brute-force attack. When both fail it simply returns. */
    encryption_match(candidate, salt, hash, fptr_wiki);
    if ( !match )
        printf ("\nMATCH NOT FOUND.\n");

    fclose(fptr_wiki);

    return 0;
}

Esempio n. 14

0

Mostra file

File: py-shape.cpp Progetto: lukas-ke/faint-graphics-editor

// Builds a Path shape object from an SVG-style path definition string.
//
// Throws ValueError when the definition contains non-ASCII characters,
// cannot be parsed into any points, or does not start with a move entry.
PyObject* create_Path(const utf8_string& path,
  const Optional<Settings>& maybeSettings)
{
  // Fixme: Duplicates py-canvas.cpp

  // Fixme: Consider adding ascii_string type
  const bool asciiOnly = is_ascii(path);
  if (!asciiOnly){
    throw ValueError("Non-ascii-characters in path definition.");
  }

  std::vector<PathPt> pathPoints(parse_svg_path(path.str()));
  if (pathPoints.empty()){
    throw ValueError("Failed parsing path definition.");
  }

  if (pathPoints.front().IsNotMove()){
    throw ValueError("Paths must begin with a Move-entry.");
  }

  const auto settings = merge_settings(maybeSettings, default_path_settings());
  return create_Shape(create_path_object_raw(Points(pathPoints), settings));
}

Esempio n. 15

0

Mostra file

File: load.c Progetto: Matrixbirds/siege

/**
 * Reads at most POSTBUF-1 bytes of `file` into a stack buffer and attaches
 * them to U as post data, together with the content type derived from the
 * file name. When is_ascii(filename) holds the data is trimmed first.
 *
 * Nothing is attached when the file cannot be stat'ed, opened or read, or
 * when no data remains.
 */
void 
load_file(URL U, char *file)
{
  FILE     *fp;
  size_t   len = 0;
  struct   stat st; 
  char     *filename;
  char     postdata[POSTBUF]; 
  size_t   postlen = 0;

  filename = trim(file);
  memset(postdata, 0, POSTBUF);

  /* st is only meaningful when lstat() succeeded; a missing file is
   * skipped silently, any other failure is reported. */
  if (lstat(filename, &st) != 0) {
    if (errno != ENOENT) {
      NOTIFY(ERROR, "could not open file: %s", filename);
    }
    return;
  }

  /* Keep the last byte of postdata as a terminator so that trim() and
   * strlen() below never run off the end of the buffer. */
  len = (st.st_size >= POSTBUF) ? (size_t)(POSTBUF - 1) : (size_t)st.st_size;
  if (len < (size_t)st.st_size) {
    NOTIFY(WARNING, "Truncated file: %s exceeds the post limit of %d bytes.\n", filename, POSTBUF);
  }
  if ((fp = fopen(filename, "r")) == NULL) {
    NOTIFY(ERROR, "could not open file: %s", filename);
    return;
  }
  if ((fread(postdata, 1, len, fp )) == len) {
    if (is_ascii(filename)) {
      trim(postdata);
      postlen = strlen(postdata);
    } else {
      postlen = len;
    }
  } else {
    NOTIFY(ERROR, "unable to read file: %s", filename );
  }
  fclose(fp);

  /* Decide on the byte count rather than strlen(): binary data may start
   * with a NUL byte, and a failed read leaves postlen at 0. */
  if (postlen > 0) {
    url_set_conttype(U, get_content_type(filename));
    url_set_postdata(U, postdata, postlen);
  } 
  return;
}

Esempio n. 16

0

Mostra file

File: utf8.hpp Progetto: GavinHwa/oh

 // Counts how many UTF-16 code units are needed for the UTF-8 sequence
 // [first, last): one per character, two for a 4-byte character.
 // Throws via HPROSE_THROW_EXCEPTION when an unrecognised lead byte is met
 // or the sequence does not end exactly on `last`.
 size_t utf8_length(InputIterator first, InputIterator last, UTF16Type)
 {
     size_t len = 0;
     while (first < last)
     {
         int width;        // bytes taken by the current character
         size_t units = 1; // UTF-16 code units it maps to

         if (is_ascii(*first))
         {
             width = 1;
         }
         else if (is_2byte(*first))
         {
             width = 2;
         }
         else if (is_3byte(*first))
         {
             width = 3;
         }
         else if (is_4byte(*first))
         {
             width = 4;
             units = 2;
         }
         else if (is_5byte(*first))
         {
             width = 5;
         }
         else if (is_6byte(*first))
         {
             width = 6;
         }
         else
         {
             // unknown lead byte: leave `first` short of `last`
             break;
         }

         first += width;
         len += units;
     }
     if (first != last)
     {
         HPROSE_THROW_EXCEPTION("Not a UTF-8 string");
     }
     return len;
 }

Esempio n. 17

0

Mostra file

File: checkutf8.c Progetto: lemire/Code-used-on-Daniel-Lemire-s-blog

/*
 * Benchmarks is_ascii() and the UTF-8 validators on an N-byte buffer.
 * The buffer is refilled by populate() before each timed run (it is passed
 * as the setup argument of BEST_TIME); every run is expected to return true.
 */
void demo(size_t N) {
  printf("string size = %zu \n", N);
  char *data = (char *)malloc(N);
  if (data == NULL && N > 0) {
    /* populate() and the validators would otherwise write/read through NULL */
    fprintf(stderr, "unable to allocate %zu bytes\n", N);
    return;
  }
  bool expected = true; // it is all ascii?
  int repeat = 5;
  printf("We are feeding ascii so it is always going to be ok.\n");
  BEST_TIME(is_ascii(data, N), expected,populate(data,N) , repeat, N, true);

  BEST_TIME(validate_utf8(data, N), expected,populate(data,N) , repeat, N, true);
  BEST_TIME(validate_utf8_branchless(data, N), expected,populate(data,N) , repeat, N, true);
  BEST_TIME(validate_utf8_double(data, N), expected,populate(data,N) , repeat, N, true);

  BEST_TIME(shiftless_validate_utf8(data, N), expected,populate(data,N) , repeat, N, true);
  BEST_TIME(shiftless_validate_utf8_branchless(data, N), expected,populate(data,N) , repeat, N, true);
  BEST_TIME(shiftless_validate_utf8_double(data, N), expected,populate(data,N) , repeat, N, true);


  BEST_TIME(validate_utf8_sse_nocheating(data, N), expected,populate(data,N) , repeat, N, true);
  BEST_TIME(validate_utf8_sse(data, N), expected,populate(data,N) , repeat, N, true);
  free(data);
}

Esempio n. 18

0

Mostra file

File: json_parser_write.hpp Progetto: Chang-Liu-0520/dealii

 std::basic_string<Ch> create_escapes(const std::basic_string<Ch> &s)
 {
     std::basic_string<Ch> result;
     typename std::basic_string<Ch>::const_iterator b = s.begin();
     typename std::basic_string<Ch>::const_iterator e = s.end();
     while (b != e)
     {
         // This assumes an ASCII superset. But so does everything in PTree.
         // We escape everything outside ASCII, because this code can't
         // handle high unicode characters.
         if (*b == 0x20 || *b == 0x21 || (*b >= 0x23 && *b <= 0x2E) ||
             (*b >= 0x30 && *b <= 0x5B) || (*b >= 0x5D && is_ascii(*b)))
             result += *b;
         else if (*b == Ch('\b')) result += Ch('\\'), result += Ch('b');
         else if (*b == Ch('\f')) result += Ch('\\'), result += Ch('f');
         else if (*b == Ch('\n')) result += Ch('\\'), result += Ch('n');
         else if (*b == Ch('\r')) result += Ch('\\'), result += Ch('r');
         else if (*b == Ch('\t')) result += Ch('\\'), result += Ch('t');
         else if (*b == Ch('/')) result += Ch('\\'), result += Ch('/');
         else if (*b == Ch('"'))  result += Ch('\\'), result += Ch('"');
         else if (*b == Ch('\\')) result += Ch('\\'), result += Ch('\\');
         else
         {
             const char *hexdigits = "0123456789ABCDEF";
             typedef typename make_unsigned<Ch>::type UCh;
             unsigned long u = (std::min)(static_cast<unsigned long>(
                                              static_cast<UCh>(*b)),
                                          0xFFFFul);
             int d1 = u / 4096; u -= d1 * 4096;
             int d2 = u / 256; u -= d2 * 256;
             int d3 = u / 16; u -= d3 * 16;
             int d4 = u;
             result += Ch('\\'); result += Ch('u');
             result += Ch(hexdigits[d1]); result += Ch(hexdigits[d2]);
             result += Ch(hexdigits[d3]); result += Ch(hexdigits[d4]);
         }
         ++b;
     }
     return result;
 }

Esempio n. 19

0

Mostra file

File: autocomp.c Progetto: vigna/ne

/*
 * Scans every line of buffer b for words that start with the prefix p and are
 * strictly longer than it, and hands each such word to add_string().
 *
 *   b            buffer whose line list is walked.
 *   p            NUL-terminated prefix; must not be NULL (asserted).
 *   encoding     a candidate is only accepted when the buffer's encoding
 *                equals this value or the candidate is pure ASCII.
 *   case_search  true selects strncmp, false strncasecmp for the prefix test.
 *   ext          passed through unchanged to add_string().
 *
 * The word under the cursor (current buffer, current line, cursor between the
 * word's edges) is skipped. The scan ends early when the global `stop` is set
 * or count_scanned reaches MAX_AUTOCOMPLETE_SCAN. add_string(NULL, -1, 0) is
 * called before every return -- presumably an end-of-batch marker; confirm
 * against add_string().
 */
static void search_buff(const buffer *b, char * p, const int encoding, const bool case_search, const int ext) {
	assert(p);
	const int p_len = strlen(p);
	const int (*cmp)(const char *, const char *, size_t) = case_search ? strncmp : strncasecmp;
	/* Walk the line list; the loop ends at the node whose `next` is NULL. */
	for(line_desc *ld = (line_desc *)b->line_desc_list.head, *next; next = (line_desc *)ld->ld_node.next; ld = next) {
		/* l and r are byte offsets of the left and right edge of the current word. */
		int64_t l = 0, r = 0;
		do {
			/* find left edge of word */
			/* (no need to look further right than line_len - p_len: a word
			   starting there could not be longer than the prefix) */
			while (l < ld->line_len - p_len && !ne_isword(get_char(&ld->line[l], b->encoding), b->encoding)) l += get_char_width(&ld->line[l], b->encoding);
			if (l < ld->line_len - p_len ) {
				int ch;
				/* find right edge of word */
				r = l + get_char_width(&ld->line[l], b->encoding);
				/* accept "'" as a word character if it is followed by another word character, so that
				   words like "don't" are not broken into "don" and "t". */
				while (r < ld->line_len
				       && (    ne_isword(ch=get_char(&ld->line[r], b->encoding), b->encoding)
				            || ( r+1 < ld->line_len && ch == '\'' && ne_isword(get_char(&ld->line[r+1], b->encoding), b->encoding))
				          )
				      ) r += get_char_width(&ld->line[r], b->encoding);
				/* Keep the word if it is not the one under the cursor, is longer
				   than the prefix, is encoding-compatible and starts with the prefix. */
				if ((b != cur_buffer || ld != b->cur_line_desc || b->cur_pos < l || r < b->cur_pos)
				     && r - l > p_len && (b->encoding == encoding || is_ascii(&ld->line[l], r - l))
				     && !cmp(p, &ld->line[l], p_len))
					add_string(&ld->line[l], r - l, ext);
				/* continue after this word */
				l = r;
				count_scanned++;
			}
			assert(l <= ld->line_len);
			/* bail out on user interruption or once the scan budget is used up */
			if (stop || count_scanned >= MAX_AUTOCOMPLETE_SCAN) {
				add_string(NULL, -1, 0);
				return;
			}
		} while (l < ld->line_len - p_len);
	}
	add_string(NULL, -1, 0);
}

Esempio n. 20

0

Mostra file

File: hcache.c Progetto: srg-imperial/mutt

/*
 * Reads a length-prefixed byte string back from the dump buffer.
 *
 *   c        receives a freshly allocated copy of the stored bytes, or NULL
 *            when the stored length is zero.
 *   d        dump buffer.
 *   off      in/out read offset into d; advanced past the length prefix and
 *            the stored bytes.
 *   convert  when non-zero and the stored bytes are not pure ASCII, *c is
 *            replaced by its conversion from "utf-8" to Charset if that
 *            conversion succeeds.
 */
static void
restore_char(char **c, const unsigned char *d, int *off, int convert)
{
  unsigned int size;
  restore_int(&size, d, off);

  if (size == 0)
  {
    *c = NULL;
    return;
  }

  *c = safe_malloc(size);
  memcpy(*c, d + *off, size);
  if (convert && !is_ascii (*c, size)) {
    char *tmp = safe_strdup (*c);
    if (mutt_convert_string (&tmp, "utf-8", Charset, 0) == 0) {
      mutt_str_replace (c, tmp);
    }
    /* mutt_str_replace() stores its own duplicate of tmp, so the working
     * copy has to be released on the success path as well. */
    FREE(&tmp);
  }
  *off += size;
}

Esempio n. 21

0

Mostra file

File: MsgDisplay.cpp Progetto: ZeroCM/zcm

// Prints the scalar value that `data` points at, formatted according to
// field->type.
//
// For user types only a placeholder is printed: "<USER>" when usertype_count
// is NULL, otherwise "<n>" where n is the counter after being incremented.
// Types not handled here print "???" and log an error to stderr.
static void print_value_scalar(TypeDb& db, zcm_field_t *field, void *data, int *usertype_count)
{
    switch(field->type) {

        case ZCM_FIELD_BYTE:
        case ZCM_FIELD_INT8_T: {
            const int8_t byteVal = *static_cast<int8_t *>(data);
            printf(" %d", byteVal);
            // also show the character when the byte is ASCII
            if(is_ascii(byteVal)) {
                printf(" (%c)", byteVal);
            }
            break;
        }

        case ZCM_FIELD_INT16_T: {
            const int16_t v16 = *static_cast<int16_t *>(data);
            printf("% d", v16);
            break;
        }

        case ZCM_FIELD_INT32_T: {
            const int32_t v32 = *static_cast<int32_t *>(data);
            printf("% d", v32);
            break;
        }

        case ZCM_FIELD_INT64_T: {
            const int64_t v64 = *static_cast<int64_t *>(data);
            printf("% " PRIi64 "", v64);
            break;
        }

        case ZCM_FIELD_FLOAT: {
            const float f32 = *static_cast<float *>(data);
            printf("% f", f32);
            break;
        }

        case ZCM_FIELD_DOUBLE: {
            const double f64 = *static_cast<double *>(data);
            printf("% f", f64);
            break;
        }

        case ZCM_FIELD_STRING: {
            const char *text = *static_cast<const char **>(data);
            printf("\"%s\"", text);
            break;
        }

        case ZCM_FIELD_BOOLEAN: {
            const bool isTrue = (*static_cast<int8_t *>(data)) == 1;
            printf("%s", isTrue ? "true" : "false");
            break;
        }

        case ZCM_FIELD_USER_TYPE: {
            if (!db.getByName(field->typestr)) {
                printf("<unknown-user-type>");
            } else if (usertype_count == NULL) {
                printf("<USER>");
            } else {
                const int n = ++*usertype_count;
                printf("<%d>", n);
            }
            break;
        }

        default:
            printf("???");
            fprintf(stderr, "ERR: failed to handle zcm message field type: %s\n", field->typestr);
            break;
    }
}

Esempio n. 22

0

Mostra file

File: Words.cpp Progetto: privacore/open-source-search-engine

// . splits the text in "s" into tokens and appends them to the parallel
//   arrays m_words / m_wordLens / m_wordIds / m_nodes (and m_tagIds when it
//   is non-NULL)
// . three kinds of token are produced: tags (only if m_hasTags), runs of
//   punctuation, and alnum words
// . "nodeLen" bounds the scan: tokenizing stops once the index reaches it,
//   or at the first \0 byte in "s"
// . if "computeWordIds" is true the lowercased utf8 hash of each alnum word
//   is stored in m_wordIds; punctuation and tag tokens always get id 0
// . stops quietly once m_numWords reaches m_preCount
// . always returns true
bool Words::addWords( char *s, int32_t nodeLen, bool computeWordIds ) {
	// byte offset of the scan position in "s"
	int32_t  i = 0;
	// byte offset where the current alnum word started
	int32_t  j;
	int32_t  wlen;

	// only one apostrophe is allowed inside a word (see "nogo" below)
	bool hadApostrophe = false;

	// script of the previous word char, used to break words on script change
	UCScript oldScript = ucScriptCommon;
	UCScript saved;
	UCProps props;

 uptop:

	// bad utf8 can cause a breach
	if ( i >= nodeLen ) {
		goto done;
	}

	if ( ! s[i] ) {
		goto done;
	}

	// not at an alnum char: emit either a tag or a punctuation token
	if ( !is_alnum_utf8( s + i ) ) {
		if ( m_numWords >= m_preCount ) {
			goto done;
		}

		// tag?
		if ( s[i]=='<' && m_hasTags && isTagStart(s+i) ) {
			// get the tag id
			if( m_tagIds ) {
				if ( s[i + 1] == '/' ) {
					// skip over /
					m_tagIds[m_numWords] = ::getTagId( s + i + 2 );
					// mark it as a closing ("back") tag
					m_tagIds[m_numWords] |= BACKBIT;
				} else {
					m_tagIds[m_numWords] = ::getTagId( s + i + 1 );
				}
			}

			m_words[m_numWords] = s + i;
			m_wordIds[m_numWords] = 0LL;

			// skip till end
			int32_t tagLen = getTagLen( s + i );
			m_wordLens[m_numWords] = tagLen;
			m_nodes[m_numWords] = 0;
			m_numWords++;

			// advance
			i += tagLen;
			goto uptop;
		}

		// it is a punct word, find end of it
		char *start = s+i;
		for ( ; s[i] ; i += getUtf8CharSize(s+i)) {
			// stop on < if we got tags
			if ( s[i] == '<' && m_hasTags ) {
				break;
			}

			// if we are simple ascii, skip quickly
			if ( is_ascii(s[i]) ) {
				// accumulate NON-alnum chars
				if ( ! is_alnum_a(s[i]) ) {
					continue;
				}

				// update
				oldScript = ucScriptCommon;

				// otherwise, stop we got alnum
				break;
			}

			// if we are utf8 we stop on special props
			UChar32 c = utf8Decode ( s+i );

			// keep going while it is not a word char
			if ( ! ucIsWordChar ( c ) ) {
				continue;
			}

			// update first though
			oldScript = ucGetScript ( c );

			// then stop
			break;
		}
		m_words        [ m_numWords  ] = start;
		m_wordLens     [ m_numWords  ] = s+i - start;
		m_wordIds      [ m_numWords  ] = 0LL;
		m_nodes        [ m_numWords  ] = 0;

		if (m_tagIds) {
			m_tagIds[m_numWords] = 0;
		}

		m_numWords++;
		goto uptop;
	}

	// get an alnum word
	j = i;
 again:
	for ( ; s[i] ; i += getUtf8CharSize(s+i) ) {
		// simple ascii?
		if ( is_ascii(s[i]) ) {
			// accumulate alnum chars
			if ( is_alnum_a(s[i]) ) continue;
			// update
			oldScript = ucScriptCommon;
			// otherwise, stop we got punct
			break;
		}
		// get the code point of the utf8 char
		UChar32 c = utf8Decode ( s+i );
		// get props
		props = ucProperties ( c );
		// good stuff?
		if ( props & (UC_IGNORABLE|UC_EXTEND) ) continue;
		// stop? if UC_WORDCHAR is set, that means its an alnum
		if ( ! ( props & UC_WORDCHAR ) ) {
			// reset script between words
			oldScript = ucScriptCommon;
			break;
		}
		// save it
		saved = oldScript;
		// update here
		oldScript = ucGetScript(c);
		// treat ucScriptLatin (30) as common so we can have latin1
		// like char without breaking the word!
		if ( oldScript == ucScriptLatin ) oldScript = ucScriptCommon;
		// ideographs, hiragana and thai chars each end the word they are in
		if ( props & ( UC_IDEOGRAPH | UC_HIRAGANA | UC_THAI ) ) {
			// include it
			i += getUtf8CharSize(s+i);
			// but stop
			break;
		}
		// script change?
		if ( saved != oldScript ) break;
	}
	
	// . java++, A++, C++ exception
	// . A+, C+, exception
	// . TODO: consider putting in Bits.cpp w/ D_CAN_BE_IN_PHRASE
	if ( s[i]=='+' ) {
		if ( s[i+1]=='+' && !is_alnum_utf8(&s[i+2]) ) i += 2;
		else if ( !is_alnum_utf8(&s[i+1]) ) i++;
	}
	// . c#, j#, ...
	if ( s[i]=='#' && !is_alnum_utf8(&s[i+1]) ) i++;

	// comma is ok if like ,ddd!d
	// (only when the word so far is 1 to 3 digits, i.e. the leading group
	//  of a number like 1,234,567)
	if ( s[i]==',' && 
	     i-j <= 3 &&
	     is_digit(s[i-1]) ) {
		// if word so far is 2 or 3 chars, make sure digits
		if ( i-j >= 2 && ! is_digit(s[i-2]) ) goto nogo;
		if ( i-j >= 3 && ! is_digit(s[i-3]) ) goto nogo;
		// scan forward
		// (each round swallows one ",ddd" group that is not followed by
		//  a fourth digit)
		while ( s[i] == ',' &&
		        is_digit(s[i+1]) &&
		        is_digit(s[i+2]) &&
		        is_digit(s[i+3]) &&
		        ! is_digit(s[i+4]) ) {
			i += 4;
		}
	}

	// decimal point?
	if ( s[i] == '.' &&
	     is_digit(s[i-1]) &&
	     is_digit(s[i+1]) ) {
		// allow the decimal point
		i++;
		// skip over string of digits
		while ( is_digit(s[i]) ) i++;
	}
	
 nogo:

	// allow for words like we're dave's and i'm
	// (only one apostrophe per word: hadApostrophe blocks a second one)
	if ( s[i] == '\'' && s[i + 1] && is_alnum_utf8( &s[i + 1] ) && !hadApostrophe ) {
		i++;
		hadApostrophe = true;
		goto again;
	}
	hadApostrophe = false;
	
	// get word length
	wlen = i - j;
	if ( m_numWords >= m_preCount ) goto done;
	m_words   [ m_numWords  ] = &s[j];
	m_wordLens[ m_numWords  ] = wlen;

	if ( computeWordIds ) {
		int64_t h = hash64Lower_utf8(&s[j],wlen);
		m_wordIds [m_numWords] = h;
	}

	m_nodes[m_numWords] = 0;
	if (m_tagIds) m_tagIds[m_numWords] = 0;
	m_numWords++;
	m_numAlnumWords++;
	// get a punct word
	goto uptop;

 done:
	// bad programming warning
	if ( m_numWords > m_preCount ) {
		log(LOG_LOGIC, "build: words: set: Fix counting routine.");
		gbshutdownLogicError();
	}

	return true;
}

Esempio n. 23

0

Mostra file

File: str_operate.cpp Progetto: pkxpp/Study

/*
 * Filters one line of C/C++ source: string literals that contain a non-ASCII
 * byte are handed to string_process().
 *
 * Lines that start with a C++ remark, that start a C remark, or that lie
 * inside an unfinished C remark (tracked across calls in g_nFinishRemark)
 * are skipped. Scanning also stops at the first remark found in the line.
 *
 * g_nCurPos holds the offset just after the opening '"' of the literal being
 * scanned. Always returns 1.
 *
 * NOTE(review): unlike the sibling filter_* functions, g_nCurPos is not reset
 * to 0 on entry here -- confirm whether a stale value from the previous line
 * can be used.
 */
int
filter_cpp_buffer(char* pBuffer, int nStrLen){
	char* 		pStr	 = NULL;
	char*		pStr2 	 = NULL;
	int		nLength  = 0;
	/* number of double quotes seen so far; odd means "inside a literal" */
	int		nCount	 = 0;
	int		bError	 = 0;

	//if the line is a cpp remark, not process
	if( is_cpp_remark(pBuffer[0], pBuffer[1]) )
		return 1;

	/* the line opens a C remark: remember if it is not closed on this line */
	if( is_c_remark(pBuffer[0], pBuffer[1]) ){
		if( strstr(pBuffer, "*/") == NULL )
			g_nFinishRemark = 0;
		return 1;
	}

	/* still inside a C remark opened on an earlier line */
	if( g_nFinishRemark == 0 ){
		if( strstr(pBuffer, "*/") )
			g_nFinishRemark = 1;
		return 1;
	}

	pStr = pBuffer;
	/* the last byte of the line is never inspected on its own */
	while( (*(pStr) != '\0') && (*(pStr+1) != '\0') ){
		if( is_cpp_remark(*pStr, *(pStr+1)) ||
		    is_c_remark(*pStr, *(pStr+1)))
			break;
		
		if ( !is_ascii((unsigned char)(*pStr)) ){
			
			/* look for the quote that should close the current literal */
			pStr2 = strchr(pStr, '"');
		
			/* error: no quote at all, or we are outside a literal and the
			   quote found is not an escaped one */
			if ( pStr2 == NULL )
				bError = 1;
			else if ((nCount%2) == 0){
				if ( *(pStr2-1) != '\\' )
					bError = 1;
			}

			if ( bError == 1 ){
				/* hand the rest of the line over with the last argument
				   set to 1 and stop */
				nStrLen = (int)strlen(pBuffer);
				nLength = nStrLen - g_nCurPos;
				if ( nLength > 0 )
					string_process(&pBuffer[g_nCurPos],
						       nLength, 1);
				break;
			}
			else{	
				/* process the literal body and resume right after the
				   length string_process() reports */
				nLength = (int)(pStr2 - pBuffer) - g_nCurPos;
				nLength = string_process(&pBuffer[g_nCurPos],
						      nLength, 0);
				nCount ++;
				pStr = pBuffer + g_nCurPos + nLength;
				g_nCurPos = (int)(pStr - pBuffer);
			}
			
			continue;
		}
		else if( is_double_quotation(*pStr) ){
			nCount ++;
			/* an opening quote: the literal body starts on the next byte */
			if( nCount % 2 )
				g_nCurPos = (int)(pStr - pBuffer) + 1;
		}
		
		pStr++;
	}
	
	return 1;
}

Esempio n. 24

0

Mostra file

File: str_operate.cpp Progetto: pkxpp/Study

/*
 * Filters one line of Lua source: quoted strings (single or double quotes)
 * that contain a non-ASCII byte are handed to string_process().
 *
 * Lines that start with a Lua remark are skipped, and scanning stops at the
 * first remark found in the line. g_nCurPos holds the offset just after the
 * opening quote of the string being scanned. Always returns 1.
 *
 * nStrLen is not consulted on entry; cOld and nBiasNum are unused.
 */
int
filter_lua_buffer(char* pBuffer, int nStrLen){
	char* 	pStr 	= NULL;
	char*	pStr2 	= NULL;
	char*	pOld 	= NULL;
	int		nLength = 0;
	/* quote counters; an odd value means "inside a string of that kind" */
	int		nDoubleCount = 0;
	int		nSingleCount = 0;
	int		nTailLen = 0;
	int		bError = 0;
	int		nBiasNum = 0;
	char	cQuotation, cOld;

	g_nCurPos = 0;
	
	/* if the line is a remark, not process */
	if( is_lua_remark(pBuffer[0], pBuffer[1]) )
		return 1;

	pStr = pBuffer;
	/* the last byte of the line is never inspected on its own */
	while( (*(pStr) != '\0') && (*(pStr+1) != '\0') ){
		/* stop when encounter lua remark */
		if ( is_lua_remark(*pStr, *(pStr+1)) )
			break;

		/* when encounter a chinese,
		   draw out the entire string from its start after '"'
		   to its end before the next '"' */
		if ( !is_ascii((unsigned char)(*pStr)) ){
			/* work out which quote character encloses us; being inside
			   both kinds or neither is treated as an error */
			if ( (nSingleCount % 2) == (nDoubleCount % 2) )
				bError = 1;
			else if ( nDoubleCount % 2 )
				cQuotation = '"';
			else if ( nSingleCount % 2 )
				cQuotation = '\'';

			/* find the closing quote, skipping escaped ones */
			if ( bError != 1 ){
				pStr2 = strchr(++pStr, cQuotation);
				while ( pStr2 != NULL && is_escape_quotation( pStr2, cQuotation ) > 0 ){
					pStr2 = strchr(++pStr2, cQuotation);
				}
			}

			if ( pStr2 == NULL )
				bError = 1;

			if ( bError == 1 ){
				/* hand the rest of the line over with the last argument
				   set to 1 and stop */
				nStrLen = (int)strlen(pBuffer);
				nLength = nStrLen - g_nCurPos;
				if ( nLength > 0 )
					string_process(&pBuffer[g_nCurPos],
						     nLength, 1);
				break;
			}
			else{
				nLength = (int)(pStr2 - pBuffer) - g_nCurPos;

				/* the closing quote is accounted for here */
				if ( cQuotation == '"' )
					nDoubleCount ++;
				else if ( cQuotation == '\'' )
					nSingleCount ++;

				/* the last nTailLen bytes of the string are excluded from
				   string_process() -- presumably a trailing function name,
				   going by the helper's name; confirm */
				nTailLen = get_tail_func_name_len(
					&pBuffer[g_nCurPos], nLength);
				nLength = string_process(&pBuffer[g_nCurPos],
						      nLength-nTailLen, 0);

				if ( nTailLen >= 2 ){
					/* scan the excluded tail for nested quoted strings and
					   process those that contain GBK text */
					pStr = pBuffer + g_nCurPos + nLength;
					pOld = pStr2;

					while ( ++pStr < pOld ){
						if ( ( is_escape_quotation( pStr, cQuotation ) == 1 ) || 
							( ( is_single_quotation(*pStr) || is_double_quotation(*pStr) ) && *pStr != cQuotation ) ){

							/* find the quote that matches the one at pStr */
							pStr2 = strchr( (pStr + 1), *pStr );
							while ( pStr2 != NULL ){
								if ( *pStr2 != cQuotation )
									break;
								else if ( is_escape_quotation( pStr2, cQuotation ) == 1 )
									break;

								pStr2 = strchr( ++pStr2, *pStr );
							}

							if ( pStr2 == NULL ){
								/* unbalanced nested quote: hand over the rest
								   of the line with the last argument set to 1 */
								nStrLen = (int)strlen(pBuffer);
								nLength = nStrLen - g_nCurPos;
								if ( nLength > 0 )
									string_process(&pBuffer[g_nCurPos],
											nLength, 1);
								return 1;
							}

							/* temporarily terminate the nested string for the test */
							*pStr2 = '\0';
							if ( is_there_gbk_code(pStr) ){
								*pStr2 = *pStr;
								string_process(&pBuffer[(int)(pStr - pBuffer) + 1],
									((int)(pStr2 - pStr) - 1), 0);
							}

							pStr = pStr2;
						}
					}
				}

				nLength += nTailLen;

				/* resume scanning right after the processed string and its tail */
				pStr = pBuffer + g_nCurPos + nLength;
				g_nCurPos = (int)(pStr - pBuffer);
			}
			
			continue;
		}
		// only a string starts at '"' maybe a string contain chinese
		// record the position after the '"'
		// as the start of possible chinese string
		else if( (nSingleCount % 2) == 0 && is_escape_quotation( pStr, '"' ) == 0 ){
			if( (++nDoubleCount) % 2 )
				g_nCurPos = (int)(pStr - pBuffer) + 1;
		}
		/* same bookkeeping for single-quoted strings */
		else if( (nDoubleCount % 2) == 0 && is_escape_quotation( pStr, '\'' ) == 0 ){
			if( (++nSingleCount) % 2 )
				g_nCurPos = (int)(pStr - pBuffer) + 1;
		}
		
		pStr++;
	}
	
	return 1;
}

Esempio n. 25

0

Mostra file

File: ls.c Progetto: engrnasirkhan/UofI

/*
 * Copy the ctime() rendering of `mtime` into `dst` without the trailing
 * newline, truncating to fit `dst_size`.  `dst` is left as an empty string
 * when ctime() fails.
 */
static void copy_mtime_string(time_t mtime, char *dst, size_t dst_size)
{
  const char *date;
  size_t len;

  if (dst_size == 0)
    return;
  dst[0] = '\0';

  date = ctime(&mtime);
  if (date == NULL)
    return;

  len = strlen(date);
  if (len > 0 && date[len - 1] == '\n')
    len--;
  if (len >= dst_size)
    len = dst_size - 1;
  memcpy(dst, date, len);
  dst[len] = '\0';
}

/*
 * Print one line per entry of `output`, decorated according to the option
 * string located by find_arg_element(argv, argc).
 *
 *   output  array of path names to report on
 *   i       number of entries in `output`
 *   argv    program arguments, searched for the option string
 *   argc    number of entries in `argv`
 *
 * For every path that stat() accepts the line is
 *   "<t column> <l column> <path> <f column>"
 * where each column is empty unless the matching letter ('t', 'l', 'f')
 * occurs in the option string.  Paths that stat() rejects are reported on
 * stderr and skipped.  Always returns 0.
 *
 * No heap memory is used: the columns live in stack buffers or point at
 * string literals, so nothing has to be released per iteration.
 */
int print_output(char **output, int i, char **argv, int argc)
{
  struct stat buf;
  char no_args[] = "";   /* writable empty option string */
  int x;

  for (x = 0; x < i; x++)
    {
      char date[64];                 /* ctime() text is 26 bytes incl. NUL */
      char l_opt[MAX_BUFFER_SIZE];
      const char *t_opt = "";
      const char *f_opt = "";
      char *ls_arg;
      int arge;

      if (stat(output[x], &buf) < 0)
        {
          fprintf(stderr, "%s not found\n", output[x]);
          continue;
        }

      /* get the argument; argv outlives this call so no copy is needed */
      arge = find_arg_element(argv, argc);
      ls_arg = (arge != -1) ? argv[arge] : no_args;

      /* l option: raw stat fields followed by the modification time */
      l_opt[0] = '\0';
      if (str_index(ls_arg, "l") != -1)
        {
          copy_mtime_string(buf.st_mtime, date, sizeof(date));
          /* Cast every field to the exact type named by its conversion
             specifier; mode_t/nlink_t/uid_t/gid_t/off_t are not
             guaranteed to be int. */
          snprintf(l_opt, sizeof(l_opt), "%4u %lu %u %4u %5lld %s",
                   (unsigned int)buf.st_mode, (unsigned long)buf.st_nlink,
                   (unsigned int)buf.st_uid, (unsigned int)buf.st_gid,
                   (long long)buf.st_size, date);
        }

      /* t option: modification time only */
      if (str_index(ls_arg, "t") != -1)
        {
          copy_mtime_string(buf.st_mtime, date, sizeof(date));
          t_opt = date;
        }

      /* f option: classify the file from its first line */
      if (str_index(ls_arg, "f") != -1)
        {
          FILE *fp = fopen(output[x], "r");

          /* If the file cannot be opened the column stays empty. */
          if (fp != NULL)
            {
              char line[MAX_BUFFER_SIZE];

              if (fgets(line, sizeof(line), fp) != NULL)
                {
                  /* NOTE(review): the is_dos() and is_ascii() matches
                     report the same label -- confirm this is intended. */
                  if (is_o(output[x]) == 1)
                    f_opt = " - Relocatable .o file";
                  else if (is_elf(line) == 1)
                    f_opt = " - ELF File";
                  else if (is_dos(fp) == 1)
                    f_opt = " - ASCII File";
                  else if (is_ascii(fp) == 1)
                    f_opt = " - ASCII File";
                  else
                    f_opt = " - file unknow";
                }
              else
                f_opt = " - directory";

              /* only close a stream that was actually opened */
              fclose(fp);
            }
        }

      /* print formatted text */
      printf("%s %s %10s %s\n", t_opt, l_opt, output[x], f_opt);
    }
  return(0);
}

Esempio n. 26

0

Mostra file

File: sstring.cpp Progetto: ritmatter/ByteMe

// Constructs an sstring by passing `from` to encode().
// When DEBUG is defined, is_ascii(from) is asserted before encoding.
sstring::sstring(const char *from)
{
#if defined(DEBUG)
    assert(is_ascii(from));
#endif
    encode(from);
}

Esempio n. 27

0

Mostra file

File: sstring.cpp Progetto: ritmatter/ByteMe

// Constructs an sstring by passing `from` and `length` to encode().
// When DEBUG is defined, is_ascii(from) is asserted before encoding.
// NOTE(review): the assertion does not receive `length`; presumably
// is_ascii() relies on a terminator in `from` -- confirm against callers.
sstring::sstring(const char *from, size_t length)
{
#if defined(DEBUG)
    assert(is_ascii(from));
#endif
    encode(from, length);
}

Esempio n. 28

0

Mostra file

File: Words.cpp Progetto: BillWangCS/open-source-search-engine

// Splits the text at "s" into consecutive "words" and appends one entry per
// word to m_words / m_wordLens / m_wordIds (and m_tagIds where noted).
//
// Three kinds of entries are produced:
//  - tag words:   when m_hasTags is set and isTagStart() accepts a '<';
//                 the tag id from getTagId() is stored in m_tagIds, OR'ed
//                 with BACKBIT for a "</..." tag, and the word id is 0.
//  - punct words: a run of non-alnum characters; word id is 0.
//  - alnum words: a run of alnum characters; when computeWordIds is true the
//                 word id is hash64Lower_utf8() of the word, otherwise
//                 m_wordIds is not written for that entry here.
//
// Scanning stops when i >= nodeLen, when a NUL byte is reached, or when
// m_numWords reaches m_preCount. Note that the inner scanning loops are
// bounded only by the NUL byte; nodeLen is checked only at "uptop".
//
// "niceness" is only forwarded to QUICKPOLL(). The function always returns
// true and sets m_totalLen before returning.
//
// badCount is incremented only inside the commented-out block near the end,
// so in the live code the final "bad utf8 chars" log never fires.
bool Words::addWords(char *s,long nodeLen,bool computeWordIds, long niceness) {
	long  i = 0;
	long  j;
	//long  k = 0;
	long  wlen;
	//unsigned long e;
	//long  skip;
	long badCount = 0;

	// true once the alnum word being scanned has absorbed one apostrophe
	bool hadApostrophe = false;

	UCScript oldScript = ucScriptCommon;
	UCScript saved;
	UCProps props;

 uptop:

	// bad utf8 can cause a breach
	if ( i >= nodeLen ) goto done;

	if ( ! s[i] ) goto done;

	if ( ! is_alnum_utf8(s+i) ) { // && m_numWords < m_preCount ) {

		if ( m_numWords >= m_preCount ) goto done;

		// tag?
		// NOTE(review): m_tagIds is written here without the NULL check
		// used on the other paths; presumably m_hasTags implies it is
		// allocated -- confirm.
		if ( s[i]=='<' && m_hasTags && isTagStart(s+i) ) {
			// get the tag id
			if ( s[i+1]=='/' ) {
				// skip over /
				m_tagIds [m_numWords] = ::getTagId(s+i+2);
				m_tagIds [m_numWords] |= BACKBIT;
			}
			else
				m_tagIds [m_numWords] = ::getTagId(s+i+1);
			// word start
			m_words    [m_numWords] = s + i;
			m_wordIds  [m_numWords] = 0LL;
			// skip till end
			long tagLen = getTagLen(s+i); // ,niceness);
			m_wordLens [m_numWords] = tagLen;
			m_numWords++;
			// advance
			i += tagLen;
			goto uptop;
		}

		// it is a punct word, find end of it
		char *start = s+i;
		//for (;s[i] && ! is_alnum_utf8(s+i);i+=getUtf8CharSize(s+i));
		for ( ; s[i] ; i += getUtf8CharSize(s+i)){
			// stop on < if we got tags
			if ( s[i] == '<' && m_hasTags ) break;
			// breathe
			QUICKPOLL(niceness);
			// if we are simple ascii, skip quickly
			if ( is_ascii(s[i]) ) {
				// accumulate NON-alnum chars
				if ( ! is_alnum_a(s[i]) ) continue;
				// update
				oldScript = ucScriptCommon;
				// otherwise, stop we got alnum
				break;
			}
			// if we are utf8 we stop on special props
			UChar32 c = utf8Decode ( s+i );
			// stop if word char
			if ( ! ucIsWordChar ( c ) ) continue;
			// update first though
			oldScript = ucGetScript ( c );
			// then stop
			break;
		}
		m_words        [ m_numWords  ] = start;
		m_wordLens     [ m_numWords  ] = s+i - start;
		m_wordIds      [ m_numWords  ] = 0LL;
		if (m_tagIds) m_tagIds[m_numWords] = 0;
		m_numWords++;
		goto uptop;
	}

	// get an alnum word
	j = i;
	// re-entered (with j unchanged) after an in-word apostrophe is absorbed
 again:
	//for ( ; is_alnum_utf8 (&s[i] ) ; i += getUtf8CharSize(s+i) );
	for ( ; s[i] ; i += getUtf8CharSize(s+i) ) {
		// breathe
		QUICKPOLL(niceness);
		// simple ascii?
		if ( is_ascii(s[i]) ) {
			// accumulate alnum chars
			if ( is_alnum_a(s[i]) ) continue;
			// update
			oldScript = ucScriptCommon;
			// otherwise, stop we got punct
			break;
		}
		// get the code point of the utf8 char
		UChar32 c = utf8Decode ( s+i );
		// get props
		props = ucProperties ( c );
		// good stuff?
		if ( props & (UC_IGNORABLE|UC_EXTEND) ) continue;
		// stop? if UC_WORCHAR is set, that means its an alnum
		if ( ! ( props & UC_WORDCHAR ) ) {
			// reset script between words
			oldScript = ucScriptCommon;
			break;
		}
		// save it
		saved = oldScript;
		// update here
		oldScript = ucGetScript(c);
		// treat ucScriptLatin (30) as common so we can have latin1
		// like char without breaking the word!
		if ( oldScript == ucScriptLatin ) oldScript = ucScriptCommon;
		// stop on this crap too i guess. like japanes chars?
		if ( props & ( UC_IDEOGRAPH | UC_HIRAGANA | UC_THAI ) ) {
			// include it
			i += getUtf8CharSize(s+i);
			// but stop
			break;
		}
		// script change?
		if ( saved != oldScript ) break;
	}
	
	// . java++, A++, C++ exception
	// . A+, C+, exception
	// . TODO: consider putting in Bits.cpp w/ D_CAN_BE_IN_PHRASE
	if ( s[i]=='+' ) {
		if ( s[i+1]=='+' && !is_alnum_utf8(&s[i+2]) ) i += 2;
		else if ( !is_alnum_utf8(&s[i+1]) ) i++;
	}
	// . c#, j#, ...
	if ( s[i]=='#' && !is_alnum_utf8(&s[i+1]) ) i++;
	
	// allow for words like we're dave's and i'm
	// (at most one apostrophe per word: hadApostrophe blocks a second one)
	if(s[i]=='\''&&s[i+1]&&is_alnum_utf8(&s[i+1])&&!hadApostrophe){
		i++;
		hadApostrophe = true;
		goto again;
	}
	hadApostrophe = false;
	
	// get word length
	wlen = i - j;
	if ( m_numWords >= m_preCount ) goto done;
	m_words   [ m_numWords  ] = &s[j];
	m_wordLens[ m_numWords  ] = wlen;
	// . Lars says it's better to leave the accented chars intact
	// . google agrees
	// . but what about "re'sume"?
	if ( computeWordIds ) {
		long long h = hash64Lower_utf8(&s[j],wlen);
		m_wordIds [m_numWords] = h;
		// until we get an accent removal algo, comment this
		// out and possibly use the query synonym pipeline
		// to search without accents. MDW
		//long long h2 = hash64AsciiLowerE(&s[j],wlen);
		//if ( h2 != h ) m_stripWordIds [m_numWords] = h2;
		//else           m_stripWordIds [m_numWords] = 0LL;
		//m_stripWordIds[m_numWords] = 0;
	}
	if (m_tagIds) m_tagIds[m_numWords] = 0;
	m_numWords++;
	m_numAlnumWords++;
	// break on \0 or MAX_WORDS
	//if ( ! s[i] ) goto done;
	// get a punct word
	goto uptop;
	/*
	  j = i;
	  // delineate the "punctuation" word
	  for ( ; s[i] && !is_alnum_utf8(&s[i]);i+=getUtf8CharSize(s+i));
	  // bad utf8 could cause us to breach the node, so watch out!
	  if ( i > nodeLen ) {
	  badCount++;
	  i = nodeLen;
	  }
	  // get word length
	  wlen = i - j;
	  if ( m_numWords >= m_preCount ) goto done;
	  m_words        [m_numWords  ] = &s[j];
	  m_wordLens     [m_numWords  ] = wlen;
	  m_wordIds      [m_numWords  ] = 0LL;
	  if (m_tagIds) m_tagIds[m_numWords] = 0;
	  m_numWords++;
	*/

 done:
	// bad programming warning
	if ( m_numWords > m_preCount ) {
		log(LOG_LOGIC,
		    "build: words: set: Fix counting routine.");
		// deliberate write through NULL to force a crash here
		char *xx = NULL; *xx = 0;
	}
	// compute total length
	// (offset of the end of the last recorded word relative to s)
	if ( m_numWords <= 0 ) m_totalLen = 0;
	else m_totalLen = m_words[m_numWords-1] - s + m_wordLens[m_numWords-1];

	if ( badCount )
		log("words: had %li bad utf8 chars",badCount);

	return true;
}

Esempio n. 29

0

Mostra file

File: ps.cpp Progetto: DemonFang/groff

// Writes the n bytes at s to fp as a PostScript string and returns *this.
//
// Two encodings are considered: a "(...)" literal, in which '(' ')' and '\\'
// are backslash-escaped and every byte failing is_ascii()/csprint() becomes a
// \ooo octal escape, and a "<...>" hex string of two digits per byte.  The
// hex form is chosen only when the literal form would be longer than it.
// col is kept up to date and lines are broken so that max_line_length is
// respected; need_space is cleared on exit.
ps_output &ps_output::put_string(const char *s, int n)
{
  // Pass 1: total width of the literal encoding of the whole string.
  int literal_width = 0;
  for (int k = 0; k < n; k++) {
    const char ch = s[k];
    if (!(is_ascii(ch) && csprint(ch)))
      literal_width += 4;	// \ooo
    else if (ch == '(' || ch == ')' || ch == '\\')
      literal_width += 2;	// backslash + char
    else
      literal_width += 1;	// verbatim
  }

  const int hex_width = n*2;

  if (literal_width > hex_width) {
    // Hex form.  Start a fresh line if the whole token fits on one line
    // but not on the remainder of the current one.
    if (hex_width + 2 <= max_line_length
	&& col + hex_width + 2 > max_line_length) {
      putc('\n', fp);
      col = 0;
    }
    if (col + 1 > max_line_length) {
      putc('\n', fp);
      col = 0;
    }
    putc('<', fp);
    col++;
    for (int k = 0; k < n; k++) {
      if (col + 2 > max_line_length) {
	putc('\n', fp);
	col = 0;
      }
      fprintf(fp, "%02x", s[k] & 0377);
      col += 2;
    }
    putc('>', fp);
    col++;
  }
  else {
    // Literal form, with the same "fits on a fresh line" pre-break.
    if (literal_width + 2 <= max_line_length
	&& col + literal_width + 2 > max_line_length) {
      putc('\n', fp);
      col = 0;
    }
    if (col + 2 > max_line_length) {
      putc('\n', fp);
      col = 0;
    }
    putc('(', fp);
    col++;
    for (int k = 0; k < n; k++) {
      const char ch = s[k];
      int width;
      if (!(is_ascii(ch) && csprint(ch)))
	width = 4;
      else if (ch == '(' || ch == ')' || ch == '\\')
	width = 2;
      else
	width = 1;
      // Inside a literal a line is continued with backslash-newline.
      if (col + width + 1 > max_line_length) {
	putc('\\', fp);
	putc('\n', fp);
	col = 0;
      }
      if (width == 1)
	putc(ch, fp);
      else if (width == 2) {
	putc('\\', fp);
	putc(ch, fp);
      }
      else
	fprintf(fp, "\\%03o", ch & 0377);
      col += width;
    }
    putc(')', fp);
    col++;
  }
  need_space = 0;
  return *this;
}

Esempio n. 30

0

Mostra file

File: input.c Progetto: dmt4/ne

/* Returns the result of is_ascii() called with input_buffer and len. */
static int input_buffer_is_ascii() {
	const int all_ascii = is_ascii(input_buffer, len);
	return all_ascii;
}