Example #1
0
bool Cx_TextUtil::ToDBC(std::wstring& text, bool punct)
{
	bool changed = false;
	std::wstring dest('\0', text.size() + 1);

	if (!text.empty() && punct)
	{
		int ret = LCMapStringW(
			MAKELANGID(LANG_CHINESE, SUBLANG_CHINESE_SIMPLIFIED), 
			LCMAP_HALFWIDTH, 
			text.c_str(), text.size(), 
			&dest[0], dest.size() + 1);

		if (ret > 0 && dest != text)
		{
			text = dest;
			changed = true;
		}
	}
	else if (!text.empty())
	{
		LPCWSTR psrc = text.c_str();
		LPWSTR pdest = &dest[0];
		int i = 0, n = 0;

		for (; psrc[i] != 0; i++)
		{
			if (psrc[i] >= 0xA3B0 && psrc[i] <= 0xA3B9)		// £°..£¹
			{
				pdest[i] = WCHAR('0' + psrc[i] - 0xA3B0);
				n++;
			}
			else if (psrc[i] >= 0xA3C1 && psrc[i] <= 0xA3DA)	// £Á..£Ú
			{
				pdest[i] = WCHAR('A' + psrc[i] - 0xA3C1);
				n++;
			}
			else if (psrc[i] >= 0xA3E1 && psrc[i] <= 0xA3FA)	// £á..£ú
			{
				pdest[i] = WCHAR('a' + psrc[i] - 0xA3E1);
				n++;
			}
			else
			{
				pdest[i] = psrc[i];
			}
		}
		pdest[i] = 0;

		if (n > 0)
		{
			text = dest;
			changed = true;
		}
	}

	return changed;
}
Example #2
0
HANDLE _generateDumpFileAtPath(const WCHAR *path) {
	static const int maxFileLen = MAX_PATH * 10;

	WCHAR szPath[maxFileLen];
	wsprintf(szPath, L"%stdumps\\", path);

    if (!CreateDirectory(szPath, NULL)) {
		if (GetLastError() != ERROR_ALREADY_EXISTS) {
			return 0;
		}
	}

    WCHAR szFileName[maxFileLen];
	WCHAR szExeName[maxFileLen];

	wcscpy_s(szExeName, _exeName);
	WCHAR *dotFrom = wcschr(szExeName, WCHAR(L'.'));
	if (dotFrom) {
		wsprintf(dotFrom, L"");
	}

    SYSTEMTIME stLocalTime;

    GetLocalTime(&stLocalTime);

    wsprintf(szFileName, L"%s%s-%s-%04d%02d%02d-%02d%02d%02d-%ld-%ld.dmp", 
             szPath, szExeName, updaterVersionStr, 
             stLocalTime.wYear, stLocalTime.wMonth, stLocalTime.wDay, 
             stLocalTime.wHour, stLocalTime.wMinute, stLocalTime.wSecond, 
             GetCurrentProcessId(), GetCurrentThreadId());
    return CreateFile(szFileName, GENERIC_READ|GENERIC_WRITE, FILE_SHARE_WRITE|FILE_SHARE_READ, 0, CREATE_ALWAYS, 0, 0);
}
Example #3
0
/* Filedisk PnP ID query-response routine. */
static NTSTATUS STDCALL WvFilediskPnpQueryId_(
    IN PDEVICE_OBJECT dev_obj,
    IN PIRP irp,
    IN WVL_SP_DISK_T disk
  ) {
    static const WCHAR * hw_ids[WvlDiskMediaTypes] = {
        WVL_M_WLIT L"\\FileFloppyDisk",
        WVL_M_WLIT L"\\FileHardDisk",
        WVL_M_WLIT L"\\FileOpticalDisc"
      };
    WCHAR (*buf)[512];
    NTSTATUS status;
    WV_SP_FILEDISK_T filedisk = CONTAINING_RECORD(disk, WV_S_FILEDISK_T, disk);
    BUS_QUERY_ID_TYPE query_type;

    /* Allocate a buffer. */
    buf = wv_mallocz(sizeof *buf);
    if (!buf) {
        status = STATUS_INSUFFICIENT_RESOURCES;
        goto err_buf;
      }

    /* Populate the buffer with IDs. */
    query_type = IoGetCurrentIrpStackLocation(irp)->Parameters.QueryId.IdType;
    switch (query_type) {
        case BusQueryDeviceID:
          swprintf(*buf, hw_ids[disk->Media]);
          break;

        case BusQueryInstanceID:
          /* "Location". */
          swprintf(*buf, L"Hash_%08X", filedisk->hash);
          break;

        case BusQueryHardwareIDs:
          swprintf(
              *buf + swprintf(*buf, hw_ids[disk->Media]) + 1,
              WvlDiskCompatIds[disk->Media]
            );
          break;

        case BusQueryCompatibleIDs:
          swprintf(*buf, WvlDiskCompatIds[disk->Media]);

        default:
          DBG("Unknown query type %d for %p!\n", query_type, filedisk);
          status = STATUS_INVALID_PARAMETER;
          goto err_query_type;
      }

    DBG("IRP_MN_QUERY_ID for file-backed disk %p.\n", filedisk);
    return WvlIrpComplete(irp, (ULONG_PTR) buf, STATUS_SUCCESS);

    err_query_type:

    wv_free(buf);
    err_buf:

    return WvlIrpComplete(irp, 0, status);
  }
Example #4
0
NTSTATUS STDCALL WvDiskPnpQueryDevText(
    IN PDEVICE_OBJECT dev_obj,
    IN PIRP irp,
    IN WVL_SP_DISK_T disk
  ) {
    IN WV_SP_DEV_T dev = WvDevFromDevObj(dev_obj);
    WCHAR (*str)[512];
    PIO_STACK_LOCATION io_stack_loc = IoGetCurrentIrpStackLocation(irp);
    NTSTATUS status;
    UINT32 str_len;

    /* Allocate a string buffer. */
    str = wv_mallocz(sizeof *str);
    if (str == NULL) {
        DBG("wv_malloc IRP_MN_QUERY_DEVICE_TEXT\n");
        status = STATUS_INSUFFICIENT_RESOURCES;
        goto alloc_str;
      }
    /* Determine the query type. */
    switch (io_stack_loc->Parameters.QueryDeviceText.DeviceTextType) {
        case DeviceTextDescription:
          str_len = swprintf(*str, WVL_M_WLIT L" Disk") + 1;
          irp->IoStatus.Information =
            (ULONG_PTR) wv_palloc(str_len * sizeof *str);
          if (irp->IoStatus.Information == 0) {
              DBG("wv_palloc DeviceTextDescription\n");
              status = STATUS_INSUFFICIENT_RESOURCES;
              goto alloc_info;
            }
          RtlCopyMemory(
              (PWCHAR) irp->IoStatus.Information,
              str,
              str_len * sizeof *str
            );
          status = STATUS_SUCCESS;
          goto alloc_info;

        case DeviceTextLocationInformation:
          if (disk->disk_ops.PnpQueryId) {
              io_stack_loc->MinorFunction = IRP_MN_QUERY_ID;
              io_stack_loc->Parameters.QueryId.IdType = BusQueryInstanceID;
              return disk->disk_ops.PnpQueryId(dev_obj, irp, disk);
            }
          /* Else, fall through... */

        default:
          irp->IoStatus.Information = 0;
          status = STATUS_NOT_SUPPORTED;
      }
    /* irp->IoStatus.Information not freed. */
    alloc_info:

    wv_free(str);
    alloc_str:

    return WvlIrpComplete(irp, irp->IoStatus.Information, status);
  }
Example #5
0
// Thanks to Jonathan Wood from stackoverflow.com for this.
// Returns filename portion of the given path
// Returns empty string if path is directory
WCHAR *GetFileName(WCHAR *path)
{
    WCHAR *filename = wcsrchr(path, WCHAR(0x5C));
    if (filename == NULL)
        filename = path;
    else
        filename++;

    return filename;
}
Example #6
0
nsresult
gfxGDIFontList::GetFontSubstitutes()
{
    HKEY hKey;
    DWORD i, rv, lenAlias, lenActual, valueType;
    WCHAR aliasName[MAX_VALUE_NAME];
    WCHAR actualName[MAX_VALUE_DATA];

    if (RegOpenKeyExW(HKEY_LOCAL_MACHINE, 
          L"SOFTWARE\\Microsoft\\Windows NT\\CurrentVersion\\FontSubstitutes",
          0, KEY_READ, &hKey) != ERROR_SUCCESS)
    {
        return NS_ERROR_FAILURE;
    }

    for (i = 0, rv = ERROR_SUCCESS; rv != ERROR_NO_MORE_ITEMS; i++) {
        aliasName[0] = 0;
        lenAlias = sizeof(aliasName);
        actualName[0] = 0;
        lenActual = sizeof(actualName);
        rv = RegEnumValueW(hKey, i, aliasName, &lenAlias, NULL, &valueType, 
                (LPBYTE)actualName, &lenActual);

        if (rv != ERROR_SUCCESS || valueType != REG_SZ || lenAlias == 0) {
            continue;
        }

        if (aliasName[0] == WCHAR('@')) {
            continue;
        }

        nsAutoString substituteName((PRUnichar*) aliasName);
        nsAutoString actualFontName((PRUnichar*) actualName);
        RemoveCharsetFromFontSubstitute(substituteName);
        BuildKeyNameFromFontName(substituteName);
        RemoveCharsetFromFontSubstitute(actualFontName);
        BuildKeyNameFromFontName(actualFontName);
        gfxFontFamily *ff;
        if (!actualFontName.IsEmpty() && 
            (ff = mFontFamilies.GetWeak(actualFontName))) {
            mFontSubstitutes.Put(substituteName, ff);
        } else {
            mNonExistingFonts.AppendElement(substituteName);
        }
    }
    return NS_OK;
}
Example #7
0
static int _readstringW(WCHAR*& pRet,LPCWSTR pStr,DWORD dwStrLen,MALLOC_FUNC pMalloc)
{
	pRet=(WCHAR*)pMalloc(dwStrLen*2);

	int i;
	for (i=0;*pStr!='\0';i++,pStr++)
	{
		if (*pStr=='\\' && pStr[1]!='\0')
		{
			pStr++;
			switch (*pStr)
			{
			case '0':
				pRet[i]=L'\0';
				break;
			case 'n':
				pRet[i]=L'\n';
				break;
			case 'r':
				pRet[i]=L'\r';
				break;
			case 't':
				pRet[i]=L'\t';
				break;
			case 'b':
				pRet[i]=L'\b';
				break;
			default:
				pRet[i]=WCHAR(_readnum(16,pStr,4));
				pStr--;
				break;
			}
		}
		else
			pRet[i]=*pStr;
	}
	return i;
}
LRESULT CNumericEntryDlg::OnCharEditMin(UINT uMsg, WPARAM wParam, LPARAM lParam, BOOL& bHandled)
{
	USES_CONVERSION;
	
	TCHAR ch = (TCHAR)wParam;
	
	if (wParam >= 0x20)
	{
		CComBSTR bstr;
		m_editMin.GetWindowText((BSTR&)bstr);
		
		if ((IsCharAlphaNumeric(ch) && !IsCharAlpha(ch)) || 
			(ch == _T('.') && bstr.Length() > 0 && wcsrchr(bstr, WCHAR('.')) == NULL))
			bHandled = FALSE;
		else
			MessageBeep(MB_OK);
	}
	else
	{
		bHandled = FALSE;
	}
	
	return 0;
}
Example #9
0
size_t
EGexecute (char const *buf, size_t size, size_t *match_size,
           char const *start_ptr)
{
  char const *buflim, *beg, *end, *match, *best_match, *mb_start;
  char eol = eolbyte;
  int backref;
  regoff_t start;
  size_t len, best_len;
  struct kwsmatch kwsm;
  size_t i, ret_val;
  mb_len_map_t *map = NULL;

  if (MB_CUR_MAX > 1)
    {
      if (match_icase)
        {
          /* mbtolower adds a NUL byte at the end.  That will provide
             space for the sentinel byte dfaexec may add.  */
          char *case_buf = mbtolower (buf, &size, &map);
          if (start_ptr)
            start_ptr = case_buf + (start_ptr - buf);
          buf = case_buf;
        }
    }

  mb_start = buf;
  buflim = buf + size;

  for (beg = end = buf; end < buflim; beg = end)
    {
      if (!start_ptr)
        {
          /* We don't care about an exact match.  */
          if (kwset)
            {
              /* Find a possible match using the KWset matcher. */
              size_t offset = kwsexec (kwset, beg, buflim - beg, &kwsm);
              if (offset == (size_t) -1)
                goto failure;
              beg += offset;
              /* Narrow down to the line containing the candidate, and
                 run it through DFA. */
              if ((end = memchr(beg, eol, buflim - beg)) != NULL)
                end++;
              else
                end = buflim;
              match = beg;
              while (beg > buf && beg[-1] != eol)
                --beg;
              if (kwsm.index < kwset_exact_matches)
                {
                  if (!MBS_SUPPORT)
                    goto success;

                  if (mb_start < beg)
                    mb_start = beg;
                  if (MB_CUR_MAX == 1
                      || !is_mb_middle (&mb_start, match, buflim,
                                        kwsm.size[0]))
                    goto success;
                }
              if (dfaexec (dfa, beg, (char *) end, 0, NULL, &backref) == NULL)
                continue;
            }
          else
            {
              /* No good fixed strings; start with DFA. */
              char const *next_beg = dfaexec (dfa, beg, (char *) buflim,
                                              0, NULL, &backref);
              /* If there's no match, or if we've matched the sentinel,
                 we're done.  */
              if (next_beg == NULL || next_beg == buflim)
                break;
              /* Narrow down to the line we've found. */
              beg = next_beg;
              if ((end = memchr(beg, eol, buflim - beg)) != NULL)
                end++;
              else
                end = buflim;
              while (beg > buf && beg[-1] != eol)
                --beg;
            }
          /* Successful, no backreferences encountered! */
          if (!backref)
            goto success;
        }
      else
        {
          /* We are looking for the leftmost (then longest) exact match.
             We will go through the outer loop only once.  */
          beg = start_ptr;
          end = buflim;
        }

      /* If the "line" is longer than the maximum regexp offset,
         die as if we've run out of memory.  */
      if (TYPE_MAXIMUM (regoff_t) < end - buf - 1)
        xalloc_die ();

      /* If we've made it to this point, this means DFA has seen
         a probable match, and we need to run it through Regex. */
      best_match = end;
      best_len = 0;
      for (i = 0; i < pcount; i++)
        {
          patterns[i].regexbuf.not_eol = 0;
          start = re_search (&(patterns[i].regexbuf),
                             buf, end - buf - 1,
                             beg - buf, end - beg - 1,
                             &(patterns[i].regs));
          if (start < -1)
            xalloc_die ();
          else if (0 <= start)
            {
              len = patterns[i].regs.end[0] - start;
              match = buf + start;
              if (match > best_match)
                continue;
              if (start_ptr && !match_words)
                goto assess_pattern_match;
              if ((!match_lines && !match_words)
                  || (match_lines && len == end - beg - 1))
                {
                  match = beg;
                  len = end - beg;
                  goto assess_pattern_match;
                }
              /* If -w, check if the match aligns with word boundaries.
                 We do this iteratively because:
                 (a) the line may contain more than one occurrence of the
                 pattern, and
                 (b) Several alternatives in the pattern might be valid at a
                 given point, and we may need to consider a shorter one to
                 find a word boundary.  */
              if (match_words)
                while (match <= best_match)
                  {
                    regoff_t shorter_len = 0;
                    if ((match == buf || !WCHAR ((unsigned char) match[-1]))
                        && (start + len == end - buf - 1
                            || !WCHAR ((unsigned char) match[len])))
                      goto assess_pattern_match;
                    if (len > 0)
                      {
                        /* Try a shorter length anchored at the same place. */
                        --len;
                        patterns[i].regexbuf.not_eol = 1;
                        shorter_len = re_match (&(patterns[i].regexbuf),
                                                buf, match + len - beg,
                                                match - buf,
                                                &(patterns[i].regs));
                        if (shorter_len < -1)
                          xalloc_die ();
                      }
                    if (0 < shorter_len)
                      len = shorter_len;
                    else
                      {
                        /* Try looking further on. */
                        if (match == end - 1)
                          break;
                        match++;
                        patterns[i].regexbuf.not_eol = 0;
                        start = re_search (&(patterns[i].regexbuf),
                                           buf, end - buf - 1,
                                           match - buf, end - match - 1,
                                           &(patterns[i].regs));
                        if (start < 0)
                          {
                            if (start < -1)
                              xalloc_die ();
                            break;
                          }
                        len = patterns[i].regs.end[0] - start;
                        match = buf + start;
                      }
                  } /* while (match <= best_match) */
              continue;
            assess_pattern_match:
              if (!start_ptr)
                {
                  /* Good enough for a non-exact match.
                     No need to look at further patterns, if any.  */
                  goto success;
                }
              if (match < best_match || (match == best_match && len > best_len))
                {
                  /* Best exact match:  leftmost, then longest.  */
                  best_match = match;
                  best_len = len;
                }
            } /* if re_search >= 0 */
        } /* for Regex patterns.  */
        if (best_match < end)
          {
            /* We have found an exact match.  We were just
               waiting for the best one (leftmost then longest).  */
            beg = best_match;
            len = best_len;
            goto success_in_len;
          }
    } /* for (beg = end ..) */

 failure:
  ret_val = -1;
  goto out;

 success:
  len = end - beg;
 success_in_len:;
  size_t off = beg - buf;
  mb_case_map_apply (map, &off, &len);
  *match_size = len;
  ret_val = off;
 out:
  return ret_val;
}
Example #10
0
static NTSTATUS STDCALL AoeBusPnpQueryDevText_(
    IN WVL_SP_BUS_T bus,
    IN PIRP irp
) {
    WCHAR (*str)[512];
    PIO_STACK_LOCATION io_stack_loc = IoGetCurrentIrpStackLocation(irp);
    NTSTATUS status;
    UINT32 str_len;

    /* Allocate a string buffer. */
    str = wv_mallocz(sizeof *str);
    if (str == NULL) {
        DBG("wv_malloc IRP_MN_QUERY_DEVICE_TEXT\n");
        status = STATUS_INSUFFICIENT_RESOURCES;
        goto alloc_str;
    }
    /* Determine the query type. */
    switch (io_stack_loc->Parameters.QueryDeviceText.DeviceTextType) {
    case DeviceTextDescription:
        str_len = swprintf(*str, L"AoE Bus") + 1;
        irp->IoStatus.Information =
            (ULONG_PTR) wv_palloc(str_len * sizeof **str);
        if (irp->IoStatus.Information == 0) {
            DBG("wv_palloc DeviceTextDescription\n");
            status = STATUS_INSUFFICIENT_RESOURCES;
            goto alloc_info;
        }
        RtlCopyMemory(
            (PWCHAR) irp->IoStatus.Information,
            str,
            str_len * sizeof **str
        );
        status = STATUS_SUCCESS;
        goto alloc_info;

    case DeviceTextLocationInformation:
        str_len = AoeBusPnpId_(
                      NULL,
                      BusQueryInstanceID,
                      str
                  );
        irp->IoStatus.Information =
            (ULONG_PTR) wv_palloc(str_len * sizeof **str);
        if (irp->IoStatus.Information == 0) {
            DBG("wv_palloc DeviceTextLocationInformation\n");
            status = STATUS_INSUFFICIENT_RESOURCES;
            goto alloc_info;
        }
        RtlCopyMemory(
            (PWCHAR) irp->IoStatus.Information,
            str,
            str_len * sizeof **str
        );
        status = STATUS_SUCCESS;
        goto alloc_info;

    default:
        irp->IoStatus.Information = 0;
        status = STATUS_NOT_SUPPORTED;
    }
    /* irp->IoStatus.Information not freed. */
alloc_info:

    wv_free(str);
alloc_str:

    return WvlIrpComplete(irp, irp->IoStatus.Information, status);
}
Example #11
0
    inline char16 DecodeTail(char16 c1, LPCUTF8& ptr, LPCUTF8 end, DecodeOptions& options, bool *chunkEndsAtTruncatedSequence)
    {
        char16 ch = 0;
        BYTE c2, c3, c4;

        switch (EncodedBytes(c1))
        {
        case 1:
            if (c1 < 0x80) return c1;

            if ((options & doSecondSurrogatePair) != 0)
            {
                // We're in the middle of decoding a surrogate pair from a four-byte utf8 sequence.
                // The high word has already been returned, but without advancing ptr, which was on byte 1.
                // ptr was then advanced externally when reading c1, which is byte 1, so ptr is now on byte 2.
                // byte 1 must have been a continuation byte, hence will be in case 1.
                ptr--; // back to byte 1
                c1 = ptr[-1]; // the original first byte

                // ptr is now on c2. We must also have c3 and c4, otherwise doSecondSurrogatePair won't set.
                _Analysis_assume_(ptr + 2 < end);
                goto LFourByte;
            }

            // 10xxxxxx (trail byte appearing in a lead byte position
            return GetUnknownCharacter(options);

        case 2:
            // Look for an overlong utf-8 sequence.
            if (ptr >= end)
            {
                if ((options & doChunkedEncoding) != 0)
                {
                    // The is a sequence that spans a chunk, push ptr back to the beginning of the sequence.
                    ptr--;

                    if (chunkEndsAtTruncatedSequence)
                    {
                        *chunkEndsAtTruncatedSequence = true;
                    }
                }
                return GetUnknownCharacter(options);
            }
            c2 = *ptr++;
            // 110XXXXx 10xxxxxx
            //      UTF16       |   UTF8 1st byte  2nd byte
            // U+0080..U+07FF   |    C2..DF         80..BF
            if (
                    InRange(c1, 0xC2, 0xDF)
                    && InRange(c2, 0x80, 0xBF)
                )
            {
                ch |= WCHAR(c1 & 0x1f) << 6;     // 0x0080 - 0x07ff
                ch |= WCHAR(c2 & 0x3f);
                if (!IsValidWideChar(ch) && ((options & doAllowInvalidWCHARs) == 0))
                {
                    ch = GetUnknownCharacter(options);
                }
            }
            else
            {
                ptr--;
                ch = GetUnknownCharacter(options);
            }
            break;

        case 3:
            // 1110XXXX 10Xxxxxx 10xxxxxx
            // Look for overlong utf-8 sequence.
            if (ptr + 1 >= end)
            {
                if ((options & doChunkedEncoding) != 0)
                {
                    // The is a sequence that spans a chunk, push ptr back to the beginning of the sequence.
                    ptr--;

                    if (chunkEndsAtTruncatedSequence)
                    {
                        *chunkEndsAtTruncatedSequence = true;
                    }
                }

                return GetUnknownCharacter(options);
            }

            //      UTF16       |   UTF8 1st byte  2nd byte 3rd byte
            // U+0800..U+0FFF   |    E0             A0..BF   80..BF
            // U+1000..U+CFFF   |    E1..EC         80..BF   80..BF
            // U+D000..U+D7FF   |    ED             80..9F   80..BF
            // U+E000..U+FFFF   |    EE..EF         80..BF   80..BF
            c2 = ptr[0];
            c3 = ptr[1];
            if (
                // any following be true
                    (c1 == 0xE0
                    && InRange(c2, 0xA0, 0xBF)
                    && InRange(c3, 0x80, 0xBF))
                    ||
                    (InRange(c1, 0xE1, 0xEC)
                    && InRange(c2, 0x80, 0xBF)
                    && InRange(c3, 0x80, 0xBF))
                    ||
                    (c1 == 0xED
                    && InRange(c2, 0x80, 0x9F)
                    && InRange(c3, 0x80, 0xBF))
                    ||
                    (InRange(c1, 0xEE, 0xEF)
                    && InRange(c2, 0x80, 0xBF)
                    && InRange(c3, 0x80, 0xBF))
                    ||
                    (((options & doAllowThreeByteSurrogates) != 0)
                    &&
                    c1 == 0xED
                    && InRange(c2, 0x80, 0xBF)
                    && InRange(c3, 0x80, 0xBF)
                    )
                )
            {
                ch  = WCHAR(c1 & 0x0f) << 12;    // 0x0800 - 0xffff
                ch |= WCHAR(c2 & 0x3f) << 6;     // 0x0080 - 0x07ff
                ch |= WCHAR(c3 & 0x3f);
                if (!IsValidWideChar(ch) && ((options & (doAllowThreeByteSurrogates | doAllowInvalidWCHARs)) == 0))
                {
                    ch = GetUnknownCharacter(options);
                }
                ptr += 2;
            }
            else
            {
                ch = GetUnknownCharacter(options);
                // Windows OS 1713952. Only drop the illegal leading byte
                // Retry next byte.
                // ptr is already advanced.
            }
            break;

        case 4:
LFourByte:
            // 11110XXX 10XXxxxx 10xxxxxx 10xxxxxx or 11111xxx ....
            // NOTE: 11111xxx is not supported
            if (ptr + 2 >= end)
            {
                if ((options & doChunkedEncoding) != 0)
                {
                    // The is a sequence that spans a chunk, push ptr back to the beginning of the sequence.
                    ptr--;

                    if (chunkEndsAtTruncatedSequence)
                    {
                        *chunkEndsAtTruncatedSequence = true;
                    }
                }

                ch = GetUnknownCharacter(options);
                break;
            }

            c2 = ptr[0];
            c3 = ptr[1];
            c4 = ptr[2];

            //      UTF16         |   UTF8 1st byte  2nd byte 3rd byte 4th byte
            // U+10000..U+3FFFF   |    F0             90..BF   80..BF   80..BF
            // U+40000..U+FFFFF   |    F1..F3         80..BF   80..BF   80..BF
            // U+100000..U+10FFFF |    F4             80..8F   80..BF   80..BF
            if (! // NOT Unicode well-formed byte sequences
                    (
                    // any following be true
                        (c1 == 0xF0
                        && InRange(c2, 0x90,0xBF)
                        && InRange(c3, 0x80,0xBF)
                        && InRange(c4, 0x80,0xBF))
                    ||
                        (InRange(c1, 0xF1, 0xF3)
                        && InRange(c2, 0x80,0xBF)
                        && InRange(c3, 0x80,0xBF)
                        && InRange(c4, 0x80,0xBF))
                    ||
                        (c1 == 0xF4
                        && InRange(c2, 0x80,0x8F)
                        && InRange(c3, 0x80,0xBF)
                        && InRange(c4, 0x80,0xBF))
                    )
                )
            {
                // Windows OS 1713952. Only drop the illegal leading byte.
                // Retry next byte.
                // ptr is already advanced 1.
                ch = GetUnknownCharacter(options);
                break;
            }

            if ((options & doSecondSurrogatePair) == 0)
            {
                // Decode high 10 bits of utf-8 20 bit char
                ch  = WCHAR(c1 & 0x07) << 2;
                ch |= WCHAR(c2 & 0x30) >> 4;
                ch  = (ch - 1) << 6;             // ch == 0000 00ww ww00 0000
                ch |= WCHAR(c2 & 0x0f) << 2;     // ch == 0000 00ww wwzz zz00
                ch |= WCHAR(c3 & 0x30) >> 4;     // ch == 0000 00ww wwzz zzyy
                // Encode first word of utf-16 surrogate pair
                ch += 0xD800;
                // Remember next call must return second word
                options = (DecodeOptions)(options | doSecondSurrogatePair);
                // Leave ptr on byte 1, this way:
                //  - callers who test that ptr has been advanced by utf8::Decode will see progress for
                //    both words of the surrogate pair.
                //  - callers who calculate the number of multi-unit chars by subtracting after from before ptr
                //    will accumulate 0 for first word and 2 for second, thus utf8 chars equals 2 utf16 chars + 2
                //    multi-unit chars, as it should be.
            }
            else
            {
Example #12
0
                ch |= WCHAR(c3 & 0x30) >> 4;     // ch == 0000 00ww wwzz zzyy
                // Encode first word of utf-16 surrogate pair
                ch += 0xD800;
                // Remember next call must return second word
                options = (DecodeOptions)(options | doSecondSurrogatePair);
                // Leave ptr on byte 1, this way:
                //  - callers who test that ptr has been advanced by utf8::Decode will see progress for
                //    both words of the surrogate pair.
                //  - callers who calculate the number of multi-unit chars by subtracting after from before ptr
                //    will accumulate 0 for first word and 2 for second, thus utf8 chars equals 2 utf16 chars + 2
                //    multi-unit chars, as it should be.
            }
            else
            {
                // Decode low 10 bits of utf-8 20 bit char
                ch = WCHAR(c3 & 0x0f) << 6;     // ch == 0000 00yy yy00 0000
                ch |= WCHAR(c4 & 0x3f);          // ch == 0000 00yy yyxx xxxx
                // Encode second word of utf-16 surrogate pair
                ch += 0xDC00;
                // We're done with this char
                options = (DecodeOptions)(options & ~doSecondSurrogatePair);
                ptr += 3; // remember, got here by subtracting one from ptr in case 1, so effective increment is 2
            }
            break;
        }

        return ch;
    }

    LPCUTF8 NextCharFull(LPCUTF8 ptr)
    {
Example #13
0
static int
match_class(register const char *clss, register char testchar)
/*
 *	pattern is a pointer to the leading [ of
 *	a shell-type class.  testchar is the character to match against
 *	the class.
 */
{
    int match = 1;		/* false if first char is '!' */
    wchar_t __nlh_char[1];

    /* find end of class, ie an un-escaped ']' */
    register const char *eop = next_patt(clss, 0);
    ADVANCE(clss);

    if (CHARAT(eop) != ']')
	return 2;

    if (CHARAT(clss) == '!')
    {
	match = 0;
	ADVANCE(clss);
    }

    while (clss < eop)
    {
	register int ch = next_char(clss, &clss);
	char const *clss_end = clss;
	int sep = next_char(clss_end, &clss_end);
	int ch2 = next_char(clss_end, &clss_end);

	/* check if next three chars are a range */
	if (sep == '-' && ch2 != ']')
	{
	    /* check range - we have to use strcoll to do it right */
	    char c1[MB_LEN_MAX+1], c2[MB_LEN_MAX+1], tc[MB_LEN_MAX+1];
	    memset(c1, 0, sizeof(c1));
	    memset(c2, 0, sizeof(c2));
	    memset(tc, 0, sizeof(tc));
	    ch &= ~QUOTE;
	    WCHAR(ch, c1);
	    ch2 &= ~QUOTE;
	    WCHAR(ch2, c2);
	    WCHAR(testchar, tc);

	    /* if (ch <= testchar && testchar <= ch2) // Original code */

	    /* Second implementation:
	     * if (nl_strncmp(c1, tc, 1) <= 0 && nl_strncmp(tc, c2, 1) <= 0)
	     *     return match;
	     */

	    /* Third, portable implementation: */
	    if (strcoll(c1, tc) <= 0 && strcoll(tc, c2) <= 0)
		return match;
	    clss = clss_end;
	}
	else			/* they are not a range, check simple
				   match */
	{
	    if ((ch & ~QUOTE) == testchar)
		return match;
	}
    }

    return !match;
}