Example #1
void
swap_m68k_thread_state_regs(
struct m68k_thread_state_regs *cpu,
enum NXByteOrder target_byte_sex)
{
    uint32_t i;

	for(i = 0; i < 8; i++)
	    cpu->dreg[i] = OSSwapInt32(cpu->dreg[i]);
	for(i = 0; i < 8; i++)
	    cpu->areg[i] = OSSwapInt32(cpu->areg[i]);
	cpu->pad0 = OSSwapInt16(cpu->pad0);
	cpu->sr = OSSwapInt16(cpu->sr);
	cpu->pc = OSSwapInt32(cpu->pc);
}
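
Note that the routine above ignores target_byte_sex and calls OSSwapInt32 unconditionally; that is sound because a byte swap is its own inverse, so the same routine converts in either direction. A minimal standalone sketch of that property (the test values are illustrative, not from the source):

#include <assert.h>
#include <stdint.h>
#include <libkern/OSByteOrder.h>

int main(void)
{
    uint32_t v = 0x11223344;                      /* example value, chosen for readability */
    assert(OSSwapInt32(v) == 0x44332211);         /* bytes reversed */
    assert(OSSwapInt32(OSSwapInt32(v)) == v);     /* swapping twice is the identity */
    return 0;
}
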
Example #2
	PseudoAddressSpace::PseudoAddressSpace( Device& userclient, UserObjectHandle inKernAddrSpaceRef,
												void* inBuffer, UInt32 inBufferSize, void* inBackingStore, void* inRefCon) 
	: IOFireWireIUnknown( reinterpret_cast<const IUnknownVTbl &>( sInterface ) ),
		mNotifyIsOn(false),
		mWriter( nil ),
		mReader( nil ),
		mSkippedPacketHandler( nil ),
		mUserClient(userclient), 
		mKernAddrSpaceRef(inKernAddrSpaceRef),
		mBuffer((char*)inBuffer),
		mBufferSize(inBufferSize),
		mBackingStore(inBackingStore),
		mRefCon(inRefCon)
	{
		userclient.AddRef() ;

		mPendingLocks = ::CFDictionaryCreateMutable( kCFAllocatorDefault, 0, NULL, NULL ) ;
		if (!mPendingLocks)
			throw kIOReturnNoMemory ;
	
		AddressSpaceInfo info ;

		IOReturn error ;
		
		uint32_t outputCnt = 0;
		size_t outputStructSize =  sizeof( info ) ;
		const uint64_t inputs[1]={(const uint64_t)mKernAddrSpaceRef};

		error = IOConnectCallMethod(mUserClient.GetUserClientConnection(), 
									kPseudoAddrSpace_GetFWAddrInfo,
									inputs,1,
									NULL,0,
									NULL,&outputCnt,
									&info,&outputStructSize);
		if (error)
		{
			throw error ;
		}

#ifndef __LP64__		
		ROSETTA_ONLY(
			{
				info.address.nodeID = OSSwapInt16( info.address.nodeID );
				info.address.addressHi = OSSwapInt16( info.address.addressHi );
				info.address.addressLo = OSSwapInt32( info.address.addressLo );
			}
		);
#endif
Example #3
void
swap_nlist_64(
struct nlist_64 *symbols,
uint32_t nsymbols,
enum NXByteOrder target_byte_sex)
{
    uint32_t i;

	for(i = 0; i < nsymbols; i++){
	    symbols[i].n_un.n_strx = OSSwapInt32(symbols[i].n_un.n_strx);
	    /* n_type */
	    /* n_sect */
	    symbols[i].n_desc = OSSwapInt16(symbols[i].n_desc);
	    symbols[i].n_value = OSSwapInt64(symbols[i].n_value);
	}
}
Example #4
/*
 * utf8_encodelen - Calculate the UTF-8 encoding length
 *
 * This function takes a Unicode input string, ucsp, of ucslen bytes
 * and calculates the size of the UTF-8 output in bytes (not including
 * a NULL termination byte). The string must reside in kernel memory.
 *
 * If '/' chars are possible in the Unicode input then an alternate
 * (replacement) char should be provided in altslash.
 *
 * FLAGS
 *    UTF_REVERSE_ENDIAN:  Unicode byte order is opposite current runtime
 *
 *    UTF_BIG_ENDIAN:  Unicode byte order is always big endian
 *
 *    UTF_LITTLE_ENDIAN:  Unicode byte order is always little endian
 *
 *    UTF_DECOMPOSED:  generate fully decomposed output
 *
 *    UTF_PRECOMPOSED is ignored since utf8_encodestr doesn't support it
 *
 * ERRORS
 *    None
 */
size_t
utf8_encodelen(const u_int16_t * ucsp, size_t ucslen, u_int16_t altslash, int flags)
{
	u_int16_t ucs_ch;
	u_int16_t * chp = NULL;
	u_int16_t sequence[8];
	int extra = 0;
	int charcnt;
	int swapbytes = (flags & UTF_REVERSE_ENDIAN);
	int decompose = (flags & UTF_DECOMPOSED);
	size_t len;

	charcnt = (/*SG*/int)(ucslen / 2);
	len = 0;

	while (charcnt-- > 0) {
		if (extra > 0) {
			--extra;
			ucs_ch = *chp++;
		} else {
			ucs_ch = *ucsp++;
			if (swapbytes) {
				ucs_ch = OSSwapInt16(ucs_ch);
			}
			if (ucs_ch == '/') {
				ucs_ch = altslash ? altslash : '_';
			} else if (ucs_ch == '\0') {
				ucs_ch = UCS_ALT_NULL;
			} else if (decompose && unicode_decomposeable(ucs_ch)) {
				extra = unicode_decompose(ucs_ch, sequence) - 1;
				charcnt += extra;
				ucs_ch = sequence[0];
				chp = &sequence[1];
			}
		}
		len += UNICODE_TO_UTF8_LEN(ucs_ch);
	}

	return (len);
}
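
A hypothetical caller, sketched here to show the sizing pattern (the name buffer, its contents, and the flag choice are all illustrative): utf8_encodelen reports the exact output size, which can then be used to allocate the buffer passed to utf8_encodestr below.

	u_int16_t name[3] = { 'A', 0x00E9, '/' };   /* "Aé/" as UTF-16 code units; example data */
	size_t inbytes = sizeof(name);              /* length is passed in bytes, not code units */
	size_t outbytes = utf8_encodelen(name, inbytes, '_', UTF_DECOMPOSED);
	/* outbytes excludes the NUL terminator; allocate outbytes + 1 before calling utf8_encodestr */
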
Example #5
/*
 * utf8_encodestr - Encodes a Unicode string to UTF-8
 *
 * NOTES:
 *    The resulting UTF-8 string is NULL terminated.
 *
 *    If '/' chars are allowed on disk then an alternate
 *    (replacement) char must be provided in altslash.
 *
 * input flags:
 *    UTF_REVERSE_ENDIAN: Unicode byteorder is opposite current runtime
 *
 *    UTF_BIG_ENDIAN:  Unicode byte order is always big endian
 *
 *    UTF_LITTLE_ENDIAN:  Unicode byte order is always little endian
 *
 *    UTF_DECOMPOSED:  generate fully decomposed output
 *
 *    UTF_NO_NULL_TERM:  don't add NULL termination to UTF-8 output
 *
 * result:
 *    ENAMETOOLONG: Name didn't fit; only buflen bytes were encoded
 *
 *    EINVAL: Illegal char found; char was replaced by an '_'.
 */
static int
utf8_encodestr(const u_int16_t * ucsp, size_t ucslen, u_int8_t * utf8p,
               size_t * utf8len, size_t buflen, u_int16_t altslash, int flags)
{
	u_int8_t * bufstart;
	u_int8_t * bufend;
	u_int16_t ucs_ch;
	u_int16_t * chp = NULL;
	u_int16_t sequence[8];
	int extra = 0;
	int charcnt;
	int swapbytes = (flags & UTF_REVERSE_ENDIAN);
	int nullterm  = ((flags & UTF_NO_NULL_TERM) == 0);
	int decompose = (flags & UTF_DECOMPOSED);
	int sfmconv = (flags & UTF_SFM_CONVERSIONS);
	int result = 0;

	bufstart = utf8p;
	bufend = bufstart + buflen;
	if (nullterm)
		--bufend;
	charcnt = (/*SG*/int)(ucslen / 2);

	while (charcnt-- > 0) {
		if (extra > 0) {
			--extra;
			ucs_ch = *chp++;
		} else {
			ucs_ch = swapbytes ? OSSwapInt16(*ucsp++) : *ucsp++;

			if (decompose && unicode_decomposeable(ucs_ch)) {
				extra = unicode_decompose(ucs_ch, sequence) - 1;
				charcnt += extra;
				ucs_ch = sequence[0];
				chp = &sequence[1];
			}
		}

		/* Slash and NULL are not permitted */
		if (ucs_ch == '/') {
			if (altslash)
				ucs_ch = altslash;
			else {
				ucs_ch = '_';
				result = EINVAL;
			}
		} else if (ucs_ch == '\0') {
			ucs_ch = UCS_ALT_NULL;
		}

		if (ucs_ch < 0x0080) {
			if (utf8p >= bufend) {
				result = ENAMETOOLONG;
				break;
			}			
			*utf8p++ = (SG_uint8)ucs_ch;

		} else if (ucs_ch < 0x800) {
			if ((utf8p + 1) >= bufend) {
				result = ENAMETOOLONG;
				break;
			}
			*utf8p++ = 0xc0 | (SG_uint8)(ucs_ch >> 6);
			*utf8p++ = 0x80 | (SG_uint8)(0x3f & ucs_ch);

		} else {
			/* These chars are never valid Unicode. */
			if (ucs_ch == 0xFFFE || ucs_ch == 0xFFFF) {
				result = EINVAL;
				break;
			}

			/* Combine valid surrogate pairs */
			if (ucs_ch >= SP_HIGH_FIRST && ucs_ch <= SP_HIGH_LAST
				&& charcnt > 0) {
				u_int16_t ch2;
				u_int32_t pair;

				ch2 = swapbytes ? OSSwapInt16(*ucsp) : *ucsp;
				if (ch2 >= SP_LOW_FIRST && ch2 <= SP_LOW_LAST) {
					pair = ((ucs_ch - SP_HIGH_FIRST) << SP_HALF_SHIFT)
						+ (ch2 - SP_LOW_FIRST) + SP_HALF_BASE;
					if ((utf8p + 3) >= bufend) {
						result = ENAMETOOLONG;
						break;
					}
					--charcnt;
					++ucsp;				
					*utf8p++ = 0xf0 | (SG_uint8)(pair >> 18);
					*utf8p++ = 0x80 | (SG_uint8)(0x3f & (pair >> 12));
					*utf8p++ = 0x80 | (SG_uint8)(0x3f & (pair >> 6));
					*utf8p++ = 0x80 | (SG_uint8)(0x3f & pair);
					continue;
				}
			} else if (sfmconv) {
Example #6
uint16_t AJ_ByteSwap16(uint16_t x)
{
    return OSSwapInt16(x);
}
Example #7
void Float32ToNativeInt16_X86( const Float32 *src, SInt16 *dst, unsigned int numToConvert )
{
	const float *src0 = src;
	int16_t *dst0 = dst;
	unsigned int count = numToConvert;
	
	if (count >= 8) {
		// vector -- requires 8+ samples
		ROUNDMODE_NEG_INF
		const __m128 vround = (const __m128) { 0.5f, 0.5f, 0.5f, 0.5f };
		const __m128 vmin = (const __m128) { -32768.0f, -32768.0f, -32768.0f, -32768.0f };
		const __m128 vmax = (const __m128) { 32767.0f, 32767.0f, 32767.0f, 32767.0f  };
		const __m128 vscale = (const __m128) { 32768.0f, 32768.0f, 32768.0f, 32768.0f  };
		__m128 vf0, vf1;
		__m128i vi0, vi1, vpack0;
	
#define F32TOLE16 \
		vf0 = _mm_mul_ps(vf0, vscale);			\
		vf1 = _mm_mul_ps(vf1, vscale);			\
		vf0 = _mm_add_ps(vf0, vround);			\
		vf1 = _mm_add_ps(vf1, vround);			\
		vf0 = _mm_max_ps(vf0, vmin);			\
		vf1 = _mm_max_ps(vf1, vmin);			\
		vf0 = _mm_min_ps(vf0, vmax);			\
		vf1 = _mm_min_ps(vf1, vmax);			\
		vi0 = _mm_cvtps_epi32(vf0);			\
		vi1 = _mm_cvtps_epi32(vf1);			\
		vpack0 = _mm_packs_epi32(vi0, vi1);

		int falign = (uintptr_t)src & 0xF;
		int ialign = (uintptr_t)dst & 0xF;
	
		if (falign != 0 || ialign != 0) {
			// do one unaligned conversion
			vf0 = _mm_loadu_ps(src);
			vf1 = _mm_loadu_ps(src+4);
			F32TOLE16
			_mm_storeu_si128((__m128i *)dst, vpack0);
			
			// advance such that the destination ints are aligned
			unsigned int n = (16 - ialign) / 2;
			src += n;
			dst += n;
			count -= n;

			falign = (uintptr_t)src & 0xF;
			if (falign != 0) {
				// unaligned loads, aligned stores
				while (count >= 8) {
					vf0 = _mm_loadu_ps(src);
					vf1 = _mm_loadu_ps(src+4);
					F32TOLE16
					_mm_store_si128((__m128i *)dst, vpack0);
					src += 8;
					dst += 8;
					count -= 8;
				}
				goto VectorCleanup;
			}
		}
	
		// aligned loads, aligned stores
		while (count >= 8) {
			vf0 = _mm_load_ps(src);
			vf1 = _mm_load_ps(src+4);
			F32TOLE16
			_mm_store_si128((__m128i *)dst, vpack0);
			
			src += 8;
			dst += 8;
			count -= 8;
		}
VectorCleanup:
		if (count > 0) {
			// unaligned cleanup -- just do one unaligned vector at the end
			src = src0 + numToConvert - 8;
			dst = dst0 + numToConvert - 8;
			vf0 = _mm_loadu_ps(src);
			vf1 = _mm_loadu_ps(src+4);
			F32TOLE16
			_mm_storeu_si128((__m128i *)dst, vpack0);
		}
		RESTORE_ROUNDMODE
		return;
	}
	
	// scalar for small numbers of samples
	if (count > 0) {
		double scale = 2147483648.0, round = 32768.0, max32 = 2147483648.0 - 1.0 - 32768.0, min32 = 0.;
		ROUNDMODE_NEG_INF
		
		while (count-- > 0) {
			double f0 = *src++;
			f0 = f0 * scale + round;	// scale into 32-bit range; adding 32768 makes the later >>16 round to nearest
			SInt32 i0 = FloatToInt(f0, min32, max32);
			i0 >>= 16;			// keep the high 16 bits as the 16-bit sample
			*dst++ = i0;
		}
		RESTORE_ROUNDMODE
	}
}
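
Every converter in this example shares the same loop skeleton: one unaligned vector to reach alignment, an aligned main loop (with an unaligned-load variant when source and destination alignments disagree), and a final unaligned vector that deliberately overlaps samples already written instead of falling back to scalar cleanup. The overlap is safe because each output sample depends only on its own input sample, so re-converting it writes the same bytes. A small scalar sketch of that structure, with illustrative names and a stand-in operation:

static void double_all(const int *src, int *dst, unsigned n)
{
	enum { B = 4 };                         /* block size, standing in for the SIMD width */
	unsigned i = 0, j, count = n;

	while (count >= B) {                    /* "aligned" main loop, whole blocks only */
		for (j = 0; j < B; ++j)
			dst[i + j] = 2 * src[i + j];
		i += B;
		count -= B;
	}
	if (count > 0) {
		if (n >= B) {                       /* overlapping tail: redo the last full block */
			for (j = n - B; j < n; ++j)
				dst[j] = 2 * src[j];
		} else {                            /* input shorter than one block: plain scalar path */
			for (j = i; j < n; ++j)
				dst[j] = 2 * src[j];
		}
	}
}
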

// ===================================================================================================

void Float32ToSwapInt16_X86( const Float32 *src, SInt16 *dst, unsigned int numToConvert )
{
	const float *src0 = src;
	int16_t *dst0 = dst;
	unsigned int count = numToConvert;
	
	if (count >= 8) {
		// vector -- requires 8+ samples
		ROUNDMODE_NEG_INF
		const __m128 vround = (const __m128) { 0.5f, 0.5f, 0.5f, 0.5f };
		const __m128 vmin = (const __m128) { -32768.0f, -32768.0f, -32768.0f, -32768.0f };
		const __m128 vmax = (const __m128) { 32767.0f, 32767.0f, 32767.0f, 32767.0f  };
		const __m128 vscale = (const __m128) { 32768.0f, 32768.0f, 32768.0f, 32768.0f  };
		__m128 vf0, vf1;
		__m128i vi0, vi1, vpack0;
	
#define F32TOBE16 \
		vf0 = _mm_mul_ps(vf0, vscale);			\
		vf1 = _mm_mul_ps(vf1, vscale);			\
		vf0 = _mm_add_ps(vf0, vround);			\
		vf1 = _mm_add_ps(vf1, vround);			\
		vf0 = _mm_max_ps(vf0, vmin);			\
		vf1 = _mm_max_ps(vf1, vmin);			\
		vf0 = _mm_min_ps(vf0, vmax);			\
		vf1 = _mm_min_ps(vf1, vmax);			\
		vi0 = _mm_cvtps_epi32(vf0);			\
		vi1 = _mm_cvtps_epi32(vf1);			\
		vpack0 = _mm_packs_epi32(vi0, vi1);		\
		vpack0 = byteswap16(vpack0);

		int falign = (uintptr_t)src & 0xF;
		int ialign = (uintptr_t)dst & 0xF;
	
		if (falign != 0 || ialign != 0) {
			// do one unaligned conversion
			vf0 = _mm_loadu_ps(src);
			vf1 = _mm_loadu_ps(src+4);
			F32TOBE16
			_mm_storeu_si128((__m128i *)dst, vpack0);

			// and advance such that the destination ints are aligned
			unsigned int n = (16 - ialign) / 2;
			src += n;
			dst += n;
			count -= n;

			falign = (uintptr_t)src & 0xF;
			if (falign != 0) {
				// unaligned loads, aligned stores
				while (count >= 8) {
					vf0 = _mm_loadu_ps(src);
					vf1 = _mm_loadu_ps(src+4);
					F32TOBE16
					_mm_store_si128((__m128i *)dst, vpack0);
					src += 8;
					dst += 8;
					count -= 8;
				}
				goto VectorCleanup;
			}
		}
	
		// aligned loads, aligned stores
		while (count >= 8) {
			vf0 = _mm_load_ps(src);
			vf1 = _mm_load_ps(src+4);
			F32TOBE16
			_mm_store_si128((__m128i *)dst, vpack0);
			
			src += 8;
			dst += 8;
			count -= 8;
		}
VectorCleanup:
		if (count > 0) {
			// unaligned cleanup -- just do one unaligned vector at the end
			src = src0 + numToConvert - 8;
			dst = dst0 + numToConvert - 8;
			vf0 = _mm_loadu_ps(src);
			vf1 = _mm_loadu_ps(src+4);
			F32TOBE16
			_mm_storeu_si128((__m128i *)dst, vpack0);
		}
		RESTORE_ROUNDMODE
		return;
	}
	
	// scalar for small numbers of samples
	if (count > 0) {
		double scale = 2147483648.0, round = 32768.0, max32 = 2147483648.0 - 1.0 - 32768.0, min32 = 0.;
		ROUNDMODE_NEG_INF
		
		while (count-- > 0) {
			double f0 = *src++;
			f0 = f0 * scale + round;
			SInt32 i0 = FloatToInt(f0, min32, max32);
			i0 >>= 16;
			*dst++ = OSSwapInt16(i0);
		}
		RESTORE_ROUNDMODE
	}
}

// ===================================================================================================

void Float32ToNativeInt32_X86( const Float32 *src, SInt32 *dst, unsigned int numToConvert )
{
	const float *src0 = src;
	SInt32 *dst0 = dst;
	unsigned int count = numToConvert;
	
	if (count >= 4) {
		// vector -- requires 4+ samples
		ROUNDMODE_NEG_INF
		const __m128 vround = (const __m128) { 0.5f, 0.5f, 0.5f, 0.5f };
		const __m128 vmin = (const __m128) { -2147483648.0f, -2147483648.0f, -2147483648.0f, -2147483648.0f };
		const __m128 vmax = (const __m128) { kMaxFloat32, kMaxFloat32, kMaxFloat32, kMaxFloat32  };
		const __m128 vscale = (const __m128) { 2147483648.0f, 2147483648.0f, 2147483648.0f, 2147483648.0f  };
		__m128 vf0;
		__m128i vi0;
	
#define F32TOLE32(x) \
		vf##x = _mm_mul_ps(vf##x, vscale);			\
		vf##x = _mm_add_ps(vf##x, vround);			\
		vf##x = _mm_max_ps(vf##x, vmin);			\
		vf##x = _mm_min_ps(vf##x, vmax);			\
		vi##x = _mm_cvtps_epi32(vf##x);			\

		int falign = (uintptr_t)src & 0xF;
		int ialign = (uintptr_t)dst & 0xF;
	
		if (falign != 0 || ialign != 0) {
			// do one unaligned conversion
			vf0 = _mm_loadu_ps(src);
			F32TOLE32(0)
			_mm_storeu_si128((__m128i *)dst, vi0);
			
			// and advance such that the destination ints are aligned
			unsigned int n = (16 - ialign) / 4;
			src += n;
			dst += n;
			count -= n;

			falign = (uintptr_t)src & 0xF;
			if (falign != 0) {
				// unaligned loads, aligned stores
				while (count >= 4) {
					vf0 = _mm_loadu_ps(src);
					F32TOLE32(0)
					_mm_store_si128((__m128i *)dst, vi0);
					src += 4;
					dst += 4;
					count -= 4;
				}
				goto VectorCleanup;
			}
		}
	
		while (count >= 4) {
			vf0 = _mm_load_ps(src);
			F32TOLE32(0)
			_mm_store_si128((__m128i *)dst, vi0);
			
			src += 4;
			dst += 4;
			count -= 4;
		}
VectorCleanup:
		if (count > 0) {
			// unaligned cleanup -- just do one unaligned vector at the end
			src = src0 + numToConvert - 4;
			dst = dst0 + numToConvert - 4;
			vf0 = _mm_loadu_ps(src);
			F32TOLE32(0)
			_mm_storeu_si128((__m128i *)dst, vi0);
		}
		RESTORE_ROUNDMODE
		return;
	}
	
	// scalar for small numbers of samples
	if (count > 0) {
		double scale = 2147483648.0, round = 0.5, max32 = 2147483648.0 - 1.0 - 0.5, min32 = 0.;
		ROUNDMODE_NEG_INF
		
		while (count-- > 0) {
			double f0 = *src++;
			f0 = f0 * scale + round;
			SInt32 i0 = FloatToInt(f0, min32, max32);
			*dst++ = i0;
		}
		RESTORE_ROUNDMODE
	}
}

// ===================================================================================================

void Float32ToSwapInt32_X86( const Float32 *src, SInt32 *dst, unsigned int numToConvert )
{
	const float *src0 = src;
	SInt32 *dst0 = dst;
	unsigned int count = numToConvert;
	
	if (count >= 4) {
		// vector -- requires 4+ samples
		ROUNDMODE_NEG_INF
		const __m128 vround = (const __m128) { 0.5f, 0.5f, 0.5f, 0.5f };
		const __m128 vmin = (const __m128) { -2147483648.0f, -2147483648.0f, -2147483648.0f, -2147483648.0f };
		const __m128 vmax = (const __m128) { kMaxFloat32, kMaxFloat32, kMaxFloat32, kMaxFloat32  };
		const __m128 vscale = (const __m128) { 2147483648.0f, 2147483648.0f, 2147483648.0f, 2147483648.0f  };
		__m128 vf0;
		__m128i vi0;
	
#define F32TOBE32(x) \
		vf##x = _mm_mul_ps(vf##x, vscale);			\
		vf##x = _mm_add_ps(vf##x, vround);			\
		vf##x = _mm_max_ps(vf##x, vmin);			\
		vf##x = _mm_min_ps(vf##x, vmax);			\
		vi##x = _mm_cvtps_epi32(vf##x);			\
		vi##x = byteswap32(vi##x);

		int falign = (uintptr_t)src & 0xF;
		int ialign = (uintptr_t)dst & 0xF;
	
		if (falign != 0 || ialign != 0) {
			// do one unaligned conversion
			vf0 = _mm_loadu_ps(src);
			F32TOBE32(0)
			_mm_storeu_si128((__m128i *)dst, vi0);
			
			// and advance such that the destination ints are aligned
			unsigned int n = (16 - ialign) / 4;
			src += n;
			dst += n;
			count -= n;

			falign = (uintptr_t)src & 0xF;
			if (falign != 0) {
				// unaligned loads, aligned stores
				while (count >= 4) {
					vf0 = _mm_loadu_ps(src);
					F32TOBE32(0)
					_mm_store_si128((__m128i *)dst, vi0);
					src += 4;
					dst += 4;
					count -= 4;
				}
				goto VectorCleanup;
			}
		}
	
		while (count >= 4) {
			vf0 = _mm_load_ps(src);
			F32TOBE32(0)
			_mm_store_si128((__m128i *)dst, vi0);
			
			src += 4;
			dst += 4;
			count -= 4;
		}
VectorCleanup:
		if (count > 0) {
			// unaligned cleanup -- just do one unaligned vector at the end
			src = src0 + numToConvert - 4;
			dst = dst0 + numToConvert - 4;
			vf0 = _mm_loadu_ps(src);
			F32TOBE32(0)
			_mm_storeu_si128((__m128i *)dst, vi0);
		}
		RESTORE_ROUNDMODE
		return;
	}
	
	// scalar for small numbers of samples
	if (count > 0) {
		double scale = 2147483648.0, round = 0.5, max32 = 2147483648.0 - 1.0 - 0.5, min32 = 0.;
		ROUNDMODE_NEG_INF
		
		while (count-- > 0) {
			double f0 = *src++;
			f0 = f0 * scale + round;
			SInt32 i0 = FloatToInt(f0, min32, max32);
			*dst++ = OSSwapInt32(i0);
		}
		RESTORE_ROUNDMODE
	}
}

// ===================================================================================================

// ~14 instructions
static inline __m128i Pack32ToLE24(__m128i val, __m128i mask)
{
	__m128i store;
	val = _mm_srli_si128(val, 1);
	store = _mm_and_si128(val, mask);

	val = _mm_srli_si128(val, 1);
	mask = _mm_slli_si128(mask, 3);
	store = _mm_or_si128(store, _mm_and_si128(val, mask));

	val = _mm_srli_si128(val, 1);
	mask = _mm_slli_si128(mask, 3);
	store = _mm_or_si128(store, _mm_and_si128(val, mask));

	val = _mm_srli_si128(val, 1);
	mask = _mm_slli_si128(mask, 3);
	store = _mm_or_si128(store, _mm_and_si128(val, mask));
	return store;
}
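
Restated as scalar code, the shifts above keep the high three bytes of each 32-bit sample and store them as packed little-endian 24-bit values. This reference version is an assumption read off the mask arithmetic, not part of the original file:

static void Pack32ToLE24_scalar(const UInt32 src[4], UInt8 dst[12])
{
	int i;
	for (i = 0; i < 4; ++i) {
		UInt32 s = src[i] >> 8;             /* drop the low byte; keep the high 24 bits */
		dst[3*i + 0] = (UInt8)s;            /* little-endian byte order within each sample */
		dst[3*i + 1] = (UInt8)(s >> 8);
		dst[3*i + 2] = (UInt8)(s >> 16);
	}
}
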

// marginally faster than scalar
void Float32ToNativeInt24_X86( const Float32 *src, UInt8 *dst, unsigned int numToConvert )
{
	const Float32 *src0 = src;
	UInt8 *dst0 = dst;
	unsigned int count = numToConvert;
	
	if (count >= 6) {
		// vector -- requires 6+ samples
		ROUNDMODE_NEG_INF
		const __m128 vround = (const __m128) { 0.5f, 0.5f, 0.5f, 0.5f };
		const __m128 vmin = (const __m128) { -2147483648.0f, -2147483648.0f, -2147483648.0f, -2147483648.0f };
		const __m128 vmax = (const __m128) { kMaxFloat32, kMaxFloat32, kMaxFloat32, kMaxFloat32  };
		const __m128 vscale = (const __m128) { 2147483648.0f, 2147483648.0f, 2147483648.0f, 2147483648.0f  };
		__m128i mask = _mm_setr_epi32(0x00FFFFFF, 0, 0, 0);
			// it is actually cheaper to copy and shift this mask on the fly than to have 4 of them

		__m128i store;
		union {
			UInt32 i[4];
			__m128i v;
		} u;

		__m128 vf0;
		__m128i vi0;

		int falign = (uintptr_t)src & 0xF;
	
		if (falign != 0) {
			// do one unaligned conversion
			vf0 = _mm_loadu_ps(src);
			F32TOLE32(0)
			store = Pack32ToLE24(vi0, mask);
			_mm_storeu_si128((__m128i *)dst, store);

			// and advance such that the source floats are aligned
			unsigned int n = (16 - falign) / 4;
			src += n;
			dst += 3*n;	// bytes
			count -= n;
		}
	
		while (count >= 6) {
			vf0 = _mm_load_ps(src);
			F32TOLE32(0)
			store = Pack32ToLE24(vi0, mask);
			_mm_storeu_si128((__m128i *)dst, store);	// destination always unaligned
			
			src += 4;
			dst += 12;	// bytes
			count -= 4;
		}
		
		
		if (count >= 4) {
			vf0 = _mm_load_ps(src);
			F32TOLE32(0)
			u.v = Pack32ToLE24(vi0, mask);
			((UInt32 *)dst)[0] = u.i[0];
			((UInt32 *)dst)[1] = u.i[1];
			((UInt32 *)dst)[2] = u.i[2];
			
			src += 4;
			dst += 12;	// bytes
			count -= 4;
		}

		if (count > 0) {
			// unaligned cleanup -- just do one unaligned vector at the end
			src = src0 + numToConvert - 4;
			dst = dst0 + 3*numToConvert - 12;
			vf0 = _mm_loadu_ps(src);
			F32TOLE32(0)
			u.v = Pack32ToLE24(vi0, mask);
			((UInt32 *)dst)[0] = u.i[0];
			((UInt32 *)dst)[1] = u.i[1];
			((UInt32 *)dst)[2] = u.i[2];
		}
		RESTORE_ROUNDMODE
		return;
	}
	
	// scalar for small numbers of samples
	if (count > 0) {
		double scale = 2147483648.0, round = 0.5, max32 = 2147483648.0 - 1.0 - 0.5, min32 = 0.;
		ROUNDMODE_NEG_INF
		
		while (count-- > 0) {
			double f0 = *src++;
			f0 = f0 * scale + round;
			UInt32 i0 = FloatToInt(f0, min32, max32);
			dst[0] = (UInt8)(i0 >> 8);
			dst[1] = (UInt8)(i0 >> 16);
			dst[2] = (UInt8)(i0 >> 24);
			dst += 3;
		}
		RESTORE_ROUNDMODE
	}
}


// ===================================================================================================
#pragma mark -
#pragma mark Int -> Float

void NativeInt16ToFloat32_X86( const SInt16 *src, Float32 *dst, unsigned int numToConvert )
{
	const SInt16 *src0 = src;
	Float32 *dst0 = dst;
	unsigned int count = numToConvert;

	if (count >= 8) {
		// vector -- requires 8+ samples
		// convert the 16-bit words to the high word of 32-bit values
#define LEI16TOF32(x, y) \
	vi##x = _mm_unpacklo_epi16(zero, vpack##x); \
	vi##y = _mm_unpackhi_epi16(zero, vpack##x); \
	vf##x = _mm_cvtepi32_ps(vi##x); \
	vf##y = _mm_cvtepi32_ps(vi##y); \
	vf##x = _mm_mul_ps(vf##x, vscale); \
	vf##y = _mm_mul_ps(vf##y, vscale);
		
		const __m128 vscale = (const __m128) { 1.0/2147483648.0f, 1.0/2147483648.0f, 1.0/2147483648.0f, 1.0/2147483648.0f  };
		const __m128i zero = _mm_setzero_si128();
		__m128 vf0, vf1;
		__m128i vi0, vi1, vpack0;

		int ialign = (uintptr_t)src & 0xF;
		int falign = (uintptr_t)dst & 0xF;
	
		if (falign != 0 || ialign != 0) {
			// do one unaligned conversion
			vpack0 = _mm_loadu_si128((__m128i const *)src);
			LEI16TOF32(0, 1)
			_mm_storeu_ps(dst, vf0);
			_mm_storeu_ps(dst+4, vf1);
			
			// and advance such that the destination floats are aligned
			unsigned int n = (16 - falign) / 4;
			src += n;
			dst += n;
			count -= n;

			ialign = (uintptr_t)src & 0xF;
			if (ialign != 0) {
				// unaligned loads, aligned stores
				while (count >= 8) {
					vpack0 = _mm_loadu_si128((__m128i const *)src);
					LEI16TOF32(0, 1)
					_mm_store_ps(dst, vf0);
					_mm_store_ps(dst+4, vf1);
					src += 8;
					dst += 8;
					count -= 8;
				}
				goto VectorCleanup;
			}
		}
	
		// aligned loads, aligned stores
		while (count >= 8) {
			vpack0 = _mm_load_si128((__m128i const *)src);
			LEI16TOF32(0, 1)
			_mm_store_ps(dst, vf0);
			_mm_store_ps(dst+4, vf1);
			src += 8;
			dst += 8;
			count -= 8;
		}
		
VectorCleanup:
		if (count > 0) {
			// unaligned cleanup -- just do one unaligned vector at the end
			src = src0 + numToConvert - 8;
			dst = dst0 + numToConvert - 8;
			vpack0 = _mm_loadu_si128((__m128i const *)src);
			LEI16TOF32(0, 1)
			_mm_storeu_ps(dst, vf0);
			_mm_storeu_ps(dst+4, vf1);
		}
		return;
	}
	// scalar for small numbers of samples
	if (count > 0) {
		double scale = 1./32768.f;
		while (count-- > 0) {
			SInt16 i = *src++;
			double f = (double)i * scale;
			*dst++ = f;
		}
	}
}

// ===================================================================================================

void SwapInt16ToFloat32_X86( const SInt16 *src, Float32 *dst, unsigned int numToConvert )
{
	const SInt16 *src0 = src;
	Float32 *dst0 = dst;
	unsigned int count = numToConvert;

	if (count >= 8) {
		// vector -- requires 8+ samples
		// convert the 16-bit words to the high word of 32-bit values
#define BEI16TOF32 \
	vpack0 = byteswap16(vpack0); \
	vi0 = _mm_unpacklo_epi16(zero, vpack0); \
	vi1 = _mm_unpackhi_epi16(zero, vpack0); \
	vf0 = _mm_cvtepi32_ps(vi0); \
	vf1 = _mm_cvtepi32_ps(vi1); \
	vf0 = _mm_mul_ps(vf0, vscale); \
	vf1 = _mm_mul_ps(vf1, vscale);
		
		const __m128 vscale = (const __m128) { 1.0/2147483648.0f, 1.0/2147483648.0f, 1.0/2147483648.0f, 1.0/2147483648.0f  };
		const __m128i zero = _mm_setzero_si128();
		__m128 vf0, vf1;
		__m128i vi0, vi1, vpack0;

		int ialign = (uintptr_t)src & 0xF;
		int falign = (uintptr_t)dst & 0xF;
	
		if (falign != 0 || ialign != 0) {
			// do one unaligned conversion
			vpack0 = _mm_loadu_si128((__m128i const *)src);
			BEI16TOF32
			_mm_storeu_ps(dst, vf0);
			_mm_storeu_ps(dst+4, vf1);

			// and advance such that the destination floats are aligned
			unsigned int n = (16 - falign) / 4;
			src += n;
			dst += n;
			count -= n;

			ialign = (uintptr_t)src & 0xF;
			if (ialign != 0) {
				// unaligned loads, aligned stores
				while (count >= 8) {
					vpack0 = _mm_loadu_si128((__m128i const *)src);
					BEI16TOF32
					_mm_store_ps(dst, vf0);
					_mm_store_ps(dst+4, vf1);
					src += 8;
					dst += 8;
					count -= 8;
				}
				goto VectorCleanup;
			}
		}
	
		// aligned loads, aligned stores
		while (count >= 8) {
			vpack0 = _mm_load_si128((__m128i const *)src);
			BEI16TOF32
			_mm_store_ps(dst, vf0);
			_mm_store_ps(dst+4, vf1);
			src += 8;
			dst += 8;
			count -= 8;
		}
		
VectorCleanup:
		if (count > 0) {
			// unaligned cleanup -- just do one unaligned vector at the end
			src = src0 + numToConvert - 8;
			dst = dst0 + numToConvert - 8;
			vpack0 = _mm_loadu_si128((__m128i const *)src);
			BEI16TOF32
			_mm_storeu_ps(dst, vf0);
			_mm_storeu_ps(dst+4, vf1);
		}
		return;
	}
	// scalar for small numbers of samples
	if (count > 0) {
		double scale = 1./32768.f;
		while (count-- > 0) {
			SInt16 i = *src++;
			i = OSSwapInt16(i);
			double f = (double)i * scale;
			*dst++ = f;
		}
	}
}

// ===================================================================================================

void NativeInt32ToFloat32_X86( const SInt32 *src, Float32 *dst, unsigned int numToConvert )
{
	const SInt32 *src0 = src;
	Float32 *dst0 = dst;
	unsigned int count = numToConvert;

	if (count >= 4) {
		// vector -- requires 4+ samples
#define LEI32TOF32(x) \
	vf##x = _mm_cvtepi32_ps(vi##x); \
	vf##x = _mm_mul_ps(vf##x, vscale); \
		
		const __m128 vscale = (const __m128) { 1.0/2147483648.0f, 1.0/2147483648.0f, 1.0/2147483648.0f, 1.0/2147483648.0f  };
		__m128 vf0;
		__m128i vi0;

		int ialign = (uintptr_t)src & 0xF;
		int falign = (uintptr_t)dst & 0xF;
	
		if (falign != 0 || ialign != 0) {
			// do one unaligned conversion
			vi0 = _mm_loadu_si128((__m128i const *)src);
			LEI32TOF32(0)
			_mm_storeu_ps(dst, vf0);
			
			// and advance such that the destination floats are aligned
			unsigned int n = (16 - falign) / 4;
			src += n;
			dst += n;
			count -= n;

			ialign = (uintptr_t)src & 0xF;
			if (ialign != 0) {
				// unaligned loads, aligned stores
				while (count >= 4) {
					vi0 = _mm_loadu_si128((__m128i const *)src);
					LEI32TOF32(0)
					_mm_store_ps(dst, vf0);
					src += 4;
					dst += 4;
					count -= 4;
				}
				goto VectorCleanup;
			}
		}
	
		// aligned loads, aligned stores
		while (count >= 4) {
			vi0 = _mm_load_si128((__m128i const *)src);
			LEI32TOF32(0)
			_mm_store_ps(dst, vf0);
			src += 4;
			dst += 4;
			count -= 4;
		}
		
VectorCleanup:
		if (count > 0) {
			// unaligned cleanup -- just do one unaligned vector at the end
			src = src0 + numToConvert - 4;
			dst = dst0 + numToConvert - 4;
			vi0 = _mm_loadu_si128((__m128i const *)src);
			LEI32TOF32(0)
			_mm_storeu_ps(dst, vf0);
		}
		return;
	}
	// scalar for small numbers of samples
	if (count > 0) {
		double scale = 1./2147483648.0f;
		while (count-- > 0) {
			SInt32 i = *src++;
			double f = (double)i * scale;
			*dst++ = f;
		}
	}
}

// ===================================================================================================

void SwapInt32ToFloat32_X86( const SInt32 *src, Float32 *dst, unsigned int numToConvert )
{
	const SInt32 *src0 = src;
	Float32 *dst0 = dst;
	unsigned int count = numToConvert;

	if (count >= 4) {
		// vector -- requires 4+ samples
#define BEI32TOF32(x) \
	vi##x = byteswap32(vi##x); \
	vf##x = _mm_cvtepi32_ps(vi##x); \
	vf##x = _mm_mul_ps(vf##x, vscale); \
		
		const __m128 vscale = (const __m128) { 1.0/2147483648.0f, 1.0/2147483648.0f, 1.0/2147483648.0f, 1.0/2147483648.0f  };
		__m128 vf0;
		__m128i vi0;

		int ialign = (uintptr_t)src & 0xF;
		int falign = (uintptr_t)dst & 0xF;
	
		if (falign != 0 || ialign != 0) {
			// do one unaligned conversion
			vi0 = _mm_loadu_si128((__m128i const *)src);
			BEI32TOF32(0)
			_mm_storeu_ps(dst, vf0);
			
			// and advance such that the destination floats are aligned
			unsigned int n = (16 - falign) / 4;
			src += n;
			dst += n;
			count -= n;

			ialign = (uintptr_t)src & 0xF;
			if (ialign != 0) {
				// unaligned loads, aligned stores
				while (count >= 4) {
					vi0 = _mm_loadu_si128((__m128i const *)src);
					BEI32TOF32(0)
					_mm_store_ps(dst, vf0);
					src += 4;
					dst += 4;
					count -= 4;
				}
				goto VectorCleanup;
			}
		}
	
		// aligned loads, aligned stores
		while (count >= 4) {
			vi0 = _mm_load_si128((__m128i const *)src);
			BEI32TOF32(0)
			_mm_store_ps(dst, vf0);
			src += 4;
			dst += 4;
			count -= 4;
		}
		
VectorCleanup:
		if (count > 0) {
			// unaligned cleanup -- just do one unaligned vector at the end
			src = src0 + numToConvert - 4;
			dst = dst0 + numToConvert - 4;
			vi0 = _mm_loadu_si128((__m128i const *)src);
			BEI32TOF32(0)
			_mm_storeu_ps(dst, vf0);
		}
		return;
	}
	// scalar for small numbers of samples
	if (count > 0) {
		double scale = 1./2147483648.0f;
		while (count-- > 0) {
			SInt32 i = *src++;
			i = OSSwapInt32(i);
			double f = (double)i * scale;
			*dst++ = f;
		}
	}
}
Example #8
IOReturn
AppleUHCIIsochTransferDescriptor::UpdateFrameList(AbsoluteTime timeStamp)
{
    UInt32						statFlags;
    IOUSBIsocFrame 				*pFrames;    
    IOUSBLowLatencyIsocFrame 	*pLLFrames;    
    IOReturn					frStatus = kIOReturnSuccess;
    UInt16						frActualCount = 0;
    UInt16						frReqCount;
    
    statFlags = USBToHostLong(GetSharedLogical()->ctrlStatus);
	frActualCount = UHCI_TD_GET_ACTLEN(statFlags);
    // warning - this method can run at primary interrupt time, which can cause a panic if it logs too much
    // USBLog(7, "AppleUHCIIsochTransferDescriptor[%p]::UpdateFrameList statFlags (%x)", this, statFlags);
    pFrames = _pFrames;
	if (!pFrames)							// this will be the case for the dummy TD
		return kIOReturnSuccess;
	
    pLLFrames = (IOUSBLowLatencyIsocFrame*)_pFrames;
    if (_lowLatency)
    {
		frReqCount = pLLFrames[_frameIndex].frReqCount;
    }
    else
    {
		frReqCount = pFrames[_frameIndex].frReqCount;
    }
	
    if (statFlags & kUHCI_TD_ACTIVE)
    {
		frStatus = kIOUSBNotSent2Err;
    }
    else if (statFlags & kUHCI_TD_CRCTO)
    {
		frStatus = kIOReturnNotResponding;
    }
    else if (statFlags & kUHCI_TD_DBUF)									// data buffer (PCI error)
    {
		if (_pEndpoint->direction == kUSBOut)
			frStatus = kIOUSBBufferUnderrunErr;
		else
			frStatus = kIOUSBBufferOverrunErr;
    }
    else if (statFlags & kUHCI_TD_BABBLE)
    {
		if (_pEndpoint->direction == kUSBOut)
			frStatus = kIOReturnNotResponding;							// babble on OUT. this should never happen
		else
			frStatus = kIOReturnOverrun;
    }
    else if (statFlags & kUHCI_TD_STALLED)								// if STALL happens on Isoch, it is most likely covered by one of the other bits above
    {
		frStatus = kIOUSBWrongPIDErr;
    }
    else
    {
		if (frActualCount != frReqCount)
		{
			if (_pEndpoint->direction == kUSBOut)
			{
				// warning - this method can run at primary interrupt time, which can cause a panic if it logs too much
				// USBLog(7, "AppleUHCIIsochTransferDescriptor[%p]::UpdateFrameList - (OUT) reqCount (%d) actCount (%d)", this, frReqCount, frActualCount);
				frStatus = kIOUSBBufferUnderrunErr;						// this better have generated a DBUF or other error
			}
			else if (_pEndpoint->direction == kUSBIn)
			{
				// warning - this method can run at primary interrupt time, which can cause a panic if it logs too much
				// USBLog(7, "AppleUHCIIsochTransferDescriptor[%p]::UpdateFrameList - (IN) reqCount (%d) actCount (%d)", this, frReqCount, frActualCount);
				frStatus = kIOReturnUnderrun;							// benign error
			}
		}
    }

    if (alignBuffer && alignBuffer->userBuffer && alignBuffer->vaddr && (_pEndpoint->direction == kUSBIn))
	{
		// i can't log in here because this is called at interrupt time
		// i know that this is OK for Low Latency because the buffer will be allocated in low memory and won't be bounced
		alignBuffer->userBuffer->writeBytes(alignBuffer->userOffset, (void*)alignBuffer->vaddr, frActualCount);
		alignBuffer->actCount = frActualCount;
	}
	
	
	if (_lowLatency)
    {
		if ( _requestFromRosettaClient )
		{
			pLLFrames[_frameIndex].frActCount = OSSwapInt16(frActualCount);
			pLLFrames[_frameIndex].frReqCount = OSSwapInt16(pLLFrames[_frameIndex].frReqCount);
			AbsoluteTime_to_scalar(&pLLFrames[_frameIndex].frTimeStamp) = OSSwapInt64(AbsoluteTime_to_scalar(&timeStamp));
			pLLFrames[_frameIndex].frStatus = OSSwapInt32(frStatus);
		}
		else
		{
			pLLFrames[_frameIndex].frActCount = frActualCount;
			pLLFrames[_frameIndex].frTimeStamp = timeStamp;
			pLLFrames[_frameIndex].frStatus = frStatus;

#ifdef __LP64__
			USBTrace( kUSBTUHCIInterrupts, kTPUHCIUpdateFrameList , (uintptr_t)((_pEndpoint->direction << 24) | ( _pEndpoint->functionAddress << 8) | _pEndpoint->endpointNumber), (uintptr_t)&pLLFrames[_frameIndex], (uintptr_t)frActualCount, (uintptr_t)timeStamp );
#else
			USBTrace( kUSBTUHCIInterrupts, kTPUHCIUpdateFrameList , (uintptr_t)((_pEndpoint->direction << 24) | ( _pEndpoint->functionAddress << 8) | _pEndpoint->endpointNumber), (uintptr_t)&pLLFrames[_frameIndex], (uintptr_t)(timeStamp.hi), (uintptr_t)timeStamp.lo );
#endif
		}
    }
    else
    {
		if ( _requestFromRosettaClient )
		{
			pFrames[_frameIndex].frActCount = OSSwapInt16(frActualCount);
			pFrames[_frameIndex].frReqCount = OSSwapInt16(pFrames[_frameIndex].frReqCount);
			pFrames[_frameIndex].frStatus = OSSwapInt32(frStatus);
		}
		else
		{
			pFrames[_frameIndex].frActCount = frActualCount;
			pFrames[_frameIndex].frStatus = frStatus;
		}
    }
	
    if (frStatus != kIOReturnSuccess)
    {
		if (frStatus != kIOReturnUnderrun)
		{
			_pEndpoint->accumulatedStatus = frStatus;
		}
		else if (_pEndpoint->accumulatedStatus == kIOReturnSuccess)
		{
			_pEndpoint->accumulatedStatus = kIOReturnUnderrun;
		}
    }
    return frStatus;
}
Example #9
void
swap_i386_thread_fpstate(
i386_thread_fpstate_t *fpu,
enum NXByteOrder target_byte_sex)
{
    struct swapped_fp_control {
	union {
	    struct {
		unsigned short
			    :3,
		    /*inf*/ :1,
		    rc	    :2,
		    pc	    :2,
			    :2,
		    precis  :1,
		    undfl   :1,
		    ovrfl   :1,
		    zdiv    :1,
		    denorm  :1,
		    invalid :1;
	    } fields;
	    unsigned short half;
	} u;
    } sfpc;

    struct swapped_fp_status {
	union {
	    struct {
		unsigned short
		    busy    :1,
		    c3	    :1,
		    tos	    :3,
		    c2	    :1,
		    c1	    :1,
		    c0	    :1,
		    errsumm :1,
		    stkflt  :1,
		    precis  :1,
		    undfl   :1,
		    ovrfl   :1,
		    zdiv    :1,
		    denorm  :1,
		    invalid :1;
	    } fields;
	    unsigned short half;
	} u;
    } sfps;

    struct swapped_fp_tag {
	union {
	    struct {
		unsigned short
		    tag7 :2,
		    tag6 :2,
		    tag5 :2,
		    tag4 :2,
		    tag3 :2,
		    tag2 :2,
		    tag1 :2,
		    tag0 :2;
	    } fields;
	    unsigned short half;
	} u;
    } sfpt;

    struct swapped_fp_data_reg {
	unsigned short mant;
	unsigned short mant1 :16,
		       mant2 :16,
		       mant3 :16;
	union {
	    struct {
		unsigned short sign :1,
			       exp  :15;
	    } fields;
	    unsigned short half;
	} u;
    } sfpd;

    struct swapped_sel {
	union {
	    struct {
    		unsigned short
		    index :13,
		    ti    :1,
		    rpl   :2;
	    } fields;
	    unsigned short half;
	} u;
    } ss;

    enum NXByteOrder host_byte_sex;
    int i;

	host_byte_sex = NXHostByteOrder();

	fpu->environ.ip = OSSwapInt32(fpu->environ.ip);
	fpu->environ.opcode = OSSwapInt16(fpu->environ.opcode);
	fpu->environ.dp = OSSwapInt32(fpu->environ.dp);

	if(target_byte_sex == host_byte_sex){
	    memcpy(&sfpc, &(fpu->environ.control),
		   sizeof(struct swapped_fp_control));
	    sfpc.u.half = OSSwapInt16(sfpc.u.half);
	    fpu->environ.control.rc = sfpc.u.fields.rc;
	    fpu->environ.control.pc = sfpc.u.fields.pc;
	    fpu->environ.control.precis = sfpc.u.fields.precis;
	    fpu->environ.control.undfl = sfpc.u.fields.undfl;
	    fpu->environ.control.ovrfl = sfpc.u.fields.ovrfl;
	    fpu->environ.control.zdiv = sfpc.u.fields.zdiv;
	    fpu->environ.control.denorm = sfpc.u.fields.denorm;
	    fpu->environ.control.invalid = sfpc.u.fields.invalid;

	    memcpy(&sfps, &(fpu->environ.status),
		   sizeof(struct swapped_fp_status));
	    sfps.u.half = OSSwapInt16(sfps.u.half);
	    fpu->environ.status.busy = sfps.u.fields.busy;
	    fpu->environ.status.c3 = sfps.u.fields.c3;
	    fpu->environ.status.tos = sfps.u.fields.tos;
	    fpu->environ.status.c2 = sfps.u.fields.c2;
	    fpu->environ.status.c1 = sfps.u.fields.c1;
	    fpu->environ.status.c0 = sfps.u.fields.c0;
	    fpu->environ.status.errsumm = sfps.u.fields.errsumm;
	    fpu->environ.status.stkflt = sfps.u.fields.stkflt;
	    fpu->environ.status.precis = sfps.u.fields.precis;
	    fpu->environ.status.undfl = sfps.u.fields.undfl;
	    fpu->environ.status.ovrfl = sfps.u.fields.ovrfl;
	    fpu->environ.status.zdiv = sfps.u.fields.zdiv;
	    fpu->environ.status.denorm = sfps.u.fields.denorm;
	    fpu->environ.status.invalid = sfps.u.fields.invalid;

	    memcpy(&sfpt, &(fpu->environ.tag),
		   sizeof(struct swapped_fp_tag));
	    sfpt.u.half = OSSwapInt16(sfpt.u.half);
	    fpu->environ.tag.tag7 = sfpt.u.fields.tag7;
	    fpu->environ.tag.tag6 = sfpt.u.fields.tag6;
	    fpu->environ.tag.tag5 = sfpt.u.fields.tag5;
	    fpu->environ.tag.tag4 = sfpt.u.fields.tag4;
	    fpu->environ.tag.tag3 = sfpt.u.fields.tag3;
	    fpu->environ.tag.tag2 = sfpt.u.fields.tag2;
	    fpu->environ.tag.tag1 = sfpt.u.fields.tag1;
	    fpu->environ.tag.tag0 = sfpt.u.fields.tag0;

	    memcpy(&ss, &(fpu->environ.cs),
		   sizeof(struct swapped_sel));
	    ss.u.half = OSSwapInt16(ss.u.half);
	    fpu->environ.cs.index = ss.u.fields.index;
	    fpu->environ.cs.ti = ss.u.fields.ti;
	    fpu->environ.cs.rpl = ss.u.fields.rpl;

	    memcpy(&ss, &(fpu->environ.ds),
		   sizeof(struct swapped_sel));
	    ss.u.half = OSSwapInt16(ss.u.half);
	    fpu->environ.ds.index = ss.u.fields.index;
	    fpu->environ.ds.ti = ss.u.fields.ti;
	    fpu->environ.ds.rpl = ss.u.fields.rpl;
	
	    for(i = 0; i < 8; i++){
		memcpy(&sfpd, &(fpu->stack.ST[i]),
		       sizeof(struct swapped_fp_data_reg));
		fpu->stack.ST[i].mant = OSSwapInt16(sfpd.mant);
		fpu->stack.ST[i].mant1 = OSSwapInt16(sfpd.mant1);
		fpu->stack.ST[i].mant2 = OSSwapInt16(sfpd.mant2);
		fpu->stack.ST[i].mant3 = OSSwapInt16(sfpd.mant3);
		sfpd.u.half = OSSwapInt16(sfpd.u.half);
		fpu->stack.ST[i].exp = sfpd.u.fields.exp;
		fpu->stack.ST[i].sign = sfpd.u.fields.sign;
	    }
	}
	else{
	    sfpc.u.fields.rc = fpu->environ.control.rc;
	    sfpc.u.fields.pc = fpu->environ.control.pc;
	    sfpc.u.fields.precis = fpu->environ.control.precis;
	    sfpc.u.fields.undfl = fpu->environ.control.undfl;
	    sfpc.u.fields.ovrfl = fpu->environ.control.ovrfl;
	    sfpc.u.fields.zdiv = fpu->environ.control.zdiv;
	    sfpc.u.fields.denorm = fpu->environ.control.denorm;
	    sfpc.u.fields.invalid = fpu->environ.control.invalid;
	    sfpc.u.half = OSSwapInt16(sfpc.u.half);
	    memcpy(&(fpu->environ.control), &sfpc,
		   sizeof(struct swapped_fp_control));

	    sfps.u.fields.busy = fpu->environ.status.busy;
	    sfps.u.fields.c3 = fpu->environ.status.c3;
	    sfps.u.fields.tos = fpu->environ.status.tos;
	    sfps.u.fields.c2 = fpu->environ.status.c2;
	    sfps.u.fields.c1 = fpu->environ.status.c1;
	    sfps.u.fields.c0 = fpu->environ.status.c0;
	    sfps.u.fields.errsumm = fpu->environ.status.errsumm;
	    sfps.u.fields.stkflt = fpu->environ.status.stkflt;
	    sfps.u.fields.precis = fpu->environ.status.precis;
	    sfps.u.fields.undfl = fpu->environ.status.undfl;
	    sfps.u.fields.ovrfl = fpu->environ.status.ovrfl;
	    sfps.u.fields.zdiv = fpu->environ.status.zdiv;
	    sfps.u.fields.denorm = fpu->environ.status.denorm;
	    sfps.u.fields.invalid = fpu->environ.status.invalid;
	    sfps.u.half = OSSwapInt16(sfps.u.half);
	    memcpy(&(fpu->environ.status), &sfps,
		   sizeof(struct swapped_fp_status));

	    sfpt.u.fields.tag7 = fpu->environ.tag.tag7;
	    sfpt.u.fields.tag6 = fpu->environ.tag.tag6;
	    sfpt.u.fields.tag5 = fpu->environ.tag.tag5;
	    sfpt.u.fields.tag4 = fpu->environ.tag.tag4;
	    sfpt.u.fields.tag3 = fpu->environ.tag.tag3;
	    sfpt.u.fields.tag2 = fpu->environ.tag.tag2;
	    sfpt.u.fields.tag1 = fpu->environ.tag.tag1;
	    sfpt.u.fields.tag0 = fpu->environ.tag.tag0;
	    sfpt.u.half = OSSwapInt16(sfpt.u.half);
	    memcpy(&(fpu->environ.tag), &sfpt,
		   sizeof(struct swapped_fp_tag));

	    ss.u.fields.index = fpu->environ.cs.index;
	    ss.u.fields.ti = fpu->environ.cs.ti;
	    ss.u.fields.rpl = fpu->environ.cs.rpl;
	    ss.u.half = OSSwapInt16(ss.u.half);
	    memcpy(&(fpu->environ.cs), &ss,
		   sizeof(struct swapped_sel));

	    ss.u.fields.index = fpu->environ.ds.index;
	    ss.u.fields.ti = fpu->environ.ds.ti;
	    ss.u.fields.rpl = fpu->environ.ds.rpl;
	    ss.u.half = OSSwapInt16(ss.u.half);
	    memcpy(&(fpu->environ.ds), &ss,
		   sizeof(struct swapped_sel));

	    for(i = 0; i < 8; i++){
		sfpd.mant = OSSwapInt16(fpu->stack.ST[i].mant);
		sfpd.mant1 = OSSwapInt16(fpu->stack.ST[i].mant1);
		sfpd.mant2 = OSSwapInt16(fpu->stack.ST[i].mant2);
		sfpd.mant3 = OSSwapInt16(fpu->stack.ST[i].mant3);
		sfpd.u.fields.exp = fpu->stack.ST[i].exp;
		sfpd.u.fields.sign = fpu->stack.ST[i].sign;
		sfpd.u.half = OSSwapInt16(sfpd.u.half);
		memcpy(&(fpu->stack.ST[i]), &sfpd,
		       sizeof(struct swapped_fp_data_reg));
	    }
	}
}
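
The pattern repeated throughout this example: bitfield layout is host-defined, so the halfword is only ever swapped as a plain 16-bit integer, and the fields are read or written only while the value is in host byte order. Converting to host order means swap first, then read fields; converting away from host order means write fields first, then swap. A minimal sketch of both directions using a hypothetical selector-like register (not from cctools):

#include <libkern/OSByteOrder.h>

union sel_halfword {
	struct {
		unsigned short index :13,
			       ti    :1,
			       rpl   :2;
	} fields;
	unsigned short half;
};

/* foreign-order halfword -> host-order fields: swap, then read */
static unsigned short sel_index_from_foreign(unsigned short foreign)
{
	union sel_halfword u;
	u.half = OSSwapInt16(foreign);
	return u.fields.index;          /* fields are meaningful only after the swap */
}

/* host-order fields -> foreign-order halfword: write, then swap */
static unsigned short sel_to_foreign(unsigned short index, unsigned short ti, unsigned short rpl)
{
	union sel_halfword u;
	u.fields.index = index;
	u.fields.ti = ti;
	u.fields.rpl = rpl;
	return OSSwapInt16(u.half);     /* swap once the bits are packed */
}
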
Example #10
void
swap_i386_float_state(
struct i386_float_state *fpu,
enum NXByteOrder target_byte_sex)
{
#ifndef i386_EXCEPTION_STATE_COUNT
    /* this routine does nothing as there are currently no non-byte fields */
#else /* !defined(i386_EXCEPTION_STATE_COUNT) */
    struct swapped_fp_control {
	union {
	    struct {
		unsigned short
			    :3,
		    /*inf*/ :1,
		    rc	    :2,
		    pc	    :2,
			    :2,
		    precis  :1,
		    undfl   :1,
		    ovrfl   :1,
		    zdiv    :1,
		    denorm  :1,
		    invalid :1;
	    } fields;
	    unsigned short half;
	} u;
    } sfpc;

    struct swapped_fp_status {
	union {
	    struct {
		unsigned short
		    busy    :1,
		    c3	    :1,
		    tos	    :3,
		    c2	    :1,
		    c1	    :1,
		    c0	    :1,
		    errsumm :1,
		    stkflt  :1,
		    precis  :1,
		    undfl   :1,
		    ovrfl   :1,
		    zdiv    :1,
		    denorm  :1,
		    invalid :1;
	    } fields;
	    unsigned short half;
	} u;
    } sfps;

    enum NXByteOrder host_byte_sex;

	host_byte_sex = NXHostByteOrder();

	fpu->fpu_reserved[0] = OSSwapInt32(fpu->fpu_reserved[0]);
	fpu->fpu_reserved[1] = OSSwapInt32(fpu->fpu_reserved[1]);

	if(target_byte_sex == host_byte_sex){
	    memcpy(&sfpc, &(fpu->fpu_fcw),
		   sizeof(struct swapped_fp_control));
	    sfpc.u.half = OSSwapInt16(sfpc.u.half);
	    fpu->fpu_fcw.rc = sfpc.u.fields.rc;
	    fpu->fpu_fcw.pc = sfpc.u.fields.pc;
	    fpu->fpu_fcw.precis = sfpc.u.fields.precis;
	    fpu->fpu_fcw.undfl = sfpc.u.fields.undfl;
	    fpu->fpu_fcw.ovrfl = sfpc.u.fields.ovrfl;
	    fpu->fpu_fcw.zdiv = sfpc.u.fields.zdiv;
	    fpu->fpu_fcw.denorm = sfpc.u.fields.denorm;
	    fpu->fpu_fcw.invalid = sfpc.u.fields.invalid;

	    memcpy(&sfps, &(fpu->fpu_fsw),
		   sizeof(struct swapped_fp_status));
	    sfps.u.half = OSSwapInt16(sfps.u.half);
	    fpu->fpu_fsw.busy = sfps.u.fields.busy;
	    fpu->fpu_fsw.c3 = sfps.u.fields.c3;
	    fpu->fpu_fsw.tos = sfps.u.fields.tos;
	    fpu->fpu_fsw.c2 = sfps.u.fields.c2;
	    fpu->fpu_fsw.c1 = sfps.u.fields.c1;
	    fpu->fpu_fsw.c0 = sfps.u.fields.c0;
	    fpu->fpu_fsw.errsumm = sfps.u.fields.errsumm;
	    fpu->fpu_fsw.stkflt = sfps.u.fields.stkflt;
	    fpu->fpu_fsw.precis = sfps.u.fields.precis;
	    fpu->fpu_fsw.undfl = sfps.u.fields.undfl;
	    fpu->fpu_fsw.ovrfl = sfps.u.fields.ovrfl;
	    fpu->fpu_fsw.zdiv = sfps.u.fields.zdiv;
	    fpu->fpu_fsw.denorm = sfps.u.fields.denorm;
	    fpu->fpu_fsw.invalid = sfps.u.fields.invalid;
	}
	else{
	    sfpc.u.fields.rc = fpu->fpu_fcw.rc;
	    sfpc.u.fields.pc = fpu->fpu_fcw.pc;
	    sfpc.u.fields.precis = fpu->fpu_fcw.precis;
	    sfpc.u.fields.undfl = fpu->fpu_fcw.undfl;
	    sfpc.u.fields.ovrfl = fpu->fpu_fcw.ovrfl;
	    sfpc.u.fields.zdiv = fpu->fpu_fcw.zdiv;
	    sfpc.u.fields.denorm = fpu->fpu_fcw.denorm;
	    sfpc.u.fields.invalid = fpu->fpu_fcw.invalid;
	    sfpc.u.half = OSSwapInt16(sfpc.u.half);
	    memcpy(&(fpu->fpu_fcw), &sfpc,
		   sizeof(struct swapped_fp_control));

	    sfps.u.fields.busy = fpu->fpu_fsw.busy;
	    sfps.u.fields.c3 = fpu->fpu_fsw.c3;
	    sfps.u.fields.tos = fpu->fpu_fsw.tos;
	    sfps.u.fields.c2 = fpu->fpu_fsw.c2;
	    sfps.u.fields.c1 = fpu->fpu_fsw.c1;
	    sfps.u.fields.c0 = fpu->fpu_fsw.c0;
	    sfps.u.fields.errsumm = fpu->fpu_fsw.errsumm;
	    sfps.u.fields.stkflt = fpu->fpu_fsw.stkflt;
	    sfps.u.fields.precis = fpu->fpu_fsw.precis;
	    sfps.u.fields.undfl = fpu->fpu_fsw.undfl;
	    sfps.u.fields.ovrfl = fpu->fpu_fsw.ovrfl;
	    sfps.u.fields.zdiv = fpu->fpu_fsw.zdiv;
	    sfps.u.fields.denorm = fpu->fpu_fsw.denorm;
	    sfps.u.fields.invalid = fpu->fpu_fsw.invalid;
	    sfps.u.half = OSSwapInt16(sfps.u.half);
	    memcpy(&(fpu->fpu_fsw), &sfps,
		   sizeof(struct swapped_fp_status));
	}
	fpu->fpu_fop = OSSwapInt16(fpu->fpu_fop);
	fpu->fpu_ip = OSSwapInt32(fpu->fpu_ip);
	fpu->fpu_cs = OSSwapInt16(fpu->fpu_cs);
	fpu->fpu_rsrv2 = OSSwapInt16(fpu->fpu_rsrv2);
	fpu->fpu_dp = OSSwapInt32(fpu->fpu_dp);
	fpu->fpu_ds = OSSwapInt16(fpu->fpu_ds);
	fpu->fpu_rsrv3 = OSSwapInt16(fpu->fpu_rsrv3);
	fpu->fpu_mxcsr = OSSwapInt32(fpu->fpu_mxcsr);
	fpu->fpu_mxcsrmask = OSSwapInt32(fpu->fpu_mxcsrmask);
	fpu->fpu_reserved1 = OSSwapInt32(fpu->fpu_reserved1);

#endif /* !defined(i386_EXCEPTION_STATE_COUNT) */
}