예제 #1
0
파일: gr909.c 프로젝트: aircross/ray
/*
 * Line-capacitance measurement.
 *
 * Steps the VOC RAM value upward, records the VTIP readings obtained with VOC
 * at 6*TENVOLTINC and at 11*TENVOLTINC/2, then uses readTSC() to time 50
 * cycles of switching VOC between those two set-points and waiting for VTIP
 * to reach the recorded readings again.  The elapsed tick count is printed.
 * When PCM_SLIC_FLOAT is defined the count is additionally converted with the
 * externally defined alpha/beta values (see AN71) and printed in uF.
 *
 * This function requires calibration per application.
 *
 * Returns 0 in all cases.
 */
int lineCap(void)
{
	__s64 time1, time2;
	unsigned short i, sixtyVolts, fiftyFiveVolts;
#ifdef PCM_SLIC_FLOAT
	double finaltime, capValue;
#endif

	writeRam(VCM, 0);
	writeReg(ILIM, 5);

	writeRam(SBIAS, 0x61);  // TURN OFF SLOW FEED

	writeReg(LINEFEED, 5);

	// Ramp VOC in steps of TENVOLTINC; each level is written twice, 50 delay units apart.
	for (i = 0; i < 12; i++) {
		writeRam(VOC, (unsigned short) ((i / 2) * TENVOLTINC));
		delay(50);
	}

	// Capture the VTIP reading at each of the two set-points.
	writeRam(VOC, 6 * TENVOLTINC);
	delay(50);
	sixtyVolts = readRam(VTIP);
	writeRam(VOC, 11 * TENVOLTINC / 2);
	delay(50);
	fiftyFiveVolts = readRam(VTIP);

	time1 = readTSC(); // record initial time
	for (i = 0; i < 50; i++) {
		// NOTE(review): both waits require an exact match of the VTIP reading;
		// if the reading never returns to precisely the recorded value these
		// loops do not terminate -- confirm this is acceptable on target.
		writeRam(VOC, 6 * TENVOLTINC);
		while (readRam(VTIP) != sixtyVolts)
			;
		writeRam(VOC, 11 * TENVOLTINC / 2);
		while (readRam(VTIP) != fiftyFiveVolts)
			;
	}

	time2 = readTSC(); // record final time
	time2 -= time1;

	writeReg(LINEFEED, 0);
	delay(50);

	// %lld with an explicit cast is the portable way to print a 64-bit value;
	// the previous "%I64i" is understood only by Microsoft's C runtime.
	printf(" \n Time = %lld", (long long) time2);
#ifdef PCM_SLIC_FLOAT
	time2 -= beta; // subtract beta, see AN71

	finaltime = (double) time2;
	capValue = (finaltime / alpha) * 1e6; // divide by alpha, see AN71
	// output capacitance
	printf("\n Cap = %f uF  ", capValue);
#endif
	return (0);
}
예제 #2
0
__u64 delay( unsigned long wait)
{
    //__s64 target = readTSC() + (ticksPerSecond * wait ) /1000 ;
    __s64 target  ;
    while (readTSC() < target) ;

    return ( readTSC() - target);

}
예제 #3
0
/*
 * Samples the time-stamp counter before and after a slic_sleep(1800) call.
 *
 * The two readings are stored in time0 and time1, which are declared outside
 * this function.
 *
 * NOTE(review): the statement that derived ticksPerSecond from the two
 * readings is commented out below, so the function currently returns whatever
 * value ticksPerSecond already holds -- confirm that it is set elsewhere.
 */
__s64 calibratePrecisionClock(void)
{
    /* NOTE(review): presumably a short settling sleep before the first sample -- confirm. */
    slic_sleep(1);
    time0= readTSC();
    slic_sleep (1800);
    time1 = readTSC();

    /* Disabled calibration formula, kept for reference: */
    //ticksPerSecond=((time1-time0)/1800000)*1000000;
    return ticksPerSecond;
}
예제 #4
0
파일: tsc.c 프로젝트: rpgoldman/ppmz
/*
 * Returns the duration of one TSC tick in seconds.
 *
 * The first call measures how many TSC ticks elapse while clock() advances by
 * CLOCKS_PER_SEC, snaps the resulting MHz figure to a nearby "round" value and
 * caches the answer in secondsPerClock / haveSecondsPerClock; later calls
 * return the cached value.  TSC_MHZ is updated as a side effect.
 *
 * A measurement above 1000 MHz is treated as suspicious and retried, but only
 * a bounded number of times: the previous unbounded retry never terminated on
 * a processor that genuinely runs faster than 1 GHz.
 */
double secondsPerTSC(void)
{
enum { MAX_CALIBRATION_TRIES = 3 };
clock_t clock1;
tsc_type tsc1,tsc2;
double clocksPerSec,MHZ;
int tries = 0;

	if ( haveSecondsPerClock )
	{
		return secondsPerClock;
	}

	do
	{

		// count TSC ticks across one second of clock() time
		clock1 = clock();
		readTSC(tsc1);
		while( (clock() - clock1) < CLOCKS_PER_SEC ) ;
		readTSC(tsc2);
		clocksPerSec = diffTSChz(tsc1,tsc2);

		MHZ = clocksPerSec / (1000000.0);

		// check for common errors
		{
		int Z50,Z83;
			Z50 = ((int)((MHZ + 25)/50))*50; // round to the nearest fifty
			Z83 = ((int)(((MHZ + 41)*12)/1000))*1000/12; // round to the nearest 83.333 == 1000/12
			if ( ABS((double)Z50 - MHZ) < 14 )
				MHZ = Z50;
			else if ( ABS((double)Z83 - MHZ) < 14 )
				MHZ = Z83;
			else
			{
				MHZ = ((int)((MHZ + 5)/10))*10; // round to the nearest ten
			}
			TSC_MHZ = (int) MHZ;
		}

		// after MAX_CALIBRATION_TRIES consistent readings above 1000 MHz, accept the result
	} while( TSC_MHZ > 1000 && ++tries < MAX_CALIBRATION_TRIES );

	clocksPerSec  = MHZ * (1000000.0);

	haveSecondsPerClock = true;
	secondsPerClock = 1.0 / clocksPerSec;

return secondsPerClock;
}
예제 #5
0
파일: tsc.c 프로젝트: rpgoldman/ppmz
/*
 * Allocates a tscNode, links it in front of the list rooted at `head`, and
 * then stores the current TSC reading in it.  Does nothing when the
 * allocation fails.
 */
void pushTSC(void)
{
tscNode *node = new(tscNode);

	if ( node )
	{
		node->next = head;
		head = node;
		// sample last, after the list has been updated
		readTSC(node->tsc);
	}
}
예제 #6
0
파일: tsc.c 프로젝트: rpgoldman/ppmz
double popTSChz(void)
{
tscNode *tn;
ulong tsc[2];
	readTSC(tsc);
	if ( ! head ) return 0.0;
	tn = head;
	head = head->next;
return diffTSChz(tn->tsc,tsc);
}
예제 #7
0
ppm_t* img_fast_brighten(ppm_t* src, float k, uint64_t* cycles)
{
	if (k < 0) return src;
	
	int i = 0, j = 0;
	
	uint8_t* r = src->r;
	uint8_t* g = src->g;
	uint8_t* b = src->b;
	int ni = 0;
	
	__m128i xmm0, xmm1, xmm2, xmm4, xmm5, xmm6;
	__m128i xmm3 = _mm_set1_epi8((int)ceil(k));
	
	uint64_t t0 = readTSC();
	
	for (j = 0; j < src->w; j++)
	{
		for (i = 0; i < src->h; i += 16)
		{
			ni += 16;
			
			// Load 16 RGB pixels
			xmm0 = _mm_load_si128((__m128i*)(r + ni));
			xmm1 = _mm_load_si128((__m128i*)(g + ni));
			xmm2 = _mm_load_si128((__m128i*)(b + ni));
			
			// Multiply by the coefficient vector
			xmm4 = _mm_scale_epu8(xmm0, xmm3);
			xmm5 = _mm_scale_epu8(xmm1, xmm3);
			xmm6 = _mm_scale_epu8(xmm2, xmm3);
			
			// Store back
			_mm_store_si128((__m128i*)(r + ni), xmm4);
			_mm_store_si128((__m128i*)(g + ni), xmm5);
			_mm_store_si128((__m128i*)(b + ni), xmm6);
		}
	}
	
	if (cycles != NULL) *cycles = cyclesElapsed(readTSC(), t0);
	
	return src;
}
예제 #8
0
/*
 * Converts 16 pixels to grey: averages (R,G) and (G,B), then averages the two
 * results, and writes that value to all three channels.  All three pointers
 * must be 16-byte aligned.
 */
static void bw_gray16_u8(uint8_t* r, uint8_t* g, uint8_t* b)
{
	__m128i vr = _mm_load_si128((__m128i*)r);
	__m128i vg = _mm_load_si128((__m128i*)g);
	__m128i vb = _mm_load_si128((__m128i*)b);

	__m128i rg = _mm_avg_epu8(vr, vg);
	__m128i gb = _mm_avg_epu8(vg, vb);
	__m128i gray = _mm_avg_epu8(rg, gb);

	_mm_store_si128((__m128i*)r, gray);
	_mm_store_si128((__m128i*)g, gray);
	_mm_store_si128((__m128i*)b, gray);
}

/*
 * Converts an image to black-and-white in place.
 *
 * src:    image to modify; the planes are treated as src->w * src->h
 *         contiguous bytes each.  NOTE(review): the aligned loads assume each
 *         plane starts on a 16-byte boundary -- confirm against the allocator.
 * cycles: if not NULL, receives cyclesElapsed() over the processing loop.
 *
 * Returns src.
 *
 * Fixes relative to the previous version:
 *  - the final average now combines the (R,G) and (G,B) averages; it used to
 *    repeat avg(R,G), so blue never contributed;
 *  - the offset is no longer advanced before the first load (the first 16
 *    pixels were skipped and 16 bytes past the end were overwritten);
 *  - a trailing partial block goes through scratch buffers instead of
 *    touching memory beyond the planes.
 */
ppm_t* img_fast_bw(ppm_t* src, uint64_t* cycles)
{
	uint8_t* r = src->r;
	uint8_t* g = src->g;
	uint8_t* b = src->b;
	int total = src->w * src->h;
	int off = 0, t = 0;

	uint64_t t0 = readTSC();

	for (off = 0; off + 16 <= total; off += 16)
	{
		bw_gray16_u8(r + off, g + off, b + off);
	}

	if (off < total)
	{
		uint8_t tr[16] __attribute__((aligned(16))) = { 0 };
		uint8_t tg[16] __attribute__((aligned(16))) = { 0 };
		uint8_t tb[16] __attribute__((aligned(16))) = { 0 };

		for (t = 0; off + t < total; t++)
		{
			tr[t] = r[off + t];
			tg[t] = g[off + t];
			tb[t] = b[off + t];
		}

		bw_gray16_u8(tr, tg, tb);

		for (t = 0; off + t < total; t++)
		{
			r[off + t] = tr[t];
			g[off + t] = tg[t];
			b[off + t] = tb[t];
		}
	}

	if (cycles != NULL) *cycles = cyclesElapsed(readTSC(), t0);

	return src;
}
예제 #9
0
파일: tsc.c 프로젝트: rpgoldman/ppmz
/*
 * Reads the TSC and returns it as one double: element 0 of the reading is
 * weighted by 2^32 and element 1 is added to it.
 */
double readTSChz(void)
{
tsc_type now;

	readTSC(now);

	return ((double)now[1] + now[0]*4294967296.0);
}
/*
** Function: SYSTEM_GetTime
*/
int time_GetTimeWrapper (void *hTimer, void *time){
	((timeStamp *)time)->time = readTSC();
	return 0;
}
/*
** Function: SYSTEM_TimeElapsed
*/
int time_TimeElapsedWrapper (void *hTimer, void *startTime, int *timeInMs){
	_int64 diff = readTSC() - ((timeStamp *)startTime)->time;
	*timeInMs = (int)((int)diff / ((int)((systemTimer_S *)hTimer)->ticksPerSecond/1000));
	//startTime->time = readTSC();
	return 0;
}
예제 #12
0
/*
 * Loads the three bytes p[0..2] and widens them to floats in lanes 0..2 of
 * the result; lane 3 is 0.  memcpy is used so that exactly three bytes are
 * read, with no alignment or aliasing requirement on p.
 */
static __m128 sharpen_load3_u8(const uint8_t* p)
{
	int packed = 0;
	__m128i v;

	memcpy(&packed, p, 3);
	v = _mm_cvtsi32_si128(packed);
	v = _mm_unpacklo_epi8(v, _mm_setzero_si128());
	v = _mm_unpacklo_epi16(v, _mm_setzero_si128());
	return _mm_cvtepi32_ps(v);
}

/*
 * Applies the 3x3 kernel (k0, k1, k2: one kernel line per register, lane 3
 * unused) around plane[center], where neighbouring lines are `stride` bytes
 * apart.  The sum is clamped to [0, maxval] and truncated to a byte.
 */
static uint8_t sharpen_pixel_u8(const uint8_t* plane, int center, int stride,
	__m128 k0, __m128 k1, __m128 k2, float maxval)
{
	float lanes[4] __attribute__((aligned(16)));
	float sum = 0.0f;

	__m128 p0 = _mm_mul_ps(k0, sharpen_load3_u8(plane + center - stride - 1));
	__m128 p1 = _mm_mul_ps(k1, sharpen_load3_u8(plane + center - 1));
	__m128 p2 = _mm_mul_ps(k2, sharpen_load3_u8(plane + center + stride - 1));

	_mm_store_ps(lanes, _mm_add_ps(p0, _mm_add_ps(p1, p2)));
	sum = lanes[0] + lanes[1] + lanes[2];

	if (sum < 0.0f) sum = 0.0f;
	if (sum > maxval) sum = maxval;

	return (uint8_t)sum;
}

/*
 * Sharpens src into dst with a 3x3 kernel whose centre weight is k + 1 and
 * whose eight neighbour weights are -k/8.
 *
 * src:    source image; pixel (i, j) of a plane lives at index i + src->h*j.
 * dst:    destination image; first receives a copy of src, then every pixel
 *         that has all eight neighbours is overwritten with the filtered
 *         value, so the border keeps the source values.
 * k:      sharpening strength.
 * cycles: if not NULL, receives cyclesElapsed() over the filtering loop
 *         (the initial copy is not timed).
 *
 * Returns dst.
 *
 * Each neighbourhood line is now fetched as exactly three bytes.  The former
 * `*(const int*)` load was a misaligned, type-punned 4-byte read that reached
 * one byte past the end of the plane at the last filtered pixel.
 */
ppm_t* img_fast_sharpen(ppm_t* src, ppm_t* dst, float k, uint64_t* cycles)
{
	int i = 0, j = 0;
	int center = 0;
	float maxval = 0.0f;
	float PSF[12] __attribute__((aligned(16)))
		= {-k/8.0f, -k/8.0f, -k/8.0f, 0.0f, -k/8.0f, k+1.0f, -k/8.0f, 0.0f, -k/8.0f, -k/8.0f, -k/8.0f, 0.0f};

	__m128 k0, k1, k2;

	memcpy(dst->r, src->r, src->h*src->w*sizeof(uint8_t));
	memcpy(dst->g, src->g, src->h*src->w*sizeof(uint8_t));
	memcpy(dst->b, src->b, src->h*src->w*sizeof(uint8_t));

	uint64_t t0 = readTSC();
	_mm_empty();

	k0 = _mm_load_ps(PSF);
	k1 = _mm_load_ps(PSF + 4);
	k2 = _mm_load_ps(PSF + 8);
	maxval = (float)src->max;

	// Skip the first and last line in j: no neighbours to convolve with
	for (j = 1; j < src->w - 1; j++)
	{
		// Skip the first and last position in i for the same reason
		for (i = 1; i < src->h - 1; i++)
		{
			center = i + src->h*j;

			dst->r[center] = sharpen_pixel_u8(src->r, center, src->h, k0, k1, k2, maxval);
			dst->g[center] = sharpen_pixel_u8(src->g, center, src->h, k0, k1, k2, maxval);
			dst->b[center] = sharpen_pixel_u8(src->b, center, src->h, k0, k1, k2, maxval);
		}
	}

	_mm_empty();
	if (cycles != NULL) *cycles = cyclesElapsed(readTSC(), t0);

	return dst;
}
예제 #13
0
/* Releases a plane obtained from ppm_alloc_aligned_f; NULL is ignored. */
static void sharpen_copy_release(float* plane)
{
	if (plane == NULL) return;

	#ifdef __INTEL_COMPILER
	_mm_free(plane);
	#else
	free(plane);
	#endif
}

/*
 * Applies the 3x3 kernel (k0, k1, k2: one kernel line per register, lane 3
 * unused) around plane[center], where neighbouring lines are `stride` floats
 * apart.  Exactly three floats per line are read, with no alignment
 * requirement.  The sum is clamped to [0, maxval] and truncated to a byte.
 */
static uint8_t sharpen_copy_pixel(const float* plane, int center, int stride,
	__m128 k0, __m128 k1, __m128 k2, float maxval)
{
	float lanes[4] __attribute__((aligned(16)));
	float sum = 0.0f;
	const float* up  = plane + center - stride - 1;
	const float* mid = plane + center - 1;
	const float* dn  = plane + center + stride - 1;

	__m128 p0 = _mm_mul_ps(k0, _mm_set_ps(0.0f, up[2],  up[1],  up[0]));
	__m128 p1 = _mm_mul_ps(k1, _mm_set_ps(0.0f, mid[2], mid[1], mid[0]));
	__m128 p2 = _mm_mul_ps(k2, _mm_set_ps(0.0f, dn[2],  dn[1],  dn[0]));

	_mm_store_ps(lanes, _mm_add_ps(p0, _mm_add_ps(p1, p2)));
	sum = lanes[0] + lanes[1] + lanes[2];

	if (sum < 0.0f) sum = 0.0f;
	if (sum > maxval) sum = maxval;

	return (uint8_t)sum;
}

/*
 * Sharpens src into dst with a 3x3 kernel whose centre weight is k + 1 and
 * whose eight neighbour weights are -k/8, working from float copies of the
 * three colour planes.
 *
 * src:    source image; pixel (i, j) of a plane lives at index i + src->h*j.
 * dst:    destination image; first receives a copy of src, then every pixel
 *         that has all eight neighbours is overwritten with the filtered
 *         value, so the border keeps the source values.
 * k:      sharpening strength.
 * cycles: if not NULL, receives cyclesElapsed() over the filtering loop
 *         (the float conversion and copy are not timed).
 *
 * Returns dst, or NULL when a temporary plane could not be allocated (dst is
 * left untouched in that case).
 *
 * Fixes relative to the previous version:
 *  - the aligned/unaligned choice looked only at (i-1) % 4 and ignored the
 *    j*h part of the index, so _mm_load_ps could be issued on a misaligned
 *    address whenever h is not a multiple of 4;
 *  - the 4-float loads read one float past the end of the plane at the last
 *    filtered pixel;
 *  - the temporary planes were used without checking the allocation result.
 */
ppm_t* img_fast_sharpen_copy(ppm_t* src, ppm_t* dst, float k, uint64_t* cycles)
{
	int i = 0, j = 0;
	int n = 0, center = 0;
	int total = src->w * src->h;
	float maxval = 0.0f;
	float PSF[12] __attribute__((aligned(16)))
		= {-k/8.0f, -k/8.0f, -k/8.0f, 0.0f, -k/8.0f, k+1.0f, -k/8.0f, 0.0f, -k/8.0f, -k/8.0f, -k/8.0f, 0.0f};

	__m128 k0, k1, k2;

	// Initialize data
	float* copyR = ppm_alloc_aligned_f(src->w, src->h);
	float* copyG = ppm_alloc_aligned_f(src->w, src->h);
	float* copyB = ppm_alloc_aligned_f(src->w, src->h);

	if (copyR == NULL || copyG == NULL || copyB == NULL)
	{
		sharpen_copy_release(copyR);
		sharpen_copy_release(copyG);
		sharpen_copy_release(copyB);
		return NULL;
	}

	// Float working copies of the source, plus a verbatim copy into dst
	for (n = 0; n < total; n++)
	{
		copyR[n] = (float)src->r[n];
		copyG[n] = (float)src->g[n];
		copyB[n] = (float)src->b[n];

		dst->r[n] = src->r[n];
		dst->g[n] = src->g[n];
		dst->b[n] = src->b[n];
	}

	uint64_t t0 = readTSC();
	_mm_empty();

	k0 = _mm_load_ps(PSF);
	k1 = _mm_load_ps(PSF + 4);
	k2 = _mm_load_ps(PSF + 8);
	maxval = (float)src->max;

	// Skip the first and last line in j: no neighbours to convolve with
	for (j = 1; j < src->w - 1; j++)
	{
		// Skip the first and last position in i for the same reason
		for (i = 1; i < src->h - 1; i++)
		{
			center = i + src->h*j;

			dst->r[center] = sharpen_copy_pixel(copyR, center, src->h, k0, k1, k2, maxval);
			dst->g[center] = sharpen_copy_pixel(copyG, center, src->h, k0, k1, k2, maxval);
			dst->b[center] = sharpen_copy_pixel(copyB, center, src->h, k0, k1, k2, maxval);
		}
	}

	_mm_empty();

	if (cycles != NULL) *cycles = cyclesElapsed(readTSC(), t0);

	sharpen_copy_release(copyR);
	sharpen_copy_release(copyG);
	sharpen_copy_release(copyB);

	return dst;
}