예제 #1
0
/*
 * Allocates room for `len` elements of type TD by calling fwMalloc with the
 * total byte count, and returns the result cast to TD*.
 *
 * len  Number of TD elements requested.
 *
 * Returns the pointer produced by fwMalloc, or a null pointer when `len` is
 * negative or when len * sizeof(TD) cannot be represented in an Fw32u.
 * A zero `len` is still forwarded to fwMalloc unchanged.
 *
 * NOTE(review): the range check assumes Fw32u is an unsigned integer type,
 * as its name suggests -- confirm against its typedef.
 */
TD* SigMalloc(int len)
{
	// A negative count would convert to a huge unsigned value and wrap the
	// byte size, yielding a buffer much smaller than the caller expects.
	if (len < 0)
		return (TD*)0;

	// Largest value an Fw32u can hold; the byte count must not exceed it,
	// otherwise the narrowing below would silently truncate the size.
	const Fw32u maxBytes = (Fw32u)~(Fw32u)0;
	if ((Fw32u)len > maxBytes / sizeof(TD))
		return (TD*)0;

	Fw32u size = (Fw32u)(sizeof(TD) * (Fw32u)len);
	TD* memPtr = (TD*)fwMalloc(size);

	return memPtr;
}
예제 #2
0
	static SYS_INLINE FwStatus iResizeShift_C1R(const TS *pSrc, FwiSize srcSize, int srcStep, FwiRect srcRoi,	
		TS *pDst, int dstStep, FwiSize dstRoiSize,	
		double xFr, double yFr, double xShift, double yShift, int interpolation)	
	{	
		double fEnd_x,fEnd_y; 
		long   iStart_x, iEnd_x, iStart_y, iEnd_y; 
		int x=0,y=0;
		//short half_FW_WEIGHT = FW_WEIGHT/2 ;

		if (xFr <= 0.0 || yFr <= 0.0)	
			return fwStsResizeFactorErr;

		if (interpolation != FWI_INTER_LINEAR)	return fwStsInterpolationErr;	

		FwStatus status = My_FW_ParaCheck<TS>(pSrc, srcSize, srcStep, srcRoi, pDst, dstStep,
			dstRoiSize, 1);
		if (status !=fwStsNoErr) return status;

		if(xShift > 0.0) {
			iStart_x = 0;
			fEnd_x = (double)(srcRoi.width-xShift) / xFr;	
			if (fEnd_x <1) return fwStsWrongIntersectROI;

			if((double)dstRoiSize.width < fEnd_x)	
				iEnd_x = dstRoiSize.width;	
			else	
				iEnd_x = (int)fEnd_x;	
		} else {	
			iStart_x =(long)( xShift / xFr);	
			fEnd_x = (double)(srcRoi.width) / xFr;	
			if((double)dstRoiSize.width < fEnd_x)	
				iEnd_x = dstRoiSize.width;	
			else	
				iEnd_x = (int)fEnd_x;	
		}	

		if(yShift > 0.0) {	
			iStart_y = 0;	
			fEnd_y = (double)(srcRoi.height-yShift) / yFr;	
			if (fEnd_y <1) return fwStsWrongIntersectROI;

			if((double)dstRoiSize.height < fEnd_y)	
				iEnd_y = dstRoiSize.height;	
			else	
				iEnd_y = (int)fEnd_y;	
		} else {	
			iStart_y = (long)(yShift / yFr);	
			fEnd_y = (double)(srcRoi.width) / yFr;	
			if((double)dstRoiSize.height < fEnd_y)	
				iEnd_y = dstRoiSize.height;	
			else	
				iEnd_y = (int)fEnd_y;	
		}	

		//use fwMalloc instead of malloc for aligned address
		Linear_Array *pX_Array_Value = (Linear_Array*) fwMalloc(dstRoiSize.width*sizeof(Linear_Array));	
		Linear_Array *pY_Array_Value = (Linear_Array*) fwMalloc(dstRoiSize.height*sizeof(Linear_Array));	

		//resizeshift_pixel_mapping3(srcStep,srcRoi,pDst,dstStep,xFr,yFr,xShift,yShift,pX_Array_Value,
		// pY_Array_Value,iStart_x,iEnd_x,iStart_y,iEnd_y);	
		double fraction_X, fractY, one_Minus_X, one_Minus_Y, weight_shift, srcRoix, srcRoiy;	
		int ceil_X, ceil_Y, floor_X, floor_Y;	
		int ifraction_x, ifraction_y, ione_minus_x, ione_minus_y;	

		srcRoix = (double)srcRoi.x + xShift;	
		srcRoiy = (double)srcRoi.y + yShift;	

		weight_shift = (double) (1<<FW_WEIGHT) ;	

		//Pre-calculate the y coefficient.
		for (y = iStart_y; y < iEnd_y; y++)	
		{	
			floor_Y = (int)floor((double)y * yFr);	
			ceil_Y = floor_Y + 1;
			//Protection for over-boundary reading
			if (ceil_Y >= srcRoi.height) ceil_Y = floor_Y;	

			fractY = y*yFr - floor_Y;	
			one_Minus_Y = 1.0 - fractY;	

			//Shifted for integer calculation
			ifraction_y  = (int)(fractY * weight_shift);	
			ione_minus_y = (int)(one_Minus_Y * weight_shift);	

			floor_Y = (int)((srcRoiy + (double)floor_Y)*(double)srcStep + 0.5);	
			ceil_Y  = (int)((srcRoiy + (double)ceil_Y)*(double)srcStep +0.5);	

			pY_Array_Value[y].floor=floor_Y;	
			pY_Array_Value[y].ceil=ceil_Y;	
			//		pY_Array_Value[y].fraction=fractY;	
			//		pY_Array_Value[y].one_minus_val=one_Minus_Y;	
			pY_Array_Value[y].ifraction=(short)ifraction_y;	
			pY_Array_Value[y].ione_Minus_Val=(short)ione_minus_y;			
		}    

		//Pre-calculate the x coefficient.
		for (x = iStart_x; x < iEnd_x; x++)	
		{	
			floor_X = (int)floor((double)x * xFr);	

			//Protection for over-boundary reading
			ceil_X = floor_X + 1;	
			if (ceil_X >= srcRoi.width) ceil_X = floor_X;	

			fraction_X = x*xFr - floor_X;	
			one_Minus_X = 1.0 - fraction_X;	

			//Shifted for integer calculation
			ifraction_x  = (int)(fraction_X * weight_shift);	
			ione_minus_x = (int)(one_Minus_X * weight_shift);	

			floor_X = (int)((srcRoix + (double)floor_X) + 0.5);	
			ceil_X  = (int)((srcRoix + (double)ceil_X) + 0.5);	

			pX_Array_Value[x].floor=floor_X;	
			pX_Array_Value[x].ceil=ceil_X;	
			//		pX_Array_Value[x].fraction=fraction_X;	
			//		pX_Array_Value[x].one_minus_val=one_Minus_X;	
			pX_Array_Value[x].ifraction=(short)ifraction_x;	
			pX_Array_Value[x].ione_Minus_Val=(short)ione_minus_x;	
		}	


		//	stat_from_interpolation = resizeshift_interpolation_func(pSrc,srcRoi,pDst,dstStep,dstRoiSize,	
		//xFr,yFr,xShift,yShift,pX_Array_Value,pY_Array_Value,iStart_x,iEnd_x,iStart_y,iEnd_y);	

		//This function will be used for Fw8u type only.
		if (sizeof(TS) != 1) return fwStsErr;
		else //if(sizeof(TYPE) == 1)	// if TYPE == Fw8u
		{
			unsigned char p1, p2, p3, p4, t1, t2;	

			//__m128i rxmm7;	
			//rxmm7  = _mm_set1_epi8(0);				

			//int tempval_width=dstRoiSize.width%16;	
			//for (y = 0; y < dstRoiSize.height; y++)	
			//{	
			//	
			//	if(dstRoiSize.width>=16)	
			//	{	
			//		for (x = 0; x < dstRoiSize.width-tempval_width; x+=16)		
			//		   _mm_storeu_si128  ((__m128i *)(pDst + x + y*dstStep),rxmm7);	

			//		for (;x < dstRoiSize.width; x++) *(pDst + x + y*dstStep) = 0;	
			//	}	
			//	else	
			//	{	
			//		for (x=0;x < dstRoiSize.width; x++) *(pDst + x + y*dstStep) = 0;	
			//	}	
			//}	

			__m128i rxmm0 , rxmm1, rxmm2, rxmm3, rxmm4, rxmm5, rxmm6, rxmm7, rxmm8;	
			XMM128 pp1={0}, pp2={0}, pp3={0}, pp4={0};	
			XMM128 pIfx={0}, pIofx={0};	
			Fw8u *pSrc_FloorY;	
			Fw8u *pSrc_CeilY;	
			short half_FW_WEIGHT = FW_WEIGHT/2 ;	

			rxmm8  = _mm_set1_epi16(half_FW_WEIGHT);								

			for (y = iStart_y; y < iEnd_y; y++)	
			{		
				pSrc_CeilY   = (Fw8u*)pSrc;	
				pSrc_FloorY  = (Fw8u*)pSrc;	
				pSrc_CeilY  += pY_Array_Value[y].ceil;	
				pSrc_FloorY += pY_Array_Value[y].floor;	

				ifraction_y  = pY_Array_Value[y].ifraction;	
				ione_minus_y = pY_Array_Value[y].ione_Minus_Val;	

				rxmm0  = _mm_set1_epi16((short)ione_minus_y);                         
				rxmm7  = _mm_set1_epi16((short)ifraction_y);                         

				if((iEnd_x-iStart_x)>=8)	
				{	
					for (x = iStart_x; x <= iEnd_x-8; x+=8)	// process 8 pixels in parallel
					{	
						for (int xx = 0; xx < 8; xx++)	// process 8 pixels in parallel	
						{	
							ceil_X		 = pX_Array_Value[xx+x].ceil;	
							floor_X		 = pX_Array_Value[xx+x].floor;	
							ifraction_x  = pX_Array_Value[xx+x].ifraction;	
							ione_minus_x = pX_Array_Value[xx+x].ione_Minus_Val;	

							pp1.u16[xx] = (unsigned short)(*(pSrc_FloorY + floor_X));	
							pp2.u16[xx] = (unsigned short)(*(pSrc_FloorY + ceil_X));		
							pp3.u16[xx] = (unsigned short)(*(pSrc_CeilY  + floor_X));	
							pp4.u16[xx] = (unsigned short)(*(pSrc_CeilY  + ceil_X));		

							pIfx.u16[xx] = (unsigned short)ifraction_x;	
							pIofx.u16[xx]= (unsigned short)ione_minus_x;	
						}	

						rxmm1 = _mm_load_si128(&pp1.i);	
						rxmm2 = _mm_load_si128(&pp2.i);	
						rxmm3 = _mm_load_si128(&pp3.i);	
						rxmm4 = _mm_load_si128(&pp4.i);	

						rxmm5 = _mm_load_si128(&pIfx.i);	// ifraction_x 	
						rxmm6 = _mm_load_si128(&pIofx.i);	// ione_minus_x		

						// resize	
						// t1 = (unsigned char)((ione_minus_x *p1 + ifraction_x *p2) >> FW_WEIGHT); 
						rxmm1 = _mm_mullo_epi16 (rxmm1, rxmm6);	//  ione_minus_x *p1 	
						rxmm2 = _mm_mullo_epi16 (rxmm2, rxmm5);	//  ifraction_x  *p2		
						rxmm1 = _mm_add_epi16(rxmm1, rxmm2);	
						rxmm1 = _mm_add_epi16(rxmm1, rxmm8);	
						rxmm1 = _mm_srli_epi16(rxmm1, FW_WEIGHT);	

						//  t2 = (unsigned char)((ione_minus_x *p3 + ifraction_x *p4) >> FW_WEIGHT); 	
						rxmm3 = _mm_mullo_epi16 (rxmm3, rxmm6);	//  ione_minus_x *p1 	
						rxmm4 = _mm_mullo_epi16 (rxmm4, rxmm5);	//  ifraction_x  *p2 	
						rxmm3 = _mm_add_epi16(rxmm3, rxmm4);            
						rxmm3 = _mm_add_epi16(rxmm3, rxmm8);            
						rxmm3 = _mm_srli_epi16(rxmm3, FW_WEIGHT);	


						// *(pDst + x + y*dstStep) = (unsigned char)((ione_minus_y *t1 + ifraction_y * t2) >> FW_WEIGHT)	
						rxmm1 = _mm_mullo_epi16 (rxmm1, rxmm0);	//  ione_minus_y * t1 	
						rxmm3 = _mm_mullo_epi16 (rxmm3, rxmm7);	//  ifraction_y  * t2		
						rxmm1 = _mm_add_epi16(rxmm1, rxmm3);	
						rxmm1 = _mm_add_epi16(rxmm1, rxmm8);	
						rxmm1 = _mm_srli_epi16(rxmm1, FW_WEIGHT);	

						rxmm1 = _mm_packus_epi16(rxmm1, rxmm1);		//  convert to 8 bit 	

						_mm_storel_epi64((__m128i *)(pDst + x + y*dstStep), rxmm1);	
					}	

					for (; x < iEnd_x; x++)	//  for remaining pixels 
					{	

						ceil_X=pX_Array_Value[x].ceil;	
						floor_X=pX_Array_Value[x].floor;	
						ifraction_x = pX_Array_Value[x].ifraction;	
						ione_minus_x = pX_Array_Value[x].ione_Minus_Val;	

						p1 = *(pSrc_FloorY + floor_X);
						p2 = *(pSrc_FloorY + ceil_X);
						p3 = *(pSrc_CeilY  + floor_X);
						p4 = *(pSrc_CeilY  + ceil_X);

						// ione_minus_x and ifraction_x value has been shifted by FW_WEIGHT, but no sturation is needed
						// + half_FW_WEIGHT for rounding
						t1 = (Fw8u)((ione_minus_x *p1 + ifraction_x *p2 + half_FW_WEIGHT) >> FW_WEIGHT); 
						t2 = (Fw8u)((ione_minus_x *p3 + ifraction_x *p4 + half_FW_WEIGHT) >> FW_WEIGHT); 

						*(pDst + x + y*dstStep) = (Fw8u)((ione_minus_y*t1 + ifraction_y*t2 + half_FW_WEIGHT) >> FW_WEIGHT); 
					}
				} 
				else 
				{ 
					for (x = iStart_x; x < iEnd_x; x++)	//  for remaining pixels	
					{	
						ceil_X=pX_Array_Value[x].ceil;	
						floor_X=pX_Array_Value[x].floor;	
						ifraction_x = pX_Array_Value[x].ifraction;	
						ione_minus_x = pX_Array_Value[x].ione_Minus_Val;	

						p1 = *(pSrc_FloorY + floor_X);
						p2 = *(pSrc_FloorY + ceil_X);
						p3 = *(pSrc_CeilY  + floor_X);
						p4 = *(pSrc_CeilY  + ceil_X);

						// ione_minus_x and ifraction_x value has been shifted by FW_WEIGHT, but no sturation is needed
						// + half_FW_WEIGHT for rounding
						t1 = (Fw8u)((ione_minus_x *p1 + ifraction_x *p2 + half_FW_WEIGHT) >> FW_WEIGHT);
						t2 = (Fw8u)((ione_minus_x *p3 + ifraction_x *p4 + half_FW_WEIGHT) >> FW_WEIGHT);

						*(pDst + x + y*dstStep) = (Fw8u)((ione_minus_y*t1 + ifraction_y*t2 + half_FW_WEIGHT) >> FW_WEIGHT);
					}
				}
			}
		}