/*
 * Fills the 3x3 matrix `m` with a rotation about the Z axis by angle `r`.
 *
 * Every element of `m` is overwritten with:
 *
 *     |  cos r   -sin r   0 |
 *     |  sin r    cos r   0 |
 *     |    0        0     1 |
 *
 * The angle unit is whatever the project's mcos()/msin() helpers expect
 * (their definitions are not visible in this part of the file).
 */
void rotate_z(float m[3][3], float r)
{
    /* Upper-left 2x2 block: the actual rotation in the XY plane. */
    m[0][0] = mcos(r);
    m[0][1] = -msin(r);
    m[1][0] = msin(r);
    m[1][1] = mcos(r);

    /* Third row and third column: the Z component passes through unchanged. */
    m[0][2] = 0;
    m[1][2] = 0;
    m[2][0] = 0;
    m[2][1] = 0;
    m[2][2] = 1;
}
/*
 * Intro effect: loads "cool.gpi" / "cool.pal" and repeatedly draws rows of the
 * picture as horizontal spans whose half-width shrinks by one pixel per frame
 * until each row reaches a precomputed target half-width.
 *
 * Tables used:
 *   el[t]  - target half-width of row t, computed as (*source1 * msin(a) / 2)
 *            for the first 50 angles `a` at which (int)(50 * mcos(a)) changes.
 *   kur[t] - current half-width of row t; starts at *source1 and is
 *            decremented once per frame while it is still above el[t].
 *
 * NOTE(review): *source1 is used both as the per-row stride into the picture
 * data and (minus one) as the last hsline() argument, so it presumably holds
 * the picture width -- confirm against loadgpi().
 *
 * Fixes relative to the previous version:
 *   - `y` was compared before ever being assigned (indeterminate value).
 *   - `el[t]` was written even after `t` had reached 50, i.e. one element past
 *     the end of the array; the table-building loop now stops once it is full.
 *   - `el` is zero-initialised so rows are well defined even if fewer than 50
 *     entries are produced.
 *   - the picture pointer is checked before it is dereferenced.
 */
void demo()
{
    setgr();
    source1 = loadgpi("cool.gpi", 0);
    readgpipal("cool.pal", 0, 255, 0);

    /* NOTE(review): assumes loadgpi() reports failure with a null pointer. */
    if (!source1)
        return;

    char bol = 1;
    int el[50] = {0};
    int t = 0;
    int y = 0;
    int have_y = 0;   /* becomes 1 once `y` holds a real value */
    float temp;

    /* Build the target half-width table; stop as soon as all 50 slots are set. */
    for (temp = 1; temp < 180 && t < 50; temp += 0.1388) {
        const int row = (int)(50 * mcos(temp));
        if (!have_y || y != row) {
            have_y = 1;
            y = row;
            el[t] = (int)(*source1 * msin(temp) / 2);
            t++;
        }
    }

    int kur[50];
    for (t = 0; t < 50; t++)
        kur[t] = *source1;

    /* Animate until no row shrank during a whole frame. */
    while (bol == 1) {
        clearpot(0);
        bol = 0;
        /* Index 0 is skipped, exactly as in the original loop. */
        for (t = 1; t < 50; t++) {
            /* Row t and its mirror row (99 - t) share the same half-width. */
            hsline(160 - kur[t], 160 + kur[t], 50 + t,
                   source1 + *source1 * t + 2, *source1 - 1);
            hsline(160 - kur[t], 160 + kur[t], 50 + 99 - t,
                   source1 + *source1 * (99 - t) + 2, *source1 - 1);
            if (kur[t] > el[t]) {
                bol = 1;
                kur[t]--;
            }
        }
        // delay(100);
    }

    free(source1);
}
void Matrix::zrotate (float t) { float st=msin(t), ct=mcos(t); clear (); /* | cos t -sin t 0 | Mr = | sin t cos t 0 | | 0 0 0 | */ m[10] = m[15] = 1.0f; m[0] = ct; m[1] = -st; m[4] = st; m[5] = ct; }
void Matrix::xrotate (float t) { float st = msin(t), ct = mcos(t); clear (); /* | 1 0 0 | Mr = | 0 cos t -sin t | | 0 sin t cos t | */ m[0] = 1.0f; m[5] = ct; m[6] = -st; m[9] = st; m[10] = ct; m[15] = 1.0f; }
void Matrix::yrotate (float t) { float st=msin(t), ct=mcos(t); clear (); /* | cos t 0 -sin t | Mr = | 0 1 0 | | sin t 0 cos t | */ m[0] = ct; m[2] = -st; m[5] = 1.0f; m[8] = st; m[10] = ct; m[15] = 1.0f; }
void Matrix::vector_rotation (const Vector3& cv, float angle) { float c = mcos(angle); float s = msin(angle); float cc = 1 - c; clear (); Vector3 axis(cv); axis.normalize (); v(0,0) = (cc * axis.x * axis.x) + c; v(0,1) = (cc * axis.x * axis.y) + (axis.z * s); v(0,2) = (cc * axis.x * axis.z) - (axis.y * s); v(1,0) = (cc * axis.x * axis.y) - (axis.z * s); v(1,1) = (cc * axis.y * axis.y) + c; v(1,2) = (cc * axis.z * axis.y) + (axis.x * s); v(2,0) = (cc * axis.x * axis.z) + (axis.y * s); v(2,1) = (cc * axis.y * axis.z) - (axis.x * s); v(2,2) = (cc * axis.z * axis.z) + c; }
/*
 * Computes one descriptor row per grid position from image `I`.
 *
 * @param I           Input image; it is filtered with the two derivative
 *                    kernels built below.
 * @param gridX       Per-patch x origin, read as float via at<float>(row, col).
 * @param gridY       Per-patch y origin, same layout as gridX.
 * @param patchSize   Side length of each patch in pixels.
 * @param sigma_edge  Sigma handed to gaussian() when building the kernels.
 * @return            CV_32F matrix with gridX.total() rows and
 *                    num_samples * num_angles (16 * 8 = 128) columns.
 *
 * Outline:
 *   1. Build Gaussian-derivative kernels, filter I, derive gradient magnitude
 *      and angle.
 *   2. For each of the 8 reference angles compute a per-pixel response
 *      compareB(cos(theta - angle)^alpha, 0) * magnitude.
 *   3. For every grid patch, weight the patch pixels of each response map
 *      against 16 sample points and sum per sample point.
 *
 * The helpers create, deleteO, gaussian, gradientX/Y, totalSumO, filter2,
 * matan2, meshgrid_X/Y, reshapeX/Y, compareB, Add, Less, repmat and
 * sum_every_col are defined elsewhere; comments about them below are
 * assumptions based on how they are used here.
 */
cv::Mat SIFT::sp_find_sift_grid( Mat I,Mat gridX,Mat gridY,int patchSize, float sigma_edge )
{
    int num_angles = 8;
    float num_bins = 4;
    int num_samples = num_bins * num_bins;
    int alpha = 9;

    // Original author's note: the number of supplied arguments should be
    // checked here and sigma_edge set to 1 when fewer than 5 are given.
    // (No such check is implemented in this function.)
    float angle_step = 2 * pi / num_angles;

    // Reference angles from 0 to 2*pi in steps of angle_step, then the last
    // element is removed (per the original note on deleteO).
    Mat angles = create(0, 2 * pi, angle_step);
    angles = deleteO(angles);

    CvSize size = I.size();

    // One output row per grid element.
    int num_patches = gridX.total();
    Mat sift_arr = Mat::zeros(num_patches, num_samples * num_angles, CV_32F);

    // ---- filter kernels -------------------------------------------------
    int f_wid = 4 * ceil(sigma_edge) + 1;
    Mat gauss = gaussian(f_wid, sigma_edge);
    Mat kernel_x = gradientX(gauss);
    Mat kernel_y = gradientY(gauss);
    kernel_x = kernel_x * 2 / totalSumO(kernel_x);
    kernel_y = kernel_y * 2 / totalSumO(kernel_y);

    // ---- image gradients ------------------------------------------------
    // Original author's note: results depend on how the image I was loaded,
    // so values derived from I differ between loaders, but were all checked
    // to be correct.
    Mat grad_x = filter2(kernel_x, I);
    Mat grad_y = filter2(kernel_y, I);

    Mat mag_sq(grad_x.rows, grad_x.cols, CV_32F);
    add(grad_x.mul(grad_x), grad_y.mul(grad_y), mag_sq);
    Mat grad_mag(grad_x.rows, grad_x.cols, CV_32F);
    sqrt(mag_sq, grad_mag);
    Mat grad_theta = matan2(grad_y, grad_x);

    // ---- sample-point offsets shared by every patch ------------------------
    Mat offsets = create(2 / num_bins, 2, 2 / num_bins);
    offsets -= (1 / num_bins + 1);
    Mat unit_x = meshgrid_X(offsets, offsets);
    Mat unit_y = meshgrid_Y(offsets, offsets);
    unit_x = reshapeX(unit_x);   // flattened to a 1-D matrix (original note)
    unit_y = reshapeX(unit_y);

    // ---- per-angle response maps ------------------------------------------
    // NOTE(review): the array length is a literal 8 while the loop below runs
    // to num_angles; the two must stay in sync.
    Mat orient_maps[8];
    for (int k = 0; k < 8; k++)
    {
        orient_maps[k] = Mat::zeros(size, CV_32F);
    }

    float *angle_ptr = angles.ptr<float>(0);
    for (int a = 0; a < num_angles; a++)
    {
        Mat cosine = mcos(grad_theta - angle_ptr[a]);
        Mat response(cosine.rows, cosine.cols, CV_32F);
        pow(cosine, alpha, response);
        response = compareB(response, 0);
        orient_maps[a] = response.mul(grad_mag);
    }

    // ---- one descriptor per grid patch --------------------------------------
    // NOTE(review): integer division; for odd patchSize the half is truncated.
    double r = patchSize / 2;

    for (int patch = 0; patch < num_patches; patch++)
    {
        // Grid element `patch` is addressed as (patch % rows, patch / rows).
        float grid_col = (float)(patch / gridX.rows);
        float grid_row = patch % gridX.rows;

        // Patch centre and the 16 sample points scaled and shifted onto it.
        float center_x = gridX.at<float>(grid_row, grid_col) + r - 0.5;
        float center_y = gridY.at<float>(grid_row, grid_col) + r - 0.5;
        Mat sample_x_t = Add(unit_x * r, center_x);
        Mat sample_y_t = Add(unit_y * r, center_y);

        // Spacing between the first two sample points.
        float *sy_ptr = sample_y_t.ptr<float>(0);
        float sample_res = sy_ptr[1] - sy_ptr[0];

        // Pixel bounds of the patch.
        int x_lo = gridX.at<float>(patch % gridX.rows, patch / gridX.rows);
        int x_hi = patchSize + x_lo - 1;
        int y_lo = gridY.at<float>(patch % gridY.rows, patch / gridY.rows);
        int y_hi = y_lo + patchSize - 1;

        // Coordinates of every pixel in the patch, flattened.
        Mat xs = create(x_lo, x_hi, 1);
        Mat ys = create(y_lo, y_hi, 1);
        Mat pix_x = meshgrid_X(xs, ys);
        Mat pix_y = meshgrid_Y(xs, ys);
        int num_pix = pix_x.total();   // total number of patch pixels
        pix_x = reshapeY(pix_x);
        pix_y = reshapeY(pix_y);

        // |pixel - sample point| for every (pixel, sample point) pair.
        Mat dist_x = abs(repmat(pix_x, 1, num_samples) - repmat(sample_x_t, num_pix, 1));
        Mat dist_y = abs(repmat(pix_y, 1, num_samples) - repmat(sample_y_t, num_pix, 1));

        // Per-axis weight (1 - d/spacing), masked by Less(d/spacing, 1).
        Mat weights_x = dist_x / sample_res;
        Mat near_x = Less(weights_x, 1);
        weights_x = (1 - weights_x).mul(near_x);

        Mat weights_y = dist_y / sample_res;
        Mat near_y = Less(weights_y, 1);
        weights_y = (1 - weights_y).mul(near_y);

        Mat weights = weights_x.mul(weights_y);

        Mat curr_sift = Mat::zeros(num_angles, num_samples, CV_32F);
        for (int a = 0; a < num_angles; a++)
        {
            // NOTE(review): cv::Range is documented as half-open [start, end),
            // so this ROI may be one row/column smaller than the pixel grid
            // built above; the zero padding below then makes the sizes match.
            // Confirm whether end + 1 was intended.
            Mat roi = orient_maps[a](Range(y_lo, y_hi), Range(x_lo, x_hi));
            Mat column = reshapeY(roi);

            // Pad with zeros so the row count fits the weight matrix.
            if (column.cols < weights.cols)
            {
                for (int row = column.rows; row < weights.rows; row++)
                    column.push_back(0.0f);
            }

            column = repmat(column, 1, num_samples);
            Mat weighted = column.mul(weights);
            Mat col_sums = sum_every_col(weighted);

            float *sums = col_sums.ptr<float>(0);
            for (int s = 0; s < curr_sift.cols; s++)
            {
                curr_sift.at<float>(a, s) = sums[s];
            }
        }

        // Flatten the per-angle sums into output row `patch`.
        Mat flat = reshapeX(curr_sift);
        float *flat_ptr = flat.ptr<float>(0);
        for (int j = 0; j < sift_arr.cols; j++)
        {
            sift_arr.at<float>(patch, j) = flat_ptr[j];
        }
    }

    return sift_arr;
}
/* Renders this SPU's share of one Mandelbrot frame and DMAs it to main memory.
 *
 * buf_ea is the base address handed to mfc_put(); line `row` is written at
 * buf_ea + row * (spu.width * 4), i.e. 4 bytes per pixel.
 *
 * The view (spu.xc, spu.yc, spu.zoom), the image size (spu.width, spu.height)
 * and the work split (spu.rank, spu.count) are read from the global `spu`.
 *
 * NOTE(review): the local line buffer holds two lines of 1920 pixels; no check
 * that spu.width <= 1920 (or that it is a multiple of 4) is visible here.
 */
void draw_frame(uint64_t buf_ea)
{
  vec_uint4 buf[2*1920/4];
  int row, col, i, tag = 0;
  /* Distance between two horizontally adjacent pixels in the complex plane. */
  float step = 4.0f/spu.width*spu.zoom;
  float xbeg = spu.xc - spu.width*step*0.5f;
  /* x coordinates of the first four pixels of a line, one per vector lane. */
  vec_float4 vxbeg = spu_splats(xbeg) + spu_splats(step) * (vec_float4) { 0.f,1.f,2.f,3.f };
  /* Advance of four pixels, applied after each packet. */
  vec_float4 xstep = spu_splats(step)*spu_splats(4.f);
  /* y coordinate of this SPU's first line (line number spu.rank). */
  vec_float4 vyp = spu_splats(spu.yc - spu.height*step*0.5f + step*spu.rank);
  /* y advance between two lines handled by this SPU (spu.count lines apart). */
  const vec_float4 vinc = spu_splats(spu.count * step);
  const vec_float4 esc2 = spu_splats(BAILOUT*BAILOUT);
#if BAILBITS != 1
  const vec_float4 esc21 = spu_splats(4.f/(BAILOUT*BAILOUT));
#endif
  const vec_float4 two = spu_splats(2.f);
  const vec_float4 zero = spu_splats(0.f);
  const vec_float4 colsc = spu_splats(255.f);
  /* Per-channel scale factors applied to the smooth iteration count. */
  const vec_float4 ccr = spu_splats(4.f*BAILOUT/(3.5f*3.141592654f));
  const vec_float4 ccg = spu_splats(4.f*BAILOUT/(5.f*3.141592654f));
  const vec_float4 ccb = spu_splats(4.f*BAILOUT/(9.f*3.141592654f));
  vec_float4 x, y, x2, y2, m2, vxp;
  vec_uint4 cmp, inc;
  vec_uint4 vi;
  vec_uint4 *p, *b;
  vec_float4 co;

  /* Process the full image. The SPUs work in parallel, each with a different
   * rank; each SPU processes only the line numbers:
   *   rank, rank+count, rank+2*count, ...
   * (with 6 SPUs: rank, rank+6, rank+12, ...).
   * The program uses a SPU DMA programming technique known as "double buffering",
   * where the previously generated line is transmitted to main memory while we
   * compute the next one, hence the need for a local buffer containing two lines.
   */
  for (row = spu.rank; row < spu.height; row += spu.count) {
    /* Pixel buffer address (in local memory) of the next line to be drawn.
     * tag is 0 or 1, so -tag is 0 or all ones and the offset is 0 or 1920/4. */
    b = p = buf + ((1920/4)&-tag);
    vxp = vxbeg; /* first four x coordinates */

    /* Process a whole screen line by packets of 4 pixels */
    for (col = spu.width/4; col > 0 ; col--) {
      vi = spu_splats(0u);
      x = vxp;
      y = vyp;
      i = 0;
      cmp = spu_splats(-1u);
      inc = spu_splats(1u);
      m2 = zero;

      /* This loop iterates the Mandelbrot sequence for the four complex numbers
       * whose real parts are the components of the x vector, and whose imaginary
       * parts are in y (as we process the same line, all initial values of y
       * are equal).
       * m2 keeps the squared magnitude per lane; it is only updated in lanes
       * selected by cmp, so it stops changing once a lane has escaped.
       * We perform loop unrolling for SPU performance optimization reasons,
       * hence the 4x replication of the same computation block.
       */
      do {
        x2 = x*x; y2 = y*y;
        m2 = spu_sel(m2, x2+y2, cmp);
        cmp = spu_cmpgt(esc2, m2);
        inc = spu_and(inc, cmp); /* increment the iteration count only if */
        vi = vi + inc;           /* we're still inside the bailout radius */
        y = two*x*y + vyp;
        x = x2-y2 + vxp;

        x2 = x*x; y2 = y*y;
        m2 = spu_sel(m2, x2+y2, cmp);
        cmp = spu_cmpgt(esc2, m2);
        inc = spu_and(inc, cmp);
        vi = vi + inc;
        y = two*x*y + vyp;
        x = x2-y2 + vxp;

        x2 = x*x; y2 = y*y;
        m2 = spu_sel(m2, x2+y2, cmp);
        cmp = spu_cmpgt(esc2, m2);
        inc = spu_and(inc, cmp);
        vi = vi + inc;
        y = two*x*y + vyp;
        x = x2-y2 + vxp;

        x2 = x*x; y2 = y*y;
        m2 = spu_sel(m2, x2+y2, cmp);
        cmp = spu_cmpgt(esc2, m2);
        inc = spu_and(inc, cmp);
        vi = vi + inc;
        y = two*x*y + vyp;
        x = x2-y2 + vxp;

        i += 4;
      }
      /* Exit the loop only if the iteration limit of 128 has been reached,
       * or all current four points are outside the bailout radius.
       * The __builtin_expect(xxx, 1) construct hints the compiler that the xxx
       * test has greater chance of being true (1), so a branch hinting
       * instruction is inserted into the binary code to make the conditional
       * branch faster in most cases (except the last one when we exit the
       * loop). This results in performance increase.
       */
      while (__builtin_expect((i < 128) & (si_to_int((qword)spu_gather(cmp)) != 0), 1));

      /* smooth coloring: compute the fractional part */
      co = spu_convtf(vi, 0) + spu_splats(1.f);
      co -= fast_logf(fast_logf(m2) * spu_splats(.5f));
#if BAILBITS != 1
      co = spu_re(spu_rsqrte(co*esc21));
#endif
      /* Compute the red, green and blue pixel components */
      vec_uint4 cr = spu_convtu(mcos(co * ccr) * colsc, 0);
      vec_uint4 cg = spu_convtu(mcos(co * ccg) * colsc, 0);
      vec_uint4 cb = spu_convtu(mcos(co * ccb) * colsc, 0);
      /* Put the 4 pixel values in the buffer, packed as (r << 16) | (g << 8) | b.
       * Lanes where inc is still 1 (the point never left the bailout radius)
       * give -inc = all ones, so ~-inc clears those pixels to 0. */
      *p++ = (spu_sl(cr, 16) | spu_sl(cg, 8) | cb) & ~-inc;
      vxp += xstep;
    }

    /* double-buffered dma: initiate a dma transfer of the last computed scanline,
     * then wait for completion of the second-to-last transfer (the previously
     * computed line). This is done by changing the tag value.
     */
    mfc_put(b, buf_ea+(spu.width*4)*row, spu.width*4, tag, 0, 0);
    tag = 1 - tag;
    wait_for_completion(tag);
    vyp += vinc;
  }
  /* wait for completion of last sent image line */
  wait_for_completion(1-tag);
}