Пример #1
0
void Chunk::Init() {
	int width = g_BoxWidth;
	int depth = g_BoxDepth;
	int height = g_BoxHeight;

	int nCount = 0;

	float xLimit = 0;
	float yLimit = 0;
	float zLimit = 0;
	
	CPerformanceCounter counter;
	counter.Start();

	m_ChunkPrim.Init();

	m_pPhysContainer = new PhysContainer;
	m_GameObjects = new GameObject[g_BoxWidth * g_BoxDepth * g_BoxHeight];
	m_pBoxGrid = NULL;//new Prim_t*[g_BoxWidth * g_BoxDepth * g_BoxHeight];

	//m_pPhysContainer->halfwidths.Set(width * 4.0f + 0.01f, height * 4.0f + 0.01f, depth * 4.0f + 0.01f);
	//m_pPhysContainer->center.Set( );
	
	Vector3d transPos;
	transPos.Set( m_Pos );//-128.0f, -10.0f, -128.0f);

	for ( int h = 0; h < height; ++h ){
		for ( int i = 0; i < depth; ++i ){
			for ( int j = 0; j < width; ++j ){
				int idx = (h*(depth*width)) + (i*width) + j;
				//printf("idx: %d\n", idx);
				
				int r = rand() % 100;
				
				if( r > 65 ) {
					float x = transPos.x + (j*8);
					float y = transPos.y + (h*8);
					float z = transPos.z + (i*8);
					
					float xFabs = fabs(x);
					float yFabs = fabs(y);
					float zFabs = fabs(z);
					
					if( xFabs > xLimit) xLimit = xFabs;
					if( yFabs > yLimit) yLimit = yFabs;
					if( zFabs > zLimit) zLimit = zFabs;

					/*
					if( r > 75) {
						m_pBoxGrid[ idx ] = CreateBox( "dirt.raw", 4 );
					} else {
						m_pBoxGrid[ idx ] = CreateBox( "test.raw", 4 );
					}
					*/
					//m_pBoxGrid[ idx ] = CreateBox( "test.raw", 4 );
					
					//m_pBoxGrid[ idx ]->vPos.Set( x, y, z );
					m_GameObjects[ idx ].renderable = NULL;
					
					Vector3d boxPos;
					boxPos.Set( transPos.x * -1.0f, transPos.y * -1.0f, transPos.z * -1.0f );
					boxPos.x += x;
					boxPos.y += y;
					boxPos.z += z;
					#if CHUNK_OPTIMIZATIONS
					m_ChunkPrim.AddBoxOpt(boxPos);
					#else
					m_ChunkPrim.AddBox(boxPos);
					#endif
					
					//if(m_pBoxGrid[ idx ] == NULL) {
						//printf("Box call failed\n");
					//}
					
					//printf("Box Pos: %f %f %f\n", g_pBoxGrid[ idx ]->vPos.x, g_pBoxGrid[ idx ]->vPos.y, g_pBoxGrid[ idx ]->vPos.z);
		
					//m_GameObjects[ idx ].renderable = m_pBoxGrid[ idx ];
					m_GameObjects[ idx ].phys = new PhysObject;
		
					//center point
					m_GameObjects[ idx ].phys->center.Set(transPos.x + (j*8), (transPos.y *0.5f) + (h*8), transPos.z + (i*8));
					m_GameObjects[ idx ].phys->halfwidths.Set(4.0f,4.0f,4.0f);

					m_pPhysContainer->AddObject( m_GameObjects[ idx ].phys );
					
					nCount++;
				} else {
					m_GameObjects[ idx ].renderable = NULL;
					m_GameObjects[ idx ].phys = NULL;
					//printf("renderable set to NULL\n");
				}
	
			}
		}
	}

	m_pPhysContainer->halfwidths.Set(width * 4.0f + 0.01f, height * 4.0f + 0.01f, depth * 4.0f + 0.01f);
	m_pPhysContainer->center.Set( transPos.x + ((width-1)*4.0f) , (height * 4.0f) + transPos.y, transPos.z + ((depth-1.0f)*4.0f));
	PhysicsManager::GetInstance().AddContainer( m_pPhysContainer );

	//m_pPhysContainer->SetDebug( true );
	//Vector3d boxPos;
	//m_ChunkPrim.AddBox(boxPos);
	
	#if CHUNK_OPTIMIZATIONS
	m_ChunkPrim.AddFaces();
	#endif
	m_ChunkPrim.Bake();
	m_ChunkPrim.m_Prim.vPos.Set( transPos );

	//printf("%d boxes created\n", nCount);
	
	//m_ChunkPrim.PrintFaces();
	counter.Stop();
	
	printf("Chunk generated in %f seconds.\n", counter.TimeInMilliseconds());
}
Пример #2
0
int main(int argc, char** argv)
{
	bool srcbin = 0;
	bool invbk = 0;
	if(argc < 3){
		printf("Not enough args!\narg1: target image\narg2: source image\narg3: do source image adaptive threshold or not\narg4: invert back ground or not\n");
		getchar();
		return 1;
	}
	if(argc >= 4){
		if(!strcmp(argv[3], "1"))
			srcbin = 1;
	}
	if(argc >= 5){
		if(!strcmp(argv[4], "1"))
			invbk = 1;
	}

	IplImage* srcimg= 0, *srcimgb= 0, *srcimgb2 = 0, *bimg = 0, *b2img = 0,*bugimg = 0, *alg2dst = 0;
	srcimg= cvLoadImage(argv[2], 1);
	if (!srcimg)
	{
		printf("src img %s load failed!\n", argv[2]);
		getchar();
		return 1;
	}
	
	//choosing the parameters for our ccl
	int bn = 8; //how many partitions
	int nwidth = 512;
	if(srcimg->width > 512){
		nwidth = 1024;
		bn = 6;
	}
	if(srcimg->width > 1024){
		nwidth = 2048;
		bn = 3;
	}
	if(srcimg->width > 2048){
		printf("warning, image too wide, max support 2048. image is truncated.\n");
		getchar();
		return 1;
	}
	
	//start selection gpu devices
	int devCount;
	int smCnt = 0;
    cudaGetDeviceCount(&devCount);
 
    // Iterate through devices
	int devChosen = 0;
    for (int i = 0; i < devCount; ++i)
    {
        cudaDeviceProp devProp;
        cudaGetDeviceProperties(&devProp, i);
		if(devProp.major >= 2){//only one device supported
			smCnt = max(smCnt, devProp.multiProcessorCount);
			if(devProp.multiProcessorCount == smCnt)
				devChosen = i;
		}
    }
	
	if(smCnt == 0){
		//our ccl require CUDA cap 2.0 or above, but the Ostava's ccl can be run on any CUDA gpu
		printf("Error, no device with cap 2.x found. Only cpu alg will be run.\n");
		getchar();
		return 1;
	}
	
	if(smCnt != 0){
		cudaSetDevice(devChosen);
		bn = bn * smCnt;
	}

	int nheight = (cvGetSize(srcimg).height-2) / (2*bn);
	if((nheight*2*bn+2) < cvGetSize(srcimg).height)
		nheight++;
	nheight = nheight*2*bn+2;

	if(smCnt != 0)
		printf("gpu ccl for image width 512, 1024, 2048.\nchoosing device %d, width %d, height %d, blocks %d\n", devChosen, nwidth, nheight, bn);

	srcimgb= cvCreateImage(cvSize(nwidth, cvGetSize(srcimg).height),IPL_DEPTH_8U,1);
	srcimgb2= cvCreateImage(cvSize(nwidth, cvGetSize(srcimg).height),IPL_DEPTH_8U,1);
	cvSetImageROI(srcimg, cvRect(0, 0, min(cvGetSize(srcimg).width, nwidth), cvGetSize(srcimg).height));
	cvSetImageROI(srcimgb2, cvRect(0, 0, min(cvGetSize(srcimg).width, nwidth), cvGetSize(srcimg).height));
	cvSet(srcimgb2, cvScalar(0,0,0));
	cvCvtColor(srcimg, srcimgb2, CV_BGRA2GRAY);
	cvResetImageROI(srcimgb2);
	cvReleaseImage(&srcimg);
	if(srcbin)
		cvAdaptiveThreshold(srcimgb2, srcimgb, 1.0, CV_ADAPTIVE_THRESH_MEAN_C, invbk ? CV_THRESH_BINARY_INV :  CV_THRESH_BINARY);
	else
		cvThreshold(srcimgb2, srcimgb, 0.0, 1.0, invbk ? CV_THRESH_BINARY_INV :  CV_THRESH_BINARY);
	boundCheck(srcimgb);

	cvScale(srcimgb, srcimgb2, 255);
	//the source binary image to be labeled is saved as bsrc.bmp
	cvSaveImage("bsrc.bmp", srcimgb2);
	cvSet(srcimgb2, cvScalar(0,0,0));
	
	float elapsedMilliSeconds1;
	{//begin cpu labeling algorithm, the SBLA proposed by Zhao
		LABELDATATYPE *data=(LABELDATATYPE *)malloc(srcimgb->width * srcimgb->height * sizeof(LABELDATATYPE));
	
		for(int j = 0; j<srcimgb->height; j++)
			for(int i = 0; i<srcimgb->width; i++)
				data[i + j*srcimgb->width] = (srcimgb->imageData[i + j*srcimgb->widthStep]) ? 1 : 0;

		int iNumLabels;
		CPerformanceCounter perf;
		perf.Start();
    	iNumLabels = LabelSBLA(data, srcimgb->width, srcimgb->height);
		elapsedMilliSeconds1 = (float)perf.GetElapsedMilliSeconds();
		printf("cpu SBLA used %f ms, total labels %u\n", elapsedMilliSeconds1, iNumLabels);
		free(data);
	}
	
	IplImage *src2(0),*dst2(0);
	int iNumLabels;
	float elapsedMilliSeconds2;
	{//begin cpu labeling algorithm, the BBDT proposed by C. Grana, D. Borghesani, R. Cucchiara
		CPerformanceCounter perf;
		src2 = cvCreateImage( cvGetSize(srcimgb), IPL_DEPTH_8U, 1 );
		cvCopyImage(srcimgb,src2);
		dst2 = cvCreateImage( cvGetSize(srcimgb), IPL_DEPTH_32S, 1 );
		perf.Start();
		cvLabelingImageLab(src2, dst2, 1, &iNumLabels);
		elapsedMilliSeconds2 = (float)perf.GetElapsedMilliSeconds();
		printf("cpu BBDT used %f ms, total labels %u\n", elapsedMilliSeconds2, iNumLabels);
		cvSaveImage("bbdt.bmp", dst2);
//		cvReleaseImage(&src2);
//		cvReleaseImage(&dst2);
	}

	if(smCnt != 0){

	bugimg = cvCreateImage(cvSize(nwidth, 9*bn),IPL_DEPTH_8U,1);
	bimg = cvCreateImage(cvSize(nwidth, 2*bn),IPL_DEPTH_8U,1);
	b2img = cvCreateImage(cvSize(nwidth, 2*bn),IPL_DEPTH_8U,1);

//    cvNamedWindow("src",CV_WINDOW_AUTOSIZE);
//	cvShowImage("src",srcimg);

	//prepare buffers for our gpu algorithm
	CudaBuffer srcBuf, dstBuf, dstBuf2, bBuf, b2Buf, errBuf, glabel;
	srcBuf.Create2D(nwidth, nheight);		//the binary image to be processed
	dstBuf.Create2D(nwidth, (nheight-2)/2); //the label result, only about 1/4 the size of source image contains the final labels
	dstBuf2.Create2D(nwidth,(nheight-2)/2);	//a copy of the pass1 temp result, for debug purpose
	glabel.Create2D(4, 1);					//a int size global buffer for unique final label
	errBuf.Create2D(nwidth, 9*bn);			//a buffer for debug info
	bBuf.Create2D(nwidth, 2 * bn);			//the intersection info used by pass2
	b2Buf.Create2D(nwidth, 2 * bn);			//a copy of bBuf for debug purpose

	srcBuf.SetZeroData();
	srcBuf.CopyFrom(srcimgb->imageData, srcimgb->widthStep, nwidth, cvGetSize(srcimgb).height);

	float elapsedTimeInMs = 0.0f;
    
	//-------------------gpu part----------------------------
    cudaEvent_t start, stop;
    cutilSafeCall  ( cudaEventCreate( &start ) );
    cutilSafeCall  ( cudaEventCreate( &stop ) );
    cutilSafeCall( cudaEventRecord( start, 0 ) );  
	
	if(nwidth == 512)
		label_512(&dstBuf, &dstBuf2, &srcBuf, &bBuf, &b2Buf, &glabel, nheight, bn, &errBuf);
	else if(nwidth == 1024)
		label_1024(&dstBuf, &dstBuf2, &srcBuf, &bBuf, &b2Buf, &glabel, nheight, bn, &errBuf);
	else if(nwidth == 2048)
		label_2048(&dstBuf, &dstBuf2, &srcBuf, &bBuf, &b2Buf, &glabel, nheight, bn, &errBuf);

    cutilSafeCall( cudaEventRecord( stop, 0 ) );
//	cutilCheckMsg("kernel launch failure");
	cudaEventSynchronize(stop);
	cutilSafeCall( cudaEventElapsedTime( &elapsedTimeInMs, start, stop ) );
	uint tlabel = 0;

	cudaMemcpy(&tlabel, glabel.GetData(), 4, cudaMemcpyDeviceToHost);
	printf("gpu alg 1 used %f ms, total labels %u\n", elapsedTimeInMs, tlabel);

	dstBuf.CopyToHost(srcimgb->imageData, srcimgb->widthStep, nwidth, (nheight-2)/2);
	dstBuf2.CopyToHost(srcimgb2->imageData, srcimgb->widthStep, nwidth, (nheight-2)/2);
	errBuf.CopyToHost(bugimg->imageData, bugimg->widthStep, nwidth, 9*bn);
	bBuf.CopyToHost(bimg->imageData, bimg->widthStep, nwidth, 2*bn);
	b2Buf.CopyToHost(b2img->imageData, bimg->widthStep, nwidth, 2*bn);

//	cvNamedWindow("gpu",CV_WINDOW_AUTOSIZE);
//	cvShowImage("gpu",srcimgb);
	cvSaveImage(argv[1], srcimgb);
	cvSaveImage("gpu2.bmp", srcimgb2);	//the final labels of our algorithm
	cvSaveImage("bug.bmp", bugimg);
	cvSaveImage("b.bmp", bimg);
	cvSaveImage("b2.bmp", b2img);
	
	//now start the gpu ccl implemented by Ostava
	alg2dst= cvCreateImage(cvSize(nwidth*4, cvGetSize(srcimgb).height),IPL_DEPTH_8U,1);
	CCLBase* m_ccl;
	m_ccl = new CCL();	

	m_ccl->FindRegions(nwidth, cvGetSize(srcimgb).height, &srcBuf);
	m_ccl->GetConnectedRegionsBuffer()->CopyToHost(alg2dst->imageData, alg2dst->widthStep, nwidth*4, cvGetSize(srcimgb).height);
	delete m_ccl;
	cvSaveImage("alg2.bmp", alg2dst);

	cvReleaseImage(&bugimg);
	cvReleaseImage(&bimg);
	cvReleaseImage(&b2img);
	cvReleaseImage(&alg2dst);
//	}
	//cvWaitKey(0);
	
	//now start cross compare label results of our ccl and the BBDT, to check the correctness
//	if(smCnt != 0){
		ushort *gpures, *cpures;
		uint sz = nwidth * (cvGetSize(srcimgb).height/2);
		gpures = (ushort*)malloc(sz);
		cpures = (ushort*)malloc(sz);
		dstBuf.CopyToHost(gpures, nwidth, nwidth, (cvGetSize(srcimgb).height/2));
		
		//first, reduce cpu labels from one label for each pixel to one label for a 2x2 block, assuming 8-connectivity
		for(int j = 0; j < (cvGetSize(srcimgb).height/2); j++)
			for(int i = 0; i < (nwidth/2); i++){
				uint* cpup;
				ushort res = LBMAX;
				uint y = j*2, x = i*2;
				cpup = (uint*)(dst2->imageData + y*dst2->widthStep);
//				if(y < cvGetSize(srcimgb).height){
					if(cpup[x] != 0)
						res = cpup[x]-1;
					if(cpup[x+1] != 0)
						res = cpup[x+1]-1;
//				}
				y++;
				cpup = (uint*)(dst2->imageData + y*dst2->widthStep);
//				if(y < cvGetSize(srcimgb).height){
					if(cpup[x] != 0)
						res = cpup[x]-1;
					if(cpup[x+1] != 0)
						res = cpup[x+1]-1;
//				}
				cpures[i + j*(nwidth/2)] = res;
			}
		
		//our algo use unsigned short to represent a label, the first label starts a 0, and maximun labels is LBMAX
		if(iNumLabels > LBMAX)
			printf("too much cc, compare abort.\n");
		else{
			//create a error
			//cpures[5] = 12;
			//cpures[15] = 18;
			printf("Checking correctness of gpu alg1\nChecking gpu ref by cpu.\n");
			checkLabels(cpures, gpures, nwidth/2, cvGetSize(srcimgb).height/2, iNumLabels);

			printf("Checking cpu ref by gpu.\n");
			checkLabels(gpures, cpures, nwidth/2, cvGetSize(srcimgb).height/2, tlabel);
		}

		free(gpures);
		free(cpures);
		printf("speedup is %f, %f, %f\n", gpu2time/elapsedTimeInMs, elapsedMilliSeconds1/elapsedTimeInMs, elapsedMilliSeconds2/elapsedTimeInMs);
	}

	cvReleaseImage(&srcimgb);
	cvReleaseImage(&srcimgb2);
	cvReleaseImage(&dst2);
	cvReleaseImage(&src2);

    cutilSafeCall( cudaThreadExit() );
	return 0;

}