Esempio n. 1
0
	void PTXParser::parse( std::istream& input, 
		ir::Instruction::Architecture language )
	{
		assert( language == ir::Instruction::PTX );
	
		std::stringstream temp;
		
		report( "Parsing file " << fileName );
		report( "Running main parse pass." );
		
		parser::PTXLexer lexer( &input, &temp );
		reset();
		
		try 
		{
			state.addSpecialRegisters();
			ptx::yyparse( lexer, state );
			assertM( temp.str().empty(),
				"Failed to lex all characters, remainder is:\n"
				<< (int)temp.str()[0] );
		
			checkLabels();
		}
        catch( PTXParseException& e )
		{
			e.message = "\nFailed to parse file '" + fileName + "':\n" +
				getLinesNearCurrentLocation(input) +
				"\n" + e.message;
			
			report("parse error");
			report(e.what());
			throw e;
		}
	}
Esempio n. 2
0
int main(int argc, char* argv[])
{
    char inputFile[FILENAME_MAX];
    char temp[FILENAME_MAX];
    int flag = TRUE;
    int i;
    FILE *file;
    if(argc==1)
    {
        printf("No files to compile\n");
        return 0;
    }
/*Loop through file names.*/
    for(i=1; i<argc; i++)
    {
        fileName = argv[i];
        strcpy(inputFile,argv[i]);
        strcat(inputFile,".as");
        file = fopen (inputFile, "r");
        if (!file)
        {
            printf("File %s does not exist\n",fileName);
            return 0;
        }
/*Collect the input from the file and make first validation*/
        flag = buildMatrix(file);
/*Swap all the Dollars operand and check that the swaps are legal*/
        if(flag)
            flag = changeDollars();
/*Check that all labels, commands and operands are valid*/
        if(!checkLabels()||!checkAllCommands())
            flag = FALSE;
/*If flag is TRUE, can go on and parser the input*/
        if(flag)
        {
            buildSymbolTable();
            buildOutputTable();
            freeTables();
        /*Delete temp files*/
            strcpy(temp,fileName);
            strcat(temp,".ex");
            remove(temp);
            strcpy(temp,fileName);
            strcat(temp,".en");
            remove(temp);
            printf("compiled %s.\n",fileName);
        }
/*if Flag is false, print error*/
        else
        {
        /*Print error*/
            printf("Cannot compile %s.\n",fileName);
        /*Delete temp file*/
            strcpy(temp,fileName);
            strcat(temp,".ex");
            remove(temp);
            strcpy(temp,fileName);
            strcat(temp,".en");
            remove(temp);
        }
/*Free input table*/
        freeMatrixMem();
        fclose(file);
    } /*end for*/
    return 0;
}
Esempio n. 3
0
int main(int argc, char** argv)
{
	bool srcbin = 0;
	bool invbk = 0;
	if(argc < 3){
		printf("Not enough args!\narg1: target image\narg2: source image\narg3: do source image adaptive threshold or not\narg4: invert back ground or not\n");
		getchar();
		return 1;
	}
	if(argc >= 4){
		if(!strcmp(argv[3], "1"))
			srcbin = 1;
	}
	if(argc >= 5){
		if(!strcmp(argv[4], "1"))
			invbk = 1;
	}

	IplImage* srcimg= 0, *srcimgb= 0, *srcimgb2 = 0, *bimg = 0, *b2img = 0,*bugimg = 0, *alg2dst = 0;
	srcimg= cvLoadImage(argv[2], 1);
	if (!srcimg)
	{
		printf("src img %s load failed!\n", argv[2]);
		getchar();
		return 1;
	}
	
	//choosing the parameters for our ccl
	int bn = 8; //how many partitions
	int nwidth = 512;
	if(srcimg->width > 512){
		nwidth = 1024;
		bn = 6;
	}
	if(srcimg->width > 1024){
		nwidth = 2048;
		bn = 3;
	}
	if(srcimg->width > 2048){
		printf("warning, image too wide, max support 2048. image is truncated.\n");
		getchar();
		return 1;
	}
	
	//start selection gpu devices
	int devCount;
	int smCnt = 0;
    cudaGetDeviceCount(&devCount);
 
    // Iterate through devices
	int devChosen = 0;
    for (int i = 0; i < devCount; ++i)
    {
        cudaDeviceProp devProp;
        cudaGetDeviceProperties(&devProp, i);
		if(devProp.major >= 2){//only one device supported
			smCnt = max(smCnt, devProp.multiProcessorCount);
			if(devProp.multiProcessorCount == smCnt)
				devChosen = i;
		}
    }
	
	if(smCnt == 0){
		//our ccl require CUDA cap 2.0 or above, but the Ostava's ccl can be run on any CUDA gpu
		printf("Error, no device with cap 2.x found. Only cpu alg will be run.\n");
		getchar();
		return 1;
	}
	
	if(smCnt != 0){
		cudaSetDevice(devChosen);
		bn = bn * smCnt;
	}

	int nheight = (cvGetSize(srcimg).height-2) / (2*bn);
	if((nheight*2*bn+2) < cvGetSize(srcimg).height)
		nheight++;
	nheight = nheight*2*bn+2;

	if(smCnt != 0)
		printf("gpu ccl for image width 512, 1024, 2048.\nchoosing device %d, width %d, height %d, blocks %d\n", devChosen, nwidth, nheight, bn);

	srcimgb= cvCreateImage(cvSize(nwidth, cvGetSize(srcimg).height),IPL_DEPTH_8U,1);
	srcimgb2= cvCreateImage(cvSize(nwidth, cvGetSize(srcimg).height),IPL_DEPTH_8U,1);
	cvSetImageROI(srcimg, cvRect(0, 0, min(cvGetSize(srcimg).width, nwidth), cvGetSize(srcimg).height));
	cvSetImageROI(srcimgb2, cvRect(0, 0, min(cvGetSize(srcimg).width, nwidth), cvGetSize(srcimg).height));
	cvSet(srcimgb2, cvScalar(0,0,0));
	cvCvtColor(srcimg, srcimgb2, CV_BGRA2GRAY);
	cvResetImageROI(srcimgb2);
	cvReleaseImage(&srcimg);
	if(srcbin)
		cvAdaptiveThreshold(srcimgb2, srcimgb, 1.0, CV_ADAPTIVE_THRESH_MEAN_C, invbk ? CV_THRESH_BINARY_INV :  CV_THRESH_BINARY);
	else
		cvThreshold(srcimgb2, srcimgb, 0.0, 1.0, invbk ? CV_THRESH_BINARY_INV :  CV_THRESH_BINARY);
	boundCheck(srcimgb);

	cvScale(srcimgb, srcimgb2, 255);
	//the source binary image to be labeled is saved as bsrc.bmp
	cvSaveImage("bsrc.bmp", srcimgb2);
	cvSet(srcimgb2, cvScalar(0,0,0));
	
	float elapsedMilliSeconds1;
	{//begin cpu labeling algorithm, the SBLA proposed by Zhao
		LABELDATATYPE *data=(LABELDATATYPE *)malloc(srcimgb->width * srcimgb->height * sizeof(LABELDATATYPE));
	
		for(int j = 0; j<srcimgb->height; j++)
			for(int i = 0; i<srcimgb->width; i++)
				data[i + j*srcimgb->width] = (srcimgb->imageData[i + j*srcimgb->widthStep]) ? 1 : 0;

		int iNumLabels;
		CPerformanceCounter perf;
		perf.Start();
    	iNumLabels = LabelSBLA(data, srcimgb->width, srcimgb->height);
		elapsedMilliSeconds1 = (float)perf.GetElapsedMilliSeconds();
		printf("cpu SBLA used %f ms, total labels %u\n", elapsedMilliSeconds1, iNumLabels);
		free(data);
	}
	
	IplImage *src2(0),*dst2(0);
	int iNumLabels;
	float elapsedMilliSeconds2;
	{//begin cpu labeling algorithm, the BBDT proposed by C. Grana, D. Borghesani, R. Cucchiara
		CPerformanceCounter perf;
		src2 = cvCreateImage( cvGetSize(srcimgb), IPL_DEPTH_8U, 1 );
		cvCopyImage(srcimgb,src2);
		dst2 = cvCreateImage( cvGetSize(srcimgb), IPL_DEPTH_32S, 1 );
		perf.Start();
		cvLabelingImageLab(src2, dst2, 1, &iNumLabels);
		elapsedMilliSeconds2 = (float)perf.GetElapsedMilliSeconds();
		printf("cpu BBDT used %f ms, total labels %u\n", elapsedMilliSeconds2, iNumLabels);
		cvSaveImage("bbdt.bmp", dst2);
//		cvReleaseImage(&src2);
//		cvReleaseImage(&dst2);
	}

	if(smCnt != 0){

	bugimg = cvCreateImage(cvSize(nwidth, 9*bn),IPL_DEPTH_8U,1);
	bimg = cvCreateImage(cvSize(nwidth, 2*bn),IPL_DEPTH_8U,1);
	b2img = cvCreateImage(cvSize(nwidth, 2*bn),IPL_DEPTH_8U,1);

//    cvNamedWindow("src",CV_WINDOW_AUTOSIZE);
//	cvShowImage("src",srcimg);

	//prepare buffers for our gpu algorithm
	CudaBuffer srcBuf, dstBuf, dstBuf2, bBuf, b2Buf, errBuf, glabel;
	srcBuf.Create2D(nwidth, nheight);		//the binary image to be processed
	dstBuf.Create2D(nwidth, (nheight-2)/2); //the label result, only about 1/4 the size of source image contains the final labels
	dstBuf2.Create2D(nwidth,(nheight-2)/2);	//a copy of the pass1 temp result, for debug purpose
	glabel.Create2D(4, 1);					//a int size global buffer for unique final label
	errBuf.Create2D(nwidth, 9*bn);			//a buffer for debug info
	bBuf.Create2D(nwidth, 2 * bn);			//the intersection info used by pass2
	b2Buf.Create2D(nwidth, 2 * bn);			//a copy of bBuf for debug purpose

	srcBuf.SetZeroData();
	srcBuf.CopyFrom(srcimgb->imageData, srcimgb->widthStep, nwidth, cvGetSize(srcimgb).height);

	float elapsedTimeInMs = 0.0f;
    
	//-------------------gpu part----------------------------
    cudaEvent_t start, stop;
    cutilSafeCall  ( cudaEventCreate( &start ) );
    cutilSafeCall  ( cudaEventCreate( &stop ) );
    cutilSafeCall( cudaEventRecord( start, 0 ) );  
	
	if(nwidth == 512)
		label_512(&dstBuf, &dstBuf2, &srcBuf, &bBuf, &b2Buf, &glabel, nheight, bn, &errBuf);
	else if(nwidth == 1024)
		label_1024(&dstBuf, &dstBuf2, &srcBuf, &bBuf, &b2Buf, &glabel, nheight, bn, &errBuf);
	else if(nwidth == 2048)
		label_2048(&dstBuf, &dstBuf2, &srcBuf, &bBuf, &b2Buf, &glabel, nheight, bn, &errBuf);

    cutilSafeCall( cudaEventRecord( stop, 0 ) );
//	cutilCheckMsg("kernel launch failure");
	cudaEventSynchronize(stop);
	cutilSafeCall( cudaEventElapsedTime( &elapsedTimeInMs, start, stop ) );
	uint tlabel = 0;

	cudaMemcpy(&tlabel, glabel.GetData(), 4, cudaMemcpyDeviceToHost);
	printf("gpu alg 1 used %f ms, total labels %u\n", elapsedTimeInMs, tlabel);

	dstBuf.CopyToHost(srcimgb->imageData, srcimgb->widthStep, nwidth, (nheight-2)/2);
	dstBuf2.CopyToHost(srcimgb2->imageData, srcimgb->widthStep, nwidth, (nheight-2)/2);
	errBuf.CopyToHost(bugimg->imageData, bugimg->widthStep, nwidth, 9*bn);
	bBuf.CopyToHost(bimg->imageData, bimg->widthStep, nwidth, 2*bn);
	b2Buf.CopyToHost(b2img->imageData, bimg->widthStep, nwidth, 2*bn);

//	cvNamedWindow("gpu",CV_WINDOW_AUTOSIZE);
//	cvShowImage("gpu",srcimgb);
	cvSaveImage(argv[1], srcimgb);
	cvSaveImage("gpu2.bmp", srcimgb2);	//the final labels of our algorithm
	cvSaveImage("bug.bmp", bugimg);
	cvSaveImage("b.bmp", bimg);
	cvSaveImage("b2.bmp", b2img);
	
	//now start the gpu ccl implemented by Ostava
	alg2dst= cvCreateImage(cvSize(nwidth*4, cvGetSize(srcimgb).height),IPL_DEPTH_8U,1);
	CCLBase* m_ccl;
	m_ccl = new CCL();	

	m_ccl->FindRegions(nwidth, cvGetSize(srcimgb).height, &srcBuf);
	m_ccl->GetConnectedRegionsBuffer()->CopyToHost(alg2dst->imageData, alg2dst->widthStep, nwidth*4, cvGetSize(srcimgb).height);
	delete m_ccl;
	cvSaveImage("alg2.bmp", alg2dst);

	cvReleaseImage(&bugimg);
	cvReleaseImage(&bimg);
	cvReleaseImage(&b2img);
	cvReleaseImage(&alg2dst);
//	}
	//cvWaitKey(0);
	
	//now start cross compare label results of our ccl and the BBDT, to check the correctness
//	if(smCnt != 0){
		ushort *gpures, *cpures;
		uint sz = nwidth * (cvGetSize(srcimgb).height/2);
		gpures = (ushort*)malloc(sz);
		cpures = (ushort*)malloc(sz);
		dstBuf.CopyToHost(gpures, nwidth, nwidth, (cvGetSize(srcimgb).height/2));
		
		//first, reduce cpu labels from one label for each pixel to one label for a 2x2 block, assuming 8-connectivity
		for(int j = 0; j < (cvGetSize(srcimgb).height/2); j++)
			for(int i = 0; i < (nwidth/2); i++){
				uint* cpup;
				ushort res = LBMAX;
				uint y = j*2, x = i*2;
				cpup = (uint*)(dst2->imageData + y*dst2->widthStep);
//				if(y < cvGetSize(srcimgb).height){
					if(cpup[x] != 0)
						res = cpup[x]-1;
					if(cpup[x+1] != 0)
						res = cpup[x+1]-1;
//				}
				y++;
				cpup = (uint*)(dst2->imageData + y*dst2->widthStep);
//				if(y < cvGetSize(srcimgb).height){
					if(cpup[x] != 0)
						res = cpup[x]-1;
					if(cpup[x+1] != 0)
						res = cpup[x+1]-1;
//				}
				cpures[i + j*(nwidth/2)] = res;
			}
		
		//our algo use unsigned short to represent a label, the first label starts a 0, and maximun labels is LBMAX
		if(iNumLabels > LBMAX)
			printf("too much cc, compare abort.\n");
		else{
			//create a error
			//cpures[5] = 12;
			//cpures[15] = 18;
			printf("Checking correctness of gpu alg1\nChecking gpu ref by cpu.\n");
			checkLabels(cpures, gpures, nwidth/2, cvGetSize(srcimgb).height/2, iNumLabels);

			printf("Checking cpu ref by gpu.\n");
			checkLabels(gpures, cpures, nwidth/2, cvGetSize(srcimgb).height/2, tlabel);
		}

		free(gpures);
		free(cpures);
		printf("speedup is %f, %f, %f\n", gpu2time/elapsedTimeInMs, elapsedMilliSeconds1/elapsedTimeInMs, elapsedMilliSeconds2/elapsedTimeInMs);
	}

	cvReleaseImage(&srcimgb);
	cvReleaseImage(&srcimgb2);
	cvReleaseImage(&dst2);
	cvReleaseImage(&src2);

    cutilSafeCall( cudaThreadExit() );
	return 0;

}