void PTXParser::parse( std::istream& input, ir::Instruction::Architecture language ) { assert( language == ir::Instruction::PTX ); std::stringstream temp; report( "Parsing file " << fileName ); report( "Running main parse pass." ); parser::PTXLexer lexer( &input, &temp ); reset(); try { state.addSpecialRegisters(); ptx::yyparse( lexer, state ); assertM( temp.str().empty(), "Failed to lex all characters, remainder is:\n" << (int)temp.str()[0] ); checkLabels(); } catch( PTXParseException& e ) { e.message = "\nFailed to parse file '" + fileName + "':\n" + getLinesNearCurrentLocation(input) + "\n" + e.message; report("parse error"); report(e.what()); throw e; } }
int main(int argc, char* argv[]) { char inputFile[FILENAME_MAX]; char temp[FILENAME_MAX]; int flag = TRUE; int i; FILE *file; if(argc==1) { printf("No files to compile\n"); return 0; } /*Loop through file names.*/ for(i=1; i<argc; i++) { fileName = argv[i]; strcpy(inputFile,argv[i]); strcat(inputFile,".as"); file = fopen (inputFile, "r"); if (!file) { printf("File %s does not exist\n",fileName); return 0; } /*Collect the input from the file and make first validation*/ flag = buildMatrix(file); /*Swap all the Dollars operand and check that the swaps are legal*/ if(flag) flag = changeDollars(); /*Check that all labels, commands and operands are valid*/ if(!checkLabels()||!checkAllCommands()) flag = FALSE; /*If flag is TRUE, can go on and parser the input*/ if(flag) { buildSymbolTable(); buildOutputTable(); freeTables(); /*Delete temp files*/ strcpy(temp,fileName); strcat(temp,".ex"); remove(temp); strcpy(temp,fileName); strcat(temp,".en"); remove(temp); printf("compiled %s.\n",fileName); } /*if Flag is false, print error*/ else { /*Print error*/ printf("Cannot compile %s.\n",fileName); /*Delete temp file*/ strcpy(temp,fileName); strcat(temp,".ex"); remove(temp); strcpy(temp,fileName); strcat(temp,".en"); remove(temp); } /*Free input table*/ freeMatrixMem(); fclose(file); } /*end for*/ return 0; }
int main(int argc, char** argv) { bool srcbin = 0; bool invbk = 0; if(argc < 3){ printf("Not enough args!\narg1: target image\narg2: source image\narg3: do source image adaptive threshold or not\narg4: invert back ground or not\n"); getchar(); return 1; } if(argc >= 4){ if(!strcmp(argv[3], "1")) srcbin = 1; } if(argc >= 5){ if(!strcmp(argv[4], "1")) invbk = 1; } IplImage* srcimg= 0, *srcimgb= 0, *srcimgb2 = 0, *bimg = 0, *b2img = 0,*bugimg = 0, *alg2dst = 0; srcimg= cvLoadImage(argv[2], 1); if (!srcimg) { printf("src img %s load failed!\n", argv[2]); getchar(); return 1; } //choosing the parameters for our ccl int bn = 8; //how many partitions int nwidth = 512; if(srcimg->width > 512){ nwidth = 1024; bn = 6; } if(srcimg->width > 1024){ nwidth = 2048; bn = 3; } if(srcimg->width > 2048){ printf("warning, image too wide, max support 2048. image is truncated.\n"); getchar(); return 1; } //start selection gpu devices int devCount; int smCnt = 0; cudaGetDeviceCount(&devCount); // Iterate through devices int devChosen = 0; for (int i = 0; i < devCount; ++i) { cudaDeviceProp devProp; cudaGetDeviceProperties(&devProp, i); if(devProp.major >= 2){//only one device supported smCnt = max(smCnt, devProp.multiProcessorCount); if(devProp.multiProcessorCount == smCnt) devChosen = i; } } if(smCnt == 0){ //our ccl require CUDA cap 2.0 or above, but the Ostava's ccl can be run on any CUDA gpu printf("Error, no device with cap 2.x found. Only cpu alg will be run.\n"); getchar(); return 1; } if(smCnt != 0){ cudaSetDevice(devChosen); bn = bn * smCnt; } int nheight = (cvGetSize(srcimg).height-2) / (2*bn); if((nheight*2*bn+2) < cvGetSize(srcimg).height) nheight++; nheight = nheight*2*bn+2; if(smCnt != 0) printf("gpu ccl for image width 512, 1024, 2048.\nchoosing device %d, width %d, height %d, blocks %d\n", devChosen, nwidth, nheight, bn); srcimgb= cvCreateImage(cvSize(nwidth, cvGetSize(srcimg).height),IPL_DEPTH_8U,1); srcimgb2= cvCreateImage(cvSize(nwidth, cvGetSize(srcimg).height),IPL_DEPTH_8U,1); cvSetImageROI(srcimg, cvRect(0, 0, min(cvGetSize(srcimg).width, nwidth), cvGetSize(srcimg).height)); cvSetImageROI(srcimgb2, cvRect(0, 0, min(cvGetSize(srcimg).width, nwidth), cvGetSize(srcimg).height)); cvSet(srcimgb2, cvScalar(0,0,0)); cvCvtColor(srcimg, srcimgb2, CV_BGRA2GRAY); cvResetImageROI(srcimgb2); cvReleaseImage(&srcimg); if(srcbin) cvAdaptiveThreshold(srcimgb2, srcimgb, 1.0, CV_ADAPTIVE_THRESH_MEAN_C, invbk ? CV_THRESH_BINARY_INV : CV_THRESH_BINARY); else cvThreshold(srcimgb2, srcimgb, 0.0, 1.0, invbk ? CV_THRESH_BINARY_INV : CV_THRESH_BINARY); boundCheck(srcimgb); cvScale(srcimgb, srcimgb2, 255); //the source binary image to be labeled is saved as bsrc.bmp cvSaveImage("bsrc.bmp", srcimgb2); cvSet(srcimgb2, cvScalar(0,0,0)); float elapsedMilliSeconds1; {//begin cpu labeling algorithm, the SBLA proposed by Zhao LABELDATATYPE *data=(LABELDATATYPE *)malloc(srcimgb->width * srcimgb->height * sizeof(LABELDATATYPE)); for(int j = 0; j<srcimgb->height; j++) for(int i = 0; i<srcimgb->width; i++) data[i + j*srcimgb->width] = (srcimgb->imageData[i + j*srcimgb->widthStep]) ? 1 : 0; int iNumLabels; CPerformanceCounter perf; perf.Start(); iNumLabels = LabelSBLA(data, srcimgb->width, srcimgb->height); elapsedMilliSeconds1 = (float)perf.GetElapsedMilliSeconds(); printf("cpu SBLA used %f ms, total labels %u\n", elapsedMilliSeconds1, iNumLabels); free(data); } IplImage *src2(0),*dst2(0); int iNumLabels; float elapsedMilliSeconds2; {//begin cpu labeling algorithm, the BBDT proposed by C. Grana, D. Borghesani, R. Cucchiara CPerformanceCounter perf; src2 = cvCreateImage( cvGetSize(srcimgb), IPL_DEPTH_8U, 1 ); cvCopyImage(srcimgb,src2); dst2 = cvCreateImage( cvGetSize(srcimgb), IPL_DEPTH_32S, 1 ); perf.Start(); cvLabelingImageLab(src2, dst2, 1, &iNumLabels); elapsedMilliSeconds2 = (float)perf.GetElapsedMilliSeconds(); printf("cpu BBDT used %f ms, total labels %u\n", elapsedMilliSeconds2, iNumLabels); cvSaveImage("bbdt.bmp", dst2); // cvReleaseImage(&src2); // cvReleaseImage(&dst2); } if(smCnt != 0){ bugimg = cvCreateImage(cvSize(nwidth, 9*bn),IPL_DEPTH_8U,1); bimg = cvCreateImage(cvSize(nwidth, 2*bn),IPL_DEPTH_8U,1); b2img = cvCreateImage(cvSize(nwidth, 2*bn),IPL_DEPTH_8U,1); // cvNamedWindow("src",CV_WINDOW_AUTOSIZE); // cvShowImage("src",srcimg); //prepare buffers for our gpu algorithm CudaBuffer srcBuf, dstBuf, dstBuf2, bBuf, b2Buf, errBuf, glabel; srcBuf.Create2D(nwidth, nheight); //the binary image to be processed dstBuf.Create2D(nwidth, (nheight-2)/2); //the label result, only about 1/4 the size of source image contains the final labels dstBuf2.Create2D(nwidth,(nheight-2)/2); //a copy of the pass1 temp result, for debug purpose glabel.Create2D(4, 1); //a int size global buffer for unique final label errBuf.Create2D(nwidth, 9*bn); //a buffer for debug info bBuf.Create2D(nwidth, 2 * bn); //the intersection info used by pass2 b2Buf.Create2D(nwidth, 2 * bn); //a copy of bBuf for debug purpose srcBuf.SetZeroData(); srcBuf.CopyFrom(srcimgb->imageData, srcimgb->widthStep, nwidth, cvGetSize(srcimgb).height); float elapsedTimeInMs = 0.0f; //-------------------gpu part---------------------------- cudaEvent_t start, stop; cutilSafeCall ( cudaEventCreate( &start ) ); cutilSafeCall ( cudaEventCreate( &stop ) ); cutilSafeCall( cudaEventRecord( start, 0 ) ); if(nwidth == 512) label_512(&dstBuf, &dstBuf2, &srcBuf, &bBuf, &b2Buf, &glabel, nheight, bn, &errBuf); else if(nwidth == 1024) label_1024(&dstBuf, &dstBuf2, &srcBuf, &bBuf, &b2Buf, &glabel, nheight, bn, &errBuf); else if(nwidth == 2048) label_2048(&dstBuf, &dstBuf2, &srcBuf, &bBuf, &b2Buf, &glabel, nheight, bn, &errBuf); cutilSafeCall( cudaEventRecord( stop, 0 ) ); // cutilCheckMsg("kernel launch failure"); cudaEventSynchronize(stop); cutilSafeCall( cudaEventElapsedTime( &elapsedTimeInMs, start, stop ) ); uint tlabel = 0; cudaMemcpy(&tlabel, glabel.GetData(), 4, cudaMemcpyDeviceToHost); printf("gpu alg 1 used %f ms, total labels %u\n", elapsedTimeInMs, tlabel); dstBuf.CopyToHost(srcimgb->imageData, srcimgb->widthStep, nwidth, (nheight-2)/2); dstBuf2.CopyToHost(srcimgb2->imageData, srcimgb->widthStep, nwidth, (nheight-2)/2); errBuf.CopyToHost(bugimg->imageData, bugimg->widthStep, nwidth, 9*bn); bBuf.CopyToHost(bimg->imageData, bimg->widthStep, nwidth, 2*bn); b2Buf.CopyToHost(b2img->imageData, bimg->widthStep, nwidth, 2*bn); // cvNamedWindow("gpu",CV_WINDOW_AUTOSIZE); // cvShowImage("gpu",srcimgb); cvSaveImage(argv[1], srcimgb); cvSaveImage("gpu2.bmp", srcimgb2); //the final labels of our algorithm cvSaveImage("bug.bmp", bugimg); cvSaveImage("b.bmp", bimg); cvSaveImage("b2.bmp", b2img); //now start the gpu ccl implemented by Ostava alg2dst= cvCreateImage(cvSize(nwidth*4, cvGetSize(srcimgb).height),IPL_DEPTH_8U,1); CCLBase* m_ccl; m_ccl = new CCL(); m_ccl->FindRegions(nwidth, cvGetSize(srcimgb).height, &srcBuf); m_ccl->GetConnectedRegionsBuffer()->CopyToHost(alg2dst->imageData, alg2dst->widthStep, nwidth*4, cvGetSize(srcimgb).height); delete m_ccl; cvSaveImage("alg2.bmp", alg2dst); cvReleaseImage(&bugimg); cvReleaseImage(&bimg); cvReleaseImage(&b2img); cvReleaseImage(&alg2dst); // } //cvWaitKey(0); //now start cross compare label results of our ccl and the BBDT, to check the correctness // if(smCnt != 0){ ushort *gpures, *cpures; uint sz = nwidth * (cvGetSize(srcimgb).height/2); gpures = (ushort*)malloc(sz); cpures = (ushort*)malloc(sz); dstBuf.CopyToHost(gpures, nwidth, nwidth, (cvGetSize(srcimgb).height/2)); //first, reduce cpu labels from one label for each pixel to one label for a 2x2 block, assuming 8-connectivity for(int j = 0; j < (cvGetSize(srcimgb).height/2); j++) for(int i = 0; i < (nwidth/2); i++){ uint* cpup; ushort res = LBMAX; uint y = j*2, x = i*2; cpup = (uint*)(dst2->imageData + y*dst2->widthStep); // if(y < cvGetSize(srcimgb).height){ if(cpup[x] != 0) res = cpup[x]-1; if(cpup[x+1] != 0) res = cpup[x+1]-1; // } y++; cpup = (uint*)(dst2->imageData + y*dst2->widthStep); // if(y < cvGetSize(srcimgb).height){ if(cpup[x] != 0) res = cpup[x]-1; if(cpup[x+1] != 0) res = cpup[x+1]-1; // } cpures[i + j*(nwidth/2)] = res; } //our algo use unsigned short to represent a label, the first label starts a 0, and maximun labels is LBMAX if(iNumLabels > LBMAX) printf("too much cc, compare abort.\n"); else{ //create a error //cpures[5] = 12; //cpures[15] = 18; printf("Checking correctness of gpu alg1\nChecking gpu ref by cpu.\n"); checkLabels(cpures, gpures, nwidth/2, cvGetSize(srcimgb).height/2, iNumLabels); printf("Checking cpu ref by gpu.\n"); checkLabels(gpures, cpures, nwidth/2, cvGetSize(srcimgb).height/2, tlabel); } free(gpures); free(cpures); printf("speedup is %f, %f, %f\n", gpu2time/elapsedTimeInMs, elapsedMilliSeconds1/elapsedTimeInMs, elapsedMilliSeconds2/elapsedTimeInMs); } cvReleaseImage(&srcimgb); cvReleaseImage(&srcimgb2); cvReleaseImage(&dst2); cvReleaseImage(&src2); cutilSafeCall( cudaThreadExit() ); return 0; }