/*
 * Allocate an FFTComplex holding numComplex elements in split form: one
 * FFTFloat buffer for the real parts and one for the imaginary parts.
 *
 * numComplex == 0 is legal and yields an FFTComplex whose real and imag
 * pointers are both NULL.
 *
 * Returns NULL on malloc failure, or when numComplex is so large that the
 * per-buffer byte count cannot be represented in a size_t. Caller must
 * free the returned pointer via fftFreeComplexArray().
 */
FFTComplex *fftAllocComplexArray(
	size_t numComplex)
{
	/*
	 * (size_t)-1 is the largest size_t value. Reject counts whose byte
	 * size would wrap, which would otherwise yield undersized buffers.
	 */
	if(numComplex > ((size_t)-1) / sizeof(FFTFloat)) {
		printf("***fftAllocComplexArray: numComplex too large\n");
		return NULL;
	}

	FFTComplex *fftComplex = (FFTComplex *)malloc(sizeof(FFTComplex));
	if(fftComplex == NULL) {
		printf("***fftAllocComplexArray: malloc error\n");
		return NULL;
	}
	fftComplex->real = fftComplex->imag = NULL;
	if(numComplex == 0) {
		/* that's OK here, we're done */
		return fftComplex;
	}

	size_t bufSize = numComplex * sizeof(FFTFloat);
	fftComplex->real = (FFTFloat *)malloc(bufSize);
	fftComplex->imag = (FFTFloat *)malloc(bufSize);
	if((fftComplex->real == NULL) || (fftComplex->imag == NULL)) {
		/* one of the two may have succeeded; release whatever we got */
		printf("***fftAllocComplexArray: malloc error\n");
		COND_FREE(fftComplex->real);
		COND_FREE(fftComplex->imag);
		free(fftComplex);
		return NULL;
	}

	return fftComplex;
}
/*
 * Allocate a well-aligned FFTComplex array of numComplex elements.
 *
 * Two FFTComplex's are produced:
 *   -- the return value, whose real/imag pointers are aligned to alignSize
 *      bytes and point into the buffers owned by *freePtr;
 *   -- *freePtr, which holds the unaligned pointers actually obtained from
 *      malloc.
 *
 * Release both by passing the returned pointer and *freePtr to
 * fftFreeComplexArrayAlign(). Specified alignSize must be a power of 2.
 *
 * Returns NULL, with *freePtr set to NULL, on malloc failure or when the
 * requested size in bytes cannot be represented in a size_t.
 */
FFTComplex *fftAllocComplexArrayAlign(
	size_t					numComplex,
	size_t					alignSize,		// in bytes
	FFTComplex				**freePtr)		// to be freed
{
	*freePtr = NULL;

	/*
	 * Each buffer is over-allocated by alignSize bytes to leave room for
	 * alignment; make sure that total does not wrap around. (size_t)-1 is
	 * the largest size_t value.
	 */
	if(numComplex > (((size_t)-1) - alignSize) / sizeof(FFTFloat)) {
		printf("***fftAllocComplexArrayAlign: numComplex too large\n");
		return NULL;
	}

	/* alloc two empty FFTComplexes */
	FFTComplex *alignComplex = fftAllocComplexArray(0);
	FFTComplex *freeComplex  = fftAllocComplexArray(0);
	if((alignComplex == NULL) || (freeComplex == NULL)) {
		/* both are still empty containers: no real/imag buffers to release */
		COND_FREE(freeComplex);
		COND_FREE(alignComplex);
		printf("***fftAllocComplexArrayAlign: malloc error\n");
		return NULL;
	}

	/* alloc unaligned pointers */
	size_t bufSize = (numComplex * sizeof(FFTFloat)) + alignSize;
	freeComplex->real = (FFTFloat *)malloc(bufSize);
	freeComplex->imag = (FFTFloat *)malloc(bufSize);
	if((freeComplex->real == NULL) || (freeComplex->imag == NULL)) {
		printf("***fftAllocComplexArrayAlign: malloc error\n");
		fftFreeComplexArray(freeComplex);
		/* alignComplex's real/imag are still NULL here */
		fftFreeComplexArray(alignComplex);
		return NULL;
	}

	/* obtain aligned pointers */
	alignComplex->real = (FFTFloat *)FFT_ALIGN(freeComplex->real, alignSize);
	alignComplex->imag = (FFTFloat *)FFT_ALIGN(freeComplex->imag, alignSize);

	*freePtr = freeComplex;
	return alignComplex;
}
/*
 * Release an FFTComplex: its real and imag buffers first, then the
 * FFTComplex itself. A NULL buf is silently ignored.
 */
void fftFreeComplexArray(
	FFTComplex *buf)
{
	if(buf != NULL) {
		COND_FREE(buf->real);
		COND_FREE(buf->imag);
		free(buf);
	}
}
/*
 * Release the pair of FFTComplex's produced by fftAllocComplexArrayAlign().
 *
 * freeBuf is released in full -- its real/imag buffers and the FFTComplex
 * itself -- via fftFreeComplexArray(). For buf, only the FFTComplex
 * container is released; its real/imag pointers are not passed to free()
 * here.
 */
void fftFreeComplexArrayAlign(
	FFTComplex				*buf,
	FFTComplex				*freeBuf)
{
	fftFreeComplexArray(freeBuf);
	COND_FREE(buf);
}
/*
 * Run one timed 2-D real FFT test as described by *tp and print a single
 * result row: log2 of the column count, row count and total samples, the
 * elapsed time in seconds, and CTGs.
 *
 * tp->whichFFT selects the implementation: FW_vDSP (FFTReal2d or
 * FFTReal2dOP) or FW_Matrix (mfftExecute). Each loop performs a forward
 * FFT followed, unless tp->forwardOnly is set, by an inverse FFT and
 * normalization. When tp->dontTimeFirstLoop is set, one extra loop is run
 * and the timer is restarted after it.
 *
 * When tp->displayAll is clear, the best elapsed time seen so far is kept
 * in tp->bestTime and the result row is printed only when tp->lastRun is
 * set.
 *
 * Returns 0 on success -- including the case where a vDSP test is skipped
 * because the signal exceeds VDSP_MAX_2D_REAL -- and -1 on any error.
 */
static int doTest(
	TestParams *tp)
{
	int ourRtn = 0;
	unsigned actLoops;
	size_t total2DSamples = tp->numRows * tp->numCols;
	size_t totalComplexSamples = total2DSamples >> 1;	/* for buffer alloc/init */
	size_t complexCols = tp->numCols >> 1;				/* for display */
	unsigned dims[2];
	
	FFT_Setup fftSetup = NULL;
	MatrixFFTPlan mfftPlan = NULL;
	MFFTReturn mrtn;
	
	unsigned log2TotalSamples;
	unsigned log2NumRows;
	unsigned log2NumCols;
	if(!fftIsPowerOfTwo(tp->numRows, &log2NumRows) ||
	   !fftIsPowerOfTwo(tp->numCols, &log2NumCols) ) {
		printf("***We only operate on powers of 2 only\n");
		return -1;
	}
	log2TotalSamples = log2NumRows + log2NumCols;
	unsigned maxLog = max(log2NumCols, log2NumRows);    /* for vDSP setup */
	
	if((tp->whichFFT == FW_vDSP) && 
	   (total2DSamples > VDSP_MAX_2D_REAL) &&
	   !tp->overrideLimits) {
	   
		/*
		 * This facilitates large signal testing in the scripts without
		 * having to be limited by the vDSP limits. A row of zeroes is
		 * printed in place of a measurement.
		 */
		printf("   2^%-2u |  2^%-2u  |   2^%-2u  |     0.000     |  0.000\n",
			log2NumCols, log2NumRows, log2TotalSamples);
		return 0;
	}

	double normFactor = 1.0 / (FFTFloat)total2DSamples;
	
	/* for vDSP */
	vDSPComplex zBufIn = {NULL, NULL};				
	vDSPComplex zBufOut = {NULL, NULL};				/* out of place */
	
	/* MatrixFFT */
	FFTComplex *fftSrc = NULL;						/* aligned MatrixFFT input */
	FFTComplex *fftDst = NULL;						/* aligned MatrixFFT output if OOP */
	FFTComplex *fftSrcFree = NULL;					/* to-be-freed MatrixFFT input */
	FFTComplex *fftDstFree = NULL;					/* to-be-freed MatrixFFT output if OOP */
	FFTComplex *actFftDst = NULL;
	
	double setupStart = 0.0;
	double setupEnd = 0.0;
	
	double startTime = 0.0;
	double endTime = 0.0;
	double elapsedTime;

	/* operation count used for the CTGs figure: one pass if forward only, else two */
	double ops = 2.5 * (double)total2DSamples * log2TotalSamples;
	if(tp->forwardOnly) { 
		ops *= tp->loops;
	}
	else {
		ops *= (2.0 * tp->loops);
	}
	double CTGs;
	uint32_t flagsForward = 0;
	uint32_t flagsReverse = 0;
	uint32_t flagsCreate = 0;
	
	if(!tp->manualNorm) {
		flagsReverse = MEF_NormOutput;
		normFactor /= 2.0;
	}
	
	/* 
	 * Create plans. Setup time is measured only in verbose mode.
	 */
	if(tp->verbose) {
		printf("...setting up plans\n");
		setupStart = fftGetTime(tp->wallTime);
	}
	
	switch(tp->whichFFT) {
		case FW_vDSP:
			/* Straight vDSP */
			fftSetup = FFTCreateSetup(maxLog);
			if (fftSetup == NULL) {
				printf("***Error: unable to create FFT setup.\n");
				return -1;
			}
			break;
		case FW_Matrix:
			dims[0] = log2NumRows;
			dims[1] = log2NumCols;
			mrtn = mfftCreatePlan(2, dims, true, flagsCreate, tp->numThreads, &mfftPlan);
			if(mrtn) {
				mfftPrintErrInfo("mfftCreatePlan", mrtn);
				return -1;
			}
			break;
			
	}

	if(tp->verbose) {
		setupEnd = fftGetTime(tp->wallTime);
	}
	
	/*
	 * Alloc and init SplitBuffers. From here on, every failure goes through
	 * errOut so that buffers and the setup/plan are released.
	 */
	if(tp->verbose) {
		printf("...setting up buffers\n");
	}
	switch(tp->whichFFT) {
		case FW_vDSP:
			if(fftAllocDSPComplex(&zBufIn, totalComplexSamples)) {
				printf("***Malloc failure for totalComplexSamples = %llu\n", 
					(unsigned long long)totalComplexSamples);
				ourRtn = -1;
				goto errOut;
			}
			if(tp->outOfPlace) {
				if(fftAllocDSPComplex(&zBufOut, totalComplexSamples)) {
					printf("***Malloc failure for totalComplexSamples = %llu\n", 
						(unsigned long long)totalComplexSamples);
					ourRtn = -1;
					goto errOut;
				}
			}
			genRandComplexDSP(&zBufIn, totalComplexSamples);
			fftFlushComplexDSP(&zBufIn, totalComplexSamples);
			break;

		case FW_Matrix:
		{
			fftSrc = fftAllocComplexArrayAlign(totalComplexSamples, FFT_MEM_ALIGNMENT, &fftSrcFree);
			if(fftSrc == NULL) {
				printf("***Malloc failure for totalComplexSamples = %llu\n", 
					(unsigned long long)totalComplexSamples);
				ourRtn = -1;
				goto errOut;
			} 
			if(tp->outOfPlace) {
				fftDst = fftAllocComplexArrayAlign(totalComplexSamples, FFT_MEM_ALIGNMENT, &fftDstFree);
				if(fftDst == NULL) {
					printf("***Malloc failure for totalComplexSamples = %llu\n", 
						(unsigned long long)totalComplexSamples);
					ourRtn = -1;
					goto errOut;
				} 
			}			
			genRandComplex(fftSrc, totalComplexSamples);
			fftFlushComplex(fftSrc, totalComplexSamples);
		}
	}
	
	if(tp->dumpBuffers) {
		if(tp->whichFFT == FW_vDSP) {
			fftDump2DDSPComplex("Starting time domain", &zBufIn, tp->numRows, complexCols);
		}
		else {
			fftDump2DComplex("Starting time domain", fftSrc, tp->numRows, complexCols);
		}
	}

	/* MatrixFFT only */
	if(!tp->skipTranspose) {
		flagsForward |= MEF_TransposeOutput;
		flagsReverse |= MEF_TransposeInput;
	}

	if(tp->verbose) {
		printf("...performing %llu element FFT\n", 
			(unsigned long long)total2DSamples);
	}
	
	startTime = fftGetTime(tp->wallTime);
	actLoops = tp->loops;
	if(tp->dontTimeFirstLoop) {
		/* one extra loop; the timer is restarted once it completes */
		actLoops++;
	}
	switch(tp->whichFFT) {
		case FW_vDSP:
			for(unsigned loop=0; loop<actLoops; loop++) {
				if(tp->outOfPlace) {
					/* real -- 2-d -- out of place */
					FFTReal2dOP(fftSetup, &zBufIn, &zBufOut, log2NumCols, log2NumRows, FFT_FORWARD);
					if(!tp->forwardOnly) {
						FFTReal2dOP(fftSetup, &zBufOut, &zBufIn, log2NumCols, log2NumRows, FFT_INVERSE);
					}
				}
				else {
					/* real -- 2-d -- in place */
					FFTReal2d(fftSetup, &zBufIn, log2NumCols, log2NumRows, FFT_FORWARD);
					if(!tp->forwardOnly) {
						FFTReal2d(fftSetup, &zBufIn, log2NumCols, log2NumRows, FFT_INVERSE);
					}
				}
				if(!tp->forwardOnly) {
					fftScaleDSPComplex(&zBufIn, normFactor, totalComplexSamples);
				}
				if(tp->dontTimeFirstLoop && (loop == 0)) {
					/* restart timer */
					startTime = fftGetTime(tp->wallTime);
				}
			}
			break;
			
		case FW_Matrix:
			/* in-place operation writes the forward result back into fftSrc */
			actFftDst = tp->outOfPlace ? fftDst : fftSrc;
			for(unsigned loop=0; loop<actLoops; loop++) {
				mrtn = mfftExecute(mfftPlan, flagsForward, true, fftSrc, actFftDst);
				if(mrtn) {
					mfftPrintErrInfo("mfftExecute", mrtn);
					ourRtn = -1;
					goto errOut;
				}
				if(!tp->forwardOnly) {
					mrtn = mfftExecute(mfftPlan, flagsReverse, false, actFftDst, fftSrc);
					if(mrtn) {
						mfftPrintErrInfo("mfftExecute", mrtn);
						ourRtn = -1;
						goto errOut;
					}
					
					if(tp->manualNorm) {
						fftScaleComplex(fftSrc, normFactor, totalComplexSamples);
					}
				}
				if(tp->dontTimeFirstLoop && (loop == 0)) {
					/* restart timer */
					startTime = fftGetTime(tp->wallTime);
				}
			}
			break;
	}
	
	endTime = fftGetTime(tp->wallTime);
	elapsedTime = endTime - startTime;
	
	if(!tp->displayAll) {
		/*
		 * Accumulate best time
		 */
		if((tp->bestTime == 0.0) ||             // first time thru
		   (tp->bestTime > elapsedTime)) {      // new best
			tp->bestTime = elapsedTime;
		}
		if(!tp->lastRun) {
			/* We're done, no display this time thru */
			goto errOut;
		}
		
		/* Last run: display cumulative best */
		elapsedTime = tp->bestTime;
	}

	CTGs = (ops / elapsedTime) / 1.0e+9;
	
	if(tp->dumpBuffers) {
		if(tp->whichFFT == FW_vDSP) {
			fftDump2DDSPComplex("Ending time domain", &zBufIn, tp->numRows, complexCols);
		}
		else {
			fftDump2DComplex("Ending time domain", fftSrc, tp->numRows, complexCols);
		}
	}
	

	if(tp->verbose) {
		/*
		 * NOTE(review): the "* 2" shown below matches ops only when
		 * tp->forwardOnly is clear; ops itself is computed correctly above.
		 */
		printf("complexity = 2.5 * totalSamples * lg(n) * 2 * loops\n");
		printf("           = 2.5 * %llu * %u * 2 * %u\n",
			(unsigned long long)total2DSamples, log2TotalSamples, tp->loops);
		printf("           = %.1f\n", ops);
		printf("Setup time = %.4f s\n", setupEnd - setupStart);
	}
	printf("   2^%-2u |  2^%-2u  |   2^%-2u  | %9.3f     | %6.3f\n",
		log2NumCols, log2NumRows, 
		log2TotalSamples,
		elapsedTime,
		CTGs);
	
errOut:
	/* common exit: everything not allocated is still NULL at this point */
	
	fftFreeComplexArrayAlign(fftSrc, fftSrcFree);
	if(tp->outOfPlace) {
		fftFreeComplexArrayAlign(fftDst, fftDstFree);
	}
	
	COND_FREE(zBufIn.realp);
	COND_FREE(zBufIn.imagp);
	if(tp->outOfPlace) {
		COND_FREE(zBufOut.realp);
		COND_FREE(zBufOut.imagp);
	}
	
	if(fftSetup) {
		FFTFreeSetup(fftSetup);
	}
	if(mfftPlan)  {
		mfftFreePlan(mfftPlan);
	}
	return ourRtn;	
}
int main(int argc, char **argv)
{
	/* user-spec'd variables */
	vImagePixelCount imageRows = VT_IMAGE_ROWS_DEF;
	vImagePixelCount imageCols = VT_IMAGE_COLS_DEF;
	vImagePixelCount kernelSizeMin = VT_KERNEL_SIZE_MIN;
	vImagePixelCount kernelSizeMax = VT_KERNEL_SIZE_MAX;
	unsigned kernelSizeIncr = VT_KERNEL_SIZE_INCR;
	bool preallocTempBuf = false;
	bool edgeExtend = false;
	bool wallTime = false;
	bool crandallFormat = false;
	bool printBanner = true;
	
	int arg;
	while ((arg = getopt(argc, argv, "r:c:k:K:i:pewCnh")) != -1) {
		switch (arg) {
			case 'r':
				imageRows = atoi(optarg);
				break;
			case 'c':
				imageCols = atoi(optarg);
				break;
			case 'k':
				kernelSizeMin = atoi(optarg);
				break;
			case 'K':
				kernelSizeMax = atoi(optarg);
				break;
			case 'i':
				kernelSizeIncr = atoi(optarg);
				break;
			case 'p':
				preallocTempBuf = true;
				break;
			case 'e':
				edgeExtend = true;
				break;
			case 'w':
				wallTime = true;
				break;
			case 'C':
				crandallFormat = true;
				break;
			case 'n':
				printBanner = false;
				break;
			case 'h':
				usage(argv);
		}
	}
	if(optind != argc) {
		usage(argv);
	}
	
	/* validate inputs */
	if(!(kernelSizeMin & 0x01) || !(kernelSizeMax & 0x01)) {
		printf("***kernelSize min and max (%lu, %lu) must be odd\n",
			(unsigned long)kernelSizeMin, (unsigned long)kernelSizeMax);
		exit(1);
	}
	if(kernelSizeIncr & 0x01) {
		printf("***kernelSizeIncr (%u) must be even\n", kernelSizeIncr);
		exit(1);
	}
	if(kernelSizeMax < kernelSizeMin) {
		printf("***kernelSizeMax must be >= kernelSizeMin\n");
		exit(1);
	}
	
	if(printBanner) {
		fftPrintTestBanner("Two-dimension Real Convolve", "vImage", false, "Random", NULL, 0, 0);
		printf("\n");
		printf("Image Rows | Image Cols | Kernel size | Convolution time (s)\n");
		printf("-----------+------------+-------------+---------------------\n");
	}
	
	/* allocate some memory */
	vImagePixelCount rowBytesMax = VT_ROW_BYTES(imageCols);
	size_t bufSize = rowBytesMax * imageRows;
	size_t imageSize = imageRows * imageCols;
	void *srcBuf = malloc(bufSize);
	void *dstBuf = malloc(bufSize);
	if((srcBuf == NULL) || (dstBuf == NULL)) {
		printf("***malloc failure (bufSize %lu\n", bufSize);
		exit(1);
	}
	size_t kernSize = kernelSizeMax * kernelSizeMax * sizeof(float);
	float *kernel = (float *)malloc(kernSize);
	if(kernel == NULL) {
		printf("***malloc failure (kernSize %lu\n", kernSize);
		exit(1);
	}	
	
	/* and the optional temp buffer */
	void *tempBuf = NULL;
	if(preallocTempBuf) {
		vImage_Buffer src;
		src.height = imageRows;
		src.width = imageCols;
		src.rowBytes = rowBytesMax;
		src.data = srcBuf;
		vImage_Flags flags = kvImageGetTempBufferSize;
		if(edgeExtend) {
			flags |= kvImageEdgeExtend;
		}
		else {
			flags |= kvImageBackgroundColorFill;
		}
		vImage_Error vrtn = vImageConvolve_PlanarF(&src, &src,  
				NULL,							// tempBuffer
				0, 0,							// region of interest
				kernel, kernelSizeMax, kernelSizeMax, 
				0.0,							// background fill 
				flags);
		if(vrtn <= 0) {
			printf("***vImageConvolve_PlanarF() returned %ld on get temp bufsize op\n",
				(long)vrtn);
			exit(1);
		}
		printf("...temp buffer size %ld\n", (long)vrtn);
		tempBuf = malloc((size_t)vrtn);
		if(tempBuf == NULL) {
			printf("***malloc failure (temp bufsize %ld\n", (long)vrtn);
			exit(1);
		}
	}
	
	genRandFloats((float *)srcBuf, imageSize);
	genRandFloats(kernel, kernelSizeMax);
	
	/* collect everything into VTParams */
	VTParams vtp;
	vtp.srcBuf     = srcBuf;
	vtp.dstBuf     = dstBuf;
	vtp.kernel     = kernel;
	vtp.imageRows  = imageRows;
	vtp.imageCols  = imageCols;
	vtp.tempBuffer = tempBuf;
	vtp.edgeExtend = edgeExtend;
	vtp.wallTime   = wallTime;
	
	/* here we go */
	vImagePixelCount kernelSize;
	char *crandallStr = NULL;

	if(crandallFormat) {
		crandallStr = strdup("\n/* {time(seconds), n^2 * m^2} */\n");
	}
	
	
	for(kernelSize=kernelSizeMin; kernelSize<=kernelSizeMax; kernelSize+=kernelSizeIncr) {
		vtp.kernelSize = kernelSize;
		int irtn = doTest(&vtp);
		if(irtn) {
			printf("***test failure; aborting\n");
			exit(1);
		}
		
		printf( "%8lu   | %8lu   | %8lu    | %10.5f\n", 
			(unsigned long)imageRows, (unsigned long)imageCols, 
			(unsigned long)kernelSize,
			vtp.runTime);
			
		if(crandallFormat) {
			/* save this result by appending it to the string we'll output when finished */
			crandallStr = appendCrandallFormat(imageRows, imageCols, kernelSize, vtp.runTime, 
				crandallStr);
		}
	}
	
	if(crandallFormat) {
		printf("\n%s\n", crandallStr);
	}
	
	/* clean up */
	COND_FREE(srcBuf);
	COND_FREE(dstBuf);
	COND_FREE(kernel);
	COND_FREE(tempBuf);
	return 0;
}
/* Example #7 */
/* 0 */
/*
 * Free all malloc'ed memory for the specified service
 */
void sc_free( struct service_config *scp )
{
#ifdef HAVE_MDNS
   COND_FREE( SC_MDNS_NAME(scp) );
   xinetd_mdns_svc_free(scp);
#endif
#ifdef LIBWRAP
   COND_FREE( SC_LIBWRAP(scp) );
#endif
   COND_FREE( SC_NAME(scp) ) ;
   COND_FREE( SC_ID(scp) ) ;
   COND_FREE( SC_PROTONAME(scp) ) ;
   COND_FREE( SC_SERVER(scp) ) ;
   COND_FREE( (char *)SC_REDIR_ADDR(scp) ) ;
   COND_FREE( (char *)SC_BIND_ADDR(scp) ) ;
   COND_FREE( (char *)SC_ORIG_BIND_ADDR(scp) ) ;
   COND_FREE( (char *)SC_BANNER(scp) ) ;
   COND_FREE( (char *)SC_BANNER_SUCCESS(scp) ) ;
   COND_FREE( (char *)SC_BANNER_FAIL(scp) ) ;
   if ( SC_SERVER_ARGV(scp) )
   {
      char **pp ;

      /*
       * argv[ 0 ] is a special case because it may not have been allocated yet
       */
      if ( SC_SERVER_ARGV(scp)[ 0 ] != NULL)
         free( SC_SERVER_ARGV(scp)[ 0 ] ) ;
      for ( pp = &SC_SERVER_ARGV(scp)[ 1 ] ; *pp != NULL ; pp++ )
         free( *pp ) ;
      free( (char *) SC_SERVER_ARGV(scp) ) ;
   }
   COND_FREE( LOG_GET_FILELOG( SC_LOG( scp ) )->fl_filename ) ;

   if ( SC_ACCESS_TIMES(scp) != NULL )
   {
      ti_free( SC_ACCESS_TIMES(scp) ) ;
      pset_destroy( SC_ACCESS_TIMES(scp) ) ;
   }

   if ( SC_ONLY_FROM(scp) != NULL )
   {
      addrlist_free( SC_ONLY_FROM(scp) ) ;
      pset_destroy( SC_ONLY_FROM(scp) ) ;
   }

   if ( SC_NO_ACCESS(scp) != NULL )
   {
      addrlist_free( SC_NO_ACCESS(scp) ) ;
      pset_destroy( SC_NO_ACCESS(scp) ) ;
   }

   if ( SC_ENV_VAR_DEFS(scp) != NULL )
      release_string_pset( SC_ENV_VAR_DEFS(scp) ) ;
   if ( SC_PASS_ENV_VARS(scp) != NULL )
      release_string_pset( SC_PASS_ENV_VARS(scp) ) ;
   if ( SC_ENV( scp )->env_type == CUSTOM_ENV && 
                                    SC_ENV( scp )->env_handle != ENV_NULL )
      env_destroy( SC_ENV( scp )->env_handle ) ;
   if (SC_DISABLED(scp) ) 
      release_string_pset( SC_DISABLED(scp) ) ;
   if (SC_ENABLED(scp) ) 
      release_string_pset( SC_ENABLED(scp) ) ;
   
   CLEAR( *scp ) ;
   FREE_SCONF( scp ) ;
}