Exemple #1
0
/**
 * @brief Main principal
 * @param argc El número de argumentos del programa
 * @param argv Cadenas de argumentos del programa
 * @return Nada si es correcto o algún número negativo si es incorrecto
 */
int main( int argc, char** argv ) {

	if(argc != 2)
		return -1;

	// Medimos tiempo para el programa
	const double start_time = getCurrentTimestamp();

	// Creamos el buffer para las partículas y reservamos espacio ALINEADO para los datos
	size_t N = atoi(argv[1]);
	particle *particulas = (particle*) _aligned_malloc(N * sizeof(particle), 64);

	// Inicializamos las partículas
	const double inicio = getCurrentTimestamp();

	for(unsigned index = 0; index < N; ++index) {
		particulas[index].x = 0.0;
		particulas[index].y = 0.0;
		particulas[index].s = 1.0;
		particulas[index].xp = 0.0;
		particulas[index].yp = 0.0;
		particulas[index].sp = 1.0;
		particulas[index].x0 = 0.0;
		particulas[index].y0 = 0.0;
		particulas[index].width = 500;
		particulas[index].height = 500;
		particulas[index].w = 0.0f;
    }

	const double end_time = getCurrentTimestamp();

	// Obtenemos el tiempo consumido por el programa y la suma de los pesos
	printf("\nTiempo total del programa: %0.3f ms\n", (end_time - start_time) * 1e3);
	printf("Tiempo total consumido por la inicializacion de las particulas: %0.3f ms\n", (end_time - inicio) * 1e3);
}
/************************************************************************
method CompilerTrackingInfo::resetInterval

Start a new tracking interval at the current time.

Captures the interval start three ways (getCurrentTimestamp(),
getCurrentTimestampUEpoch() and clock()), resets the interval water
marks of the statement and context heaps, resets the metadata, query
and histogram cache counters, and zeroes the remaining per-interval
counters kept by this class.

************************************************************************/
inline
void CompilerTrackingInfo::resetInterval()
{  
  // remember when this interval began (timestamp, epoch timestamp, clock)
  beginIntervalTime_  = getCurrentTimestamp();
  beginIntervalTimeUEpoch_  = getCurrentTimestampUEpoch();
  beginIntervalClock_ = clock();
  //
  // water marks for stmt and context heap back to 0
  CmpCommon::statementHeap()->resetIntervalWaterMark();
  CmpCommon::contextHeap()->resetIntervalWaterMark();
  //
  // metadata cache counters maintained on each interval
  resetMetadataCacheCounters();  
  //
  // query cache
  resetQueryCacheCounters();
  //
  // histogram cache counters reset on interval
  resetHistogramCacheCounters();  
  //
  // other counters
  largestStmtIntervalWaterMark_ = 0;
  systemHeapWaterMark_          = 0;
  longestCompileClock_          = 0;
  successfulQueryCount_         = 0;
  failedQueryCount_             = 0;
  caughtExceptionCount_         = 0;
  sessionCount_                 = 0;
}
/************************************************************************
method CompilerTrackingInfo::intervalExpired

Tell the caller whether the logging interval is over, i.e. whether the
duration of the current interval, measured up to the current timestamp,
has reached intervalLengthMins, so that CompilerTrackingInfo may be
logged again.

************************************************************************/
inline
NABoolean
CompilerTrackingInfo::intervalExpired(Int32 intervalLengthMins)
{
  // compare the elapsed part of the interval against the configured length
  const NABoolean expired =
    ( currentIntervalDuration(getCurrentTimestamp()) >= intervalLengthMins );

  return expired;
}
Exemple #4
0
/**
 * @brief Main principal
 * @param argc El número de argumentos del programa
 * @param argv Cadenas de argumentos del programa
 * @return Nada si es correcto o algún número negativo si es incorrecto
 */
int main( int argc, char** argv ) {

	if(argc != 2)
		return -1;

	// Medimos tiempo para el programa
	const double start_time = getCurrentTimestamp();

	// Creamos el buffer para las partículas y los pesos y reservamos espacio ALINEADO para los datos
	size_t N = atoi(argv[1]);
	particle *particulas = (particle*) _aligned_malloc(N * sizeof(particle), 64);
	int *pesos = (int*) _aligned_malloc(N * sizeof(int), 64);
	float sum = 0.0f;

	// Inicializamos las partículas (Me interesan los pesos)
	srand(time(NULL));
	for(unsigned index = 0; index < N; ++index) {
		particulas[index].x = 0.0;
		particulas[index].y = 0.0;
		particulas[index].s = 0.0;
		particulas[index].xp = 0.0;
		particulas[index].yp = 0.0;
		particulas[index].sp = 0.0;
		particulas[index].x0 = 0.0;
		particulas[index].y0 = 0.0;
		particulas[index].width = 0;
		particulas[index].height = 0;
		particulas[index].w = (float) (rand() % 2000);
		sum+=particulas[index].w;
    }

	// Normalizamos los datos
	for(int i = 0; i < N; ++i)
		particulas[i].w /= sum;

	const double inicio = getCurrentTimestamp();

	// Calculamos el número de partículas en base al peso de cada una
	for(unsigned index = 0; index < N; ++index)
		pesos[index] = cvRound( particulas[index].w * N );

	const double end_time = getCurrentTimestamp();

	// Obtenemos el tiempo consumido por el programa y la suma de los pesos
	printf("\nTiempo total del programa: %0.3f ms\n", (end_time - start_time) * 1e3);
	printf("Tiempo total consumido por la generacion del numero de particulas: %0.3f ms\n", (end_time - inicio) * 1e3);
}
/************************************************************************
method CompilationStats::exitCmpPhase

 Close a compilation phase: leave the CPU monitor of that phase and,
 when the phase is CMP_PHASE_ALL, record the compile end time.
 Invalid phases are ignored.

************************************************************************/
void 
CompilationStats::exitCmpPhase(CompilationPhase phase)
{
  if (isValidPhase(phase))
  {
    cpuMonitor_[phase].exit();

    // leaving the all-encompassing phase ends the compilation itself
    if (phase == CMP_PHASE_ALL)
      compileEndTime_ = getCurrentTimestamp();
  }
}
Exemple #6
0
/************************************************************************
method CompilerTrackingInfo::logCompilerStatusOnInterval

Once the tracking interval of intervalLengthMins has expired, write the
fields of this class to the tracking log file (when one is configured),
to the private log table (when COMPILER_TRACKING_LOGTABLE is ON) and to
the repository, then start a new interval. Does nothing while the
interval is still running.

************************************************************************/
void
CompilerTrackingInfo::logCompilerStatusOnInterval(Int32 intervalLengthMins)
{
  // still inside the current interval: nothing to log yet
  if( !intervalExpired(intervalLengthMins) )
    return;

  // close the interval that just expired
  endIntervalTime_ = getCurrentTimestamp();

  // refresh the query cache statistics once per interval; when they are
  // not available (query cache disabled) clear the cache counters instead
  if (!CURRENTQCACHE->getCompilationCacheStats(currentQCacheStats_))
    clearQCacheCounters();

  // destination 1: the tracking log file, if a filename is configured
  if( getCompilerTrackingLogFilename() != NULL )
    printToFile();

  // destination 2: a private table populated through dynamic SQL rather
  // than through the Repository infrastructure
  if (CmpCommon::getDefault(COMPILER_TRACKING_LOGTABLE) == DF_ON)
    logIntervalInPrivateTable();

  // destination 3: the repository table, always attempted; a non-zero
  // result raises warning 2242 (compiler could not log its status and
  // health information to the repository)
  const Int32 repositoryStatus = logIntervalInRepository();
  if (repositoryStatus != 0)
    *CmpCommon::diags() << DgSqlCode(2242);

  // begin tracking the next interval
  resetInterval();
}
/************************************************************************
method CompilationStats::enterCmpPhase

 Open a compilation phase: re-initialize and enter the CPU monitor of
 that phase and, when the phase is CMP_PHASE_ALL, record the compile
 start time. Invalid phases are ignored.

************************************************************************/
void 
CompilationStats::enterCmpPhase(CompilationPhase phase)
{
  if (isValidPhase(phase))
  {
    // the monitor always starts from zero before the phase is entered
    cpuMonitor_[phase].init(0);
    cpuMonitor_[phase].enter();

    // entering the all-encompassing phase starts the compilation itself
    if (phase == CMP_PHASE_ALL)
      compileStartTime_ = getCurrentTimestamp();
  }
}
/************************************************************************
method CompilerTrackingInfo::logCompilerStatusOnInterval

Once the tracking interval of intervalLengthMins has expired, write the
fields of this class to the tracking log file (when one is configured),
to the private log table (when COMPILER_TRACKING_LOGTABLE is ON) and to
the log4cxx log, then start a new interval. Does nothing while the
interval is still running.

************************************************************************/
void
CompilerTrackingInfo::logCompilerStatusOnInterval(Int32 intervalLengthMins)
{
  // still inside the current interval: nothing to log yet
  if( !intervalExpired(intervalLengthMins) )
    return;

  // close the interval that just expired
  endIntervalTime_ = getCurrentTimestamp();

  // refresh the query cache statistics once per interval; when they are
  // not available (query cache disabled) clear the cache counters instead
  if (!CURRENTQCACHE->getCompilationCacheStats(currentQCacheStats_))
    clearQCacheCounters();

  // destination 1: the tracking log file, if a filename is configured
  if( getCompilerTrackingLogFilename() != NULL )
    printToFile();

  // destination 2: a private table populated through dynamic SQL
  if (CmpCommon::getDefault(COMPILER_TRACKING_LOGTABLE) == DF_ON)
    logIntervalInPrivateTable();

  // destination 3: the log4cxx log, unconditionally
  logIntervalInLog4Cxx();

  // begin tracking the next interval
  resetInterval();
}
/**
 * Acquisition loop of the grabber.
 *
 * Setup: clears m_should_exit, attaches the calibration data to both the
 * published image (m_rgbd_image) and the working image (m_current_image),
 * and allocates zero-filled depth, color, intensity, user-label and mapped
 * buffers for both.
 *
 * Loop (until m_should_exit becomes true): waits for a new event, updates
 * the OpenNI generators under m_ni_mutex, copies depth (scaled by 1/1000),
 * infrared or color data and, when user tracking is enabled, user labels
 * and the skeleton of the first tracked user into the working image;
 * optionally downsamples from an oversampled buffer; stamps the image with
 * getCurrentTimestamp(); swaps it with the published image under a write
 * lock on m_lock and advertises the new frame.
 */
void OpenniGrabber :: run()
{
    m_should_exit = false;
    m_current_image.setCalibration(m_calib_data);
    m_rgbd_image.setCalibration(m_calib_data);

    // Depth
    // Both images get a zero-filled raw depth buffer; depthRef() is then
    // assigned from rawDepthRef().
    m_rgbd_image.rawDepthRef() = Mat1f(m_calib_data->raw_depth_size);
    m_rgbd_image.rawDepthRef() = 0.f;
    m_rgbd_image.depthRef() = m_rgbd_image.rawDepthRef();
    m_current_image.rawDepthRef() = Mat1f(m_calib_data->raw_depth_size);
    m_current_image.rawDepthRef() = 0.f;
    m_current_image.depthRef() = m_current_image.rawDepthRef();

    // Color
    // Color and intensity buffers are only set up when an RGB stream exists.
    if (m_has_rgb)
    {
        m_rgbd_image.rawRgbRef() = Mat3b(m_calib_data->rawRgbSize());
        m_rgbd_image.rawRgbRef() = Vec3b(0,0,0);
        m_rgbd_image.rgbRef() = m_rgbd_image.rawRgbRef();
        m_current_image.rawRgbRef() = Mat3b(m_calib_data->rawRgbSize());
        m_current_image.rawRgbRef() = Vec3b(0,0,0);
        m_current_image.rgbRef() = m_current_image.rawRgbRef();

        m_rgbd_image.rawIntensityRef() = Mat1f(m_calib_data->rawRgbSize());
        m_rgbd_image.rawIntensityRef() = 0.f;
        m_rgbd_image.intensityRef() = m_rgbd_image.rawIntensityRef();
        m_current_image.rawIntensityRef() = Mat1f(m_calib_data->rawRgbSize());
        m_current_image.rawIntensityRef() = 0.f;
        m_current_image.intensityRef() = m_current_image.rawIntensityRef();
    }

    // User tracking
    // Label masks are always allocated; skeleton objects only when tracking.
    m_rgbd_image.userLabelsRef() = cv::Mat1b(m_calib_data->raw_depth_size);
    m_rgbd_image.userLabelsRef() = 0u;

    if (m_track_users)
        m_rgbd_image.setSkeletonData(new Skeleton());

    m_current_image.userLabelsRef() = cv::Mat1b(m_calib_data->raw_depth_size);
    m_current_image.userLabelsRef() = 0u;

    if (m_track_users)
        m_current_image.setSkeletonData(new Skeleton());

    if (m_has_rgb)
    {
        // When color and depth have the same size the mapped images are
        // assigned from the raw ones; otherwise separate zero-filled
        // buffers are allocated (mapped RGB at depth size, mapped depth at
        // RGB size).
        bool mapping_required = m_calib_data->rawRgbSize() != m_calib_data->raw_depth_size;
        if (!mapping_required)
        {
            m_rgbd_image.mappedRgbRef() = m_rgbd_image.rawRgbRef();
            m_rgbd_image.mappedDepthRef() = m_rgbd_image.rawDepthRef();
            m_current_image.mappedRgbRef() = m_current_image.rawRgbRef();
            m_current_image.mappedDepthRef() = m_current_image.rawDepthRef();
        }
        else
        {
            // NOTE(review): this branch reads the depth size through the
            // raw_depth_size member for m_rgbd_image but through the
            // rawDepthSize() accessor for m_current_image - presumably
            // equivalent; confirm.
            m_rgbd_image.mappedRgbRef() = Mat3b(m_calib_data->raw_depth_size);
            m_rgbd_image.mappedRgbRef() = Vec3b(0,0,0);
            m_rgbd_image.mappedDepthRef() = Mat1f(m_calib_data->rawRgbSize());
            m_rgbd_image.mappedDepthRef() = 0.f;
            m_current_image.mappedRgbRef() = Mat3b(m_calib_data->rawDepthSize());
            m_current_image.mappedRgbRef() = Vec3b(0,0,0);
            m_current_image.mappedDepthRef() = Mat1f(m_calib_data->rawRgbSize());
            m_current_image.mappedDepthRef() = 0.f;
        }
    }

    m_rgbd_image.setCameraSerial(cameraSerial());
    m_current_image.setCameraSerial(cameraSerial());

    // Metadata holders, declared once and refilled on every iteration.
    xn::SceneMetaData sceneMD;
    xn::DepthMetaData depthMD;
    xn::ImageMetaData rgbMD;
    xn::IRMetaData irMD;

    ImageBayerGRBG bayer_decoder(ImageBayerGRBG::EdgeAware);

    // With a subsampling factor other than 1, depth and labels are first
    // written into this larger buffer (raw depth size times the factor)
    // and resized into m_current_image at the end of each iteration.
    RGBDImage oversampled_image;
    if (m_subsampling_factor != 1)
    {
        oversampled_image.rawDepthRef().create(m_calib_data->rawDepthSize()*m_subsampling_factor);
        oversampled_image.userLabelsRef().create(oversampled_image.rawDepth().size());
    }

    while (!m_should_exit)
    {
        waitForNewEvent();
        // NOTE(review): `this` is formatted with %x; confirm format()
        // accepts a pointer for that conversion.
        ntk_dbg(2) << format("[%x] running iteration", this);

        {
            // OpenNI calls do not seem to be thread safe.
            QMutexLocker ni_locker(&m_ni_mutex);
            waitAndUpdateActiveGenerators();
        }

        if (m_track_users && m_body_event_detector)
            m_body_event_detector->update();

        // NOTE(review): the GetMetaData calls below run after the
        // m_ni_mutex scope above has ended - confirm that is intended.
        m_ni_depth_generator.GetMetaData(depthMD);
        if (m_has_rgb)
        {
            if (m_get_infrared)
            {
                m_ni_ir_generator.GetMetaData(irMD);
            }
            else
            {
                m_ni_rgb_generator.GetMetaData(rgbMD);
            }
        }

        // Depth and user labels go to the oversampled buffer when
        // subsampling is active, directly to m_current_image otherwise.
        RGBDImage& temp_image =
                m_subsampling_factor == 1 ? m_current_image : oversampled_image;

        const XnDepthPixel* pDepth = depthMD.Data();
        ntk_assert((depthMD.XRes() == temp_image.rawDepth().cols)
                   && (depthMD.YRes() == temp_image.rawDepth().rows),
                   "Invalid image size.");

        // Convert to meters.
        const float depth_correction_factor = 1.0;
        float* raw_depth_ptr = temp_image.rawDepthRef().ptr<float>();
        for (int i = 0; i < depthMD.XRes()*depthMD.YRes(); ++i)
            raw_depth_ptr[i] = depth_correction_factor * pDepth[i]/1000.f;

        if (m_has_rgb)
        {
            if (m_get_infrared)
            {
                // Infrared: copy the 16-bit samples into the float
                // intensity image, (re)created at the IR resolution.
                const XnGrayscale16Pixel* pImage = irMD.Data();
                m_current_image.rawIntensityRef().create(irMD.YRes(), irMD.XRes());
                float* raw_img_ptr = m_current_image.rawIntensityRef().ptr<float>();
                for (int i = 0; i < irMD.XRes()*irMD.YRes(); ++i)
                {
                    raw_img_ptr[i] = pImage[i];
                }
            }
            else
            {
                if (m_custom_bayer_decoding)
                {
                    // Decode the Bayer pattern into the raw RGB buffer,
                    // then convert it in place from RGB to BGR.
                    uchar* raw_rgb_ptr = m_current_image.rawRgbRef().ptr<uchar>();
                    bayer_decoder.fillRGB(rgbMD,
                                          m_current_image.rawRgb().cols, m_current_image.rawRgb().rows,
                                          raw_rgb_ptr);
                    cvtColor(m_current_image.rawRgbRef(), m_current_image.rawRgbRef(), CV_RGB2BGR);
                }
                else
                {
                    // Copy the RGB24 data while reversing the channel
                    // order of every 3-byte pixel.
                    const XnUInt8* pImage = rgbMD.Data();
                    ntk_assert(rgbMD.PixelFormat() == XN_PIXEL_FORMAT_RGB24, "Invalid RGB format.");
                    uchar* raw_rgb_ptr = m_current_image.rawRgbRef().ptr<uchar>();
                    for (int i = 0; i < rgbMD.XRes()*rgbMD.YRes()*3; i += 3)
                        for (int k = 0; k < 3; ++k)
                        {
                            raw_rgb_ptr[i+k] = pImage[i+(2-k)];
                        }
                }
            }
        }

        if (m_track_users)
        {
            // Copy the per-pixel user labels into the 8-bit label mask.
            m_ni_user_generator.GetUserPixels(0, sceneMD);
            uchar* user_mask_ptr = temp_image.userLabelsRef().ptr<uchar>();
            const XnLabel* pLabel = sceneMD.Data();
            for (int i = 0; i < sceneMD.XRes()*sceneMD.YRes(); ++i)
            {
                user_mask_ptr[i] = pLabel[i];
            }

            // Ask for at most 15 user ids.
            XnUserID user_ids[15];
            XnUInt16 num_users = 15;
            m_ni_user_generator.GetUsers(user_ids, num_users);

            // FIXME: only one user supported.
            // Joints are computed for the first user being tracked only.
            for (int i = 0; i < num_users; ++i)
            {
                XnUserID user_id = user_ids[i];
                if (m_ni_user_generator.GetSkeletonCap().IsTracking(user_id))
                {
                    m_current_image.skeletonRef()->computeJoints(user_id, m_ni_user_generator, m_ni_depth_generator);
                    break;
                }
            }
        }

        if (m_subsampling_factor != 1)
        {
            // Cannot use interpolation here, since this would
            // spread the invalid depth values.
            cv::resize(oversampled_image.rawDepth(),
                       m_current_image.rawDepthRef(),
                       m_current_image.rawDepth().size(),
                       0, 0, INTER_NEAREST);
            // we have to repeat this, since resize can change the pointer.
            // m_current_image.depthRef() = m_current_image.rawDepthRef();
            cv::resize(oversampled_image.userLabels(),
                       m_current_image.userLabelsRef(),
                       m_current_image.userLabels().size(),
                       0, 0, INTER_NEAREST);
        }

        m_current_image.setTimestamp(getCurrentTimestamp());

        {
            // Publish the finished frame: swap working and published
            // images while holding the write lock.
            QWriteLocker locker(&m_lock);
            m_current_image.swap(m_rgbd_image);
        }

        advertiseNewFrame();
    }
    ntk_dbg(1) << format("[%x] finishing", this);
}
Exemple #10
0
	//run waifu2x
	void run(std::vector<float> &input,
		std::vector<float> &weight,
		std::vector<float> &output,
		std::vector<double> &bias,
		int iter, const int kernelSize, int r, int c) {

		unsigned int ipp[7][1] = { { 1 },{ 32 },{ 32 },{ 64 },{ 64 },{ 128 },{ 128 } };
		unsigned int opp[7][1] = { { 32 },{ 32 },{ 64 },{ 64 },{ 128 },{ 128 },{ 1 } };

		const unsigned nInputPlanes = ipp[iter][1];
		const unsigned nOutputPlanes = opp[iter][1];

		cl_int status;

		status = clEnqueueWriteBuffer(queue, input_buf, CL_FALSE,
			0, r * c * nInputPlanes * sizeof(float), input.data(), 0, NULL, NULL);
		checkError(status, "Failed to transfer input");

		status = clEnqueueWriteBuffer(queue, weight_buf, CL_FALSE,
			0, kernelSize * nInputPlanes * nOutputPlanes * sizeof(float), weight.data(), 0, NULL, NULL);
		checkError(status, "Failed to transfer weight");

		status = clEnqueueWriteBuffer(queue, bias_buf, CL_FALSE,
			0, nOutputPlanes * sizeof(double), bias.data(), 0, NULL, NULL);
		checkError(status, "Failed to transfer bias");

		clFinish(queue);

		cl_event kernel_event;

		const double start_time = getCurrentTimestamp();

		unsigned argi = 0;

		status = clSetKernelArg(kernel, argi++, sizeof(cl_mem), &output_buf);
		checkError(status, "Failed to set argument %d", argi - 1);

		status = clSetKernelArg(kernel, argi++, sizeof(cl_mem), &input_buf);
		checkError(status, "Failed to set argument %d", argi - 1);

		status = clSetKernelArg(kernel, argi++, sizeof(cl_mem), &weight_buf);
		checkError(status, "Failed to set argument %d", argi - 1);

		status = clSetKernelArg(kernel, argi++, sizeof(cl_mem), &bias_buf);
		checkError(status, "Failed to set argument %d", argi - 1);

		status = clSetKernelArg(kernel, argi++, sizeof(nInputPlanes), &nInputPlanes);
		checkError(status, "Failed to set argument %d", argi - 1);

		status = clSetKernelArg(kernel, argi++, sizeof(nOutputPlanes), &nOutputPlanes);
		checkError(status, "Failed to set argument %d", argi - 1);

		status = clSetKernelArg(kernel, argi++, sizeof(r), &r);
		checkError(status, "Failed to set argument %d", argi - 1);

		status = clSetKernelArg(kernel, argi++, sizeof(c), &c);
		checkError(status, "Failed to set argument %d", argi - 1);

		const size_t* global_work_size = opp[iter];
		const size_t* local_work_size = ipp[iter];
		printf("Iteration %d\n", iter);

		status = clEnqueueNDRangeKernel(queue, kernel, 2, NULL,
			global_work_size, local_work_size, 0, NULL, &kernel_event);
		checkError(status, "Failed to launch kernel");

		status = clFinish(queue);
		checkError(status, "Failed to finish");

		const double end_time = getCurrentTimestamp();
		const double total_time = end_time - start_time;

		// Wall-clock time taken.
		printf("\nTime: %0.3f ms\n", total_time * 1e3);

		// Get kernel times using the OpenCL event profiling API.
		cl_ulong time_ns = getStartEndTime(kernel_event);
		printf("Kernel time: %0.3f ms\n", double(time_ns) * 1e-6);

		clReleaseEvent(kernel_event);

		status = clEnqueueReadBuffer(queue, output_buf, CL_TRUE,
			0, r - 1 * c - 1 * nOutputPlanes * sizeof(float), output.data(), 0, NULL, NULL);
		checkError(status, "Failed to read output matrix");

	}
Exemple #11
0
/**
 * @brief Main principal
 * @param argc El número de argumentos del programa
 * @param argv Cadenas de argumentos del programa
 * @return Nada si es correcto o algún número negativo si es incorrecto
 */
int main( int argc, char** argv ) {

	if(argc != 2)
		return -1;

	// Medimos tiempo para el programa
	const double start_time = getCurrentTimestamp();

	FILE *kernels;
	char *source_str;
	size_t source_size, work_items;

	// OpenCL runtime configuration
	unsigned num_devices;
	cl_platform_id platform_ids[3];
	cl_uint ret_num_platforms;
	cl_device_id device_id;
	cl_context context = NULL;
	cl_command_queue command_queue;
	cl_program program = NULL;
	cl_int ret;
	cl_kernel kernelNUM;
	cl_event kernel_event, finish_event;
	cl_mem objPARTICULAS, objPESOS;

	// Abrimos el fichero que contiene el kernel
	fopen_s(&kernels, "numparticulasCPU.cl", "r");
	if (!kernels) {
		fprintf(stderr, "Fallo al cargar el kernel\n");
		exit(-1);
	}	
	source_str = (char *) malloc(0x100000);
	source_size = fread(source_str, 1, 0x100000, kernels);
	fclose(kernels);

	// Obtenemos los IDs de las plataformas disponibles
	if( clGetPlatformIDs(3, platform_ids, &ret_num_platforms) != CL_SUCCESS) {
		printf("No se puede obtener id de la plataforma");
		return -1;
	}

	// Intentamos obtener un dispositivo CPU soportado
	if( clGetDeviceIDs(platform_ids[1], CL_DEVICE_TYPE_CPU, 1, &device_id, &num_devices) != CL_SUCCESS) {
		printf("No se puede obtener id del dispositivo");
		return -1;
	}
	clGetDeviceInfo(device_id, CL_DEVICE_MAX_COMPUTE_UNITS, sizeof(cl_uint), &work_items, NULL);
 
	// Creación de un contexto OpenCL
	context = clCreateContext(NULL, 1, &device_id, NULL, NULL, &ret);
 
	// Creación de una cola de comandos
	command_queue = clCreateCommandQueue(context, device_id, CL_QUEUE_PROFILING_ENABLE, &ret);

	// Creación de un programa kernel desde un fichero de código
	program = clCreateProgramWithSource(context, 1, (const char **)&source_str, (const size_t *)&source_size, &ret);
	ret = clBuildProgram(program, 1, &device_id, NULL, NULL, NULL);
	if (ret != CL_SUCCESS) {
		size_t len;
		char buffer[2048];
		printf("Error: ¡Fallo al construir el programa ejecutable!\n");
		clGetProgramBuildInfo(program, device_id, CL_PROGRAM_BUILD_LOG, sizeof(buffer), buffer, &len);
		printf("%s", buffer);
		exit(-1);
	}

	// Creación del kernel OpenCL
	kernelNUM = clCreateKernel(program, "calc_num_particulas", &ret);

	// Creamos el buffer para las partículas y reservamos espacio ALINEADO para los datos
	size_t N = atoi(argv[1]);
	particle *particulas = (particle*) _aligned_malloc(N * sizeof(particle), 64);
	int *pesos = (int*) _aligned_malloc(N * sizeof(int), 64);
	objPARTICULAS = clCreateBuffer(context, CL_MEM_READ_ONLY, N * sizeof(particle), NULL, &ret);
	objPESOS = clCreateBuffer(context, CL_MEM_WRITE_ONLY, N * sizeof(int), NULL, &ret);
	float sum = 0.0f;
	const size_t global = 2;
	const size_t local_work_size = 1;

	// Inicializamos las partículas (Me interesan los pesos)
	srand(time(NULL));
	for(unsigned index = 0; index < N; ++index) {
		particulas[index].x = 0.0;
		particulas[index].y = 0.0;
		particulas[index].s = 0.0;
		particulas[index].xp = 0.0;
		particulas[index].yp = 0.0;
		particulas[index].sp = 0.0;
		particulas[index].x0 = 0.0;
		particulas[index].y0 = 0.0;
		particulas[index].width = 0;
		particulas[index].height = 0;
		particulas[index].w = (float) (rand() % 2000);
		sum+=particulas[index].w;
    }

	// Normalizamos los datos
	for(int i = 0; i < N; ++i)
		particulas[i].w /= sum;

	// Transferimos las partículas al dispositivo y los pesos
	cl_event write_event;
	ret = clEnqueueWriteBuffer(command_queue, objPARTICULAS, CL_FALSE, 0, N * sizeof(particle), particulas, 0, NULL, &write_event);

	// Establecemos los argumentos del kernel
	ret = clSetKernelArg(kernelNUM, 0, sizeof(cl_mem), &objPARTICULAS);
	ret = clSetKernelArg(kernelNUM, 1, sizeof(int), &N);
	ret = clSetKernelArg(kernelNUM, 2, sizeof(cl_mem), &objPESOS);

	// Ejecutamos el kernel. Un work-item por cada work-group o unidad de cómputo
	ret = clEnqueueNDRangeKernel(command_queue, kernelNUM, 1, NULL, &global, &local_work_size, 1, &write_event, &kernel_event);

	// Leemos los resultados
	ret = clEnqueueReadBuffer(command_queue, objPESOS, CL_FALSE, 0, N * sizeof(int), pesos, 1, &kernel_event, &finish_event);
	
	// Esperamos a que termine de leer los resultados
	clWaitForEvents(1, &finish_event);

	// Obtenemos el tiempo del kernel y de las transferencias CPU-RAM
	cl_ulong totalKernel = getStartEndTime(kernel_event);
	cl_ulong totalRam = getStartEndTime(write_event) + getStartEndTime(finish_event);

	const double end_time = getCurrentTimestamp();

	// Obtenemos el tiempo consumido por el programa, el kernel y las transferencias de memoria
	printf("\nTiempo total del programa: %0.3f ms\n", (end_time - start_time) * 1e3);
	printf("Tiempo total consumido por el kernel: %0.3f ms\n", double(totalKernel) * 1e-6);
	printf("Tiempo total consumido en transferencias CPU-RAM: %0.3f ms\n", double(totalRam) * 1e-6);

	// Liberamos todos los recursos usados (kernels y objetos OpenCL)
	clReleaseEvent(kernel_event);
	clReleaseEvent(finish_event);
	clReleaseEvent(write_event);
	clReleaseMemObject(objPARTICULAS);
	clReleaseMemObject(objPESOS);
	clReleaseKernel(kernelNUM);
	clReleaseCommandQueue(command_queue);
	clReleaseProgram(program);
	clReleaseContext(context);
}
Exemple #12
0
/**
 * @brief Main principal
 * @param argc El número de argumentos del programa
 * @param argv Cadenas de argumentos del programa
 * @return Nada si es correcto o algún número negativo si es incorrecto
 */
int main( int argc, char** argv ) {

	if(argc != 2)
		return -1;

	// Medimos tiempo para el programa
	const double start_time = getCurrentTimestamp();

	// Declaración de variables
	IplImage *first_frame; // Primer frame
	IplImage *frame, *hsv_frame;
	CvCapture *video;
	FILE *kernels;
	char *source_str;
	size_t source_size, work_items;

	// OpenCL runtime configuration
	unsigned num_devices;
	cl_platform_id platform_ids[3];
	cl_uint ret_num_platforms;
	cl_device_id device_id;
	cl_context context = NULL;
	cl_command_queue command_queue;
	cl_program program = NULL;
	cl_int ret;
	cl_kernel kernelHISTO;
	cl_event kernel_event, finish_event;
	cl_mem objFRAME, objHISTO;

	// Abrimos el fichero que contiene el kernel
	fopen_s(&kernels, "histoGPU.cl", "r");
	if (!kernels) {
		fprintf(stderr, "Fallo al cargar el kernel\n");
		exit(-1);
	}	
	source_str = (char *) malloc(0x100000);
	source_size = fread(source_str, 1, 0x100000, kernels);
	fclose(kernels);

	// Obtenemos los IDs de las plataformas disponibles
	if( clGetPlatformIDs(3, platform_ids, &ret_num_platforms) != CL_SUCCESS) {
		printf("No se puede obtener id de la plataforma");
		return -1;
	}

	// Intentamos obtener un dispositivo GPU soportado
	if( clGetDeviceIDs(platform_ids[0], CL_DEVICE_TYPE_GPU, 1, &device_id, &num_devices) != CL_SUCCESS) {
		printf("No se puede obtener id del dispositivo");
		return -1;
	}
	clGetDeviceInfo(device_id, CL_DEVICE_MAX_COMPUTE_UNITS, sizeof(cl_uint), &work_items, NULL);
 
	// Creación de un contexto OpenCL
	context = clCreateContext(NULL, 1, &device_id, NULL, NULL, &ret);
 
	// Creación de una cola de comandos
	command_queue = clCreateCommandQueue(context, device_id, CL_QUEUE_PROFILING_ENABLE, &ret);

	// Creación de un programa kernel desde un fichero de código
	program = clCreateProgramWithSource(context, 1, (const char **)&source_str, (const size_t *)&source_size, &ret);
	ret = clBuildProgram(program, 1, &device_id, "-cl-nv-verbose", NULL, NULL);
	if (ret != CL_SUCCESS) {
		size_t len;
		char buffer[2048];
		printf("Error: ¡Fallo al construir el programa ejecutable!\n");
		clGetProgramBuildInfo(program, device_id, CL_PROGRAM_BUILD_LOG, sizeof(buffer), buffer, &len);
		printf("%s", buffer);
		exit(-1);
	}

	// Creación del kernel OpenCL
	kernelHISTO = clCreateKernel(program, "calc_histo", &ret);

	// Abrimos el fichero de video y leemos el primer frame
	video = cvCaptureFromFile( argv[1] );
	if( !video ) {
		printf("No se pudo abrir el fichero de video %s\n", &argv[1]);
		exit(-1);
	}

	first_frame = cvQueryFrame( video );
	hsv_frame = cvCreateImage( cvGetSize(first_frame), IPL_DEPTH_32F, 3 );
	cvConvertScale( first_frame, hsv_frame, 1.0 / 255.0, 0 );
	cvCvtColor( hsv_frame, hsv_frame, CV_BGR2HSV );

	// Creamos el buffer para los frames y el histograma
	float *histo = (float*) _aligned_malloc(HTAM * sizeof(float), 64);
	objFRAME = clCreateBuffer(context, CL_MEM_READ_ONLY, hsv_frame->imageSize, NULL, &ret);
	objHISTO = clCreateBuffer(context, CL_MEM_READ_WRITE, HTAM * sizeof(float), NULL, &ret);
	memset(histo, 0.0f, HTAM * sizeof(float));
	const size_t global_work_size = work_items * 1024;
	const size_t local_work_size = 1024;

	// Transferimos el frame al dispositivo
	cl_event write_event[2];
	ret = clEnqueueWriteBuffer(command_queue, objFRAME, CL_FALSE, 0, hsv_frame->imageSize, hsv_frame->imageData, 0, NULL, &write_event[0]);
	ret = clEnqueueWriteBuffer(command_queue, objHISTO, CL_FALSE, 0, HTAM * sizeof(float), histo, 0, NULL, &write_event[1]);

	// Establecemos los argumentos del kernel
	ret = clSetKernelArg(kernelHISTO, 0, sizeof(cl_mem), &objHISTO);
	ret = clSetKernelArg(kernelHISTO, 1, sizeof(cl_mem), &objFRAME);
	ret = clSetKernelArg(kernelHISTO, 2, sizeof(int), &hsv_frame->widthStep);
	ret = clSetKernelArg(kernelHISTO, 3, sizeof(int), &hsv_frame->height);
	ret = clSetKernelArg(kernelHISTO, 4, sizeof(int), &hsv_frame->width);

	// Ejecutamos el kernel. 128 work-items por cada work-group o unidad de cómputo
	ret = clEnqueueNDRangeKernel(command_queue, kernelHISTO, 1, NULL, &global_work_size, &local_work_size, 2, write_event, &kernel_event);

	// Leemos los resultados
	ret = clEnqueueReadBuffer(command_queue, objHISTO, CL_FALSE, 0, HTAM * sizeof(float), histo, 1, &kernel_event, &finish_event);
	
	// Esperamos a que termine de leer los resultados
	clWaitForEvents(1, &finish_event);

	// Obtenemos el tiempo del kernel y de las transferencias Pcie
	cl_ulong totalKernel = getStartEndTime(kernel_event);
	cl_ulong totalPcie = getStartEndTime(write_event[0]) + getStartEndTime(write_event[1]) + getStartEndTime(finish_event);
	cvReleaseImage( &hsv_frame );

	// Recordar que frame no se puede liberar debido al cvQueryFrame
	while( frame = cvQueryFrame( video ) ) {
		hsv_frame = cvCreateImage( cvGetSize(frame), IPL_DEPTH_32F, 3 );
		cvConvertScale( frame, hsv_frame, 1.0 / 255.0, 0 );
		cvCvtColor( hsv_frame, hsv_frame, CV_BGR2HSV );
		memset(histo, 0.0f, HTAM * sizeof(float));
		ret = clEnqueueWriteBuffer(command_queue, objFRAME, CL_FALSE, 0, hsv_frame->imageSize, hsv_frame->imageData, 0, NULL, &write_event[0]);
		ret = clSetKernelArg(kernelHISTO, 0, sizeof(cl_mem), &objHISTO);
		ret = clSetKernelArg(kernelHISTO, 1, sizeof(cl_mem), &objFRAME);
		ret = clSetKernelArg(kernelHISTO, 2, sizeof(int), &hsv_frame->widthStep);
		ret = clSetKernelArg(kernelHISTO, 3, sizeof(int), &hsv_frame->height);
		ret = clSetKernelArg(kernelHISTO, 4, sizeof(int), &hsv_frame->width);
		ret = clEnqueueNDRangeKernel(command_queue, kernelHISTO, 1, NULL, &global_work_size, &local_work_size, 2, write_event, &kernel_event);
		ret = clEnqueueReadBuffer(command_queue, objHISTO, CL_FALSE, 0, HTAM * sizeof(float), histo, 1, &kernel_event, &finish_event);
		clWaitForEvents(1, &finish_event);
		totalKernel += getStartEndTime(kernel_event);
		totalPcie += (getStartEndTime(write_event[0]) + getStartEndTime(write_event[1]) + getStartEndTime(finish_event));
		cvReleaseImage( &hsv_frame );
	}

	const double end_time = getCurrentTimestamp();

	// Obtenemos el tiempo consumido por el programa, el kernel y las transferencias de memoria
	printf("\nTiempo total del programa: %0.3f ms\n", (end_time - start_time) * 1e3);
	printf("Tiempo total consumido por el kernel: %0.3f ms\n", double(totalKernel) * 1e-6);
	printf("Tiempo total consumido en transferencias Pcie: %0.3f ms\n", double(totalPcie) * 1e-6);

	// Liberamos todos los recursos usados (kernels, frames y objetos OpenCL)
	clReleaseEvent(kernel_event);
	clReleaseEvent(finish_event);
	clReleaseEvent(write_event[0]);
    clReleaseEvent(write_event[1]);
	cvReleaseCapture( &video );
	clReleaseMemObject(objFRAME);
	clReleaseMemObject(objHISTO);
	clReleaseKernel(kernelHISTO);
	clReleaseCommandQueue(command_queue);
	clReleaseProgram(program);
	clReleaseContext(context);
}
  // Add new variables to the end of the ordering
  BOOST_FOREACH(const Values::ConstKeyValuePair& key_value, newTheta) {
    ordering_.push_back(key_value.key);
  }
  // Augment Delta
  delta_.insert(newTheta.zeroVectors());

  // Add the new factors to the graph, updating the variable index
  insertFactors(newFactors);
  gttoc(augment_system);

  // Update the Timestamps associated with the factor keys
  updateKeyTimestampMap(timestamps);

  // Get current timestamp
  double current_timestamp = getCurrentTimestamp();
  if (debug)
    std::cout << "Current Timestamp: " << current_timestamp << std::endl;

  // Find the set of variables to be marginalized out
  std::set<Key> marginalizableKeys = findKeysBefore(
      current_timestamp - smootherLag_);
  if (debug) {
    std::cout << "Marginalizable Keys: ";
    BOOST_FOREACH(Key key, marginalizableKeys) {
      std::cout << DefaultKeyFormatter(key) << " ";
    }
    std::cout << std::endl;
  }

  // Reorder