Esempio n. 1
0
File: main.cpp Progetto: rotty11/Pfc
/**
 * @brief Main principal
 * @param argc El número de argumentos del programa
 * @param argv Cadenas de argumentos del programa
 * @return Nada si es correcto o algún número negativo si es incorrecto
 */
int main( int argc, char** argv ) {

	if(argc != 2)
		return -1;

	// Medimos tiempo para el programa
	const double start_time = getCurrentTimestamp();

	FILE *kernels;
	char *source_str;
	size_t source_size, work_items;

	// OpenCL runtime configuration
	unsigned num_devices;
	cl_platform_id platform_ids[3];
	cl_uint ret_num_platforms;
	cl_device_id device_id;
	cl_context context = NULL;
	cl_command_queue command_queue;
	cl_program program = NULL;
	cl_int ret;
	cl_kernel kernelNUM;
	cl_event kernel_event, finish_event;
	cl_mem objPARTICULAS, objPESOS;

	// Abrimos el fichero que contiene el kernel
	fopen_s(&kernels, "numparticulasCPU.cl", "r");
	if (!kernels) {
		fprintf(stderr, "Fallo al cargar el kernel\n");
		exit(-1);
	}	
	source_str = (char *) malloc(0x100000);
	source_size = fread(source_str, 1, 0x100000, kernels);
	fclose(kernels);

	// Obtenemos los IDs de las plataformas disponibles
	if( clGetPlatformIDs(3, platform_ids, &ret_num_platforms) != CL_SUCCESS) {
		printf("No se puede obtener id de la plataforma");
		return -1;
	}

	// Intentamos obtener un dispositivo CPU soportado
	if( clGetDeviceIDs(platform_ids[1], CL_DEVICE_TYPE_CPU, 1, &device_id, &num_devices) != CL_SUCCESS) {
		printf("No se puede obtener id del dispositivo");
		return -1;
	}
	clGetDeviceInfo(device_id, CL_DEVICE_MAX_COMPUTE_UNITS, sizeof(cl_uint), &work_items, NULL);
 
	// Creación de un contexto OpenCL
	context = clCreateContext(NULL, 1, &device_id, NULL, NULL, &ret);
 
	// Creación de una cola de comandos
	command_queue = clCreateCommandQueue(context, device_id, CL_QUEUE_PROFILING_ENABLE, &ret);

	// Creación de un programa kernel desde un fichero de código
	program = clCreateProgramWithSource(context, 1, (const char **)&source_str, (const size_t *)&source_size, &ret);
	ret = clBuildProgram(program, 1, &device_id, NULL, NULL, NULL);
	if (ret != CL_SUCCESS) {
		size_t len;
		char buffer[2048];
		printf("Error: ¡Fallo al construir el programa ejecutable!\n");
		clGetProgramBuildInfo(program, device_id, CL_PROGRAM_BUILD_LOG, sizeof(buffer), buffer, &len);
		printf("%s", buffer);
		exit(-1);
	}

	// Creación del kernel OpenCL
	kernelNUM = clCreateKernel(program, "calc_num_particulas", &ret);

	// Creamos el buffer para las partículas y reservamos espacio ALINEADO para los datos
	size_t N = atoi(argv[1]);
	particle *particulas = (particle*) _aligned_malloc(N * sizeof(particle), 64);
	int *pesos = (int*) _aligned_malloc(N * sizeof(int), 64);
	objPARTICULAS = clCreateBuffer(context, CL_MEM_READ_ONLY, N * sizeof(particle), NULL, &ret);
	objPESOS = clCreateBuffer(context, CL_MEM_WRITE_ONLY, N * sizeof(int), NULL, &ret);
	float sum = 0.0f;
	const size_t global = 2;
	const size_t local_work_size = 1;

	// Inicializamos las partículas (Me interesan los pesos)
	srand(time(NULL));
	for(unsigned index = 0; index < N; ++index) {
		particulas[index].x = 0.0;
		particulas[index].y = 0.0;
		particulas[index].s = 0.0;
		particulas[index].xp = 0.0;
		particulas[index].yp = 0.0;
		particulas[index].sp = 0.0;
		particulas[index].x0 = 0.0;
		particulas[index].y0 = 0.0;
		particulas[index].width = 0;
		particulas[index].height = 0;
		particulas[index].w = (float) (rand() % 2000);
		sum+=particulas[index].w;
    }

	// Normalizamos los datos
	for(int i = 0; i < N; ++i)
		particulas[i].w /= sum;

	// Transferimos las partículas al dispositivo y los pesos
	cl_event write_event;
	ret = clEnqueueWriteBuffer(command_queue, objPARTICULAS, CL_FALSE, 0, N * sizeof(particle), particulas, 0, NULL, &write_event);

	// Establecemos los argumentos del kernel
	ret = clSetKernelArg(kernelNUM, 0, sizeof(cl_mem), &objPARTICULAS);
	ret = clSetKernelArg(kernelNUM, 1, sizeof(int), &N);
	ret = clSetKernelArg(kernelNUM, 2, sizeof(cl_mem), &objPESOS);

	// Ejecutamos el kernel. Un work-item por cada work-group o unidad de cómputo
	ret = clEnqueueNDRangeKernel(command_queue, kernelNUM, 1, NULL, &global, &local_work_size, 1, &write_event, &kernel_event);

	// Leemos los resultados
	ret = clEnqueueReadBuffer(command_queue, objPESOS, CL_FALSE, 0, N * sizeof(int), pesos, 1, &kernel_event, &finish_event);
	
	// Esperamos a que termine de leer los resultados
	clWaitForEvents(1, &finish_event);

	// Obtenemos el tiempo del kernel y de las transferencias CPU-RAM
	cl_ulong totalKernel = getStartEndTime(kernel_event);
	cl_ulong totalRam = getStartEndTime(write_event) + getStartEndTime(finish_event);

	const double end_time = getCurrentTimestamp();

	// Obtenemos el tiempo consumido por el programa, el kernel y las transferencias de memoria
	printf("\nTiempo total del programa: %0.3f ms\n", (end_time - start_time) * 1e3);
	printf("Tiempo total consumido por el kernel: %0.3f ms\n", double(totalKernel) * 1e-6);
	printf("Tiempo total consumido en transferencias CPU-RAM: %0.3f ms\n", double(totalRam) * 1e-6);

	// Liberamos todos los recursos usados (kernels y objetos OpenCL)
	clReleaseEvent(kernel_event);
	clReleaseEvent(finish_event);
	clReleaseEvent(write_event);
	clReleaseMemObject(objPARTICULAS);
	clReleaseMemObject(objPESOS);
	clReleaseKernel(kernelNUM);
	clReleaseCommandQueue(command_queue);
	clReleaseProgram(program);
	clReleaseContext(context);
}
Esempio n. 2
0
// Default aligned allocator: forwards the request to _aligned_malloc and hands
// its result back unchanged.
void* DefaultAllocateAligned(size_t size, size_t alignment)
{
	void* const block = _aligned_malloc(size, alignment);
	return block;
}
int _tmain(int argc, _TCHAR* argv[])
{
	const size_t max_num = 100000000;
	const int buffer_element_count = 10000;
	const int max_float_digits = 8;
	
	// Init critical section;
	InitializeCriticalSection(&g_write_queue_cs);
	g_write_queue_has_more_data_event = CreateEvent(NULL, FALSE, FALSE, NULL);
	g_write_queue_accepts_more_data_event = CreateEvent(NULL, FALSE, TRUE, NULL);

	HANDLE hFile = ::CreateFile(L"output.txt", GENERIC_WRITE, 0, 0, CREATE_ALWAYS, 0, NULL);
	if (hFile == INVALID_HANDLE_VALUE) 
	{
		printf("Oppps");
		exit(-1);
	}

	// Launch a writer thread.
	HANDLE hThread = CreateThread(NULL, 0, &WriteThreadProc, hFile,0, 0);

	sfmt_t sfmt;
	sfmt_init_gen_rand(&sfmt, 1234);
	uint32_t* randoms = (uint32_t*) _aligned_malloc(sizeof(uint32_t)*buffer_element_count, 32);

	g_begin_ticks = GetTickCount64();

	// std::ofstream output();
	int finish = max_num / buffer_element_count;
	for (size_t i=0; i < finish; ++i) 
	{
		// Prepare a block of numbers for writing.
		WriteBuffer* write_buffer = new WriteBuffer(buffer_element_count, max_float_digits);
		char* write_ptr = write_buffer->ptr_;

		sfmt_fill_array32(&sfmt, randoms, buffer_element_count);

		for (int k = 0; k < buffer_element_count; ++k)
		{
			// Format each float to string and append to buffer.
			// float random = float(rand()) / RAND_MAX;
			float random = float(randoms[k]) / 4294967296.0f;
			write_ptr += modp_dtoa(random, write_ptr, max_float_digits);
			*(write_ptr++) = '\r';
			*(write_ptr++) = '\n';
		}
		// Compute how many bytes to write.
		write_buffer->useful_data_size_ =  write_ptr - write_buffer->ptr_;

		// Enqueue for writing.
		while (write_buffer) {
			EnterCriticalSection(&g_write_queue_cs);
			if (g_write_queue.size() < kMaxQueue) 
			{
				// ops.
				g_write_queue.push(write_buffer);
				SetEvent(g_write_queue_has_more_data_event);
				write_buffer = NULL;
			}
			LeaveCriticalSection(&g_write_queue_cs);
			if (write_buffer) {
				// slow down writing, queue is full
				printf("S");
				WaitForSingleObject(g_write_queue_accepts_more_data_event, 200);
			}
		}
	}

	g_end_ticks = GetTickCount64();

	// Let the writing thread know we are done.
	EnterCriticalSection(&g_write_queue_cs);
	g_done = true;
	LeaveCriticalSection(&g_write_queue_cs);
	SetEvent(g_write_queue_has_more_data_event);
	// Wait for writing thread to finish.
	WaitForSingleObject(hThread, INFINITE);

	_aligned_free(randoms);

	__int64 delta = g_end_ticks - g_begin_ticks;

	printf("Speed %f Mb per sec\n", (g_total_bytes_written * 1000.0) / (1024.0 * 1024 * delta));

	char c;
	scanf("%c", &c);
	::CloseHandle(hFile);
	return 0;
}
Esempio n. 4
0
// Allocates `amount` bytes through _aligned_malloc. The only alignment this
// function is asserted to be called with is FIBITMAP_ALIGNMENT.
void* FreeImage_Aligned_Malloc(size_t amount, size_t alignment) {
	assert(alignment == FIBITMAP_ALIGNMENT);
	void* const memory = _aligned_malloc(amount, alignment);
	return memory;
}
Esempio n. 5
0
/**
 * Creates an X11-backed graphics surface for a CreateSurface PDU.
 *
 * Allocates the surface record and its 16-byte aligned pixel buffer, wraps the
 * buffer in an XImage (through a separate staging buffer when the surface
 * format differs from the GDI destination format) and registers the surface
 * with the channel context via SetSurfaceData.
 *
 * @param context        graphics channel context; context->custom holds the rdpGdi
 * @param createSurface  PDU carrying surface id, width, height and pixel format
 *
 * @return 0 on success, otherwise a Win32 error code
 */
static UINT xf_CreateSurface(RdpgfxClientContext* context,
                             const RDPGFX_CREATE_SURFACE_PDU* createSurface)
{
	UINT ret = CHANNEL_RC_NO_MEMORY;
	size_t size;
	xfGfxSurface* surface;
	rdpGdi* gdi = (rdpGdi*)context->custom;
	xfContext* xfc = (xfContext*) gdi->context;
	surface = (xfGfxSurface*) calloc(1, sizeof(xfGfxSurface));

	if (!surface)
		return CHANNEL_RC_NO_MEMORY;

	surface->gdi.codecs = gdi->context->codecs;

	if (!surface->gdi.codecs)
	{
		WLog_ERR(TAG, "%s: global GDI codecs aren't set", __FUNCTION__);
		goto out_free;
	}

	surface->gdi.surfaceId = createSurface->surfaceId;
	surface->gdi.width = (UINT32) createSurface->width;
	surface->gdi.height = (UINT32) createSurface->height;

	switch (createSurface->pixelFormat)
	{
		case GFX_PIXEL_FORMAT_ARGB_8888:
			surface->gdi.format = PIXEL_FORMAT_BGRA32;
			break;

		case GFX_PIXEL_FORMAT_XRGB_8888:
			surface->gdi.format = PIXEL_FORMAT_BGRX32;
			break;

		default:
			WLog_ERR(TAG, "%s: unknown pixelFormat 0x%"PRIx32"", __FUNCTION__, createSurface->pixelFormat);
			ret = ERROR_INTERNAL_ERROR;
			goto out_free;
	}

	surface->gdi.scanline = surface->gdi.width * GetBytesPerPixel(surface->gdi.format);
	surface->gdi.scanline = x11_pad_scanline(surface->gdi.scanline, xfc->scanline_pad);
	/* Widen before multiplying: the product of two 32-bit values can exceed
	 * 32 bits, which would yield an undersized buffer. */
	size = (size_t) surface->gdi.scanline * surface->gdi.height;
	surface->gdi.data = (BYTE*)_aligned_malloc(size, 16);

	if (!surface->gdi.data)
	{
		WLog_ERR(TAG, "%s: unable to allocate GDI data", __FUNCTION__);
		goto out_free;
	}

	ZeroMemory(surface->gdi.data, size);

	if (AreColorFormatsEqualNoAlpha(gdi->dstFormat, surface->gdi.format))
	{
		/* Same layout as the destination: the XImage wraps the GDI buffer directly. */
		surface->image = XCreateImage(xfc->display, xfc->visual, xfc->depth, ZPixmap, 0,
		                              (char*) surface->gdi.data, surface->gdi.width, surface->gdi.height,
		                              xfc->scanline_pad, surface->gdi.scanline);
	}
	else
	{
		/* Different layout: the XImage wraps a staging buffer in the destination format. */
		UINT32 width = surface->gdi.width;
		UINT32 bytes = GetBytesPerPixel(gdi->dstFormat);
		surface->stageScanline = width * bytes;
		surface->stageScanline = x11_pad_scanline(surface->stageScanline, xfc->scanline_pad);
		size = (size_t) surface->stageScanline * surface->gdi.height;
		surface->stage = (BYTE*) _aligned_malloc(size, 16);

		if (!surface->stage)
		{
			WLog_ERR(TAG, "%s: unable to allocate stage buffer", __FUNCTION__);
			goto out_free_gdidata;
		}

		ZeroMemory(surface->stage, size);
		surface->image = XCreateImage(xfc->display, xfc->visual, xfc->depth,
		                              ZPixmap, 0, (char*) surface->stage,
		                              surface->gdi.width, surface->gdi.height,
		                              xfc->scanline_pad, surface->stageScanline);
	}

	if (!surface->image)
	{
		WLog_ERR(TAG, "%s: an error occurred when creating the XImage", __FUNCTION__);
		goto error_surface_image;
	}

	surface->image->byte_order = LSBFirst;
	surface->image->bitmap_bit_order = LSBFirst;
	surface->gdi.outputMapped = FALSE;
	region16_init(&surface->gdi.invalidRegion);

	if (context->SetSurfaceData(context, surface->gdi.surfaceId, (void*) surface) != CHANNEL_RC_OK)
	{
		WLog_ERR(TAG, "%s: an error occurred during SetSurfaceData", __FUNCTION__);
		goto error_set_surface_data;
	}

	return CHANNEL_RC_OK;
error_set_surface_data:
	/* Detach the pixel buffer first: it is released explicitly by the labels below. */
	surface->image->data = NULL;
	XDestroyImage(surface->image);
error_surface_image:
	_aligned_free(surface->stage);
out_free_gdidata:
	_aligned_free(surface->gdi.data);
out_free:
	free(surface);
	return ret;
}
Esempio n. 6
0
// Allocates dwSize bytes with 16-byte alignment through _aligned_malloc.
void* __restrict DefaultAlloc::_Allocate(size_t dwSize)
{
    void* const block = _aligned_malloc(dwSize, 16);
    return block;
}
Esempio n. 7
0
// Decodes every slice queued on thread `t` into mRaw.
// compression == 7 uses the lossless JPEG decoder (LJpegPlain); compression ==
// 0x884c treats each slice as a standalone JPEG image decoded with libjpeg and
// copied into the raw image. Decoder and I/O exceptions are recorded on mRaw
// through setError instead of propagating.
void DngDecoderSlices::decodeSlice(DngDecoderThread* t) {
  if (compression == 7) {
    while (!t->slices.empty()) {
      LJpegPlain l(mFile, mRaw);
      l.mDNGCompatible = mFixLjpeg;
      DngSliceElement e = t->slices.front();
      l.mUseBigtable = e.mUseBigtable;
      t->slices.pop();
      try {
        l.startDecoder(e.byteOffset, e.byteCount, e.offX, e.offY);
      } catch (RawDecoderException &err) {
        mRaw->setError(err.what());
      } catch (IOException &err) {
        mRaw->setError(err.what());
      }
    }
    /* Lossy DNG */
  } else if (compression == 0x884c) {
    /* Each slice is a JPEG image */
    // Zero-initialized so jpeg_destroy_decompress is safe even if creation fails early.
    struct jpeg_decompress_struct dinfo = {};
    struct jpeg_error_mgr jerr;
    while (!t->slices.empty()) {
      DngSliceElement e = t->slices.front();
      t->slices.pop();
      uchar8 *complete_buffer = NULL;
      // One row pointer is all jpeg_read_scanlines needs here; no heap allocation required.
      JSAMPROW buffer[1];

      try {
        uint32 size = mFile->getSize();
        // libjpeg requires the error manager to be installed before the
        // decompress object is created, since creation itself can report errors.
        dinfo.err = jpeg_std_error(&jerr);
        jerr.error_exit = my_error_throw;
        jpeg_create_decompress(&dinfo);
        CHECKSIZE(e.byteOffset);
        CHECKSIZE(e.byteOffset+e.byteCount);
        JPEG_MEMSRC(&dinfo, (unsigned char*)mFile->getData(e.byteOffset, e.byteCount), e.byteCount);

        if (JPEG_HEADER_OK != jpeg_read_header(&dinfo, TRUE))
          ThrowRDE("DngDecoderSlices: Unable to read JPEG header");

        jpeg_start_decompress(&dinfo);
        if (dinfo.output_components != (int)mRaw->getCpp())
          ThrowRDE("DngDecoderSlices: Component count doesn't match");
        int row_stride = dinfo.output_width * dinfo.output_components;
        // Computed in size_t so large images do not overflow a 32-bit int.
        const size_t pic_size = (size_t)dinfo.output_height * (size_t)row_stride;
        complete_buffer = (uchar8*)_aligned_malloc(pic_size, 16);
        if (!complete_buffer)
          ThrowRDE("DngDecoderSlices: Unable to allocate JPEG decode buffer");
        while (dinfo.output_scanline < dinfo.output_height) {
          buffer[0] = (JSAMPROW)(&complete_buffer[dinfo.output_scanline*row_stride]);
          if (0 == jpeg_read_scanlines(&dinfo, buffer, 1))
            ThrowRDE("DngDecoderSlices: JPEG Error while decompressing image.");
        }
        jpeg_finish_decompress(&dinfo);

        // Now the image is decoded, and we copy the image data
        // (clipped to the part of the slice that lies inside the raw image).
        int copy_w = min(mRaw->dim.x-e.offX, dinfo.output_width);
        int copy_h = min(mRaw->dim.y-e.offY, dinfo.output_height);
        for (int y = 0; y < copy_h; y++) {
          uchar8* src = &complete_buffer[row_stride*y];
          ushort16* dst = (ushort16*)mRaw->getData(e.offX, y+e.offY);
          for (int x = 0; x < copy_w; x++) {
            for (int c=0; c < dinfo.output_components; c++)
              *dst++ = (*src++);
          }
        }
      } catch (RawDecoderException &err) {
        mRaw->setError(err.what());
      } catch (IOException &err) {
        mRaw->setError(err.what());
      }
      if (complete_buffer)
        _aligned_free(complete_buffer);
      jpeg_destroy_decompress(&dinfo);
    }
  }
  else
    mRaw->setError("DngDecoderSlices: Unknown compression");
}
Esempio n. 8
0
// Debug-named variant of _aligned_malloc. The file name and line number
// arguments are accepted but not used; the call forwards straight to
// _aligned_malloc.
void * __cdecl _aligned_malloc_dbg( size_t size, size_t align, const char * f_name, int line_n)
{
    void * const block = _aligned_malloc(size, align);
    return block;
}
Esempio n. 9
0
// Builds an OpenGL texture wrapper of the given type, size and internal format.
// First derives the pixel transfer format/type and the log2 bytes-per-pixel
// shift from `format`, then creates the GL texture storage (and, for Offscreen
// textures, a 32-byte aligned local read-back buffer).
GSTextureOGL::GSTextureOGL(int type, int w, int h, int format, GLuint fbo_read)
	: m_pbo_size(0), m_clean(false), m_local_buffer(NULL), m_r_x(0), m_r_y(0), m_r_w(0), m_r_h(0)
{
	// OpenGL didn't like dimensions of size 0
	m_size.x = max(1,w);
	m_size.y = max(1,h);
	m_format = format;
	m_type   = type;
	m_fbo_read = fbo_read;
	m_texture_id = 0;

	// Bunch of constant parameter
	switch (m_format) {
			// 1 Channel integer
		case GL_R32UI:
		case GL_R32I:
			m_int_format    = GL_RED_INTEGER;
			m_int_type      = (m_format == GL_R32UI) ? GL_UNSIGNED_INT : GL_INT;
			m_int_shift     = 2;
			break;
		case GL_R16UI:
			m_int_format    = GL_RED_INTEGER;
			m_int_type      = GL_UNSIGNED_SHORT;
			m_int_shift     = 1;
			break;

			// 1 Channel normalized
		case GL_R8:
			m_int_format    = GL_RED;
			m_int_type      = GL_UNSIGNED_BYTE;
			m_int_shift     = 0;
			break;

			// 4 channel normalized
		case GL_RGBA16:
			m_int_format    = GL_RGBA;
			m_int_type      = GL_UNSIGNED_SHORT;
			m_int_shift     = 3;
			break;
		case GL_RGBA8:
			m_int_format    = GL_RGBA;
			m_int_type      = GL_UNSIGNED_BYTE;
			m_int_shift     = 2;
			break;

			// 4 channel integer
		case GL_RGBA16I:
		case GL_RGBA16UI:
			m_int_format    = GL_RGBA_INTEGER;
			// Compare against the unsigned variant of *this* case; the previous
			// check against GL_R16UI could never match here, so GL_RGBA16UI was
			// always given the signed type.
			m_int_type      = (m_format == GL_RGBA16UI) ? GL_UNSIGNED_SHORT : GL_SHORT;
			m_int_shift     = 3;
			break;

			// 4 channel float
		case GL_RGBA32F:
			m_int_format    = GL_RGBA;
			m_int_type      = GL_FLOAT;
			m_int_shift     = 4;
			break;
		case GL_RGBA16F:
			m_int_format    = GL_RGBA;
			m_int_type      = GL_HALF_FLOAT;
			m_int_shift     = 3;
			break;

			// Depth buffer
		case GL_DEPTH32F_STENCIL8:
			m_int_format    = GL_DEPTH_STENCIL;
			m_int_type      = GL_FLOAT_32_UNSIGNED_INT_24_8_REV;
			m_int_shift     = 0;
			break;

			// Backbuffer
		case 0:
			m_int_format    = 0;
			m_int_type      = 0;
			m_int_shift     = 0;
			break;

		default:
			m_int_format    = 0;
			m_int_type      = 0;
			m_int_shift     = 0;
			ASSERT(0);
	}

	// Generate & Allocate the buffer
	switch (m_type) {
		case GSTexture::Offscreen:
			// Offscreen is only used to read color. So it only requires 4B by pixel
			m_local_buffer = (uint8*)_aligned_malloc(m_size.x * m_size.y * 4, 32);
			// Intentional fall-through: an offscreen texture also needs GL storage.
		case GSTexture::Texture:
		case GSTexture::RenderTarget:
		case GSTexture::DepthStencil:
			glCreateTextures(GL_TEXTURE_2D, 1, &m_texture_id);
			glTextureStorage2D(m_texture_id, 1+GL_TEX_LEVEL_0, m_format, m_size.x, m_size.y);
			if (m_format == GL_R8) {
				// Emulate DX behavior, beside it avoid special code in shader to differentiate
				// palette texture from a GL_RGBA target or a GL_R texture.
				glTextureParameteri(m_texture_id, GL_TEXTURE_SWIZZLE_A, GL_RED);
			}
			break;
		case GSTexture::Backbuffer:
		default:
			break;
	}
}
Esempio n. 10
0
 // Allocates the global CLUT buffer (256 * 8 bytes) and zero-fills it.
 void GSClut::init()
 {
	const size_t clut_bytes = 256 * 8;
	// The original note asks for 512-byte alignment; the 1024 used here also satisfies it.
	g_pbyGSClut = (u8*)_aligned_malloc(clut_bytes, 1024);
	memset(g_pbyGSClut, 0, clut_bytes);
 }
Esempio n. 11
0
// Sets up a conversion of `src` to Y8.
// Planar and YUY2 inputs only record which copy path to use. RGB input
// additionally allocates a 16-byte aligned table of four 16-bit coefficients
// (B, G, R, alpha = 0; the scale makes 32768 represent 1.0) chosen by
// `in_matrix`, sets the luma offset and generates the RGB24/RGB32 routine.
// Reports an error through env->ThrowError for an unknown matrix, an unknown
// input format, or when the coefficient table cannot be allocated.
ConvertToY8::ConvertToY8(PClip src, int in_matrix, IScriptEnvironment* env) : GenericVideoFilter(src), matrix(0) {
  yuy2_input = blit_luma_only = rgb_input = false;

  if (vi.IsPlanar()) {
    blit_luma_only = true;
    vi.pixel_type = VideoInfo::CS_Y8;
    return;
  }

  if (vi.IsYUY2()) {
    yuy2_input = true;
    vi.pixel_type = VideoInfo::CS_Y8;
    return;
  }

  if (vi.IsRGB()) {
    rgb_input = true;
    pixel_step = vi.BytesFromPixels(1);
    vi.pixel_type = VideoInfo::CS_Y8;
    matrix = (signed short*)_aligned_malloc(sizeof(short)*4, 16);
    // The coefficients are written through this pointer right away, so a
    // failed allocation must be reported before it is dereferenced.
    if (!matrix)
      env->ThrowError("ConvertToY8: Could not allocate matrix.");
    signed short* m = matrix;
    if (in_matrix == Rec601) {
      *m++ = (signed short)((219.0/255.0)*0.114*32768.0+0.5);  //B
      *m++ = (signed short)((219.0/255.0)*0.587*32768.0+0.5);  //G
      *m++ = (signed short)((219.0/255.0)*0.299*32768.0+0.5);  //R
      offset_y = 16;
    } else if (in_matrix == PC_601) {
      *m++ = (signed short)(0.114*32768.0+0.5);  //B
      *m++ = (signed short)(0.587*32768.0+0.5);  //G
      *m++ = (signed short)(0.299*32768.0+0.5);  //R
      offset_y = 0;
    } else if (in_matrix == Rec709) {
      *m++ = (signed short)((219.0/255.0)*0.0722*32768.0+0.5);  //B
      *m++ = (signed short)((219.0/255.0)*0.7152*32768.0+0.5);  //G
      *m++ = (signed short)((219.0/255.0)*0.2126*32768.0+0.5);  //R
      offset_y = 16;
    } else if (in_matrix == PC_709) {
      *m++ = (signed short)(0.0722*32768.0+0.5);  //B
      *m++ = (signed short)(0.7152*32768.0+0.5);  //G
      *m++ = (signed short)(0.2126*32768.0+0.5);  //R
      offset_y = 0;
    } else if (in_matrix == AVERAGE) {
      *m++ = (signed short)(32768.0/3 + 0.5);  //B
      *m++ = (signed short)(32768.0/3 + 0.5);  //G
      *m++ = (signed short)(32768.0/3 + 0.5);  //R
      offset_y = 0;
    } else {
      // Release the table before reporting the error.
      _aligned_free(matrix);
      matrix = 0;
      env->ThrowError("ConvertToY8: Unknown matrix.");
    }
    *m = 0;  // Alpha
 
    if (pixel_step == 4)
      genRGB32toY8(vi.width, vi.height, offset_y, matrix, env);
    else if (pixel_step == 3)
      genRGB24toY8(vi.width, vi.height, offset_y, matrix, env);

    return;
  }

  env->ThrowError("ConvertToY8: Unknown input format");
}
Esempio n. 12
0
/*
 * posix_memalign replacement built on _aligned_malloc.
 *
 * p      receives the allocated block on success (left untouched on failure)
 * align  required alignment; must be a power of two and at least sizeof(void*)
 * size   number of bytes to allocate
 *
 * Returns 0 on success, EINVAL for an unsupported alignment and ENOMEM when
 * the allocation fails.
 */
static int posix_memalign(void **p, size_t align, size_t size) { 
   void *buf;
   /* Reject invalid alignments up front instead of passing them to the allocator. */
   if (align < sizeof(void*) || (align & (align - 1)) != 0) return EINVAL;
   buf = _aligned_malloc(size, align);
   /* Return a definite non-zero code: errno could in principle still be 0. */
   if (buf == NULL) return ENOMEM;
   *p = buf;
   return 0;
}
Esempio n. 13
0
// Portable-name wrapper: allocates `size` bytes aligned to `align` via _aligned_malloc.
void* aligned_malloc(size_t size, size_t align) {
  void* const block = _aligned_malloc(size, align);
  return block;
}
Esempio n. 14
0
// Configures the filter: validates the clip format, converts the two user
// thresholds (thy, thc) into integer thresholds according to `type`, selects
// the map-calculation and plane-linking routines, and allocates the 16-byte
// aligned work buffer. Errors are reported through env->ThrowError.
TEMmod::TEMmod(PClip c, double thy, double thc, int tp, int chroma, int lnk,
               bool inv, float sc, IScriptEnvironment* env)
    : GenericVideoFilter(c), link(lnk), invert(inv), type(tp), scale(sc)
{
    if (!vi.IsPlanar()) {
        env->ThrowError("TEMmod: Planar format only.");
    }
    if (vi.IsY8()) {
        // Y8 clips: linking and chroma processing are switched off.
        link = 0;
        chroma = 0;
    }

    process[0] = 1;
    process[2] = chroma;
    process[1] = process[2];

    // Scale each user threshold by the factor that belongs to the selected type.
    const double user_thresholds[2] = {thy, thc};
    for (int idx = 0; idx < 2; ++idx) {
        const double t = user_thresholds[idx];
        double scaled;
        switch (type) {
        case 1:
            scaled = t * t * 4 + 0.5;
            break;
        case 2:
            scaled = t * t * 10000 + 0.5;
            break;
        case 3:
            scaled = t * 2 + 0.5;
            break;
        case 4:
            scaled = t * 100 / 3.0 + 0.5;
            break;
        default:
            scaled = t * 4 + 0.5;
            break;
        }
        threshold[idx] = static_cast<int>(scaled);
    }
    threshold[2] = threshold[1];

    // A zero threshold on either entry disables linking.
    if (threshold[0] == 0 || threshold[1] == 0) {
        link = 0;
    }

    // Types 1 and 2 each have two variants, picked by whether threshold[0] is positive.
    const int first_positive = threshold[0] > 0 ? 1 : 0;
    switch (type) {
    case 1:
        calc_map = calc_maps[first_positive];
        break;
    case 2:
        calc_map = calc_maps[2 + first_positive];
        break;
    default:
        calc_map = calc_maps[type + 1];
        break;
    }

    // Pick the linker table by link mode, then the entry by colour format.
    const link_planes_func* links = (link == 1) ? link_y_to_uv : link_all;
    int format_slot = 3;
    if (vi.IsYV24()) {
        format_slot = 0;
    } else if (vi.IsYV16()) {
        format_slot = 1;
    } else if (vi.IsYV12()) {
        format_slot = 2;
    }
    link_planes = links[format_slot];

    // Row pitch: width plus 47, rounded down to a multiple of 16.
    buff_pitch = ((vi.width + 47) / 16) * 16;
    buff = (uint8_t*)_aligned_malloc(buff_pitch * (type * 2 + 1), 16);
    if (!buff) {
        env->ThrowError("TEMmod: failed to allocate buffer.");
    }
}
Esempio n. 15
0
/*---------------------------------------------------------------------------
// 16-byte aligned calloc.
// Allocates nitems*size bytes with 16-byte alignment and zero-fills them
// (with non-temporal SSE stores when __SSE__ is defined, memset otherwise).
// Returns a null pointer when the allocation fails or when nitems*size would
// overflow size_t.
//-------------------------------------------------------------------------*/
void* xmm_calloc(size_t nitems, size_t size)
{
	unsigned char*	t_RetPtr;
	size_t	t_Total;

	/* calloc contract: fail instead of silently wrapping the byte count */
	if(size != 0 && nitems > ((size_t)-1) / size)
		return	(void*)0;

	t_Total	 = nitems*size;
	t_RetPtr	 = (unsigned char*)_aligned_malloc(t_Total, 16);

	if(t_RetPtr)
	{
#ifdef	__SSE__
		size_t	i,j, k;
		__m128	XMM0, XMM1, XMM2, XMM3;
		XMM0	 = 
		XMM1	 = 
		XMM2	 = 
		XMM3	 = _mm_setzero_ps();
		k	 = t_Total;
		/* Clear in progressively smaller chunks: 128, 64, 32, 16, 8, 4, then 1 byte. */
		j	 = k&(~127);
		for(i=0;i<j;i+=128)
		{
			_mm_stream_ps((float*)(t_RetPtr+i    ), XMM0);
			_mm_stream_ps((float*)(t_RetPtr+i+ 16), XMM1);
			_mm_stream_ps((float*)(t_RetPtr+i+ 32), XMM2);
			_mm_stream_ps((float*)(t_RetPtr+i+ 48), XMM3);
			_mm_stream_ps((float*)(t_RetPtr+i+ 64), XMM0);
			_mm_stream_ps((float*)(t_RetPtr+i+ 80), XMM1);
			_mm_stream_ps((float*)(t_RetPtr+i+ 96), XMM2);
			_mm_stream_ps((float*)(t_RetPtr+i+112), XMM3);
		}
		j	 = k&(~63);
		for(;i<j;i+=64)
		{
			_mm_stream_ps((float*)(t_RetPtr+i    ), XMM0);
			_mm_stream_ps((float*)(t_RetPtr+i+ 16), XMM1);
			_mm_stream_ps((float*)(t_RetPtr+i+ 32), XMM2);
			_mm_stream_ps((float*)(t_RetPtr+i+ 48), XMM3);
		}
		j	 = k&(~31);
		for(;i<j;i+=32)
		{
			_mm_stream_ps((float*)(t_RetPtr+i    ), XMM0);
			_mm_stream_ps((float*)(t_RetPtr+i+ 16), XMM1);
		}
		j	 = k&(~15);
		for(;i<j;i+=16)
		{
			_mm_stream_ps((float*)(t_RetPtr+i    ), XMM0);
		}
		j	 = k&(~7);
		for(;i<j;i+=8)
		{
			_mm_storel_pi((__m64*)(t_RetPtr+i   ), XMM0);
		}
		j	 = k&(~3);
		for(;i<j;i+=4)
		{
			_mm_store_ss((float*)(t_RetPtr+i)   , XMM0);
		}
		for(;i<k;i++)
			*(t_RetPtr+i    )	 = 0;
		/* Make the non-temporal stores visible before the block is handed out. */
		_mm_sfence();
#else
		memset(t_RetPtr, 0, t_Total);
#endif
	}
	return	(void*)t_RetPtr;
}
Esempio n. 16
0
// Array allocation for align_base_64: storage comes from _aligned_malloc with
// 64-byte alignment.
// NOTE(review): the result of _aligned_malloc is returned as-is, so a failed
// allocation yields a null pointer here rather than an exception — confirm
// callers expect that.
void* align_base_64::operator new[](size_t bytes)
{
	void* const storage = _aligned_malloc(bytes, 64);
	return storage;
}
Esempio n. 17
0
// Initializes the Direct3D 9 display for windowed visualization.
// Allocates and clears the level/waveform buffers (16-byte aligned when SSE is
// available), creates the Direct3D object and device for `hWnd` with a
// width x height back buffer, and calls Restore().
// Returns S_OK on success and E_FAIL on any failure (no window, missing
// Direct3DCreate9 entry point, buffer allocation failure, Direct3D/device
// creation failure, or Restore() failure).
HRESULT CGraphics::InitializeDisplay(HWND hWnd,UINT width,UINT height,BOOL blur)
{
	// Check if windowed visualization (skin mode not supported)
	if(!hWnd)
		return E_FAIL;

	// Safe to assume that if device is not null display is initialized
	if(m_Device)
		UninitializeDisplay();

	// Get the address of the create function
	if(!m_Direct3DCreate9)
	{
		TRACE(TEXT("Error: Failed to find \"Direct3DCreate9\" in \"%s\".\n"),D3DDLL);
		return E_FAIL;
	}

	// Reset audio data
	if(IsProcessorFeaturePresent(PF_XMMI_INSTRUCTIONS_AVAILABLE))
	{
		TRACE(TEXT("Info: Using SSE instruction set.\n"));

		m_Levels = (PFLOAT)_aligned_malloc(sizeof(FLOAT) * VISUALIZATION_BARCOUNT,16);
		m_LevelsBuffer = (PFLOAT)_aligned_malloc(sizeof(FLOAT) * VISUALIZATION_BARCOUNT,16);
		m_Waveform = (PFLOAT)_aligned_malloc(sizeof(FLOAT) * SA_BUFFER_SIZE,16);
		m_WaveformBuffer = (PFLOAT)_aligned_malloc(sizeof(FLOAT) * SA_BUFFER_SIZE,16);
	}
	else
	{
		m_Levels = (PFLOAT)malloc(sizeof(FLOAT) * VISUALIZATION_BARCOUNT);
		m_LevelsBuffer = (PFLOAT)malloc(sizeof(FLOAT) * VISUALIZATION_BARCOUNT);
		m_Waveform = (PFLOAT)malloc(sizeof(FLOAT) * SA_BUFFER_SIZE);
		m_WaveformBuffer = (PFLOAT)malloc(sizeof(FLOAT) * SA_BUFFER_SIZE);
	}

	// The buffers are cleared right below, so a failed allocation must stop here.
	// NOTE(review): like the other failure paths in this function, buffers that
	// were allocated stay in the members — presumably released by
	// UninitializeDisplay; confirm.
	if(!m_Levels || !m_LevelsBuffer || !m_Waveform || !m_WaveformBuffer)
	{
		TRACE(TEXT("Error: Failed to allocate audio data buffers.\n"));
		return E_FAIL;
	}

	ZeroMemory(m_Levels,sizeof(FLOAT) * VISUALIZATION_BARCOUNT);
	ZeroMemory(m_LevelsBuffer,sizeof(FLOAT) * VISUALIZATION_BARCOUNT);
	ZeroMemory(m_Waveform,sizeof(FLOAT) * SA_BUFFER_SIZE);
	ZeroMemory(m_WaveformBuffer,sizeof(FLOAT) * SA_BUFFER_SIZE);
	ZeroMemory(m_Peaks,sizeof(m_Peaks));

	m_Hwnd = hWnd;
	m_Blur = blur;

	m_Direct3D = m_Direct3DCreate9(D3D_SDK_VERSION);
	if(!m_Direct3D)
	{
		TRACE(TEXT("Error: Failed to create direct 3d.\n"));
		return E_FAIL;
	}

	m_Direct3D->GetDeviceCaps(D3DADAPTER_DEFAULT,D3DDEVTYPE_HAL,&m_Caps);
	m_Direct3D->GetAdapterIdentifier(D3DADAPTER_DEFAULT,NULL,&m_AdapterIdentifier);

	ZeroMemory(&m_PresentParameters,sizeof(m_PresentParameters));
	m_PresentParameters.Windowed					= TRUE;
	m_PresentParameters.SwapEffect					= D3DSWAPEFFECT_DISCARD;
	m_PresentParameters.BackBufferFormat			= D3DFMT_X8R8G8B8;
    //m_PresentParameters.EnableAutoDepthStencil		= TRUE;
	m_PresentParameters.AutoDepthStencilFormat		= D3DFMT_D16;
	m_PresentParameters.PresentationInterval		= D3DPRESENT_INTERVAL_DEFAULT;
	//m_PresentParameters.PresentationInterval		= D3DPRESENT_INTERVAL_IMMEDIATE;
	m_PresentParameters.BackBufferWidth				= width;
	m_PresentParameters.BackBufferHeight			= height;

	//m_PresentParameters.MultiSampleType				= D3DMULTISAMPLE_4_SAMPLES;

	// Behavior flags for CreateDevice; a plain integer bit mask, so start from 0.
	DWORD vp = 0;

	if(m_Caps.DevCaps & D3DDEVCAPS_PUREDEVICE)
	{
		vp |= D3DCREATE_PUREDEVICE;
		TRACE(TEXT("Info: Using pure device.\n"));
	}
	
	if(m_Caps.DevCaps & D3DDEVCAPS_HWTRANSFORMANDLIGHT)
	{
		vp |= D3DCREATE_HARDWARE_VERTEXPROCESSING;
		TRACE(TEXT("Info: Using hardware vertex processing.\n"));
	}
	else
	{
		vp |= D3DCREATE_SOFTWARE_VERTEXPROCESSING;
		TRACE(TEXT("Info: Using software vertex processing.\n"));
	}

	if(FAILED(m_Direct3D->CreateDevice(D3DADAPTER_DEFAULT,D3DDEVTYPE_HAL,m_Hwnd,vp|D3DCREATE_MULTITHREADED,&m_PresentParameters,&m_Device)))
	{
		TRACE(TEXT("Error: Failed to create direct 3d device.\n"));
		return E_FAIL;
	}

	if(FAILED(Restore()))
	{
		TRACE(TEXT("Error: Failed to initaly restore device.\n"));
		return E_FAIL;
	}

	return S_OK;
}
// Allocates the output port's buffers and enables the port.
// Reads the port definition, moves the component to the idle state if needed,
// starts enabling the output port and then registers nBufferCountActual
// buffers: either caller-owned aligned memory handed over with OMX_UseBuffer
// (use_buffers == true) or component-owned memory from OMX_AllocateBuffer.
// Each buffer header is stored in m_omx_output_buffers and queued in
// m_omx_output_available. Returns the first OMX error encountered, or the
// result of waiting for the port-enable command.
OMX_ERRORTYPE COMXCoreComponent::AllocOutputBuffers(bool use_buffers /* = false */)
{
  OMX_ERRORTYPE omx_err = OMX_ErrorNone;

  if(!m_handle)
    return OMX_ErrorUndefined;

  m_omx_output_use_buffers = use_buffers; 

  OMX_PARAM_PORTDEFINITIONTYPE portFormat;
  OMX_INIT_STRUCTURE(portFormat);
  portFormat.nPortIndex = m_output_port;

  omx_err = OMX_GetParameter(m_handle, OMX_IndexParamPortDefinition, &portFormat);
  if(omx_err != OMX_ErrorNone)
    return omx_err;

  if(GetState() != OMX_StateIdle)
  {
    if(GetState() != OMX_StateLoaded)
      SetStateForComponent(OMX_StateLoaded);

    SetStateForComponent(OMX_StateIdle);
  }

  omx_err = EnablePort(m_output_port, false);
  if(omx_err != OMX_ErrorNone)
    return omx_err;

  m_output_alignment     = portFormat.nBufferAlignment;
  m_output_buffer_count  = portFormat.nBufferCountActual;
  m_output_buffer_size   = portFormat.nBufferSize;

  CLog::Log(LOGDEBUG, "COMXCoreComponent::AllocOutputBuffers component(%s) - port(%d), nBufferCountMin(%lu), nBufferCountActual(%lu), nBufferSize(%lu) nBufferAlignmen(%lu)\n",
            m_componentName.c_str(), m_output_port, portFormat.nBufferCountMin,
            portFormat.nBufferCountActual, portFormat.nBufferSize, portFormat.nBufferAlignment);

  for (size_t i = 0; i < portFormat.nBufferCountActual; i++)
  {
    OMX_BUFFERHEADERTYPE *buffer = NULL;
    OMX_U8* data = NULL;

    if(m_omx_output_use_buffers)
    {
      data = (OMX_U8*)_aligned_malloc(portFormat.nBufferSize, m_output_alignment);
      // Never hand a null payload to the component: report the failure instead.
      if(!data)
      {
        CLog::Log(LOGERROR, "COMXCoreComponent::AllocOutputBuffers component(%s) - failed to allocate output buffer of size(%lu)\n",
          m_componentName.c_str(), portFormat.nBufferSize);
        return OMX_ErrorInsufficientResources;
      }
      omx_err = OMX_UseBuffer(m_handle, &buffer, m_output_port, NULL, portFormat.nBufferSize, data);
    }
    else
    {
      omx_err = OMX_AllocateBuffer(m_handle, &buffer, m_output_port, NULL, portFormat.nBufferSize);
    }
    if(omx_err != OMX_ErrorNone)
    {
      CLog::Log(LOGERROR, "COMXCoreComponent::AllocOutputBuffers component(%s) - OMX_UseBuffer failed with omx_err(0x%x)\n",
        m_componentName.c_str(), omx_err);

      // The component did not take the payload, so it is still ours to free.
      if(m_omx_output_use_buffers && data)
       _aligned_free(data);

      return omx_err;
    }
    buffer->nOutputPortIndex = m_output_port;
    buffer->nFilledLen       = 0;
    buffer->nOffset          = 0;
    // The buffer's index is stored in the private pointer field.
    buffer->pAppPrivate      = (void*)i;
    m_omx_output_buffers.push_back(buffer);
    m_omx_output_available.push(buffer);
  }

  omx_err = WaitForCommand(OMX_CommandPortEnable, m_output_port);

  m_flush_output = false;

  return omx_err;
}
Esempio n. 19
0
// Resizes a 16-byte aligned block to dwSize bytes. A null lpData is treated as
// a fresh allocation; otherwise the existing block goes through _aligned_realloc.
void* DefaultAlloc::_ReAllocate(LPVOID lpData, size_t dwSize)
{
    if (lpData)
    {
        return _aligned_realloc(lpData, dwSize, 16);
    }
    return _aligned_malloc(dwSize, 16);
}
Esempio n. 20
0
// Allocates `size` bytes with 64-byte alignment via _aligned_malloc.
// The `reason` argument is accepted but not used.
void* mpeg2_malloc(size_t size, mpeg2_alloc_t reason)
{
 void* const block = _aligned_malloc(size,64);
 return block;
}
Esempio n. 21
0
// Benchmark entry point: times the GSLocalMemory write-image, read-image and
// read-texture routines for every pixel storage format in s_format, over
// square transfers from 32x32 up to 1024x1024, and appends the results to
// c:\temp1\log.txt. The window/instance/command-line arguments are not used.
// Posts a quit message when done (also when the log file cannot be opened).
EXPORT_C GSBenchmark(HWND hwnd, HINSTANCE hinst, LPSTR lpszCmdLine, int nCmdShow)
{
	::SetPriorityClass(::GetCurrentProcess(), HIGH_PRIORITY_CLASS);

	FILE* file = fopen("c:\\temp1\\log.txt", "a");

	// Every result is written to this file; without it there is nothing to do.
	if(file == NULL)
	{
		PostQuitMessage(0);
		return;
	}

	fprintf(file, "-------------------------\n\n");

	if(1)
	{
		GSLocalMemory * pMem = new GSLocalMemory();
		GSLocalMemory& mem(*pMem);
		

		static struct {int psm; const char* name;} s_format[] =
		{
			{PSM_PSMCT32, "32"},
			{PSM_PSMCT24, "24"},
			{PSM_PSMCT16, "16"},
			{PSM_PSMCT16S, "16S"},
			{PSM_PSMT8, "8"},
			{PSM_PSMT4, "4"},
			{PSM_PSMT8H, "8H"},
			{PSM_PSMT4HL, "4HL"},
			{PSM_PSMT4HH, "4HH"},
			{PSM_PSMZ32, "32Z"},
			{PSM_PSMZ24, "24Z"},
			{PSM_PSMZ16, "16Z"},
			{PSM_PSMZ16S, "16ZS"},
		};

		// 4 MiB scratch buffer filled with a repeating byte pattern.
		uint8* ptr = (uint8*)_aligned_malloc(1024 * 1024 * 4, 32);

		for(int i = 0; i < 1024 * 1024 * 4; i++) ptr[i] = (uint8)i;

		//

		for(int tbw = 5; tbw <= 10; tbw++)
		{
			// Iteration count shrinks by 4x each time the edge length doubles.
			int n = 256 << ((10 - tbw) * 2);

			int w = 1 << tbw;
			int h = 1 << tbw;

			fprintf(file, "%d x %d\n\n", w, h);

			for(size_t i = 0; i < countof(s_format); i++)
			{
				const GSLocalMemory::psm_t& psm = GSLocalMemory::m_psm[s_format[i].psm];

				GSLocalMemory::writeImage wi = psm.wi;
				GSLocalMemory::readImage ri = psm.ri;
				GSLocalMemory::readTexture rtx = psm.rtx;
				GSLocalMemory::readTexture rtxP = psm.rtxP;

				GIFRegBITBLTBUF BITBLTBUF;

				BITBLTBUF.SBP = 0;
				BITBLTBUF.SBW = w / 64;
				BITBLTBUF.SPSM = s_format[i].psm;
				BITBLTBUF.DBP = 0;
				BITBLTBUF.DBW = w / 64;
				BITBLTBUF.DPSM = s_format[i].psm;

				GIFRegTRXPOS TRXPOS;

				TRXPOS.SSAX = 0;
				TRXPOS.SSAY = 0;
				TRXPOS.DSAX = 0;
				TRXPOS.DSAY = 0;

				GIFRegTRXREG TRXREG;

				TRXREG.RRW = w;
				TRXREG.RRH = h;

				GSVector4i r(0, 0, w, h);

				GIFRegTEX0 TEX0;

				TEX0.TBP0 = 0;
				TEX0.TBW = w / 64;
				// PSM is read by GetOffset below, so it must be set to the format under test.
				TEX0.PSM = s_format[i].psm;

				GIFRegTEXA TEXA;

				TEXA.TA0 = 0;
				TEXA.TA1 = 0x80;
				TEXA.AEM = 0;

				int trlen = w * h * psm.trbpp / 8;
				int len = w * h * psm.bpp / 8;

				clock_t start, end;

				// The format name is a narrow string, so use the narrow printf
				// regardless of the build's character set.
				fprintf(file, "[%4s] ", s_format[i].name);

				// Write-image timing
				start = clock();

				for(int j = 0; j < n; j++)
				{
					int x = 0;
					int y = 0;

					(mem.*wi)(x, y, ptr, trlen, BITBLTBUF, TRXPOS, TRXREG);
				}

				end = clock();

				fprintf(file, "%6d %6d | ", (int)((float)trlen * n / (end - start) / 1000), (int)((float)(w * h) * n / (end - start) / 1000));

				// Read-image timing
				start = clock();

				for(int j = 0; j < n; j++)
				{
					int x = 0;
					int y = 0;

					(mem.*ri)(x, y, ptr, trlen, BITBLTBUF, TRXPOS, TRXREG);
				}

				end = clock();

				fprintf(file, "%6d %6d | ", (int)((float)trlen * n / (end - start) / 1000), (int)((float)(w * h) * n / (end - start) / 1000));

				const GSOffset* o = mem.GetOffset(TEX0.TBP0, TEX0.TBW, TEX0.PSM);

				// Read-texture timing
				start = clock();

				for(int j = 0; j < n; j++)
				{
					(mem.*rtx)(o, r, ptr, w * 4, TEXA);
				}

				end = clock();

				fprintf(file, "%6d %6d ", (int)((float)len * n / (end - start) / 1000), (int)((float)(w * h) * n / (end - start) / 1000));

				// The rtxP routine is timed only when psm.pal is positive.
				if(psm.pal > 0)
				{
					start = clock();

					for(int j = 0; j < n; j++)
					{
						(mem.*rtxP)(o, r, ptr, w, TEXA);
					}

					end = clock();

					fprintf(file, "| %6d %6d ", (int)((float)len * n / (end - start) / 1000), (int)((float)(w * h) * n / (end - start) / 1000));
				}

				fprintf(file, "\n");

				fflush(file);
			}

			fprintf(file, "\n");
		}

		_aligned_free(ptr);
		delete pMem;
	}

	//

	// Disabled experiment: a single 256x256 PSMCT32 write at destination offset (0, 1).
	if(0)
	{
		GSLocalMemory * pMem2 = new GSLocalMemory();
		GSLocalMemory& mem2(*pMem2);

		uint8* ptr = (uint8*)_aligned_malloc(1024 * 1024 * 4, 32);

		for(int i = 0; i < 1024 * 1024 * 4; i++) ptr[i] = (uint8)i;

		const GSLocalMemory::psm_t& psm = GSLocalMemory::m_psm[PSM_PSMCT32];

		GSLocalMemory::writeImage wi = psm.wi;

		GIFRegBITBLTBUF BITBLTBUF;

		BITBLTBUF.DBP = 0;
		BITBLTBUF.DBW = 32;
		BITBLTBUF.DPSM = PSM_PSMCT32;

		GIFRegTRXPOS TRXPOS;

		TRXPOS.DSAX = 0;
		TRXPOS.DSAY = 1;

		GIFRegTRXREG TRXREG;

		TRXREG.RRW = 256;
		TRXREG.RRH = 256;

		int trlen = 256 * 256 * psm.trbpp / 8;

		int x = 0;
		int y = 0;

		(mem2.*wi)(x, y, ptr, trlen, BITBLTBUF, TRXPOS, TRXREG);
		delete pMem2;
	}

	//

	fclose(file);
	PostQuitMessage(0);
}
Esempio n. 22
0
/*
 * Initializes `handle` and starts an asynchronous directory watch for
 * `filename`.
 *
 * If `filename` is a directory, that directory is watched. Otherwise the path
 * is split into directory and file parts (for both the long and the short
 * form of the path) and the directory part is watched.
 *
 * `flags` must be 0 (asserted). Returns 0 once the first
 * ReadDirectoryChangesW request has been queued. On failure everything
 * acquired here is released, the Windows error code is recorded on `loop`
 * through uv__set_sys_error() and -1 is returned.
 */
int uv_fs_event_init(uv_loop_t* loop, uv_fs_event_t* handle,
    const char* filename, uv_fs_event_cb cb, int flags) {
  int name_size;
  int is_path_dir = 0;  /* Read by the error path, so it must start defined. */
  DWORD attr, last_error;
  wchar_t* dir = NULL, *dir_to_watch, *filenamew = NULL;
  wchar_t short_path[MAX_PATH];

  /* We don't support any flags yet. */
  assert(!flags);

  uv_fs_event_init_handle(loop, handle, filename, cb);

  /* Convert name to UTF16. */
  name_size = uv_utf8_to_utf16(filename, NULL, 0) * sizeof(wchar_t);
  filenamew = (wchar_t*)malloc(name_size);
  if (!filenamew) {
    uv_fatal_error(ERROR_OUTOFMEMORY, "malloc");
  }

  if (!uv_utf8_to_utf16(filename, filenamew,
      name_size / sizeof(wchar_t))) {
    /* Use the shared cleanup so that filenamew is not leaked. */
    last_error = GetLastError();
    goto error;
  }

  /* Determine whether filename is a file or a directory. */
  attr = GetFileAttributesW(filenamew);
  if (attr == INVALID_FILE_ATTRIBUTES) {
    last_error = GetLastError();
    goto error;
  }

  is_path_dir = (attr & FILE_ATTRIBUTE_DIRECTORY) ? 1 : 0;

  if (is_path_dir) {
     /* filename is a directory, so that's the directory that we will watch. */
    handle->dirw = filenamew;
    dir_to_watch = filenamew;
  } else {
    /*
     * filename is a file.  So we split filename into dir & file parts, and
     * watch the dir directory.
     */

    /* Convert to short path. */
    if (!GetShortPathNameW(filenamew, short_path, ARRAY_SIZE(short_path))) {
      last_error = GetLastError();
      goto error;
    }

    if (uv_split_path(filenamew, &dir, &handle->filew) != 0) {
      last_error = GetLastError();
      goto error;
    }

    if (uv_split_path(short_path, NULL, &handle->short_filew) != 0) {
      last_error = GetLastError();
      goto error;
    }

    dir_to_watch = dir;
    free(filenamew);
    filenamew = NULL;
  }

  handle->dir_handle = CreateFileW(dir_to_watch,
                                   FILE_LIST_DIRECTORY,
                                   FILE_SHARE_READ | FILE_SHARE_DELETE |
                                     FILE_SHARE_WRITE,
                                   NULL,
                                   OPEN_EXISTING,
                                   FILE_FLAG_BACKUP_SEMANTICS |
                                     FILE_FLAG_OVERLAPPED,
                                   NULL);

  /* The directory string is only needed to open the handle. */
  if (dir) {
    free(dir);
    dir = NULL;
  }

  if (handle->dir_handle == INVALID_HANDLE_VALUE) {
    last_error = GetLastError();
    goto error;
  }

  /* Completion packets for this directory carry `handle` as their key. */
  if (CreateIoCompletionPort(handle->dir_handle,
                             loop->iocp,
                             (ULONG_PTR)handle,
                             0) == NULL) {
    last_error = GetLastError();
    goto error;
  }

  handle->buffer = (char*)_aligned_malloc(uv_directory_watcher_buffer_size,
    sizeof(DWORD));
  if (!handle->buffer) {
    uv_fatal_error(ERROR_OUTOFMEMORY, "malloc");
  }

  memset(&(handle->req.overlapped), 0, sizeof(handle->req.overlapped));

  if (!ReadDirectoryChangesW(handle->dir_handle,
                             handle->buffer,
                             uv_directory_watcher_buffer_size,
                             FALSE,
                             FILE_NOTIFY_CHANGE_FILE_NAME      |
                               FILE_NOTIFY_CHANGE_DIR_NAME     |
                               FILE_NOTIFY_CHANGE_ATTRIBUTES   |
                               FILE_NOTIFY_CHANGE_SIZE         |
                               FILE_NOTIFY_CHANGE_LAST_WRITE   |
                               FILE_NOTIFY_CHANGE_LAST_ACCESS  |
                               FILE_NOTIFY_CHANGE_CREATION     |
                               FILE_NOTIFY_CHANGE_SECURITY,
                             NULL,
                             &handle->req.overlapped,
                             NULL)) {
    last_error = GetLastError();
    goto error;
  }

  handle->req_pending = 1;
  return 0;

error:
  if (handle->filename) {
    free(handle->filename);
    handle->filename = NULL;
  }

  if (handle->filew) {
    free(handle->filew);
    handle->filew = NULL;
  }

  if (handle->short_filew) {
    free(handle->short_filew);
    handle->short_filew = NULL;
  }

  if (is_path_dir) {
    /* handle->dirw aliases filenamew, which is released just below. */
    handle->dirw = NULL;
  }

  free(filenamew);

  /*
   * dir is still set when a failure happened after the long path was split
   * but before the directory handle was opened.
   * NOTE(review): assumes uv_split_path leaves *dir either NULL or pointing
   * at a live allocation when it fails - confirm.
   */
  free(dir);

  if (handle->dir_handle != INVALID_HANDLE_VALUE) {
    CloseHandle(handle->dir_handle);
    handle->dir_handle = INVALID_HANDLE_VALUE;
  }

  if (handle->buffer) {
    _aligned_free(handle->buffer);
    handle->buffer = NULL;
  }

  uv__set_sys_error(loop, last_error);
  return -1;
}
Esempio n. 23
0
/**
 * Fill bitmap->data with the pixels of an incoming bitmap, converted to
 * xfc->format.
 *
 * A fresh 16-byte aligned buffer of width * height * 4 bytes is allocated for
 * the result. Compressed input is decoded with the interleaved codec when
 * bpp < 32 and with the planar codec otherwise; uncompressed input is
 * converted with freerdp_image_copy().
 *
 * @param context    client context; cast to xfContext
 * @param bitmap     receives data, length, bpp and compressed = FALSE
 * @param data       source pixel data
 * @param width      bitmap width in pixels
 * @param height     bitmap height in pixels
 * @param bpp        bits per pixel of the source data
 * @param length     size of the source data in bytes
 * @param compressed whether the source data is compressed
 * @param codecId    unused here
 *
 * @return TRUE on success, FALSE on invalid dimensions, allocation failure,
 *         codec preparation failure or decompression failure.
 */
BOOL xf_Bitmap_Decompress(rdpContext* context, rdpBitmap* bitmap,
		BYTE* data, int width, int height, int bpp, int length,
		BOOL compressed, int codecId)
{
	int status;
	UINT32 size;
	BYTE* pSrcData;
	BYTE* pDstData;
	UINT32 SrcSize;
	UINT32 SrcFormat;
	xfContext* xfc = (xfContext*) context;

	/* Negative values would wrap to huge unsigned sizes further down. */
	if ((width < 0) || (height < 0) || (length < 0))
		return FALSE;

	/* width * height * 4 has to fit in 32 bits (0x3FFFFFFF == 0xFFFFFFFF / 4). */
	if ((height > 0) && ((UINT32) width > (0x3FFFFFFF / (UINT32) height)))
		return FALSE;

	/*
	 * Computed in 32 bits: a 16-bit size wraps for anything larger than
	 * 64 KiB (e.g. 128x128), which would under-allocate the destination.
	 */
	size = (UINT32) width * (UINT32) height * 4;

	if (compressed)
	{
		/* Prepare the codec first so a failure here does not strand a buffer. */
		if (!freerdp_client_codecs_prepare(xfc->codecs,
				(bpp < 32) ? FREERDP_CODEC_INTERLEAVED : FREERDP_CODEC_PLANAR))
			return FALSE;
	}

	bitmap->data = (BYTE*) _aligned_malloc(size, 16);
	if (!bitmap->data)
		return FALSE;

	pSrcData = data;
	SrcSize = (UINT32) length;
	pDstData = bitmap->data;

	if (compressed)
	{
		if (bpp < 32)
		{
			status = interleaved_decompress(xfc->codecs->interleaved, pSrcData, SrcSize, bpp,
					&pDstData, xfc->format, -1, 0, 0, width, height, xfc->palette);
		}
		else
		{
			status = planar_decompress(xfc->codecs->planar, pSrcData, SrcSize,
					&pDstData, xfc->format, -1, 0, 0, width, height, TRUE);
		}

		if (status < 0)
		{
			/*
			 * NOTE(review): bitmap->data stays attached to the bitmap here,
			 * as before; presumably the caller frees the bitmap on failure -
			 * confirm.
			 */
			WLog_ERR(TAG, "Bitmap Decompression Failed");
			return FALSE;
		}
	}
	else
	{
		SrcFormat = gdi_get_pixel_format(bpp, TRUE);

		status = freerdp_image_copy(pDstData, xfc->format, -1, 0, 0,
				width, height, pSrcData, SrcFormat, -1, 0, 0, xfc->palette);
	}

	bitmap->compressed = FALSE;
	bitmap->length = size;
	bitmap->bpp = (xfc->depth >= 24) ? 32 : xfc->depth;
	return TRUE;
}
Esempio n. 24
0
File: rpc.c Progetto: d0rian/FreeRDP
/**
 * Allocate an rdpRpc bound to `transport` and fill in its initial state.
 *
 * The structure is zeroed first; the function then creates the NTLM
 * contexts, the fragment and stub buffers, the PDU, the send/receive lists,
 * the virtual connection with its cookie table, and the RPC client, which is
 * put into synchronous send/receive mode.
 *
 * @param transport transport whose settings the RPC instance shares
 * @return the new instance, or NULL if the rdpRpc itself cannot be allocated
 *
 * NOTE(review): none of the nested allocations are checked; a failure there
 * is dereferenced further down (InitializeSListHead, rpc->client).
 */
rdpRpc* rpc_new(rdpTransport* transport)
{
	rdpRpc* rpc = (rdpRpc*) malloc(sizeof(rdpRpc));

	if (rpc != NULL)
	{
		ZeroMemory(rpc, sizeof(rdpRpc));

		rpc->State = RPC_CLIENT_STATE_INITIAL;

		rpc->transport = transport;
		rpc->settings = transport->settings;

		rpc->send_seq_num = 0;
		rpc->ntlm = ntlm_new();

		/* One NTLM HTTP context per channel direction. */
		rpc->NtlmHttpIn = ntlm_http_new();
		rpc->NtlmHttpOut = ntlm_http_new();

		rpc_ntlm_http_init_channel(rpc, rpc->NtlmHttpIn, TSG_CHANNEL_IN);
		rpc_ntlm_http_init_channel(rpc, rpc->NtlmHttpOut, TSG_CHANNEL_OUT);

		rpc->FragBufferSize = 20;
		rpc->FragBuffer = (BYTE*) malloc(rpc->FragBufferSize);

		rpc->StubOffset = 0;
		rpc->StubBufferSize = 20;
		rpc->StubLength = 0;
		rpc->StubFragCount = 0;
		/* Sized from its own field so buffer and StubBufferSize cannot drift apart. */
		rpc->StubBuffer = (BYTE*) malloc(rpc->StubBufferSize);

		rpc->rpc_vers = 5;
		rpc->rpc_vers_minor = 0;

		/* little-endian data representation */
		rpc->packed_drep[0] = 0x10;
		rpc->packed_drep[1] = 0x00;
		rpc->packed_drep[2] = 0x00;
		rpc->packed_drep[3] = 0x00;

		rpc->max_xmit_frag = 0x0FF8;
		rpc->max_recv_frag = 0x0FF8;

		/* The PDU and both list heads are allocated with MEMORY_ALLOCATION_ALIGNMENT. */
		rpc->pdu = (RPC_PDU*) _aligned_malloc(sizeof(RPC_PDU), MEMORY_ALLOCATION_ALIGNMENT);

		rpc->SendQueue = (PSLIST_HEADER) _aligned_malloc(sizeof(SLIST_HEADER), MEMORY_ALLOCATION_ALIGNMENT);
		InitializeSListHead(rpc->SendQueue);

		rpc->ReceiveQueue = (PSLIST_HEADER) _aligned_malloc(sizeof(SLIST_HEADER), MEMORY_ALLOCATION_ALIGNMENT);
		InitializeSListHead(rpc->ReceiveQueue);

		rpc->ReceiveWindow = 0x00010000;

		rpc->ChannelLifetime = 0x40000000;
		rpc->ChannelLifetimeSet = 0;

		rpc->KeepAliveInterval = 300000;
		rpc->CurrentKeepAliveInterval = rpc->KeepAliveInterval;
		rpc->CurrentKeepAliveTime = 0;

		rpc->VirtualConnection = rpc_client_virtual_connection_new(rpc);
		rpc->VirtualConnectionCookieTable = rpc_virtual_connection_cookie_table_new(rpc);

		rpc->call_id = 1;

		rpc_client_new(rpc);

		rpc->client->SynchronousSend = TRUE;
		rpc->client->SynchronousReceive = TRUE;
	}

	return rpc;
}
Esempio n. 25
0
/**
 * Builds a search context over the points in [points_begin, points_end).
 *
 * The points are copied into mPoints and sorted by ascending rank, the two
 * memory pools are created, and the KD-tree is populated from the sorted
 * points and finalised.
 *
 * @param points_begin first point of the input range
 * @param points_end   one past the last point of the input range
 */
SearchContext::SearchContext(const Point* points_begin, const Point* points_end)
	: mTree(nullptr)
	, mKDTreeMemPool(nullptr)
	, mIteratorMemPool(nullptr)
{
#ifdef _ENABLE_STATS_LOGGING
	spxCurrentContext = this;
#endif // _ENABLE_STATS_LOGGING

	int64_t pointCount = points_end - points_begin;

	{
		// Add and sort all points on rank
		mPoints.assign(points_begin, points_end);

		std::sort(mPoints.begin(), mPoints.end(),
			[] (const Point& inLHS, const Point& inRHS)
		{
			return inLHS.rank < inRHS.rank;
		});
	}

	{
		// Create KDTree Mem Pool, sized for one node per kBinSize points plus one
		int minimumNodeCount	= (int) (pointCount / kBinSize) + 1;
		int byteCount			= sizeof(KdTree<Axis_X>) * minimumNodeCount;
		mKDTreeMemPool			= new MemoryPool(byteCount);
	}

	{
		// Create iterator mem pool
		// DANGEROUS: Can overrun memory here,
		// but it's marginally faster than using a checked mem pool...
		// NOTE(review): the allocation result is not checked.
		static const int kIteratorMemPoolMaxCount = 2000;
		mIteratorMemPool = (SearchIterator*) _aligned_malloc(sizeof(SearchIterator)*kIteratorMemPoolMaxCount, __alignof(SearchIterator));
	}

	{
		// Create KDTree
		mTree = mKDTreeMemPool->Alloc< KdTree<Axis_X> >();
		INC_KDTREE_COUNT;

#ifdef _BALANCE_KDTREE
		std::vector<CoordPairAndRank> coords;
		coords.reserve(mPoints.size());

		for (auto& p : mPoints)
		{
			coords.emplace_back( p );
		}

		mTree->Fill(*mKDTreeMemPool, coords);
#else
		for (auto& p : mPoints)
		{
			mTree->Add(*mKDTreeMemPool, p);
		}
#endif // _BALANCE_KDTREE

		mTree->Finalise();
	}

#ifdef _PRINT_POINTS_AND_QUERIES
	if (pointCount > 1) // Avoid robustness test.
	{
		// Pointer ranges come from data()/size(): dereferencing end() to take
		// its address is undefined behaviour.
		DataPrinter::DeleteFiles();
		DataPrinter::PrintPoints(mPoints.data(), mPoints.data() + mPoints.size());

		auto copyPoints = mPoints;
		std::sort(copyPoints.begin(), copyPoints.end(),
			[] (const Point& inLHS, const Point& inRHS)
		{
			return inLHS.x < inRHS.x;
		});

		DataPrinter::PrintPoints(copyPoints.data(), copyPoints.data() + copyPoints.size(), "InputSortedOnX.txt");

		std::sort(copyPoints.begin(), copyPoints.end(),
			[] (const Point& inLHS, const Point& inRHS)
		{
			return inLHS.y < inRHS.y;
		});

		DataPrinter::PrintPoints(copyPoints.data(), copyPoints.data() + copyPoints.size(), "InputSortedOnY.txt");
	}
#endif // _PRINT_POINTS_AND_QUERIES
}
Esempio n. 26
0
/**
 * Allocate a block of `size` bytes using whichever allocator the build
 * configuration selects; every branch except the plain malloc() fallback
 * requests ALIGN-byte alignment.
 *
 * Requests larger than max_alloc_size - 32 are refused. If the allocator
 * yields NULL for a zero-byte request, a one-byte block is allocated instead.
 * With CONFIG_MEMORY_POISONING the block is filled with FF_MEMORY_POISON.
 *
 * @param size number of bytes to allocate
 * @return pointer to the block, or NULL if the request is refused or the
 *         allocation fails
 */
void *av_malloc(size_t size)
{
    void *ptr = NULL;
#if CONFIG_MEMALIGN_HACK
    long diff;
#endif

    /* let's disallow possibly ambiguous cases */
    if (size > (max_alloc_size - 32))
        return NULL;

#if CONFIG_MEMALIGN_HACK
    /* Over-allocate by ALIGN, step forward to the next ALIGN boundary and
     * record the distance stepped in the byte just before the returned
     * pointer.
     * NOTE(review): the mask arithmetic presumes ALIGN is a power of two. */
    ptr = malloc(size + ALIGN);
    if (!ptr)
        return ptr;
    diff              = ((~(long)ptr)&(ALIGN - 1)) + 1;
    ptr               = (char *)ptr + diff;
    ((char *)ptr)[-1] = diff;
#elif HAVE_POSIX_MEMALIGN
    if (size) //OS X on SDK 10.6 has a broken posix_memalign implementation
    if (posix_memalign(&ptr, ALIGN, size))
        ptr = NULL;
#elif HAVE_ALIGNED_MALLOC
    ptr = _aligned_malloc(size, ALIGN);
#elif HAVE_MEMALIGN
#ifndef __DJGPP__
    ptr = memalign(ALIGN, size);
#else
    ptr = memalign(size, ALIGN);
#endif
    /* Why 64?
     * Indeed, we should align it:
     *   on  4 for 386
     *   on 16 for 486
     *   on 32 for 586, PPro - K6-III
     *   on 64 for K7 (maybe for P3 too).
     * Because L1 and L2 caches are aligned on those values.
     * But I don't want to code such logic here!
     */
    /* Why 32?
     * For AVX ASM. SSE / NEON needs only 16.
     * Why not larger? Because I did not see a difference in benchmarks ...
     */
    /* benchmarks with P3
     * memalign(64) + 1          3071, 3051, 3032
     * memalign(64) + 2          3051, 3032, 3041
     * memalign(64) + 4          2911, 2896, 2915
     * memalign(64) + 8          2545, 2554, 2550
     * memalign(64) + 16         2543, 2572, 2563
     * memalign(64) + 32         2546, 2545, 2571
     * memalign(64) + 64         2570, 2533, 2558
     *
     * BTW, malloc seems to do 8-byte alignment by default here.
     */
#else
    ptr = malloc(size);
#ifdef USE_MEM_STATS
    /* size is a size_t, so it needs %zu; %ld is a format/argument mismatch. */
    printf("malloc(%zu) -> %p\n", size, ptr);
    if (ptr) {
        mem_cur += malloc_usable_size(ptr);
        if (mem_cur > mem_max) {
            mem_max = mem_cur;
            printf("mem_max=%d\n", mem_max);
        }
    }
#endif
#endif
    /* A zero-byte request that produced NULL is retried as a one-byte one. */
    if(!ptr && !size) {
        size = 1;
        ptr= av_malloc(1);
    }
#if CONFIG_MEMORY_POISONING
    if (ptr)
        memset(ptr, FF_MEMORY_POISON, size);
#endif
    return ptr;
}
Esempio n. 27
0
/**
 * Create the client-side surface described by a graphics-pipeline
 * "create surface" PDU and register it on the context.
 *
 * The surface always gets a zeroed, 16-byte aligned XRGB32 pixel buffer.
 * When the X11 depth is 24 or 32 the XImage wraps that buffer directly;
 * otherwise a second zeroed "stage" buffer in xfc->format is allocated and
 * the XImage wraps the stage buffer instead.
 *
 * @param context       graphics pipeline context; context->custom is the xfContext
 * @param createSurface PDU carrying surfaceId, width, height and pixelFormat
 * @return 1 on success, -1 if an allocation or XCreateImage fails
 */
int xf_CreateSurface(RdpgfxClientContext* context, RDPGFX_CREATE_SURFACE_PDU* createSurface)
{
	size_t size;
	UINT32 padBytes;
	UINT32 bytesPerPixel;
	xfGfxSurface* surface;
	xfContext* xfc = (xfContext*) context->custom;

	/* calloc leaves data, stage and image zeroed for the failure path. */
	surface = (xfGfxSurface*) calloc(1, sizeof(xfGfxSurface));

	if (!surface)
		return -1;

	surface->surfaceId = createSurface->surfaceId;
	surface->width = (UINT32) createSurface->width;
	surface->height = (UINT32) createSurface->height;
	surface->alpha = (createSurface->pixelFormat == PIXEL_FORMAT_ARGB_8888) ? TRUE : FALSE;
	surface->format = PIXEL_FORMAT_XRGB32;

	/* Scanline pad in bytes; zero means no usable pad, so skip the rounding. */
	padBytes = (UINT32) (xfc->scanline_pad / 8);

	/*
	 * Round the row size up to the next multiple of the pad. Adding the bare
	 * remainder is only correct when the remainder is exactly half the pad.
	 */
	surface->scanline = surface->width * 4;

	if ((padBytes > 0) && (surface->scanline % padBytes))
		surface->scanline += padBytes - (surface->scanline % padBytes);

	/* Widen before multiplying so large surfaces do not wrap in 32 bits. */
	size = (size_t) surface->scanline * surface->height;
	surface->data = (BYTE*) _aligned_malloc(size, 16);

	if (!surface->data)
		goto fail;

	ZeroMemory(surface->data, size);

	if ((xfc->depth == 24) || (xfc->depth == 32))
	{
		surface->image = XCreateImage(xfc->display, xfc->visual, xfc->depth, ZPixmap, 0,
				(char*) surface->data, surface->width, surface->height, xfc->scanline_pad, surface->scanline);
	}
	else
	{
		bytesPerPixel = (FREERDP_PIXEL_FORMAT_BPP(xfc->format) / 8);

		surface->stageStep = surface->width * bytesPerPixel;

		if ((padBytes > 0) && (surface->stageStep % padBytes))
			surface->stageStep += padBytes - (surface->stageStep % padBytes);

		size = (size_t) surface->stageStep * surface->height;

		surface->stage = (BYTE*) _aligned_malloc(size, 16);

		if (!surface->stage)
			goto fail;

		ZeroMemory(surface->stage, size);

		surface->image = XCreateImage(xfc->display, xfc->visual, xfc->depth, ZPixmap, 0,
				(char*) surface->stage, surface->width, surface->height, xfc->scanline_pad, surface->stageStep);
	}

	/* Do not register a surface that has no XImage to draw from. */
	if (!surface->image)
		goto fail;

	context->SetSurfaceData(context, surface->surfaceId, (void*) surface);

	return 1;

fail:
	/* Buffers from _aligned_malloc must be released with _aligned_free, not free. */
	if (surface->stage)
		_aligned_free(surface->stage);

	if (surface->data)
		_aligned_free(surface->data);

	free(surface);
	return -1;
}
Esempio n. 28
0
/*---------------------------------------------------------------------------
// Allocate `size` bytes on a 16-byte boundary via _aligned_malloc.
// Returns whatever _aligned_malloc returns.
//-------------------------------------------------------------------------*/
void* xmm_malloc(size_t size)
{
	enum { kXmmAlignment = 16 };

	void* block = _aligned_malloc(size, kXmmAlignment);
	return block;
}
Esempio n. 29
0
// Default aligned allocator: forwards to _aligned_malloc, passing the
// requested alignment converted to size_t. Returns its result unchanged.
static void *b3AlignedAllocDefault(size_t size, int alignment)
{
	const size_t boundary = (size_t)alignment;
	void *block = _aligned_malloc(size, boundary);
	return block;
}
Esempio n. 30
0
//////////////////////////////////////////////////////////////////////////////////////////////////
/// Resize buffers and opengl texture.
///
/// Stores the new size, releases the current MPR buffer and cache, chooses the
/// texture internal format / component type (RGBA8 + float when a LUT buffer
/// is present, otherwise by volume data type), allocates new buffers for
/// size[0] x size[1] elements and re-specifies the 2D texture with linear
/// filtering and clamp-to-edge wrapping.
///
/// For an unsupported volume data type the function posts a debug message and
/// returns with both buffers released and their pointers cleared.
///
/// @param size new MPR size; size[0] and size[1] are used as the texture dimensions
void	SoXipCPUMprRender::resizeBuffers(SbVec2s &size)
{
	mMPRSize = size;

#ifdef _CRT_ALLOCATION_DEFINED
	if (mMPRBuf)
		_aligned_free(mMPRBuf);
	if (mMPRCache)
		_aligned_free(mMPRCache);
#else
	// mMPRBuf is allocated below as char[] but held as void*, and delete[] on
	// a void* is undefined, so restore the element type first.
	if (mMPRBuf)
		delete[] static_cast<char*>(mMPRBuf);
	if (mMPRCache)
		delete[] mMPRCache;
#endif

	// Clear the pointers right away: the unsupported-type path below returns
	// before new buffers exist and must not leave freed pointers behind.
	mMPRBuf = 0;
	mMPRCache = 0;

	if (!mMPRTexId)
		glGenTextures(1, &mMPRTexId);

	// Bytes per element of the MPR buffer.
	int volBytes = 1;
	if (mLutBuf)
	{
		volBytes =  sizeof(float) * 4;
		mTexInternalFormat = GL_RGBA8;
		mTexType = GL_FLOAT;
	}
	else
		switch (mVolDataType)
		{
		case SbXipImage::UNSIGNED_BYTE:
			mTexInternalFormat = GL_LUMINANCE8;
			mTexType = GL_UNSIGNED_BYTE;
			break;
		case SbXipImage::BYTE:
			mTexInternalFormat = GL_LUMINANCE8;
			mTexType = GL_BYTE;
			break;
		case SbXipImage::UNSIGNED_SHORT:
			mTexInternalFormat = GL_LUMINANCE16;
			mTexType = GL_UNSIGNED_SHORT;
			volBytes = 2;
			break;
		case SbXipImage::SHORT:
			mTexInternalFormat = GL_LUMINANCE16;
			mTexType = GL_SHORT;
			volBytes = 2;
			break;
		case SbXipImage::UNSIGNED_INT:
			mTexInternalFormat = GL_LUMINANCE16;
			mTexType = GL_UNSIGNED_INT;
			volBytes = 4;
			break;
		case SbXipImage::INT:
			mTexInternalFormat = GL_LUMINANCE16;
			mTexType = GL_INT;
			volBytes = 4;
			break;
		case SbXipImage::FLOAT:
			mTexInternalFormat = GL_LUMINANCE16;
			mTexType = GL_FLOAT;
			volBytes = 4;
			break;
		case SbXipImage::DOUBLE:
			mTexInternalFormat = GL_LUMINANCE16;
			mTexType = GL_DOUBLE;
			volBytes = 8;
			break;
		default:
			mTexInternalFormat = 0;
			mTexType = 0;
			SoDebugError::postInfo("SoXipCPUMprRender::resizeBuffers", "Unsupported image type: %d!", mVolDataType);
			return;
		}

	// Element count in size_t so large sizes do not overflow int arithmetic.
	const size_t pixelCount = (size_t) size[0] * (size_t) size[1];

#ifdef _CRT_ALLOCATION_DEFINED
	mMPRBuf = _aligned_malloc(pixelCount * volBytes, 16);
	mMPRCache = (mprCacheElem*) _aligned_malloc(sizeof(mprCacheElem) * pixelCount, 16);
#else
	mMPRBuf = (void*) new char[pixelCount * volBytes];
	mMPRCache = new mprCacheElem[pixelCount];
#endif

	// Re-specify the texture storage; no pixel data is uploaded here.
	glBindTexture(GL_TEXTURE_2D, mMPRTexId);
	glTexImage2D(GL_TEXTURE_2D, 0, mTexInternalFormat, size[0], size[1], 0, mLutBuf ? GL_RGBA : GL_LUMINANCE, mTexType, 0);
	glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_MAG_FILTER, GL_LINEAR);
	glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_MIN_FILTER, GL_LINEAR);
	glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_WRAP_S,     GL_CLAMP_TO_EDGE);
	glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_WRAP_T,     GL_CLAMP_TO_EDGE);
}