Exemplo n.º 1
0
void process_image_simple(struct image* img){
	unsigned char *input, *output, *temp;
	unsigned int addr1, addr2, i, j, k, r, g, b;
	int block_nr = img->block_nr;
	vector unsigned char *v1, *v2, *v3, *v4, *v5 ;

	input = malloc_align(NUM_CHANNELS * SCALE_FACTOR * img->width, 4);
	output = malloc_align(NUM_CHANNELS * img->width / SCALE_FACTOR, 4);
	temp = malloc_align(NUM_CHANNELS * img->width, 4);

	v1 = (vector unsigned char *) &input[0];
	v2 = (vector unsigned char *) &input[1 * img->width * NUM_CHANNELS];
	v3 = (vector unsigned char *) &input[2 * img->width * NUM_CHANNELS];
	v4 = (vector unsigned char *) &input[3 * img->width * NUM_CHANNELS];
	v5 = (vector unsigned char *) temp;

	addr2 = (unsigned int)img->dst; //start of image
	addr2 += (block_nr / NUM_IMAGES_HEIGHT) * img->width * NUM_CHANNELS * 
		img->height / NUM_IMAGES_HEIGHT; //start line of spu block
	addr2 += (block_nr % NUM_IMAGES_WIDTH) * NUM_CHANNELS *
		img->width / NUM_IMAGES_WIDTH;

	for (i=0; i<img->height / SCALE_FACTOR; i++){
		//get 4 lines
		addr1 = ((unsigned int)img->src) + i * img->width * NUM_CHANNELS * SCALE_FACTOR;
		mfc_get(input, addr1, SCALE_FACTOR * img->width * NUM_CHANNELS, MY_TAG, 0, 0);
		mfc_write_tag_mask(1 << MY_TAG);
		mfc_read_tag_status_all();

		//compute the scaled line
		for (j = 0; j < img->width * NUM_CHANNELS / 16; j++){
			v5[j] = spu_avg(spu_avg(v1[j], v2[j]), spu_avg(v3[j], v4[j]));
		}
		for (j=0; j < img->width; j+=SCALE_FACTOR){
			r = g = b = 0;
			for (k = j; k < j + SCALE_FACTOR; k++) {
				r += temp[k * NUM_CHANNELS + 0];
				g += temp[k * NUM_CHANNELS + 1];
				b += temp[k * NUM_CHANNELS + 2];
			}
			r /= SCALE_FACTOR;
			b /= SCALE_FACTOR;
			g /= SCALE_FACTOR;

			output[j / SCALE_FACTOR * NUM_CHANNELS + 0] = (unsigned char) r;
			output[j / SCALE_FACTOR * NUM_CHANNELS + 1] = (unsigned char) g;
			output[j / SCALE_FACTOR * NUM_CHANNELS + 2] = (unsigned char) b;
		}

		//put the scaled line back
		mfc_put(output, addr2, img->width / SCALE_FACTOR * NUM_CHANNELS, MY_TAG, 0, 0);
		addr2 += img->width * NUM_CHANNELS; //line inside spu block
		mfc_write_tag_mask(1 << MY_TAG);
		mfc_read_tag_status_all();
	}

	free_align(temp);
	free_align(input);
	free_align(output);
}
Exemplo n.º 2
0
/*
 * Allocate an image header together with a pixel buffer holding
 * (width * height + 1) elements of pixel_t.
 *
 * Returns the new image with width and height filled in, or NULL when either
 * allocation fails. The header is released again if the pixel buffer cannot
 * be obtained, so a failed call leaves nothing behind.
 */
image alloc_img(unsigned int width, unsigned int height)
{
    image img = malloc_align(sizeof (image_t), 4);
    if (!img)
        return NULL;

    img->buf = malloc_align((width * height + 1) * sizeof (pixel_t), 4);
    if (!img->buf) {
        free_align(img);
        return NULL;
    }

    img->width = width;
    img->height = height;
    return img;
}
Exemplo n.º 3
0
/*
 * Allocation entry point layered on malloc_align: requests smaller than
 * PAGE_SIZE are forwarded with an alignment argument of 1, everything else
 * with PAGE_SIZE.
 */
void*
malloc(size_t count)
{
    return malloc_align(count, (count < PAGE_SIZE) ? 1 : PAGE_SIZE);
}
Exemplo n.º 4
0
/*
 * Create an empty sorted map with room for `length` entries.
 *
 * Allocates the map header plus one key array (double) and one value array
 * (int) of `length` elements each. Returns the map with size 0, or NULL if
 * any allocation fails; partial allocations are released before returning.
 */
SortedMap *createSortedMap(int length) {
	SortedMap *map = (SortedMap *) malloc_align(sizeof(SortedMap), 7);
	if (!map)
		return NULL;

	map->size = 0;
	map->length = length;
	map->keys = (double *) malloc_align(map->length * sizeof(double), 7);
	map->values = (int *) malloc_align(map->length * sizeof(int), 7);

	if (!map->keys || !map->values) {
		/* Undo whichever half succeeded so the caller sees all-or-nothing. */
		if (map->keys)
			free_align(map->keys);
		if (map->values)
			free_align(map->values);
		free_align(map);
		return NULL;
	}

	return map;
}
/*
 * Read a plain-text PPM ("P3") image from fin.
 *
 * On success *width, *height and *max_color hold the header values and *a
 * points to a malloc_align'ed array of *width * *height pixels filled in file
 * order. Reading stops at the first token that is not an RGB triple or once
 * the array is full, so surplus or malformed data can neither overrun the
 * buffer nor stall the loop.
 *
 * On a bad magic line or bad header, *a is left untouched; when the pixel
 * array cannot be allocated, *a is NULL. Header comment lines ('#') are not
 * handled.
 */
void read_from_file(FILE *fin, struct pixel **a, int *width, int *height,
		int *max_color)
{
	printf("PPU reading from file\n");
	char line[256];
	size_t count, i = 0;
	int red, green, blue;

	/* Check if the file is ppm */
	if (!fgets(line, sizeof(line), fin) || strncmp(line, "P3", 2)) {
		/* errno carries no information here, so perror would mislead. */
		fprintf(stderr, "The input file is not ppm\n");
		return;
	}

	/* Read initial parameters */
	if (fscanf(fin, "%d", width) != 1 ||
	    fscanf(fin, "%d", height) != 1 ||
	    fscanf(fin, "%d", max_color) != 1 ||
	    *width <= 0 || *height <= 0) {
		fprintf(stderr, "Invalid ppm header\n");
		return;
	}
	printf("PPU reads %d, %d, %d\n", *width, *height, *max_color);

	/* Multiply in size_t so large dimensions do not overflow int. */
	count = (size_t)*width * (size_t)*height;
	*a = malloc_align(count * sizeof(struct pixel), 4);
	if (!(*a)) {
		perror("Error on allocating memory for image");
		return;
	}

	/* Read the pixels: stop on EOF, on a malformed triple, or when full */
	while (i < count &&
	       fscanf(fin, "%d %d %d", &red, &green, &blue) == 3) {
		(*a)[i].red = red;
		(*a)[i].green = green;
		(*a)[i].blue = blue;
		i++;
	}
}
Exemplo n.º 6
0
END_TEST

/*******************************************************************************
 * malloc_align/free_align
 */

/*
 * Exercise malloc_align/free_align over a grid of arguments: every `size`
 * index of ptr's first dimension (0..127) combined with every `align` index
 * of its second dimension (0..255).
 * NOTE(review): countof is presumably an element-count macro -- confirm.
 */
START_TEST(test_malloc_align)
{
	void *ptr[128][256];
	int size, align;

	/* Allocation pass: keep every pointer so it can be released afterwards. */
	for (size = 0; size < countof(ptr); size++)
	{
		for (align = 0; align < countof(ptr[0]); align++)
		{
			ptr[size][align] = malloc_align(size, align);
			/* align == 0 is skipped to avoid a modulo by zero. */
			if (align)
			{
				ck_assert((uintptr_t)ptr[size][align] % align == 0);
			}
			/* Only non-empty requests must succeed; write to the whole
			 * region to check that it is usable. */
			if (size)
			{
				ck_assert(ptr[size][align]);
				memset(ptr[size][align], 0xEF, size);
			}
		}
	}
	/* Release pass: free every pointer obtained above, including those
	 * returned for size 0. */
	for (size = 0; size < countof(ptr); size++)
	{
		for (align = 0; align < countof(ptr[0]); align++)
		{
			free_align(ptr[size][align]);
		}
	}
}
/*
 * Split the range [0, N) evenly over SPU_THREADS worker threads, let each
 * one run pthread_run_spe on its own context, and print the sum of the
 * per-thread partial results as PI.
 *
 * Each context gets its own malloc_align'ed float for the partial result.
 * Returns 0 on success and 1 if a result slot cannot be allocated or a
 * thread cannot be created or joined.
 *
 * NOTE(review): N / SPU_THREADS truncates, so when N is not a multiple of
 * SPU_THREADS the last few indices are assigned to no thread.
 */
int main(){
  int i;
  int N=1024;
  float pi=0.0;
  pthread_t pthreads[SPU_THREADS];
  context ctxs[SPU_THREADS] __attribute__ ((aligned(16)));

  for(i=0;i<SPU_THREADS;i++){
    ctxs[i].N=N;
    ctxs[i].Nstart=(N/SPU_THREADS)*i;
    ctxs[i].Nend=(N/SPU_THREADS)*(i+1);
    ctxs[i].pi=(float*) malloc_align(sizeof(float),7);
    if(ctxs[i].pi==NULL){
      fprintf(stderr,"malloc_align failed for thread %d\n",i);
      return 1;
    }
    /* A thread that was never created must not be joined below. */
    if(pthread_create(&pthreads[i], NULL, &pthread_run_spe, &ctxs[i])!=0){
      fprintf(stderr,"pthread_create failed for thread %d\n",i);
      return 1;
    }
  }

  for (i=0; i<SPU_THREADS; i++){
    if(pthread_join (pthreads[i], NULL)!=0){
      fprintf(stderr,"pthread_join failed for thread %d\n",i);
      return 1;
    }
  }

  /* Accumulate the partial results written by the workers. */
  for(i=0;i<SPU_THREADS;i++)
    pi+=*(ctxs[i].pi);

  for(i=0;i<SPU_THREADS;i++)
    free_align(ctxs[i].pi);

  printf("PI = %f\n",pi);

  return (0);
}
Exemplo n.º 8
0
/*
 * Smoke test for malloc_align/free_align: allocate 1024 bytes with the
 * alignment argument given as argv[1] (24 when no argument is supplied) and
 * release the block immediately.
 */
int main(int argc, char* argv[]) {

    printf("coucou\n");

    int align = 24;

    /* atoi yields 0 for non-numeric input; that value is passed on as is. */
    if (argc > 1)
        align = atoi(argv[1]);

    free_align(malloc_align(1024, align));

    exit(0);
}
Exemplo n.º 9
0
/*
 * Load the fw_cfg file named `file` into freshly allocated memory.
 *
 * The buffer comes from malloc_fseg_align when zone is ALLOC_FSEG and from
 * malloc_align otherwise, with the alignment raised to at least 16. The
 * buffer address is recorded with set_file_addr before the contents are
 * read. For ALLOC_FSEG files that start with the "RSD PTR " signature the
 * address is also stored in start_info.rsdp_paddr.
 *
 * Panics when the file cannot be found.
 */
static void do_alloc(char *file, uint32_t align, uint8_t zone)
{
	int id = fw_cfg_file_id(file);
	int n;
	char *p;

	/* Reject an unknown file before its id is used for any lookup. */
	if (id == -1)
		panic();

	n = fw_cfg_file_size(id);

	if (align < 16)
		align = 16;

	if (zone == ALLOC_FSEG)
		p = malloc_fseg_align(n, align);
	else
		p = malloc_align(n, align);

	set_file_addr(id, p);
	fw_cfg_read_file(id, p, n);

	/* For PVH boot, save the PA where the RSDP is stored */
	if (zone == ALLOC_FSEG) {
		if (!memcmp(p, "RSD PTR ", 8)) {
			start_info.rsdp_paddr = (uintptr_t)id_to_addr(id);
		}
	}
}
Exemplo n.º 10
0
/*
 * Allocate the pixel buffer of img: NUM_CHANNELS bytes for each of the
 * img->width * img->height pixels, obtained from malloc_align.
 *
 * img->width and img->height must be set by the caller beforehand. The
 * buffer contents are not initialised. On allocation failure the error macro
 * is invoked with a message naming the allocator actually used.
 */
void alloc_image(struct image* img) {
	img->data = malloc_align(NUM_CHANNELS * img->width * img->height * sizeof(char), 4);

	if (!img->data){
		PRINT_ERR_MSG_AND_EXIT("malloc_align failed\n");
	}
}
Exemplo n.º 11
0
/*
 * Allocate every working buffer of a trace context from its parameters.
 *
 * - aw/ah are the image width/height rounded up to a multiple of 16.
 * - ksize is derived from p.sigma and forced odd; when it is <= 1 the kernel
 *   and its buffers (k, bibuf, btbuf, sibuf) are left NULL.
 * - The s*buf buffers are only allocated in OL_TRACE_CANNY mode; sibuf is
 *   shared with the kernel path and allocated here only if still NULL.
 * - tracebuf is zero-initialised; sb and pb each hold p.width * 16 entries
 *   with their cursor (sbp/pbp) at the start and *_end one past the last.
 *
 * NOTE(review): allocation results are not checked; the function has no
 * error channel, so callers should confirm how failure is meant to surface.
 */
static void alloc_bufs(OLTraceCtx *ctx)
{
	ctx->aw = (ctx->p.width+15) & ~15;
	ctx->ah = (ctx->p.height+15) & ~15;

	ctx->ksize = ((int)round(ctx->p.sigma * 6 + 1)) | 1;

	if (ctx->ksize <= 1) {
		ctx->ksize = 0;
		ctx->k = NULL;
		ctx->kpad = 0;
		ctx->bibuf = NULL;
		ctx->btbuf = NULL;
		ctx->sibuf = NULL;
	} else {
		ctx->k = malloc_align(16 * ctx->ksize, 64);
		ctx->kpad = ctx->ksize / 2;

		ctx->bibuf = malloc_align(ctx->aw * (ctx->ah + 2 * ctx->kpad), 64);
		ctx->btbuf = malloc_align(ctx->ah * (ctx->aw + 2 * ctx->kpad), 64);
		ctx->sibuf = malloc_align(ctx->aw * (ctx->ah + 2), 64);
	}

	if (ctx->p.mode == OL_TRACE_CANNY) {
		if (!ctx->sibuf)
			ctx->sibuf = malloc_align(ctx->aw * (ctx->ah + 2), 64);
		ctx->stbuf = malloc_align(sizeof(*ctx->stbuf) * ctx->ah * (ctx->aw + 2), 64);
		ctx->sxbuf = malloc_align(sizeof(*ctx->sxbuf) * ctx->aw * ctx->ah, 64);
		ctx->sybuf = malloc_align(sizeof(*ctx->sybuf) * ctx->aw * ctx->ah, 64);
		ctx->smbuf = malloc_align(sizeof(*ctx->smbuf) * ctx->aw * ctx->ah, 64);
	} else {
		ctx->stbuf = NULL;
		ctx->sxbuf = NULL;
		ctx->sybuf = NULL;
		ctx->smbuf = NULL;
	}

	/* calloc zeroes the buffer and checks the count * size product itself;
	 * the element count is formed in size_t to avoid narrow arithmetic. */
	ctx->tracebuf = calloc((size_t)ctx->p.width * ctx->p.height, sizeof(*ctx->tracebuf));

	ctx->sb_size = ctx->p.width * 16;
	ctx->sb = malloc(ctx->sb_size * sizeof(*ctx->sb));
	ctx->sbp = ctx->sb;
	ctx->sb_end = ctx->sb + ctx->sb_size;

	ctx->pb_size = ctx->p.width * 16;
	ctx->pb = malloc(ctx->pb_size * sizeof(*ctx->pb));
	ctx->pbp = ctx->pb;
	ctx->pb_end = ctx->pb + ctx->pb_size;
}
Exemplo n.º 12
0
/*
 * Minimal driver: request a block from malloc_align with the arguments
 * 1000 and 1024, release it, and exit with status 1.
 */
int main ()
{
	char *buffer = (char *)malloc_align(1000, 1024);

	/* NOTE(review): the block is released with free(), not free_align();
	 * confirm that this malloc_align returns a pointer free() accepts. */
	free(buffer);

	return 1;
}
Exemplo n.º 13
0
/*
 * Build an array of SPU_THREADS pseudo-random seeds, each rand() % 12345612.
 *
 * Returns the malloc_align'ed array, or NULL (after reporting through
 * perror) when the allocation fails. The caller owns the returned array.
 */
int* make_seed_vector()
{
	int *seeds = malloc_align(SPU_THREADS * sizeof(int), 4);
	int idx = 0;

	if (!seeds) {
		perror("malloc_align failed in make_seed_vector()");
		return NULL;
	}

	while (idx < SPU_THREADS) {
		seeds[idx] = rand() % 12345612;
		idx++;
	}

	return seeds;
}
Exemplo n.º 14
0
/*
 * SPU worker loop.
 *
 * Repeatedly reads an index from the inbound mailbox. UINT_MAX is the exit
 * signal; any other value selects a slice at byte offset
 * param.bitset_subsets * index. For that slice the four bitsets (in, out,
 * use, def) are fetched by DMA, bitset_megaop is run on them, `in` is written
 * back, and the index is posted to the interrupting outbound mailbox.
 *
 * Terminates the program if a buffer or an MFC tag cannot be obtained: an
 * invalid tag must never reach mfc_get or the `1 << tag` mask. On the exit
 * signal the four tags and four buffers are released before returning.
 */
void work(param_t param)
{
printf("SPU[%u] work()\n", param.proc);
	unsigned int inbox, offset;
    unsigned int *in = malloc_align(param.bitset_size, ALIGN_EXP);
    unsigned int *out = malloc_align(param.bitset_size, ALIGN_EXP);
    unsigned int *use = malloc_align(param.bitset_size, ALIGN_EXP);
    unsigned int *def = malloc_align(param.bitset_size, ALIGN_EXP);
    if(in == NULL || out == NULL || use == NULL || def == NULL) {
	    printf("malloc_align() failed\n");
	    exit(1);
    }
    unsigned tag_1, tag_2, tag_3, tag_4;
    /* Reserve a tag for application usage */
    if ((tag_1 = mfc_tag_reserve()) == MFC_TAG_INVALID)
    {
        printf("ERROR: unable to reserve a tag_1\n");
        exit(1);
    }
    if ((tag_2 = mfc_tag_reserve()) == MFC_TAG_INVALID)
    {
        printf("ERROR: unable to reserve a tag_2\n");
        exit(1);
    }
    if ((tag_3 = mfc_tag_reserve()) == MFC_TAG_INVALID)
    {
        printf("ERROR: unable to reserve a tag_3\n");
        exit(1);
    }
    if ((tag_4 = mfc_tag_reserve()) == MFC_TAG_INVALID)
    {
        printf("ERROR: unable to reserve a tag_4\n");
        exit(1);
    }

	while(1) {
		inbox = spu_read_in_mbox();

        if(inbox == UINT_MAX)
        {
            printf("SPU[%u] received exit signal.. exiting.\n", param.proc);
            /* Hand back everything acquired above before leaving. */
            mfc_tag_release(tag_1);
            mfc_tag_release(tag_2);
            mfc_tag_release(tag_3);
            mfc_tag_release(tag_4);
            free_align(in);
            free_align(out);
            free_align(use);
            free_align(def);
            return;
        }

		offset = param.bitset_subsets*inbox;

		/* Fetch the four bitsets, one tag each, and wait for all of them. */
		mfc_get(in,  (unsigned int) (param.bs_in_addr  + offset), param.bitset_size, tag_1, 0, 0);
		mfc_get(out, (unsigned int) (param.bs_out_addr + offset), param.bitset_size, tag_2, 0, 0);
		mfc_get(use, (unsigned int) (param.bs_use_addr + offset), param.bitset_size, tag_3, 0, 0);
		mfc_get(def, (unsigned int) (param.bs_def_addr + offset), param.bitset_size, tag_4, 0, 0);
		mfc_write_tag_mask(1 << tag_1 | 1 << tag_2 | 1 << tag_3 | 1 << tag_4);
		mfc_read_tag_status_all();

D(printf("SPU[%d] index: %u  bitset_subsets: %u  offset: %u\n", param.proc, inbox, param.bitset_subsets, offset);
printf("SPU[%d]\t&use: %p\n\t&def: %p\n\t&out: %p\n\t&in:  %p\n", param.proc, (void*)param.bs_use_addr, (void*)param.bs_def_addr, (void*)param.bs_out_addr, (void*)param.bs_in_addr);
void *tmp_ptr = (void*) (param.bs_use_addr  + offset);
printf("SPU[%d] read\t\t&%p = use(%p)={", param.proc, (void*)use, tmp_ptr);
	for (int i = 0; i < 100; ++i){
	if ( bitset_get_bit(use, i) ) {
			printf("%d ", i);
		}
	}
printf("}\n");
tmp_ptr = (void*) (param.bs_def_addr  + offset);
printf("SPU[%d] read\t\t&%p = def(%p)={", param.proc, (void*)def, tmp_ptr);
	for (int i = 0; i < 100; ++i){
	if ( bitset_get_bit(def, i) ) {
			printf("%d ", i);
		}
	}
printf("}\n");
tmp_ptr = (void*) (param.bs_out_addr  + offset);
printf("SPU[%d] read\t\t&%p = out(%p)={", param.proc, (void*)out, tmp_ptr);
	for (int i = 0; i < 100; ++i){
	if ( bitset_get_bit(out, i) ) {
			printf("%d ", i);
		}
	}
printf("}\n");
tmp_ptr = (void*) (param.bs_in_addr  + offset);
printf("SPU[%d] read\t\t&%p = in (%p)={", param.proc, (void*)in, tmp_ptr);
	for (int i = 0; i < 100; ++i){
	if ( bitset_get_bit(in, i) ) {
			printf("%d ", i);
		}
	}
printf("}\n"));
		bitset_megaop(param, in, out, use, def);

D(printf("SPU[%d] calculated\tin={", param.proc);
	for (int i = 0; i < 100; ++i){
	if ( bitset_get_bit(in, i) ) {
			printf("%d ", i);
		}
	}
printf("}\n");)

		/* Only `in` is written back; wait so the buffer can be reused. */
		mfc_put(in, (unsigned int)  (param.bs_in_addr  +  offset), param.bitset_size, tag_1, 0, 0);
		mfc_write_tag_mask(1 << tag_1);
		mfc_read_tag_status_all();

		spu_write_out_intr_mbox(inbox);
	}
}
/*
 * Does the actual processing of the frame.
 *
 * Fetches the `input` and `big_image` descriptors named by ppu_data via DMA,
 * then walks the input image four lines at a time: each chunk is fetched,
 * reduced by compute_lines_average and compute_columns_average into one
 * scaled line, and handed to store_line together with big_image and the
 * chunk index.
 *
 * Uses `tag_id` and `waittag`, which are defined outside this function.
 */
static void do_work(ppu_data_t ppu_data) {
	struct image input;
	struct image big_image;

	dprintf("SPU[%d] ppu_data.input:%p ppu_big_img:%p sizeof(struct image):%lu\n",
		ppu_data.spe_id, (void *)ppu_data.input,
		(void *)ppu_data.big_image, sizeof(struct image));

	/* Get input image and big_image details */
	mfc_get((void *)(&input), (uint32_t)(ppu_data.input),
			(uint32_t)(sizeof(struct image)), tag_id, 0, 0);
	mfc_get((void *)(&big_image), (uint32_t)(ppu_data.big_image),
			(uint32_t)(sizeof(struct image)), tag_id, 0, 0);

	/* Both descriptor transfers share tag_id, so one wait covers them. */
	waittag(tag_id);
	dprintf("SPU[%d] got structs\n"\
			"input.width=%u\tinput.height=%u\n"\
			"big_image.width=%u\tbig_image.height=%u\n"\
			"input.data=%p\tbig_image.data=%p\n",
			ppu_data.spe_id, input.width, input.height, big_image.width,
			big_image.height, (void *)input.data, (void *)big_image.data);

	/* Local buffer holding four full input lines at a time. */
	struct image img_chunk;
	unsigned int buf_line_sz = input.width * NUM_CHANNELS;
	int transfer_sz = 4 * buf_line_sz;

	img_chunk.width = input.width;
	img_chunk.height = 4;
	alloc_image(&img_chunk);

	/* Descriptor of the single scaled output line. */
	struct image img_scaled_line;
	img_scaled_line.width = input.width / SCALE_FACTOR;
	img_scaled_line.height = 1;

	/* Hack for memory align of local image data to have the same 4 bits in its
	 * address as the remote corresponding address in PPU
	 */
	int left_padding = (ppu_data.spe_id % 4) * 4;
	/* NOTE(review): the result of malloc_align is not checked, and the size is
	 * NUM_CHANNELS * 3 bytes plus padding regardless of img_scaled_line.width
	 * -- confirm this is large enough for what compute_columns_average writes.
	 */
	unsigned char* addr_to_free = malloc_align(NUM_CHANNELS * 3 * sizeof(char) +
												left_padding, 4);

	/* The usable data starts after the padding; addr_to_free keeps the
	 * pointer that must be passed to free_align. */
	img_scaled_line.data = addr_to_free + left_padding;

	unsigned int i;
	/* Process 4 lines from the initial image at a time */
	for (i = 0; i < input.height / img_chunk.height; ++i) {

		/* Get the image chunk from PPU through DMA transfer */
		dprintf("SPU[%d] getting image_chunk %d of size %d\n",
				ppu_data.spe_id, i, transfer_sz);

		dprintf("SPU[%d] input.data=%p img_chunk.data=%p "\
				"start_addr=%p\n", ppu_data.spe_id, (void *)input.data,
				(void *)img_chunk.data, (void *)((uint32_t)(input.data) + i * transfer_sz));

		mfc_get((void *)(img_chunk.data), (uint32_t)(input.data) + i * transfer_sz,
				(uint32_t)(transfer_sz), tag_id, 0, 0);

		waittag(tag_id);
		dprintf("SPU[%d] got image_chunk %d\n", ppu_data.spe_id, i);

		compute_lines_average(&img_chunk, buf_line_sz);

		/* Make average for column. avg = (c0.r + c1.r) / 2 etc*/
		compute_columns_average(&img_chunk, &img_scaled_line);

		store_line(&img_scaled_line, ppu_data, &big_image, i);
	}

	free_image(&img_chunk);
	free_align(addr_to_free);
}
Exemplo n.º 16
0
/*
 * Scale the image described by img down by SCALE_FACTOR, producing two
 * output lines per loop iteration.
 *
 * Each iteration DMAs 2 * SCALE_FACTOR source lines into `input`, averages
 * each group of four lines vertically with spu_avg into the two halves of
 * `temp`, averages groups of SCALE_FACTOR pixels horizontally into `output`
 * and `output2`, and DMAs both lines to consecutive destination lines
 * starting at the address derived from img->dst and img->block_nr.
 *
 * The vertical pass reads exactly four lines per output line, so it
 * presumably relies on SCALE_FACTOR == 4 -- TODO confirm.
 *
 * If any work buffer cannot be allocated the function returns without
 * issuing any transfer.
 */
void process_image_2lines(struct image* img){
	unsigned char *input, *output, *output2, *temp;
	unsigned int addr1, addr2, i, j, k, r1, g1, b1, r2, g2, b2;

	int block_nr = img->block_nr;

	vector unsigned char *v1_1, *v1_2, *v1_3, *v1_4, *v1_5;
	vector unsigned char *v2_1, *v2_2, *v2_3, *v2_4, *v2_5;

	// byte sizes of one source line and of one group of SCALE_FACTOR lines
	unsigned int num_channels_X_img_width = NUM_CHANNELS * img->width;
	unsigned int num_channels_X_img_width_X_SCALE_FACTOR = num_channels_X_img_width * SCALE_FACTOR;

	input  = malloc_align(2 * num_channels_X_img_width_X_SCALE_FACTOR, 4);

	output  = malloc_align(num_channels_X_img_width / SCALE_FACTOR, 4);
	output2 = malloc_align(num_channels_X_img_width / SCALE_FACTOR, 4);

	temp = malloc_align(2 * NUM_CHANNELS * img->width, 4);

	// Without all four buffers nothing can be processed: release whatever
	// was obtained and leave before any pointer is dereferenced.
	if (!input || !output || !output2 || !temp) {
		if (temp)
			free_align(temp);
		if (input)
			free_align(input);
		if (output)
			free_align(output);
		if (output2)
			free_align(output2);
		return;
	}

	// first line
	v1_1 = (vector unsigned char *) &input[0];
	v1_2 = (vector unsigned char *) &input[1 * num_channels_X_img_width];
	v1_3 = (vector unsigned char *) &input[2 * num_channels_X_img_width];
	v1_4 = (vector unsigned char *) &input[3 * num_channels_X_img_width];
	v1_5 = (vector unsigned char *) temp;

	// second line
	v2_1 = (vector unsigned char *) &input[4 * num_channels_X_img_width];
	v2_2 = (vector unsigned char *) &input[5 * num_channels_X_img_width];
	v2_3 = (vector unsigned char *) &input[6 * num_channels_X_img_width];
	v2_4 = (vector unsigned char *) &input[7 * num_channels_X_img_width];
	v2_5 = (vector unsigned char *) &temp[num_channels_X_img_width];

	addr2 = (unsigned int)img->dst; //start of image
	addr2 += (block_nr / NUM_IMAGES_HEIGHT) * img->width * NUM_CHANNELS *
		img->height / NUM_IMAGES_HEIGHT; //start line of spu block
	addr2 += (block_nr % NUM_IMAGES_WIDTH) * NUM_CHANNELS *
		img->width / NUM_IMAGES_WIDTH; //start column of spu block

	for (i = 0; i<img->height / SCALE_FACTOR / 2; i++){
		// get 2 * SCALE_FACTOR lines and wait for the transfer to finish
		addr1 = ((unsigned int)img->src) + 2 * i * num_channels_X_img_width_X_SCALE_FACTOR;
		mfc_get(input, addr1, 2 * num_channels_X_img_width * SCALE_FACTOR, MY_TAG, 0, 0);
		mfc_write_tag_mask(1 << MY_TAG);
		mfc_read_tag_status_all();

		// vertical pass for both output lines, 16 bytes at a time
		for (j = 0; j < num_channels_X_img_width / 16; j++){
			v1_5[j] = spu_avg(spu_avg(v1_1[j], v1_2[j]), spu_avg(v1_3[j], v1_4[j]));
			v2_5[j] = spu_avg(spu_avg(v2_1[j], v2_2[j]), spu_avg(v2_3[j], v2_4[j]));
		}

		// horizontal pass: average every SCALE_FACTOR neighbouring pixels
		for (j = 0; j < img->width; j += SCALE_FACTOR){
			r1 = g1 = b1 = 0;
			r2 = b2 = g2 = 0;
			for (k = j; k < j + SCALE_FACTOR; k++) {
				unsigned int k_X_NUM_CHANNELS = k * NUM_CHANNELS;
				r1 += temp[k_X_NUM_CHANNELS + 0];
				g1 += temp[k_X_NUM_CHANNELS + 1];
				b1 += temp[k_X_NUM_CHANNELS + 2];

				r2 += temp[num_channels_X_img_width + k_X_NUM_CHANNELS + 0];
				g2 += temp[num_channels_X_img_width + k_X_NUM_CHANNELS + 1];
				b2 += temp[num_channels_X_img_width + k_X_NUM_CHANNELS + 2];
			}
			r1 /= SCALE_FACTOR;
			b1 /= SCALE_FACTOR;
			g1 /= SCALE_FACTOR;

			r2 /= SCALE_FACTOR;
			b2 /= SCALE_FACTOR;
			g2 /= SCALE_FACTOR;

			output[j / SCALE_FACTOR * NUM_CHANNELS + 0] = (unsigned char) r1;
			output[j / SCALE_FACTOR * NUM_CHANNELS + 1] = (unsigned char) g1;
			output[j / SCALE_FACTOR * NUM_CHANNELS + 2] = (unsigned char) b1;

			output2[j / SCALE_FACTOR * NUM_CHANNELS + 0] = (unsigned char) r2;
			output2[j / SCALE_FACTOR * NUM_CHANNELS + 1] = (unsigned char) g2;
			output2[j / SCALE_FACTOR * NUM_CHANNELS + 2] = (unsigned char) b2;
		}

		//put the first scaled line back
		mfc_put(output, addr2, img->width / SCALE_FACTOR * NUM_CHANNELS, MY_TAG, 0, 0);
		addr2 += img->width * NUM_CHANNELS; //line inside spu block

		// send the second scaled line as well
		mfc_put(output2, addr2, img->width / SCALE_FACTOR * NUM_CHANNELS, MY_TAG, 0, 0);
		addr2 += img->width * NUM_CHANNELS; //line inside spu block

		// both puts share MY_TAG, so one wait covers them
		mfc_write_tag_mask(1 << MY_TAG);
		mfc_read_tag_status_all();
	}

	free_align(temp);
	free_align(input);
	free_align(output);
	free_align(output2);
}
Exemplo n.º 17
0
/*
 * Scale the image described by img down by SCALE_FACTOR using two input
 * buffers, so the fetch of the next group of lines overlaps with the
 * processing of the current one.
 *
 * The get for buffer n is issued with tag n (0 or 1). Inside the loop the
 * next group is requested first, then the current buffer's tag is waited on,
 * its four lines are averaged vertically (spu_avg) and horizontally, and the
 * resulting line is written back with MY_TAG. The group left in flight after
 * the loop is processed by the trailing section.
 *
 * The vertical pass reads exactly four lines, so it presumably relies on
 * SCALE_FACTOR == 4 -- TODO confirm.
 *
 * If any work buffer cannot be allocated the function returns without
 * issuing any transfer.
 */
void process_image_double(struct image* img){
	unsigned char *input[2], *output, *temp;
	unsigned int addr1, addr2, i, j, k, r, g, b;
	int block_nr = img->block_nr;
	vector unsigned char *v1[2], *v2[2], *v3[2], *v4[2], *v5;

	int buf, nxt_buf; //index of the buffer (0/1)

	input[0] = malloc_align(NUM_CHANNELS * SCALE_FACTOR * img->width, 4);
	input[1] = malloc_align(NUM_CHANNELS * SCALE_FACTOR * img->width, 4);

	output = malloc_align(NUM_CHANNELS * img->width / SCALE_FACTOR, 4);
	temp = malloc_align(NUM_CHANNELS * img->width, 4);

	// Without all four buffers nothing can be processed: release whatever
	// was obtained and leave before any pointer is dereferenced.
	if (!input[0] || !input[1] || !output || !temp) {
		if (temp)
			free_align(temp);
		if (input[0])
			free_align(input[0]);
		if (input[1])
			free_align(input[1]);
		if (output)
			free_align(output);
		return;
	}

	// byte size of one source line
	unsigned int num_channels_X_img_width = NUM_CHANNELS * img->width;

	v1[0] = (vector unsigned char *) &input[0][0];
	v2[0] = (vector unsigned char *) &input[0][1 * num_channels_X_img_width];
	v3[0] = (vector unsigned char *) &input[0][2 * num_channels_X_img_width];
	v4[0] = (vector unsigned char *) &input[0][3 * num_channels_X_img_width];
	v5 = (vector unsigned char *) temp;

	v1[1] = (vector unsigned char *) &input[1][0];
	v2[1] = (vector unsigned char *) &input[1][1 * num_channels_X_img_width];
	v3[1] = (vector unsigned char *) &input[1][2 * num_channels_X_img_width];
	v4[1] = (vector unsigned char *) &input[1][3 * num_channels_X_img_width];


	addr2 = (unsigned int)img->dst; //start of image
	addr2 += (block_nr / NUM_IMAGES_HEIGHT) * num_channels_X_img_width *
		img->height / NUM_IMAGES_HEIGHT; //start line of spu block
	addr2 += (block_nr % NUM_IMAGES_WIDTH) * num_channels_X_img_width / NUM_IMAGES_WIDTH;

	addr1 = ((unsigned int)img->src);

	buf = 0; // first data transfer, issued with tag 0
	mfc_getb(input[buf], addr1, SCALE_FACTOR * num_channels_X_img_width, 0, 0, 0);

	for (i = 1; i<img->height / SCALE_FACTOR; i++){
		nxt_buf = buf ^ 1; //the other buffer receives the next group

		//request the next group (get with barrier), tagged with its buffer index
		addr1 = ((unsigned int)img->src) + i * num_channels_X_img_width * SCALE_FACTOR;
		mfc_getb(input[nxt_buf], addr1, SCALE_FACTOR * num_channels_X_img_width, nxt_buf, 0, 0);

		//wait only for the buffer that is about to be processed
		mfc_write_tag_mask(1 << buf);
		mfc_read_tag_status_all();

		// process current buffer: vertical pass, 16 bytes at a time
		for (j = 0; j < img->width * NUM_CHANNELS / 16; j++){
			v5[j] = spu_avg(spu_avg(v1[buf][j], v2[buf][j]), spu_avg(v3[buf][j], v4[buf][j]));
		}

		// horizontal pass: average every SCALE_FACTOR neighbouring pixels
		for (j = 0; j < img->width; j+=SCALE_FACTOR){
			r = g = b = 0;
			for (k = j; k < j + SCALE_FACTOR; k++) {
				r += temp[k * NUM_CHANNELS + 0];
				g += temp[k * NUM_CHANNELS + 1];
				b += temp[k * NUM_CHANNELS + 2];
			}
			r /= SCALE_FACTOR;
			b /= SCALE_FACTOR;
			g /= SCALE_FACTOR;

			output[j / SCALE_FACTOR * NUM_CHANNELS + 0] = (unsigned char) r;
			output[j / SCALE_FACTOR * NUM_CHANNELS + 1] = (unsigned char) g;
			output[j / SCALE_FACTOR * NUM_CHANNELS + 2] = (unsigned char) b;
		}

		// send the scaled line and wait before `output` is reused
		mfc_put(output, addr2, img->width / SCALE_FACTOR * NUM_CHANNELS, MY_TAG, 0, 0);
		addr2 += img->width * NUM_CHANNELS; //line inside spu block

		mfc_write_tag_mask(1 << MY_TAG);
		mfc_read_tag_status_all();

		buf = nxt_buf; //prepare next iteration
	}

	// wait for the group that is still in flight
	mfc_write_tag_mask(1 << buf);
	mfc_read_tag_status_all();

	// process last buffer
	for (j = 0; j < img->width * NUM_CHANNELS / 16; j++){
		v5[j] = spu_avg(spu_avg(v1[buf][j], v2[buf][j]), spu_avg(v3[buf][j], v4[buf][j]));
	}

	for (j=0; j < img->width; j+=SCALE_FACTOR){
		r = g = b = 0;
		for (k = j; k < j + SCALE_FACTOR; k++) {
			r += temp[k * NUM_CHANNELS + 0];
			g += temp[k * NUM_CHANNELS + 1];
			b += temp[k * NUM_CHANNELS + 2];
		}
		r /= SCALE_FACTOR;
		b /= SCALE_FACTOR;
		g /= SCALE_FACTOR;

		output[j / SCALE_FACTOR * NUM_CHANNELS + 0] = (unsigned char) r;
		output[j / SCALE_FACTOR * NUM_CHANNELS + 1] = (unsigned char) g;
		output[j / SCALE_FACTOR * NUM_CHANNELS + 2] = (unsigned char) b;
	}

	// send the last scaled line and wait for it before freeing `output`
	mfc_put(output, addr2, img->width / SCALE_FACTOR * NUM_CHANNELS, MY_TAG, 0, 0);

	mfc_write_tag_mask(1 << MY_TAG);
	mfc_read_tag_status_all();

	free_align(temp);
	free_align(input[0]);
	free_align(input[1]);
	free_align(output);
}
Exemplo n.º 18
0
Arquivo: vmm.c Projeto: PSanf2/POS_C9
/*
 * Plain allocation entry point: forwards `size` to malloc_align with an
 * alignment argument of 1 and returns whatever it yields.
 */
u32int *malloc(u32int size)
{
	u32int *block = malloc_align(size, 0x1);

	return block;
}
Exemplo n.º 19
0
/*
 * Compress and then decompress a PGM image with num_spus worker threads.
 *
 * Arguments: mod_vect mod_dma num_spus in.pgm out.cmp out.pgm results.txt
 *
 * The image is divided into BLOCK_SIZE x BLOCK_SIZE blocks which are split
 * across the workers; each worker receives a package_t describing its first
 * block, its block count and where its slice of the pixel and block arrays
 * starts. The same split is done once for compression (action_type 0) and
 * once for decompression (action_type 1). The compressed and decompressed
 * images are written out, and "num_spus op_time total_time" is appended to
 * the results file.
 *
 * Returns 0 on success and -1 for a bad command line or if the results file
 * cannot be opened; exits with status 1 on allocation or thread failure.
 */
int main(int argc, char **argv)
{
    
	if (argc != 8) {
		printf("Usage: ./tema3 mod_vect mod_dma num_spus in.pgm out.cmp out.pgm results.txt");
		return -1;
    }
	int mod_vect = atoi(argv[1]);
	int mod_dma = atoi(argv[2]);
	int num_spus = atoi(argv[3]);
	char *inpgm = argv[4];
	char *outcmp = argv[5];
	char *outpgm = argv[6];
	char *results = argv[7];
	int i;

	/* num_spus sizes the thread arrays and is the divisor of every block
	 * split below, so anything under 1 cannot be honoured. */
	if (num_spus < 1) {
		fprintf(stderr, "num_spus must be at least 1\n");
		return -1;
	}

	struct img initial_image, decompressed_image;
	struct c_img compressed_image;

	struct timeval start_total, end_total, start_op, end_op;
	double total_time = 0, op_time = 0;

	gettimeofday(&start_total, NULL);

	// read the initial image
	read_pgm(inpgm, &initial_image);

	gettimeofday(&start_op, NULL);
	
	compressed_image.width = initial_image.width;
	compressed_image.height = initial_image.height;
	int nr_cmp_blocks = (1LL * initial_image.width * initial_image.height) / (BLOCK_SIZE * BLOCK_SIZE);
	compressed_image.blocks = (struct block *)malloc_align(nr_cmp_blocks * sizeof(struct block), 7);

	pthread_t *compress_threads = (pthread_t*)malloc_align(num_spus * sizeof(pthread_t), 7);
	struct package_t *cthread_arg = (struct package_t *)malloc_align(num_spus * sizeof(struct package_t), 7);

	/* A zero-block image may legitimately yield no block storage. */
	if ((nr_cmp_blocks > 0 && !compressed_image.blocks) ||
			!compress_threads || !cthread_arg) {
		perror("malloc_align failed");
		exit (1);
	}
	
	int nr_of_blocks = (initial_image.width * initial_image.height) / (BLOCK_SIZE * BLOCK_SIZE);
	int average_blocks = nr_of_blocks / num_spus;
	int rest_blocks = nr_of_blocks % num_spus;
	int offset = 0;

	for(i = 0; i < num_spus; i++) { 

		/* fill in the package_t structure sent to each SPU */
		cthread_arg[i].action_type = 0;
		cthread_arg[i].mod_vect = mod_vect;
		cthread_arg[i].mod_dma = mod_dma;
		cthread_arg[i].num_spus = num_spus;
		cthread_arg[i].nr_blocks = average_blocks;
		cthread_arg[i].index_block = offset;
			
		cthread_arg[i].img_pgm.width = initial_image.width;
		cthread_arg[i].img_pgm.height = initial_image.height;
		cthread_arg[i].img_pgm.pixels = initial_image.pixels + ((offset / (initial_image.width / BLOCK_SIZE)) * BLOCK_SIZE * initial_image.width + (offset % (initial_image.width / BLOCK_SIZE)) * BLOCK_SIZE);
				
		cthread_arg[i].img_cmp.width = compressed_image.width;
		cthread_arg[i].img_cmp.height = compressed_image.height;
		cthread_arg[i].img_cmp.blocks = compressed_image.blocks + ((offset / (initial_image.width / BLOCK_SIZE)) * (initial_image.width / BLOCK_SIZE) + (offset % (initial_image.width / BLOCK_SIZE)));

		/* Re-balance: spread the blocks still unassigned over the
		 * remaining workers so the remainder is not lost. */
		offset += average_blocks;
		nr_of_blocks -= average_blocks;
		if (rest_blocks != 0 && i != num_spus - 1) {
			average_blocks = nr_of_blocks / (num_spus - 1 - i);
			rest_blocks = nr_of_blocks % (num_spus - 1 - i);
		}

		/* Create thread for each SPE context */
		if (pthread_create (&compress_threads[i], NULL, &ppu_pthread_function, &cthread_arg[i]))  {
			perror ("Failed creating thread");
			exit (1);
		}
	}

	/* Wait for SPU-thread to complete execution.  */
  	for (i = 0; i < num_spus; i++) {
		if (pthread_join (compress_threads[i], NULL)) {
			perror("Failed pthread_join");
			exit (1);
		}
	}

  	free_align(compress_threads);
	free_align(cthread_arg);
 
	decompressed_image.width = initial_image.width;
	decompressed_image.height = initial_image.height;
	int nr_dec_blocks = (1LL * initial_image.width * initial_image.height) / (BLOCK_SIZE * BLOCK_SIZE);
	decompressed_image.pixels = (unsigned char *)malloc_align(initial_image.height * initial_image.width * sizeof(unsigned char), 7);

	pthread_t *decompress_threads = (pthread_t*)malloc_align(num_spus * sizeof(pthread_t), 7);
	struct package_t *dthread_arg = (struct package_t *)malloc_align(num_spus * sizeof(struct package_t), 7);

	/* An empty image may legitimately yield no pixel storage. */
	if ((initial_image.height * initial_image.width > 0 && !decompressed_image.pixels) ||
			!decompress_threads || !dthread_arg) {
		perror("malloc_align failed");
		exit (1);
	}
	
	int dec_average_blocks = nr_dec_blocks / num_spus;
	int dec_rest_blocks = nr_dec_blocks % num_spus;
	int dec_offset = 0;

	for(i = 0; i < num_spus; i++) { 

		/* fill in the package_t structure sent to each SPU */
		dthread_arg[i].action_type = 1;
		dthread_arg[i].mod_vect = mod_vect;
		dthread_arg[i].mod_dma = mod_dma;
		dthread_arg[i].num_spus = num_spus;
		dthread_arg[i].nr_blocks = dec_average_blocks;
		dthread_arg[i].index_block = dec_offset;
			
		dthread_arg[i].img_pgm.width = initial_image.width;
		dthread_arg[i].img_pgm.height = initial_image.height;
		dthread_arg[i].img_pgm.pixels = decompressed_image.pixels + ((dec_offset / (initial_image.width / BLOCK_SIZE)) * BLOCK_SIZE * initial_image.width + (dec_offset % (initial_image.width / BLOCK_SIZE)) * BLOCK_SIZE);
				
		dthread_arg[i].img_cmp.width = compressed_image.width;
		dthread_arg[i].img_cmp.height = compressed_image.height;
		dthread_arg[i].img_cmp.blocks = compressed_image.blocks + ((dec_offset / (initial_image.width / BLOCK_SIZE)) * (initial_image.width / BLOCK_SIZE) + (dec_offset % (initial_image.width / BLOCK_SIZE)));

		/* Same re-balancing as in the compression pass. */
		dec_offset += dec_average_blocks;
		nr_dec_blocks -= dec_average_blocks;
		if (dec_rest_blocks != 0 && i != num_spus - 1) {
			dec_average_blocks = nr_dec_blocks / (num_spus - 1 - i);
			dec_rest_blocks = nr_dec_blocks % (num_spus - 1 - i);
		}

		/* Create thread for each SPE context */
		if (pthread_create (&decompress_threads[i], NULL, &ppu_pthread_function, &dthread_arg[i]))  {
			perror ("Failed creating thread");
			exit (1);
		}
	}

	/* Wait for SPU-thread to complete execution.  */
  	for (i = 0; i < num_spus; i++) {
		if (pthread_join (decompress_threads[i], NULL)) {
			perror("Failed pthread_join");
			exit (1);
		}
	}
	gettimeofday(&end_op, NULL);

	write_cmp(outcmp, &compressed_image);
	write_pgm(outpgm, &decompressed_image);
	
	free_align(compressed_image.blocks);
	free_align(decompressed_image.pixels);
	free_align(decompress_threads);
	free_align(dthread_arg);

	gettimeofday(&end_total, NULL);
	
	total_time += GET_TIME_DELTA(start_total, end_total);
	op_time += GET_TIME_DELTA(start_op, end_op);

	/* Append the timings to the results file by redirecting stdout; a
	 * failed redirect leaves no stream to print to. */
	if (!freopen(results, "a+", stdout)) {
		perror("Failed opening results file");
		return -1;
	}
	printf("%i %lf %lf\n", num_spus, op_time, total_time);
	fclose(stdout);

	return 0;
}
Exemplo n.º 20
0
/*
 * Build a zoomed image out of patches selected by the SPUs.
 *
 * Arguments: fis_in fis_out zoom rows cols overlap_spu overlap_ppu
 *
 * The output image is zoom times the input size and is divided into
 * rows x cols patches. Candidate patches, random seeds and per-SPU border
 * matrices are prepared and handed to send_patch_info; afterwards the final
 * image is assembled from the patch ids chosen for each SPU and written to
 * fis_out.
 *
 * Returns 0 on success and -1 on a bad command line, an unreadable input
 * image or an allocation failure.
 */
int main(int argc, char **argv)
{
	init_spus();
	srand((unsigned)time(NULL));
	char *fis_in, *fis_out;
	int zoom, rows, cols, i, j,
	overlap_spu, overlap_ppu,
	patch_w, patch_h, nr_patches;	

	if (argc < 8) {
		fprintf(stderr, "Error: Missing some parameters.\n");
		fprintf(stderr, "Run: ./program fis_in fis_out zoom nr_bucati_dim1 nr_bucati_dim2 banda_de_suprapunere_dim1 banda_de_suprapunere_dim2\n");
		return -1;
	}

	fis_in  = argv[1];
	fis_out = argv[2];
	zoom    = atoi(argv[3]);
	rows = atoi(argv[4]);
	cols = atoi(argv[5]);
	overlap_spu = atoi(argv[6]);
	overlap_ppu = atoi(argv[7]);

	/* rows and cols are divisors of the patch size below and zoom scales
	 * the output image, so none of them may be zero or negative. */
	if (zoom <= 0 || rows <= 0 || cols <= 0) {
		fprintf(stderr, "Error: zoom, rows and cols must be positive.\n");
		return -1;
	}

	
	image img_src = read_ppm(fis_in);
	if (img_src == NULL) {
		fprintf(stderr, "Error reading image file.\n");
		return -1;
	}

	patch_w = (zoom * img_src->width)  / cols;
	patch_h = (zoom * img_src->height) / rows;
	nr_patches = rows * cols;
	printf("PPU: NR PATCHES NECESARY = %d\n", nr_patches);

	int **spu_patch_id_vector = alloc_patch_id_vector(rows);
	if (spu_patch_id_vector == NULL)
		return -1;

	printf("PPU: ZOOM=%d ROWS=%d COLS=%d img->width=%d img->height=%d patch_w=%d patch_h=%d\n", zoom, rows, cols, img_src->width, img_src->height, patch_w, patch_h);

	int* rand_seed = make_seed_vector();
	if (rand_seed == NULL)
		return -1;
	/* One (rows-1) x overlap_spu border matrix per SPU. */
	int ***min_borders = malloc_align(SPU_THREADS * sizeof(int**), 4);
	if (min_borders == NULL) {
		perror("PPU: malloc_align failed in main");
		return -1;
	}
	for (i = 0; i < SPU_THREADS; i++) {
		min_borders[i] = alloc_aligned_matrix((rows-1), overlap_spu);
		if (min_borders[i] == NULL)
			return -1;
	}

	pixel_t **patches_to_send = make_patches(img_src, patch_w, patch_h, nr_patches);

	send_patch_info(&patch_w, &patch_h, &rows, &nr_patches, spu_patch_id_vector, patches_to_send, rand_seed, &overlap_spu, min_borders);

	stop_spus();

	int out_img_width = zoom * img_src->width;
	int out_img_height = zoom * img_src->height;
	image img_dst = alloc_img(out_img_width, out_img_height);
	if (img_dst == NULL) {
		fprintf(stderr, "Error allocating output image.\n");
		return -1;
	}

	for (i = 0; i < SPU_THREADS; i++) {
		printf("PPU: spu[%d]: ID= ", i);
		for (j = 0; j < rows; j++)
			printf("%d ", spu_patch_id_vector[i][j]);
		printf("\n");
	}

	make_final_image(img_dst, patch_w, patch_h, spu_patch_id_vector, rows, patches_to_send);
	write_ppm(fis_out, img_dst);

	free_img(img_src);
	free_img(img_dst);
	free_seed_vector(rand_seed);
	free_patch_id_vector(spu_patch_id_vector);
	for (i = 0; i < SPU_THREADS; i++)
		free_aligned_matrix(min_borders[i], rows-1);
	/* The outer array came from malloc_align above and is released last. */
	free_align(min_borders);
	return 0;
}
Exemplo n.º 21
0
// Array form of operator new: hands the requested byte count straight to
// malloc_align and returns its result.
inline void* operator new[](size_t sz)
{
    return malloc_align(sz);
}