예제 #1
0
/*
 * Run one SPI transaction over the two eDMA channels.
 *
 * Every byte of `buffer` is widened into a 32-bit word carrying SPI0_TXCMD
 * OR-ed with the data byte and staged in buffer_temp_32.  The TX channel is
 * pointed at that staging area and the RX channel at `buffer` itself, so
 * `buffer` is the destination of the receive side.  The function then waits
 * on spi_transfer_finished_semaphore (timeout argument 100) and returns the
 * status of that wait.
 *
 * `dir` is not consulted by this implementation.
 *
 * NOTE(review): the mem_align() results are used unchecked, and
 * disable_edma_loop() may itself release dmaChStcd on some toolchains --
 * confirm the free_align() calls at the end cannot release it twice.
 */
OSStatus host_platform_spi_transfer( bus_transfer_direction_t dir, uint8_t* buffer, uint16_t buffer_length )
{
    OSStatus wait_status;
    int idx = 0;

    /* Stage the outgoing words: command bits combined with each payload byte. */
    while (idx < buffer_length) {
        buffer_temp_32[idx] = SPI0_TXCMD | (uint32_t)buffer[idx];
        idx++;
    }

    /* Transmit side: 4 bytes per staged word. */
    dmaSPITX.dmaChStcd = (edma_software_tcd_t *)mem_align(2 * sizeof(edma_software_tcd_t) * dmaSPITX.period, 32);
    dmaSPITX.srcAddr = (uint32_t)buffer_temp_32;
    dmaSPITX.length = buffer_length * 4;

    /* Receive side: one byte per transferred word, written back into buffer. */
    dmaSPIRX.dmaChStcd = (edma_software_tcd_t *)mem_align(2 * sizeof(edma_software_tcd_t) * dmaSPIRX.period, 32);
    dmaSPIRX.destAddr = (uint32_t)buffer;
    dmaSPIRX.length = buffer_length;

    MCU_CLOCKS_NEEDED();
    SPI0_CS_ENABLE;
    setup_edma_loop(&dmaSPITX);
    setup_edma_loop(&dmaSPIRX);

    /* RX is started before TX. */
    EDMA_DRV_StartChannel(dmaSPIRX.dmaCh);
    EDMA_DRV_StartChannel(dmaSPITX.dmaCh);

    wait_status = mico_rtos_get_semaphore( &spi_transfer_finished_semaphore, 100 );

    /* Tear everything down regardless of how the wait ended. */
    disable_edma_loop(&dmaSPIRX);
    disable_edma_loop(&dmaSPITX);
    SPI0_CS_DISABLE;
    MCU_CLOCKS_NOT_NEEDED();
    free_align(dmaSPITX.dmaChStcd);
    free_align(dmaSPIRX.dmaChStcd);

    return wait_status;
}
예제 #2
0
파일: ppu_master.c 프로젝트: silviu/asc.3
/*
 * Release a matrix through free_align(): first each of the `h` rows,
 * then the table of row pointers itself.
 */
void free_aligned_matrix(int **a, int h)
{
	int row = 0;

	while (row < h) {
		free_align(a[row]);
		row++;
	}

	free_align(a);
}
예제 #3
0
파일: spu.c 프로젝트: LaitaStefan/labs-2014
void process_image_simple(struct image* img){
	unsigned char *input, *output, *temp;
	unsigned int addr1, addr2, i, j, k, r, g, b;
	int block_nr = img->block_nr;
	vector unsigned char *v1, *v2, *v3, *v4, *v5 ;

	input = malloc_align(NUM_CHANNELS * SCALE_FACTOR * img->width, 4);
	output = malloc_align(NUM_CHANNELS * img->width / SCALE_FACTOR, 4);
	temp = malloc_align(NUM_CHANNELS * img->width, 4);

	v1 = (vector unsigned char *) &input[0];
	v2 = (vector unsigned char *) &input[1 * img->width * NUM_CHANNELS];
	v3 = (vector unsigned char *) &input[2 * img->width * NUM_CHANNELS];
	v4 = (vector unsigned char *) &input[3 * img->width * NUM_CHANNELS];
	v5 = (vector unsigned char *) temp;

	addr2 = (unsigned int)img->dst; //start of image
	addr2 += (block_nr / NUM_IMAGES_HEIGHT) * img->width * NUM_CHANNELS * 
		img->height / NUM_IMAGES_HEIGHT; //start line of spu block
	addr2 += (block_nr % NUM_IMAGES_WIDTH) * NUM_CHANNELS *
		img->width / NUM_IMAGES_WIDTH;

	for (i=0; i<img->height / SCALE_FACTOR; i++){
		//get 4 lines
		addr1 = ((unsigned int)img->src) + i * img->width * NUM_CHANNELS * SCALE_FACTOR;
		mfc_get(input, addr1, SCALE_FACTOR * img->width * NUM_CHANNELS, MY_TAG, 0, 0);
		mfc_write_tag_mask(1 << MY_TAG);
		mfc_read_tag_status_all();

		//compute the scaled line
		for (j = 0; j < img->width * NUM_CHANNELS / 16; j++){
			v5[j] = spu_avg(spu_avg(v1[j], v2[j]), spu_avg(v3[j], v4[j]));
		}
		for (j=0; j < img->width; j+=SCALE_FACTOR){
			r = g = b = 0;
			for (k = j; k < j + SCALE_FACTOR; k++) {
				r += temp[k * NUM_CHANNELS + 0];
				g += temp[k * NUM_CHANNELS + 1];
				b += temp[k * NUM_CHANNELS + 2];
			}
			r /= SCALE_FACTOR;
			b /= SCALE_FACTOR;
			g /= SCALE_FACTOR;

			output[j / SCALE_FACTOR * NUM_CHANNELS + 0] = (unsigned char) r;
			output[j / SCALE_FACTOR * NUM_CHANNELS + 1] = (unsigned char) g;
			output[j / SCALE_FACTOR * NUM_CHANNELS + 2] = (unsigned char) b;
		}

		//put the scaled line back
		mfc_put(output, addr2, img->width / SCALE_FACTOR * NUM_CHANNELS, MY_TAG, 0, 0);
		addr2 += img->width * NUM_CHANNELS; //line inside spu block
		mfc_write_tag_mask(1 << MY_TAG);
		mfc_read_tag_status_all();
	}

	free_align(temp);
	free_align(input);
	free_align(output);
}
예제 #4
0
파일: ppu_master.c 프로젝트: silviu/asc.3
/*
 * Release the per-SPU patch id arrays (SPU_THREADS entries) and then the
 * table that holds them, all through free_align().
 */
void free_patch_id_vector(int **spu_patch_id_vector)
{
	int spu = 0;

	while (spu < SPU_THREADS) {
		free_align(spu_patch_id_vector[spu]);
		spu++;
	}

	free_align(spu_patch_id_vector);
}
예제 #5
0
파일: trace.c 프로젝트: UIKit0/openlase-1
/*
 * Release every buffer referenced by the trace context.
 *
 * tracebuf, sb and pb are released with free(); the remaining buffers are
 * released with free_align().  free(NULL) is defined to do nothing, so the
 * free() calls need no guard.  The guards around free_align() are kept
 * because its behaviour for a NULL argument is not visible from here.
 *
 * The pointers inside `ctx` are left unchanged by this function.
 */
static void free_bufs(OLTraceCtx *ctx)
{
	free(ctx->tracebuf);
	free(ctx->sb);
	free(ctx->pb);

	if (ctx->k)
		free_align(ctx->k);
	if (ctx->bibuf)
		free_align(ctx->bibuf);
	if (ctx->btbuf)
		free_align(ctx->btbuf);
	if (ctx->sibuf)
		free_align(ctx->sibuf);
	if (ctx->stbuf)
		free_align(ctx->stbuf);
	if (ctx->sxbuf)
		free_align(ctx->sxbuf);
	if (ctx->sybuf)
		free_align(ctx->sybuf);
	if (ctx->smbuf)
		free_align(ctx->smbuf);
}
예제 #6
0
END_TEST

/*******************************************************************************
 * malloc_align/free_align
 */

/*
 * Calls malloc_align() for every size below 128 combined with every alignment
 * below 256 (the dimensions of `ptr`), checks each result, and finally hands
 * every returned pointer to free_align().
 */
START_TEST(test_malloc_align)
{
	void *ptr[128][256];
	int size, align;

	for (size = 0; size < countof(ptr); size++)
	{
		for (align = 0; align < countof(ptr[0]); align++)
		{
			ptr[size][align] = malloc_align(size, align);
			/* align == 0 is skipped: the modulo below would divide by zero */
			if (align)
			{
				ck_assert((uintptr_t)ptr[size][align] % align == 0);
			}
			/* a non-NULL result is only asserted for non-zero sizes */
			if (size)
			{
				ck_assert(ptr[size][align]);
				/* write to the whole requested range */
				memset(ptr[size][align], 0xEF, size);
			}
		}
	}
	/* release everything, including the results of zero-sized requests */
	for (size = 0; size < countof(ptr); size++)
	{
		for (align = 0; align < countof(ptr[0]); align++)
		{
			free_align(ptr[size][align]);
		}
	}
}
/*
 * Release the pixel storage of an image through free_align() and reset the
 * data pointer to NULL.  A NULL `img` is ignored.
 */
void free_image(struct image* img) {
    if (img == NULL) {
        return;
    }

    free_align(img->data);
    img->data = NULL;
}
예제 #8
0
/*
 * Small driver: request 1024 bytes from malloc_align() and hand the result
 * straight back to free_align().
 *
 * The second argument passed to malloc_align() defaults to 24 and can be
 * overridden by the first command-line argument.
 */
int main(int argc, char* argv[]) {
    int align = 24;

    printf("coucou\n");

    /* atoi() yields 0 for non-numeric text; that value is passed on as-is. */
    if (argc > 1) {
        align = atoi(argv[1]);
    }

    free_align(malloc_align(1024, align));

    exit(0);
}
int main(){
  int i;
  int N=1024;
  float pi=0.0;
  pthread_t pthreads[SPU_THREADS];
  context ctxs[SPU_THREADS] __attribute__ ((aligned(16)));

  for(i=0;i<SPU_THREADS;i++){
    ctxs[i].N=N;
    ctxs[i].Nstart=(N/SPU_THREADS)*i;
    ctxs[i].Nend=(N/SPU_THREADS)*(i+1);
    ctxs[i].pi=(float*) malloc_align(sizeof(float),7);
    pthread_create(&pthreads[i], NULL, &pthread_run_spe, &ctxs[i]);
  }

  for (i=0; i<SPU_THREADS; i++)
    pthread_join (pthreads[i], NULL);

  for(i=0;i<SPU_THREADS;i++)
    pi+=*(ctxs[i].pi);

  for(i=0;i<SPU_THREADS;i++)
    free_align(ctxs[i].pi);

  printf("PI = %f\n",pi);

  return (0);
}
예제 #10
0
파일: edma.c 프로젝트: 287631983/MICO
/*
 * Stop the eDMA channel of a loop setup.
 *
 * When __ICCARM__ or __CC_ARM is defined, the dmaChStcd storage attached to
 * the setup is released here as well; other builds leave it untouched.  The
 * function finishes by calling print_edma_ch_erq() for the setup's channel
 * number on DMA0.  The setup structure itself is not freed.
 */
void disable_edma_loop(edma_loop_setup_t *loopSetup)
{
    EDMA_DRV_StopChannel(loopSetup->dmaCh);

#if defined(__ICCARM__) || defined(__CC_ARM)
    /* Only these toolchains release the TCD storage at this point. */
    free_align(loopSetup->dmaChStcd);
#endif

    print_edma_ch_erq(DMA0, loopSetup->dmaChanNum);
}
예제 #11
0
파일: btc.c 프로젝트: vmitris/Homework_CELL
void write_btc(char* path, struct c_img* out_img){
	int i, nr_blocks, j, fd, k;
	struct bits tmp;
	char *buf;

	fd = _open_for_write(path);

	write(fd, &out_img->width, sizeof(int));
	write(fd, &out_img->height, sizeof(int));

	nr_blocks = out_img->width * out_img->height / (BLOCK_SIZE * BLOCK_SIZE);
	buf = _alloc(nr_blocks * (2 + BLOCK_SIZE * BLOCK_SIZE / BITS_IN_BYTE));

	k = 0;
	for (i=0; i<nr_blocks; i++){
		//write a and b
		buf[k++] = out_img->blocks[i].a;
		buf[k++] = out_img->blocks[i].b;		
		//from bytes to bits
		j = 0;
		while (j < BLOCK_SIZE * BLOCK_SIZE){
			tmp.bit0 = out_img->blocks[i].bitplane[j++];
			tmp.bit1 = out_img->blocks[i].bitplane[j++];
			tmp.bit2 = out_img->blocks[i].bitplane[j++];
			tmp.bit3 = out_img->blocks[i].bitplane[j++];
			tmp.bit4 = out_img->blocks[i].bitplane[j++];
			tmp.bit5 = out_img->blocks[i].bitplane[j++];
			tmp.bit6 = out_img->blocks[i].bitplane[j++];
			tmp.bit7 = out_img->blocks[i].bitplane[j++];						
			buf[k++] = *((char*)&tmp);
		}
		//write bitplane
	}

	_write_buffer(fd, buf, 
		nr_blocks * (2 + BLOCK_SIZE * BLOCK_SIZE / BITS_IN_BYTE));

	free_align(buf);
	close(fd);
}
예제 #12
0
파일: btc.c 프로젝트: vmitris/Homework_CELL
void read_btc(char* path, struct c_img* out_img){
	int fd, nr_blocks, i, j = 0, k, ii;
	char *big_buf;
	struct bits tmp;

	fd = _open_for_read(path);

	read(fd, &out_img->width, sizeof(int));
	read(fd, &out_img->height, sizeof(int));

	nr_blocks = out_img->width * out_img->height / (BLOCK_SIZE * BLOCK_SIZE);
	out_img->blocks = (struct block*) _alloc(nr_blocks * sizeof(struct block));

	big_buf = (char*) _alloc(nr_blocks * (2 + BLOCK_SIZE * BLOCK_SIZE / BITS_IN_BYTE));

	_read_buffer(fd, big_buf, nr_blocks * (2 + BLOCK_SIZE * BLOCK_SIZE / BITS_IN_BYTE));

	for (i=0; i<nr_blocks; i++){
		//read a and b
		out_img->blocks[i].a = big_buf[j++];
		out_img->blocks[i].b = big_buf[j++];
		//read bitplane
		k = 0;
		for (ii=0; ii<BLOCK_SIZE * BLOCK_SIZE / BITS_IN_BYTE; ii++){
			tmp = *((struct bits*)&big_buf[j++]);
			out_img->blocks[i].bitplane[k++] = tmp.bit0;
			out_img->blocks[i].bitplane[k++] = tmp.bit1;
			out_img->blocks[i].bitplane[k++] = tmp.bit2;
			out_img->blocks[i].bitplane[k++] = tmp.bit3;
			out_img->blocks[i].bitplane[k++] = tmp.bit4;
			out_img->blocks[i].bitplane[k++] = tmp.bit5;
			out_img->blocks[i].bitplane[k++] = tmp.bit6;
			out_img->blocks[i].bitplane[k++] = tmp.bit7;			
		}
	}

	free_align(big_buf);
	close(fd);
}
예제 #13
0
/*
 * Print one result (if it reaches the score cutoff) and then release it.
 *
 * Output is produced only when res->st.nmatches is at least
 * options.minScore_cutoff; what gets printed is selected by options.ali_flag
 * (0, 1, 3 or 4; any other value goes to fatal()).  `rev` adds a
 * "(complement)" line after the headers.
 *
 * Regardless of whether anything was printed, the exon elements, the element
 * table, the sList (when set) and `res` itself are released.
 */
static void
print_res(result_p_t res, int rev, seq_p_t seq1, seq_p_t seq2)
{
  unsigned int idx;

  if (res->st.nmatches >= options.minScore_cutoff) {
    printf("\n%s%s\n", seq1->header, seq2->header);
    if (rev) {
      printf("(complement)\n\n");
    }

    switch (options.ali_flag) {
    case 0:
      /* exons only */
      print_exons(&res->eCol, res->direction);
      break;
    case 1:
      /* alignment only */
      print_align_lat(seq1->seq, seq2->seq, res);
      break;
    case 3:
      /* exons followed by the alignment */
      print_exons(&res->eCol, res->direction);
      print_align_lat(seq1->seq, seq2->seq, res);
      break;
    case 4:
      /* exons, polyA information, then the alignment */
      print_exons(&res->eCol, res->direction);
      print_polyA_info(seq1, seq2, &res->eCol, &res->st);
      print_align_lat(seq1->seq, seq2->seq, res);
      break;
    default:
      fatal("Unrecognized option for alignment output.\n");
    }

    printf("\n");
  }

  /* The result is consumed here whether or not it was printed. */
  idx = 0;
  while (idx < res->eCol.nb) {
    free(res->eCol.e.elt[idx]);
    idx++;
  }
  free(res->eCol.e.elt);

  if (res->sList) {
    free_align(res->sList);
  }
  free(res);
}
예제 #14
0
파일: spu.c 프로젝트: LaitaStefan/labs-2014
/*
 * Downscale the image block described by `img`, producing two output lines
 * per loop iteration.
 *
 * Each iteration DMA-fetches 2 * SCALE_FACTOR source lines into `input`,
 * averages two groups of four lines vertically with spu_avg into the two
 * halves of `temp`, averages every SCALE_FACTOR neighbouring pixels
 * horizontally into `output` / `output2`, and DMA-writes both lines to the
 * destination.
 *
 * NOTE(review): the eight hard-wired line pointers presumably rely on
 * SCALE_FACTOR being 4 -- confirm against its definition.
 */
void process_image_2lines(struct image* img){
	unsigned char *input, *output, *output2, *temp;
	unsigned int addr1, addr2, i, j, k, r1, g1, b1, r2, g2, b2;
	
	int block_nr = img->block_nr;
	
	vector unsigned char *v1_1, *v1_2, *v1_3, *v1_4, *v1_5;
	vector unsigned char *v2_1, *v2_2, *v2_3, *v2_4, *v2_5;

	// byte sizes reused below: one source line, and SCALE_FACTOR source lines
	unsigned int num_channels_X_img_width = NUM_CHANNELS * img->width;
	unsigned int num_channels_X_img_width_X_SCALE_FACTOR = num_channels_X_img_width * SCALE_FACTOR;
	
	input  = malloc_align(2 * num_channels_X_img_width_X_SCALE_FACTOR, 4);
	
	output  = malloc_align(num_channels_X_img_width / SCALE_FACTOR, 4);
	output2 = malloc_align(num_channels_X_img_width / SCALE_FACTOR, 4);
	
	temp = malloc_align(2 * NUM_CHANNELS * img->width, 4);

	// first output line: source lines 0..3, averaged into the first half of temp
	v1_1 = (vector unsigned char *) &input[0];
	v1_2 = (vector unsigned char *) &input[1 * num_channels_X_img_width];
	v1_3 = (vector unsigned char *) &input[2 * num_channels_X_img_width];
	v1_4 = (vector unsigned char *) &input[3 * num_channels_X_img_width];
	v1_5 = (vector unsigned char *) temp;
	
	// second output line: source lines 4..7, averaged into the second half of temp
	v2_1 = (vector unsigned char *) &input[4 * num_channels_X_img_width];
	v2_2 = (vector unsigned char *) &input[5 * num_channels_X_img_width];
	v2_3 = (vector unsigned char *) &input[6 * num_channels_X_img_width];
	v2_4 = (vector unsigned char *) &input[7 * num_channels_X_img_width];
	v2_5 = (vector unsigned char *) &temp[num_channels_X_img_width];

	addr2 = (unsigned int)img->dst; //start of image
	addr2 += (block_nr / NUM_IMAGES_HEIGHT) * img->width * NUM_CHANNELS * 
		img->height / NUM_IMAGES_HEIGHT; //start line of spu block
	addr2 += (block_nr % NUM_IMAGES_WIDTH) * NUM_CHANNELS *
		img->width / NUM_IMAGES_WIDTH;

	for (i = 0; i<img->height / SCALE_FACTOR / 2; i++){
		// get 8 lines
		addr1 = ((unsigned int)img->src) + 2 * i * num_channels_X_img_width_X_SCALE_FACTOR;
		mfc_get(input, addr1, 2 * num_channels_X_img_width * SCALE_FACTOR, MY_TAG, 0, 0);
		mfc_write_tag_mask(1 << MY_TAG);
		mfc_read_tag_status_all();

		// compute the 2 scaled lines (vertical average, 16 bytes per vector)
		for (j = 0; j < num_channels_X_img_width / 16; j++){
			v1_5[j] = spu_avg(spu_avg(v1_1[j], v1_2[j]), spu_avg(v1_3[j], v1_4[j]));
			v2_5[j] = spu_avg(spu_avg(v2_1[j], v2_2[j]), spu_avg(v2_3[j], v2_4[j]));
		}

		// horizontal average: SCALE_FACTOR neighbouring pixels collapse into one
		for (j = 0; j < img->width; j += SCALE_FACTOR){
			r1 = g1 = b1 = 0;
			r2 = b2 = g2 = 0;
			for (k = j; k < j + SCALE_FACTOR; k++) {
				unsigned int k_X_NUM_CHANNELS = k * NUM_CHANNELS;
				r1 += temp[k_X_NUM_CHANNELS + 0];
				g1 += temp[k_X_NUM_CHANNELS + 1];
				b1 += temp[k_X_NUM_CHANNELS + 2];

				r2 += temp[num_channels_X_img_width + k_X_NUM_CHANNELS + 0];
				g2 += temp[num_channels_X_img_width + k_X_NUM_CHANNELS + 1];
				b2 += temp[num_channels_X_img_width + k_X_NUM_CHANNELS + 2];
			}
			r1 /= SCALE_FACTOR;
			b1 /= SCALE_FACTOR;
			g1 /= SCALE_FACTOR;
			
			r2 /= SCALE_FACTOR;
			b2 /= SCALE_FACTOR;
			g2 /= SCALE_FACTOR;
			
			output[j / SCALE_FACTOR * NUM_CHANNELS + 0] = (unsigned char) r1;
			output[j / SCALE_FACTOR * NUM_CHANNELS + 1] = (unsigned char) g1;
			output[j / SCALE_FACTOR * NUM_CHANNELS + 2] = (unsigned char) b1;
			
			output2[j / SCALE_FACTOR * NUM_CHANNELS + 0] = (unsigned char) r2;	
			output2[j / SCALE_FACTOR * NUM_CHANNELS + 1] = (unsigned char) g2;
			output2[j / SCALE_FACTOR * NUM_CHANNELS + 2] = (unsigned char) b2;
		}

		//put the scaled line back
		mfc_put(output, addr2, img->width / SCALE_FACTOR * NUM_CHANNELS, MY_TAG, 0, 0);
		addr2 += img->width * NUM_CHANNELS; //line inside spu block
		
		// send the second scaled line as well
		mfc_put(output2, addr2, img->width / SCALE_FACTOR * NUM_CHANNELS, MY_TAG, 0, 0);
		addr2 += img->width * NUM_CHANNELS; //line inside spu block
		
		// both puts share MY_TAG, so one wait covers them
		mfc_write_tag_mask(1 << MY_TAG);
		mfc_read_tag_status_all();
	}

	free_align(temp);
	free_align(input);
	free_align(output);
	free_align(output2);
}
예제 #15
0
파일: spu.c 프로젝트: LaitaStefan/labs-2014
/*
 * Downscale the image block described by `img` using two input buffers.
 *
 * The fetch of the next SCALE_FACTOR source lines is issued into one buffer
 * before the lines already requested into the other buffer are waited for and
 * processed.  Input transfers use the buffer index (0 or 1) as their DMA tag;
 * output transfers use MY_TAG.  The last buffer is processed after the loop.
 *
 * NOTE(review): the input tags are 0 and 1 while the output uses MY_TAG; if
 * MY_TAG is 0 or 1 the output wait would also wait on an input transfer --
 * confirm against MY_TAG's definition.
 * NOTE(review): the four hard-wired line pointers per buffer presumably rely
 * on SCALE_FACTOR being 4 -- confirm.
 */
void process_image_double(struct image* img){
	unsigned char *input[2], *output, *temp;
	unsigned int addr1, addr2, i, j, k, r, g, b;
	int block_nr = img->block_nr;
	vector unsigned char *v1[2], *v2[2], *v3[2], *v4[2], *v5;

	int buf, nxt_buf; //index of the buffer (0/1)

	input[0] = malloc_align(NUM_CHANNELS * SCALE_FACTOR * img->width, 4);
	input[1] = malloc_align(NUM_CHANNELS * SCALE_FACTOR * img->width, 4);

	output = malloc_align(NUM_CHANNELS * img->width / SCALE_FACTOR, 4);
	temp = malloc_align(NUM_CHANNELS * img->width, 4);

	// byte size of one source line, reused below
	unsigned int num_channels_X_img_width = NUM_CHANNELS * img->width;

	// vector views of the four lines held in buffer 0, and of the averaged line
	v1[0] = (vector unsigned char *) &input[0][0];
	v2[0] = (vector unsigned char *) &input[0][1 * num_channels_X_img_width];
	v3[0] = (vector unsigned char *) &input[0][2 * num_channels_X_img_width];
	v4[0] = (vector unsigned char *) &input[0][3 * num_channels_X_img_width];
	v5 = (vector unsigned char *) temp;

	// vector views of the four lines held in buffer 1
	v1[1] = (vector unsigned char *) &input[1][0];
	v2[1] = (vector unsigned char *) &input[1][1 * num_channels_X_img_width];
	v3[1] = (vector unsigned char *) &input[1][2 * num_channels_X_img_width];
	v4[1] = (vector unsigned char *) &input[1][3 * num_channels_X_img_width];


	addr2 = (unsigned int)img->dst; //start of image
	addr2 += (block_nr / NUM_IMAGES_HEIGHT) * num_channels_X_img_width * 
		img->height / NUM_IMAGES_HEIGHT; //start line of spu block
	addr2 += (block_nr % NUM_IMAGES_WIDTH) * num_channels_X_img_width / NUM_IMAGES_WIDTH;

	addr1 = ((unsigned int)img->src);

	buf = 0; // first data transfer
	mfc_getb(input[buf], addr1, SCALE_FACTOR * num_channels_X_img_width, 0, 0, 0);

	// starts at 1: the first group of lines was already requested above
	for (i = 1; i<img->height / SCALE_FACTOR; i++){
		// get 4 lines
		nxt_buf = buf ^ 1; //ask for next data buffer from PPU
		
		//mfc_get with barrier
		addr1 = ((unsigned int)img->src) + i * num_channels_X_img_width * SCALE_FACTOR;
		mfc_getb(input[nxt_buf], addr1, SCALE_FACTOR * num_channels_X_img_width, nxt_buf, 0, 0);

		// wait only for the buffer about to be processed
		mfc_write_tag_mask(1 << buf);
		mfc_read_tag_status_all();

		// process current buffer
		for (j = 0; j < img->width * NUM_CHANNELS / 16; j++){
			v5[j] = spu_avg(spu_avg(v1[buf][j], v2[buf][j]), spu_avg(v3[buf][j], v4[buf][j]));
		}
		
		for (j = 0; j < img->width; j+=SCALE_FACTOR){
			r = g = b = 0;
			for (k = j; k < j + SCALE_FACTOR; k++) {
				r += temp[k * NUM_CHANNELS + 0];
				g += temp[k * NUM_CHANNELS + 1];
				b += temp[k * NUM_CHANNELS + 2];
			}
			r /= SCALE_FACTOR;
			b /= SCALE_FACTOR;
			g /= SCALE_FACTOR;

			output[j / SCALE_FACTOR * NUM_CHANNELS + 0] = (unsigned char) r;
			output[j / SCALE_FACTOR * NUM_CHANNELS + 1] = (unsigned char) g;
			output[j / SCALE_FACTOR * NUM_CHANNELS + 2] = (unsigned char) b;
		}

		// send the scaled line for the current buffer to the PPU
		mfc_put(output, addr2, img->width / SCALE_FACTOR * NUM_CHANNELS, MY_TAG, 0, 0);
		addr2 += img->width * NUM_CHANNELS; //line inside spu block
		
		// output is a single buffer, so the put is waited for before it is reused
		mfc_write_tag_mask(1 << MY_TAG);
		mfc_read_tag_status_all();

		buf = nxt_buf; //prepare next iteration
	}

	// wait for the last requested buffer
	mfc_write_tag_mask(1 << buf);
	mfc_read_tag_status_all();

	// process last buffer
	for (j = 0; j < img->width * NUM_CHANNELS / 16; j++){
		v5[j] = spu_avg(spu_avg(v1[buf][j], v2[buf][j]), spu_avg(v3[buf][j], v4[buf][j]));
	}
	
	for (j=0; j < img->width; j+=SCALE_FACTOR){
		r = g = b = 0;
		for (k = j; k < j + SCALE_FACTOR; k++) {
			r += temp[k * NUM_CHANNELS + 0];
			g += temp[k * NUM_CHANNELS + 1];
			b += temp[k * NUM_CHANNELS + 2];
		}
		r /= SCALE_FACTOR;
		b /= SCALE_FACTOR;
		g /= SCALE_FACTOR;

		output[j / SCALE_FACTOR * NUM_CHANNELS + 0] = (unsigned char) r;
		output[j / SCALE_FACTOR * NUM_CHANNELS + 1] = (unsigned char) g;
		output[j / SCALE_FACTOR * NUM_CHANNELS + 2] = (unsigned char) b;
	}

	// send last buffer to PPU
	mfc_put(output, addr2, img->width / SCALE_FACTOR * NUM_CHANNELS, MY_TAG, 0, 0);
	addr2 += img->width * NUM_CHANNELS;

	mfc_write_tag_mask(1 << MY_TAG);
	mfc_read_tag_status_all();

	free_align(temp);
	free_align(input[0]);
	free_align(input[1]);
	free_align(output);
}
예제 #16
0
파일: storage.hpp 프로젝트: bambang/vsipl
 /// Release the storage referred to by `data` through free_align().
 static void deallocate(type data)
 {
   free_align(data);
 }
예제 #17
0
파일: btc.c 프로젝트: vmitris/Homework_CELL
/*
 * Release the block array of a compressed image through free_align().
 * The c_img structure itself is not freed and image->blocks is left unchanged.
 */
void free_btc(struct c_img* image){
	free_align(image->blocks);
}
예제 #18
0
파일: ppu_master.c 프로젝트: silviu/asc.3
/* Release the seed array through free_align(). */
void free_seed_vector(int* rand_seed)
{
	free_align(rand_seed);
}
예제 #19
0
파일: imglib.c 프로젝트: silviu/asc.3
/*
 * Release an image: first its `buf` member, then the image object itself,
 * both through free_align().  `img` must not be used after this call.
 */
void free_img(image img)
{
    /* buf is released first because it is reached through img */
    free_align(img->buf);
    free_align(img);
}
예제 #20
0
main(int argc, char **argv)
{
	int	i, j, k, l, m, n;
	int	dist[20];
	int	reads;
	int	num_vertex, num_class, num_edge;
	int	*len_seq, num_seq, num_remain;
	int	**num_pa;
	char	**src_seq, **src_name;
	char	temp[100];
	ALIGN	**eq_class, *align;
	EDGE	**edge, *edge1, *edge2, *bal_edge1, *bal_edge2;
	PATH	*path;
	int	num_path;
	NODES	**vertex, *begin, *node, *node_next, **start_node;
	LIST	**list;
	READINTERVAL	*readinterval;
	POSITION	*position;
	FILE	*fp, *fp1;

	readpar();
	random1(&idum);
	initenv(argc, argv);
	printf("%d %d %d\n", sizeof(POSITION), sizeof(NODES), sizeof(LIST));

/*	Input the length of the genome (required) */

	len_seq = (int *) ckalloc(2 * MAX_NUM * sizeof(int));
	src_name = alloc_name(MAX_NUM, 100);
	fp = ckopen(lenfile, "r");
	num_seq = readlen(fp, len_seq, src_name);
	fclose(fp);

	src_seq = (char **) ckalloc(2 * num_seq * sizeof(char *));
	l = 0;
	printf("Genome length: ");
	for(i = 0; i < num_seq; i ++)	{
		l += len_seq[i];
		printf("%d ", len_seq[i]);
	}
	printf("\n");
	printf("Total length: %d\n", l);

/*	Make reverse complements of input sequences rev(i) --> i + num_seq	*/

	for(i = 0; i < num_seq; i ++)	{
		len_seq[i + num_seq] = len_seq[i];
		src_seq[i] = (char *) ckalloc(len_seq[i] * sizeof(char));
		src_seq[i + num_seq] = (char *) ckalloc(len_seq[i] * sizeof(char));
		for(j = 0; j < len_seq[i]; j ++)	{
			src_seq[num_seq + i][j] = rev(src_seq[i][len_seq[i] - j - 1]);
		}
	}

/*	Input equivalent readintervales between reads --
	see the format of the equivalent readinterval files	*/

	printf("Read equivalent readintervales...\n");
	eq_class = (ALIGN **) ckalloc(2 * num_seq * sizeof(ALIGN *));
	fp = ckopen(inpfile, "r");
	num_class = readclass(eq_class, num_seq, fp);
	fclose(fp);
	printf("# equivalent readintervales input: %d\n", num_class);

/*
	for(i = 0; i < 2 * num_seq; i ++)	{
		align = eq_class[i];
		while(align)	{
			printf("See: \n");
			output_align(align, src_name, src_seq, len_seq, num_seq);
			getchar();
			align = align -> next;
		}
	}
*/

/*	Initialize the nodes: each position in each read is assigned
	as a new node. An array of "list" is set up for each read	*/

	list = (LIST **) ckalloc(2 * num_seq * sizeof(LIST *));
	for(i = 0; i < 2 * num_seq; i ++)	{
		list[i] = (LIST *) ckalloc(len_seq[i] * sizeof(LIST));
	}
	printf("intitialize nodes...\n");
	initialize(list, len_seq, num_seq);
	printf("done.\n");
	n = countnode(list, len_seq, 2 * num_seq);
	printf("# of nodes before merge: %d\n", n);

/*	Glue together two nodes if their corresponding positions are defined
	as equivalent in a pairwise alignment		*/

	printf("Merge...\n");
	merge(num_seq, len_seq, eq_class, num_class, list);
	printf("done.\n");
	for(i = 0; i < num_seq; i ++)	{
		while(eq_class[i])	{
			eq_class[i] = free_align(eq_class[i]);
		}
	}
	free((void **) eq_class);

/*      Compute the width of each node  */

        for(i = 0; i < 2 * num_seq; i ++)       {
                for(j = 0; j < len_seq[i]; j ++)        {
                        if(!list[i][j].node -> visit)   {
                                list[i][j].node -> num_path = countthickness(list[i][j].node);
                                list[i][j].node -> visit = 1;
                        }
                }
        }
	cleannode(list, len_seq, 2 * num_seq);
	n = countnode(list, len_seq, 2 * num_seq);
	printf("# of nodes after merge: %d\n", n);

/*	Add edges to the graph		*/
	edge = (EDGE **) ckalloc(n * sizeof(EDGE *));
	num_edge = graph(num_seq, len_seq, list, edge);
	printf("# edges: %d\n", num_edge);
	start_node = (NODES **) ckalloc(num_seq * sizeof(NODES *));
	for(i = 0; i < num_seq; i ++)	{
		if(len_seq[i] > 0)	{
			start_node[i] = list[i][0].node;
		} else	{
			start_node[i] = (NODES *) NULL;
		}
	}
	for(i = 0; i < 2 * num_seq; i ++)	{
		free((void *) list[i]);
	}
	free((void **) list);

	vertex = (NODES **) ckalloc(2 * num_edge * sizeof(NODES *));
	num_vertex = count_vertex(edge, num_edge, vertex);
	free((void **) edge);

	num_pa = (int **) ckalloc(MAX_BRA * sizeof(int *));
	for(i = 0; i < MAX_BRA; i ++)	{
		num_pa[i] = (int *) ckalloc(MAX_BRA * sizeof(int));
	}
	num_edge = count_edge_simp(vertex, num_vertex, num_pa);
	printf("%d vertices %d edges (%d source %d sinks) remained.\n", num_vertex, num_edge,
		num_pa[0][1], num_pa[1][0]);

/*	Assign the complementary edges of each edge	*/
	for(i = 0; i < num_vertex; i ++)	{
		for(j = 0; j < vertex[i] -> num_nextedge; j ++)	{
			edge1 = vertex[i] -> nextedge[j];
			edge1 -> bal_edge = find_bal_edge(edge1, len_seq, num_seq, i);
		}
	}

/*	Remove bulges in the graph	*/
	printf("Shave...\n");
	num_vertex = shave_graph(vertex, num_vertex);
	printf("done.\n");

/*      Remove cycles shorter than some threshold in the graph  */
/*
        printf("Shaving graph...\n");
        num_vertex = rem_cycle(vertex, num_vertex);
        printf("done.\n%d vertices remained.\n", num_vertex);
*/

/*	remove short edges	*/
/*
	printf("Remove shortedges...\n");
	num_vertex = rem_short_edge(vertex, num_vertex, len_seq);
	printf("done.\n%d vertices remained.\n", num_vertex);
	fflush(stdout);
*/

	num_edge = count_edge_simp(vertex, num_vertex, num_pa);
	printf("%d vertices %d edges (%d source %d sinks) remained.\n", num_vertex, num_edge,
		num_pa[0][1], num_pa[1][0]);
	fflush(stdout);

/*	Allocate the spaces for paths	*/
	printf("Allocating paths...\n");
	for(i = 0; i < num_vertex; i ++)	{
		vertex[i] -> num_path = 0;
	}

/*	Build sequence paths	*/
	printf("Define paths...\n");
	m = 0;
	for(i = 0; i < num_vertex; i ++)	{
		for(j = 0; j < vertex[i] -> num_nextedge; j ++)	{
			m += vertex[i] -> nextedge[j] -> multip;
		}
	}
	path = (PATH *) ckalloc(2 * num_seq * sizeof(PATH));
	for(i = 0; i < 2 * num_seq; i ++)	{
		path[i].edge = (EDGE **) ckalloc(m * sizeof(EDGE *));
	}
	num_path = readpath(start_node, path, num_seq);
	free((void **) start_node);
	num_edge = count_edge_simp(vertex, num_vertex, num_pa);
	m = l = 0;
	for(i = 0; i < num_vertex; i ++)	{
		for(j = 0; j < vertex[i] -> num_nextedge; j ++)	{
			l += vertex[i] -> nextedge[j] -> length;
			if(vertex[i] -> nextedge[j] -> length > m)	{
				m = vertex[i] -> nextedge[j] -> length;
			}
		}
	}
	printf("%d vertics %d edges (%d source %d sinks) remained: total length %d (maximal %d).\n", num_vertex, num_edge,
	 	num_pa[0][1], num_pa[1][0], l, m);
	fflush(stdout);

/*	Make consensus of edges	*/
	initial_edge(vertex, num_vertex, src_seq, num_seq);
	printf("edge initialed\n");

/*	Output sequence path	*/

	n = 0;
	for(i = 0; i < num_vertex; i ++)	{
		vertex[i] -> visit = i;
		for(j = 0; j < vertex[i] -> num_nextedge; j ++)	{
			vertex[i] -> nextedge[j] -> start_cover = n;
			n ++;
		}
	}
	for(m = 0; m < num_seq; m ++)	{
		printf("len_path %d\n", path[m].len_path);
		printf("Sequence%d: ", m + 1);
		for(i = 0; i < path[m].len_path; i ++)	{
			printf("%d -- %d(%d,%d) --> ", path[m].edge[i] -> begin -> visit,
				path[m].edge[i] -> start_cover, path[m].edge[i] -> multip,
				path[m].edge[i] -> length);
			if(i % 5 == 4)	{
				printf("\n");
			}
		}
		if(path[m].len_path > 0)	{
			printf("%d\n", path[m].edge[i - 1] -> end -> visit);
		} else	{
			printf("\n");
		}
		fflush(stdout);
	}

/*	Output graph & contigs	*/
	sprintf(temp, "%s.edge", seqfile);
	fp = ckopen(temp, "w");
	sprintf(temp, "%s.graph", seqfile);
	fp1 = ckopen(temp, "w");
	write_graph(vertex, num_vertex, fp, fp1);
	fclose(fp);
	fclose(fp1);

/*	Output read intervals in each edge	*/
	sprintf(temp, "%s.intv", seqfile);
	fp = ckopen(temp, "w");
	write_interval(vertex, num_vertex, fp);
	fclose(fp);

/*	Output graphviz format graph	*/

	sprintf(temp, "%s", outfile);
	fp = ckopen(temp, "w");
	output_graph(vertex, num_vertex, fp);
	fclose(fp);

	for(i = 0; i < MAX_BRA; i ++)	{
		free((void *) num_pa[i]);
	}
	free((void **) num_pa);
	for(i = 0; i < 2 * num_seq; i ++)	{
		if(path[i].len_path > 0)	{
			free((void **) path[i].edge);
		}
	}
	free((void *) path);
	free_graph(vertex, num_vertex);
	for(i = 0; i < 2 * num_seq; i ++)	{
		free((void *) src_seq[i]);
	}
	free((void **) src_seq);
	free_name(src_name, MAX_NUM);
	free((void *) len_seq);
}
예제 #21
0
/*
 * PPU driver: compresses a PGM image into blocks and decompresses it again,
 * spreading the blocks of each phase over `num_spus` threads, then writes
 * both results and appends the timings to the results file.
 *
 * Arguments: mod_vect mod_dma num_spus in.pgm out.cmp out.pgm results.txt
 *
 * Returns 0 on success and -1 on a wrong argument count or a non-positive
 * num_spus.  Thread creation or join failures terminate the process with
 * exit(1); a failure to reopen stdout onto the results file returns 1.
 */
int main(int argc, char **argv)
{
    
	if (argc != 8) {
		printf("Usage: ./tema3 mod_vect mod_dma num_spus in.pgm out.cmp out.pgm results.txt");
		return -1;
    }
	int mod_vect = atoi(argv[1]);
	int mod_dma = atoi(argv[2]);
	int num_spus = atoi(argv[3]);
	char *inpgm = argv[4];
	char *outcmp = argv[5];
	char *outpgm = argv[6];
	char *results = argv[7];
	int i;

	/* num_spus is used as a divisor below; zero or negative values
	 * (including non-numeric input, for which atoi yields 0) are rejected. */
	if (num_spus <= 0) {
		fprintf(stderr, "num_spus must be a positive integer\n");
		return -1;
	}

	struct img initial_image, decompressed_image;
	struct c_img compressed_image;

	struct timeval start_total, end_total, start_op, end_op;
	double total_time = 0, op_time = 0;

	gettimeofday(&start_total, NULL);

	// read the initial image
	read_pgm(inpgm, &initial_image);

	gettimeofday(&start_op, NULL);
	
	compressed_image.width = initial_image.width;
	compressed_image.height = initial_image.height;
	int nr_cmp_blocks = (1LL * initial_image.width * initial_image.height) / (BLOCK_SIZE * BLOCK_SIZE);
	compressed_image.blocks = (struct block *)malloc_align(nr_cmp_blocks * sizeof(struct block), 7);

	pthread_t *compress_threads = (pthread_t*)malloc_align(num_spus * sizeof(pthread_t), 7);
	struct package_t *cthread_arg = (struct package_t *)malloc_align(num_spus * sizeof(struct package_t), 7);
	
	/* Same widening as nr_cmp_blocks / nr_dec_blocks so that width * height
	 * is not evaluated in a narrower type here. */
	int nr_of_blocks = (1LL * initial_image.width * initial_image.height) / (BLOCK_SIZE * BLOCK_SIZE);
	int average_blocks = nr_of_blocks / num_spus;
	int rest_blocks = nr_of_blocks % num_spus;
	int offset = 0;

	for(i = 0; i < num_spus; i++) { 

		/* fill in the package_t structure that is handed to this SPU */
		cthread_arg[i].action_type = 0;
		cthread_arg[i].mod_vect = mod_vect;
		cthread_arg[i].mod_dma = mod_dma;
		cthread_arg[i].num_spus = num_spus;
		cthread_arg[i].nr_blocks = average_blocks;
		cthread_arg[i].index_block = offset;
			
		cthread_arg[i].img_pgm.width = initial_image.width;
		cthread_arg[i].img_pgm.height = initial_image.height;
		/* first pixel of block `offset`: block row times BLOCK_SIZE image
		 * lines, plus block column times BLOCK_SIZE pixels */
		cthread_arg[i].img_pgm.pixels = initial_image.pixels + ((offset / (initial_image.width / BLOCK_SIZE)) * BLOCK_SIZE * initial_image.width + (offset % (initial_image.width / BLOCK_SIZE)) * BLOCK_SIZE);
				
		cthread_arg[i].img_cmp.width = compressed_image.width;
		cthread_arg[i].img_cmp.height = compressed_image.height;
		cthread_arg[i].img_cmp.blocks = compressed_image.blocks + ((offset / (initial_image.width / BLOCK_SIZE)) * (initial_image.width / BLOCK_SIZE) + (offset % (initial_image.width / BLOCK_SIZE)));

		/* when the blocks do not divide evenly, re-split what is left over
		 * the SPUs that have not been assigned yet */
		offset += average_blocks;
		nr_of_blocks -= average_blocks;
		if (rest_blocks != 0 && i != num_spus - 1) {
			average_blocks = nr_of_blocks / (num_spus - 1 - i);
			rest_blocks = nr_of_blocks % (num_spus - 1 - i);
		}

		/* Create thread for each SPE context */
		if (pthread_create (&compress_threads[i], NULL, &ppu_pthread_function, &cthread_arg[i]))  {
			perror ("Failed creating thread");
			exit (1);
		}
	}

	/* Wait for SPU-thread to complete execution.  */
  	for (i = 0; i < num_spus; i++) {
		if (pthread_join (compress_threads[i], NULL)) {
			perror("Failed pthread_join");
			exit (1);
		}
	}

  	free_align(compress_threads);
	free_align(cthread_arg);
 
	decompressed_image.width = initial_image.width;
	decompressed_image.height = initial_image.height;
	int nr_dec_blocks = (1LL * initial_image.width * initial_image.height) / (BLOCK_SIZE * BLOCK_SIZE);
	decompressed_image.pixels = (unsigned char *)malloc_align(initial_image.height * initial_image.width * sizeof(unsigned char), 7);

	pthread_t *decompress_threads = (pthread_t*)malloc_align(num_spus * sizeof(pthread_t), 7);
	struct package_t *dthread_arg = (struct package_t *)malloc_align(num_spus * sizeof(struct package_t), 7);
	
	int dec_average_blocks = nr_dec_blocks / num_spus;
	int dec_rest_blocks = nr_dec_blocks % num_spus;
	int dec_offset = 0;

	for(i = 0; i < num_spus; i++) { 

		/* fill in the package_t structure that is handed to this SPU */
		dthread_arg[i].action_type = 1;
		dthread_arg[i].mod_vect = mod_vect;
		dthread_arg[i].mod_dma = mod_dma;
		dthread_arg[i].num_spus = num_spus;
		dthread_arg[i].nr_blocks = dec_average_blocks;
		dthread_arg[i].index_block = dec_offset;
			
		dthread_arg[i].img_pgm.width = initial_image.width;
		dthread_arg[i].img_pgm.height = initial_image.height;
		dthread_arg[i].img_pgm.pixels = decompressed_image.pixels + ((dec_offset / (initial_image.width / BLOCK_SIZE)) * BLOCK_SIZE * initial_image.width + (dec_offset % (initial_image.width / BLOCK_SIZE)) * BLOCK_SIZE);
				
		dthread_arg[i].img_cmp.width = compressed_image.width;
		dthread_arg[i].img_cmp.height = compressed_image.height;
		dthread_arg[i].img_cmp.blocks = compressed_image.blocks + ((dec_offset / (initial_image.width / BLOCK_SIZE)) * (initial_image.width / BLOCK_SIZE) + (dec_offset % (initial_image.width / BLOCK_SIZE)));

		dec_offset += dec_average_blocks;
		nr_dec_blocks -= dec_average_blocks;
		if (dec_rest_blocks != 0 && i != num_spus - 1) {
			dec_average_blocks = nr_dec_blocks / (num_spus - 1 - i);
			dec_rest_blocks = nr_dec_blocks % (num_spus - 1 - i);
		}

		/* Create thread for each SPE context */
		if (pthread_create (&decompress_threads[i], NULL, &ppu_pthread_function, &dthread_arg[i]))  {
			perror ("Failed creating thread");
			exit (1);
		}
	}

	/* Wait for SPU-thread to complete execution.  */
  	for (i = 0; i < num_spus; i++) {
		if (pthread_join (decompress_threads[i], NULL)) {
			perror("Failed pthread_join");
			exit (1);
		}
	}
	gettimeofday(&end_op, NULL);

	write_cmp(outcmp, &compressed_image);
	write_pgm(outpgm, &decompressed_image);
	
	free_align(compressed_image.blocks);
	free_align(decompressed_image.pixels);
	free_align(decompress_threads);
	free_align(dthread_arg);

	gettimeofday(&end_total, NULL);
	
	total_time += GET_TIME_DELTA(start_total, end_total);
	op_time += GET_TIME_DELTA(start_op, end_op);

	/* freopen returns NULL on failure, after which stdout must not be used */
	if (freopen(results, "a+", stdout) == NULL) {
		perror("Failed opening results file");
		return 1;
	}
	printf("%i %lf %lf\n", num_spus, op_time, total_time);
	fclose(stdout);

	return 0;
}
/*
 * Does the actual processing of the frame.
 *
 * Fetches the descriptors of the input image and of the big image through
 * DMA, then walks the input four lines at a time: each chunk is fetched,
 * reduced by compute_lines_average() and compute_columns_average() into one
 * scaled line, and handed to store_line() together with the chunk index.
 *
 * ppu_data supplies the remote addresses of both descriptors and the SPE id.
 */
static void do_work(ppu_data_t ppu_data) {
	struct image input;
	struct image big_image;

	/* sizeof yields size_t; cast so the argument matches %lu */
	dprintf("SPU[%d] ppu_data.input:%p ppu_big_img:%p sizeof(struct image):%lu\n",
		ppu_data.spe_id, (void *)ppu_data.input,
		(void *)ppu_data.big_image, (unsigned long)sizeof(struct image));

	/* Get input image and big_image details */
	mfc_get((void *)(&input), (uint32_t)(ppu_data.input),
			(uint32_t)(sizeof(struct image)), tag_id, 0, 0);
	mfc_get((void *)(&big_image), (uint32_t)(ppu_data.big_image),
			(uint32_t)(sizeof(struct image)), tag_id, 0, 0);

	/* both descriptor transfers share tag_id, so one wait covers them */
	waittag(tag_id);
	dprintf("SPU[%d] got structs\n"\
			"input.width=%u\tinput.height=%u\n"\
			"big_image.width=%u\tbig_image.height=%u\n"\
			"input.data=%p\tbig_image.data=%p\n",
			ppu_data.spe_id, input.width, input.height, big_image.width,
			big_image.height, (void *)input.data, (void *)big_image.data);

	/* Local buffer holding four full input lines */
	struct image img_chunk;
	unsigned int buf_line_sz = input.width * NUM_CHANNELS;
	int transfer_sz = 4 * buf_line_sz;

	img_chunk.width = input.width;
	img_chunk.height = 4;
	alloc_image(&img_chunk);

	struct image img_scaled_line;
	img_scaled_line.width = input.width / SCALE_FACTOR;
	img_scaled_line.height = 1;

	/* Hack for memory align of local image data to have the same 4 bits in its
	 * address as the remote corresponding address in PPU
	 *
	 * NOTE(review): the allocation is NUM_CHANNELS * 3 bytes plus padding,
	 * while the scaled line is input.width / SCALE_FACTOR pixels wide --
	 * confirm this is large enough for what compute_columns_average() and
	 * store_line() access.
	 */
	int left_padding = (ppu_data.spe_id % 4) * 4;
	unsigned char* addr_to_free = malloc_align(NUM_CHANNELS * 3 * sizeof(char) +
												left_padding, 4);

	/* the unshifted pointer is kept because that is what must be freed */
	img_scaled_line.data = addr_to_free + left_padding;

	unsigned int i;
	/* Process 4 lines from the initial image at a time */
	for (i = 0; i < input.height / img_chunk.height; ++i) {

		/* Get the image chunk from PPU through DMA transfer */
		dprintf("SPU[%d] getting image_chunk %u of size %d\n",
				ppu_data.spe_id, i, transfer_sz);

		dprintf("SPU[%d] input.data=%p img_chunk.data=%p "\
				"start_addr=%p\n", ppu_data.spe_id, (void *)input.data,
				(void *)img_chunk.data, (void *)((uint32_t)(input.data) + i * transfer_sz));

		mfc_get((void *)(img_chunk.data), (uint32_t)(input.data) + i * transfer_sz,
				(uint32_t)(transfer_sz), tag_id, 0, 0);

		waittag(tag_id);
		dprintf("SPU[%d] got image_chunk %u\n", ppu_data.spe_id, i);

		compute_lines_average(&img_chunk, buf_line_sz);

		/* Make average for column. avg = (c0.r + c1.r) / 2 etc*/
		compute_columns_average(&img_chunk, &img_scaled_line);

		store_line(&img_scaled_line, ppu_data, &big_image, i);
	}

	free_image(&img_chunk);
	free_align(addr_to_free);
}
예제 #23
0
파일: storage.hpp 프로젝트: bambang/vsipl
 /// Release both halves of the pair (`first`, then `second`) through free_align().
 static void deallocate(type data) 
 {
   free_align(data.first);
   free_align(data.second);
 }
예제 #24
0
/*
 * Release a sorted map: its key array, its value array, and finally the map
 * object itself, all through free_align().  `map` must not be used afterwards.
 */
void closeSortedMap(SortedMap *map) {
	/* the member arrays go first because they are reached through map */
	free_align(map->keys);
	free_align(map->values);
	free_align(map);
}