/**
 * Runs one SPI transfer of buffer_length bytes using the TX and RX eDMA loops.
 *
 * Every byte of `buffer` is widened to a 32-bit word with SPI0_TXCMD OR-ed in
 * and staged in buffer_temp_32, which the TX channel reads from. The RX
 * channel's destination is `buffer` itself, so received data is written into
 * the caller's buffer.
 *
 * @param dir            not used by this implementation
 * @param buffer         bytes to send; also the RX destination
 * @param buffer_length  number of bytes to transfer
 * @return the status returned by mico_rtos_get_semaphore() while waiting for
 *         spi_transfer_finished_semaphore (timeout argument 100)
 *
 * NOTE(review): the mem_align() results are not checked before use.
 * NOTE(review): buffer_temp_32 is defined elsewhere; presumably it holds at
 *               least buffer_length words -- confirm.
 */
OSStatus host_platform_spi_transfer( bus_transfer_direction_t dir, uint8_t* buffer, uint16_t buffer_length )
{
    OSStatus status;
    int      idx;

    /* Stage the outgoing data as command words. */
    for ( idx = 0; idx < buffer_length; ++idx )
    {
        buffer_temp_32[idx] = SPI0_TXCMD | (uint32_t) buffer[idx];
    }

    /* TX loop: reads 4 bytes per payload byte from the staging array. */
    dmaSPITX.dmaChStcd = (edma_software_tcd_t *) mem_align( 2 * sizeof(edma_software_tcd_t) * dmaSPITX.period, 32 );
    dmaSPITX.srcAddr   = (uint32_t) buffer_temp_32;
    dmaSPITX.length    = buffer_length * 4;

    /* RX loop: writes one byte per payload byte into the caller's buffer. */
    dmaSPIRX.dmaChStcd = (edma_software_tcd_t *) mem_align( 2 * sizeof(edma_software_tcd_t) * dmaSPIRX.period, 32 );
    dmaSPIRX.destAddr  = (uint32_t) buffer;
    dmaSPIRX.length    = buffer_length;

    MCU_CLOCKS_NEEDED();
    SPI0_CS_ENABLE;

    setup_edma_loop( &dmaSPITX );
    setup_edma_loop( &dmaSPIRX );

    /* RX is started before TX. */
    EDMA_DRV_StartChannel( dmaSPIRX.dmaCh );
    EDMA_DRV_StartChannel( dmaSPITX.dmaCh );

    status = mico_rtos_get_semaphore( &spi_transfer_finished_semaphore, 100 );

    disable_edma_loop( &dmaSPIRX );
    disable_edma_loop( &dmaSPITX );

    SPI0_CS_DISABLE;
    MCU_CLOCKS_NOT_NEEDED();

    /*
     * NOTE(review): disable_edma_loop() already calls free_align() on
     * dmaChStcd when built with __ICCARM__ or __CC_ARM; in those builds the
     * two calls below would release the same pointers again -- confirm.
     */
    free_align( dmaSPITX.dmaChStcd );
    free_align( dmaSPIRX.dmaChStcd );

    return status;
}
/*
 * Releases a matrix made of h separately allocated rows plus the row-pointer
 * array itself. Rows are released first (in index order), then the array.
 *
 * a - array of h row pointers, each released with free_align()
 * h - number of rows in a
 */
void free_aligned_matrix(int **a, int h)
{
	int row = 0;

	while (row < h) {
		free_align(a[row]);
		row++;
	}

	free_align(a);
}
void process_image_simple(struct image* img){ unsigned char *input, *output, *temp; unsigned int addr1, addr2, i, j, k, r, g, b; int block_nr = img->block_nr; vector unsigned char *v1, *v2, *v3, *v4, *v5 ; input = malloc_align(NUM_CHANNELS * SCALE_FACTOR * img->width, 4); output = malloc_align(NUM_CHANNELS * img->width / SCALE_FACTOR, 4); temp = malloc_align(NUM_CHANNELS * img->width, 4); v1 = (vector unsigned char *) &input[0]; v2 = (vector unsigned char *) &input[1 * img->width * NUM_CHANNELS]; v3 = (vector unsigned char *) &input[2 * img->width * NUM_CHANNELS]; v4 = (vector unsigned char *) &input[3 * img->width * NUM_CHANNELS]; v5 = (vector unsigned char *) temp; addr2 = (unsigned int)img->dst; //start of image addr2 += (block_nr / NUM_IMAGES_HEIGHT) * img->width * NUM_CHANNELS * img->height / NUM_IMAGES_HEIGHT; //start line of spu block addr2 += (block_nr % NUM_IMAGES_WIDTH) * NUM_CHANNELS * img->width / NUM_IMAGES_WIDTH; for (i=0; i<img->height / SCALE_FACTOR; i++){ //get 4 lines addr1 = ((unsigned int)img->src) + i * img->width * NUM_CHANNELS * SCALE_FACTOR; mfc_get(input, addr1, SCALE_FACTOR * img->width * NUM_CHANNELS, MY_TAG, 0, 0); mfc_write_tag_mask(1 << MY_TAG); mfc_read_tag_status_all(); //compute the scaled line for (j = 0; j < img->width * NUM_CHANNELS / 16; j++){ v5[j] = spu_avg(spu_avg(v1[j], v2[j]), spu_avg(v3[j], v4[j])); } for (j=0; j < img->width; j+=SCALE_FACTOR){ r = g = b = 0; for (k = j; k < j + SCALE_FACTOR; k++) { r += temp[k * NUM_CHANNELS + 0]; g += temp[k * NUM_CHANNELS + 1]; b += temp[k * NUM_CHANNELS + 2]; } r /= SCALE_FACTOR; b /= SCALE_FACTOR; g /= SCALE_FACTOR; output[j / SCALE_FACTOR * NUM_CHANNELS + 0] = (unsigned char) r; output[j / SCALE_FACTOR * NUM_CHANNELS + 1] = (unsigned char) g; output[j / SCALE_FACTOR * NUM_CHANNELS + 2] = (unsigned char) b; } //put the scaled line back mfc_put(output, addr2, img->width / SCALE_FACTOR * NUM_CHANNELS, MY_TAG, 0, 0); addr2 += img->width * NUM_CHANNELS; //line inside spu block 
mfc_write_tag_mask(1 << MY_TAG); mfc_read_tag_status_all(); } free_align(temp); free_align(input); free_align(output); }
/*
 * Releases the per-SPU patch id arrays (SPU_THREADS of them, in index order)
 * and then the array of pointers that holds them.
 */
void free_patch_id_vector(int **spu_patch_id_vector)
{
	int spu = 0;

	while (spu < SPU_THREADS) {
		free_align(spu_patch_id_vector[spu]);
		spu++;
	}

	free_align(spu_patch_id_vector);
}
/*
 * Releases every buffer owned by the trace context and clears the pointers.
 *
 * tracebuf, sb and pb are released with free(); the remaining buffers are
 * released with free_align(). Each pointer is reset to NULL afterwards so
 * that calling free_bufs() again on the same context (for example after a
 * partially failed re-allocation) cannot free a buffer twice.
 */
static void free_bufs(OLTraceCtx *ctx)
{
	/* free(NULL) is a no-op, so the plain heap buffers need no guard. */
	free(ctx->tracebuf);
	ctx->tracebuf = NULL;
	free(ctx->sb);
	ctx->sb = NULL;
	free(ctx->pb);
	ctx->pb = NULL;

	/* free_align() is only called on non-NULL pointers, as before. */
	if (ctx->k) {
		free_align(ctx->k);
		ctx->k = NULL;
	}
	if (ctx->bibuf) {
		free_align(ctx->bibuf);
		ctx->bibuf = NULL;
	}
	if (ctx->btbuf) {
		free_align(ctx->btbuf);
		ctx->btbuf = NULL;
	}
	if (ctx->sibuf) {
		free_align(ctx->sibuf);
		ctx->sibuf = NULL;
	}
	if (ctx->stbuf) {
		free_align(ctx->stbuf);
		ctx->stbuf = NULL;
	}
	if (ctx->sxbuf) {
		free_align(ctx->sxbuf);
		ctx->sxbuf = NULL;
	}
	if (ctx->sybuf) {
		free_align(ctx->sybuf);
		ctx->sybuf = NULL;
	}
	if (ctx->smbuf) {
		free_align(ctx->smbuf);
		ctx->smbuf = NULL;
	}
}
END_TEST

/*******************************************************************************
 * malloc_align/free_align
 */

/*
 * Calls malloc_align() for every combination of size 0..127 and alignment
 * 0..255, keeping all results in ptr[size][align], then releases every
 * pointer with free_align() in a second pass.
 */
START_TEST(test_malloc_align)
{
	void *ptr[128][256];
	int size, align;

	for (size = 0; size < countof(ptr); size++)
	{
		for (align = 0; align < countof(ptr[0]); align++)
		{
			ptr[size][align] = malloc_align(size, align);
			/* the address check is skipped for align == 0 (modulo by zero) */
			if (align)
			{
				ck_assert((uintptr_t)ptr[size][align] % align == 0);
			}
			/* a non-empty allocation must succeed and be writable over its
			 * whole length */
			if (size)
			{
				ck_assert(ptr[size][align]);
				memset(ptr[size][align], 0xEF, size);
			}
		}
	}
	/* release everything, including the results of the size == 0 requests */
	for (size = 0; size < countof(ptr); size++)
	{
		for (align = 0; align < countof(ptr[0]); align++)
		{
			free_align(ptr[size][align]);
		}
	}
}
/* free image data */ void free_image(struct image* img) { if (img != NULL) { //free(img->data); free_align(img->data); img->data = NULL; } }
/*
 * Smoke test for malloc_align()/free_align(): allocates 1024 bytes with the
 * requested alignment argument and releases them again.
 *
 * Usage: prog [align]   (align defaults to 24)
 *
 * Returns 0 on success, 1 when the argument is not a valid integer or the
 * allocation fails.
 */
int main(int argc, char* argv[])
{
	int align = 24;
	void *block;

	printf("coucou\n");

	if (argc > 1) {
		char *end = NULL;
		long parsed = strtol(argv[1], &end, 10);

		/* Reject empty input, trailing junk and values that do not fit
		 * in an int instead of silently turning them into 0. */
		if (end == argv[1] || *end != '\0' || parsed != (long)(int) parsed) {
			fprintf(stderr, "invalid alignment: %s\n", argv[1]);
			return 1;
		}
		align = (int) parsed;
	}

	block = malloc_align(1024, align);
	if (block == NULL) {
		fprintf(stderr, "malloc_align(1024, %d) failed\n", align);
		return 1;
	}
	free_align(block);

	return 0;
}
/*
 * Splits the range [0, N) into SPU_THREADS slices, starts one thread per
 * slice running pthread_run_spe, and prints the sum of the per-thread
 * partial results.
 *
 * Each context gets its own aligned float (ctxs[i].pi) for the partial
 * result. The slot is zeroed before the thread starts so the final sum is
 * well defined even if a worker never writes to it.
 *
 * Returns 0 on success, 1 if an allocation or thread creation fails (threads
 * that were already started are still joined and their buffers released).
 */
int main(void){
	int i;
	int started = 0;
	int failed = 0;
	int N = 1024;
	float pi = 0.0;
	pthread_t pthreads[SPU_THREADS];
	context ctxs[SPU_THREADS] __attribute__ ((aligned(16)));

	for (i = 0; i < SPU_THREADS; i++) {
		ctxs[i].N = N;
		ctxs[i].Nstart = (N / SPU_THREADS) * i;
		/* The last slice absorbs the remainder when N is not a multiple
		 * of SPU_THREADS. */
		ctxs[i].Nend = (i == SPU_THREADS - 1) ? N : (N / SPU_THREADS) * (i + 1);

		ctxs[i].pi = (float*) malloc_align(sizeof(float), 7);
		if (ctxs[i].pi == NULL) {
			fprintf(stderr, "malloc_align failed for thread %d\n", i);
			failed = 1;
			break;
		}
		*(ctxs[i].pi) = 0.0f;

		if (pthread_create(&pthreads[i], NULL, &pthread_run_spe, &ctxs[i]) != 0) {
			fprintf(stderr, "pthread_create failed for thread %d\n", i);
			free_align(ctxs[i].pi);
			failed = 1;
			break;
		}
		started++;
	}

	/* Only threads that were actually created are joined and summed. */
	for (i = 0; i < started; i++)
		pthread_join(pthreads[i], NULL);

	for (i = 0; i < started; i++)
		pi += *(ctxs[i].pi);

	for (i = 0; i < started; i++)
		free_align(ctxs[i].pi);

	if (failed)
		return 1;

	printf("PI = %f\n", pi);
	return (0);
}
/*
 * Stops the eDMA channel of a loop setup and prints its ERQ state.
 *
 * With the IAR (__ICCARM__) or Keil (__CC_ARM) compilers the software TCD
 * memory (dmaChStcd) is also released here with free_align(). With other
 * compilers, including GCC, the descriptor memory is left untouched and
 * loopSetup itself is never released by this function.
 */
void disable_edma_loop(edma_loop_setup_t *loopSetup)
{
    EDMA_DRV_StopChannel(loopSetup->dmaCh);

#if defined(__ICCARM__) || defined(__CC_ARM)
    free_align(loopSetup->dmaChStcd);
#endif

    print_edma_ch_erq(DMA0, loopSetup->dmaChanNum);
}
void write_btc(char* path, struct c_img* out_img){ int i, nr_blocks, j, fd, k; struct bits tmp; char *buf; fd = _open_for_write(path); write(fd, &out_img->width, sizeof(int)); write(fd, &out_img->height, sizeof(int)); nr_blocks = out_img->width * out_img->height / (BLOCK_SIZE * BLOCK_SIZE); buf = _alloc(nr_blocks * (2 + BLOCK_SIZE * BLOCK_SIZE / BITS_IN_BYTE)); k = 0; for (i=0; i<nr_blocks; i++){ //write a and b buf[k++] = out_img->blocks[i].a; buf[k++] = out_img->blocks[i].b; //from bytes to bits j = 0; while (j < BLOCK_SIZE * BLOCK_SIZE){ tmp.bit0 = out_img->blocks[i].bitplane[j++]; tmp.bit1 = out_img->blocks[i].bitplane[j++]; tmp.bit2 = out_img->blocks[i].bitplane[j++]; tmp.bit3 = out_img->blocks[i].bitplane[j++]; tmp.bit4 = out_img->blocks[i].bitplane[j++]; tmp.bit5 = out_img->blocks[i].bitplane[j++]; tmp.bit6 = out_img->blocks[i].bitplane[j++]; tmp.bit7 = out_img->blocks[i].bitplane[j++]; buf[k++] = *((char*)&tmp); } //write bitplane } _write_buffer(fd, buf, nr_blocks * (2 + BLOCK_SIZE * BLOCK_SIZE / BITS_IN_BYTE)); free_align(buf); close(fd); }
void read_btc(char* path, struct c_img* out_img){ int fd, nr_blocks, i, j = 0, k, ii; char *big_buf; struct bits tmp; fd = _open_for_read(path); read(fd, &out_img->width, sizeof(int)); read(fd, &out_img->height, sizeof(int)); nr_blocks = out_img->width * out_img->height / (BLOCK_SIZE * BLOCK_SIZE); out_img->blocks = (struct block*) _alloc(nr_blocks * sizeof(struct block)); big_buf = (char*) _alloc(nr_blocks * (2 + BLOCK_SIZE * BLOCK_SIZE / BITS_IN_BYTE)); _read_buffer(fd, big_buf, nr_blocks * (2 + BLOCK_SIZE * BLOCK_SIZE / BITS_IN_BYTE)); for (i=0; i<nr_blocks; i++){ //read a and b out_img->blocks[i].a = big_buf[j++]; out_img->blocks[i].b = big_buf[j++]; //read bitplane k = 0; for (ii=0; ii<BLOCK_SIZE * BLOCK_SIZE / BITS_IN_BYTE; ii++){ tmp = *((struct bits*)&big_buf[j++]); out_img->blocks[i].bitplane[k++] = tmp.bit0; out_img->blocks[i].bitplane[k++] = tmp.bit1; out_img->blocks[i].bitplane[k++] = tmp.bit2; out_img->blocks[i].bitplane[k++] = tmp.bit3; out_img->blocks[i].bitplane[k++] = tmp.bit4; out_img->blocks[i].bitplane[k++] = tmp.bit5; out_img->blocks[i].bitplane[k++] = tmp.bit6; out_img->blocks[i].bitplane[k++] = tmp.bit7; } } free_align(big_buf); close(fd); }
/*
 * Prints one alignment result (if it reaches options.minScore_cutoff) in the
 * format selected by options.ali_flag, then releases the result: every exon
 * element, the element array, the sList (when present) and res itself.
 *
 * rev - when non-zero, a "(complement)" line is printed after the headers
 */
static void print_res(result_p_t res, int rev, seq_p_t seq1, seq_p_t seq2)
{
  unsigned int idx;

  if (res->st.nmatches >= options.minScore_cutoff) {
    printf("\n%s%s\n", seq1->header, seq2->header);
    if (rev)
      printf("(complement)\n\n");

    switch (options.ali_flag) {
    case 0:   /* exons only */
      print_exons(&res->eCol, res->direction);
      break;
    case 1:   /* alignment only */
      print_align_lat(seq1->seq, seq2->seq, res);
      break;
    case 3:   /* exons, then alignment */
      print_exons(&res->eCol, res->direction);
      print_align_lat(seq1->seq, seq2->seq, res);
      break;
    case 4:   /* exons, polyA info, then alignment */
      print_exons(&res->eCol, res->direction);
      print_polyA_info(seq1, seq2, &res->eCol, &res->st);
      print_align_lat(seq1->seq, seq2->seq, res);
      break;
    default:
      fatal("Unrecognized option for alignment output.\n");
    }
    printf("\n");
  }

  /* The result is released whether or not it was printed. */
  idx = 0;
  while (idx < res->eCol.nb) {
    free(res->eCol.e.elt[idx]);
    idx++;
  }
  free(res->eCol.e.elt);

  if (res->sList)
    free_align(res->sList);

  free(res);
}
void process_image_2lines(struct image* img){ unsigned char *input, *output, *output2, *temp; unsigned int addr1, addr2, i, j, k, r1, g1, b1, r2, g2, b2; int block_nr = img->block_nr; vector unsigned char *v1_1, *v1_2, *v1_3, *v1_4, *v1_5; vector unsigned char *v2_1, *v2_2, *v2_3, *v2_4, *v2_5; // optimization unsigned int num_channels_X_img_width = NUM_CHANNELS * img->width; unsigned int num_channels_X_img_width_X_SCALE_FACTOR = num_channels_X_img_width * SCALE_FACTOR; input = malloc_align(2 * num_channels_X_img_width_X_SCALE_FACTOR, 4); output = malloc_align(num_channels_X_img_width / SCALE_FACTOR, 4); output2 = malloc_align(num_channels_X_img_width / SCALE_FACTOR, 4); temp = malloc_align(2 * NUM_CHANNELS * img->width, 4); // first line v1_1 = (vector unsigned char *) &input[0]; v1_2 = (vector unsigned char *) &input[1 * num_channels_X_img_width]; v1_3 = (vector unsigned char *) &input[2 * num_channels_X_img_width]; v1_4 = (vector unsigned char *) &input[3 * num_channels_X_img_width]; v1_5 = (vector unsigned char *) temp; // second line v2_1 = (vector unsigned char *) &input[4 * num_channels_X_img_width]; v2_2 = (vector unsigned char *) &input[5 * num_channels_X_img_width]; v2_3 = (vector unsigned char *) &input[6 * num_channels_X_img_width]; v2_4 = (vector unsigned char *) &input[7 * num_channels_X_img_width]; v2_5 = (vector unsigned char *) &temp[num_channels_X_img_width]; addr2 = (unsigned int)img->dst; //start of image addr2 += (block_nr / NUM_IMAGES_HEIGHT) * img->width * NUM_CHANNELS * img->height / NUM_IMAGES_HEIGHT; //start line of spu block addr2 += (block_nr % NUM_IMAGES_WIDTH) * NUM_CHANNELS * img->width / NUM_IMAGES_WIDTH; for (i = 0; i<img->height / SCALE_FACTOR / 2; i++){ // get 8 lines addr1 = ((unsigned int)img->src) + 2 * i * num_channels_X_img_width_X_SCALE_FACTOR; mfc_get(input, addr1, 2 * num_channels_X_img_width * SCALE_FACTOR, MY_TAG, 0, 0); mfc_write_tag_mask(1 << MY_TAG); mfc_read_tag_status_all(); // compute the 2 scaled line for (j = 0; 
j < num_channels_X_img_width / 16; j++){ v1_5[j] = spu_avg(spu_avg(v1_1[j], v1_2[j]), spu_avg(v1_3[j], v1_4[j])); v2_5[j] = spu_avg(spu_avg(v2_1[j], v2_2[j]), spu_avg(v2_3[j], v2_4[j])); } for (j = 0; j < img->width; j += SCALE_FACTOR){ r1 = g1 = b1 = 0; r2 = b2 = g2 = 0; for (k = j; k < j + SCALE_FACTOR; k++) { unsigned int k_X_NUM_CHANNELS = k * NUM_CHANNELS; r1 += temp[k_X_NUM_CHANNELS + 0]; g1 += temp[k_X_NUM_CHANNELS + 1]; b1 += temp[k_X_NUM_CHANNELS + 2]; r2 += temp[num_channels_X_img_width + k_X_NUM_CHANNELS + 0]; g2 += temp[num_channels_X_img_width + k_X_NUM_CHANNELS + 1]; b2 += temp[num_channels_X_img_width + k_X_NUM_CHANNELS + 2]; } r1 /= SCALE_FACTOR; b1 /= SCALE_FACTOR; g1 /= SCALE_FACTOR; r2 /= SCALE_FACTOR; b2 /= SCALE_FACTOR; g2 /= SCALE_FACTOR; output[j / SCALE_FACTOR * NUM_CHANNELS + 0] = (unsigned char) r1; output[j / SCALE_FACTOR * NUM_CHANNELS + 1] = (unsigned char) g1; output[j / SCALE_FACTOR * NUM_CHANNELS + 2] = (unsigned char) b1; output2[j / SCALE_FACTOR * NUM_CHANNELS + 0] = (unsigned char) r2; output2[j / SCALE_FACTOR * NUM_CHANNELS + 1] = (unsigned char) g2; output2[j / SCALE_FACTOR * NUM_CHANNELS + 2] = (unsigned char) b2; } //put the scaled line back mfc_put(output, addr2, img->width / SCALE_FACTOR * NUM_CHANNELS, MY_TAG, 0, 0); addr2 += img->width * NUM_CHANNELS; //line inside spu block // trimite si al 2-lea set mfc_put(output2, addr2, img->width / SCALE_FACTOR * NUM_CHANNELS, MY_TAG, 0, 0); addr2 += img->width * NUM_CHANNELS; //line inside spu block mfc_write_tag_mask(1 << MY_TAG); mfc_read_tag_status_all(); } free_align(temp); free_align(input); free_align(output); free_align(output2); }
void process_image_double(struct image* img){ unsigned char *input[2], *output, *temp; unsigned int addr1, addr2, i, j, k, r, g, b; int block_nr = img->block_nr; vector unsigned char *v1[2], *v2[2], *v3[2], *v4[2], *v5; int buf, nxt_buf; //index of the buffer (0/1) input[0] = malloc_align(NUM_CHANNELS * SCALE_FACTOR * img->width, 4); input[1] = malloc_align(NUM_CHANNELS * SCALE_FACTOR * img->width, 4); output = malloc_align(NUM_CHANNELS * img->width / SCALE_FACTOR, 4); temp = malloc_align(NUM_CHANNELS * img->width, 4); //optimization unsigned int num_channels_X_img_width = NUM_CHANNELS * img->width; v1[0] = (vector unsigned char *) &input[0][0]; v2[0] = (vector unsigned char *) &input[0][1 * num_channels_X_img_width]; v3[0] = (vector unsigned char *) &input[0][2 * num_channels_X_img_width]; v4[0] = (vector unsigned char *) &input[0][3 * num_channels_X_img_width]; v5 = (vector unsigned char *) temp; v1[1] = (vector unsigned char *) &input[1][0]; v2[1] = (vector unsigned char *) &input[1][1 * num_channels_X_img_width]; v3[1] = (vector unsigned char *) &input[1][2 * num_channels_X_img_width]; v4[1] = (vector unsigned char *) &input[1][3 * num_channels_X_img_width]; addr2 = (unsigned int)img->dst; //start of image addr2 += (block_nr / NUM_IMAGES_HEIGHT) * num_channels_X_img_width * img->height / NUM_IMAGES_HEIGHT; //start line of spu block addr2 += (block_nr % NUM_IMAGES_WIDTH) * num_channels_X_img_width / NUM_IMAGES_WIDTH; addr1 = ((unsigned int)img->src); buf = 0; // first data transfer mfc_getb(input[buf], addr1, SCALE_FACTOR * num_channels_X_img_width, 0, 0, 0); for (i = 1; i<img->height / SCALE_FACTOR; i++){ // get 4 lines nxt_buf = buf ^ 1; //ask for next data buffer from PPU //mfg_get with barrier addr1 = ((unsigned int)img->src) + i * num_channels_X_img_width * SCALE_FACTOR; mfc_getb(input[nxt_buf], addr1, SCALE_FACTOR * num_channels_X_img_width, nxt_buf, 0, 0); mfc_write_tag_mask(1 << buf); mfc_read_tag_status_all(); // process current buffer for (j = 0; j < 
img->width * NUM_CHANNELS / 16; j++){ v5[j] = spu_avg(spu_avg(v1[buf][j], v2[buf][j]), spu_avg(v3[buf][j], v4[buf][j])); } for (j = 0; j < img->width; j+=SCALE_FACTOR){ r = g = b = 0; for (k = j; k < j + SCALE_FACTOR; k++) { r += temp[k * NUM_CHANNELS + 0]; g += temp[k * NUM_CHANNELS + 1]; b += temp[k * NUM_CHANNELS + 2]; } r /= SCALE_FACTOR; b /= SCALE_FACTOR; g /= SCALE_FACTOR; output[j / SCALE_FACTOR * NUM_CHANNELS + 0] = (unsigned char) r; output[j / SCALE_FACTOR * NUM_CHANNELS + 1] = (unsigned char) g; output[j / SCALE_FACTOR * NUM_CHANNELS + 2] = (unsigned char) b; } // sent precedent buffer to PPU mfc_put(output, addr2, img->width / SCALE_FACTOR * NUM_CHANNELS, MY_TAG, 0, 0); addr2 += img->width * NUM_CHANNELS; //line inside spu block mfc_write_tag_mask(1 << MY_TAG); mfc_read_tag_status_all(); buf = nxt_buf; //prepare next iteration } mfc_write_tag_mask(1 << buf); mfc_read_tag_status_all(); // process last buffer for (j = 0; j < img->width * NUM_CHANNELS / 16; j++){ v5[j] = spu_avg(spu_avg(v1[buf][j], v2[buf][j]), spu_avg(v3[buf][j], v4[buf][j])); } for (j=0; j < img->width; j+=SCALE_FACTOR){ r = g = b = 0; for (k = j; k < j + SCALE_FACTOR; k++) { r += temp[k * NUM_CHANNELS + 0]; g += temp[k * NUM_CHANNELS + 1]; b += temp[k * NUM_CHANNELS + 2]; } r /= SCALE_FACTOR; b /= SCALE_FACTOR; g /= SCALE_FACTOR; output[j / SCALE_FACTOR * NUM_CHANNELS + 0] = (unsigned char) r; output[j / SCALE_FACTOR * NUM_CHANNELS + 1] = (unsigned char) g; output[j / SCALE_FACTOR * NUM_CHANNELS + 2] = (unsigned char) b; } // send last buffer to PPU mfc_put(output, addr2, img->width / SCALE_FACTOR * NUM_CHANNELS, MY_TAG, 0, 0); addr2 += img->width * NUM_CHANNELS; mfc_write_tag_mask(1 << MY_TAG); mfc_read_tag_status_all(); free_align(temp); free_align(input[0]); free_align(input[1]); free_align(output); }
/* Releases a value of `type` by handing it directly to free_align(). */
static void deallocate(type data)
{
	free_align(data);
}
/*
 * Releases the block array of a compressed image.
 *
 * A NULL image is ignored. After the call image->blocks is NULL, so a second
 * call or a later access cannot touch the released memory. The c_img struct
 * itself is not freed.
 */
void free_btc(struct c_img* image){
	if (image == NULL)
		return;

	free_align(image->blocks);
	image->blocks = NULL;
}
/* Releases the seed array by passing it to free_align(). */
void free_seed_vector(int* rand_seed)
{
	free_align(rand_seed);
}
/*
 * Releases an image: first its buffer (img->buf), then the image object
 * itself. img must not be used after this call.
 */
void free_img(image img)
{
	free_align(img->buf);	/* payload first: it is reached through img */
	free_align(img);
}
main(int argc, char **argv) { int i, j, k, l, m, n; int dist[20]; int reads; int num_vertex, num_class, num_edge; int *len_seq, num_seq, num_remain; int **num_pa; char **src_seq, **src_name; char temp[100]; ALIGN **eq_class, *align; EDGE **edge, *edge1, *edge2, *bal_edge1, *bal_edge2; PATH *path; int num_path; NODES **vertex, *begin, *node, *node_next, **start_node; LIST **list; READINTERVAL *readinterval; POSITION *position; FILE *fp, *fp1; readpar(); random1(&idum); initenv(argc, argv); printf("%d %d %d\n", sizeof(POSITION), sizeof(NODES), sizeof(LIST)); /* Input the length of the genome (required) */ len_seq = (int *) ckalloc(2 * MAX_NUM * sizeof(int)); src_name = alloc_name(MAX_NUM, 100); fp = ckopen(lenfile, "r"); num_seq = readlen(fp, len_seq, src_name); fclose(fp); src_seq = (char **) ckalloc(2 * num_seq * sizeof(char *)); l = 0; printf("Genome length: "); for(i = 0; i < num_seq; i ++) { l += len_seq[i]; printf("%d ", len_seq[i]); } printf("\n"); printf("Total length: %d\n", l); /* Make reverse complements of input sequences rev(i) --> i + num_seq */ for(i = 0; i < num_seq; i ++) { len_seq[i + num_seq] = len_seq[i]; src_seq[i] = (char *) ckalloc(len_seq[i] * sizeof(char)); src_seq[i + num_seq] = (char *) ckalloc(len_seq[i] * sizeof(char)); for(j = 0; j < len_seq[i]; j ++) { src_seq[num_seq + i][j] = rev(src_seq[i][len_seq[i] - j - 1]); } } /* Input equivalent readintervales between reads -- see the format of the equivalent readinterval files */ printf("Read equivalent readintervales...\n"); eq_class = (ALIGN **) ckalloc(2 * num_seq * sizeof(ALIGN *)); fp = ckopen(inpfile, "r"); num_class = readclass(eq_class, num_seq, fp); fclose(fp); printf("# equivalent readintervales input: %d\n", num_class); /* for(i = 0; i < 2 * num_seq; i ++) { align = eq_class[i]; while(align) { printf("See: \n"); output_align(align, src_name, src_seq, len_seq, num_seq); getchar(); align = align -> next; } } */ /* Initialize the nodes: each position in each read is assigned as a new 
node. An array of "list" is set up for each read */ list = (LIST **) ckalloc(2 * num_seq * sizeof(LIST *)); for(i = 0; i < 2 * num_seq; i ++) { list[i] = (LIST *) ckalloc(len_seq[i] * sizeof(LIST)); } printf("intitialize nodes...\n"); initialize(list, len_seq, num_seq); printf("done.\n"); n = countnode(list, len_seq, 2 * num_seq); printf("# of nodes before merge: %d\n", n); /* Glue together two nodes if their corresponding positions are defined as equivalent in a pairwise alignment */ printf("Merge...\n"); merge(num_seq, len_seq, eq_class, num_class, list); printf("done.\n"); for(i = 0; i < num_seq; i ++) { while(eq_class[i]) { eq_class[i] = free_align(eq_class[i]); } } free((void **) eq_class); /* Compute the width of each node */ for(i = 0; i < 2 * num_seq; i ++) { for(j = 0; j < len_seq[i]; j ++) { if(!list[i][j].node -> visit) { list[i][j].node -> num_path = countthickness(list[i][j].node); list[i][j].node -> visit = 1; } } } cleannode(list, len_seq, 2 * num_seq); n = countnode(list, len_seq, 2 * num_seq); printf("# of nodes after merge: %d\n", n); /* Add edges to the graph */ edge = (EDGE **) ckalloc(n * sizeof(EDGE *)); num_edge = graph(num_seq, len_seq, list, edge); printf("# edges: %d\n", num_edge); start_node = (NODES **) ckalloc(num_seq * sizeof(NODES *)); for(i = 0; i < num_seq; i ++) { if(len_seq[i] > 0) { start_node[i] = list[i][0].node; } else { start_node[i] = (NODES *) NULL; } } for(i = 0; i < 2 * num_seq; i ++) { free((void *) list[i]); } free((void **) list); vertex = (NODES **) ckalloc(2 * num_edge * sizeof(NODES *)); num_vertex = count_vertex(edge, num_edge, vertex); free((void **) edge); num_pa = (int **) ckalloc(MAX_BRA * sizeof(int *)); for(i = 0; i < MAX_BRA; i ++) { num_pa[i] = (int *) ckalloc(MAX_BRA * sizeof(int)); } num_edge = count_edge_simp(vertex, num_vertex, num_pa); printf("%d vertices %d edges (%d source %d sinks) remained.\n", num_vertex, num_edge, num_pa[0][1], num_pa[1][0]); /* Assign the complementary edges of each edge */ 
for(i = 0; i < num_vertex; i ++) { for(j = 0; j < vertex[i] -> num_nextedge; j ++) { edge1 = vertex[i] -> nextedge[j]; edge1 -> bal_edge = find_bal_edge(edge1, len_seq, num_seq, i); } } /* Remove bulges in the graph */ printf("Shave...\n"); num_vertex = shave_graph(vertex, num_vertex); printf("done.\n"); /* Remove cycles shorter than some threshold in the graph */ /* printf("Shaving graph...\n"); num_vertex = rem_cycle(vertex, num_vertex); printf("done.\n%d vertices remained.\n", num_vertex); */ /* remove short edges */ /* printf("Remove shortedges...\n"); num_vertex = rem_short_edge(vertex, num_vertex, len_seq); printf("done.\n%d vertices remained.\n", num_vertex); fflush(stdout); */ num_edge = count_edge_simp(vertex, num_vertex, num_pa); printf("%d vertices %d edges (%d source %d sinks) remained.\n", num_vertex, num_edge, num_pa[0][1], num_pa[1][0]); fflush(stdout); /* Allocate the spaces for paths */ printf("Allocating paths...\n"); for(i = 0; i < num_vertex; i ++) { vertex[i] -> num_path = 0; } /* Build sequence paths */ printf("Define paths...\n"); m = 0; for(i = 0; i < num_vertex; i ++) { for(j = 0; j < vertex[i] -> num_nextedge; j ++) { m += vertex[i] -> nextedge[j] -> multip; } } path = (PATH *) ckalloc(2 * num_seq * sizeof(PATH)); for(i = 0; i < 2 * num_seq; i ++) { path[i].edge = (EDGE **) ckalloc(m * sizeof(EDGE *)); } num_path = readpath(start_node, path, num_seq); free((void **) start_node); num_edge = count_edge_simp(vertex, num_vertex, num_pa); m = l = 0; for(i = 0; i < num_vertex; i ++) { for(j = 0; j < vertex[i] -> num_nextedge; j ++) { l += vertex[i] -> nextedge[j] -> length; if(vertex[i] -> nextedge[j] -> length > m) { m = vertex[i] -> nextedge[j] -> length; } } } printf("%d vertics %d edges (%d source %d sinks) remained: total length %d (maximal %d).\n", num_vertex, num_edge, num_pa[0][1], num_pa[1][0], l, m); fflush(stdout); /* Make consensus of edges */ initial_edge(vertex, num_vertex, src_seq, num_seq); printf("edge initialed\n"); /* Output 
sequence path */ n = 0; for(i = 0; i < num_vertex; i ++) { vertex[i] -> visit = i; for(j = 0; j < vertex[i] -> num_nextedge; j ++) { vertex[i] -> nextedge[j] -> start_cover = n; n ++; } } for(m = 0; m < num_seq; m ++) { printf("len_path %d\n", path[m].len_path); printf("Sequence%d: ", m + 1); for(i = 0; i < path[m].len_path; i ++) { printf("%d -- %d(%d,%d) --> ", path[m].edge[i] -> begin -> visit, path[m].edge[i] -> start_cover, path[m].edge[i] -> multip, path[m].edge[i] -> length); if(i % 5 == 4) { printf("\n"); } } if(path[m].len_path > 0) { printf("%d\n", path[m].edge[i - 1] -> end -> visit); } else { printf("\n"); } fflush(stdout); } /* Output graph & contigs */ sprintf(temp, "%s.edge", seqfile); fp = ckopen(temp, "w"); sprintf(temp, "%s.graph", seqfile); fp1 = ckopen(temp, "w"); write_graph(vertex, num_vertex, fp, fp1); fclose(fp); fclose(fp1); /* Output read intervals in each edge */ sprintf(temp, "%s.intv", seqfile); fp = ckopen(temp, "w"); write_interval(vertex, num_vertex, fp); fclose(fp); /* Output graphviz format graph */ sprintf(temp, "%s", outfile); fp = ckopen(temp, "w"); output_graph(vertex, num_vertex, fp); fclose(fp); for(i = 0; i < MAX_BRA; i ++) { free((void *) num_pa[i]); } free((void **) num_pa); for(i = 0; i < 2 * num_seq; i ++) { if(path[i].len_path > 0) { free((void **) path[i].edge); } } free((void *) path); free_graph(vertex, num_vertex); for(i = 0; i < 2 * num_seq; i ++) { free((void *) src_seq[i]); } free((void **) src_seq); free_name(src_name, MAX_NUM); free((void *) len_seq); }
int main(int argc, char **argv) { if (argc != 8) { printf("Usage: ./tema3 mod_vect mod_dma num_spus in.pgm out.cmp out.pgm results.txt"); return -1; } int mod_vect = atoi(argv[1]); int mod_dma = atoi(argv[2]); int num_spus = atoi(argv[3]); char *inpgm = argv[4]; char *outcmp = argv[5]; char *outpgm = argv[6]; char *results = argv[7]; int i; struct img initial_image, decompressed_image; struct c_img compressed_image; struct timeval start_total, end_total, start_op, end_op; double total_time = 0, op_time = 0; gettimeofday(&start_total, NULL); // citeste imaginea initiala read_pgm(inpgm, &initial_image); gettimeofday(&start_op, NULL); compressed_image.width = initial_image.width; compressed_image.height = initial_image.height; int nr_cmp_blocks = (1LL * initial_image.width * initial_image.height) / (BLOCK_SIZE * BLOCK_SIZE); compressed_image.blocks = (struct block *)malloc_align(nr_cmp_blocks * sizeof(struct block), 7); pthread_t *compress_threads = (pthread_t*)malloc_align(num_spus * sizeof(pthread_t), 7); struct package_t *cthread_arg = (struct package_t *)malloc_align(num_spus * sizeof(struct package_t), 7); int nr_of_blocks = (initial_image.width * initial_image.height) / (BLOCK_SIZE * BLOCK_SIZE); int average_blocks = nr_of_blocks / num_spus; int rest_blocks = nr_of_blocks % num_spus; int offset = 0; for(i = 0; i < num_spus; i++) { /* completeaza structura package_t de trimis la spu pentru fiecare spu*/ cthread_arg[i].action_type = 0; cthread_arg[i].mod_vect = mod_vect; cthread_arg[i].mod_dma = mod_dma; cthread_arg[i].num_spus = num_spus; cthread_arg[i].nr_blocks = average_blocks; cthread_arg[i].index_block = offset; cthread_arg[i].img_pgm.width = initial_image.width; cthread_arg[i].img_pgm.height = initial_image.height; cthread_arg[i].img_pgm.pixels = initial_image.pixels + ((offset / (initial_image.width / BLOCK_SIZE)) * BLOCK_SIZE * initial_image.width + (offset % (initial_image.width / BLOCK_SIZE)) * BLOCK_SIZE); cthread_arg[i].img_cmp.width = 
compressed_image.width; cthread_arg[i].img_cmp.height = compressed_image.height; cthread_arg[i].img_cmp.blocks = compressed_image.blocks + ((offset / (initial_image.width / BLOCK_SIZE)) * (initial_image.width / BLOCK_SIZE) + (offset % (initial_image.width / BLOCK_SIZE))); offset += average_blocks; nr_of_blocks -= average_blocks; if (rest_blocks != 0 && i != num_spus - 1) { average_blocks = nr_of_blocks / (num_spus - 1 - i); rest_blocks = nr_of_blocks % (num_spus - 1 - i); } /* Create thread for each SPE context */ if (pthread_create (&compress_threads[i], NULL, &ppu_pthread_function, &cthread_arg[i])) { perror ("Failed creating thread"); exit (1); } } /* Wait for SPU-thread to complete execution. */ for (i = 0; i < num_spus; i++) { if (pthread_join (compress_threads[i], NULL)) { perror("Failed pthread_join"); exit (1); } } free_align(compress_threads); free_align(cthread_arg); decompressed_image.width = initial_image.width; decompressed_image.height = initial_image.height; int nr_dec_blocks = (1LL * initial_image.width * initial_image.height) / (BLOCK_SIZE * BLOCK_SIZE); decompressed_image.pixels = (unsigned char *)malloc_align(initial_image.height * initial_image.width * sizeof(unsigned char), 7); pthread_t *decompress_threads = (pthread_t*)malloc_align(num_spus * sizeof(pthread_t), 7); struct package_t *dthread_arg = (struct package_t *)malloc_align(num_spus * sizeof(struct package_t), 7); int dec_average_blocks = nr_dec_blocks / num_spus; int dec_rest_blocks = nr_dec_blocks % num_spus; int dec_offset = 0; for(i = 0; i < num_spus; i++) { /* completeaza structura package_t de trimis la spu pentru fiecare spu*/ dthread_arg[i].action_type = 1; dthread_arg[i].mod_vect = mod_vect; dthread_arg[i].mod_dma = mod_dma; dthread_arg[i].num_spus = num_spus; dthread_arg[i].nr_blocks = dec_average_blocks; dthread_arg[i].index_block = dec_offset; dthread_arg[i].img_pgm.width = initial_image.width; dthread_arg[i].img_pgm.height = initial_image.height; 
dthread_arg[i].img_pgm.pixels = decompressed_image.pixels + ((dec_offset / (initial_image.width / BLOCK_SIZE)) * BLOCK_SIZE * initial_image.width + (dec_offset % (initial_image.width / BLOCK_SIZE)) * BLOCK_SIZE); dthread_arg[i].img_cmp.width = compressed_image.width; dthread_arg[i].img_cmp.height = compressed_image.height; dthread_arg[i].img_cmp.blocks = compressed_image.blocks + ((dec_offset / (initial_image.width / BLOCK_SIZE)) * (initial_image.width / BLOCK_SIZE) + (dec_offset % (initial_image.width / BLOCK_SIZE))); dec_offset += dec_average_blocks; nr_dec_blocks -= dec_average_blocks; if (dec_rest_blocks != 0 && i != num_spus - 1) { dec_average_blocks = nr_dec_blocks / (num_spus - 1 - i); dec_rest_blocks = nr_dec_blocks % (num_spus - 1 - i); } /* Create thread for each SPE context */ if (pthread_create (&decompress_threads[i], NULL, &ppu_pthread_function, &dthread_arg[i])) { perror ("Failed creating thread"); exit (1); } } /* Wait for SPU-thread to complete execution. */ for (i = 0; i < num_spus; i++) { if (pthread_join (decompress_threads[i], NULL)) { perror("Failed pthread_join"); exit (1); } } gettimeofday(&end_op, NULL); write_cmp(outcmp, &compressed_image); write_pgm(outpgm, &decompressed_image); free_align(compressed_image.blocks); free_align(decompressed_image.pixels); free_align(decompress_threads); free_align(dthread_arg); gettimeofday(&end_total, NULL); total_time += GET_TIME_DELTA(start_total, end_total); op_time += GET_TIME_DELTA(start_op, end_op); freopen(results, "a+", stdout); printf("%i %lf %lf\n", num_spus, op_time, total_time); fclose(stdout); return 0; }
/*
 * Does the actual processing of the frame.
 *
 * Fetches the descriptors of the input image and of the big image by DMA,
 * then walks the input four lines at a time: each chunk is fetched, averaged
 * over its lines (compute_lines_average) and over its columns
 * (compute_columns_average) into one scaled line, which store_line() places
 * into the big image.
 *
 * The scaled-line buffer is sized for a full scaled line
 * (width / SCALE_FACTOR pixels of NUM_CHANNELS bytes), and never smaller
 * than the three pixels that were allocated before.
 * NOTE(review): this assumes compute_columns_average() writes one whole
 *               scaled line into img_scaled_line.data -- confirm.
 */
static void do_work(ppu_data_t ppu_data)
{
	struct image input;
	struct image big_image;

	/* sizeof yields size_t; cast so the argument matches %lu */
	dprintf("SPU[%d] ppu_data.input:%p ppu_big_img:%p sizeof(struct image):%lu\n",
			ppu_data.spe_id, (void *)ppu_data.input,
			(void *)ppu_data.big_image, (unsigned long)sizeof(struct image));

	/* Get input image and big_image details */
	mfc_get((void *)(&input), (uint32_t)(ppu_data.input),
			(uint32_t)(sizeof(struct image)), tag_id, 0, 0);
	mfc_get((void *)(&big_image), (uint32_t)(ppu_data.big_image),
			(uint32_t)(sizeof(struct image)), tag_id, 0, 0);
	waittag(tag_id);

	dprintf("SPU[%d] got structs\n"
			"input.width=%u\tinput.height=%u\n"
			"big_image.width=%u\tbig_image.height=%u\n"
			"input.data=%p\tbig_image.data=%p\n",
			ppu_data.spe_id, input.width, input.height,
			big_image.width, big_image.height,
			(void *)input.data, (void *)big_image.data);

	/* Local buffer holding four full input lines. */
	struct image img_chunk;
	unsigned int buf_line_sz = input.width * NUM_CHANNELS;
	int transfer_sz = 4 * buf_line_sz;
	img_chunk.width = input.width;
	img_chunk.height = 4;
	alloc_image(&img_chunk);

	struct image img_scaled_line;
	img_scaled_line.width = input.width / SCALE_FACTOR;
	img_scaled_line.height = 1;

	/* Room for one full scaled line, but at least the original 3 pixels. */
	unsigned int scaled_line_sz = img_scaled_line.width * img_scaled_line.height * NUM_CHANNELS;
	if (scaled_line_sz < NUM_CHANNELS * 3 * sizeof(char))
		scaled_line_sz = NUM_CHANNELS * 3 * sizeof(char);

	/* Hack for memory align of local image data to have the same 4 bits in its
	 * address as the remote corresponding address in PPU */
	int left_padding = (ppu_data.spe_id % 4) * 4;
	unsigned char* addr_to_free = malloc_align(scaled_line_sz + left_padding, 4);
	if (addr_to_free == NULL) {
		dprintf("SPU[%d] could not allocate the scaled line buffer\n",
				ppu_data.spe_id);
		free_image(&img_chunk);
		return;
	}
	img_scaled_line.data = addr_to_free + left_padding;

	unsigned int i;
	/* Process 4 lines from the initial image at a time */
	for (i = 0; i < input.height / img_chunk.height; ++i) {
		/* Get the image chunk from PPU through DMA transfer */
		dprintf("SPU[%d] getting image_chunk %d of size %d\n",
				ppu_data.spe_id, i, transfer_sz);
		dprintf("SPU[%d] input.data=%p img_chunk.data=%p "
				"start_addr=%p\n", ppu_data.spe_id,
				(void *)input.data, (void *)img_chunk.data,
				(void *)((uint32_t)(input.data) + i * transfer_sz));

		mfc_get((void *)(img_chunk.data),
				(uint32_t)(input.data) + i * transfer_sz,
				(uint32_t)(transfer_sz), tag_id, 0, 0);
		waittag(tag_id);
		dprintf("SPU[%d] got image_chunk %d\n", ppu_data.spe_id, i);

		compute_lines_average(&img_chunk, buf_line_sz);
		/* Make average for column. avg = (c0.r + c1.r) / 2 etc*/
		compute_columns_average(&img_chunk, &img_scaled_line);
		store_line(&img_scaled_line, ppu_data, &big_image, i);
	}

	free_image(&img_chunk);
	/* free the pointer malloc_align returned, not the padded data pointer */
	free_align(addr_to_free);
}
/*
 * Releases both members of a pair-like value: data.first, then data.second,
 * each through free_align().
 */
static void deallocate(type data)
{
	free_align(data.first);
	free_align(data.second);
}
/*
 * Destroys a sorted map: releases the key array, the value array and then
 * the map object itself. map must not be used after this call.
 */
void closeSortedMap(SortedMap *map)
{
	/* members first: they are reached through map */
	free_align(map->keys);
	free_align(map->values);

	free_align(map);
}