/*
 * get_num_nodes
 *
 * Count the nodes of the binary tree rooted at n.
 *
 * n: root of the (sub)tree; may be NULL.
 *
 * Returns 0 for an empty tree, otherwise 1 (for n itself) plus the sizes of
 * the left and right subtrees.
 */
int get_num_nodes (PNODE n)
{
    if (n == NULL) {
        return 0;
    }

    /* A NULL child contributes 0 through the base case above, so the
       children do not need to be tested individually before recursing. */
    return get_num_nodes (n->left) + 1 + get_num_nodes (n->right);
}
/**
 * fork_tree
 *
 * Fork a "head" process which in turn builds a tree of processes via
 * recursive_fork(), then collect one PID_ENTRY per process over a pipe.
 *
 * depth               requested depth; stored in TOTAL_DEPTH
 * breadth             breadth handed to recursive_fork() and get_num_nodes()
 * tree_breadth_change stored in TREE_BREADTH_CHANGE for recursive_fork()
 * verbose             print progress messages from the forked processes
 *
 * Returns a new[]-allocated array of get_num_nodes(depth, breadth) entries
 * (the caller owns it and must delete[] it), or NULL if the pipe or the head
 * process could not be created. Entries that could not be read from the
 * pipe are left value-initialised.
 *
 * The head process stops itself with SIGSTOP after reporting; something
 * outside this function has to resume it before it reaps its children and
 * exits.
 */
PID_ENTRY* fork_tree(int depth, int breadth, int tree_breadth_change, bool verbose){
  TREE_BREADTH_CHANGE = tree_breadth_change;
  TOTAL_DEPTH = depth;

  int fds[2];
  if(pipe(fds) == -1){
    perror("Error creating pipe");
    return NULL;
  }

  pid_t child = fork();
  if(child == -1){
    perror("Error forking head node");
    close(fds[0]);
    close(fds[1]);
    return NULL;
  }

  if(child == 0){ // in child
    // the child only ever writes to the pipe
    close(fds[0]);

    if(verbose){
      printf("Process %d is the head node at depth 0\n", getpid());
    }

    int num_nodes = get_num_nodes(depth, breadth);
    PID_ENTRY* p = recursive_fork(depth - 1, breadth, verbose);

    // make this process a little less boring
    do_work(MEMFACTOR*1024, verbose);

    p[0].pid = getpid();
    p[0].ppid = getppid();
    p[0].depth = TOTAL_DEPTH - depth;
    p[0].subtree_depth = depth;
    p[0].breadth = 0;

    // write() may transfer fewer bytes than requested, so keep going until
    // the whole table has been sent (or the pipe reports an error)
    const char* out = reinterpret_cast<const char*>(p);
    size_t to_write = static_cast<size_t>(num_nodes) * sizeof(PID_ENTRY);
    while(to_write > 0){
      ssize_t written = write(fds[1], out, to_write);
      if(written <= 0){
        perror("Error writing process table");
        break;
      }
      out += written;
      to_write -= static_cast<size_t>(written);
    }
    close(fds[1]);
    delete[] p;

    // stop myself before exiting
    kill(getpid(), SIGSTOP);

    // I'll wait for my children to exit before I do
    while(wait(0) != -1)
      ;

    if(verbose){
      printf("exiting process %d\n", getpid());
    }
    exit(0);
  }

  // in parent
  int num_nodes = get_num_nodes(depth, breadth);
  // value-initialise so that a truncated transfer never exposes garbage
  PID_ENTRY* arr = new PID_ENTRY[num_nodes]();
  close(fds[1]);

  // a single read() on a pipe may return only part of the data
  char* in = reinterpret_cast<char*>(arr);
  size_t to_read = static_cast<size_t>(num_nodes) * sizeof(PID_ENTRY);
  while(to_read > 0){
    ssize_t got = read(fds[0], in, to_read);
    if(got <= 0){ // EOF or error: nothing more will arrive
      break;
    }
    in += got;
    to_read -= static_cast<size_t>(got);
  }
  close(fds[0]);

  if(to_read > 0){
    fprintf(stderr, "fork_tree: process table truncated (%lu bytes missing)\n",
            static_cast<unsigned long>(to_read));
  }

  for(int i = 0; i < num_nodes; i++){
    printf("pid %d ppid %d depth %d breadth %d subtree_depth %d\n",
           arr[i].pid, arr[i].ppid, arr[i].depth, arr[i].breadth,
           arr[i].subtree_depth);
  }
  return arr;
}
/**
 * create_heap_from_sp_matrix
 *
 * Build a single heap holding copies of every seed found in the seed heaps
 * of all S_POINTs of an SP_MATRIX. The new heap is created with room for
 * exactly the total number of seeds counted in the matrix.
 *
 * Returns the newly created heap.
 *
 * NOTE(review): the scratch copy of each S_POINT heap and the seeds popped
 * from it are not released here; whether that is a leak depends on the
 * ownership rules of copy_heap()/pop_heap_root(), which are not visible
 * from this function - confirm.
 */
HEAP *create_heap_from_sp_matrix (
  SP_MATRIX *sp_mat    // the matrix of s_points
)
{
  const int n_rows = sp_get_num_rows(sp_mat);
  const int n_cols = sp_get_num_cols(sp_mat);
  int r, c;

  // Pass 1: count the seeds over all s_points so the heap can be sized.
  int total_seeds = 0;
  for (r = 0; r < n_rows; r++) {
    for (c = 0; c < n_cols; c++) {
      S_POINT *sp = get_spoint(sp_mat, r, c);
      total_seeds += get_num_nodes(sp->seed_heap);
    }
  }

  // Create the combined heap, using the seed callbacks.
  HEAP *mega_heap = create_heap(
    total_seeds,
    (int (*) (void *, void*))compare_seed,
    (void *)copy_seed,
    (void (*)(void*))free_seed,
    (char* (*)(void*))get_str_seed,
    (void (*)(FILE *, void*))print_seed
  );

  // Pass 2: drain a copy of each s_point heap into the combined heap, so
  // the heaps stored in the matrix themselves are left untouched.
  for (r = 0; r < n_rows; r++) {
    for (c = 0; c < n_cols; c++) {
      S_POINT *sp = get_spoint(sp_mat, r, c);
      HEAP *scratch = copy_heap(sp->seed_heap);

      int remaining = get_num_nodes(scratch);
      while (remaining-- > 0) {
        void *popped = pop_heap_root(scratch);
        void *duplicate = mega_heap->copy(popped);
        // The value returned by add_node_heap is not used.
        add_node_heap(mega_heap, duplicate);
      }
    }
  }

  return mega_heap;
} // create_heap_from_sp_matrix
/**
 * union_seed_packets
 *
 * Combine two SEED_PACKET arrays: the seeds described by both arrays are
 * pushed into one temporary heap of capacity *f_length, and whatever the
 * heap retains is written back into f_result.
 *
 * The signature matches an MPI user-defined reduction function; this
 * function is used in reduce_across_heaps.
 *
 * f_data    first packet array (only read)
 * f_result  second packet array; overwritten with the combined result
 * f_length  number of packets in each array
 * datatype  not used
 *
 * Element 0 of each array carries, in num_seed_packets, how many of its
 * packets are filled.
 *
 * NOTE(review): the temporary heap and any value returned by add_node_heap
 * are not released here - confirm against the heap API whether that leaks.
 */
void union_seed_packets(void *f_data, void *f_result, int *f_length,
                        MPI_Datatype *datatype)
{
  SEED_PACKET *incoming = (SEED_PACKET *)f_data;
  SEED_PACKET *combined = (SEED_PACKET *)f_result;
  SEED_PACKET *sources[2];
  int src;
  int i;

  // create a heap to do the heap union
  HEAP *heap = create_heap(
    *f_length,
    (int (*) (void *, void*))compare_seed,
    (void *)copy_seed,
    (void (*)(void*))free_seed,
    (char* (*)(void*))get_str_seed,
    (void (*)(FILE *, void*))print_seed
  );

  // unpack the seeds of f_data first, then those of f_result, into the heap
  sources[0] = incoming;
  sources[1] = combined;
  for (src = 0; src < 2; src++) {
    SEED_PACKET *packets = sources[src];
    int n_filled = packets[0].num_seed_packets;

    for (i = 0; i < n_filled; i++) {
      SEED *seed = new_seed(packets[i].seed, packets[i].score);
      add_node_heap(heap, seed);
    }
  }

  // pack the heap back into f_result
  int num_seeds = get_num_nodes(heap);

  // record the count in packet 0 even when the heap is empty
  combined[0].num_seed_packets = num_seeds;

  for (i = 0; i < num_seeds; i++) {
    // populated heap nodes are at index 1 to num_seeds
    SEED *curr_seed = get_node(heap, i + 1);

    combined[i].num_seed_packets = num_seeds;
    combined[i].score = get_seed_score(curr_seed);
    strcpy(combined[i].seed, get_str_seed(curr_seed));
  }
} // union_seed_packets
void WikiGraph::push_page(WikiPage& wp) try { //update the ID of the new page wp.ID = get_num_nodes() + 1; // update WikiGraph data title_to_node[wp.title] = wp.ID; node_to_wiki.push_back(wp); // create list of Edges to be added to the graph list<Edge> p_neigh; if (title_to_node.size() == 1) { // if empty graph Graph::push_node(p_neigh); return; } ifstream f(wp.html_location); //get all possible articles (i.e. linked to the input wiki page in html) set<string> allPossibleArticles = allAssociations(f); for(auto& s : allPossibleArticles) { if (title_to_node.find(s) == title_to_node.end()) { //check this continue; } if(s == wp.title) continue; if(s.length() == 0) continue; // compute weight ifstream page_1_in (wp.txt_location.c_str()); ifstream page_2_in (node_to_wiki[title_to_node[s]].txt_location.c_str()); if(!page_1_in.is_open()) { cout << "Cannot open: " << wp.txt_location << endl; continue;} if(!page_2_in.is_open()) { cout << "Cannot open: " << node_to_wiki[title_to_node[s]].txt_location << endl; continue;} int weight = countOccurences(page_1_in, s) + countOccurences(page_2_in, wp.title); page_1_in.close(); page_2_in.close(); if( weight > 0) { Edge e { title_to_node[wp.title], title_to_node[s], weight}; p_neigh.push_back(e); } } Graph::push_node(p_neigh); } catch (my_exception& ex) { ex.addToStack("push_page"); throw; }
/**
 * transfer_final_scores
 *
 * For every S_POINT of the matrix whose seed heap is non-empty, copy the
 * details of the heap's best seed (score, e_cons0 and consensus string)
 * into the S_POINT itself. S_POINTs with an empty heap are left unchanged;
 * a message is written to stderr for them when TRACE is set.
 */
void transfer_final_scores (
  SP_MATRIX *sp_matrix ///< This object
)
{
  const int n_rows = sp_get_num_rows(sp_matrix);
  const int n_cols = sp_get_num_cols(sp_matrix);
  int r, c;

  for (r = 0; r < n_rows; r++) {
    for (c = 0; c < n_cols; c++) {
      S_POINT *sp = &sp_matrix->matrix[r][c];
      HEAP *heap = sp->seed_heap;

      if (get_num_nodes(heap) < 1) {
        /* An empty heap could mean that no seeds added to the s_point had
           enough maxima to be evaluated by align_top_subsequences.
           Report this situation: */
        if (TRACE) {
          fprintf(stderr,
                  "Heap of spoint was empty, possibly because no seeds had"
                  " enough local maxima. w = %i. nsites0 = %f.\n",
                  sp->w0, sp->nsites0);
        }
        continue;
      }

      SEED *best = (SEED *)(get_node(heap, get_best_node(heap)));

      sp->score = get_seed_score(best);
      // A seed does not correspond to a location in the dataset:
      sp->iseq = -1;
      sp->ioff = -1;
      sp->e_cons0 = get_e_seed(best);

      // Replace the old consensus string with a copy of the seed's string.
      free(sp->cons0);
      sp->cons0 = strdup(get_str_seed(best));
    } // c
  } // r
} // transfer_final_scores
/**
 * reduce_across_heaps
 *
 * Do a reduction across an array of S_POINT heaps. For each S_POINT in the
 * array, the seeds of its heap are popped and packed into SEED_PACKETs,
 * MPI_Allreduce combines the packets of all processes with
 * union_seed_packets, and the seeds described by the resulting packets are
 * added back into the heap.
 *
 * On the first call an MPI datatype describing SEED_PACKET and the MPI
 * reduction operation are created; they are kept in static variables and
 * reused by later calls.
 *
 * The datatype is built with MPI_Get_address / MPI_Type_create_struct
 * (MPI_Address and MPI_Type_struct were removed in MPI-3.0). Displacements
 * are taken from the start of the struct and the extent is set to
 * sizeof(SEED_PACKET), so consecutive array elements are addressed
 * correctly whatever the field order or trailing padding.
 *
 * NOTE(review): seeds returned by pop_heap_root are not released here, and
 * a heap whose maximum size is 0 would make packets[0] an out-of-bounds
 * access - confirm both against the heap API and the callers.
 */
void reduce_across_heaps(
  S_POINT *s_points,     // an array of S_POINTS
  int n_nsites0          // the number of S_POINTS in the s_points array
)
{
  static int init;
  static MPI_Datatype seed_packet_type;
  static MPI_Op union_seed_packets_op;
  int i_packet;

  // Initialise MPI stuff
  if (init==0){
    init = 1;
    SEED_PACKET seed_packet;
    int block_lengths[4];
    MPI_Aint displacements[4];
    MPI_Aint address[4];
    MPI_Aint base_address;
    MPI_Datatype typelist[4];
    MPI_Datatype unsized_type;
    int i_field;

    // Build the derived datatype
    // set the types
    typelist[0]=MPI_DOUBLE;
    typelist[1]=MPI_INT;
    typelist[2]=MPI_INT;
    typelist[3]=MPI_CHAR;

    // set number of elements of each type
    block_lengths[0] = block_lengths[1] = block_lengths[2] = 1;
    block_lengths[3] = MAXSITE;       // the maximum length of a seed

    // calculate the displacements of the fields from the start of the struct
    MPI_Get_address(&seed_packet, &base_address);
    MPI_Get_address(&seed_packet.score, &address[0]);
    MPI_Get_address(&seed_packet.width, &address[1]);
    MPI_Get_address(&seed_packet.num_seed_packets, &address[2]);
    MPI_Get_address(&seed_packet.seed, &address[3]);
    for (i_field = 0; i_field < 4; i_field++) {
      displacements[i_field] = address[i_field] - base_address;
    }

    // create the derived type and give it the extent of one SEED_PACKET
    MPI_Type_create_struct(4, block_lengths, displacements, typelist,
                           &unsized_type);
    MPI_Type_create_resized(unsized_type, 0, (MPI_Aint)sizeof(SEED_PACKET),
                            &seed_packet_type);

    // commit the derived type; the intermediate type is no longer needed
    MPI_Type_commit(&seed_packet_type);
    MPI_Type_free(&unsized_type);

    // set the MPI reduction operation
    MPI_Op_create(union_seed_packets, FALSE, &union_seed_packets_op);
  } // initialise MPI

  // do a reduction for each s_point in the s_point list
  int sp_idx;
  for (sp_idx = 0; sp_idx < n_nsites0; sp_idx++){
    // package the heap for the spoint at sp_idx in the s_points list
    HEAP *seed_heap = s_points[sp_idx].seed_heap;

    // get the maximum heap size and the number of seeds in the heap
    int max_heap_size = get_max_size(seed_heap);
    int num_seeds = get_num_nodes(seed_heap);

    // set the number of seed packets to the maximum heap size
    SEED_PACKET packets[max_heap_size], best_packets[max_heap_size];

    // set num_seed_packets to the number of filled nodes in the heap (in
    // case the heap is empty)
    packets[0].num_seed_packets = num_seeds;

    // package each seed in the heap into a seed packet
    for (i_packet = 0; i_packet < num_seeds; i_packet++){
      // set the number of seed_packets that will be filled
      packets[i_packet].num_seed_packets = num_seeds;
      // get the seed at the root
      SEED *curr_seed = pop_heap_root(seed_heap);
      // set the seed packet score
      packets[i_packet].score = get_seed_score(curr_seed);
      // set the width of the string
      packets[i_packet].width = get_width(curr_seed);
      // set the seed
      char *seed_str = get_str_seed(curr_seed);
      strcpy(packets[i_packet].seed, seed_str);
    }

    /*
    // print the packets before the reduction
    if (mpMyID() == NODE_NO){
      fprintf(stdout, "BEFORE\n");
      for (i_packet = 0; i_packet < max_heap_size; i_packet++)
        fprintf(stdout, "node %d packet %d score= %g width= %i seed= %s\n",
                mpMyID(), i_packet, packets[i_packet].score,
                packets[i_packet].width, packets[i_packet].seed);
      fflush(stdout);
    }
    */

    // Do the reduction
    MPI_Allreduce((void *)packets, (void *)best_packets, max_heap_size,
                  seed_packet_type, union_seed_packets_op, MPI_COMM_WORLD);

    /*
    // print the packets after the reduction
    if (mpMyID() == NODE_NO){
      fprintf(stdout, "AFTER\n");
      for (i_packet = 0; i_packet < max_heap_size; i_packet++)
        fprintf(stdout, "node %d packet %d score= %g width= %i seed= %s\n",
                mpMyID(), i_packet, best_packets[i_packet].score,
                best_packets[i_packet].width, best_packets[i_packet].seed);
      fflush(stdout);
    }
    */

    // Unpack the best seed packets into the heap

    // Get the number of filled packets
    int num_seed_packets = best_packets[0].num_seed_packets;

    // Add the best seeds to the heap
    for (i_packet = 0; i_packet < num_seed_packets; i_packet++){
      double score = best_packets[i_packet].score;
      char *seed_str = best_packets[i_packet].seed;
      SEED *best_seed = new_seed(seed_str, score);
      // the value returned by add_node_heap is not used
      (void)add_node_heap(seed_heap, best_seed);
    }
  } // end n_nsites0
} // reduce_across_heaps
/*
 * read_int_line
 *
 * Read one line from stdin into buf and parse an integer from it using
 * sscanf's "%i" conversion.
 *
 * Returns  1 if an integer was stored in *value,
 *          0 if the line did not start with an integer (*value untouched),
 *         -1 on end of file or read error.
 */
static int read_int_line (char *buf, size_t size, int *value)
{
    if (fgets (buf, (int) size, stdin) == NULL) {
        return -1;
    }
    return (sscanf (buf, "%i", value) == 1) ? 1 : 0;
}

/*
 * main
 *
 * Interactive menu for exercising the binary tree routines. The loop ends
 * when option 0 is chosen or when stdin reaches end of file (previously an
 * EOF made the menu loop forever, and unparsable input reused a stale or
 * uninitialised number).
 */
int main()
{
    char buf[50];
    int option = -1;
    int value = 0;
    int status;
    PNODE tree = NULL;
    PNODE node = NULL;

    while (1) {
        printf ("--------------------------\n");
        printf ("Options are:\n\n");
        printf (" 0 Exit\n");
        printf (" 1 Insert node\n");
        printf (" 2 Delete node\n");
        printf (" 3 Find node\n");
        printf (" 4 Pre order traversal\n");
        printf (" 5 In order traversal\n");
        printf (" 6 Post order traversal\n");
        printf (" 7 Max depth\n");
        printf (" 8 Min depth\n");
        printf (" 9 Max value\n");
        printf (" 10 Min value\n");
        printf (" 11 Node Count\n\n");
        printf ("--------------------------\n");
        printf ("Select an option: ");

        status = read_int_line (buf, sizeof(buf), &option);
        if (status < 0) {
            /* no more input: leave instead of spinning on the menu */
            break;
        }
        printf ("--------------------------\n");

        if (status == 0 || option < 0 || option > 11) {
            fprintf (stderr, "Invalid option");
            continue;
        }

        /* options 1-3 need a number from the user */
        if (option >= 1 && option <= 3) {
            if (option == 1) {
                printf ("Enter number to insert: ");
            } else if (option == 2) {
                printf ("Enter number to delete: ");
            } else {
                printf ("Enter number to find: ");
            }

            status = read_int_line (buf, sizeof(buf), &value);
            printf ("\n\n");
            if (status < 0) {
                break;
            }
            if (status == 0) {
                fprintf (stderr, "Invalid number\n");
                continue;
            }
        }

        switch (option) {
            case 0:
                exit (0);
            case 1:
                insert_node (&tree, value);
                break;
            case 2:
                delete_node (&tree, value);
                break;
            case 3:
                node = find_node (tree, value);
                if (node != NULL) {
                    printf ("Found node\n\n");
                } else {
                    printf ("Couldn't find node\n\n");
                }
                break;
            case 4:
                printf ("Pre order traversal: ");
                pre_order_traversal (tree);
                printf ("\n\n");
                break;
            case 5:
                printf ("In order traversal: ");
                in_order_traversal (tree);
                printf ("\n\n");
                break;
            case 6:
                printf ("Post order traversal: ");
                post_order_traversal (tree);
                printf ("\n\n");
                break;
            case 7:
                printf ("Max depth is %i\n\n", get_max_depth (tree));
                break;
            case 8:
                printf ("Min depth is %i\n\n", get_min_depth (tree));
                break;
            case 9:
                printf ("Max value is %i\n\n", get_max_value (tree));
                break;
            case 10:
                printf ("Min value is %i\n\n", get_min_value (tree));
                break;
            case 11:
                printf ("Node Count is %i\n\n", get_num_nodes (tree));
                break;
        }
    }

    return 0;
}
/**
 * recursive_fork
 *
 * Fork `breadth` children, each of which recurses with depth - 1, and
 * gather the PID_ENTRY records of all descendants through one pipe per
 * child.
 *
 * depth    remaining levels below the calling process; <= 0 means leaf
 * breadth  number of children to fork at this level
 * verbose  print progress messages
 *
 * Returns a new[]-allocated array owned by the caller: a single entry for
 * a leaf, otherwise get_num_nodes(depth + 1, breadth) entries where entry 0
 * is reserved for the calling process (only pid/ppid are filled in here)
 * and the rest are the children's subtrees in child order. Entries that
 * could not be obtained (pipe/fork failure, truncated transfer) are left
 * value-initialised.
 *
 * Each child stops itself with SIGSTOP after reporting and never returns
 * from this function; it exits once resumed and once its own children have
 * exited.
 *
 * NOTE(review): a child recurses with breadth + TREE_BREADTH_CHANGE but
 * sizes its write with get_num_nodes(depth, breadth), and the parent
 * assumes equally sized subtrees - confirm get_num_nodes() accounts for
 * TREE_BREADTH_CHANGE.
 */
PID_ENTRY* recursive_fork(int depth, int breadth, bool verbose){
  if(depth <= 0){
    // if this node is a leaf return just its own information
    PID_ENTRY* arr = new PID_ENTRY[1]();
    arr[0].pid = getpid();
    arr[0].ppid = getppid();
    arr[0].depth = 1;
    arr[0].breadth = breadth;
    return arr;
  }

  // create the file descriptor arrays for the pipes
  // we'll need one pipe for each child; -1 marks "no descriptor"
  int** fds = new int*[breadth];
  for(int i = 0; i < breadth; i++){
    fds[i] = new int[2];
    fds[i][0] = -1;
    fds[i][1] = -1;
  }

  // we are really at the depth of the calling process hence depth + 1
  const int total_nodes = get_num_nodes(depth + 1, breadth);
  PID_ENTRY* pid_arr = new PID_ENTRY[total_nodes]();

  for(int i = 0; i < breadth; i++){
    // create the pipe
    if(pipe(fds[i]) == -1){
      perror("Error creating pipe");
      fds[i][0] = -1;
      fds[i][1] = -1;
      continue;
    }

    pid_t child = fork();
    if(child == 0){ // in child
      if(verbose){
        printf("Process %d is the #%d child of %d at depth %d\n",
               getpid(), i, getppid(), TOTAL_DEPTH - depth);
      }

      // recurse
      PID_ENTRY* child_arr =
        recursive_fork(depth - 1, breadth + TREE_BREADTH_CHANGE, verbose);

      // do the work before the passing our array to the parent so when the
      // array gets all the way back to the calling function all the work is
      // done
      do_work(MEMFACTOR*1024, verbose);

      // write the array of all my descendent info back to my parent
      int num_nodes = get_num_nodes(depth, breadth);
      child_arr[0].pid = getpid();
      child_arr[0].ppid = getppid();
      child_arr[0].depth = TOTAL_DEPTH - depth;
      child_arr[0].subtree_depth = depth;
      child_arr[0].breadth = i;

      // close read end of pipe
      close(fds[i][0]);

      // write() may be partial; loop until everything has been sent
      const char* out = reinterpret_cast<const char*>(child_arr);
      size_t to_write = static_cast<size_t>(num_nodes) * sizeof(PID_ENTRY);
      while(to_write > 0){
        ssize_t written = write(fds[i][1], out, to_write);
        if(written == -1 && errno == EINTR){
          continue;
        }
        if(written <= 0){
          perror("Error writing process table");
          break;
        }
        out += written;
        to_write -= static_cast<size_t>(written);
      }
      close(fds[i][1]);
      delete[] child_arr;

      // stop myself, parent will restart me in time
      kill(getpid(), SIGSTOP);

      // I'll wait for my children to exit before I do
      while(wait(0) != -1)
        ;

      if(verbose){
        printf("exiting process %d\n", getpid());
      }
      exit(0);
    }

    // in parent
    if(child == -1){
      // remember errno first: perror()/printf() may overwrite it
      const int fork_errno = errno;
      perror("Error forking child");
      // Separate errors?
      if(fork_errno == EAGAIN){
        printf("EAGAIN\n");
      } else if(fork_errno == ENOMEM){
        printf("ENOMEM\n");
      }
      // nobody will ever write to this pipe
      close(fds[i][0]);
      fds[i][0] = -1;
    }

    // the parent never writes; closing now also keeps later siblings from
    // inheriting this write end
    close(fds[i][1]);
    fds[i][1] = -1;
  }

  // leave space at the front of the array for myself
  pid_arr[0].pid = getpid();
  pid_arr[0].ppid = getppid();

  // add the information for all of my descendents to the array
  // (we are really at the depth of the calling process)
  const int per_child = (breadth > 0) ? (total_nodes - 1) / breadth : 0;
  for(int i = 0; i < breadth; i++){
    if(fds[i][0] == -1){
      continue; // this child was never created
    }

    // read straight into this child's slice of the result; a single read()
    // on a pipe may deliver only part of the data
    char* in = reinterpret_cast<char*>(pid_arr + 1 + i*per_child);
    size_t to_read = static_cast<size_t>(per_child) * sizeof(PID_ENTRY);
    while(to_read > 0){
      ssize_t got = read(fds[i][0], in, to_read);
      if(got == -1 && errno == EINTR){
        continue;
      }
      if(got <= 0){ // EOF or error
        break;
      }
      in += got;
      to_read -= static_cast<size_t>(got);
    }
    close(fds[i][0]);
    fds[i][0] = -1;

    if(to_read > 0){
      fprintf(stderr,
              "recursive_fork: table of child #%d truncated (%lu bytes missing)\n",
              i, static_cast<unsigned long>(to_read));
    }
  }

  // release the pipe bookkeeping
  for(int i = 0; i < breadth; i++){
    delete[] fds[i];
  }
  delete[] fds;

  return pid_arr;
}
void NAME(char *TRANSA, char *TRANSB, blasint *M, blasint *N, blasint *K, FLOAT *alpha, FLOAT *a, blasint *ldA, FLOAT *b, blasint *ldB, FLOAT *beta, FLOAT *c, blasint *ldC){ blas_arg_t args; int transa, transb, nrowa, nrowb; blasint info; char transA, transB; FLOAT *buffer; FLOAT *sa, *sb; #ifdef SMP #ifndef COMPLEX #ifdef XDOUBLE int mode = BLAS_XDOUBLE | BLAS_REAL; #elif defined(DOUBLE) int mode = BLAS_DOUBLE | BLAS_REAL; #else int mode = BLAS_SINGLE | BLAS_REAL; #endif #else #ifdef XDOUBLE int mode = BLAS_XDOUBLE | BLAS_COMPLEX; #elif defined(DOUBLE) int mode = BLAS_DOUBLE | BLAS_COMPLEX; #else int mode = BLAS_SINGLE | BLAS_COMPLEX; #endif #endif #endif #if defined(SMP) && !defined(NO_AFFINITY) && !defined(USE_SIMPLE_THREADED_LEVEL3) int nodes; #endif PRINT_DEBUG_NAME; args.m = *M; args.n = *N; args.k = *K; args.a = (void *)a; args.b = (void *)b; args.c = (void *)c; args.lda = *ldA; args.ldb = *ldB; args.ldc = *ldC; args.alpha = (void *)alpha; args.beta = (void *)beta; transA = *TRANSA; transB = *TRANSB; TOUPPER(transA); TOUPPER(transB); transa = -1; transb = -1; if (transA == 'N') transa = 0; if (transA == 'T') transa = 1; #ifndef COMPLEX if (transA == 'R') transa = 0; if (transA == 'C') transa = 1; #else if (transA == 'R') transa = 2; if (transA == 'C') transa = 3; #endif if (transB == 'N') transb = 0; if (transB == 'T') transb = 1; #ifndef COMPLEX if (transB == 'R') transb = 0; if (transB == 'C') transb = 1; #else if (transB == 'R') transb = 2; if (transB == 'C') transb = 3; #endif nrowa = args.m; if (transa & 1) nrowa = args.k; nrowb = args.k; if (transb & 1) nrowb = args.n; info = 0; if (args.ldc < args.m) info = 13; if (args.ldb < nrowb) info = 10; if (args.lda < nrowa) info = 8; if (args.k < 0) info = 5; if (args.n < 0) info = 4; if (args.m < 0) info = 3; if (transb < 0) info = 2; if (transa < 0) info = 1; if (info){ BLASFUNC(xerbla)(ERROR_NAME, &info, sizeof(ERROR_NAME)); return; } #else void CNAME(enum CBLAS_ORDER order, enum CBLAS_TRANSPOSE TransA, 
enum CBLAS_TRANSPOSE TransB, blasint m, blasint n, blasint k, #ifndef COMPLEX FLOAT alpha, #else FLOAT *alpha, #endif FLOAT *a, blasint lda, FLOAT *b, blasint ldb, #ifndef COMPLEX FLOAT beta, #else FLOAT *beta, #endif FLOAT *c, blasint ldc) { blas_arg_t args; int transa, transb; blasint nrowa, nrowb, info; XFLOAT *buffer; XFLOAT *sa, *sb; #ifdef SMP #ifndef COMPLEX #ifdef XDOUBLE int mode = BLAS_XDOUBLE | BLAS_REAL; #elif defined(DOUBLE) int mode = BLAS_DOUBLE | BLAS_REAL; #else int mode = BLAS_SINGLE | BLAS_REAL; #endif #else #ifdef XDOUBLE int mode = BLAS_XDOUBLE | BLAS_COMPLEX; #elif defined(DOUBLE) int mode = BLAS_DOUBLE | BLAS_COMPLEX; #else int mode = BLAS_SINGLE | BLAS_COMPLEX; #endif #endif #endif #if defined(SMP) && !defined(NO_AFFINITY) && !defined(USE_SIMPLE_THREADED_LEVEL3) int nodes; #endif PRINT_DEBUG_CNAME; #ifndef COMPLEX args.alpha = (void *)α args.beta = (void *)β #else args.alpha = (void *)alpha; args.beta = (void *)beta; #endif transa = -1; transb = -1; info = 0; if (order == CblasColMajor) { args.m = m; args.n = n; args.k = k; args.a = (void *)a; args.b = (void *)b; args.c = (void *)c; args.lda = lda; args.ldb = ldb; args.ldc = ldc; if (TransA == CblasNoTrans) transa = 0; if (TransA == CblasTrans) transa = 1; #ifndef COMPLEX if (TransA == CblasConjNoTrans) transa = 0; if (TransA == CblasConjTrans) transa = 1; #else if (TransA == CblasConjNoTrans) transa = 2; if (TransA == CblasConjTrans) transa = 3; #endif if (TransB == CblasNoTrans) transb = 0; if (TransB == CblasTrans) transb = 1; #ifndef COMPLEX if (TransB == CblasConjNoTrans) transb = 0; if (TransB == CblasConjTrans) transb = 1; #else if (TransB == CblasConjNoTrans) transb = 2; if (TransB == CblasConjTrans) transb = 3; #endif nrowa = args.m; if (transa & 1) nrowa = args.k; nrowb = args.k; if (transb & 1) nrowb = args.n; info = -1; if (args.ldc < args.m) info = 13; if (args.ldb < nrowb) info = 10; if (args.lda < nrowa) info = 8; if (args.k < 0) info = 5; if (args.n < 0) info = 4; if (args.m 
< 0) info = 3; if (transb < 0) info = 2; if (transa < 0) info = 1; } if (order == CblasRowMajor) { args.m = n; args.n = m; args.k = k; args.a = (void *)b; args.b = (void *)a; args.c = (void *)c; args.lda = ldb; args.ldb = lda; args.ldc = ldc; if (TransB == CblasNoTrans) transa = 0; if (TransB == CblasTrans) transa = 1; #ifndef COMPLEX if (TransB == CblasConjNoTrans) transa = 0; if (TransB == CblasConjTrans) transa = 1; #else if (TransB == CblasConjNoTrans) transa = 2; if (TransB == CblasConjTrans) transa = 3; #endif if (TransA == CblasNoTrans) transb = 0; if (TransA == CblasTrans) transb = 1; #ifndef COMPLEX if (TransA == CblasConjNoTrans) transb = 0; if (TransA == CblasConjTrans) transb = 1; #else if (TransA == CblasConjNoTrans) transb = 2; if (TransA == CblasConjTrans) transb = 3; #endif nrowa = args.m; if (transa & 1) nrowa = args.k; nrowb = args.k; if (transb & 1) nrowb = args.n; info = -1; if (args.ldc < args.m) info = 13; if (args.ldb < nrowb) info = 10; if (args.lda < nrowa) info = 8; if (args.k < 0) info = 5; if (args.n < 0) info = 4; if (args.m < 0) info = 3; if (transb < 0) info = 2; if (transa < 0) info = 1; } if (info >= 0) { BLASFUNC(xerbla)(ERROR_NAME, &info, sizeof(ERROR_NAME)); return; } #endif if ((args.m == 0) || (args.n == 0)) return; #if 0 fprintf(stderr, "m = %4d n = %d k = %d lda = %4d ldb = %4d ldc = %4d\n", args.m, args.n, args.k, args.lda, args.ldb, args.ldc); #endif IDEBUG_START; FUNCTION_PROFILE_START(); buffer = (XFLOAT *)blas_memory_alloc(0); sa = (XFLOAT *)((BLASLONG)buffer +GEMM_OFFSET_A); sb = (XFLOAT *)(((BLASLONG)sa + ((GEMM_P * GEMM_Q * COMPSIZE * SIZE + GEMM_ALIGN) & ~GEMM_ALIGN)) + GEMM_OFFSET_B); #ifdef SMP mode |= (transa << BLAS_TRANSA_SHIFT); mode |= (transb << BLAS_TRANSB_SHIFT); args.common = NULL; args.nthreads = num_cpu_avail(3); if (args.nthreads == 1) { #endif (gemm[(transb << 2) | transa])(&args, NULL, NULL, sa, sb, 0); #ifdef SMP } else { #ifndef USE_SIMPLE_THREADED_LEVEL3 #ifndef NO_AFFINITY nodes = get_num_nodes(); 
if ((nodes > 1) && get_node_equal()) { args.nthreads /= nodes; gemm_thread_mn(mode, &args, NULL, NULL, gemm[16 | (transb << 2) | transa], sa, sb, nodes); } else { #endif (gemm[16 | (transb << 2) | transa])(&args, NULL, NULL, sa, sb, 0); #else GEMM_THREAD(mode, &args, NULL, NULL, gemm[(transb << 2) | transa], sa, sb, args.nthreads); #endif #ifndef USE_SIMPLE_THREADED_LEVEL3 #ifndef NO_AFFINITY } #endif #endif #endif #ifdef SMP } #endif blas_memory_free(buffer); FUNCTION_PROFILE_END(COMPSIZE * COMPSIZE, args.m * args.k + args.k * args.n + args.m * args.n, 2 * args.m * args.n * args.k); IDEBUG_END; return; }
void NAME(char *SIDE, char *UPLO, blasint *M, blasint *N, FLOAT *alpha, FLOAT *a, blasint *ldA, FLOAT *b, blasint *ldB, FLOAT *beta, FLOAT *c, blasint *ldC){ char side_arg = *SIDE; char uplo_arg = *UPLO; blas_arg_t args; FLOAT *buffer; FLOAT *sa, *sb; #if defined(SMP) && !defined(NO_AFFINITY) int nodes; #endif blasint info; int side; int uplo; PRINT_DEBUG_NAME; args.alpha = (void *)alpha; args.beta = (void *)beta; TOUPPER(side_arg); TOUPPER(uplo_arg); side = -1; uplo = -1; if (side_arg == 'L') side = 0; if (side_arg == 'R') side = 1; if (uplo_arg == 'U') uplo = 0; if (uplo_arg == 'L') uplo = 1; args.m = *M; args.n = *N; args.c = (void *)c; args.ldc = *ldC; info = 0; if (args.ldc < MAX(1, args.m)) info = 12; if (!side) { args.a = (void *)a; args.b = (void *)b; args.lda = *ldA; args.ldb = *ldB; if (args.ldb < MAX(1, args.m)) info = 9; if (args.lda < MAX(1, args.m)) info = 7; } else { args.a = (void *)b; args.b = (void *)a; args.lda = *ldB; args.ldb = *ldA; if (args.lda < MAX(1, args.m)) info = 9; if (args.ldb < MAX(1, args.n)) info = 7; } if (args.n < 0) info = 4; if (args.m < 0) info = 3; if (uplo < 0) info = 2; if (side < 0) info = 1; if (info != 0) { BLASFUNC(xerbla)(ERROR_NAME, &info, sizeof(ERROR_NAME)); return; } #else void CNAME(enum CBLAS_ORDER order, enum CBLAS_SIDE Side, enum CBLAS_UPLO Uplo, blasint m, blasint n, #ifndef COMPLEX FLOAT alpha, #else FLOAT *alpha, #endif FLOAT *a, blasint lda, FLOAT *b, blasint ldb, #ifndef COMPLEX FLOAT beta, #else FLOAT *beta, #endif FLOAT *c, blasint ldc) { blas_arg_t args; int side, uplo; blasint info; FLOAT *buffer; FLOAT *sa, *sb; #if defined(SMP) && !defined(NO_AFFINITY) int nodes; #endif PRINT_DEBUG_CNAME; #ifndef COMPLEX args.alpha = (void *)α args.beta = (void *)β #else args.alpha = (void *)alpha; args.beta = (void *)beta; #endif args.c = (void *)c; args.ldc = ldc; side = -1; uplo = -1; info = 0; if (order == CblasColMajor) { if (Side == CblasLeft) side = 0; if (Side == CblasRight) side = 1; if (Uplo == CblasUpper) 
uplo = 0; if (Uplo == CblasLower) uplo = 1; info = -1; args.m = m; args.n = n; if (args.ldc < MAX(1, args.m)) info = 12; if (!side) { args.a = (void *)a; args.b = (void *)b; args.lda = lda; args.ldb = ldb; if (args.ldb < MAX(1, args.m)) info = 9; if (args.lda < MAX(1, args.m)) info = 7; } else { args.a = (void *)b; args.b = (void *)a; args.lda = ldb; args.ldb = lda; if (args.lda < MAX(1, args.m)) info = 9; if (args.ldb < MAX(1, args.n)) info = 7; } if (args.n < 0) info = 4; if (args.m < 0) info = 3; if (uplo < 0) info = 2; if (side < 0) info = 1; } if (order == CblasRowMajor) { if (Side == CblasLeft) side = 1; if (Side == CblasRight) side = 0; if (Uplo == CblasUpper) uplo = 1; if (Uplo == CblasLower) uplo = 0; info = -1; args.m = n; args.n = m; if (args.ldc < MAX(1, args.m)) info = 12; if (!side) { args.a = (void *)a; args.b = (void *)b; args.lda = lda; args.ldb = ldb; if (args.ldb < MAX(1, args.m)) info = 9; if (args.lda < MAX(1, args.m)) info = 7; } else { args.a = (void *)b; args.b = (void *)a; args.lda = ldb; args.ldb = lda; if (args.lda < MAX(1, args.m)) info = 9; if (args.ldb < MAX(1, args.n)) info = 7; } if (args.n < 0) info = 4; if (args.m < 0) info = 3; if (uplo < 0) info = 2; if (side < 0) info = 1; } if (info >= 0) { BLASFUNC(xerbla)(ERROR_NAME, &info, sizeof(ERROR_NAME)); return; } #endif if (args.m == 0 || args.n == 0) return; IDEBUG_START; FUNCTION_PROFILE_START(); buffer = (FLOAT *)blas_memory_alloc(0); sa = (FLOAT *)((BLASLONG)buffer + GEMM_OFFSET_A); sb = (FLOAT *)(((BLASLONG)sa + ((GEMM_P * GEMM_Q * COMPSIZE * SIZE + GEMM_ALIGN) & ~GEMM_ALIGN)) + GEMM_OFFSET_B); #ifdef SMP args.common = NULL; args.nthreads = num_cpu_avail(3); if (args.nthreads == 1) { #endif (symm[(side << 1) | uplo ])(&args, NULL, NULL, sa, sb, 0); #ifdef SMP } else { #ifndef NO_AFFINITY nodes = get_num_nodes(); if (nodes > 1) { args.nthreads /= nodes; gemm_thread_mn(MODE, &args, NULL, NULL, symm[4 | (side << 1) | uplo ], sa, sb, nodes); } else { #endif #ifndef 
USE_SIMPLE_THREADED_LEVEL3 (symm[4 | (side << 1) | uplo ])(&args, NULL, NULL, sa, sb, 0); #else GEMM_THREAD(MODE, &args, NULL, NULL, symm[(side << 1) | uplo ], sa, sb, args.nthreads); #endif #ifndef NO_AFFINITY } #endif } #endif blas_memory_free(buffer); FUNCTION_PROFILE_END(COMPSIZE * COMPSIZE, (!side)? args.m * (args.m / 2 + args.n) : args.n * (args.m + args.n / 2), (!side)? 2 * args.m * args.m * args.n : 2 * args.m * args.n * args.n); IDEBUG_END; return; }
extern void subseq7( MODEL *model, // the model DATASET *dataset, /* the dataset */ int w, // w to use int n_nsites0, /* number of nsites0 values to try */ S_POINT s_points[], /* array of starting points: 1 per nsites0 */ HASH_TABLE evaluated_seed_ht /* A hash table used for remembering which seeds have been evaluated previously */ ) { MOTYPE mtype = model->mtype; /* type of model */ BOOLEAN ic = model->invcomp; /* use reverse complement strand of DNA, too */ THETA map = dataset->map; /* freq x letter map */ LOG_THETA_TYPE(ltheta); /* integer encoded log theta */ int iseq, ioff; int alength = dataset->alength; /* length of alphabet */ int n_samples = dataset->n_samples; /* number of samples in dataset */ SAMPLE **samples = dataset->samples; /* samples in dataset */ int n_starts = 0; /* number of sampled start subseq */ int n_maxima = ps(dataset, w); /* upper bound on # maxima */ /* the local maxima positions */ P_PROB maxima = (P_PROB) mymalloc(n_maxima * sizeof(p_prob)); int lmap[MAXALPH][MAXALPH]; /* consensus letter vs. log frequency matrix */ double col_scores[MAXSITE]; /* not used */ #ifdef PARALLEL int start_seq, start_off=0, end_seq, end_off=0; #endif char *str_seed; // A string representation of a seed. // PRECONDITIONS: // 1. If the sequence model is oops, then n_nsites0 is exactly 1: if (mtype == Oops) { assert(n_nsites0 == 1); } convert_to_lmap(map, lmap, alength); if (TRACE) { printf("w= %d\n", w); } /* get the probability that a site starting at position x_ij would NOT overlap a previously found motif. */ get_not_o(dataset, w); // Set up log_not_o: log_not_o[site] is: // log ( Pr(site not overlapped) * scaled_to_one_Pr(site) ) if (model->mtype != Tcm) { add_psp_to_log_not_o(dataset, w, model->invcomp, model->mtype); } /* score all the sampled positions saving the best position for each value of NSITES0 */ #ifdef PARALLEL /* Retrieve the previously-calculated starting and ending points. 
*/ get_start_n_end(&start_seq, &start_off, &end_seq, &end_off); /* Divide the various samples among processors. */ for (iseq = start_seq; iseq <= end_seq; iseq++) { /* sequence */ #else /* not PARALLEL */ for (iseq = 0; iseq < n_samples; iseq++) { /* sequence */ #endif /* PARALLEL */ SAMPLE *s = samples[iseq]; int lseq = s->length; char *res = s->res; /* left to right */ char *name = s->sample_name; double *not_o = s->not_o; int max_off, init_off; if (lseq < w) continue; /* shorter than motif */ #ifdef PARALLEL if (mpMyID() == 0) #endif if ((!NO_STATUS) && ((iseq % 5) == 0)) { fprintf(stderr, "starts: w=%d, seq=%d, l=%d \r", w, iseq, lseq); } /* Set the appropriate starting and ending points. */ #ifdef PARALLEL if (iseq == start_seq) init_off = start_off; else #endif init_off = 0; #ifdef PARALLEL if (iseq == end_seq) max_off = MIN(end_off, lseq - w); else #endif max_off = lseq - w; /* Loop over all subsequences in the current sequence testing them each as "starting points" (inital values) for theta */ for (ioff = init_off; ioff <= max_off; ioff++) {/* subsequence */ /* warning: always do the next step; don't ever "continue" or the value of pY will not be correct since it is computed based the previous value */ /* convert subsequence in dataset to starting point for EM */ init_theta_1(w, res+ioff, <heta[1][0], lmap); if (ioff == init_off) { /* new sequence */ /* Compute p(Y_ij | theta_1^0) */ if (!ic) { get_pY(dataset, <heta[1][0], w, 0); } else { get_pY(dataset, <heta[1][0], w, 1); get_pY(dataset, <heta[1][0], w, 2); } } else { /* same sequence */ /* get theta[0][0]^{k-1} */ init_theta_1(1, res+ioff-1, <heta[0][0], lmap); /* compute p(Y_ij | theta_1^k) */ if (!ic) { next_pY(dataset, <heta[1][0], w, <heta[0][0][0], 0); } else { next_pY(dataset, <heta[1][0], w, <heta[0][0][0], 1); next_pY(dataset, <heta[1][0], w, <heta[0][0][0], 2); } } /* same sequence */ /* skip if there is a high probability that this subsequence is part of a site which has already been found */ 
if (not_o[ioff] < MIN_NOT_O) continue; /*fprintf(stderr, "subseq: %d %d\r", iseq+1, ioff+1);*/ // Put highest pY into first scratch array if using both DNA strands: if (ic) { combine_strands(samples, n_samples, w); } /* get a sorted list of the maxima of pY */ n_maxima = get_max(mtype, dataset, w, maxima, ic, TRUE); /* "fake out" align_top_subsequences by setting each of the scores in the s_points objects to LITTLE, thereby forcing align_top_subsequences to record the attributes for the current seed in the s_points, rather than the seed with the highest respective scores: */ int sp_idx; for (sp_idx = 0; sp_idx < n_nsites0; sp_idx++) { s_points[sp_idx].score = LITTLE; } /* align the top nsites0 subsequences for each value of nsites0 and save the alignments with the highest likelihood */ n_starts += align_top_subsequences( mtype, w, dataset, iseq, ioff, res+ioff, name, n_nsites0, n_maxima, maxima, col_scores, s_points ); /* A string version of the current seed is required for updating the S_POINT heaps: */ str_seed = to_str_seed(res+ioff, w); /* For each of the S_POINT objects, add the current seed to that S_POINT'S heap. Also, branching search will require a hash_table of all seeds that have been evaluated prior to when branching search is called. 
Hence also record the current seed (string, nsites0) combination in the hash_table, for all nsites0, unless that seed was already in the hash_table: */ hash_insert_str(str_seed, evaluated_seed_ht); update_s_point_heaps(s_points, str_seed, n_nsites0); myfree(str_seed); } /* subsequence */ } /* sequence */ #ifdef PARALLEL reduce_across_heaps(s_points, n_nsites0); #endif // PARALLEL // Print the sites predicted using the seed after subsequence search, for // each of the starting points, if requested: if (dataset->print_pred) { int sp_idx; for (sp_idx = 0; sp_idx < n_nsites0; sp_idx++) { // Retrieve the best seed, from the heap: HEAP *heap = s_points[sp_idx].seed_heap; // Only print sites for the s_point if its heap was non-empty: if (get_num_nodes(heap) > 0) { SEED *best_seed = (SEED *)get_node(heap, get_best_node(heap)); char *seed = get_str_seed(best_seed); /* Print the sites predicted using the motif corresponding to that seed, according to the sequence model being used: */ int nsites0 = s_points[sp_idx].nsites0; fprintf(stdout, "PREDICTED SITES AFTER SUBSEQUENCE SEARCH WITH W = %i " "NSITES = %i MOTIF = %i\n", w, nsites0, dataset->imotif); int n_maxima = ps(dataset, w); // upper bound on number of maxima P_PROB psites = (P_PROB) mymalloc(n_maxima * sizeof(p_prob)); n_maxima = get_pred_sites(psites, mtype, w, seed, ltheta[1], lmap, dataset, ic); print_site_array(psites, nsites0, stdout, w, dataset); myfree(psites); } // get_num_nodes > 0 } //sp_idx } // print_pred if (TRACE){ printf("Tested %d possible starts...\n", n_starts); } myfree(maxima); } // subseq7 /**********************************************************************/ /* next_pY Compute the value of p(Y_ij | theta_1^{k+1}) from p(Y_ij | theta_1^{k} and the probability of first letter of Y_ij given theta_1^k, p(Y_ij^0 | theta_1^k). 
*/ /**********************************************************************/ static void next_pY( DATASET *dataset, /* the dataset */ LOG_THETAG_TYPE(theta_1), /* integer log theta_1 */ int w, /* width of motif */ int *theta_0, /* first column of previous theta_1 */ int pYindex /* which pY array to use */ ) { int i, k; int *theta_last = theta_1[w-1]; /* last column of theta_1 */ int n_samples = dataset->n_samples; SAMPLE **samples = dataset->samples; for (i=0; i < n_samples; i++) { /* sequence */ SAMPLE *s = samples[i]; /* sequence */ int lseq = s->length; /* length of sequence */ char *res = pYindex<2 ? s->res : s->resic; /* integer sequence */ int *pY = s->pY[pYindex]; /* log p(Y_j | theta_1) */ char *r = res+lseq-1; /* last position in sequence */ char *r0 = res+lseq-w-1; /* prior to start of last subsequence */ int j, p; if (lseq < w) continue; /* skip if sequence too short */ /* calculate p(Y_ij | theta_1) */ int *pY_shifted_1 = pY - 1; for (j=lseq-w; j>0; j--) { pY[j] = pY_shifted_1[j] + theta_last[(int)(*r--)] - theta_0[(int)(*r0--)]; } /* calculate log p(Y_i0 | theta_1) */ p = 0; r = res; for (k=0; k<w; k++) { /* position in site */ p += theta_1[k][(int)(*r++)]; } pY[0] = p; } }
/**
 * Collects minimum-spanning-tree edges of `g` (a spanning forest when `g` is
 * disconnected) using a parallel, randomised star contraction.
 *
 * Every round has two phases:
 *   1. find:     for each current component root, scan the adjacency lists of
 *                the vertices in that component and remember the lightest edge
 *                whose endpoints lie in different components.
 *   2. contract: flip one coin per vertex (true = HEAD / star centre,
 *                false = TAIL / satellite).  A remembered edge joining a HEAD
 *                root and a TAIL root merges the tail into the head via
 *                union_parallel() and is recorded in the result under the
 *                tail's index.  A compare-and-swap on is_contracted[] makes
 *                sure each tail is merged at most once per round.
 * Rounds repeat until no remembered edge joins two different components.
 *
 * Side effects: sets the OpenMP thread count to THREADS and prints the
 * accumulated time of both phases to stdout.
 *
 * @param g  graph to process.
 * @return   array of get_num_nodes(g) edges allocated with new[].  Slot r
 *           holds the edge recorded when root r was merged as a tail; all
 *           other slots are value-initialised.  The caller owns the array
 *           and must release it with delete[].
 */
struct Edge* find_MST_parallel_star(Graph g){
  omp_set_num_threads(THREADS);

  int n = get_num_nodes(g);

  // min_edges[r]: lightest outgoing edge found for the component rooted at r.
  // Value-initialised here and turned into self-loops below so that entries
  // the find phase never writes are skipped by the contraction phase.
  struct Edge* min_edges = new struct Edge[n]();
  struct set *components = new struct set[n];
  // Value-initialised so slots that never receive an edge are deterministic.
  struct Edge* mst_edges = new struct Edge[n]();
  bool *coin_flips = new bool[n];
  // keeps track of which tails have been contracted in the current round
  bool *is_contracted = new bool[n];
  // is_first_passes[r] is true until the find phase stores the first
  // candidate edge for root r in the current round.  Heap-allocated: a
  // variable-length array is not standard C++ and would put n bytes on the
  // stack.
  bool *is_first_passes = new bool[n];

  // loop guard - did we see an edge between two different components?
  // Only needs to be set by at least one thread.
  bool can_be_contracted = true;

  #pragma omp parallel for schedule(static)
  for(int i = 0; i < n; i++){
    components[i].parent = i;
    components[i].rank = 0;
    is_first_passes[i] = true;
    // Must be false before the first round: it is CAS-tested there.
    is_contracted[i] = false;
    // Self-loop sentinel: both endpoints share a root, so it is ignored.
    min_edges[i].src = i;
    min_edges[i].dest = i;
  }

  double startTimeFind, endTimeFind;
  double findTotal = 0.0;
  double startTimeContract, endTimeContract;
  double contractTotal = 0.0;

  // Continue looping until there's only 1 component; in the case of a
  // disconnected graph, until no edge between two components is left.
  while(can_be_contracted){
    // ---------------- find phase ----------------
    startTimeFind = CycleTimer::currentSeconds();

    #pragma omp parallel for schedule(dynamic, CHUNKSIZE)
    for(int j = 0; j < n; j++){
      // only component roots collect a minimum edge
      if(find_parallel(components, j) != j){
        continue;
      }

      // find minimum weight edge out of the component rooted at j
      for(int i = 0; i < n; i++){
        int set1 = find_parallel(components, i);
        if(set1 != j){
          continue;
        }

        const Vertex* start = edges_begin(g, i);
        const Vertex* end = edges_end(g, i);
        int weight_offset = 0;
        for(const Vertex* v = start; v < end; v++, weight_offset++){
          // get representative node of the other endpoint
          int set2 = find_parallel(components, *v);

          // endpoints in the same component: edge is already contracted
          if(set1 == set2){
            continue;
          }

          Edge e;
          e.src = i;
          e.dest = *v;
          e.weight = g->weights[g->offsets[i] + weight_offset];

          if(is_first_passes[set1]){
            min_edges[set1] = e;
            is_first_passes[set1] = false;
          }
          else if(min_edges[set1].weight > e.weight){
            min_edges[set1] = e;
          }
        }
      }
    }

    endTimeFind = CycleTimer::currentSeconds();
    findTotal += (endTimeFind - startTimeFind);

    // ---------------- contract phase ----------------
    startTimeContract = CycleTimer::currentSeconds();

    //TODO: need to rewrite union find so that it always contracts the edge
    //that we want it to - this is necessary in star contraction so we
    //contract into the HEAD

    // Decide which vertices are star centres (true) and which are
    // satellites (false).  rand() keeps hidden global state and is not
    // required to be thread-safe, so the flips are drawn serially.
    for(int i = 0; i < n; i++){
      coin_flips[i] = ((rand() % 2) == 1);
    }

    // reset so we can see whether any thread still finds a contractible edge
    can_be_contracted = false;

    // star contraction - true is HEADS and false is TAILS
    #pragma omp parallel for schedule(dynamic, CHUNKSIZE)
    for(int i = 0; i < n; i++){
      int src = min_edges[i].src;
      int dest = min_edges[i].dest;

      int root1 = find_parallel(components, src);
      int root2 = find_parallel(components, dest);

      if(root1 == root2){
        continue;
      }

      can_be_contracted = true;

      // no contraction in case of both heads or both tails
      if(coin_flips[root1] == coin_flips[root2]){
        continue;
      }

      int head = coin_flips[root1] ? root1 : root2;
      int tail = coin_flips[root1] ? root2 : root1;

      // Mark the tail as contracted; if the CAS fails, another thread has
      // already contracted this tail in the current round.
      if(!__sync_bool_compare_and_swap(&is_contracted[tail], false, true)){
        continue;
      }

      union_parallel(components, tail, head);
      mst_edges[tail] = min_edges[i];
    }

    // prepare the per-round flags for the next round
    #pragma omp parallel for schedule(static)
    for(int i = 0; i < n; i++){
      is_first_passes[i] = true;
      is_contracted[i] = false;
    }

    endTimeContract = CycleTimer::currentSeconds();
    contractTotal += (endTimeContract - startTimeContract);
  }

  /*
  for(int i = 0; i < n; i++){
    if(mst_edges[i].src == 0 && mst_edges[i].dest == 0)
      continue;
    if(mst_edges[i].src < 0 || mst_edges[i].src > n ||
       mst_edges[i].dest < 0 || mst_edges[i].dest > n)
      continue;
    printf("%d, %d\n", mst_edges[i].src, mst_edges[i].dest);
  }
  */

  printf("find time parallel comp star: %.20f\n", findTotal);
  printf("contract time parallel comp star: %.20f\n", contractTotal);

  // release every scratch buffer; only mst_edges is handed to the caller
  delete[] min_edges;
  delete[] components;
  delete[] coin_flips;
  delete[] is_contracted;
  delete[] is_first_passes;

  return mst_edges;
}