C++ (Cpp) get_num_nodes示例

示例#1

0

显示文件

文件： 50.C 项目： JackDrogon/Study

/*
 * get_num_nodes
 *
 * Count the nodes of the binary tree rooted at n.
 *
 * Returns 0 when n is NULL; otherwise 1 (for n itself) plus the number of
 * nodes found under n->left and n->right.
 */
int get_num_nodes (PNODE n) {
	if (n == NULL) {
		return 0;
	}

	/* A missing child contributes nothing to the total. */
	int below_left  = (n->left  != NULL) ? get_num_nodes (n->left)  : 0;
	int below_right = (n->right != NULL) ? get_num_nodes (n->right) : 0;

	return (below_left + 1 + below_right);
}

示例#2

0

显示文件

文件： procapi_fork_t.cpp 项目： AlainRoy/htcondor

/**
 * fork_tree
 *
 * Fork a "head" process that builds a tree of processes via recursive_fork()
 * and reports the tree back to the caller through a pipe.
 *
 * Side effects: stores tree_breadth_change and depth in the globals
 * TREE_BREADTH_CHANGE and TOTAL_DEPTH before forking.
 *
 * In the head (child) process: builds the subtree, fills entry 0 with its own
 * information, writes get_num_nodes(depth, breadth) entries to the pipe,
 * stops itself with SIGSTOP, and once continued waits for its children and
 * exits.
 *
 * In the calling (parent) process: reads the same number of entries from the
 * pipe, prints one line per entry and returns the array.
 *
 * @param depth                depth passed to get_num_nodes()/recursive_fork()
 * @param breadth              breadth passed to get_num_nodes()/recursive_fork()
 * @param tree_breadth_change  value stored in TREE_BREADTH_CHANGE
 * @param verbose              print progress messages in the forked processes
 * @return array allocated with new[] (caller must delete[] it), or NULL if
 *         the pipe could not be created or the head process could not be
 *         forked.
 */
PID_ENTRY* fork_tree(int depth, int breadth, int tree_breadth_change, bool verbose){
  TREE_BREADTH_CHANGE = tree_breadth_change;
  TOTAL_DEPTH = depth;

  int fds[2];

  if(pipe(fds) == -1){
    perror("Error creating pipe");
    return NULL;
  }

  pid_t child = fork();
  if(child == -1){
    // no head process exists, so nothing will ever be written to the pipe
    perror("Error forking child");
    close(fds[0]);
    close(fds[1]);
    return NULL;
  }

  if(child == 0){//in child
    if(verbose){
      printf("Process %d is the head node at depth 0\n", getpid());
    }

    int num_nodes = get_num_nodes(depth, breadth); 
    PID_ENTRY* p = recursive_fork(depth - 1, breadth, verbose);

    //make this process a little less boring
    do_work(MEMFACTOR*1024, verbose);
    p[0].pid = getpid();
    p[0].ppid = getppid();
    p[0].depth = TOTAL_DEPTH - depth;
    p[0].subtree_depth = depth;
    p[0].breadth = 0;
    close(fds[0]);
    write(fds[1], p, num_nodes*(sizeof(PID_ENTRY)));
    delete[] p;

    // stop myself before exiting
    kill(getpid(), SIGSTOP);

    // I'll wait for my children to exit before I do
    while(wait(0) != -1)
      ;
    if(verbose){
      printf("exiting process %d\n", getpid());
    }
    exit(0);
  }

  // in parent
  int num_nodes = get_num_nodes(depth, breadth);
  // value-initialise so entries that never arrive are not printed as garbage
  PID_ENTRY* arr = new PID_ENTRY[num_nodes]();
  close(fds[1]);

  // A single read() on a pipe may return fewer bytes than requested when the
  // array is larger than what the pipe currently holds, so keep reading until
  // every expected byte has arrived (the head writes exactly this many).
  const size_t wanted = num_nodes*(sizeof(PID_ENTRY));
  size_t received = 0;
  char* dest = reinterpret_cast<char*>(arr);
  while(received < wanted){
    ssize_t got = read(fds[0], dest + received, wanted - received);
    if(got > 0){
      received += got;
    }
    else if(got == -1 && errno == EINTR){
      continue; // interrupted before any data was transferred; retry
    }
    else{
      if(got == -1){
        perror("Error reading process tree");
      }
      break; // error or end-of-file: keep what we have
    }
  }
  // the read end is no longer needed; do not leak the descriptor
  close(fds[0]);
  
  for(int i = 0; i < num_nodes; i++){
    printf("pid %d ppid %d depth %d breadth %d subtree_depth %d\n", arr[i].pid, arr[i].ppid, arr[i].depth, arr[i].breadth, arr[i].subtree_depth);
  }

  return arr;
}

示例#3

0

显示文件

文件： sp_matrix.c 项目： a1aks/Haystack

/**
 * create_heap_from_sp_matrix
 *
 * Build one heap holding copies of the seeds stored in the seed heaps of
 * every S_POINT of sp_mat.
 *
 * The matrix is walked twice: once to add up get_num_nodes() over all seed
 * heaps (this total is the size passed to create_heap), and once to copy the
 * seeds across. Seeds are taken from a copy_heap() duplicate of each
 * S_POINT's heap, so the heaps inside the matrix are not popped.
 *
 * Returns the newly created heap.
 */
HEAP *create_heap_from_sp_matrix (
  SP_MATRIX *sp_mat    // the matrix of s_points 
) {

  const int n_rows = sp_get_num_rows(sp_mat);
  const int n_cols = sp_get_num_cols(sp_mat);
  int r, c;

  // pass 1: total number of seeds over every s_point in the matrix
  int total_seeds = 0;
  for (r = 0; r < n_rows; r++) {
    for (c = 0; c < n_cols; c++) {
      S_POINT *sp = get_spoint(sp_mat, r, c);
      total_seeds += get_num_nodes(sp->seed_heap);
    }
  }

  // the combined heap, sized to the total found above
  HEAP *mega_heap = create_heap(
      total_seeds,
      (int (*) (void *, void*))compare_seed,
      (void *)copy_seed,
      (void (*)(void*))free_seed,
      (char* (*)(void*))get_str_seed,
      (void (*)(FILE *, void*))print_seed
    );

  // pass 2: drain a duplicate of each s_point heap into the combined heap
  for (r = 0; r < n_rows; r++) {
    for (c = 0; c < n_cols; c++) {
      S_POINT *sp = get_spoint(sp_mat, r, c);
      HEAP *scratch = copy_heap(sp->seed_heap);

      // the count is taken before popping starts, one pop per counted node
      int remaining = get_num_nodes(scratch);
      while (remaining > 0) {
        void *popped = pop_heap_root(scratch);
        void *duplicate = mega_heap->copy(popped);
        // whatever add_node_heap returns is not used here
        (void) add_node_heap(mega_heap, duplicate);
        remaining--;
      }
    }
  }

  return mega_heap;

} // create_heap_from_sp_matrix

示例#4

0

显示文件

文件： sp_matrix.c 项目： a1aks/Haystack

/**
 * union_seed_packets
 *
 * Merge two SEED_PACKET arrays and write the merged result over f_result.
 *
 * Each input array stores, in its element 0, how many of its packets are
 * filled. A seed is rebuilt from every filled packet of f_data and of
 * f_result and added to a temporary heap created with capacity *f_length.
 * The heap's nodes (indices 1..get_num_nodes) are then packed back into
 * f_result: every written packet gets num_seed_packets, score and seed.
 *
 * This function is used in reduce_across_heaps.
 *
 * f_data    first SEED_PACKET array (read only here)
 * f_result  second SEED_PACKET array; overwritten with the merged packets
 * f_length  capacity used for the temporary heap
 * datatype  not used by this function
 */
void union_seed_packets(void *f_data, void *f_result, int *f_length,
                   MPI_Datatype *datatype)
{
  SEED_PACKET *incoming = (SEED_PACKET *)f_data;
  SEED_PACKET *merged = (SEED_PACKET *)f_result;
  int idx;
  int n_filled;

  // temporary heap that performs the union
  HEAP *heap = create_heap(
      *f_length, 
      (int (*) (void *, void*))compare_seed,
      (void *)copy_seed,
      (void (*)(void*))free_seed,
      (char* (*)(void*))get_str_seed,
      (void (*)(FILE *, void*))print_seed
    );

  // seeds carried by f_data
  n_filled = incoming[0].num_seed_packets;
  for (idx = 0; idx < n_filled; idx++) {
    char *text = incoming[idx].seed;
    double score = incoming[idx].score;
    SEED *seed = new_seed(text, score);
    // the value returned by add_node_heap is not used
    (void) add_node_heap(heap, seed);
  }

  // seeds carried by f_result
  n_filled = merged[0].num_seed_packets;
  for (idx = 0; idx < n_filled; idx++) {
    char *text = merged[idx].seed;
    double score = merged[idx].score;
    SEED *seed = new_seed(text, score);
    (void) add_node_heap(heap, seed);
  }

  // pack the heap contents back into f_result
  int n_kept = get_num_nodes(heap);
  // element 0 always records the count, even when the heap is empty
  merged[0].num_seed_packets = n_kept;
  for (idx = 0; idx < n_kept; idx++) {
    SEED_PACKET *slot = merged + idx;
    // populated heap nodes are at index 1 to n_kept
    SEED *seed = get_node(heap, idx + 1);

    slot->num_seed_packets = n_kept;
    slot->score = get_seed_score(seed);
    strcpy(slot->seed, get_str_seed(seed));
  }
} // union_seed_packets

示例#5

0

显示文件

文件： wikiFinal.cpp 项目： davidhernon/comp322A1

/**
 * Add a page to the graph.
 *
 * Assigns wp.ID (get_num_nodes() + 1), records the page in title_to_node and
 * node_to_wiki, then builds the list of edges for the new node and hands it
 * to Graph::push_node().
 *
 * Candidate neighbours are the titles returned by allAssociations() for the
 * page's html file. A candidate is kept only if it is already registered in
 * title_to_node, is non-empty and is not the page itself. The edge weight is
 * the number of occurrences of the candidate title in this page's text plus
 * the occurrences of this page's title in the candidate's text; edges with a
 * weight of 0 are dropped.
 *
 * A my_exception passing through is tagged with "push_page" and rethrown.
 */
void WikiGraph::push_page(WikiPage& wp) try {
    // give the page its ID and register it
    wp.ID = get_num_nodes() + 1;
    title_to_node[wp.title] = wp.ID;
    node_to_wiki.push_back(wp);

    // edges that will be attached to the new node
    list<Edge> edges;

    // only one title registered: nothing to link to yet
    if (title_to_node.size() == 1) {
        Graph::push_node(edges);
        return;
    }

    // every article title referenced from this page's html
    ifstream html(wp.html_location);
    set<string> candidates = allAssociations(html);

    for (const string& title : candidates) {
        const bool registered = title_to_node.find(title) != title_to_node.end();
        if (!registered || title == wp.title || title.length() == 0) {
            continue;
        }

        // open both text files before checking either of them
        ifstream own_text (wp.txt_location.c_str());
        ifstream other_text (node_to_wiki[title_to_node[title]].txt_location.c_str());
        if (!own_text.is_open()) {
            cout << "Cannot open: " << wp.txt_location << endl;
            continue;
        }
        if (!other_text.is_open()) {
            cout << "Cannot open: " << node_to_wiki[title_to_node[title]].txt_location << endl;
            continue;
        }

        // mutual mention count between the two pages
        int weight = countOccurences(own_text, title) + countOccurences(other_text, wp.title);
        own_text.close();
        other_text.close();

        if (weight <= 0) {
            continue;
        }
        Edge e { title_to_node[wp.title], title_to_node[title], weight};
        edges.push_back(e);
    }

    Graph::push_node(edges);
} catch (my_exception& ex) {
    ex.addToStack("push_page");
    throw;
}

示例#6

0

显示文件

文件： sp_matrix.c 项目： a1aks/Haystack

/**
 * transfer_final_scores
 *
 * For every S_POINT in the matrix whose seed heap holds at least one seed,
 * copy the details of the heap's best seed (as selected by get_best_node)
 * into the S_POINT itself: score, e_cons0 and a freshly strdup'ed cons0
 * (the previous cons0 is freed). iseq and ioff are set to -1.
 *
 * S_POINTs with an empty heap are left untouched; when TRACE is set a
 * message about the empty heap is written to stderr.
 */
void transfer_final_scores (
  SP_MATRIX *sp_matrix ///< This object
) {
  int r;
  int c;

  for (r = 0; r < sp_get_num_rows(sp_matrix); r++) {
    S_POINT *row = sp_matrix->matrix[r];

    for (c = 0; c < sp_get_num_cols(sp_matrix); c++) {
      S_POINT *sp = &row[c];
      HEAP *seeds = sp->seed_heap;

      if (get_num_nodes(seeds) < 1) {
        /* An empty seed heap could mean that no seeds added to the s_point
           had enough maxima to be evaluated by align_top_subsequences.
           Report this situation when tracing:
        */
        if (TRACE) {
          fprintf(stderr,
                  "Heap of spoint was empty, possibly because no seeds had"
                  " enough local maxima. w = %i. nsites0 = %f.\n", sp->w0,
                  sp->nsites0);
        }
        continue;
      }

      SEED *winner = (SEED *)(get_node(seeds, get_best_node(seeds)));
      sp->score = get_seed_score(winner);
      // A seed does not correspond to a location in the dataset.
      sp->iseq = -1;
      sp->ioff = -1;
      sp->e_cons0 = get_e_seed(winner);
      // replace the old consensus string with a copy of the seed's string
      free(sp->cons0);
      sp->cons0 = strdup(get_str_seed(winner));
    } // c
  } // r
} // transfer_final_scores

示例#7

0

显示文件

文件： sp_matrix.c 项目： a1aks/Haystack

/**
 * reduce_across_heaps
 *
 * Do a reduction across an array of S_POINT heaps. For each S_POINT in the
 * array, all the seeds from the heaps on each node are combined (using a
 * union function). A heap containing the best seeds from every node is then
 * propagated to all nodes.
 *
 * On the first call only, the MPI derived datatype describing a SEED_PACKET
 * and the MPI reduction operation wrapping union_seed_packets are created;
 * both are kept in function-local statics and reused by later calls.
 *
 * For each S_POINT the local heap is popped once per seed it holds, the
 * seeds are sent through MPI_Allreduce over MPI_COMM_WORLD, and the seeds of
 * the reduced result are added back to the same heap.
 *
 * NOTE(review): MPI_Allreduce is a collective call, so every process in
 * MPI_COMM_WORLD presumably has to call this function with the same
 * n_nsites0 and matching heap sizes -- confirm against the callers.
 */
void reduce_across_heaps(
  S_POINT *s_points,     // an array of S_POINTS
  int n_nsites0          // the number of S_POINTS in the s_points array
) 
{
  // statics: zero on the first call, so `init` doubles as the "done" flag
  static int init;
  static MPI_Datatype seed_packet_type;
  static MPI_Op union_seed_packets_op;
  int i_packet;

  // Initialise MPI stuff (first call only)
  if (init==0){
    init = 1;
    // local instance used only to measure member offsets
    SEED_PACKET seed_packet;
    int block_lengths[4];
    MPI_Aint displacements[4];
    MPI_Aint address[4]; 
    MPI_Datatype typelist[4];

    // Build the derived datatype
    // set the types, in the order: score, width, num_seed_packets, seed
    typelist[0]=MPI_DOUBLE;
    typelist[1]=MPI_INT;
    typelist[2]=MPI_INT;
    typelist[3]=MPI_CHAR;

    // set number of elements of each type
    block_lengths[0] = block_lengths[1] = block_lengths[2] = 1;
    block_lengths[3] = MAXSITE;	// the maximum length of a seed

    // calculate the displacements (relative to the `score` member)
    // NOTE(review): MPI_Address and MPI_Type_struct are the MPI-1 names;
    // newer MPI standards provide MPI_Get_address and MPI_Type_create_struct
    // instead -- confirm which MPI version this must build against.
    MPI_Address(&seed_packet.score, &address[0]);
    MPI_Address(&seed_packet.width, &address[1]);
    MPI_Address(&seed_packet.num_seed_packets, &address[2]);
    MPI_Address(&seed_packet.seed, &address[3]);
    displacements[0]=0;
    displacements[1]=address[1]-address[0];
    displacements[2]=address[2]-address[0];
    displacements[3]=address[3]-address[0];

    // create the derived type
    MPI_Type_struct(4, block_lengths, displacements, typelist, &seed_packet_type);

    // commit the derived type
    MPI_Type_commit(&seed_packet_type);

    // set the MPI reduction operation
    MPI_Op_create(union_seed_packets, FALSE, &union_seed_packets_op);
  } // initialise MPI

  // do a reduction for each s_point in the s_point list
  int sp_idx;
  for (sp_idx = 0; sp_idx < n_nsites0; sp_idx++){
    // package the heap for the spoint at sp_idx in the s_points list
    HEAP *seed_heap = s_points[sp_idx].seed_heap;
    // get the maximum heap size and the number of seeds in the heap
    int max_heap_size = get_max_size(seed_heap);
    int num_seeds = get_num_nodes(seed_heap);

    // one packet per possible heap slot; these are variable-length arrays
    // on the stack, sized by the maximum heap size
    SEED_PACKET packets[max_heap_size], best_packets[max_heap_size];
    // set num_seed_packets to the number of filled nodes in the heap (in 
    // case the heap is empty)
    packets[0].num_seed_packets = num_seeds;
    // package each seed in the heap into a seed packet; packets beyond
    // num_seeds are left unset

    for (i_packet = 0; i_packet < num_seeds; i_packet++){
      // set the number of seed_packets that will be filled
      packets[i_packet].num_seed_packets = num_seeds;
      // get the seed at the root (one pop per seed counted above)
      SEED *curr_seed = pop_heap_root(seed_heap);
      // set the seed packet score
      packets[i_packet].score = get_seed_score(curr_seed);
      // set the width of the string
      packets[i_packet].width = get_width(curr_seed);
      // set the seed
      char *seed_str = get_str_seed(curr_seed);
      strcpy(packets[i_packet].seed, seed_str); 
    }

/*
    // print the packets before the reduction
    if (mpMyID() == NODE_NO){
      fprintf(stdout, "BEFORE\n");
      for (i_packet = 0; i_packet < max_heap_size; i_packet++)
      fprintf(stdout, "node %d packet %d score= %g width= %i seed= %s\n",
                       mpMyID(), i_packet, packets[i_packet].score,
                       packets[i_packet].width, packets[i_packet].seed);
      fflush(stdout);
    }
*/

    // Do the reduction: max_heap_size packets are combined with
    // union_seed_packets and the result is delivered into best_packets
    MPI_Allreduce((void *)&packets, (void *)&best_packets, max_heap_size,
                    seed_packet_type, union_seed_packets_op, MPI_COMM_WORLD);

/*
    // print the packets after the reduction
    if (mpMyID() == NODE_NO){
      fprintf(stdout, "AFTER\n");
      for (i_packet = 0; i_packet < max_heap_size; i_packet++)
      fprintf(stdout, "node %d packet %d score= %g width= %i seed= %s\n",
                       mpMyID(), i_packet, best_packets[i_packet].score,
                       best_packets[i_packet].width, best_packets[i_packet].seed);
      fflush(stdout);
    }
*/

    // Unpack the best seed packets into the heap

    // Get the number of filled packets (stored in element 0)
    int num_seed_packets = best_packets[0].num_seed_packets;

    // Add the best seeds to the heap; only the seed string and score of a
    // packet are used to rebuild the seed, the packet's width is not read
    for (i_packet = 0; i_packet < num_seed_packets; i_packet++){
      double score =  best_packets[i_packet].score;
      char *seed_str = best_packets[i_packet].seed;
      SEED *best_seed = new_seed(seed_str, score);
      //SEED *bumped_seed = (SEED *)(add_node_heap(seed_heap, best_seed));
      // the value returned by add_node_heap is deliberately discarded
      (void *)(add_node_heap(seed_heap, best_seed));
    }
  } // end n_nsites0

} // reduce_across_heaps

示例#8

0

显示文件

文件： 50.C 项目： JackDrogon/Study

/*
 * Print `prompt` on stdout, read one line from stdin and parse it as an
 * integer with sscanf "%i".
 *
 * Returns  1 and stores the value in *out when a number was parsed,
 *          0 when a line was read but did not contain a number,
 *         -1 when no line could be read (end of input or read error).
 * *out is left untouched unless 1 is returned.
 */
static int read_menu_int (const char *prompt, int *out) {
	char line[50];

	printf ("%s", prompt);
	if (fgets (line, sizeof(line), stdin) == NULL) {
		return -1;
	}
	return (sscanf (line, "%i", out) == 1) ? 1 : 0;
}

/*
 * Interactive driver for the binary tree: shows a menu, reads an option and
 * runs the matching tree operation until option 0 is chosen or stdin ends.
 *
 * Input that cannot be read or parsed is never acted upon: end of input
 * leaves the loop, a non-numeric option is reported as invalid, and a
 * non-numeric operand skips the insert/delete/find it was meant for.
 */
int main() {
	int  option = -1;
	int  value = 0;
	int  status;
	PNODE tree = NULL;
	PNODE node = NULL;

	while (1) {
		printf ("--------------------------\n");
		printf ("Options are:\n\n");
		printf (" 0  Exit\n");
		printf (" 1  Insert node\n");
		printf (" 2  Delete node\n");
		printf (" 3  Find node\n");
		printf (" 4  Pre order traversal\n");
		printf (" 5  In order traversal\n");
		printf (" 6  Post order traversal\n");
		printf (" 7  Max depth\n");
		printf (" 8  Min depth\n");
		printf (" 9  Max value\n");
		printf (" 10 Min value\n");
		printf (" 11 Node Count\n\n");
		printf ("--------------------------\n");
		status = read_menu_int ("Select an option: ", &option);
		printf ("--------------------------\n");
		if (status < 0) {
			/* stdin is exhausted: stop instead of looping forever */
			break;
		}
		if (status == 0 || option < 0 || option > 11) {
			fprintf (stderr, "Invalid option");
			continue;
		}

		switch (option) {
		case 0:
			exit (0);
		case 1:
			status = read_menu_int ("Enter number to insert: ", &value);
			printf ("\n\n");
			if (status == 1) {
				insert_node (&tree, value);
			} else {
				fprintf (stderr, "Invalid number\n");
			}
			break;
		case 2:
			status = read_menu_int ("Enter number to delete: ", &value);
			printf ("\n\n");
			if (status == 1) {
				delete_node (&tree, value);
			} else {
				fprintf (stderr, "Invalid number\n");
			}
			break;
		case 3:
			status = read_menu_int ("Enter number to find: ", &value);
			printf ("\n\n");
			if (status != 1) {
				fprintf (stderr, "Invalid number\n");
				break;
			}
			node = find_node (tree, value);
			if (node != NULL) {
				printf ("Found node\n\n");
			} else {
				printf ("Couldn't find node\n\n");
			}
			break;
		case 4:
			printf ("Pre order traversal: ");
			pre_order_traversal (tree);
			printf ("\n\n");
			break;
		case 5:
			printf ("In order traversal: ");
			in_order_traversal (tree);
			printf ("\n\n");
			break;
		case 6:
			printf ("Post order traversal: ");
			post_order_traversal (tree);
			printf ("\n\n");
			break;
		case 7:
			printf ("Max depth is %i\n\n", get_max_depth (tree));
			break;
		case 8:
			printf ("Min depth is %i\n\n", get_min_depth (tree));
			break;
		case 9:
			printf ("Max value is %i\n\n", get_max_value (tree));
			break;
		case 10:
			printf ("Min value is %i\n\n", get_min_value (tree));
			break;
		case 11:
			printf ("Node Count is %i\n\n", get_num_nodes (tree));
			break;
		}
	}
	return 0;
}

示例#9

0

显示文件

文件： procapi_fork_t.cpp 项目： AlainRoy/htcondor

/**
 * recursive_fork
 *
 * Fork `breadth` children, each of which recurses with depth - 1 and
 * breadth + TREE_BREADTH_CHANGE, and gather the PID_ENTRY records of all
 * descendants into one array.
 *
 * Leaf case (depth <= 0): returns a one-element array describing the calling
 * process (pid, ppid, depth = 1, breadth).
 *
 * Otherwise the returned array has get_num_nodes(depth + 1, breadth) entries:
 * entry 0 carries the pid/ppid of the calling process, followed by one
 * equally sized slice per child, filled from that child's pipe. The slice of
 * a child whose pipe or fork failed is left value-initialised.
 *
 * Each child fills entry 0 of its own array, writes
 * get_num_nodes(depth, breadth) entries to its pipe, stops itself with
 * SIGSTOP and, once continued, waits for its children and exits; it never
 * returns from this function.
 *
 * @return array allocated with new[]; the caller must delete[] it.
 */
PID_ENTRY* recursive_fork(int depth, int breadth, bool verbose){
  
  if(depth <= 0){
    // if this node is a leaf return just its own information
    PID_ENTRY* arr = new PID_ENTRY[1];
    arr[0].pid = getpid();
    arr[0].ppid = getppid();
    arr[0].depth = 1;
    arr[0].breadth = breadth;
    return arr;
  }

  // create the file descriptor arrays for the pipes
  // we'll need one pipe for each child; -1 marks "no open descriptor"
  int** fds = new int*[breadth];
  for(int i = 0; i < breadth; i++){
    fds[i] = new int[2];
    fds[i][0] = -1;
    fds[i][1] = -1;
  }
 
  // we are really at the depth of the calling process hence depth + 1
  const int total_nodes = get_num_nodes(depth + 1 , breadth);
  // value-initialised so slices that are never received are not garbage
  PID_ENTRY* pid_arr = new PID_ENTRY[total_nodes]();

  for (int i = 0; i < breadth; i++){
    // create the pipe
    if(pipe(fds[i]) == -1){
      perror("Error creating pipe");
      fds[i][0] = -1;
      fds[i][1] = -1;
      continue; // without a pipe the child could not report back
    }

    pid_t child = fork();
    if(child == 0){// in child
      
      if(verbose){
	printf("Process %d is the #%d child of %d at depth %d\n", getpid(), i, 
	       getppid(),  TOTAL_DEPTH - depth);
      }
      
      // recurse 
      PID_ENTRY* child_arr = recursive_fork(depth - 1, breadth + TREE_BREADTH_CHANGE, verbose);

      // do the work before the passing our array to the parent so when the 
      // array gets all the way back to the calling function all the work is
      // done
      do_work(MEMFACTOR*1024, verbose);

      // write the array of all my descendent info back to my parent 
      int num_nodes = get_num_nodes(depth , breadth);
      child_arr[0].pid = getpid();
      child_arr[0].ppid = getppid();
      child_arr[0].depth = TOTAL_DEPTH - depth;
      child_arr[0].subtree_depth = depth;
      child_arr[0].breadth = i;
      
      // close read end of pipe
      close(fds[i][0]);
      write(fds[i][1], child_arr, num_nodes * sizeof(PID_ENTRY));
      delete[] child_arr;

      // stop myself, parent will restart me in time
      kill(getpid(), SIGSTOP);
      // I'll wait for my children to exit before I do
      while(wait(0) != -1)
	;
      if(verbose){
	printf("exiting process %d\n", getpid());
      }
      exit(0);
    }

    //in parent
    if(child == -1){
      // remember errno first: perror/printf are allowed to change it
      const int fork_errno = errno;
      perror("Error forking child");
      // Separate errors?
      if(fork_errno == EAGAIN){
	printf("EAGAIN\n");
      }
      else if(fork_errno == ENOMEM){
	printf("ENOMEM\n");
      }
      // nobody will ever write to this pipe: drop it so the gathering loop
      // below does not wait on it
      close(fds[i][0]);
      close(fds[i][1]);
      fds[i][0] = -1;
      fds[i][1] = -1;
    }
  }

  // leave space at the front of the array for myself
  pid_arr[0].pid = getpid();
  pid_arr[0].ppid = getppid();

  // every child owns an equally sized slice behind entry 0
  //we are really at the depth of the calling process
  const int per_child = (breadth > 0) ? (total_nodes - 1) / breadth : 0;

  // add the information for all of my descendents to the array
  for (int i = 0; i < breadth; i++){
    if(fds[i][0] == -1){
      continue; // pipe or fork failed for this child
    }

    close(fds[i][1]);
    if(per_child > 0){
      // read straight into this child's slice. A single read() is kept on
      // purpose: looping until the slice is full would block forever if the
      // child sent fewer entries than expected, because the stopped
      // processes keep the write end open.
      PID_ENTRY* slice = pid_arr + 1 + i*per_child;
      if(read(fds[i][0], slice, per_child * sizeof(PID_ENTRY)) == -1){
	perror("Error reading from child pipe");
      }
    }
    close(fds[i][0]);
  }

  // the descriptor table is no longer needed
  for(int i = 0; i < breadth; i++){
    delete[] fds[i];
  }
  delete[] fds;
  
  return pid_arr;
}

示例#10

0

显示文件

文件： gemm.c 项目： 34985086/meshlab

void NAME(char *TRANSA, char *TRANSB,
	  blasint *M, blasint *N, blasint *K,
	  FLOAT *alpha,
	  FLOAT *a, blasint *ldA,
	  FLOAT *b, blasint *ldB,
	  FLOAT *beta,
	  FLOAT *c, blasint *ldC){
  
  blas_arg_t args;

  int transa, transb, nrowa, nrowb;
  blasint info;

  char transA, transB;
  FLOAT *buffer;
  FLOAT *sa, *sb;

#ifdef SMP
#ifndef COMPLEX
#ifdef XDOUBLE
  int mode  =  BLAS_XDOUBLE | BLAS_REAL;
#elif defined(DOUBLE)
  int mode  =  BLAS_DOUBLE  | BLAS_REAL;
#else
  int mode  =  BLAS_SINGLE  | BLAS_REAL;
#endif  
#else
#ifdef XDOUBLE
  int mode  =  BLAS_XDOUBLE | BLAS_COMPLEX;
#elif defined(DOUBLE)
  int mode  =  BLAS_DOUBLE  | BLAS_COMPLEX;
#else
  int mode  =  BLAS_SINGLE  | BLAS_COMPLEX;
#endif  
#endif
#endif

#if defined(SMP) && !defined(NO_AFFINITY) && !defined(USE_SIMPLE_THREADED_LEVEL3)
  int nodes;
#endif

  PRINT_DEBUG_NAME;

  args.m = *M;
  args.n = *N;
  args.k = *K;

  args.a = (void *)a;
  args.b = (void *)b;
  args.c = (void *)c;

  args.lda = *ldA;
  args.ldb = *ldB;
  args.ldc = *ldC;

  args.alpha = (void *)alpha;
  args.beta  = (void *)beta;

  transA = *TRANSA;
  transB = *TRANSB;

  TOUPPER(transA);
  TOUPPER(transB);

  transa = -1;
  transb = -1;

  if (transA == 'N') transa = 0;
  if (transA == 'T') transa = 1;
#ifndef COMPLEX
  if (transA == 'R') transa = 0;
  if (transA == 'C') transa = 1;
#else
  if (transA == 'R') transa = 2;
  if (transA == 'C') transa = 3;
#endif

  if (transB == 'N') transb = 0;
  if (transB == 'T') transb = 1;
#ifndef COMPLEX
  if (transB == 'R') transb = 0;
  if (transB == 'C') transb = 1;
#else
  if (transB == 'R') transb = 2;
  if (transB == 'C') transb = 3;
#endif

  nrowa = args.m;
  if (transa & 1) nrowa = args.k;
  nrowb = args.k;
  if (transb & 1) nrowb = args.n;

  info = 0;

  if (args.ldc < args.m) info = 13;
  if (args.ldb < nrowb)  info = 10;
  if (args.lda < nrowa)  info =  8; 
  if (args.k < 0)        info =  5;
  if (args.n < 0)        info =  4;
  if (args.m < 0)        info =  3;
  if (transb < 0)        info =  2;
  if (transa < 0)        info =  1;

  if (info){
    BLASFUNC(xerbla)(ERROR_NAME, &info, sizeof(ERROR_NAME));
    return;
  }

#else

void CNAME(enum CBLAS_ORDER order, enum CBLAS_TRANSPOSE TransA, enum CBLAS_TRANSPOSE TransB,
	   blasint m, blasint n, blasint k,
#ifndef COMPLEX
	   FLOAT alpha,
#else
	   FLOAT *alpha,
#endif
	   FLOAT *a, blasint lda,
	   FLOAT *b, blasint ldb, 
#ifndef COMPLEX
	   FLOAT beta,
#else
	   FLOAT *beta,
#endif
	   FLOAT *c, blasint ldc) {

  blas_arg_t args;
  int transa, transb;
  blasint nrowa, nrowb, info;

  XFLOAT *buffer;
  XFLOAT *sa, *sb;

#ifdef SMP
#ifndef COMPLEX
#ifdef XDOUBLE
  int mode  =  BLAS_XDOUBLE | BLAS_REAL;
#elif defined(DOUBLE)
  int mode  =  BLAS_DOUBLE  | BLAS_REAL;
#else
  int mode  =  BLAS_SINGLE  | BLAS_REAL;
#endif  
#else
#ifdef XDOUBLE
  int mode  =  BLAS_XDOUBLE | BLAS_COMPLEX;
#elif defined(DOUBLE)
  int mode  =  BLAS_DOUBLE  | BLAS_COMPLEX;
#else
  int mode  =  BLAS_SINGLE  | BLAS_COMPLEX;
#endif  
#endif
#endif

#if defined(SMP) && !defined(NO_AFFINITY) && !defined(USE_SIMPLE_THREADED_LEVEL3)
  int nodes;
#endif

  PRINT_DEBUG_CNAME;

#ifndef COMPLEX
  args.alpha = (void *)&alpha;
  args.beta  = (void *)&beta;
#else
  args.alpha = (void *)alpha;
  args.beta  = (void *)beta;
#endif

  transa = -1;
  transb = -1;
  info   =  0;

  if (order == CblasColMajor) {
    args.m = m;
    args.n = n;
    args.k = k;
    
    args.a = (void *)a;
    args.b = (void *)b;
    args.c = (void *)c;
    
    args.lda = lda;
    args.ldb = ldb;
    args.ldc = ldc;
    
    if (TransA == CblasNoTrans)     transa = 0;
    if (TransA == CblasTrans)       transa = 1;
#ifndef COMPLEX
    if (TransA == CblasConjNoTrans) transa = 0;
    if (TransA == CblasConjTrans)   transa = 1;
#else
    if (TransA == CblasConjNoTrans) transa = 2;
    if (TransA == CblasConjTrans)   transa = 3;
#endif
    if (TransB == CblasNoTrans)     transb = 0;
    if (TransB == CblasTrans)       transb = 1;
#ifndef COMPLEX
    if (TransB == CblasConjNoTrans) transb = 0;
    if (TransB == CblasConjTrans)   transb = 1;
#else
    if (TransB == CblasConjNoTrans) transb = 2;
    if (TransB == CblasConjTrans)   transb = 3;
#endif
    
    nrowa = args.m;
    if (transa & 1) nrowa = args.k;
    nrowb = args.k;
    if (transb & 1) nrowb = args.n;

    info = -1;

    if (args.ldc < args.m) info = 13;
    if (args.ldb < nrowb)  info = 10;
    if (args.lda < nrowa)  info =  8; 
    if (args.k < 0)        info =  5;
    if (args.n < 0)        info =  4;
    if (args.m < 0)        info =  3;
    if (transb < 0)        info =  2;
    if (transa < 0)        info =  1;
  }

  if (order == CblasRowMajor) {
    args.m = n;
    args.n = m;
    args.k = k;
    
    args.a = (void *)b;
    args.b = (void *)a;
    args.c = (void *)c;
    
    args.lda = ldb;
    args.ldb = lda;
    args.ldc = ldc;

    if (TransB == CblasNoTrans)     transa = 0;
    if (TransB == CblasTrans)       transa = 1;
#ifndef COMPLEX
    if (TransB == CblasConjNoTrans) transa = 0;
    if (TransB == CblasConjTrans)   transa = 1;
#else
    if (TransB == CblasConjNoTrans) transa = 2;
    if (TransB == CblasConjTrans)   transa = 3;
#endif
    if (TransA == CblasNoTrans)     transb = 0;
    if (TransA == CblasTrans)       transb = 1;
#ifndef COMPLEX
    if (TransA == CblasConjNoTrans) transb = 0;
    if (TransA == CblasConjTrans)   transb = 1;
#else
    if (TransA == CblasConjNoTrans) transb = 2;
    if (TransA == CblasConjTrans)   transb = 3;
#endif
    
    nrowa = args.m;
    if (transa & 1) nrowa = args.k;
    nrowb = args.k;
    if (transb & 1) nrowb = args.n;

    info = -1;

    if (args.ldc < args.m) info = 13;
    if (args.ldb < nrowb)  info = 10;
    if (args.lda < nrowa)  info =  8; 
    if (args.k < 0)        info =  5;
    if (args.n < 0)        info =  4;
    if (args.m < 0)        info =  3;
    if (transb < 0)        info =  2;
    if (transa < 0)        info =  1;

  }

  if (info >= 0) {
    BLASFUNC(xerbla)(ERROR_NAME, &info, sizeof(ERROR_NAME));
    return;
  }

#endif

  if ((args.m == 0) || (args.n == 0)) return;

#if 0
  fprintf(stderr, "m = %4d  n = %d  k = %d  lda = %4d  ldb = %4d  ldc = %4d\n",
	 args.m, args.n, args.k, args.lda, args.ldb, args.ldc);
#endif

  IDEBUG_START;

  FUNCTION_PROFILE_START();

  buffer = (XFLOAT *)blas_memory_alloc(0);
  
  sa = (XFLOAT *)((BLASLONG)buffer +GEMM_OFFSET_A);
  sb = (XFLOAT *)(((BLASLONG)sa + ((GEMM_P * GEMM_Q * COMPSIZE * SIZE + GEMM_ALIGN) & ~GEMM_ALIGN)) + GEMM_OFFSET_B);
  
#ifdef SMP
  mode |= (transa << BLAS_TRANSA_SHIFT);
  mode |= (transb << BLAS_TRANSB_SHIFT);

  args.common = NULL;
  args.nthreads = num_cpu_avail(3);

 if (args.nthreads == 1) {
#endif
    
    (gemm[(transb << 2) | transa])(&args, NULL, NULL, sa, sb, 0);
    
#ifdef SMP
    
  } else {
    
#ifndef USE_SIMPLE_THREADED_LEVEL3

#ifndef NO_AFFINITY
      nodes = get_num_nodes();
      
      if ((nodes > 1) && get_node_equal()) {
	
	args.nthreads /= nodes;
	
	gemm_thread_mn(mode, &args, NULL, NULL, gemm[16 | (transb << 2) | transa], sa, sb, nodes);
	
      } else {
#endif

	(gemm[16 | (transb << 2) | transa])(&args, NULL, NULL, sa, sb, 0);

#else

	GEMM_THREAD(mode, &args, NULL, NULL, gemm[(transb << 2) | transa], sa, sb, args.nthreads);
	
#endif
	
#ifndef USE_SIMPLE_THREADED_LEVEL3
#ifndef NO_AFFINITY
      }
#endif
#endif
    
#endif
    
#ifdef SMP
  }
#endif
  
 blas_memory_free(buffer);

  FUNCTION_PROFILE_END(COMPSIZE * COMPSIZE, args.m * args.k + args.k * args.n + args.m * args.n, 2 * args.m * args.n * args.k);

  IDEBUG_END;

  return;
}

示例#11

0

显示文件

文件： symm.c 项目： 4ker/OpenBLAS

void NAME(char *SIDE, char *UPLO,
         blasint *M, blasint *N,
         FLOAT *alpha, FLOAT *a, blasint *ldA,
         FLOAT *b, blasint *ldB,
         FLOAT *beta,  FLOAT *c, blasint *ldC){

  char side_arg  = *SIDE;
  char uplo_arg  = *UPLO;

  blas_arg_t args;

  FLOAT *buffer;
  FLOAT *sa, *sb;

#if defined(SMP) && !defined(NO_AFFINITY)
  int nodes;
#endif

  blasint info;
  int side;
  int uplo;

  PRINT_DEBUG_NAME;

  args.alpha = (void *)alpha;
  args.beta  = (void *)beta;

  TOUPPER(side_arg);
  TOUPPER(uplo_arg);

  side  = -1;
  uplo  = -1;

  if (side_arg  == 'L') side  = 0;
  if (side_arg  == 'R') side  = 1;

  if (uplo_arg  == 'U') uplo  = 0;
  if (uplo_arg  == 'L') uplo  = 1;

  args.m = *M;
  args.n = *N;

  args.c = (void *)c;
  args.ldc = *ldC;

  info = 0;

  if (args.ldc < MAX(1, args.m)) info = 12;

  if (!side) {
    args.a = (void *)a;
    args.b = (void *)b;

    args.lda = *ldA;
    args.ldb = *ldB;

    if (args.ldb < MAX(1, args.m)) info =  9;
    if (args.lda < MAX(1, args.m)) info =  7;

  } else {
    args.a = (void *)b;
    args.b = (void *)a;

    args.lda = *ldB;
    args.ldb = *ldA;

  if (args.lda < MAX(1, args.m)) info =  9;
  if (args.ldb < MAX(1, args.n)) info =  7;
  }

  if (args.n   < 0)              info =  4;
  if (args.m   < 0)              info =  3;
  if (uplo     < 0)              info =  2;
  if (side     < 0)              info =  1;

  if (info != 0) {
    BLASFUNC(xerbla)(ERROR_NAME, &info, sizeof(ERROR_NAME));
    return;
  }

#else

void CNAME(enum CBLAS_ORDER order, enum CBLAS_SIDE Side, enum CBLAS_UPLO Uplo,
	   blasint m, blasint n,
#ifndef COMPLEX
	   FLOAT alpha,
#else
	   FLOAT *alpha,
#endif
	   FLOAT *a, blasint lda,
	   FLOAT *b, blasint ldb,
#ifndef COMPLEX
	   FLOAT beta,
#else
	   FLOAT *beta,
#endif
	   FLOAT *c, blasint ldc) {

  blas_arg_t args;
  int side, uplo;
  blasint info;

  FLOAT *buffer;
  FLOAT *sa, *sb;

#if defined(SMP) && !defined(NO_AFFINITY)
  int nodes;
#endif

  PRINT_DEBUG_CNAME;

#ifndef COMPLEX
  args.alpha = (void *)&alpha;
  args.beta  = (void *)&beta;
#else
  args.alpha = (void *)alpha;
  args.beta  = (void *)beta;
#endif

  args.c = (void *)c;
  args.ldc = ldc;

  side  = -1;
  uplo  = -1;
  info  =  0;

  if (order == CblasColMajor) {
    if (Side == CblasLeft)  side = 0;
    if (Side == CblasRight) side = 1;

    if (Uplo == CblasUpper) uplo  = 0;
    if (Uplo == CblasLower) uplo  = 1;

    info = -1;

    args.m = m;
    args.n = n;

    if (args.ldc < MAX(1, args.m)) info = 12;

    if (!side) {
      args.a = (void *)a;
      args.b = (void *)b;

      args.lda = lda;
      args.ldb = ldb;

      if (args.ldb < MAX(1, args.m)) info =  9;
      if (args.lda < MAX(1, args.m)) info =  7;

    } else {
      args.a = (void *)b;
      args.b = (void *)a;

      args.lda = ldb;
      args.ldb = lda;

      if (args.lda < MAX(1, args.m)) info =  9;
      if (args.ldb < MAX(1, args.n)) info =  7;
    }

    if (args.n   < 0)              info =  4;
    if (args.m   < 0)              info =  3;
    if (uplo     < 0)              info =  2;
    if (side     < 0)              info =  1;
  }

  if (order == CblasRowMajor) {
    if (Side == CblasLeft)  side = 1;
    if (Side == CblasRight) side = 0;

    if (Uplo == CblasUpper) uplo  = 1;
    if (Uplo == CblasLower) uplo  = 0;

    info = -1;

    args.m = n;
    args.n = m;

    if (args.ldc < MAX(1, args.m)) info = 12;

    if (!side) {
      args.a = (void *)a;
      args.b = (void *)b;

      args.lda = lda;
      args.ldb = ldb;

      if (args.ldb < MAX(1, args.m)) info =  9;
      if (args.lda < MAX(1, args.m)) info =  7;

    } else {
      args.a = (void *)b;
      args.b = (void *)a;

      args.lda = ldb;
      args.ldb = lda;

      if (args.lda < MAX(1, args.m)) info =  9;
      if (args.ldb < MAX(1, args.n)) info =  7;
    }

    if (args.n   < 0)              info =  4;
    if (args.m   < 0)              info =  3;
    if (uplo     < 0)              info =  2;
    if (side     < 0)              info =  1;
  }

  if (info >= 0) {
    BLASFUNC(xerbla)(ERROR_NAME, &info, sizeof(ERROR_NAME));
    return;
  }

#endif

  if (args.m == 0 || args.n == 0) return;

  IDEBUG_START;

  FUNCTION_PROFILE_START();

  buffer = (FLOAT *)blas_memory_alloc(0);

  sa = (FLOAT *)((BLASLONG)buffer + GEMM_OFFSET_A);
  sb = (FLOAT *)(((BLASLONG)sa + ((GEMM_P * GEMM_Q * COMPSIZE * SIZE + GEMM_ALIGN) & ~GEMM_ALIGN)) + GEMM_OFFSET_B);

#ifdef SMP
  args.common = NULL;
  args.nthreads = num_cpu_avail(3);

  if (args.nthreads == 1) {
#endif

    (symm[(side << 1) | uplo ])(&args, NULL, NULL, sa, sb, 0);

#ifdef SMP

  } else {

#ifndef NO_AFFINITY
    nodes = get_num_nodes();

    if (nodes > 1) {

      args.nthreads /= nodes;

      gemm_thread_mn(MODE, &args, NULL, NULL,
		     symm[4 | (side << 1) | uplo ], sa, sb, nodes);

    } else {
#endif

#ifndef USE_SIMPLE_THREADED_LEVEL3

      (symm[4 | (side << 1) | uplo ])(&args, NULL, NULL, sa, sb, 0);

#else

      GEMM_THREAD(MODE, &args, NULL, NULL, symm[(side << 1) | uplo ], sa, sb, args.nthreads);

#endif

#ifndef NO_AFFINITY
    }
#endif

  }
#endif

 blas_memory_free(buffer);

  FUNCTION_PROFILE_END(COMPSIZE * COMPSIZE,
		       (!side)? args.m * (args.m / 2 + args.n) : args.n * (args.m + args.n / 2),
		       (!side)? 2 * args.m * args.m * args.n : 2 * args.m * args.n * args.n);

  IDEBUG_END;

  return;
}

示例#12

0

显示文件

文件： subseq7.c 项目： CPFL/gmeme

/**********************************************************************/
/*
	subseq7

	Try the width-w subsequences of the dataset as starting points
	(seeds).  For every (sequence, offset) pair visited, the function:
	  1. converts the subsequence to an integer log theta (init_theta_1),
	  2. computes pY from scratch at the first offset of a sequence
	     (get_pY) or incrementally from the previous offset (next_pY),
	  3. skips the offset if not_o[ioff] < MIN_NOT_O,
	  4. collects the maxima of pY (get_max) and calls
	     align_top_subsequences for the n_nsites0 entries of s_points,
	  5. inserts the seed string into evaluated_seed_ht and passes it
	     to update_s_point_heaps.

	Under PARALLEL the (sequence, offset) range is the one returned by
	get_start_n_end, and reduce_across_heaps is called after the loops.

	If dataset->print_pred is set, the predicted sites for the best
	seed of each non-empty s_point heap are printed to stdout.

	Returns nothing; output is delivered through s_points and
	evaluated_seed_ht.  The local "maxima" buffer is freed on exit.
*/
/**********************************************************************/
extern void subseq7(
  MODEL *model,			// the model
  DATASET *dataset,		/* the dataset */
  int w,			// w to use
  int n_nsites0,		/* number of nsites0 values to try */
  S_POINT s_points[],           /* array of starting points: 1 per nsites0 */
  HASH_TABLE evaluated_seed_ht 	/* A hash table used for remembering which seeds
                                   have been evaluated previously */
)
{
  MOTYPE mtype = model->mtype;		/* type of model */
  BOOLEAN ic = model->invcomp;		/* use reverse complement strand of DNA, too */
  THETA map = dataset->map;		/* freq x letter map */
  LOG_THETA_TYPE(ltheta);		/* integer encoded log theta */
  int iseq, ioff;
  int alength = dataset->alength;	/* length of alphabet */
  int n_samples = dataset->n_samples;	/* number of samples in dataset */
  SAMPLE **samples = dataset->samples;	/* samples in dataset */
  int n_starts = 0;			/* number of sampled start subseq */
  int n_maxima = ps(dataset, w);	/* upper bound on # maxima */
  /* the local maxima positions */
  P_PROB maxima = (P_PROB) mymalloc(n_maxima * sizeof(p_prob));
  int lmap[MAXALPH][MAXALPH];	/* consensus letter vs. log frequency matrix */
  double col_scores[MAXSITE];		/* not used */
#ifdef PARALLEL
  int start_seq, start_off=0, end_seq, end_off=0;
#endif
  char *str_seed;                       // A string representation of a seed.

  // PRECONDITIONS:

  // 1. If the sequence model is oops, then n_nsites0 is exactly 1:
  if (mtype == Oops) {
    assert(n_nsites0 == 1);
  }

  convert_to_lmap(map, lmap, alength);

  if (TRACE) { printf("w= %d\n", w); }

  /* get the probability that a site starting at position x_ij would
     NOT overlap a previously found motif.
  */
  get_not_o(dataset, w);

  // Set up log_not_o: log_not_o[site] is:
  // log ( Pr(site not overlapped) * scaled_to_one_Pr(site) )
  if (model->mtype != Tcm) {
    add_psp_to_log_not_o(dataset, w, model->invcomp, model->mtype);
  }

  /* score all the sampled positions saving the best position for
     each value of NSITES0 */
  /* NOTE: exactly one of the two "for" headers below is compiled; both
     open the same loop body, which is closed at the "sequence" brace. */
#ifdef PARALLEL
  /* Retrieve the previously-calculated starting and ending points. */
  get_start_n_end(&start_seq, &start_off, &end_seq, &end_off);
  /* Divide the various samples among processors. */
  for (iseq = start_seq; iseq <= end_seq; iseq++) { /* sequence */
#else /* not PARALLEL */
  for (iseq = 0; iseq < n_samples; iseq++) {	/* sequence */
#endif /* PARALLEL */

    SAMPLE *s = samples[iseq];
    int lseq = s->length;
    char *res = s->res;				/* left to right */
    char *name = s->sample_name;
    double *not_o = s->not_o;
    int max_off, init_off;

    if (lseq < w) continue;			/* shorter than motif */

    /* Progress line on stderr every 5th sequence; under PARALLEL the
       extra "if" below restricts it to the process whose mpMyID() is 0. */
#ifdef PARALLEL
    if (mpMyID() == 0)
#endif
    if ((!NO_STATUS) && ((iseq % 5) == 0)) {
      fprintf(stderr, "starts: w=%d, seq=%d, l=%d          \r", w, iseq, lseq); 
    }
    /* Set the appropriate starting and ending points. */
    /* Under PARALLEL the first and last sequences of this range may be
       only partially covered; without PARALLEL the dangling "else" is
       compiled out and the plain assignment is unconditional. */
#ifdef PARALLEL
    if (iseq == start_seq)
      init_off = start_off;
    else
#endif
      init_off = 0;

#ifdef PARALLEL
    if (iseq == end_seq)
      max_off = MIN(end_off, lseq - w);
    else
#endif
      max_off = lseq - w;

    /*
      Loop over all subsequences in the current sequence testing them
      each as "starting points" (initial values) for theta
    */
    for (ioff = init_off; ioff <= max_off; ioff++) {/* subsequence */ 
      /* warning: always do the next step; don't ever
         "continue" or the value of pY will not be correct since
         it is computed based on the previous value 
      */

      /* convert subsequence in dataset to starting point for EM */
      init_theta_1(w, res+ioff, &ltheta[1][0], lmap);

      if (ioff == init_off) { 			/* new sequence */

        /* Compute p(Y_ij | theta_1^0) */
        /* pY index 0 is used for the single-strand case; indices 1 and 2
           are both filled when the reverse complement is in use. */
        if (!ic) {
          get_pY(dataset, &ltheta[1][0], w, 0);
        } else {
          get_pY(dataset, &ltheta[1][0], w, 1);
          get_pY(dataset, &ltheta[1][0], w, 2);
        }

      } else {					/* same sequence */
        
        /* get theta[0][0]^{k-1} */
        init_theta_1(1, res+ioff-1, &ltheta[0][0], lmap);

        /* compute p(Y_ij | theta_1^k) */
        if (!ic) {
          next_pY(dataset, &ltheta[1][0], w, &ltheta[0][0][0], 0);
        } else {
          next_pY(dataset, &ltheta[1][0], w, &ltheta[0][0][0], 1);
          next_pY(dataset, &ltheta[1][0], w, &ltheta[0][0][0], 2);
        }
      } /* same sequence */

      /* skip if there is a high probability that this subsequence
         is part of a site which has already been found 
      */
      /* This "continue" does not violate the warning at the top of the
         loop: pY has already been updated for this offset above. */
      if (not_o[ioff] < MIN_NOT_O) continue;

      /*fprintf(stderr, "subseq: %d %d\r", iseq+1, ioff+1);*/

      // Put highest pY into first scratch array if using both DNA strands:
      if (ic) {
        combine_strands(samples, n_samples, w);
      }

      /* get a sorted list of the maxima of pY */
      n_maxima = get_max(mtype, dataset, w, maxima, ic, TRUE);

      /* "fake out" align_top_subsequences by setting each of the scores in
         the s_points objects to LITTLE, thereby forcing
         align_top_subsequences to record the attributes for the current seed
         in the s_points, rather than the seed with the highest respective
         scores: */
      int sp_idx;
      for (sp_idx = 0; sp_idx < n_nsites0; sp_idx++) {
        s_points[sp_idx].score = LITTLE;
      }

      /* align the top nsites0 subsequences for each value
         of nsites0 and save the alignments with the highest likelihood 
      */
      n_starts += align_top_subsequences(
        mtype,
        w,
        dataset,
        iseq,
        ioff, 
        res+ioff,
        name,
        n_nsites0,
        n_maxima,
        maxima,
        col_scores,
        s_points
      );

      /* A string version of the current seed is required for updating the
         S_POINT heaps: */
      str_seed = to_str_seed(res+ioff, w);

      /* For each of the S_POINT objects, add the current seed to that
         S_POINT'S heap.
         Also, branching search will require a hash_table of all seeds that
         have been evaluated prior to when branching search is called. Hence
         also record the current seed (string, nsites0) combination in the
         hash_table, for all nsites0, unless that seed was already in the
         hash_table:
      */
      hash_insert_str(str_seed, evaluated_seed_ht);
      update_s_point_heaps(s_points, str_seed, n_nsites0);

      /* str_seed is released here, right after the two calls above;
         presumably they keep their own copies -- TODO confirm. */
      myfree(str_seed);
    } /* subsequence */
  } /* sequence */

#ifdef PARALLEL
  reduce_across_heaps(s_points, n_nsites0);
#endif // PARALLEL 

  // Print the sites predicted using the seed after subsequence search, for
  // each of the starting points, if requested:
  if (dataset->print_pred) {
    int sp_idx;
    for (sp_idx = 0; sp_idx < n_nsites0; sp_idx++) {
      // Retrieve the best seed, from the heap:
      HEAP *heap = s_points[sp_idx].seed_heap;
      // Only print sites for the s_point if its heap was non-empty:
      if (get_num_nodes(heap) > 0) {
        SEED *best_seed = (SEED *)get_node(heap, get_best_node(heap));
        char *seed = get_str_seed(best_seed);

        /* Print the sites predicted using the motif corresponding to that seed,
           according to the sequence model being used:
        */
        int nsites0 = s_points[sp_idx].nsites0;
        fprintf(stdout,
                "PREDICTED SITES AFTER SUBSEQUENCE SEARCH WITH W = %i "
                "NSITES = %i MOTIF = %i\n", w, nsites0, dataset->imotif);
        // NOTE: this n_maxima shadows the function-level n_maxima.
        int n_maxima = ps(dataset, w); // upper bound on number of maxima
        P_PROB psites = (P_PROB) mymalloc(n_maxima * sizeof(p_prob));
        // The count stored in n_maxima below is not read again in this
        // block; print_site_array is given nsites0 instead.
        n_maxima = get_pred_sites(psites, mtype, w, seed, ltheta[1], lmap,
                                  dataset, ic);
        print_site_array(psites, nsites0, stdout, w, dataset);
        myfree(psites);
      } // get_num_nodes > 0
    } //sp_idx
  } // print_pred

  if (TRACE){
    printf("Tested %d possible starts...\n", n_starts);
    }

  myfree(maxima);
} // subseq7


/**********************************************************************/
/*
	next_pY

	Compute the value of p(Y_ij | theta_1^{k+1})
	from p(Y_ij | theta_1^{k}) and the probability
	of first letter of Y_ij given theta_1^k,
	p(Y_ij^0 | theta_1^k).

	For every sample at least w long, and for j = lseq-w down to 1:

	  pY[j] = pY[j-1] + theta_last[res[j+w-1]] - theta_0[res[j-1]]

	i.e. the previous value one position to the left, plus the new
	last column's term for the letter entering the window, minus the
	old first column's term for the letter leaving it.  pY[0] has no
	left neighbour and is recomputed from scratch as the sum of
	theta_1[k][res[k]] over the w columns.

	The update runs from high j to low j so each pY[j-1] read is still
	the value from the previous call.

	Samples shorter than w are left untouched.  All sequence and pY
	accesses are done by index after the length check, so no pointer
	outside the arrays is ever formed.
*/
/**********************************************************************/
static void next_pY(
  DATASET *dataset,			/* the dataset */
  LOG_THETAG_TYPE(theta_1),		/* integer log theta_1 */
  int w,				/* width of motif */
  int *theta_0,				/* first column of previous theta_1 */
  int pYindex				/* which pY array to use */
) {
  int *theta_last = theta_1[w-1];	/* last column of theta_1 */
  int n_samples = dataset->n_samples;
  SAMPLE **samples = dataset->samples;
  int i;

  for (i = 0; i < n_samples; i++) {	/* sequence */
    SAMPLE *s = samples[i];
    int lseq = s->length;		/* length of sequence */
    int j, k, p;

    if (lseq < w) continue;		/* skip if sequence too short */

    /* pY indices 0 and 1 read s->res; any other index reads s->resic */
    char *res = pYindex<2 ? s->res : s->resic;	/* integer sequence */
    int *pY = s->pY[pYindex];		/* log p(Y_j | theta_1) */

    /* shift: derive pY[j] from the old pY[j-1], highest j first */
    for (j = lseq-w; j > 0; j--) {
      pY[j] = pY[j-1] + theta_last[(int)res[j+w-1]] - theta_0[(int)res[j-1]];
    }

    /* calculate log p(Y_i0 | theta_1) directly */
    p = 0;
    for (k = 0; k < w; k++) {		/* position in site */
      p += theta_1[k][(int)res[k]];
    }
    pY[0] = p;
  }
}

示例#13

0

显示文件

文件： boruvka_parallel_star.cpp 项目： kfuh1/15418-project

/**
 * Builds spanning-tree edges with rounds of "find the minimum outgoing edge
 * per component" followed by randomized star contraction, using OpenMP.
 *
 * Each round:
 *   1. Find:     for every component root j, scan every vertex belonging to j
 *                and remember the lightest edge leaving the component in
 *                min_edges[j]; is_first_passes[j] becomes false once an edge
 *                has been recorded.
 *   2. Flip:     every vertex gets a random coin (true = star center / head,
 *                false = satellite / tail).
 *   3. Contract: for every root that recorded an edge this round, if the two
 *                endpoint components have different coins, the tail is merged
 *                into the head (a CAS on is_contracted[tail] ensures a tail
 *                is contracted at most once per round) and the edge is stored
 *                in mst_edges[tail].
 * The loop ends when no recorded edge connects two different components.
 *
 * @param g  graph handle; read through get_num_nodes, edges_begin, edges_end,
 *           g->weights and g->offsets.
 * @return   array of n edges allocated with new[]; the caller owns it and
 *           must delete[] it. Slot i holds the edge through which component
 *           i was contracted; slots that never received an edge are
 *           value-initialized (all-zero for a plain aggregate Edge).
 *
 * Prints the accumulated find and contract times to stdout.
 */
struct Edge* find_MST_parallel_star(Graph g){
    omp_set_num_threads(THREADS);
    int n = get_num_nodes(g);

    // min_edges[r]: lightest edge leaving the component rooted at r (this round)
    struct Edge* min_edges = new struct Edge[n];
    struct set *components = new struct set[n];
    // value-initialized so slots that are never assigned are not indeterminate
    struct Edge* mst_edges = new struct Edge[n]();

    bool *coin_flips = new bool[n];
    // is_contracted[r]: component r was already merged as a tail this round
    bool *is_contracted = new bool[n];
    // is_first_passes[r]: no outgoing edge recorded for root r yet this round.
    // Heap-allocated: a variable-length array is not standard C++ and would
    // put O(n) bytes on the stack.
    bool *is_first_passes = new bool[n];

    // loop guard: did some recorded edge still join two different components?
    bool can_be_contracted = true;

    #pragma omp parallel for schedule(static)
    for(int i = 0; i < n; i++){
        components[i].parent = i;
        components[i].rank = 0;
        is_first_passes[i] = true;
        // must be false before the first round's compare-and-swap reads it
        is_contracted[i] = false;
    }

    double startTimeFind, endTimeFind;
    double findTotal = 0.0;
    double startTimeContract, endTimeContract;
    double contractTotal = 0.0;

    // continue until a round finds nothing left to contract (one component,
    // or, for a disconnected graph, no edges between the remaining ones)
    while(can_be_contracted){
        startTimeFind = CycleTimer::currentSeconds();
        // Each iteration j writes only min_edges[j] / is_first_passes[j]
        // (set1 == j inside), so iterations do not write each other's slots.
        #pragma omp parallel for schedule(dynamic, CHUNKSIZE)
        for(int j = 0; j < n; j++){
            if(find_parallel(components, j) != j){
                continue;   // only component roots collect a minimum edge
            }
            for(int i = 0; i < n; i++){
                int set1 = find_parallel(components, i);
                if(set1 != j){
                    continue;   // vertex i belongs to another component
                }
                const Vertex* start = edges_begin(g, i);
                const Vertex* end = edges_end(g, i);
                int weight_offset = 0;
                for(const Vertex* v = start; v < end; v++, weight_offset++){
                    int set2 = find_parallel(components, *v);
                    if(set1 == set2){
                        continue;   // both endpoints already in one component
                    }
                    Edge e;
                    e.src = i;
                    e.dest = *v;
                    e.weight = g->weights[g->offsets[i] + weight_offset];
                    if(is_first_passes[set1]){
                        min_edges[set1] = e;
                        is_first_passes[set1] = false;
                    }
                    else if(min_edges[set1].weight > e.weight){
                        min_edges[set1] = e;
                    }
                }
            }
        }
        endTimeFind = CycleTimer::currentSeconds();
        findTotal += (endTimeFind - startTimeFind);

        startTimeContract = CycleTimer::currentSeconds();
        // false = satellite (tail), true = star center (head)
        // NOTE(review): rand() is not required to be thread-safe; consider
        // per-thread generators if this loop stays parallel.
        #pragma omp parallel for schedule(static)
        for(int i = 0; i < n; i++){
            coin_flips[i] = ((rand() % 2) == 1);
        }

        // OR-reduction: the flag ends up true if any iteration found an edge
        // between two different components, without an unsynchronized write.
        can_be_contracted = false;
        #pragma omp parallel for schedule(dynamic, CHUNKSIZE) reduction(||:can_be_contracted)
        for(int i = 0; i < n; i++){
            // min_edges[i] is only meaningful if the find phase recorded an
            // edge for i this round; otherwise it is uninitialized or stale.
            if(is_first_passes[i]){
                continue;
            }

            int root1 = find_parallel(components, min_edges[i].src);
            int root2 = find_parallel(components, min_edges[i].dest);
            if(root1 == root2){
                continue;
            }
            can_be_contracted = true;

            // no contraction when both are heads or both are tails
            if(coin_flips[root1] == coin_flips[root2]){
                continue;
            }

            // always contract the tail into the head
            const bool root1_is_head = coin_flips[root1];
            const int tail = root1_is_head ? root2 : root1;
            const int head = root1_is_head ? root1 : root2;

            // claim the tail; failure means another thread contracted it
            if(!__sync_bool_compare_and_swap(&is_contracted[tail], false, true)){
                continue;
            }
            union_parallel(components, tail, head);
            mst_edges[tail] = min_edges[i];
        }

        // reset per-round state
        #pragma omp parallel for schedule(static)
        for(int i = 0; i < n; i++){
            is_first_passes[i] = true;
            is_contracted[i] = false;
        }
        endTimeContract = CycleTimer::currentSeconds();
        contractTotal += (endTimeContract - startTimeContract);
    }

    printf("find time parallel comp star: %.20f\n", findTotal);
    printf("contract time parallel comp star: %.20f\n", contractTotal);

    delete[] min_edges;
    delete[] components;
    delete[] coin_flips;
    delete[] is_contracted;
    delete[] is_first_passes;
    return mst_edges;
}