예제 #1
0
/*  allocate a new iop */
GASNETI_NEVER_INLINE(gasnete_iop_alloc,
static gasnete_iop_t * gasnete_iop_alloc(gasnete_threaddata_t * const thread)) {
    gasnete_iop_t *iop = (gasnete_iop_t *)gasneti_malloc(sizeof(gasnete_iop_t));
    gasneti_leak(iop);
#if GASNET_DEBUG
    memset(iop, 0, sizeof(gasnete_iop_t)); /* set pad to known value */
#endif
    SET_OPTYPE((gasnete_op_t *)iop, OPTYPE_IMPLICIT);
    iop->threadidx = thread->threadidx;
    iop->initiated_get_cnt = 0;
    iop->initiated_put_cnt = 0;
    gasneti_weakatomic_set(&(iop->completed_get_cnt), 0, 0);
    gasneti_weakatomic_set(&(iop->completed_put_cnt), 0, 0);
    return iop;
}
예제 #2
0
/*  get a new iop */
static
gasnete_iop_t *gasnete_iop_new(gasnete_threaddata_t * const thread) {
    gasnete_iop_t *iop = thread->iop_free;
    if_pt (iop) {
        thread->iop_free = iop->next;
        gasneti_memcheck(iop);
        gasneti_assert(OPTYPE(iop) == OPTYPE_IMPLICIT);
        gasneti_assert(iop->threadidx == thread->threadidx);
        /* If using trace or stats, want meaningful counts when tracing NBI access regions */
        #if GASNETI_STATS_OR_TRACE
            iop->initiated_get_cnt = 0;
            iop->initiated_put_cnt = 0;
            gasneti_weakatomic_set(&(iop->completed_get_cnt), 0, 0);
            gasneti_weakatomic_set(&(iop->completed_put_cnt), 0, 0);
        #endif
    } else {
예제 #3
0
/*  allocate more eops */
GASNETI_NEVER_INLINE(gasnete_eop_alloc,
static void gasnete_eop_alloc(gasnete_threaddata_t * const thread)) {
    gasnete_eopaddr_t addr;
    int bufidx = thread->eop_num_bufs;
    gasnete_eop_t *buf;
    int i;
    gasnete_threadidx_t threadidx = thread->threadidx;
    if (bufidx == 256) gasneti_fatalerror("GASNet Extended API: Ran out of explicit handles (limit=65535)");
    thread->eop_num_bufs++;
    buf = (gasnete_eop_t *)gasneti_calloc(256,sizeof(gasnete_eop_t));
    gasneti_leak(buf);
    for (i=0; i < 256; i++) {
        addr.bufferidx = bufidx;
#if GASNETE_SCATTER_EOPS_ACROSS_CACHELINES
#ifdef GASNETE_EOP_MOD
        addr.eopidx = (i+32) % 255;
#else
        {   int k = i+32;
            addr.eopidx = k > 255 ? k - 255 : k;
        }
#endif
#else
        addr.eopidx = i+1;
#endif
        buf[i].threadidx = threadidx;
        buf[i].addr = addr;
#if 0 /* these can safely be skipped when the values are zero */
        SET_OPSTATE(&(buf[i]),OPSTATE_FREE);
        SET_OPTYPE(&(buf[i]),OPTYPE_EXPLICIT);
#if GASNETE_EOP_COUNTED
       buff[i].initiated_cnt = 0;
#endif
#endif
#if GASNETE_EOP_COUNTED
       gasneti_weakatomic_set(&buf[i].completed_cnt, 0 , 0);
#endif
    }
    /*  add a list terminator */
#if GASNETE_SCATTER_EOPS_ACROSS_CACHELINES
#ifdef GASNETE_EOP_MOD
    buf[223].addr.eopidx = 255; /* modular arithmetic messes up this one */
#endif
    buf[255].addr = EOPADDR_NIL;
#else
    buf[255].addr = EOPADDR_NIL;
#endif
    thread->eop_bufs[bufidx] = buf;
    addr.bufferidx = bufidx;
    addr.eopidx = 0;
    thread->eop_free = addr;

#if GASNET_DEBUG
    {   /* verify new free list got built correctly */
        int i;
        int seen[256];
        gasnete_eopaddr_t addr = thread->eop_free;

#if 0
        if (gasneti_mynode == 0)
            for (i=0; i<256; i++) {
                fprintf(stderr,"%i:  %i: next=%i\n",gasneti_mynode,i,buf[i].addr.eopidx);
                fflush(stderr);
            }
        sleep(5);
#endif

        gasneti_memcheck(thread->eop_bufs[bufidx]);
        memset(seen, 0, 256*sizeof(int));
        for (i=0; i<(bufidx==255?255:256); i++) {
            gasnete_eop_t *eop;
            gasneti_assert(!gasnete_eopaddr_isnil(addr));
            eop = GASNETE_EOPADDR_TO_PTR(thread,addr);
            gasneti_assert(OPTYPE(eop) == OPTYPE_EXPLICIT);
            gasneti_assert(OPSTATE(eop) == OPSTATE_FREE);
            gasneti_assert(eop->threadidx == threadidx);
            gasneti_assert(addr.bufferidx == bufidx);
            gasneti_assert(!seen[addr.eopidx]);/* see if we hit a cycle */
            seen[addr.eopidx] = 1;
            addr = eop->addr;
        }
        gasneti_assert(gasnete_eopaddr_isnil(addr));
    }
#endif
}
예제 #4
0
gasnete_coll_local_tree_geom_t *gasnete_coll_local_tree_geom_fetch(gasnete_coll_tree_type_t type, gasnet_node_t root,  gasnete_coll_team_t team) {
  gasnete_coll_tree_geom_t *geom_cache_head = team->tree_geom_cache_head;
  gasnete_coll_local_tree_geom_t *ret;
  gasnete_coll_tree_geom_t *curr_geom;
  
  
  /*lock here so that only one multiple threads don't try to build it*/
  gasneti_mutex_lock(&team->tree_geom_cache_lock);
  curr_geom = gasnete_coll_tree_geom_fetch_helper(type, team);
  if(curr_geom == NULL) {
    int i;
#if 0
    if(gasneti_mynode ==0) fprintf(stderr, "%d> new tree: %d type %d fanout\n",gasneti_mynode, type.tree_class, type.prams[0]);
#endif
    /* allocate new geometry */
    curr_geom = (gasnete_coll_tree_geom_t *) gasneti_malloc(sizeof(gasnete_coll_tree_geom_t));
    curr_geom->local_views = (gasnete_coll_local_tree_geom_t**) 
    gasneti_malloc(sizeof(gasnete_coll_local_tree_geom_t*)*team->total_ranks);
    for(i=0; i<team->total_ranks; i++) {
      curr_geom->local_views[i] = NULL;
    }
    curr_geom->tree_type = type;
#if 0
    /* Not using the ref_count for now */
    gasneti_weakatomic_set(&(curr_geom->ref_count), 0, 0);
#endif
    /*	curr_geom->root = root; */
    /* link it into the cache*/
    if(geom_cache_head == NULL) {
      /*cache is empty*/
      curr_geom->prev = NULL;
      curr_geom->next = NULL;
      team->tree_geom_cache_head = curr_geom;
      team->tree_geom_cache_tail = curr_geom;
    } else {
      curr_geom->prev = NULL; /* new head */
      curr_geom->next = team->tree_geom_cache_head;
      team->tree_geom_cache_head->prev = curr_geom;
      team->tree_geom_cache_head = curr_geom;
    }
    curr_geom->local_views[root] = gasnete_coll_tree_geom_create_local(type, root, team, curr_geom);
    
    ret = curr_geom->local_views[root];
    
    /* create local view for the root that we request */
  } else {
    /* if it is already allocated for root go ahead and return it ... this should be the fast path*/    
    if(curr_geom->local_views[root] == NULL) {
      curr_geom->local_views[root] = gasnete_coll_tree_geom_create_local(type, root, team, curr_geom);
    }
    ret = curr_geom->local_views[root];
  } 

#ifdef GASNETC_HAVE_AMRDMA  
  /*at the time of this writing no conduits support this yet*/
  if(team->myrank != ret->root) {
    int count = GASNETE_COLL_TREE_GEOM_CHILD_COUNT(ret);
    gasnet_node_t *tmp = gasneti_calloc(1+count, sizeof(gasnet_node_t));
    memcpy(tmp, GASNETE_COLL_TREE_GEOM_CHILDREN(ret), count*sizeof(gasnet_node_t));
    tmp[count] = GASNETE_COLL_TREE_GEOM_PARENT(ret);
    gasnetc_amrdma_init(1+count, tmp);
    gasneti_free(tmp);
  } else {
    gasnetc_amrdma_init(GASNETE_COLL_TREE_GEOM_CHILD_COUNT(ret),
                        GASNETE_COLL_TREE_GEOM_CHILDREN(ret));
  }
#endif

#if 0
  /* Not using the reference counts for now */
  gasneti_weakatomic_increment(&(curr_geom->ref_count), 0);
  gasneti_weakatomic_increment(&(ret->ref_count), 0);

  /*
  fprintf(stderr, "[%u] gasnete_coll_local_tree_geom_fetch: curr_geom->ref_count %u, ret->ref_count %u \n",
          gasnet_mynode(), gasneti_weakatomic_read(&(curr_geom->ref_count), 0),
          gasneti_weakatomic_read(&(ret->ref_count), 0));
  */
#endif

  gasneti_mutex_unlock(&team->tree_geom_cache_lock);
  return ret;
}
예제 #5
0
/*this fucntion is already serialized in the function that calls this 
  so from here on out there is no worry about locking*/
gasnete_coll_local_tree_geom_t *gasnete_coll_tree_geom_create_local(gasnete_coll_tree_type_t in_type, int rootrank, gasnete_coll_team_t team, gasnete_coll_tree_geom_t *base_geom)  {
  gasnete_coll_local_tree_geom_t *geom;
  int i;
  gasnete_coll_tree_type_t intype_copy;
  tree_node_t *allnodes = (tree_node_t*) team->tree_construction_scratch;
  tree_node_t rootnode,mynode;
  gasneti_assert(rootrank<team->total_ranks && rootrank >=0);
  gasneti_assert_always(in_type);
  intype_copy = in_type;
  geom = (gasnete_coll_local_tree_geom_t*)gasneti_malloc(sizeof(gasnete_coll_local_tree_geom_t));
  gasneti_assert_always(in_type==intype_copy);
  
  switch (in_type->tree_class) {
#if 1
  case GASNETE_COLL_NARY_TREE:
      gasneti_assert(in_type->num_params ==1);
      allnodes =  allocate_nodes((tree_node_t**) &team->tree_construction_scratch, team, rootrank);
      rootnode = make_nary_tree(allnodes, team->total_ranks, in_type->params[0]);
       geom->rotation_points = (int*) gasneti_malloc(sizeof(int)*1);
       geom->num_rotations = 1;
       geom->rotation_points[0] = rootrank;
      break;
    case GASNETE_COLL_KNOMIAL_TREE:
      gasneti_assert(in_type->num_params ==1);
       allnodes = allocate_nodes((tree_node_t**) &team->tree_construction_scratch, team, rootrank);
      rootnode = make_knomial_tree(allnodes, team->total_ranks, in_type->params[0]);
       geom->rotation_points = (int*) gasneti_malloc(sizeof(int)*1);
       geom->num_rotations = 1;
       geom->rotation_points[0] = rootrank;
      break;
    case GASNETE_COLL_FLAT_TREE:
      allocate_nodes((tree_node_t**) &team->tree_construction_scratch , team, rootrank);
      rootnode = make_flat_tree(team->tree_construction_scratch, team->total_ranks);
      geom->rotation_points = (int*) gasneti_malloc(sizeof(int)*1);
      geom->num_rotations = 1;
      geom->rotation_points[0] = rootrank;
      break;
    case GASNETE_COLL_RECURSIVE_TREE:
      gasneti_assert(in_type->num_params ==1);
      allnodes = allocate_nodes((tree_node_t**) &team->tree_construction_scratch, team, rootrank);
      rootnode = make_recursive_tree(allnodes, team->total_ranks, in_type->params[0]);
      geom->rotation_points = (int*) gasneti_malloc(sizeof(int)*1);
      geom->num_rotations = 1;
      geom->rotation_points[0] = rootrank;
      break;
    case GASNETE_COLL_FORK_TREE:
      allnodes = allocate_nodes((tree_node_t**) &team->tree_construction_scratch, team, rootrank);
      rootnode = make_fork_tree(allnodes, team->total_ranks, in_type->params, in_type->num_params);
       geom->rotation_points = (int*) gasneti_malloc(sizeof(int)*1);
       geom->num_rotations = 1;
       geom->rotation_points[0] = rootrank;
      break;
    case GASNETE_COLL_HIERARCHICAL_TREE:
#if 0
      allnodes = team->tree_construction_scratch = allocate_nodes(allnodes, team, 0);
      rootnode = make_hiearchical_tree(in_type, allnodes, team->total_ranks);
       /* XXX ADD CODE TO GET ROTATION POINTS*/
      break;
#else
       gasneti_fatalerror("HIERARCHICAL_TREE not yet fully supported");
#endif
#endif
    default:
       gasneti_fatalerror("unknown tree type");
      break;
  }
  
  rootnode = setparents(rootnode);
  mynode = find_node(rootnode, team->myrank);

  geom->root = rootrank;
  geom->tree_type = in_type;
  geom->total_size = team->total_ranks;
  geom->parent = GET_PARENT_ID(mynode);
  geom->child_count = GET_NUM_CHILDREN(mynode);
  geom->mysubtree_size = treesize(mynode);
  geom->parent_subtree_size = treesize(mynode->parent);
  geom->children_reversed = mynode->children_reversed;
  if(rootrank != team->myrank) {
    geom->num_siblings = GET_NUM_CHILDREN(mynode->parent);
    geom->sibling_id = -1;
    geom->sibling_offset = 0;
    for(i=0; i<geom->num_siblings; i++) {
      int tmp_id;
      if(mynode->parent->children_reversed==1) {
        tmp_id = geom->num_siblings-1-i;
      } else {
        tmp_id =i;
      }
      if(GET_NODE_ID(GET_CHILD_IDX(mynode->parent, tmp_id))==team->myrank) {
        geom->sibling_id = tmp_id;
        break;
      } else {
        geom->sibling_offset += treesize(GET_CHILD_IDX(mynode->parent, tmp_id));
      }
    }
  } else {
    geom->num_siblings = 0;
    geom->sibling_id = 0;
    geom->sibling_offset = 0;
    /***** THIS NEEDS TO BE TAKEN OUT
      The DFS ordering that we impose on the trees will mean that this no longer needs to be kept around
      but it's in here for now for backward compatability sake until we make the neccessary changes to all the other collective algorithms
      ****/
    geom->dfs_order = (gasnet_node_t*) gasneti_malloc(sizeof(gasnet_node_t)*team->total_ranks);
    for(i=0; i<team->total_ranks; i++) {
      geom->dfs_order[i] = (i+rootrank)%team->total_ranks;
    }
  }
  geom->seq_dfs_order = 1;
  geom->child_list = (gasnet_node_t*) gasneti_malloc(sizeof(gasnet_node_t)*geom->child_count);
  geom->subtree_sizes = (gasnet_node_t*) gasneti_malloc(sizeof(gasnet_node_t)*geom->child_count);
  geom->child_offset = (gasnet_node_t*) gasneti_malloc(sizeof(gasnet_node_t)*geom->child_count);
  geom->grand_children = (gasnet_node_t*)gasneti_malloc(sizeof(gasnet_node_t)*geom->child_count); 
  geom->num_non_leaf_children=0;
  geom->num_leaf_children=0;
  geom->child_contains_wrap = 0;
  for(i=0; i<geom->child_count; i++) {
    geom->child_list[i] = GET_NODE_ID(GET_CHILD_IDX(mynode,i));
    geom->subtree_sizes[i] = treesize(GET_CHILD_IDX(mynode,i));
    geom->grand_children[i] = GET_NUM_CHILDREN(GET_CHILD_IDX(mynode, i));
    if(geom->subtree_sizes[i] > 1) {
      geom->num_non_leaf_children++;
    } else {
      geom->num_leaf_children++;
    }
    if(geom->child_list[i]+geom->subtree_sizes[i] > geom->total_size) {
      geom->child_contains_wrap = 1;
    }
    
  }
  gasneti_assert((geom->num_leaf_children+geom->num_non_leaf_children) == geom->child_count);
  
  if(mynode->children_reversed==1) {
    size_t temp_offset = 0;
    for(i=geom->child_count-1; i>=0; i--) {
      geom->child_offset[i] = temp_offset; 
      temp_offset+=geom->subtree_sizes[i];
    }
  } else {
    size_t temp_offset = 0;
    for(i=0; i<geom->child_count; i++) {
      geom->child_offset[i] = temp_offset; 
      temp_offset+=geom->subtree_sizes[i];
    }
  }

#if 0
  /* Not using the reference counts for now */
  gasneti_weakatomic_set(&(geom->ref_count), 0, 0);
  geom->base_geom = base_geom;
#endif

#if 0  
  gasnete_coll_print_tree(geom, gasneti_mynode);
#endif
  return geom;
}