/*
 * Initialise the variable interface of every memory node of `handle` from
 * the description given in `data_interface`.
 *
 * The id and element size are copied to all nodes.  ptr, dev_handle and
 * offset are copied on `home_node` only and zeroed on every other node.
 */
static void register_variable_handle(starpu_data_handle_t handle, unsigned home_node, void *data_interface)
{
    struct starpu_variable_interface *src = (struct starpu_variable_interface *)data_interface;
    unsigned n;

    for (n = 0; n < STARPU_MAXNODES; n++)
    {
        struct starpu_variable_interface *dst = (struct starpu_variable_interface *)
                starpu_data_get_interface_on_node(handle, n);

        if (n != home_node)
        {
            /* Nothing is attached to this node yet */
            dst->ptr = 0;
            dst->dev_handle = 0;
            dst->offset = 0;
        }
        else
        {
            dst->ptr = src->ptr;
            dst->dev_handle = src->dev_handle;
            dst->offset = src->offset;
        }

        /* Fields that do not depend on the node */
        dst->id = src->id;
        dst->elemsize = src->elemsize;
    }
}
Esempio n. 2
0
/*
 * Initialise the block interface of every memory node of `handle` from the
 * description given in `interface`.
 *
 * The dimensions (nx, ny, nz) and the element size are copied to all nodes.
 * ptr, dev_handle, offset and the leading dimensions (ldy, ldz) are copied
 * on `home_node` only and zeroed on every other node.
 */
static void register_block_handle(starpu_data_handle handle, uint32_t home_node, void *interface)
{
	starpu_block_interface_t *src = interface;
	unsigned n;

	for (n = 0; n < STARPU_MAXNODES; n++)
	{
		starpu_block_interface_t *dst = starpu_data_get_interface_on_node(handle, n);

		/* Fields that do not depend on the node */
		dst->nx = src->nx;
		dst->ny = src->ny;
		dst->nz = src->nz;
		dst->elemsize = src->elemsize;

		if (n != home_node)
		{
			/* Nothing is attached to this node yet */
			dst->ptr = 0;
			dst->dev_handle = 0;
			dst->offset = 0;
			dst->ldy = 0;
			dst->ldz = 0;
			continue;
		}

		dst->ptr = src->ptr;
		dst->dev_handle = src->dev_handle;
		dst->offset = src->offset;
		dst->ldy = src->ldy;
		dst->ldz = src->ldz;
	}
}
/* Return the element size recorded in the STARPU_MAIN_RAM interface of `handle`. */
static size_t variable_interface_get_size(starpu_data_handle_t handle)
{
    struct starpu_variable_interface *ram_interface;

    ram_interface = (struct starpu_variable_interface *)
            starpu_data_get_interface_on_node(handle, STARPU_MAIN_RAM);

    return ram_interface->elemsize;
}
/* Return the nx field recorded in the STARPU_MAIN_RAM interface of `handle`. */
int starpu_complex_get_nx(starpu_data_handle_t handle)
{
	struct starpu_complex_interface *ram_interface;

	ram_interface = (struct starpu_complex_interface *)
		starpu_data_get_interface_on_node(handle, STARPU_MAIN_RAM);

	return ram_interface->nx;
}
/* Return the `imaginary` pointer recorded in the STARPU_MAIN_RAM interface of `handle`. */
double *starpu_complex_get_imaginary(starpu_data_handle_t handle)
{
	struct starpu_complex_interface *ram_interface;

	ram_interface = (struct starpu_complex_interface *)
		starpu_data_get_interface_on_node(handle, STARPU_MAIN_RAM);

	return ram_interface->imaginary;
}
Esempio n. 6
0
/*
 * Initialise the CSR interface of every memory node of `handle` from the
 * description given in `interface`.
 *
 * nzval and colind are copied on `home_node` only and cleared elsewhere.
 * rowptr, nnz, nrow, firstentry and elemsize are copied to every node.
 */
static void register_csr_handle(starpu_data_handle handle, uint32_t home_node, void *interface)
{
	starpu_csr_interface_t *src = interface;
	unsigned n;

	for (n = 0; n < STARPU_MAXNODES; n++)
	{
		starpu_csr_interface_t *dst = starpu_data_get_interface_on_node(handle, n);

		if (n != home_node)
		{
			/* Nothing is attached to this node yet */
			dst->nzval = 0;
			dst->colind = NULL;
		}
		else
		{
			dst->nzval = src->nzval;
			dst->colind = src->colind;
		}

		/* Fields replicated as-is on all nodes (rowptr included) */
		dst->rowptr = src->rowptr;
		dst->nnz = src->nnz;
		dst->nrow = src->nrow;
		dst->firstentry = src->firstentry;
		dst->elemsize = src->elemsize;
	}
}
Esempio n. 7
0
/*
 * Make sure `handle` has a buffer allocated on memory node `dst_node`.
 *
 * Returns 0 if a buffer was already allocated or has just been allocated,
 * and ENOMEM if allocation is not permitted (`may_alloc` is 0) or if
 * _starpu_allocate_interface() reported that nothing could be allocated
 * (returned 0).
 *
 * On a fresh allocation, the new chunk is registered with
 * register_mem_chunk() and the replicate is flagged as both allocated and
 * automatically allocated.
 */
int _starpu_allocate_memory_on_node(starpu_data_handle handle, uint32_t dst_node, unsigned may_alloc)
{
	size_t allocated_memory;

	STARPU_ASSERT(handle);

	/* A buffer is already allocated on the node */
	if (handle->per_node[dst_node].allocated)
		return 0;

	if (!may_alloc)
		return ENOMEM;

	void *interface = starpu_data_get_interface_on_node(handle, dst_node);
	allocated_memory = _starpu_allocate_interface(handle, interface, dst_node);

	/* perhaps we could really not handle that capacity misses */
	if (!allocated_memory)
		return ENOMEM;

	/* allocated_memory is known to be non-zero from here on, so the chunk
	 * is registered unconditionally */
	register_mem_chunk(handle, dst_node, allocated_memory, 1);

	handle->per_node[dst_node].allocated = 1;
	handle->per_node[dst_node].automatically_allocated = 1;

	return 0;
}
/*
 * Return the ptr field of the variable interface of `handle` on the memory
 * node given by _starpu_memory_node_get_local_key().  Asserts that the
 * handle is allocated on that node.
 */
uintptr_t starpu_variable_get_local_ptr(starpu_data_handle_t handle)
{
    unsigned local_node = _starpu_memory_node_get_local_key();

    STARPU_ASSERT(starpu_data_test_if_allocated_on_node(handle, local_node));

    return STARPU_VARIABLE_GET_PTR(starpu_data_get_interface_on_node(handle, local_node));
}
/*
 * Copy `count` packed bytes from `ptr` into the buffer of `handle` on
 * `node`.  Asserts that the handle is allocated on `node` and that `count`
 * equals the element size of the variable.  Always returns 0.
 */
static int unpack_variable_handle(starpu_data_handle_t handle, unsigned node, void *ptr, size_t count)
{
    struct starpu_variable_interface *dst;

    STARPU_ASSERT(starpu_data_test_if_allocated_on_node(handle, node));

    dst = (struct starpu_variable_interface *)
            starpu_data_get_interface_on_node(handle, node);

    STARPU_ASSERT(count == dst->elemsize);

    /* The copy length is the interface's element size, not `count` */
    memcpy((void*)dst->ptr, ptr, dst->elemsize);

    return 0;
}
/*
 * Unpack a buffer made of nx real parts followed by nx imaginary parts
 * (all doubles) into the complex interface of `handle` on `node`.
 * Asserts that the handle is allocated on `node` and that `count` matches
 * 2 * nx * sizeof(double).  Always returns 0.
 */
static int complex_unpack_data(starpu_data_handle_t handle, unsigned node, void *ptr, size_t count)
{
	char *packed = ptr;
	struct starpu_complex_interface *dst;
	size_t part_size;

	STARPU_ASSERT(starpu_data_test_if_allocated_on_node(handle, node));

	dst = (struct starpu_complex_interface *)
		starpu_data_get_interface_on_node(handle, node);

	STARPU_ASSERT(count == 2 * dst->nx * sizeof(double));

	/* Size in bytes of each of the two halves of the packed buffer */
	part_size = dst->nx*sizeof(double);

	memcpy(dst->real, packed, part_size);
	memcpy(dst->imaginary, packed+part_size, part_size);

	return 0;
}
Esempio n. 11
0
/*
 * Initialise the variable interface of every memory node of `handle` from
 * the description given in `interface`: the element size is copied to all
 * nodes, while ptr is copied on `home_node` only and zeroed elsewhere.
 */
static void register_variable_handle(starpu_data_handle handle, uint32_t home_node, void *interface)
{
	unsigned n;

	for (n = 0; n < STARPU_MAXNODES; n++)
	{
		starpu_variable_interface_t *dst = starpu_data_get_interface_on_node(handle, n);

		if (n != home_node)
		{
			/* Nothing is attached to this node yet */
			dst->ptr = 0;
		}
		else
		{
			dst->ptr = STARPU_VARIABLE_GET_PTR(interface);
		}

		dst->elemsize = STARPU_VARIABLE_GET_ELEMSIZE(interface);
	}
}
/*
 * Pack the complex data of `handle` found on `node` into a newly allocated
 * contiguous buffer: nx real parts followed by nx imaginary parts.
 *
 * `*count` always receives the packed size as computed by
 * complex_get_size().  When `ptr` is NULL only the size is reported;
 * otherwise `*ptr` receives a buffer obtained from starpu_malloc_flags().
 *
 * Returns 0 on success.  If the buffer allocation fails, `*ptr` is set to
 * NULL and the non-zero value returned by starpu_malloc_flags() is
 * propagated instead of copying through an invalid pointer.
 */
static int complex_pack_data(starpu_data_handle_t handle, unsigned node, void **ptr, starpu_ssize_t *count)
{
	STARPU_ASSERT(starpu_data_test_if_allocated_on_node(handle, node));

	struct starpu_complex_interface *complex_interface = (struct starpu_complex_interface *)
		starpu_data_get_interface_on_node(handle, node);

	*count = complex_get_size(handle);
	if (ptr != NULL)
	{
		void *buffer = NULL;
		char *data;
		int ret;

		ret = starpu_malloc_flags(&buffer, *count, 0);
		if (ret != 0)
		{
			/* Do not hand an indeterminate pointer back to the caller */
			*ptr = NULL;
			return ret;
		}

		data = buffer;
		*ptr = data;
		memcpy(data, complex_interface->real, complex_interface->nx*sizeof(double));
		memcpy(data+complex_interface->nx*sizeof(double), complex_interface->imaginary, complex_interface->nx*sizeof(double));
	}

	return 0;
}
Esempio n. 13
0
/*
 * Create a memory chunk describing the buffer of `handle` on `dst_node`
 * (`size` bytes) and push it at the front of that node's chunk list, under
 * the node's write lock.
 */
static void register_mem_chunk(starpu_data_handle handle, uint32_t dst_node, size_t size, unsigned automatically_allocated)
{
	starpu_mem_chunk_t chunk;
	int err;

	/* ops->allocate_data_on_node has already filled this interface */
	void *node_interface = starpu_data_get_interface_on_node(handle, dst_node);
	size_t node_interface_size = handle->ops->interface_size;

	chunk = _starpu_memchunk_init(handle, size, node_interface, node_interface_size, automatically_allocated);

	/* Insert the chunk in the list of chunks in use on dst_node */
	err = pthread_rwlock_wrlock(&mc_rwlock[dst_node]);
	STARPU_ASSERT(!err);

	starpu_mem_chunk_list_push_front(mc_list[dst_node], chunk);

	err = pthread_rwlock_unlock(&mc_rwlock[dst_node]);
	STARPU_ASSERT(!err);
}
/*
 * Initialise the complex interface of every memory node of `handle` from
 * the description given in `data_interface`: nx is copied to all nodes,
 * while the real and imaginary pointers are copied on `home_node` only and
 * zeroed elsewhere.
 */
static void complex_register_data_handle(starpu_data_handle_t handle, unsigned home_node, void *data_interface)
{
	struct starpu_complex_interface *src = (struct starpu_complex_interface *) data_interface;
	unsigned n;

	for (n = 0; n < STARPU_MAXNODES; n++)
	{
		struct starpu_complex_interface *dst = (struct starpu_complex_interface *)
			starpu_data_get_interface_on_node(handle, n);

		dst->nx = src->nx;

		if (n != home_node)
		{
			/* Nothing is attached to this node yet */
			dst->real = 0;
			dst->imaginary = 0;
			continue;
		}

		dst->real = src->real;
		dst->imaginary = src->imaginary;
	}
}
/*
 * Return the size in bytes of the complex data of `handle`: two doubles
 * (real and imaginary part) for each of the nx elements recorded in the
 * STARPU_MAIN_RAM interface.
 */
static size_t complex_get_size(starpu_data_handle_t handle)
{
	struct starpu_complex_interface *ram_interface;

	ram_interface = (struct starpu_complex_interface *)
		starpu_data_get_interface_on_node(handle, STARPU_MAIN_RAM);

	return ram_interface->nx * 2 * sizeof(double);
}
/*
 * Return, as a pointer, the ptr field of the variable interface of
 * `handle` on `node`.  Asserts that the handle is allocated on that node.
 */
static void *variable_handle_to_pointer(starpu_data_handle_t handle, unsigned node)
{
    void *node_interface;

    STARPU_ASSERT(starpu_data_test_if_allocated_on_node(handle, node));

    node_interface = starpu_data_get_interface_on_node(handle, node);

    return (void*) STARPU_VARIABLE_GET_PTR(node_interface);
}
/* Return the element size read from the STARPU_MAIN_RAM interface of `handle`. */
size_t starpu_variable_get_elemsize(starpu_data_handle_t handle)
{
    void *ram_interface = starpu_data_get_interface_on_node(handle, STARPU_MAIN_RAM);

    return STARPU_VARIABLE_GET_ELEMSIZE(ram_interface);
}
Esempio n. 18
0
	starpu_node_kind src_kind = _starpu_get_node_kind(src_node);
	starpu_node_kind dst_kind = _starpu_get_node_kind(dst_node);

	STARPU_ASSERT(src_handle->per_node[src_node].refcnt);
	STARPU_ASSERT(dst_handle->per_node[dst_node].refcnt);

	STARPU_ASSERT(src_handle->per_node[src_node].allocated);
	STARPU_ASSERT(dst_handle->per_node[dst_node].allocated);

#ifdef STARPU_USE_CUDA
	cudaError_t cures;
	cudaStream_t *stream;
#endif

	void *src_interface = starpu_data_get_interface_on_node(src_handle, src_node);
	void *dst_interface = starpu_data_get_interface_on_node(dst_handle, dst_node);

	switch (_STARPU_MEMORY_NODE_TUPLE(src_kind,dst_kind)) {
	case _STARPU_MEMORY_NODE_TUPLE(STARPU_CPU_RAM,STARPU_CPU_RAM):
		/* STARPU_CPU_RAM -> STARPU_CPU_RAM */
		STARPU_ASSERT(copy_methods->ram_to_ram);
		copy_methods->ram_to_ram(src_interface, src_node, dst_interface, dst_node);
		break;
#ifdef STARPU_USE_CUDA
	case _STARPU_MEMORY_NODE_TUPLE(STARPU_CUDA_RAM,STARPU_CPU_RAM):
		/* CUBLAS_RAM -> STARPU_CPU_RAM */
		/* only the proper CUBLAS thread can initiate this ! */
		if (_starpu_get_local_memory_node() == src_node) {
			/* only the proper CUBLAS thread can initiate this directly ! */
			STARPU_ASSERT(copy_methods->cuda_to_ram);
/*
 * Create (without submitting) a task that converts the multiformat data of
 * `handle` towards the format used by memory nodes of kind `node_kind`.
 *
 * The task is named "conversion_task", is asynchronous (synchronous = 0),
 * and accesses `handle` as its buffer 0 in STARPU_RW mode.  handle->refcnt
 * and handle->busy_count are each incremented once, under the header lock.
 *
 * The codelet is taken from the multiformat ops of the handle:
 *  - CPU-like target (STARPU_CPU_RAM, STARPU_SCC_RAM, STARPU_SCC_SHM): the
 *    "<device>_to_cpu" codelet matching the kind of handle->mf_node; a
 *    CPU-like mf_node aborts, any other unsupported kind is reported with
 *    _STARPU_ERROR.
 *  - CUDA / OpenCL / MIC target: the matching "cpu_to_<device>" codelet.
 *  - any other target kind aborts.
 *
 * The device cases only exist when the corresponding support is compiled
 * in; otherwise they end up in the default branches.
 *
 * Returns the newly created task.
 */
struct starpu_task *_starpu_create_conversion_task_for_arch(starpu_data_handle_t handle,
						   enum starpu_node_kind node_kind)
{
	struct starpu_task *conversion_task;

	/* Only declared when at least one branch below actually reads it */
#if defined(STARPU_USE_OPENCL) || defined(STARPU_USE_CUDA) || defined(STARPU_USE_MIC) || defined(STARPU_USE_SCC) || defined(STARPU_SIMGRID)
	struct starpu_multiformat_interface *format_interface;
#endif

	conversion_task = starpu_task_create();
	conversion_task->name = "conversion_task";
	conversion_task->synchronous = 0;
	STARPU_TASK_SET_HANDLE(conversion_task, handle, 0);

#if defined(STARPU_USE_OPENCL) || defined(STARPU_USE_CUDA) || defined(STARPU_USE_MIC) || defined(STARPU_USE_SCC) || defined(STARPU_SIMGRID)
	/* The node does not really matter here */
	format_interface = (struct starpu_multiformat_interface *) starpu_data_get_interface_on_node(handle, STARPU_MAIN_RAM);
#endif

	/* Account for the conversion task in the handle's reference counters */
	_starpu_spin_lock(&handle->header_lock);
	handle->refcnt++;
	handle->busy_count++;
	_starpu_spin_unlock(&handle->header_lock);

	/* Outer switch: kind of node we are converting towards */
	switch(node_kind)
	{
	case STARPU_CPU_RAM:
	case STARPU_SCC_RAM:
	case STARPU_SCC_SHM:
		/* Inner switch: kind of handle->mf_node, i.e. which
		 * "<device>_to_cpu" codelet to use */
		switch (starpu_node_get_kind(handle->mf_node))
		{
		case STARPU_CPU_RAM:
		case STARPU_SCC_RAM:
		case STARPU_SCC_SHM:
			/* No CPU-like to CPU-like conversion exists */
			STARPU_ABORT();
#if defined(STARPU_USE_CUDA) || defined(STARPU_SIMGRID)
		case STARPU_CUDA_RAM:
		{
			struct starpu_multiformat_data_interface_ops *mf_ops;
			mf_ops = (struct starpu_multiformat_data_interface_ops *) handle->ops->get_mf_ops(format_interface);
			conversion_task->cl = mf_ops->cuda_to_cpu_cl;
			break;
		}
#endif
#if defined(STARPU_USE_OPENCL) || defined(STARPU_SIMGRID)
		case STARPU_OPENCL_RAM:
		{
			struct starpu_multiformat_data_interface_ops *mf_ops;
			mf_ops = (struct starpu_multiformat_data_interface_ops *) handle->ops->get_mf_ops(format_interface);
			conversion_task->cl = mf_ops->opencl_to_cpu_cl;
			break;
		}
#endif
#ifdef STARPU_USE_MIC
		case STARPU_MIC_RAM:
		{
			struct starpu_multiformat_data_interface_ops *mf_ops;
			mf_ops = (struct starpu_multiformat_data_interface_ops *) handle->ops->get_mf_ops(format_interface);
			conversion_task->cl = mf_ops->mic_to_cpu_cl;
			break;
		}
#endif
		default:
			_STARPU_ERROR("Oops : %u\n", handle->mf_node);
		}
		break;
#if defined(STARPU_USE_CUDA) || defined(STARPU_SIMGRID)
	case STARPU_CUDA_RAM:
		{
			struct starpu_multiformat_data_interface_ops *mf_ops;
			mf_ops = (struct starpu_multiformat_data_interface_ops *) handle->ops->get_mf_ops(format_interface);
			conversion_task->cl = mf_ops->cpu_to_cuda_cl;
			break;
		}
#endif
#if defined(STARPU_USE_OPENCL) || defined(STARPU_SIMGRID)
	case STARPU_OPENCL_RAM:
	{
		struct starpu_multiformat_data_interface_ops *mf_ops;
		mf_ops = (struct starpu_multiformat_data_interface_ops *) handle->ops->get_mf_ops(format_interface);
		conversion_task->cl = mf_ops->cpu_to_opencl_cl;
		break;
	}
#endif
#ifdef STARPU_USE_MIC
	case STARPU_MIC_RAM:
	{
		struct starpu_multiformat_data_interface_ops *mf_ops;
		mf_ops = (struct starpu_multiformat_data_interface_ops *) handle->ops->get_mf_ops(format_interface);
		conversion_task->cl = mf_ops->cpu_to_mic_cl;
		break;
	}
#endif
	default:
		/* Target kind without a conversion codelet in this build */
		STARPU_ABORT();
	}

	/* The mode is set after the codelet has been selected */
	STARPU_TASK_SET_MODE(conversion_task, STARPU_RW, 0);
	return conversion_task;
}
/*
 * Register a new piece of data using the variable interface.
 *
 * The description handed to starpu_data_register() has `ptr` as both its
 * ptr and dev_handle, a zero offset and `elemsize` as element size.  When
 * STARPU_USE_SCC is defined, the addresses of dev_handle and offset are
 * first passed to _starpu_scc_set_offset_in_shared_memory().
 */
void starpu_variable_data_register(starpu_data_handle_t *handleptr, unsigned home_node,
                                   uintptr_t ptr, size_t elemsize)
{
    struct starpu_variable_interface descr =
    {
        .elemsize = elemsize,
        .offset = 0,
        .dev_handle = ptr,
        .ptr = ptr,
        .id = STARPU_VARIABLE_INTERFACE_ID
    };

#ifdef STARPU_USE_SCC
    _starpu_scc_set_offset_in_shared_memory((void*)descr.ptr, (void**)&(descr.dev_handle),
                                            &(descr.offset));
#endif

    starpu_data_register(handleptr, home_node, &descr, &starpu_interface_variable_ops);
}

/*
 * Attach a buffer to `handle` on `node`: starpu_data_ptr_register() is
 * called for that node, then ptr, dev_handle and offset are stored in the
 * node's variable interface.
 */
void starpu_variable_ptr_register(starpu_data_handle_t handle, unsigned node,
                                  uintptr_t ptr, uintptr_t dev_handle, size_t offset)
{
    struct starpu_variable_interface *node_interface;

    node_interface = starpu_data_get_interface_on_node(handle, node);
    starpu_data_ptr_register(handle, node);

    node_interface->ptr = ptr;
    node_interface->dev_handle = dev_handle;
    node_interface->offset = offset;
}


/* Footprint of a variable: starpu_hash_crc32c_be() of its element size, with a 0 seed. */
static uint32_t footprint_variable_interface_crc32(starpu_data_handle_t handle)
{
    uint32_t footprint;

    footprint = starpu_hash_crc32c_be(starpu_variable_get_elemsize(handle), 0);

    return footprint;
}

/*
 * Compare two variable interfaces.  Returns non-zero when they are
 * compatible, i.e. when they have the same element size, and 0 otherwise.
 */
static int variable_compare(void *data_interface_a, void *data_interface_b)
{
    struct starpu_variable_interface *lhs = (struct starpu_variable_interface *) data_interface_a;
    struct starpu_variable_interface *rhs = (struct starpu_variable_interface *) data_interface_b;
    int same_size = (lhs->elemsize == rhs->elemsize);

    return same_size;
}

/*
 * Print the element size found in the STARPU_MAIN_RAM interface of
 * `handle` to `f`, followed by a tab.
 */
static void display_variable_interface(starpu_data_handle_t handle, FILE *f)
{
    struct starpu_variable_interface *ram_interface;
    long elemsize;

    ram_interface = (struct starpu_variable_interface *)
            starpu_data_get_interface_on_node(handle, STARPU_MAIN_RAM);
    elemsize = (long)ram_interface->elemsize;

    fprintf(f, "%ld\t", elemsize);
}

/*
 * Pack the variable of `handle` found on `node` into a newly allocated
 * buffer.
 *
 * `*count` always receives the element size of the variable.  When `ptr`
 * is NULL only the size is reported; otherwise `*ptr` receives a buffer
 * obtained from starpu_malloc_flags() holding a copy of the variable.
 *
 * Returns 0 on success.  If the buffer allocation fails, `*ptr` is set to
 * NULL and the non-zero value returned by starpu_malloc_flags() is
 * propagated instead of copying into an invalid buffer.
 */
static int pack_variable_handle(starpu_data_handle_t handle, unsigned node, void **ptr, starpu_ssize_t *count)
{
    STARPU_ASSERT(starpu_data_test_if_allocated_on_node(handle, node));

    struct starpu_variable_interface *variable_interface = (struct starpu_variable_interface *)
            starpu_data_get_interface_on_node(handle, node);

    *count = variable_interface->elemsize;

    if (ptr != NULL)
    {
        int ret = starpu_malloc_flags(ptr, *count, 0);

        if (ret != 0)
        {
            /* Do not hand an indeterminate pointer back to the caller */
            *ptr = NULL;
            return ret;
        }

        memcpy(*ptr, (void*)variable_interface->ptr, variable_interface->elemsize);
    }

    return 0;
}
Esempio n. 21
0
/*
 * Split `initial_handle` into `nparts` children according to filter `f`.
 *
 * - inherit_state != 0: the children are allocated here (calloc) and
 *   stored in initial_handle->children; `childrenp` is not read.  Each
 *   child's per-node state, allocated flag and initialized flag are copied
 *   from the parent.
 * - inherit_state == 0: the children are taken from childrenp[i]; their
 *   per-node state is STARPU_INVALID, `initialized` is 0, and the
 *   `allocated` flag is only inherited for parent buffers that were not
 *   automatically allocated.
 *
 * For every child and every memory node, f->filter_func() is called to
 * derive the child interface from the parent interface.  The whole
 * operation runs with initial_handle->header_lock held.
 */
static void _starpu_data_partition(starpu_data_handle_t initial_handle, starpu_data_handle_t *childrenp, unsigned nparts, struct starpu_data_filter *f, int inherit_state)
{
	unsigned i;
	unsigned node;

	/* first take care to properly lock the data header */
	_starpu_spin_lock(&initial_handle->header_lock);

	initial_handle->nplans++;

	STARPU_ASSERT_MSG(nparts > 0, "Partitioning data %p in 0 piece does not make sense", initial_handle);

	/* allocate the children */
	if (inherit_state)
	{
		initial_handle->children = (struct _starpu_data_state *) calloc(nparts, sizeof(struct _starpu_data_state));
		STARPU_ASSERT(initial_handle->children);

		/* this handle now has children */
		initial_handle->nchildren = nparts;
	}

	unsigned nworkers = starpu_worker_get_count();

	/* Look for a node holding a replicate that is not STARPU_INVALID */
	for (node = 0; node < STARPU_MAXNODES; node++)
	{
		if (initial_handle->per_node[node].state != STARPU_INVALID)
			break;
	}
	if (node == STARPU_MAXNODES)
	{
		/* This is lazy allocation, allocate it now in main RAM, so as
		 * to have somewhere to gather pieces later */
		/* FIXME: mark as unevictable! */
		int ret = _starpu_allocate_memory_on_node(initial_handle, &initial_handle->per_node[STARPU_MAIN_RAM], 0);
#ifdef STARPU_DEVEL
#warning we should reclaim memory if allocation failed
#endif
		STARPU_ASSERT(!ret);
	}

	for (i = 0; i < nparts; i++)
	{
		starpu_data_handle_t child;

		/* Children either live in the parent's array or are provided
		 * by the caller */
		if (inherit_state)
			child = &initial_handle->children[i];
		else
			child = childrenp[i];
		STARPU_ASSERT(child);

		struct starpu_data_interface_ops *ops;

		/* each child may have his own interface type */
		/* what's this child's interface ? */
		if (f->get_child_ops)
			ops = f->get_child_ops(f, i);
		else
			ops = initial_handle->ops;

		_starpu_data_handle_init(child, ops, initial_handle->mf_node);

		/* Position of the child in the partitioning tree */
		child->nchildren = 0;
		child->nplans = 0;
		child->switch_cl = NULL;
		child->partitioned = 0;
		child->readonly = 0;
                child->mpi_data = initial_handle->mpi_data;
		child->root_handle = initial_handle->root_handle;
		child->father_handle = initial_handle;
		child->sibling_index = i;
		child->depth = initial_handle->depth + 1;

		/* Attributes inherited from the parent */
		child->is_not_important = initial_handle->is_not_important;
		child->wt_mask = initial_handle->wt_mask;
		child->home_node = initial_handle->home_node;
		child->is_readonly = initial_handle->is_readonly;

		/* initialize the chunk lock */
		_starpu_data_requester_list_init(&child->req_list);
		_starpu_data_requester_list_init(&child->reduction_req_list);
		child->reduction_tmp_handles = NULL;
		child->write_invalidation_req = NULL;
		child->refcnt = 0;
		child->unlocking_reqs = 0;
		child->busy_count = 0;
		child->busy_waiting = 0;
		STARPU_PTHREAD_MUTEX_INIT(&child->busy_mutex, NULL);
		STARPU_PTHREAD_COND_INIT(&child->busy_cond, NULL);
		child->reduction_refcnt = 0;
		_starpu_spin_init(&child->header_lock);

		child->sequential_consistency = initial_handle->sequential_consistency;

		/* Fresh sequential-consistency bookkeeping: no task recorded
		 * yet, and an accessor list that points to itself */
		STARPU_PTHREAD_MUTEX_INIT(&child->sequential_consistency_mutex, NULL);
		child->last_submitted_mode = STARPU_R;
		child->last_sync_task = NULL;
		child->last_submitted_accessors.task = NULL;
		child->last_submitted_accessors.next = &child->last_submitted_accessors;
		child->last_submitted_accessors.prev = &child->last_submitted_accessors;
		child->post_sync_tasks = NULL;
		/* Tell helgrind that the race in _starpu_unlock_post_sync_tasks is fine */
		STARPU_HG_DISABLE_CHECKING(child->post_sync_tasks_cnt);
		child->post_sync_tasks_cnt = 0;

		/* The methods used for reduction are propagated to the
		 * children. */
		child->redux_cl = initial_handle->redux_cl;
		child->init_cl = initial_handle->init_cl;

#ifdef STARPU_USE_FXT
		child->last_submitted_ghost_sync_id_is_valid = 0;
		child->last_submitted_ghost_sync_id = 0;
		child->last_submitted_ghost_accessors_id = NULL;
#endif

		if (_starpu_global_arbiter)
			/* Just for testing purpose */
			starpu_data_assign_arbiter(child, _starpu_global_arbiter);
		else
			child->arbiter = NULL;
		_starpu_data_requester_list_init(&child->arbitered_req_list);

		/* Set up the per-node replicates of the child and let the
		 * filter derive each child interface from the parent's */
		for (node = 0; node < STARPU_MAXNODES; node++)
		{
			struct _starpu_data_replicate *initial_replicate;
			struct _starpu_data_replicate *child_replicate;

			initial_replicate = &initial_handle->per_node[node];
			child_replicate = &child->per_node[node];

			if (inherit_state)
				child_replicate->state = initial_replicate->state;
			else
				child_replicate->state = STARPU_INVALID;
			if (inherit_state || !initial_replicate->automatically_allocated)
				child_replicate->allocated = initial_replicate->allocated;
			else
				child_replicate->allocated = 0;
			/* Do not allow memory reclaiming within the child for parent bits */
			child_replicate->automatically_allocated = 0;
			child_replicate->refcnt = 0;
			child_replicate->memory_node = node;
			child_replicate->relaxed_coherency = 0;
			if (inherit_state)
				child_replicate->initialized = initial_replicate->initialized;
			else
				child_replicate->initialized = 0;

			/* update the interface */
			void *initial_interface = starpu_data_get_interface_on_node(initial_handle, node);
			void *child_interface = starpu_data_get_interface_on_node(child, node);

			/* NOTE(review): child_replicate->automatically_allocated was
			 * set to 0 a few lines above, so this condition cannot fire
			 * as written - confirm whether the parent replicate was
			 * meant to be tested instead */
			STARPU_ASSERT_MSG(!(!inherit_state && child_replicate->automatically_allocated && child_replicate->allocated), "partition planning is currently not supported when handle has some automatically allocated buffers");
			f->filter_func(initial_interface, child_interface, f, i, nparts);
		}

		/* Set up the per-worker replicates of the child: nothing is
		 * valid nor allocated yet */
		unsigned worker;
		for (worker = 0; worker < nworkers; worker++)
		{
			struct _starpu_data_replicate *child_replicate;
			child_replicate = &child->per_worker[worker];

			child_replicate->state = STARPU_INVALID;
			child_replicate->allocated = 0;
			child_replicate->automatically_allocated = 0;
			child_replicate->refcnt = 0;
			child_replicate->memory_node = starpu_worker_get_memory_node(worker);
			child_replicate->requested = 0;

			/* `node` is reused as a scratch index here */
			for (node = 0; node < STARPU_MAXNODES; node++)
			{
				child_replicate->request[node] = NULL;
			}

			child_replicate->relaxed_coherency = 1;
			child_replicate->initialized = 0;

			/* duplicate  the content of the interface on node 0 */
			memcpy(child_replicate->data_interface, child->per_node[0].data_interface, child->ops->interface_size);
		}

		/* We compute the size and the footprint of the child once and
		 * store it in the handle */
		child->footprint = _starpu_compute_data_footprint(child);

		/* Record the child's main-RAM pointer, if it has one */
		void *ptr;
		ptr = starpu_data_handle_to_pointer(child, STARPU_MAIN_RAM);
		if (ptr != NULL)
			_starpu_data_register_ram_pointer(child, ptr);
	}
	/* all children are set up: release the header lock of the parent */
	_starpu_spin_unlock(&initial_handle->header_lock);
}