void MemoryPressureHandler::platformReleaseMemory(Critical)
{
#ifdef __GLIBC__
    ReliefLogger log("Run malloc_trim");
    malloc_trim(0);
#endif
}
Exemplo n.º 2
0
END_TEST

START_TEST(test_malloc_trim)
{
	malloc_trim(NULL);
	fail_if(errno != ENOSYS);
}
Exemplo n.º 3
0
static int
force_nomem(void)
{
	struct rlimit rl;
	size_t sz;
	void *hog;

	malloc_trim(0);

	if (getrlimit(RLIMIT_AS, &rl)) {
		perror("Cannot get current AS limit");
		return -1;
	}
	rl.rlim_cur = 0;
	if (setrlimit(RLIMIT_AS, &rl)) {
		perror("Cannot set current AS limit");
		return -1;
	}
	for (sz = 1024; sz; sz -= sizeof(long)) {
		do {
			void *newhog;
			size_t mysz = 0;
			hog = NULL;
			while ( (newhog = realloc(hog, mysz += sz)) )
				hog = newhog;
			if (hog && nhogs < MAXHOGS)
				hogs[nhogs++] = hog;
		} while (hog);
	}

	return 0;
}
Exemplo n.º 4
0
bool MemoryWatcher::timerTicked()
{
	if (m_state != m_lastNotifiedState) {
		m_lastNotifiedState = m_state;
		signalMemoryStateChanged.fire(m_lastNotifiedState);
	}
	
	if (m_state == Normal)	{
		return true;
	} else if (m_state == Medium)	{
		static int count = 0;
		if (count++ > kMinIntervalBetweenMediumMemActionsMs) {
			malloc_trim(0);
			count = 0;
		}
		return true;
	}

	m_currRssUsage = getCurrentRssUsage();

	g_warning("WebKit MemoryWatcher: LOW MEMORY: State: %s, current RSS usage: %dMB\n",
			  nameForState(m_state), m_currRssUsage);

	doLowMemActions(true);
	
	return true;    
}
/*更新全局的部分数据,更新global_db_config中的某一个*/
int
update_global_single_handle_data(void** &query_handle_data, const char* db_name, 
		int num, char *output, uint32_t& output_len){
	if(NULL == p_global_db_company)
		return 1;

	GlobalDbInterface* p_new_db_interface = NULL;
	int result = p_global_db_company->update_global_db_interface
		(p_new_db_interface, db_name, "../conf/global_db_config.ini");
	
	if(result < 0)
		return result;
	else if(result == 1){
		sleep(SLEEP_TIME);
		//result == 1 is for already exists,0 is for new
		delete p_new_db_interface;
		p_new_db_interface = NULL;

		/*使用malloc_trim 归还堆上的空间*/
		malloc_trim(0);
	}

	return 1;

}
Exemplo n.º 6
0
void MemoryWatcher::doLowMemActions(bool allowExpensive)
{
	uint32_t curTime = Time::curTimeMs();
	if (curTime - m_timeAtLastLowMemAction < kMinIntervalBetweenLowMemActions) {
		return; // too soon to perform the Low mem actions again
	}
	
	malloc_trim(0);
	if(allowExpensive) {
		uint32_t timeout = kMinIntervalBetweenExpensiveLowMemActions;
		if (Low == m_state) {
			timeout *= kLowMemExpensiveTimeoutMultiplier;
		}
		
		allowExpensive = ((curTime - m_timeAtLastExpensiveLowMemAction) >= timeout);
	}
	
	g_warning("MemoryWatcher: Running Low memory actions....\n");

//	Palm::WebGlobal::notifyLowMemory();
	m_timeAtLastLowMemAction = Time::curTimeMs();
		
	if (allowExpensive) {
		g_warning("MemoryWatcher: doing expensive low memory actions.... \n");
		WebAppManager::instance()->restartHeadLessBootApps();
		m_timeAtLastExpensiveLowMemAction = m_timeAtLastLowMemAction;
	}

	m_currRssUsage = getCurrentRssUsage();			
    g_warning("MemoryWatcher: RSS usage after low memory actions: %dMB\n", m_currRssUsage);
}
Exemplo n.º 7
0
EXPORT_API int appcore_flush_memory(void)
{
	int (*flush_fn) (int);
	int size = 0;

	struct appcore *ac = &core;

	if (!core.state) {
		_ERR("Appcore not initialized");
		return -1;
	}

	_DBG("[APP %d] Flushing memory ...", _pid);

	if (ac->ops->cb_app) {
		ac->ops->cb_app(AE_MEM_FLUSH, ac->ops->data, NULL);
	}

	flush_fn = dlsym(RTLD_DEFAULT, "sqlite3_release_memory");
	if (flush_fn) {
		size = flush_fn(SQLITE_FLUSH_MAX);
	}

	malloc_trim(0);
	/*
	*Disabled - the impact of stack_trim() is unclear
	*stack_trim();
	*/

	_DBG("[APP %d] Flushing memory DONE", _pid);

	return 0;
}
Exemplo n.º 8
0
/*
 * Garbage collect and trim memory if possible
 *  This should be called after all big memory usages
 *  if possible.
 */
void garbage_collect_memory()
{
   close_memory_pool();         /* release free chain */
#ifdef HAVE_MALLOC_TRIM
   P(mutex);
   malloc_trim(8192);
   V(mutex);
#endif
}
Exemplo n.º 9
0
void vfs_dir_unload_thumbnails( VFSDir* dir, gboolean is_big )
{
    GList* l;
    VFSFileInfo* file;
    char* file_path;

    g_mutex_lock( dir->mutex );
    if( is_big )
    {
        for( l = dir->file_list; l; l = l->next )
        {
            file = (VFSFileInfo*)l->data;
            if( file->big_thumbnail )
            {
                g_object_unref( file->big_thumbnail );
                file->big_thumbnail = NULL;
            }
            /* This is a desktop entry file, so the icon needs reload
                 FIXME: This is not a good way to do things, but there is no better way now.  */
            if( file->flags & VFS_FILE_INFO_DESKTOP_ENTRY )
            {
                file_path = g_build_filename( dir->path, file->name, NULL );
                vfs_file_info_load_special_info( file, file_path );
                g_free( file_path );
            }
        }
    }
    else
    {
        for( l = dir->file_list; l; l = l->next )
        {
            file = (VFSFileInfo*)l->data;
            if( file->small_thumbnail )
            {
                g_object_unref( file->small_thumbnail );
                file->small_thumbnail = NULL;
            }
            /* This is a desktop entry file, so the icon needs reload
                 FIXME: This is not a good way to do things, but there is no better way now.  */
            if( file->flags & VFS_FILE_INFO_DESKTOP_ENTRY )
            {
                file_path = g_build_filename( dir->path, file->name, NULL );
                vfs_file_info_load_special_info( file, file_path );
                g_free( file_path );
            }
        }
    }
    g_mutex_unlock( dir->mutex );

    /* Ensuring free space at the end of the heap is freed to the OS,
     * mainly to deal with the possibility thousands of large thumbnails
     * have been freed but the memory not actually released by SpaceFM */
#if defined (__GLIBC__)
	malloc_trim(0);
#endif
}
Exemplo n.º 10
0
static int __sys_lowmem_post(void *data, void *evt)
{
#if defined(MEMORY_FLUSH_ACTIVATE)
	struct appcore *ac = data;
	ac->ops->cb_app(AE_LOWMEM_POST, ac->ops->data, NULL);
#else
	malloc_trim(0);
#endif
	return 0;
}
Exemplo n.º 11
0
void MemoryWatcher::memchuteCallback(MemchuteThreshold threshold)
{
	MemoryWatcher* mw = MemoryWatcher::instance();

	int oldState = mw->m_state;	

	switch (threshold) {
	case (MEMCHUTE_NORMAL):
		if (mw->m_state != Normal) {
			// close dashboard and popup
			WebAppManager::instance()->disableAppCaching(false);
		}		
		mw->m_state = Normal;
		break;
	case (MEMCHUTE_MEDIUM):
		WebAppManager::instance()->disableAppCaching(true);
		mw->m_state = Medium;
		malloc_trim(0);
		break;
	case (MEMCHUTE_LOW):
		WebAppManager::instance()->disableAppCaching(true);
		mw->m_state = Low;
		malloc_trim(0);
		break;
	case (MEMCHUTE_CRITICAL):
	case (MEMCHUTE_REBOOT):
		WebAppManager::instance()->disableAppCaching(true);
		mw->m_state = Critical;
		break;
	default:
		break;
	}

	// Transitioning out of Normal state. Drop the buffer caches
	if ((oldState >= Normal) && (mw->m_state > oldState))
		dropBufferCaches();
}
Exemplo n.º 12
0
/**
 * @brief Handling of SIGUSR2
 *
 * Print some info on memory usage and try to trim it.
 *
 * @param fd Dummy file-descriptor. Ignored.
 *
 * @param info Dummy pointer to extra info. Ignored.
 *
 * @return Always returns 0
 */
static int handleSIGUSR2(int fd, void *info)
{
    struct signalfd_siginfo sigInfo;

    /* Ignore data available on fd */
    if (read(fd, &sigInfo, sizeof(sigInfo)) < 0) {
	PSID_warn(-1, errno, "%s: read()", __func__);
    }

    printMallocInfo();
    malloc_trim(0);
    printMallocInfo();

    return 0;
}
Exemplo n.º 13
0
static int __sys_lowmem_post(void *data, void *evt)
{
	keynode_t *key = evt;
	int val;

	val = vconf_keynode_get_int(key);

	if (val >= VCONFKEY_SYSMAN_LOW_MEMORY_SOFT_WARNING)	{
#if defined(MEMORY_FLUSH_ACTIVATE)
		struct appcore *ac = data;
		ac->ops->cb_app(AE_LOWMEM_POST, ac->ops->data, NULL);
#else
		malloc_trim(0);
#endif
	}
	return 0;
}
Exemplo n.º 14
0
    static void sonOfScotland()
    {
        Q_ASSERT(qApp->thread() == QThread::currentThread());

        if (!qApp->property("__Akonadi__Braveheart").isNull()) {
            // One Scottish warrior is enough....
            return;
        }
        auto freedom = new QTimer(qApp);
        QObject::connect(freedom, &QTimer::timeout,
                         freedom, []() {
                            // They may take our lives, but they will never
                            // take our memory!
                            malloc_trim(50 * 1024 * 1024);
                        });
        // Fight for freedom every 15 minutes
        freedom->start(15 * 60 * 1000);
        qApp->setProperty("__Akonadi__Braveheart", true);
    };
Exemplo n.º 15
0
xmlNodePtr TwsXml::nextXmlNode()
{
	if( curNode!= NULL ) {
		curNode = curNode->next;
	}
	for( ; curNode!= NULL; curNode = curNode->next ) {
		if( curNode->type == XML_ELEMENT_NODE ) {
// 			fprintf(stderr, "Notice, return element '%s'.\n", curNode->name);
			return curNode;
		} else {
// 			fprintf(stderr, "Warning, ignore element '%s'.\n", curNode->name);
		}
	}
	assert( curNode == NULL );

	xmlNodePtr root;
	while( (root = nextXmlRoot()) != NULL ) {
		for( xmlNodePtr p = root->children; p!= NULL; p=p->next) {
			if( p->type == XML_ELEMENT_NODE ) {
				curNode = p;
				break;
			} else {
// 				fprintf(stderr, "Warning, ignore element '%s'.\n", p->name);
			}
		}
		if( curNode != NULL ) {
			break;
		} else {
			fprintf(stderr, "Warning, no usable element found.\n");
		}
	}

	if( curNode != NULL ) {
// 		fprintf(stderr, "Notice, return element '%s'.\n", curNode->name);
	} else {
// 		fprintf(stderr, "Notice, all elements parsed.\n");
#if defined HAVE_MALLOC_TRIM
		malloc_trim(0);
#endif
	}

	return curNode;
}
Exemplo n.º 16
0
void PSID_clearMem(void)
{
    //PSIDtask_clearMem(); @todo Disabled for the time being -> Discuss with MR
    PSsignal_gc();
    PSrsrvtn_clearMem();

    PSIDRDP_clearMem();
    PSIDclient_clearMem();
    /* This one has to wait until PSIDRDP and PSIDclient are cleaned up */
    PSIDMsgbuf_clearMem();

    /* This one has to wait until PSIDclient is cleaned up */
    PSIDFlwCntrl_clearMem();

    //PSIDnodes_clearMem(); @todo Disabled for the time being -> Discuss with MR
    RDP_clearMem();

    /* Now call all cleanup functions registered by plugins */
    PSIDhook_call(PSIDHOOK_CLEARMEM, NULL);

    malloc_trim(0);
}
Exemplo n.º 17
0
/*更新全局数据,更新global_db_config文件全部*/
int
update_global_handle_data(void** &query_handle_data, int num, char *output,
		            uint32_t& output_len){
	GlobalDbCompany* p_new_global_db_company = new GlobalDbCompany();
	if(NULL == p_new_global_db_company){
		 LOG_ERROR("new global db company is eror!");
		 return -1;
	}

	p_new_global_db_company->load_config("../conf/global_db_config.ini");
	
	void** handle_ptr_arr = query_handle_data;
	if (! handle_ptr_arr){
       	LOG_ERROR("calloc error");
		return -1;
    }
	
	for(int i = 0; i < num; ++i){
		thread_data_t *thr = (thread_data_t*)handle_ptr_arr[i];	
    	if(NULL == thr){
			continue;
		}

		thr->db_company_->initialize(p_new_global_db_company);
	}

	std::swap(p_global_db_company, p_new_global_db_company);
	sleep(SLEEP_TIME);
	delete p_new_global_db_company;
	p_new_global_db_company = NULL;

	/*使用malloc_trim 归还堆上的空间*/
	malloc_trim(0);

	return 1;

}
Exemplo n.º 18
0
GNUNET_ARM_memory_init ()
{
  mallopt (M_TRIM_THRESHOLD, 4 * 1024);
  mallopt (M_TOP_PAD, 1 * 1024);
  malloc_trim (0);
}
void MemoryPressureHandler::platformReleaseMemory(Critical)
{
    ReliefLogger log("Run malloc_trim");
    malloc_trim(0);
}
Exemplo n.º 20
0
static void
_task_malloc_trim(gpointer p)
{
	(void) p;
	malloc_trim (PERIODIC_MALLOC_TRIM_SIZE);
}
void MemoryPressureHandler::platformReleaseMemory(Critical)
{
#ifdef __GLIBC__
    malloc_trim(0);
#endif
}
/* for malloc trim and stack trim */
void e_illume_util_mem_trim (void)
{
   malloc_trim(0);
}
Exemplo n.º 23
0
CAMLprim value malloc_trim_stub(value v_n)
{
  int ret = malloc_trim(Int_val(v_n));
  if (ret != 1) caml_failwith("malloc_trim");
  return Val_unit;
}
ngx_int_t
ngx_http_lua_log_handler(ngx_http_request_t *r)
{
#if (NGX_HTTP_LUA_HAVE_MALLOC_TRIM)
    ngx_uint_t                   trim_cycle, trim_nreq;
    ngx_http_lua_main_conf_t    *lmcf;
#endif
    ngx_http_lua_loc_conf_t     *llcf;
    ngx_http_lua_ctx_t          *ctx;

#if (NGX_HTTP_LUA_HAVE_MALLOC_TRIM)
    lmcf = ngx_http_get_module_main_conf(r, ngx_http_lua_module);

    trim_cycle = lmcf->malloc_trim_cycle;

    if (trim_cycle > 0) {

        dd("cycle: %d", (int) trim_cycle);

        trim_nreq = ++lmcf->malloc_trim_req_count;

        if (trim_nreq >= trim_cycle) {
            lmcf->malloc_trim_req_count = 0;

#if (NGX_DEBUG)
            ngx_log_debug1(NGX_LOG_DEBUG_HTTP, r->connection->log, 0,
                           "malloc_trim(1) returned %d", malloc_trim(1));
#else
            (void) malloc_trim(1);
#endif
        }
    }
#   if (NGX_DEBUG)
    else {
        ngx_log_debug0(NGX_LOG_DEBUG_HTTP, r->connection->log, 0,
                       "malloc_trim() disabled");
    }
#   endif
#endif

    ngx_log_debug2(NGX_LOG_DEBUG_HTTP, r->connection->log, 0,
                   "lua log handler, uri:\"%V\" c:%ud", &r->uri,
                   r->main->count);

    llcf = ngx_http_get_module_loc_conf(r, ngx_http_lua_module);

    if (llcf->log_handler == NULL) {
        dd("no log handler found");
        return NGX_DECLINED;
    }

    ctx = ngx_http_get_module_ctx(r, ngx_http_lua_module);

    dd("ctx = %p", ctx);

    if (ctx == NULL) {
        ctx = ngx_http_lua_create_ctx(r);
        if (ctx == NULL) {
            return NGX_ERROR;
        }
    }

    ctx->context = NGX_HTTP_LUA_CONTEXT_LOG;

    dd("calling log handler");
    return llcf->log_handler(r);
}
Exemplo n.º 25
0
int main (int argc, char *argv[]){

	char *x, *y, *z, *xbuf, *hbuf, *chrNames[MAXNBCHR];
	int fd;
	off_t hsiz;
	struct stat st;

	MPI_File mpi_filed;
	MPI_File mpi_file_split_comm;

	MPI_Offset fileSize, unmapped_start, discordant_start;
	int num_proc, rank;
	int res, nbchr, i, paired, write_sam;
	int ierr, errorcode = MPI_ERR_OTHER;
	char *file_name, *output_dir;

	char *header;

	unsigned int headerSize;
	unsigned char threshold;

	size_t input_file_size;
	size_t unmappedSize = 0;
	size_t discordantSize = 0;
	size_t *readNumberByChr = NULL, *localReadNumberByChr = NULL;
	Read **reads;

	double time_count;
	double time_count1;
	int g_rank, g_size;
	MPI_Comm split_comm; //used to split communication when jobs have no reads to sort
	int split_rank, split_size; //after split communication we update the rank and the size
	double tic, toc;
	int compression_level;
	size_t fsiz, lsiz, loff;
	const char *sort_name;
	MPI_Info finfo;

	/* Set default values */
	compression_level = 3;
	parse_mode = MODE_OFFSET;
	sort_name = "coordinate";
	paired = 0;
	threshold = 0;
	write_sam = 0;
	/* Check command line */
	while ((i = getopt(argc, argv, "c:hnpq:")) != -1) {
		switch(i) {
			case 'c': /* Compression level */
				compression_level = atoi(optarg);
				break;
			case 'h': /* Usage display */
				usage(basename(*argv));
				return 0;
			case 'n':
				parse_mode = MODE_NAME;
				sort_name = "queryname";
				break;
			case 'p': /* Paired reads */
				paired = 1;
				break;
			case 'q': /* Quality threshold */
				threshold = atoi(optarg);
				break;
			default:
				usage(basename(*argv));
				return 1;
		}
	}
	if (argc - optind != 2) {
		usage(basename(*argv));
		return 1;
	}
	file_name = argv[optind];
	output_dir = argv[optind+1];

	/* Check arguments */
	res = access(file_name, F_OK|R_OK);
	if (res == -1)
		err(1, "%s", file_name);
	res = access(output_dir, F_OK|W_OK);
	if (res == -1)
		err(1, "%s", output_dir);

	/* MPI inits */
	res = MPI_Init(&argc, &argv);
	assert(res == MPI_SUCCESS);
	res = MPI_Comm_rank(MPI_COMM_WORLD, &rank);
	assert(res == MPI_SUCCESS);
	res = MPI_Comm_size(MPI_COMM_WORLD, &num_proc);
	assert(res == MPI_SUCCESS);

	g_rank = rank;
	g_size = num_proc;

	/* Small summary */
	if (rank == 0) {
		fprintf(stderr, "Number of processes : %d\n", num_proc);
		fprintf(stderr, "Reads' quality threshold : %d\n", threshold);
		fprintf(stderr, "Compression Level is : %d\n", compression_level);
		fprintf(stderr, "SAM file to read : %s\n", file_name);
		fprintf(stderr, "Output directory : %s\n", output_dir);
	}

	/* Process input file */
	fd = open(file_name, O_RDONLY, 0666);
	assert(fd != -1);
	assert(fstat(fd, &st) != -1);
	xbuf = mmap(NULL, (size_t)st.st_size, PROT_READ, MAP_FILE|MAP_PRIVATE, fd, 0);
	assert(xbuf != MAP_FAILED);

	/* Parse SAM header */
	memset(chrNames, 0, sizeof(chrNames));
	x = xbuf; nbchr = 0;
	while (*x == '@') {
		y = strchr(x, '\n');
		z = x; x = y + 1;
		if (strncmp(z, "@SQ", 3) != 0) continue;
		/* Save reference names */
		y = strstr(z, "SN:");
		assert(y != NULL);
		z = y + 3;
		while (*z && !isspace((unsigned char)*z)) z++;
		chrNames[nbchr++] = strndup(y + 3, z - y - 3);
		assert(nbchr < MAXNBCHR - 2);
	}
	chrNames[nbchr++] = strdup(UNMAPPED);
	chrNames[nbchr++] = strdup(DISCORDANT);

	hsiz = x - xbuf;
	hbuf = strndup(xbuf, hsiz);

	if (rank == 0) {
		fprintf(stderr, "The size of the file is %zu bytes\n", (size_t)st.st_size);
		fprintf(stderr, "Header has %d+2 references\n", nbchr - 2);
	}
	asprintf(&header, "@HD\tVN:1.0\tSO:%s\n%s", sort_name, hbuf);

	free(hbuf);

	assert(munmap(xbuf, (size_t)st.st_size) != -1);
	assert(close(fd) != -1);

	//task FIRST FINE TUNING FINFO FOR READING OPERATIONS


	MPI_Info_create(&finfo);
	/*
	 * In this part you shall adjust the striping factor and unit according
	 * to the underlying filesystem.
	 * Harmless for other file system.
	 *
	 */
	MPI_Info_set(finfo,"striping_factor", STRIPING_FACTOR);
	MPI_Info_set(finfo,"striping_unit", STRIPING_UNIT); //2G striping
	MPI_Info_set(finfo,"ind_rd_buffer_size", STRIPING_UNIT); //2gb buffer
	MPI_Info_set(finfo,"romio_ds_read",DATA_SIEVING_READ);

	/*
	 * for collective reading and writing
	 * should be adapted too and tested according to the file system
	 * Harmless for other file system.
	 */
	MPI_Info_set(finfo,"nb_proc", NB_PROC);
	MPI_Info_set(finfo,"cb_nodes", CB_NODES);
	MPI_Info_set(finfo,"cb_block_size", CB_BLOCK_SIZE);
	MPI_Info_set(finfo,"cb_buffer_size", CB_BUFFER_SIZE);


	//we open the input file
	ierr = MPI_File_open(MPI_COMM_WORLD, file_name,  MPI_MODE_RDONLY , finfo, &mpi_filed);
	//assert(in != -1);
	if (ierr){
		if (rank == 0) fprintf(stderr, "%s: Failed to open file in process 0 %s\n", argv[0], argv[1]);
		MPI_Abort(MPI_COMM_WORLD, errorcode);
		exit(2);
	}
	ierr = MPI_File_get_size(mpi_filed, &fileSize);
	assert(ierr == MPI_SUCCESS);
	input_file_size = (long long)fileSize;

	/* Get chunk offset and size */
	fsiz = input_file_size;
	lsiz = fsiz / num_proc;
	loff = rank * lsiz;

	tic = MPI_Wtime();

	headerSize = unmappedSize = discordantSize = strlen(header);

	//We place file offset of each process to the begining of one read's line
	size_t *goff =(size_t*)calloc((size_t)(num_proc+1), sizeof(size_t));
	init_goff(mpi_filed,hsiz,input_file_size,num_proc,rank,goff);

	//We calculate the size to read for each process
	lsiz = goff[rank+1]-goff[rank];
	//NOW WE WILL PARSE
	size_t j=0;
	size_t poffset = goff[rank]; //Current offset in file sam

	//nbchr because we add the discordant reads in the structure
	reads = (Read**)malloc((nbchr)*sizeof(Read));//We allocate a linked list of struct for each Chromosome (last chr = unmapped reads)
	readNumberByChr = (size_t*)malloc((nbchr)*sizeof(size_t));//Array with the number of reads found in each chromosome
	localReadNumberByChr = (size_t*)malloc((nbchr)*sizeof(size_t));//Array with the number of reads found in each chromosome
	Read ** anchor = (Read**)malloc((nbchr)*sizeof(Read));//Pointer on the first read of each chromosome

	//Init first read
	for(i = 0; i < (nbchr); i++){
		reads[i] = malloc(sizeof(Read));
		reads[i]->coord = 0;
		anchor[i] = reads[i];
		readNumberByChr[i]=0;
	}

	toc = MPI_Wtime();

	char *local_data_tmp = malloc(1024*1024);
	char *local_data =(char*)malloc(((goff[rank+1]-poffset)+1)*sizeof(char));
	size_t size_tmp= goff[rank+1]-poffset;
	local_data[goff[rank+1]-poffset] = 0;
	char *q=local_data;

	//We read the file sam and parse
	while(poffset < goff[rank+1]){

		size_t size_to_read = 0;

		if( (goff[rank+1]-poffset) < DEFAULT_INBUF_SIZE ){
			size_to_read = goff[rank+1]-poffset;
		}
		else{
			size_to_read = DEFAULT_INBUF_SIZE;
		}

		// we load the buffer
		//hold temporary size of SAM
		//due to limitation in MPI_File_read_at
		local_data_tmp =(char*)realloc(local_data_tmp, (size_to_read+1)*sizeof(char));
		local_data_tmp[size_to_read]=0;

		// Original reading part is before 18/09/2015
		MPI_File_read_at(mpi_filed, (MPI_Offset)poffset, local_data_tmp, size_to_read, MPI_CHAR, MPI_STATUS_IGNORE);
		size_t local_offset=0;
		assert(strlen(local_data_tmp) == size_to_read);

		//we look where is the last line read for updating next poffset
		size_t offset_last_line = size_to_read-1;

		size_t extra_char=0;
		while(local_data_tmp[offset_last_line] != '\n'){
			offset_last_line -- ;
			extra_char++;
		}

		local_data_tmp[size_to_read - extra_char]=0;
		size_t local_data_tmp_sz = strlen(local_data_tmp);

		//If it s the last line of file, we place a last '\n' for the function tokenizer
		if(rank == num_proc-1 && ((poffset+size_to_read) == goff[num_proc])){
			local_data_tmp[offset_last_line]='\n';
		}

		//Now we parse Read in local_data
		parser_paired(local_data_tmp, rank, poffset, threshold, nbchr, &readNumberByChr, chrNames, &reads);

		//now we copy local_data_tmp in local_data
		char *p = local_data_tmp;
		int pos =0;
		while (*p && (pos < local_data_tmp_sz)) {*q=*p;p++;q++;pos++;}

		//we go to the next line
		poffset+=(offset_last_line+1);
		local_offset+=(offset_last_line+1);

	}

	assert(size_tmp == strlen(local_data));

	fprintf(stderr, "%d (%.2lf)::::: *** FINISH PARSING FILE ***\n", rank, MPI_Wtime()-toc);

	if (local_data_tmp) free(local_data_tmp);
	malloc_trim(0);

	MPI_Barrier(MPI_COMM_WORLD);

	//We set attribute next of the last read and go back to first read of each chromosome
	for(i = 0; i < nbchr; i++){
		reads[i]->next = NULL;
		reads[i] = anchor[i];
	}
	free(anchor);

	//We count how many reads we found
	size_t nb_reads_total =0,nb_reads_global =0;
	for(j=0;j<nbchr;j++){
		nb_reads_total+=readNumberByChr[j];
	}

	MPI_Allreduce(&nb_reads_total, &nb_reads_global, 1, MPI_UNSIGNED_LONG, MPI_SUM, MPI_COMM_WORLD);

	/*
	 * We care for unmapped and discordants reads
	 */

	int s = 0;
	for (s = 1; s < 3; s++){

		MPI_File mpi_file_split_comm2;
		double time_count;

		size_t total_reads = 0;
		MPI_Allreduce(&readNumberByChr[nbchr-s], &total_reads , 1, MPI_LONG_LONG_INT, MPI_SUM, MPI_COMM_WORLD);

		if ((rank == 0) && (s == 1))
			fprintf(stderr, "rank %d :::: total read to sort for unmapped = %zu \n", rank, total_reads);

		if ((rank == 0) && (s == 2))
			fprintf(stderr, "rank %d :::: total read to sort for discordant = %zu \n", rank, total_reads);

		MPI_Barrier(MPI_COMM_WORLD);

		if (total_reads == 0){
			// nothing to sort for unmapped
			// maybe write an empty bam file
		}
		else{
			int i1,i2;
			size_t *localReadsNum_rank0 = (size_t *)malloc(num_proc*sizeof(size_t));
			localReadsNum_rank0[0] = 0;
			int file_pointer_to_free = 0;
			int split_comm_to_free = 0;
			//we build a vector with rank job
			int val_tmp1 = 0;
			int val_tmp2 = 0;
			int chosen_rank = 0;
			// the color tells in what communicator the rank pertain
			// color = 0 will be the new communicator color
			// otherwise the color is 1
			int *color_vec_to_send =  (int *)malloc(num_proc*sizeof(int));
			// the key value tell the order in the new communicator
			int *key_vec_to_send =  (int *)malloc(num_proc*sizeof(int));

			//rank 0 gather the vector
			MPI_Allgather(&readNumberByChr[nbchr-s] , 1, MPI_LONG_LONG_INT, localReadsNum_rank0 , 1, MPI_LONG_LONG_INT, MPI_COMM_WORLD);
			MPI_Barrier(MPI_COMM_WORLD);

			if (rank == 0){
				//we must chose the first rank with reads to sort
				i1=0;
				while (localReadsNum_rank0[i1] == 0){
					chosen_rank++;
					i1++;
				}
			}

			//we broadcast the chosen rank
			//task: replace the broadcast with a sendrecieve
			MPI_Bcast( &chosen_rank, 1, MPI_INT, 0, MPI_COMM_WORLD);
			MPI_Barrier(MPI_COMM_WORLD);

			//we must chose which rank is going to split the communication
			if (((rank == chosen_rank) || rank == 0) && (chosen_rank != 0)){
				//the rank 0 will recieve the key_vec_to_send and colorvec_to_send
				//first we exchange the size o
				if (rank == chosen_rank){
					header=(char *)malloc((headerSize + 1)*sizeof(char));
					MPI_Recv(header, headerSize + 1, MPI_CHAR, 0, 0, MPI_COMM_WORLD, MPI_STATUS_IGNORE);
				}
				if (rank == 0){
					MPI_Send(header, headerSize + 1, MPI_CHAR, chosen_rank,  0, MPI_COMM_WORLD);
				}
			}
			else {
				//we do nothing here
			}

			if (rank == chosen_rank) {

				int counter = 0;
				//we compute the number of 0 in the localReadsNum_vec
				for(i1 = 0; i1 < num_proc; i1++){
					if (localReadsNum_rank0[i1] == 0) {
						counter++;
					}
				}
				// if no jobs without reads we do nothing
				if ( counter == 0 ){
					// nothing to do we associate split_comm with
					split_comm = MPI_COMM_WORLD;
					for (i2 = 0; i2 < num_proc; i2++) {

						if (localReadsNum_rank0[i2] == 0) {
							color_vec_to_send[i2] = 1;
							key_vec_to_send[i2] = val_tmp2;
							val_tmp2++;
						} else {
							color_vec_to_send[i2] = 0;
							key_vec_to_send[i2] = val_tmp1;
							val_tmp1++;
						}
					}
				}
				else{
					// now we compute the color according to
					// the number of reads to sort
					for(i2 = 0; i2 < num_proc; i2++){
						if (localReadsNum_rank0[i2] == 0){
							color_vec_to_send[i2] = 1;
							key_vec_to_send[i2] = val_tmp2;
							val_tmp2++;
						} else{
							color_vec_to_send[i2] = 0;
							key_vec_to_send[i2] = val_tmp1;
							val_tmp1++;
						}
					} // end for loop
				}// end if
			}// end if (rank == chosen_rank)

			MPI_Barrier(MPI_COMM_WORLD);
			// we scatter the key and color vector
			// we create key and color variable for each job
			int local_color = 0;
			int local_key = 0;
			// we scatter the color and key
			MPI_Scatter( color_vec_to_send, 1, MPI_INT, &local_color, 1, MPI_INT, chosen_rank, MPI_COMM_WORLD);
			MPI_Scatter( key_vec_to_send, 1, MPI_INT, &local_key, 1, MPI_INT, chosen_rank, MPI_COMM_WORLD);
			// we create a communicator
			// we group all communicator
			// with color of zero
			if (local_color == 0){

				MPI_Comm_split( MPI_COMM_WORLD, local_color, local_key, &split_comm);
				ierr = MPI_File_open(split_comm, file_name,  MPI_MODE_RDONLY , finfo, &mpi_file_split_comm2);
				//we ask to liberate file pointer
				file_pointer_to_free = 1;
				//we ask to liberate the split_comm
				split_comm_to_free = 1;
			}
			else{
				MPI_Comm_split( MPI_COMM_WORLD, MPI_UNDEFINED, local_key, &split_comm);
				mpi_file_split_comm2 = mpi_filed;
			}

			//now we change the rank in the reads structure
			if (local_color == 0){
				MPI_Comm_rank(split_comm, &split_rank);
				MPI_Comm_size(split_comm, &split_size);

				g_rank = split_rank;
				g_size = split_size;

				reads[nbchr-s] = reads[nbchr-s]->next;
				localReadNumberByChr[nbchr-s] = readNumberByChr[nbchr-s];
				if (s == 2){
					unmapped_start = startOffset(g_rank,
												 g_size,
												 unmappedSize,
												 headerSize,
												 nbchr-s,
												 localReadNumberByChr[nbchr-s],
												 split_comm
												 );

					if(!unmapped_start){
						fprintf(stderr, "No header was defined for unmapped. \n Shutting down.\n");
						MPI_Finalize();
						return 0;
					}

					time_count = MPI_Wtime();
					writeSam_discordant_and_unmapped(
							split_rank,
							output_dir,
							header,
							localReadNumberByChr[nbchr-s],
							chrNames[nbchr-s],
							reads[nbchr-s],
							split_size,
							split_comm,
							file_name,
							mpi_file_split_comm2,
							finfo,
							compression_level,
							local_data,
							goff[rank],
							write_sam);

					if (split_rank == chosen_rank){
							fprintf(stderr,	"rank %d :::::[MPISORT] Time to write chromosom %s ,  %f seconds \n\n\n", split_rank,
									chrNames[nbchr-s], MPI_Wtime() - time_count);
					}
				}
				else{
					discordant_start = startOffset(g_rank,
												   g_size,
												   discordantSize,
												   headerSize,
												   nbchr-s,
												   localReadNumberByChr[nbchr-s],
												   split_comm);

					if(!discordant_start){
						fprintf(stderr, "No header was defined for discordant.\n Shutting down.\n");
						MPI_Finalize();
						return 0;
					}
					time_count = MPI_Wtime();

					writeSam_discordant_and_unmapped(
							g_rank,
							output_dir,
							header,
							localReadNumberByChr[nbchr-s],
							chrNames[nbchr-s],
							reads[nbchr-s],
							g_size,
							split_comm,
							file_name,
							mpi_file_split_comm2,
							finfo,
							compression_level,
							local_data,
							goff[rank],
							write_sam
							);


					if (split_rank == chosen_rank){
							fprintf(stderr,	"rank %d :::::[MPISORT] Time to write chromosom %s ,  %f seconds \n\n\n", split_rank,
								chrNames[nbchr-s], MPI_Wtime() - time_count);
					}

				}
				while( reads[nbchr-s]->next != NULL){
						Read *tmp_chr = reads[nbchr-s];
						reads[nbchr-s] = reads[nbchr-s]->next;
						free(tmp_chr);
				}
				free(localReadsNum_rank0);
			}
			else{
				// we do nothing
			}

			//we put a barrier before freeing pointers
			MPI_Barrier(MPI_COMM_WORLD);
			//we free the file pointer

			if  (file_pointer_to_free)
				MPI_File_close(&mpi_file_split_comm2);

			//we free the split_comm
			if (split_comm_to_free)
				MPI_Comm_free(&split_comm);

			split_comm_to_free = 0;
			file_pointer_to_free = 0;

			free(color_vec_to_send);
			free(key_vec_to_send);

		}
	} //end for (s=1; s < 3; s++){

	/*
	 *  We write the mapped reads in a file named chrX.bam
	 *	We loop by chromosoms.
	 */

	MPI_Barrier(MPI_COMM_WORLD);
	for(i = 0; i < (nbchr-2); i++){

		/*
		 * First Part of the algorithm
		 *
		 * In this part we elected a rank which is the first rank
		 * to have reads to sort.
		 *
		 * Once elected a rank, we plit the communicator according to
		 * wether the rank has reads to sort for this chromosom.
		 *
		 * The new communicator is COMM_WORLD.
		 *
		 * If all jobs have reads to sort no need to split the communicator and then
		 * COMM_WORLD = MPI_COMM_WORLD
		 *
		 */

		int i1,i2;
		size_t localReadsNum_rank0[num_proc];
		localReadsNum_rank0[0]=0;
		int file_pointer_to_free = 0;
		int split_comm_to_free = 0;
		//we build a vector with rank job
		int val_tmp1 = 0;
		int val_tmp2 = 0;
		int chosen_rank = 0; //needed to tell what rank is going to compute the color and key
		int chosen_split_rank= 0; //the rank that collect data once the communication splitted normally this rank is 0

		// the color tells in what communicator the rank pertain
		// color = 0 will be the new communicator color
		// otherwise the color is 1
		// the key value tell the order in the new communicator
		int *color_vec_to_send 	=  malloc(num_proc * sizeof(int));
		int *key_vec_to_send 	=  malloc(num_proc * sizeof(int));

		// first we test if the there's reads to sort
		// rank 0 recieve the sum of all the reads count
		size_t total_reads_by_chr = 0;
		MPI_Allreduce(&readNumberByChr[i], &total_reads_by_chr, 1, MPI_LONG_LONG_INT, MPI_SUM, MPI_COMM_WORLD);

		//fprintf(stderr, "rank %d :::: readNumberByChr[i] = %zu \n", rank, readNumberByChr[i]);
		//fprintf(stderr, "rank %d :::: total_reads_by_chr = %zu \n", rank, total_reads_by_chr);

		if (total_reads_by_chr == 0)
			continue; //pass to next chromosome

		//rank 0 gather the vector
		MPI_Allgather(&readNumberByChr[i] , 1, MPI_LONG_LONG_INT, localReadsNum_rank0 , 1, MPI_LONG_LONG_INT, MPI_COMM_WORLD);


		if (rank == 0){
			//the rank 0 chose the first rank with reads to sort
			i1=0;
			while ((localReadsNum_rank0[i1] == 0) && (i1 < num_proc)){
				chosen_rank++;
				i1++;
			}
			fprintf(stderr, "rank %d :::: Elected rank = %d \n", rank, chosen_rank);
		}

		//we broadcast the chosen rank
		//task: replace the broadcast with a sendrecieve
		MPI_Bcast( &chosen_rank, 1, MPI_INT, 0, MPI_COMM_WORLD);
		MPI_Barrier(MPI_COMM_WORLD);

		if (((rank == chosen_rank) || rank == 0) && (chosen_rank != 0)){

			//first we exchange the size o
			if (rank == chosen_rank){
				header = malloc((headerSize + 1)*sizeof(char));
				header[headerSize] = '\0';
				MPI_Recv(header, headerSize + 1, MPI_CHAR, 0, 0, MPI_COMM_WORLD, MPI_STATUS_IGNORE);
			}
			if (rank == 0){
				MPI_Send(header, headerSize + 1, MPI_CHAR, chosen_rank,  0, MPI_COMM_WORLD);
			}
		}
		else {
			//we do nothing here
		}

		MPI_Barrier(MPI_COMM_WORLD);

		if (rank == chosen_rank) {
			int counter = 0;
			//we compute the number of 0 in the localReadsNum_vec
			for(i1 = 0; i1 < num_proc; i1++){
				if (localReadsNum_rank0[i1] == 0) {
						counter++;
					}
			}
			// if no jobs without reads we do nothing
			if ( counter == 0 ){
				// nothing to do we associate split_comm with
				fprintf(stderr, "rank %d ::::[MPISORT] we don't split the rank \n", rank);
				split_comm = MPI_COMM_WORLD;
				for (i2 = 0; i2 < num_proc; i2++) {
					if (localReadsNum_rank0[i2] == 0) {
						color_vec_to_send[i2] = 1;
						key_vec_to_send[i2] = val_tmp2;
						val_tmp2++;
					} else {
						color_vec_to_send[i2] = 0;
						key_vec_to_send[i2] = val_tmp1;
						val_tmp1++;
					}
				}
			}
			else{
				// now we compute the color according to
				// the number of reads to sort
				fprintf(stderr, "rank %d ::::[MPISORT] we split the rank \n", rank);
				for(i2 = 0; i2 < num_proc; i2++){
					if (localReadsNum_rank0[i2] == 0){
						color_vec_to_send[i2] = 1;
						key_vec_to_send[i2] = val_tmp2;
						val_tmp2++;
					} else{
						color_vec_to_send[i2] = 0;
						key_vec_to_send[i2] = val_tmp1;
						val_tmp1++;
					}
				} // end for loop
			}// end if
		}// end if (rank == plit_rank)

		MPI_Barrier(MPI_COMM_WORLD);
		//we create key and color variable for each job
		int local_color = 0;
		int local_key = 0;
		// rank 0 scatter the color and the key vector
		MPI_Scatter( color_vec_to_send, 1, MPI_INT, &local_color, 1, MPI_INT, chosen_rank, MPI_COMM_WORLD);
		MPI_Scatter( key_vec_to_send, 1, MPI_INT, &local_key, 1, MPI_INT, chosen_rank, MPI_COMM_WORLD);
		MPI_Barrier(MPI_COMM_WORLD);
		// now we create a communicator
		// we group all communicator
		// with color of zero
		if (local_color == 0){
			MPI_Comm_split( MPI_COMM_WORLD, local_color, local_key, &split_comm);
			ierr = MPI_File_open(split_comm, file_name,  MPI_MODE_RDONLY, finfo, &mpi_file_split_comm);
			//we ask to liberate file pointer
			file_pointer_to_free = 1;
			//we ask to liberate the split_comm
			split_comm_to_free = 1;
		}
		else{
			MPI_Comm_split( MPI_COMM_WORLD, MPI_UNDEFINED, local_key, &split_comm);
			mpi_file_split_comm = mpi_filed;
		}

		//now we change the rank in the reads structure
		if (local_color == 0){

			MPI_Comm_rank(split_comm, &split_rank);
			MPI_Comm_size(split_comm, &split_size);
			
			//we update g_rank
			g_rank = split_rank;
			g_size = split_size;
		}
		else{
			g_rank = split_rank;			
			g_size = split_size = num_proc;
		}

		localReadNumberByChr[i] = readNumberByChr[i];
		MPI_Barrier(MPI_COMM_WORLD);

		if ((local_color == 0) && (i < (nbchr - 2))) {

			/*
			 * Second part of the algorithm
			 *
			 * First we load coordinates, offset sources, and read size in vector
			 *
			 * Then we sort the coordinates of the reads
			 * with a bitonic sorter
			 *
			 * Then according to the reads coordinates we reoder the offset sources, and size
			 * this is done thanks to the index of the sorting.
			 *
			 * Afterward we compute the offsets of the reads in
			 * the destination file.
			 *
			 * Finally we dispatch the information to all ranks
			 * in the communicator for the next step.
			 */

			//we do a local merge sort
			if(reads[i] && reads[i]->next && reads[i]->next->next){
				mergeSort(reads[i], readNumberByChr[i]);
			}

			size_t local_readNum = localReadNumberByChr[i];

			reads[i] = reads[i]->next;

			//first we compute the dimension of the parabitonic sort
			// dimension is the number of processors where we
			// perform the bitonic sort
			// int dimensions = (int)(log2(num_processes));
			// find next ( must be greater) power, and go one back
			int dimensions = 1;
			while (dimensions <= split_size)
				dimensions <<= 1;

			dimensions >>= 1;

			// we get the maximum number of reads among
			// all the workers

			/*
			 * Here we split the programm in 2 cases
			 *
			 * 1) The first case de split_size is a power of 2 (the best case)
			 * 		this case is the simpliest we don't have extra communication to dispatch the read
			 * 		envenly between the jobs
			 *
			 * 2) The split_size is not a power of 2 (the worst case)
			 * 		well in this case we shall dispatch the jobs between jobs evenly.
			 *
			 */

			if (split_rank == chosen_split_rank){

				fprintf(stderr,	"Rank %d :::::[MPISORT] Dimensions for bitonic = %d \n", split_rank, dimensions);
				fprintf(stderr,	"Rank %d :::::[MPISORT] Split size 			   = %d \n", split_rank, split_size);

			}
			//we test the computed dimension
			if (dimensions == split_size ){

				size_t max_num_read = 0;
				MPI_Allreduce(&localReadNumberByChr[i], &max_num_read, 1, MPI_LONG_LONG_INT, MPI_MAX, split_comm);

				// if the dimension == split_size
				MPI_Barrier(split_comm);

				size_t first_local_readNum = local_readNum;

				/*
				 * Vector creation and allocation
				 				fprintf(stderr,	"split rank %d :::::[MPISORT] max_num_read = %zu \n", split_rank, max_num_read);
				 */
				local_readNum = max_num_read;

				time_count = MPI_Wtime();

				size_t *local_reads_coordinates_unsorted 	= calloc(local_readNum, sizeof(size_t));
				size_t *local_reads_coordinates_sorted 		= calloc(local_readNum, sizeof(size_t));
				size_t *local_offset_source_unsorted 		= calloc(local_readNum, sizeof(size_t));
				size_t *local_offset_source_sorted 			= calloc(local_readNum, sizeof(size_t));
				int *local_dest_rank_sorted 				= calloc(local_readNum, sizeof(int));
				int *local_reads_sizes_unsorted 			= calloc(local_readNum, sizeof(int));
				int *local_reads_sizes_sorted 				= calloc(local_readNum, sizeof(int));
				int *local_source_rank_unsorted 			= calloc(local_readNum, sizeof(int));
				int *local_source_rank_sorted 				= calloc(local_readNum, sizeof(int));

				if (split_rank == chosen_split_rank)
					fprintf(stderr,	"rank %d :::::[MPISORT][MALLOC 1] time spent = %f s\n", split_rank, MPI_Wtime() - time_count);

				local_reads_coordinates_unsorted[0] = 0;
				local_reads_coordinates_sorted[0] 	= 0;
				local_dest_rank_sorted[0] 			= 0;
				local_reads_sizes_unsorted[0] 		= 0;
				local_reads_sizes_sorted[0] 		= 0;
				local_source_rank_unsorted[0] 		= 0;
				local_source_rank_sorted[0] 		= 0;
				local_offset_source_unsorted[0] 	= 0;
				local_offset_source_sorted[0] 		= 0;

				//those vectors are the same that  local_..._sorted but without zero padding
				size_t *local_reads_coordinates_sorted_trimmed = NULL;
				int *local_dest_rank_sorted_trimmed = NULL;
				int *local_reads_sizes_sorted_trimmed = NULL;
				size_t *local_offset_source_sorted_trimmed = NULL;
				size_t *local_offset_dest_sorted_trimmed = NULL;
				int *local_source_rank_sorted_trimmed = NULL;

				//vectors used in the bruck just after the parabitonic sort
				size_t *local_reads_coordinates_sorted_trimmed_for_bruck = NULL;
				int *local_dest_rank_sorted_trimmed_for_bruck = NULL;
				int *local_reads_sizes_sorted_trimmed_for_bruck = NULL;
				size_t *local_offset_source_sorted_trimmed_for_bruck = NULL;
				size_t *local_offset_dest_sorted_trimmed_for_bruck = NULL;
				int *local_source_rank_sorted_trimmed_for_bruck = NULL;


				//task Init offset and size for source - free chr
				// from mpiSort_utils.c
				get_coordinates_and_offset_source_and_size_and_free_reads(
						split_rank,
						local_source_rank_unsorted,
						local_reads_coordinates_unsorted,
						local_offset_source_unsorted,
						local_reads_sizes_unsorted,
						reads[i],
						first_local_readNum
				);

				//init indices for qksort
				size_t *coord_index = (size_t*)malloc(local_readNum*sizeof(size_t));

				for(j = 0; j < local_readNum; j++){
					coord_index[j] = j;
				}

				//To start we sort locally the reads coordinates.
				//this is to facilitate the bitonic sorting
				//if the local coordinates to sort are to big we could get rid of
				//this step.
				time_count = MPI_Wtime();

				base_arr2 = local_reads_coordinates_unsorted;
				qksort(coord_index, local_readNum, sizeof(size_t), 0, local_readNum - 1, compare_size_t);

				if (split_rank == chosen_split_rank)
						fprintf(stderr,	"rank %d :::::[MPISORT][LOCAL SORT] time spent = %f s\n", split_rank, MPI_Wtime() - time_count);

				//We index data
				for(j = 0; j < local_readNum; j++){
					local_reads_coordinates_sorted[j] 			= local_reads_coordinates_unsorted[coord_index[j]];
					local_source_rank_sorted[j] 				= local_source_rank_unsorted[coord_index[j]];
					local_reads_sizes_sorted[j] 				= local_reads_sizes_unsorted[coord_index[j]];
					local_offset_source_sorted[j] 				= local_offset_source_unsorted[coord_index[j]];
					local_dest_rank_sorted[j] 					= rank; //will be updated after sorting the coordinates
				}

				/*
				*   FOR DEBUG
				*  
					

				for(j = 0; j < local_readNum - 1; j++){
					assert( local_reads_coordinates_sorted[j] < local_reads_coordinates_sorted[j+1]);
				}
				*/

				free(coord_index); 				 		//ok
				free(local_source_rank_unsorted); 	    //ok
				free(local_reads_coordinates_unsorted); //ok
				free(local_reads_sizes_unsorted); 		//ok
				free(local_offset_source_unsorted); 	//ok

				// we need the total number of reads.
				size_t total_num_read = 0;
				MPI_Allreduce(&localReadNumberByChr[i], &total_num_read, 1, MPI_LONG_LONG_INT, MPI_SUM, split_comm);

				/*
				 *
				 * In this section the number of bitonic dimension
				 * is equal to the split size.
				 *
				 * In this case there are less communication in preparation
				 * of the sorting.
				 *
				 * We use the parabitonic version 2.
				 */

				//we calll the bitonic

				time_count = MPI_Wtime();

				ParallelBitonicSort2(
					split_comm,
					split_rank,
					dimensions,
					local_reads_coordinates_sorted,
					local_reads_sizes_sorted,
					local_source_rank_sorted,
					local_offset_source_sorted,
					local_dest_rank_sorted,
					max_num_read
					);

				if (split_rank == chosen_split_rank)
					fprintf(stderr,	"rank %d :::::[MPISORT][BITONIC 2] time spent = %f s\n",
											split_rank, MPI_Wtime() - time_count);
				size_t k1;
				size_t tmp2 = 0;
				for (k1 = 1; k1 < max_num_read; k1++){
					assert(local_reads_coordinates_sorted[k1-1] <= local_reads_coordinates_sorted[k1]);
					local_dest_rank_sorted[k1]= split_rank;
				}
				/*
				for (k1 = 0; k1 < max_num_read; k1++){
					fprintf(stderr,	"rank %d :::::[MPISORT][BITONIC 2]  local_reads_coordinates_sorted[%zu]= %zu s\n",
											split_rank, k1, local_reads_coordinates_sorted[k1]);

					fprintf(stderr,	"rank %d :::::[MPISORT][BITONIC 2]  local_source_rank_sorted[%zu]= %d s\n",
											split_rank, k1, local_source_rank_sorted[k1]);							
				}
				*/
				size_t *local_offset_dest_sorted = malloc(max_num_read*sizeof(size_t));
				size_t last_local_offset = 0;


				// We compute the local_dest_offsets_sorted
				size_t local_total_offset = 0;

				for (k1 = 0; k1 <  max_num_read; k1++){
					local_offset_dest_sorted[k1] = local_reads_sizes_sorted[k1];
					local_total_offset += local_reads_sizes_sorted[k1];
				}

				//we make the cumulative sum of all offsets
				for (k1 = 1; k1 < max_num_read; k1++){
					local_offset_dest_sorted[k1] = local_offset_dest_sorted[k1 - 1] + local_offset_dest_sorted[k1];
				}

				//we exchange the last destination offset
				last_local_offset = local_offset_dest_sorted[max_num_read-1];


				//number of block to send
				int blocksize = 1;

				MPI_Offset *y  = calloc(split_size, sizeof(MPI_Offset));
				MPI_Offset *y2 = calloc(split_size + 1, sizeof(MPI_Offset));

				//we wait all processors

				MPI_Gather(&last_local_offset, 1, MPI_LONG_LONG_INT, y, 1, MPI_LONG_LONG_INT, 0, split_comm);

				if (split_rank ==0){
					for (k1 = 1; k1 < (split_size + 1); k1++) {
						y2[k1] = y[k1-1];
					}
				}

				if (split_rank ==0){
					for (k1 = 1; k1 < (split_size +1); k1++) {
						y2[k1] = y2[k1-1] + y2[k1];
					}
				}

				size_t offset_to_add = 0;
				MPI_Scatter(y2, 1, MPI_LONG_LONG_INT, &offset_to_add, 1, MPI_LONG_LONG_INT, 0, split_comm);

				free(y);
				free(y2);

				//we add offset of the previous rank
				for (k1 = 0; k1 < max_num_read; k1++){
					if (local_reads_sizes_sorted[k1] != 0)
						local_offset_dest_sorted[k1] += offset_to_add;
					else
						local_offset_dest_sorted[k1] = 0;
				}


				/*
				for (k1 = 0; k1 < max_num_read; k1++){

					fprintf(stderr, "\n");

					fprintf(stderr,	"rank %d :::::[MPISORT][BITONIC 2]  local_reads_coordinates_sorted[%zu]= %zu s\n",
											split_rank, k1, local_reads_coordinates_sorted[k1]);

					fprintf(stderr,	"rank %d :::::[MPISORT][BITONIC 2]  local_source_rank_sorted[%zu]= %d s\n",
											split_rank, k1, local_source_rank_sorted[k1]);							
				

					fprintf(stderr,	"rank %d :::::[MPISORT][BITONIC 2]  local_offset_dest_sorted[%zu]= %d s\n",
											split_rank, k1, local_offset_dest_sorted[k1]);							
				
					fprintf(stderr, "\n");
				}
				*/

				/*
				 * we update destination rank according to
				 * original number of reads read.
				 *
				 */

				//we compute the new rank dest according to max_num_read
				size_t previous_num_reads_per_job[dimensions];
				//we create a vector of size split_size with previous reads per job
				MPI_Allgather(&first_local_readNum , 1, MPI_LONG_LONG_INT, previous_num_reads_per_job , 1, MPI_LONG_LONG_INT, split_comm);

				// we compute the position of of the read in the first
				// reference without the zero padding of bitonic
				size_t pos_ref0 = 0;

				//we need the number of zeros we add for the padding
				size_t N0 = max_num_read*dimensions - total_num_read;

				int new_rank = 0;
				int previous_rank = 0;
				// we compute the new rank for
				// the reads sorted by offset destination
				size_t h = 0;


				pos_ref0 = max_num_read*split_rank - N0;
				for(j = 0; j < max_num_read; j++) {
					if ( local_reads_sizes_sorted[j] != 0){
						int new_rank = chosen_split_rank;
						pos_ref0 = (max_num_read*split_rank +j) - N0;
						if (pos_ref0 >= 0) {
							size_t tmp2 = 0;
							for (h = 0; h < dimensions; h++){
								tmp2 += previous_num_reads_per_job[h];
								if ( pos_ref0 < tmp2)  {
									new_rank = h;
									break;
									}
								}
							previous_rank = local_dest_rank_sorted[j];
							local_dest_rank_sorted[j] = new_rank;
						}
					}
				}

				MPI_Barrier(split_comm);

				size_t offset  = 0;
				size_t numItems = 0;
				size_t num_read_for_bruck = 0;
				int *p = local_reads_sizes_sorted;
				if (p[0] != 0) {offset = 0;};
				if (p[max_num_read -1] == 0){offset = max_num_read;}
				else {while ((*p == 0) && (offset < max_num_read )){ offset++; p++;}}

				/*
				 * REMOVE ZERO PADDING BEFORE BRUCK
				 *
				 */

				time_count = MPI_Wtime();

				if (offset > 0){

					// we remove zeros in the vector we have 2 cases
					// the first offset <  max_num_read
					// and the entire vector is null
					if ( offset < max_num_read ){

						numItems = max_num_read - offset;

						local_reads_coordinates_sorted_trimmed_for_bruck    = malloc(numItems * sizeof(size_t));
						local_offset_source_sorted_trimmed_for_bruck        = malloc(numItems * sizeof(size_t));
						local_offset_dest_sorted_trimmed_for_bruck			= malloc(numItems * sizeof(size_t));
						local_reads_sizes_sorted_trimmed_for_bruck          = malloc(numItems * sizeof(int));
						local_dest_rank_sorted_trimmed_for_bruck            = malloc(numItems * sizeof(int));
						local_source_rank_sorted_trimmed_for_bruck 		    = malloc(numItems * sizeof(int));
						size_t y=0;

						for (y = 0; y < numItems; y++){

							local_reads_coordinates_sorted_trimmed_for_bruck[y]    = local_reads_coordinates_sorted[y+offset];
							local_offset_source_sorted_trimmed_for_bruck[y]        = local_offset_source_sorted[y+offset];
							local_offset_dest_sorted_trimmed_for_bruck[y]		   = local_offset_dest_sorted[y+offset];
							local_reads_sizes_sorted_trimmed_for_bruck[y]          = local_reads_sizes_sorted[y+offset];
							local_dest_rank_sorted_trimmed_for_bruck[y]            = local_dest_rank_sorted[y+offset];
							local_source_rank_sorted_trimmed_for_bruck[y] 		   = local_source_rank_sorted[y+offset];
						}

						num_read_for_bruck = numItems;

						/*
						 *
						 * FOR DEBUG
						 *

						for(y = 0; y < num_read_for_bruck; y++){
							assert( local_reads_sizes_sorted_trimmed_for_bruck[y] 		!= 0 );
							assert( local_source_rank_sorted_trimmed_for_bruck[y] 		< dimensions);
							assert( local_dest_rank_sorted_trimmed_for_bruck[y]   		< dimensions);
							assert( local_offset_source_sorted_trimmed_for_bruck[y] 	!= 0);
							assert( local_offset_dest_sorted_trimmed_for_bruck[y] 	    != 0);
							assert( local_reads_coordinates_sorted_trimmed_for_bruck[y] != 0);
						}
						*/

					}
					else{

						numItems = 0;
						local_reads_coordinates_sorted_trimmed_for_bruck    = malloc(numItems * sizeof(size_t));
						local_offset_source_sorted_trimmed_for_bruck        = malloc(numItems * sizeof(size_t));
						local_offset_dest_sorted_trimmed_for_bruck          = malloc(numItems * sizeof(size_t));
						local_reads_sizes_sorted_trimmed_for_bruck          = malloc(numItems * sizeof(int));
						local_dest_rank_sorted_trimmed_for_bruck            = malloc(numItems * sizeof(int));
						local_source_rank_sorted_trimmed_for_bruck 		    = malloc(numItems * sizeof(int));
						num_read_for_bruck = 0;
					}
				}
				else {

					numItems = local_readNum;
					local_reads_coordinates_sorted_trimmed_for_bruck    = malloc(local_readNum * sizeof(size_t));
					local_offset_source_sorted_trimmed_for_bruck        = malloc(local_readNum * sizeof(size_t));
					local_offset_dest_sorted_trimmed_for_bruck          = malloc(local_readNum * sizeof(size_t));
					local_reads_sizes_sorted_trimmed_for_bruck          = malloc(local_readNum * sizeof(int));
					local_dest_rank_sorted_trimmed_for_bruck            = malloc(local_readNum * sizeof(int));
					local_source_rank_sorted_trimmed_for_bruck 		    = malloc(local_readNum * sizeof(int));

					size_t y=0;
					for (y = 0; y < local_readNum; y++){

						local_reads_coordinates_sorted_trimmed_for_bruck[y]    = local_reads_coordinates_sorted[y];
						local_offset_source_sorted_trimmed_for_bruck[y]        = local_offset_source_sorted[y];
						local_offset_dest_sorted_trimmed_for_bruck[y]          = local_offset_dest_sorted[y];
						local_reads_sizes_sorted_trimmed_for_bruck[y]          = local_reads_sizes_sorted[y];
						local_dest_rank_sorted_trimmed_for_bruck[y]            = local_dest_rank_sorted[y];
						local_source_rank_sorted_trimmed_for_bruck[y] 		   = local_source_rank_sorted[y];
					}

					num_read_for_bruck = numItems;

					/*
					 *
					 * FOR DEBUG
					 *
					for(y = 0; y < num_read_for_bruck; y++){
						assert( local_reads_sizes_sorted_trimmed_for_bruck[y] 		!= 0 );
						assert( local_source_rank_sorted_trimmed_for_bruck[y] 		< dimensions);
						assert( local_dest_rank_sorted_trimmed_for_bruck[y]   		< dimensions);
						assert( local_offset_source_sorted_trimmed_for_bruck[y] 	!= 0);
						assert( local_offset_dest_sorted_trimmed_for_bruck[y] 	    != 0);
						assert( local_reads_coordinates_sorted_trimmed_for_bruck[y] != 0);
					}
					*/
				}

				free(local_reads_coordinates_sorted);
				free(local_offset_source_sorted);
				free(local_offset_dest_sorted);
				free(local_reads_sizes_sorted);
				free(local_dest_rank_sorted);
				free(local_source_rank_sorted);


				if (split_rank == chosen_split_rank)
					fprintf(stderr,	"rank %d :::::[MPISORT][TRIMMING] time spent = %f s\n", split_rank, MPI_Wtime() - time_count);

				/*
				 * We do a Bruck on rank of origin reading
				 */

				size_t m=0;
				int num_proc = dimensions;
				size_t *number_of_reads_by_procs = calloc( dimensions, sizeof(size_t));

				//fprintf(stderr,	"rank %d :::::[MPISORT] num_read_for_bruck = %zu \n", split_rank, num_read_for_bruck);

				for(m = 0; m < num_read_for_bruck; m++){
					 //assert(new_pbs_orig_rank_off_phase1[m] < dimensions);
					 //assert(new_pbs_dest_rank_phase1[m] < dimensions);
					 number_of_reads_by_procs[local_source_rank_sorted_trimmed_for_bruck[m]]++;
				}

				int *local_source_rank_sorted_trimmed_for_bruckv2 = malloc( num_read_for_bruck * sizeof(int));

				for(m = 0; m < num_read_for_bruck; m++){
					local_source_rank_sorted_trimmed_for_bruckv2[m] = local_source_rank_sorted_trimmed_for_bruck[m];
				}

				size_t count6 = 0;
				for(m = 0; m < dimensions; m++){
					count6 += number_of_reads_by_procs[m];
				}

				assert( count6 == num_read_for_bruck );
				MPI_Barrier(split_comm);

				size_t **reads_coordinates 		= malloc(sizeof(size_t *) * dimensions);
				size_t **local_source_offsets 	= malloc(sizeof(size_t *) * dimensions);
				size_t **dest_offsets 			= malloc(sizeof(size_t *) * dimensions);
				int **read_size 				= malloc(sizeof(int *) * dimensions);
				int **dest_rank 				= malloc(sizeof(int *) * dimensions);
				int **source_rank				= malloc(sizeof(int *) * dimensions);

				/*
				 * We send in order
				 *
				 * local_offset_source_sorted_trimmed_for_bruck
				 * local_dest_rank_sorted_trimmed_for_bruck
				 * local_reads_coordinates_sorted_trimmed_for_bruck
				 * local_reads_sizes_sorted_trimmed_for_bruck
				 *
				 */

				COMM_WORLD = split_comm;
				time_count = MPI_Wtime();

				bruckWrite3(split_rank,
							dimensions,
							count6,
							number_of_reads_by_procs,
							local_source_rank_sorted_trimmed_for_bruckv2,
							local_offset_source_sorted_trimmed_for_bruck,     //offset sources
							&local_source_offsets,
							local_dest_rank_sorted_trimmed_for_bruck,     	  //destination rank
							&dest_rank,
							local_reads_coordinates_sorted_trimmed_for_bruck, //reads coordinates
							&reads_coordinates,
							local_reads_sizes_sorted_trimmed_for_bruck,       //read size
							&read_size,
							local_source_rank_sorted_trimmed_for_bruck,		  //source rank
							&source_rank,
							local_offset_dest_sorted_trimmed_for_bruck,
							&dest_offsets
				);

				if (split_rank == chosen_split_rank)
					fprintf(stderr,	"rank %d :::::[MPISORT][BRUCK 3] time spent = %f s\n",
							split_rank, MPI_Wtime() - time_count);


				time_count = MPI_Wtime();

				free(local_reads_coordinates_sorted_trimmed_for_bruck);
				free(local_dest_rank_sorted_trimmed_for_bruck);
				free(local_reads_sizes_sorted_trimmed_for_bruck);
				free(local_offset_source_sorted_trimmed_for_bruck);
				free(local_offset_dest_sorted_trimmed_for_bruck);
				free(local_source_rank_sorted_trimmed_for_bruck);
				free(local_source_rank_sorted_trimmed_for_bruckv2);

				local_reads_coordinates_sorted_trimmed 	  = malloc(first_local_readNum * sizeof(size_t));
				local_offset_source_sorted_trimmed   	  = malloc(first_local_readNum * sizeof(size_t));
				local_offset_dest_sorted_trimmed   	  	  = malloc(first_local_readNum * sizeof(size_t));
				local_dest_rank_sorted_trimmed   		  = malloc(first_local_readNum * sizeof(int));
				local_source_rank_sorted_trimmed		  = malloc(first_local_readNum * sizeof(int));
				local_reads_sizes_sorted_trimmed		  = malloc(first_local_readNum * sizeof(int));

				if (split_rank == chosen_split_rank)
					fprintf(stderr,	"rank %d :::::[MPISORT][FREE + MALLOC] time spent = %f s\n",
											split_rank, MPI_Wtime() - time_count);
				/*
				 * GET DATA AFTER BRUCK
				 *
				 */

				j=0;
				size_t k = 0;

				for(m = 0; m < num_proc; m++)
				{
					for(k = 0; k < number_of_reads_by_procs[m]; k++)
					{
						
						local_offset_dest_sorted_trimmed[k + j] 		= dest_offsets[m][k];
						local_dest_rank_sorted_trimmed[k + j] 			= dest_rank[m][k];
						local_reads_sizes_sorted_trimmed[k + j] 		= read_size[m][k];
						local_offset_source_sorted_trimmed[k + j] 		= local_source_offsets[m][k];
						local_reads_coordinates_sorted_trimmed[k + j] 	= reads_coordinates[m][k];
						local_source_rank_sorted_trimmed[k + j] 		= source_rank[m][k];

					}
					free(dest_offsets[m]);
					free(dest_rank[m]);
					free(read_size[m]);
					free(local_source_offsets[m]);
					free(reads_coordinates[m]);
					free(source_rank[m]);
					j += number_of_reads_by_procs[m];
				}


				free(number_of_reads_by_procs);
				if (dest_rank != NULL)
					free(dest_rank);
				if (read_size != NULL)
					free(read_size);
				if (local_source_offsets != NULL)
					free(local_source_offsets);
				if (reads_coordinates != NULL)
					free(reads_coordinates);
				if (source_rank != NULL)
					free(source_rank);
				if (dest_offsets != NULL)
					free(dest_offsets);

				local_readNum = first_local_readNum;


				/*
				 *
				 * FOR DEBUG
				 *
				for ( j = 0; j < local_readNum; j++){
					assert ( local_reads_coordinates_sorted_trimmed[j]    != 0 );
					assert ( local_offset_source_sorted_trimmed[j]        != 0 );
					assert ( local_offset_dest_sorted_trimmed[j]   		  != 0 );
					assert ( local_reads_sizes_sorted_trimmed 			  != 0 );
					assert ( local_dest_rank_sorted_trimmed[j]            < split_size );
					assert ( local_source_rank_sorted_trimmed[j] 		  < split_size );
				}
				*/

				free(local_reads_coordinates_sorted_trimmed);

				if (split_rank == chosen_split_rank)
					fprintf(stderr,	"rank %d :::::[MPISORT] we call write SAM \n", split_rank);

				malloc_trim(0);

				time_count = MPI_Wtime();

				writeSam(
					split_rank,
					output_dir,
					header,
					local_readNum,
					total_reads_by_chr,
					chrNames[i],
					reads[i],
					split_size,
					split_comm,
					chosen_split_rank,
					file_name,
					mpi_file_split_comm,
					finfo,
					compression_level,
					local_offset_dest_sorted_trimmed,
					local_offset_source_sorted_trimmed,
					local_reads_sizes_sorted_trimmed,
					local_dest_rank_sorted_trimmed,
					local_source_rank_sorted_trimmed,
					local_data,
					goff[rank],
					first_local_readNum
				);

				if (split_rank == chosen_split_rank){
					fprintf(stderr,	"rank %d :::::[MPISORT][WRITESAM] chromosom %s :::  %f seconds\n\n\n",
							split_rank, chrNames[i], MPI_Wtime() - time_count);

				}
			}
			else{

				/*
				 * We are in the case the number of cpu is
				 * not a power of 2
				 *
				 *
				 */

				parallel_sort_any_dim(
						dimensions, 				//dimension for parabitonic
						local_readNum,
						split_rank,
						split_size,
						reads,
						i, 							//chromosom number
						chosen_split_rank,
						split_comm,
						localReadNumberByChr,
						local_data,
						file_name,
						output_dir,
						finfo,
						compression_level,
						total_reads_by_chr,
						goff[rank],
						headerSize,
						header,
						chrNames[i],
						mpi_file_split_comm
					);

			} //end if dimensions < split_rank

		} //if ((local_color == 0) && (i < (nbchr - 2))) //in the splitted dimension
		else{
			//we do nothing here
		}

		//we put a barrier before freeing pointers
		MPI_Barrier(MPI_COMM_WORLD);
		//we free the file pointer
		if  (file_pointer_to_free)
			MPI_File_close(&mpi_file_split_comm);
		//we free the split_comm
		if (split_comm_to_free){
			MPI_Comm_free(&split_comm);
		}

		free(color_vec_to_send);
		free(key_vec_to_send);

	}// end loop upon chromosoms (line 665)
/**
 * MINIMIZE heap size (way below 128k) since this process doesn't need much.
 */
void __attribute__ ((constructor)) GNUNET_ARM_memory_init ()
{
  mallopt (M_TRIM_THRESHOLD, 4 * 1024);
  mallopt (M_TOP_PAD, 1 * 1024);
  malloc_trim (0);
}
Exemplo n.º 27
0
ucs_status_t ucm_malloc_install(int events)
{
    ucs_status_t status;

    pthread_mutex_lock(&ucm_malloc_hook_state.install_mutex);

    events &= UCM_EVENT_MMAP | UCM_EVENT_MUNMAP | UCM_EVENT_MREMAP | UCM_EVENT_SBRK;

    if (ucs_malloc_is_ready(events)) {
        goto out_succ;
    }

    ucm_malloc_test(events);
    if (ucs_malloc_is_ready(events)) {
        goto out_succ;
    }

    if (!ucm_malloc_hook_state.hook_called) {
        /* Try to leak less memory from original malloc */
        malloc_trim(0);
    }

    if (!(ucm_malloc_hook_state.install_state & UCM_MALLOC_INSTALLED_SBRK_EVH)) {
        ucm_debug("installing malloc-sbrk event handler");
        ucm_event_handler_add(&ucm_malloc_sbrk_handler);
        ucm_malloc_hook_state.install_state |= UCM_MALLOC_INSTALLED_SBRK_EVH;
    }

    /* When running on valgrind, don't even try malloc hooks.
     * We want to release original blocks to silence the leak check, so we must
     * have a way to call the original free(), also these hooks don't work with
     * valgrind anyway.
     */
#if HAVE_MALLOC_HOOK
    if (ucm_global_config.enable_malloc_hooks) {
        /* Install using malloc hooks.
         * TODO detect glibc support in configure-time.
         */
        if (!(ucm_malloc_hook_state.install_state & UCM_MALLOC_INSTALLED_HOOKS)) {
            ucm_debug("installing malloc hooks");
            __free_hook     = ucm_free;
            __realloc_hook  = ucm_realloc;
            __malloc_hook   = ucm_malloc;
            __memalign_hook = ucm_memalign;
            ucm_malloc_hook_state.install_state |= UCM_MALLOC_INSTALLED_HOOKS;
        }

        /* Just installed the hooks, test again. */
        ucm_malloc_test(events);
        if (ucm_malloc_hook_state.hook_called) {
            goto out_install_opt_syms;
        }
    } else
#endif
    {
        ucm_debug("using malloc hooks is disabled by configuration");
    }

    /* Install using malloc symbols */
    if (ucm_global_config.enable_malloc_reloc) {
        if (!(ucm_malloc_hook_state.install_state & UCM_MALLOC_INSTALLED_MALL_SYMS)) {
            ucm_debug("installing malloc relocations");
            ucm_malloc_populate_glibc_cache();
            ucm_malloc_install_symbols(ucm_malloc_symbol_patches);
            ucs_assert(ucm_malloc_symbol_patches[0].value == ucm_free);
            ucm_malloc_hook_state.free           = ucm_malloc_symbol_patches[0].prev_value;
            ucm_malloc_hook_state.install_state |= UCM_MALLOC_INSTALLED_MALL_SYMS;
        }
    } else {
        ucm_debug("installing malloc relocations is disabled by configuration");
    }

    /* Just installed the symbols, test again */
    ucm_malloc_test(events);
    if (ucm_malloc_hook_state.hook_called) {
        goto out_install_opt_syms;
    }

    status = UCS_ERR_UNSUPPORTED;
    goto out_unlock;

out_install_opt_syms:
    ucm_malloc_install_optional_symbols();
    ucm_malloc_install_mallopt();
out_succ:
    status = UCS_OK;
out_unlock:
    pthread_mutex_unlock(&ucm_malloc_hook_state.install_mutex);
    return status;
}