Exemplo n.º 1
0
/*
 * Gathers basic MPI information; sets up reader CM.
 */
int
adios_read_flexpath_init_method (MPI_Comm comm, PairStruct* params)
{    
    setenv("CMSelfFormats", "1", 1);
    fp_read_data = malloc(sizeof(flexpath_read_data));     
    if(!fp_read_data) {
        adios_error(err_no_memory, "Cannot allocate memory for flexpath.");
        return -1;
    }
    memset(fp_read_data, 0, sizeof(flexpath_read_data));
    
    fp_read_data->CM_TRANSPORT = attr_atom_from_string("CM_TRANSPORT");
    attr_list listen_list = NULL;
    char * transport = NULL;
    transport = getenv("CMTransport");

    // setup MPI stuffs
    fp_read_data->fp_comm = comm;
    MPI_Comm_size(fp_read_data->fp_comm, &(fp_read_data->fp_comm_size));
    MPI_Comm_rank(fp_read_data->fp_comm, &(fp_read_data->fp_comm_rank));

    fp_read_data->fp_cm = CManager_create();
    if(transport == NULL){
	if(CMlisten(fp_read_data->fp_cm) == 0) {
	    fprintf(stderr, "Flexpath ERROR: reader %d unable to initialize connection manager.\n",
		fp_read_data->fp_comm_rank);
	}
    }else{
	listen_list = create_attr_list();
	add_attr(listen_list, fp_read_data->CM_TRANSPORT, Attr_String, 
		 (attr_value)strdup(transport));
	CMlisten_specific(fp_read_data->fp_cm, listen_list);
    }
    int forked = CMfork_comm_thread(fp_read_data->fp_cm);
    if(!forked) {
	fprintf(stderr, "reader %d failed to fork comm_thread.\n", fp_read_data->fp_comm_rank);
	/*log_debug( "forked\n");*/
    }
    return 0;
}
Exemplo n.º 2
0
int
adios_read_icee_init_method (MPI_Comm comm, PairStruct* params)
{   
    log_debug ("%s\n", __FUNCTION__);

    int cm_port = 59997;
    char *cm_host = "localhost";
    int cm_remote_port = 59999;
    char *cm_remote_host = "localhost";
    char *cm_attr = NULL;
    //attr_list contact_list;
    icee_transport_t icee_transport_init = TCP;
    icee_transport_t icee_transport = TCP;

    icee_contactinfo_rec_t *remote_contact = NULL;
    int i;

    int use_single_remote_server = 1;
    char *remote_list_str = NULL;
    char *attr_list_str = NULL;

    int use_native_contact = 0;

    PairStruct * p = params;

    while (p)
    {
        if (!strcasecmp (p->name, "cm_attr"))
        {
            cm_attr = p->value;
        }
        else if (!strcasecmp (p->name, "cm_host"))
        {
            cm_host = p->value;
        }
        else if (!strcasecmp (p->name, "cm_port"))
        {
            cm_port = atoi(p->value);
        }
        else if (!strcasecmp (p->name, "cm_remote_host"))
        {
            cm_remote_host = p->value;
        }
        else if (!strcasecmp (p->name, "cm_remote_port"))
        {
            cm_remote_port = atoi(p->value);
        }
        else if (!strcasecmp (p->name, "remote_list"))
        {
            use_single_remote_server = 0;
            if (p->value)
                remote_list_str = strdup(p->value);
        }
        else if (!strcasecmp (p->name, "attr_list"))
        {
            use_single_remote_server = 0;
            if (p->value)
                attr_list_str = strdup(p->value);
        }
        else if (!strcasecmp (p->name, "transport"))
        {
            if (strcasecmp(p->value, "TCP") == 0)
                icee_transport = TCP;
            else if (strcasecmp(p->value, "ENET") == 0)
                icee_transport = ENET;
            else if (strcasecmp(p->value, "NNTI") == 0)
                icee_transport = NNTI;
            else if (strcasecmp(p->value, "IB") == 0)
                icee_transport = IB;
            else
                log_error ("No support: %s\n", p->value);
        }
        else if (!strcasecmp (p->name, "transport_init"))
        {
            if (strcasecmp(p->value, "TCP") == 0)
                icee_transport_init = TCP;
            else if (strcasecmp(p->value, "ENET") == 0)
                icee_transport_init = ENET;
            else if (strcasecmp(p->value, "NNTI") == 0)
                icee_transport_init = NNTI;
            else if (strcasecmp(p->value, "IB") == 0)
                icee_transport_init = IB;
            else
                log_error ("No support: %s\n", p->value);
        }
        else if (!strcasecmp (p->name, "num_parallel"))
        {
            icee_read_num_parallel = atoi(p->value);
        }
        else if (!strcasecmp (p->name, "is_passive"))
        {
            is_read_cm_passive = atoi(p->value);
        }
        else if (!strcasecmp (p->name, "use_native_contact"))
        {
            use_native_contact = atoi(p->value);
        }

        p = p->next;
    }

    pthread_mutex_init(&fileinfo_lock, NULL);

    if (use_single_remote_server)
    {
        num_remote_server = 1;

        attr_list contact_list = create_attr_list();
        set_contact_list(contact_list, icee_transport_init, cm_remote_host, cm_remote_port);

        icee_contactinfo_rec_t *p;
        p = malloc(sizeof(icee_contactinfo_rec_t));
        char *contact_string = attr_list_to_string(contact_list);
        p->contact_string = contact_string;
        p->stone_id = 0; // we assume. it can be wrong.
        p->next = NULL;

        remote_contact = p;
    }
    else
    {
        num_remote_server = 0;

        icee_contactinfo_rec_t *p;
        icee_contactinfo_rec_t *prev;

        char* token = strtok(remote_list_str, ",");
        while (token)
        {
            char host[256];
            int port = 0;

            if (token[0] == ':')
            {
                strcpy(host, cm_remote_host);
                port = atoi(token+1);
            }
            else
            {
                char *pch = strchr(token, ':');
                if (pch != NULL)
                {
                    strncpy(host, token, pch - token);
                    host[pch-token] = '\0';
                    port = atoi(pch+1);
                }
                else
                {
                    int len = strlen(token);
                    strncpy(host, token, len);
                    assert(len < 256);
                    host[len] = '\0';
                    port = cm_remote_port;
                }
            }
            log_debug("Remote server list: (%d) %s:%d\n", num_remote_server, host, port);

            p = malloc(sizeof(icee_contactinfo_rec_t));
            attr_list contact_list;
            
            contact_list = create_attr_list();
            set_contact_list(contact_list, icee_transport_init, host, port);
            p->contact_string = attr_list_to_string(contact_list);
            p->stone_id = 0; // we assume. it can be wrong.
            p->next = NULL;

            if (num_remote_server == 0)
                remote_contact = p;
            else
                prev->next = p;

            prev = p;
            num_remote_server++;
            token = strtok(NULL, ",");
        }
    }

    if (attr_list_str != NULL)
    {
        num_remote_server = 0;

        icee_contactinfo_rec_t *p;
        icee_contactinfo_rec_t *prev;

        char* token = strtok(attr_list_str, ",");
        while (token)
        {
            int remote_stone = 0;
            char string_list[256];

            sscanf(token, "%d:%s", &remote_stone, &string_list[0]);

            p = malloc(sizeof(icee_contactinfo_rec_t));
            attr_list contact_list;
            
            p->stone_id = remote_stone;
            p->contact_string = strdup(string_list);
            p->next = NULL;

            if (num_remote_server == 0)
                remote_contact = p;
            else
                prev->next = p;

            prev = p;
            num_remote_server++;
            token = strtok(NULL, ",");
        }
    }

    if (icee_read_num_parallel > ICEE_MAX_PARALLEL)
    {
        icee_read_num_parallel = ICEE_MAX_PARALLEL;
        log_info ("Max. number of threads is set to %d\n", icee_read_num_parallel);
    }

    log_debug ("transport : %s\n", icee_transport_name[icee_transport]);

    /*
      log_info ("cm_host : %s\n", cm_host);
      log_info ("cm_port : %d\n", cm_port);

      for (i = 0; i < num_remote_server; i++) 
      {
      log_info ("remote_list : %s:%d\n", remote_server[i].client_host, remote_server[i].client_port);
      }
    */

    if (!adios_read_icee_initialized)
    {
        if (is_read_cm_passive)
        {
            icee_contactinfo_rec_t *prev;
            for (i = 0; i < num_remote_server; i++)
            {
                attr_list contact_list;
                icee_contactinfo_rec_t *p = (i == 0)? remote_contact : prev->next;

                pcm[i] = CManager_create();

                if (!CMfork_comm_thread(pcm[i]))
                    printf("Fork of communication thread[%d] failed.\n", i);

                contact_list = attr_list_from_string(p->contact_string);

                log_debug("Passive remote contact: \"%s\"\n", attr_list_to_string(contact_list));
                if (adios_verbose_level > 5) dump_attr_list(contact_list);

                /*
                  attr_list contact_list  = create_attr_list();
                  add_string_attr(contact_list, attr_atom_from_string("IP_HOST"),
                  remote_server[i].client_host);
                  add_int_attr(contact_list, attr_atom_from_string("IP_PORT"),
                  remote_server[i].client_port);
                */

                CMConnection conn = CMinitiate_conn(pcm[i], contact_list);

                int n = 0;
                while (conn == NULL)
                {
                    log_error ("Passive connection failed (%d). Try again ...\n", i);
                    dump_attr_list(contact_list);

                    sleep(2);
                    conn = CMinitiate_conn(pcm[i], contact_list);
                    
                    if (n > 5) break;
                    n++;
                }

                if (conn == NULL)
                {
                    log_error ("Initializing passive connection failed (%d)\n", i);
                    
                }

                CMFormat fm_checkin, fm_fileinfo;
                fm_checkin = CMregister_format(pcm[i], icee_passivecheckin_format_list);

                CMregister_handler(fm_checkin, icee_passivecheckin_reply_handler, on_icee_passivecheckin_reply);

                fm_fileinfo = CMregister_format(pcm[i], icee_fileinfo_format_list);
                CMregister_handler(fm_fileinfo, icee_fileinfo_recv_handler, on_icee_fileinfo_recv);

                icee_passivecheckin_rec_t m;

                int condition = CMCondition_get(pcm[i], conn);
                CMCondition_set_client_data(pcm[i], condition, NULL);
                m.condition = condition;

                if (CMwrite(conn, fm_checkin, (void*)&m) != 1)
                    log_error ("Passive check-in failed (%d)\n", i);

                prev = p;
            }
            log_debug("Passive connection established");
            goto done;
        }

        EVstone   stone[ICEE_MAX_PARALLEL], remote_stone;
        EVsource  source;
        attr_list contact[ICEE_MAX_PARALLEL];

        icee_contactinfo_rec_t contact_msg[ICEE_MAX_PARALLEL];

        for (i=0; i<icee_read_num_parallel; i++)
        {
            icee_read_cm[i] = CManager_create();

            contact[i] = create_attr_list();
            set_contact_list(contact[i], icee_transport, cm_host, cm_port+i);

            if (CMlisten_specific(icee_read_cm[i], contact[i]) == 0)
                printf("Error: unable to initialize connection manager[%d].\n", i);
            
            if (!CMfork_comm_thread(icee_read_cm[i])) 
                printf("Fork of communication thread[%d] failed.\n", i);

            stone[i] = EValloc_stone(icee_read_cm[i]);
            if (adios_verbose_level > 5) 
            {
                log_debug("Reader contact: \"%d:%s\"\n", stone[i], attr_list_to_string(CMget_contact_list(icee_read_cm[i])));
                dump_attr_list(CMget_contact_list(icee_read_cm[i]));
            }

            EVassoc_terminal_action(icee_read_cm[i], stone[i], icee_fileinfo_format_list, icee_fileinfo_handler, NULL);

            contact_msg[i].stone_id = stone[i];
            attr_list contact_list;
            if (use_native_contact)
                contact_list = CMget_contact_list(icee_read_cm[i]);
            else
                contact_list = contact[i];
                
            contact_msg[i].contact_string = attr_list_to_string(contact_list);
            contact_msg[i].next = NULL;

            if (i>0)
                contact_msg[i-1].next = &contact_msg[i];
        }
        
        EVstone split_stone;
        EVaction split_action;

        split_stone = EValloc_stone(icee_read_cm[0]);
        split_action = EVassoc_split_action(icee_read_cm[0], split_stone, NULL);
        icee_contactinfo_rec_t *prev;
        for (i = 0; i < num_remote_server; i++) 
        {
            attr_list contact_list;
            EVstone remote_stone, output_stone;
            output_stone = EValloc_stone(icee_read_cm[0]);
            icee_contactinfo_rec_t *p = (i == 0)? remote_contact : prev->next;
            
            remote_stone = p->stone_id;
            contact_list = attr_list_from_string(p->contact_string);

            EVaction action;
            action = EVassoc_bridge_action(icee_read_cm[0], output_stone, contact_list, remote_stone);

            int n = 0;
            while (action == -1)
            {
                log_error ("Connection failed (%d). Try again ...\n", i);
                dump_attr_list(contact_list);
                
                sleep(2);
                action = EVassoc_bridge_action(icee_read_cm[0], output_stone, contact_list, remote_stone);
                
                if (n > 5) break;
                n++;
            }

            EVaction_add_split_target(icee_read_cm[0], split_stone, split_action, output_stone);

            prev = p;

            log_debug("Remote contact: \"%d:%s\"\n", remote_stone, attr_list_to_string(contact_list));
            if (adios_verbose_level > 5) dump_attr_list(contact_list);

        }

        source = EVcreate_submit_handle(icee_read_cm[0], split_stone, icee_contactinfo_format_list);
        
        //if (adios_verbose_level > 5) icee_contactinfo_print(contact_msg);
        
        EVsubmit(source, contact_msg, NULL);

    done:
        adios_read_icee_initialized = 1;
    }

    return 0;
}