Example #1
0
/*
 * Sets up local data structure for series of reads on an adios file
 * - create evpath graph and structures
 * -- create evpath control stone (outgoing)
 * -- create evpath data stone (incoming)
 * -- rank 0 dumps contact info to file
 * -- create connections using contact info from file
 */
ADIOS_FILE*
adios_read_flexpath_open(const char * fname,
			 MPI_Comm comm,
			 enum ADIOS_LOCKMODE lock_mode,
			 float timeout_sec)
{
    fp_log("FUNC", "entering flexpath_open\n");
    ADIOS_FILE *adiosfile = malloc(sizeof(ADIOS_FILE));        
    if(!adiosfile){
	adios_error (err_no_memory, 
		     "Cannot allocate memory for file info.\n");
	return NULL;
    }    
    
    flexpath_reader_file *fp = new_flexpath_reader_file(fname);
	
    adios_errno = 0;
    fp->stone = EValloc_stone(fp_read_data->fp_cm);	
    fp->comm = comm;

    MPI_Comm_size(fp->comm, &(fp->size));
    MPI_Comm_rank(fp->comm, &(fp->rank));

    EVassoc_terminal_action(fp_read_data->fp_cm,
			    fp->stone,
			    op_format_list,
			    op_msg_handler,
			    adiosfile);       

    EVassoc_terminal_action(fp_read_data->fp_cm,
			    fp->stone,
			    update_step_msg_format_list,
			    update_step_msg_handler,
			    adiosfile);       


    EVassoc_terminal_action(fp_read_data->fp_cm,
			    fp->stone,
			    evgroup_format_list,
			    group_msg_handler,
			    adiosfile);

    EVassoc_raw_terminal_action(fp_read_data->fp_cm,
				fp->stone,
				raw_handler,
				adiosfile);

    /* Gather the contact info from the other readers
       and write it to a file. Create a ready file so
       that the writer knows it can parse this file. */
    double setup_start = dgettimeofday();

    char writer_ready_filename[200];
    char writer_info_filename[200];
    char reader_ready_filename[200];
    char reader_info_filename[200];
	
    sprintf(reader_ready_filename, "%s_%s", fname, READER_READY_FILE);
    sprintf(reader_info_filename, "%s_%s", fname, READER_CONTACT_FILE);
    sprintf(writer_ready_filename, "%s_%s", fname, WRITER_READY_FILE);
    sprintf(writer_info_filename, "%s_%s", fname, WRITER_CONTACT_FILE);
	
    char *string_list;
    char data_contact_info[CONTACT_LENGTH];
    string_list = attr_list_to_string(CMget_contact_list(fp_read_data->fp_cm));
    sprintf(&data_contact_info[0], "%d:%s", fp->stone, string_list);
    free(string_list);

    char * recvbuf;
    if(fp->rank == 0){	
	recvbuf = (char*)malloc(sizeof(char)*CONTACT_LENGTH*(fp->size));
    }

    MPI_Gather(data_contact_info, CONTACT_LENGTH, MPI_CHAR, recvbuf,
	       CONTACT_LENGTH, MPI_CHAR, 0, fp->comm);

    if(fp->rank == 0){	
	// print our own contact information
	FILE * fp_out = fopen(reader_info_filename, "w");
	int i;
	if(!fp_out){	    
	    adios_error(err_file_open_error,
			"File for contact info could not be opened for writing.\n");
	    exit(1);
	}
	for(i=0; i<fp->size; i++) {
	    fprintf(fp_out,"%s\n", &recvbuf[i*CONTACT_LENGTH]);
	}
	fclose(fp_out);
	free(recvbuf);
	
	FILE * read_ready = fopen(reader_ready_filename, "w");
	fprintf(read_ready, "ready");
	fclose(read_ready);
    }
    MPI_Barrier(fp->comm);

    FILE * fp_in = fopen(writer_ready_filename,"r");
    while(!fp_in) {
	//CMsleep(fp_read_data->fp_cm, 1);
	fp_in = fopen(writer_ready_filename, "r");
    }
    fclose(fp_in);

    fp_in = fopen(writer_info_filename, "r");
    while(!fp_in){
	//CMsleep(fp_read_data->fp_cm, 1);
	fp_in = fopen(writer_info_filename, "r");
    }

    char in_contact[CONTACT_LENGTH] = "";
    //fp->bridges = malloc(sizeof(bridge_info));
    int num_bridges = 0;
    int their_stone;

    // change to read all numbers, dont create stones, turn bridge array into linked list
    while(fscanf(fp_in, "%d:%s", &their_stone, in_contact) != EOF){	
	//fprintf(stderr, "writer contact: %d:%s\n", their_stone, in_contact);
	fp->bridges = realloc(fp->bridges,
					  sizeof(bridge_info) * (num_bridges+1));
	fp->bridges[num_bridges].their_num = their_stone;
	fp->bridges[num_bridges].contact = strdup(in_contact);
	fp->bridges[num_bridges].created = 0;
	fp->bridges[num_bridges].step = 0;
	fp->bridges[num_bridges].opened = 0;
	fp->bridges[num_bridges].scheduled = 0;
	num_bridges++;
    }
    fclose(fp_in);
    fp->num_bridges = num_bridges;
    // clean up of writer's files
    MPI_Barrier(fp->comm);
    if(fp->rank == 0){
	unlink(writer_info_filename);
	unlink(writer_ready_filename);
    }	    

    adiosfile->fh = (uint64_t)fp;
    adiosfile->current_step = 0;
    
    /* Init with a writer to get initial scalar
       data so we can handle inq_var calls and
       also populate the ADIOS_FILE struct. */

    double bridge_start = MPI_Wtime();
    if(fp->size < num_bridges){
    	int mystart = (num_bridges/fp->size) * fp->rank;
    	int myend = (num_bridges/fp->size) * (fp->rank+1);
    	fp->writer_coordinator = mystart;
    	int z;
    	for(z=mystart; z<myend; z++){
    	    build_bridge(&fp->bridges[z]);
    	}
    }
    else{
	int writer_rank = fp->rank % num_bridges;
	build_bridge(&fp->bridges[writer_rank]);
	fp->writer_coordinator = writer_rank;
    }

    // requesting initial data.
    send_open_msg(fp, fp->writer_coordinator);
    

    fp->data_read = 0;
    send_flush_msg(fp, fp->writer_coordinator, DATA, 1);

    send_flush_msg(fp, fp->writer_coordinator, EVGROUP, 1);
    fp->data_read = 0;
    // this has to change. Writer needs to have some way of
    // taking the attributes out of the xml document
    // and sending them over ffs encoded. Not yet implemented.
    // the rest of this info for adiosfile gets filled in raw_handler.
    adiosfile->nattrs = 0;
    adiosfile->attr_namelist = NULL;
    // first step is at least one, otherwise raw_handler will not execute.
    // in reality, writer might be further along, so we might have to make
    // the writer explitly send across messages each time it calls close, to
    // indicate which timesteps are available. 
    adiosfile->last_step = 1;
    adiosfile->path = strdup(fname);
    // verifies these two fields. It's not BP, so no BP version.
    // It's a stream, so how can the file size be known?
    adiosfile->version = -1;
    adiosfile->file_size = 0;
    adios_errno = err_no_error;        
    fp_log("FUNC", "leaving flexpath_open\n");
    return adiosfile;
}
Example #2
0
int
adios_read_icee_init_method (MPI_Comm comm, PairStruct* params)
{   
    log_debug ("%s\n", __FUNCTION__);

    int cm_port = 59997;
    char *cm_host = "localhost";
    int cm_remote_port = 59999;
    char *cm_remote_host = "localhost";
    char *cm_attr = NULL;
    //attr_list contact_list;
    icee_transport_t icee_transport_init = TCP;
    icee_transport_t icee_transport = TCP;

    icee_contactinfo_rec_t *remote_contact = NULL;
    int i;

    int use_single_remote_server = 1;
    char *remote_list_str = NULL;
    char *attr_list_str = NULL;

    int use_native_contact = 0;

    PairStruct * p = params;

    while (p)
    {
        if (!strcasecmp (p->name, "cm_attr"))
        {
            cm_attr = p->value;
        }
        else if (!strcasecmp (p->name, "cm_host"))
        {
            cm_host = p->value;
        }
        else if (!strcasecmp (p->name, "cm_port"))
        {
            cm_port = atoi(p->value);
        }
        else if (!strcasecmp (p->name, "cm_remote_host"))
        {
            cm_remote_host = p->value;
        }
        else if (!strcasecmp (p->name, "cm_remote_port"))
        {
            cm_remote_port = atoi(p->value);
        }
        else if (!strcasecmp (p->name, "remote_list"))
        {
            use_single_remote_server = 0;
            if (p->value)
                remote_list_str = strdup(p->value);
        }
        else if (!strcasecmp (p->name, "attr_list"))
        {
            use_single_remote_server = 0;
            if (p->value)
                attr_list_str = strdup(p->value);
        }
        else if (!strcasecmp (p->name, "transport"))
        {
            if (strcasecmp(p->value, "TCP") == 0)
                icee_transport = TCP;
            else if (strcasecmp(p->value, "ENET") == 0)
                icee_transport = ENET;
            else if (strcasecmp(p->value, "NNTI") == 0)
                icee_transport = NNTI;
            else if (strcasecmp(p->value, "IB") == 0)
                icee_transport = IB;
            else
                log_error ("No support: %s\n", p->value);
        }
        else if (!strcasecmp (p->name, "transport_init"))
        {
            if (strcasecmp(p->value, "TCP") == 0)
                icee_transport_init = TCP;
            else if (strcasecmp(p->value, "ENET") == 0)
                icee_transport_init = ENET;
            else if (strcasecmp(p->value, "NNTI") == 0)
                icee_transport_init = NNTI;
            else if (strcasecmp(p->value, "IB") == 0)
                icee_transport_init = IB;
            else
                log_error ("No support: %s\n", p->value);
        }
        else if (!strcasecmp (p->name, "num_parallel"))
        {
            icee_read_num_parallel = atoi(p->value);
        }
        else if (!strcasecmp (p->name, "is_passive"))
        {
            is_read_cm_passive = atoi(p->value);
        }
        else if (!strcasecmp (p->name, "use_native_contact"))
        {
            use_native_contact = atoi(p->value);
        }

        p = p->next;
    }

    pthread_mutex_init(&fileinfo_lock, NULL);

    if (use_single_remote_server)
    {
        num_remote_server = 1;

        attr_list contact_list = create_attr_list();
        set_contact_list(contact_list, icee_transport_init, cm_remote_host, cm_remote_port);

        icee_contactinfo_rec_t *p;
        p = malloc(sizeof(icee_contactinfo_rec_t));
        char *contact_string = attr_list_to_string(contact_list);
        p->contact_string = contact_string;
        p->stone_id = 0; // we assume. it can be wrong.
        p->next = NULL;

        remote_contact = p;
    }
    else
    {
        num_remote_server = 0;

        icee_contactinfo_rec_t *p;
        icee_contactinfo_rec_t *prev;

        char* token = strtok(remote_list_str, ",");
        while (token)
        {
            char host[256];
            int port = 0;

            if (token[0] == ':')
            {
                strcpy(host, cm_remote_host);
                port = atoi(token+1);
            }
            else
            {
                char *pch = strchr(token, ':');
                if (pch != NULL)
                {
                    strncpy(host, token, pch - token);
                    host[pch-token] = '\0';
                    port = atoi(pch+1);
                }
                else
                {
                    int len = strlen(token);
                    strncpy(host, token, len);
                    assert(len < 256);
                    host[len] = '\0';
                    port = cm_remote_port;
                }
            }
            log_debug("Remote server list: (%d) %s:%d\n", num_remote_server, host, port);

            p = malloc(sizeof(icee_contactinfo_rec_t));
            attr_list contact_list;
            
            contact_list = create_attr_list();
            set_contact_list(contact_list, icee_transport_init, host, port);
            p->contact_string = attr_list_to_string(contact_list);
            p->stone_id = 0; // we assume. it can be wrong.
            p->next = NULL;

            if (num_remote_server == 0)
                remote_contact = p;
            else
                prev->next = p;

            prev = p;
            num_remote_server++;
            token = strtok(NULL, ",");
        }
    }

    if (attr_list_str != NULL)
    {
        num_remote_server = 0;

        icee_contactinfo_rec_t *p;
        icee_contactinfo_rec_t *prev;

        char* token = strtok(attr_list_str, ",");
        while (token)
        {
            int remote_stone = 0;
            char string_list[256];

            sscanf(token, "%d:%s", &remote_stone, &string_list[0]);

            p = malloc(sizeof(icee_contactinfo_rec_t));
            attr_list contact_list;
            
            p->stone_id = remote_stone;
            p->contact_string = strdup(string_list);
            p->next = NULL;

            if (num_remote_server == 0)
                remote_contact = p;
            else
                prev->next = p;

            prev = p;
            num_remote_server++;
            token = strtok(NULL, ",");
        }
    }

    if (icee_read_num_parallel > ICEE_MAX_PARALLEL)
    {
        icee_read_num_parallel = ICEE_MAX_PARALLEL;
        log_info ("Max. number of threads is set to %d\n", icee_read_num_parallel);
    }

    log_debug ("transport : %s\n", icee_transport_name[icee_transport]);

    /*
      log_info ("cm_host : %s\n", cm_host);
      log_info ("cm_port : %d\n", cm_port);

      for (i = 0; i < num_remote_server; i++) 
      {
      log_info ("remote_list : %s:%d\n", remote_server[i].client_host, remote_server[i].client_port);
      }
    */

    if (!adios_read_icee_initialized)
    {
        if (is_read_cm_passive)
        {
            icee_contactinfo_rec_t *prev;
            for (i = 0; i < num_remote_server; i++)
            {
                attr_list contact_list;
                icee_contactinfo_rec_t *p = (i == 0)? remote_contact : prev->next;

                pcm[i] = CManager_create();

                if (!CMfork_comm_thread(pcm[i]))
                    printf("Fork of communication thread[%d] failed.\n", i);

                contact_list = attr_list_from_string(p->contact_string);

                log_debug("Passive remote contact: \"%s\"\n", attr_list_to_string(contact_list));
                if (adios_verbose_level > 5) dump_attr_list(contact_list);

                /*
                  attr_list contact_list  = create_attr_list();
                  add_string_attr(contact_list, attr_atom_from_string("IP_HOST"),
                  remote_server[i].client_host);
                  add_int_attr(contact_list, attr_atom_from_string("IP_PORT"),
                  remote_server[i].client_port);
                */

                CMConnection conn = CMinitiate_conn(pcm[i], contact_list);

                int n = 0;
                while (conn == NULL)
                {
                    log_error ("Passive connection failed (%d). Try again ...\n", i);
                    dump_attr_list(contact_list);

                    sleep(2);
                    conn = CMinitiate_conn(pcm[i], contact_list);
                    
                    if (n > 5) break;
                    n++;
                }

                if (conn == NULL)
                {
                    log_error ("Initializing passive connection failed (%d)\n", i);
                    
                }

                CMFormat fm_checkin, fm_fileinfo;
                fm_checkin = CMregister_format(pcm[i], icee_passivecheckin_format_list);

                CMregister_handler(fm_checkin, icee_passivecheckin_reply_handler, on_icee_passivecheckin_reply);

                fm_fileinfo = CMregister_format(pcm[i], icee_fileinfo_format_list);
                CMregister_handler(fm_fileinfo, icee_fileinfo_recv_handler, on_icee_fileinfo_recv);

                icee_passivecheckin_rec_t m;

                int condition = CMCondition_get(pcm[i], conn);
                CMCondition_set_client_data(pcm[i], condition, NULL);
                m.condition = condition;

                if (CMwrite(conn, fm_checkin, (void*)&m) != 1)
                    log_error ("Passive check-in failed (%d)\n", i);

                prev = p;
            }
            log_debug("Passive connection established");
            goto done;
        }

        EVstone   stone[ICEE_MAX_PARALLEL], remote_stone;
        EVsource  source;
        attr_list contact[ICEE_MAX_PARALLEL];

        icee_contactinfo_rec_t contact_msg[ICEE_MAX_PARALLEL];

        for (i=0; i<icee_read_num_parallel; i++)
        {
            icee_read_cm[i] = CManager_create();

            contact[i] = create_attr_list();
            set_contact_list(contact[i], icee_transport, cm_host, cm_port+i);

            if (CMlisten_specific(icee_read_cm[i], contact[i]) == 0)
                printf("Error: unable to initialize connection manager[%d].\n", i);
            
            if (!CMfork_comm_thread(icee_read_cm[i])) 
                printf("Fork of communication thread[%d] failed.\n", i);

            stone[i] = EValloc_stone(icee_read_cm[i]);
            if (adios_verbose_level > 5) 
            {
                log_debug("Reader contact: \"%d:%s\"\n", stone[i], attr_list_to_string(CMget_contact_list(icee_read_cm[i])));
                dump_attr_list(CMget_contact_list(icee_read_cm[i]));
            }

            EVassoc_terminal_action(icee_read_cm[i], stone[i], icee_fileinfo_format_list, icee_fileinfo_handler, NULL);

            contact_msg[i].stone_id = stone[i];
            attr_list contact_list;
            if (use_native_contact)
                contact_list = CMget_contact_list(icee_read_cm[i]);
            else
                contact_list = contact[i];
                
            contact_msg[i].contact_string = attr_list_to_string(contact_list);
            contact_msg[i].next = NULL;

            if (i>0)
                contact_msg[i-1].next = &contact_msg[i];
        }
        
        EVstone split_stone;
        EVaction split_action;

        split_stone = EValloc_stone(icee_read_cm[0]);
        split_action = EVassoc_split_action(icee_read_cm[0], split_stone, NULL);
        icee_contactinfo_rec_t *prev;
        for (i = 0; i < num_remote_server; i++) 
        {
            attr_list contact_list;
            EVstone remote_stone, output_stone;
            output_stone = EValloc_stone(icee_read_cm[0]);
            icee_contactinfo_rec_t *p = (i == 0)? remote_contact : prev->next;
            
            remote_stone = p->stone_id;
            contact_list = attr_list_from_string(p->contact_string);

            EVaction action;
            action = EVassoc_bridge_action(icee_read_cm[0], output_stone, contact_list, remote_stone);

            int n = 0;
            while (action == -1)
            {
                log_error ("Connection failed (%d). Try again ...\n", i);
                dump_attr_list(contact_list);
                
                sleep(2);
                action = EVassoc_bridge_action(icee_read_cm[0], output_stone, contact_list, remote_stone);
                
                if (n > 5) break;
                n++;
            }

            EVaction_add_split_target(icee_read_cm[0], split_stone, split_action, output_stone);

            prev = p;

            log_debug("Remote contact: \"%d:%s\"\n", remote_stone, attr_list_to_string(contact_list));
            if (adios_verbose_level > 5) dump_attr_list(contact_list);

        }

        source = EVcreate_submit_handle(icee_read_cm[0], split_stone, icee_contactinfo_format_list);
        
        //if (adios_verbose_level > 5) icee_contactinfo_print(contact_msg);
        
        EVsubmit(source, contact_msg, NULL);

    done:
        adios_read_icee_initialized = 1;
    }

    return 0;
}