/*
 * Build "<fname>_<suffix>" into dst.
 * Returns 0 on success, -1 if the result does not fit in dst_size bytes
 * (dst is then truncated but still NUL-terminated).
 */
static int
flexpath_contact_filename(char *dst, size_t dst_size, const char *fname, const char *suffix)
{
    int written = snprintf(dst, dst_size, "%s_%s", fname, suffix);
    return (written < 0 || (size_t)written >= dst_size) ? -1 : 0;
}

/*
 * Sets up local data structure for series of reads on an adios file
 * - create evpath graph and structures
 * -- create evpath control stone (outgoing)
 * -- create evpath data stone (incoming)
 * -- rank 0 dumps contact info to file
 * -- create connections using contact info from file
 *
 * fname        stream name; also the prefix of the four rendezvous files
 *              (reader/writer "ready" and "contact" files).
 * comm         communicator of the reader ranks; its size and rank are
 *              stored in the reader file structure.
 * lock_mode    not used by this function.
 * timeout_sec  not used by this function; the waits for the writer's files
 *              below are unbounded.
 *
 * Returns the newly allocated ADIOS_FILE, or NULL after reporting through
 * adios_error() when memory is exhausted, the rendezvous file names do not
 * fit their buffers, or the writer contact file lists no writers.
 * Failure to create the reader's own rendezvous files terminates the
 * process with exit(1).
 */
ADIOS_FILE*
adios_read_flexpath_open(const char * fname,
                         MPI_Comm comm,
                         enum ADIOS_LOCKMODE lock_mode,
                         float timeout_sec)
{
    fp_log("FUNC", "entering flexpath_open\n");

    ADIOS_FILE *adiosfile = malloc(sizeof(ADIOS_FILE));
    if(!adiosfile){
        adios_error (err_no_memory, "Cannot allocate memory for file info.\n");
        return NULL;
    }

    /* Build the rendezvous file names first: nothing has been registered
       with the connection manager yet, so failing here is leak-free. */
    char writer_ready_filename[200];
    char writer_info_filename[200];
    char reader_ready_filename[200];
    char reader_info_filename[200];
    if(flexpath_contact_filename(reader_ready_filename, sizeof reader_ready_filename,
                                 fname, READER_READY_FILE) != 0 ||
       flexpath_contact_filename(reader_info_filename, sizeof reader_info_filename,
                                 fname, READER_CONTACT_FILE) != 0 ||
       flexpath_contact_filename(writer_ready_filename, sizeof writer_ready_filename,
                                 fname, WRITER_READY_FILE) != 0 ||
       flexpath_contact_filename(writer_info_filename, sizeof writer_info_filename,
                                 fname, WRITER_CONTACT_FILE) != 0){
        adios_error(err_file_open_error,
                    "Stream name is too long to build the contact file names.\n");
        free(adiosfile);
        return NULL;
    }

    flexpath_reader_file *fp = new_flexpath_reader_file(fname);
    if(!fp){
        adios_error (err_no_memory, "Cannot allocate memory for file info.\n");
        free(adiosfile);
        return NULL;
    }

    adios_errno = 0;
    fp->stone = EValloc_stone(fp_read_data->fp_cm);
    fp->comm = comm;

    MPI_Comm_size(fp->comm, &(fp->size));
    MPI_Comm_rank(fp->comm, &(fp->rank));

    /* From here on adiosfile is the client data of the handlers attached to
       fp->stone, so later error paths must not free it. */
    EVassoc_terminal_action(fp_read_data->fp_cm,
                            fp->stone,
                            op_format_list,
                            op_msg_handler,
                            adiosfile);

    EVassoc_terminal_action(fp_read_data->fp_cm,
                            fp->stone,
                            update_step_msg_format_list,
                            update_step_msg_handler,
                            adiosfile);

    EVassoc_terminal_action(fp_read_data->fp_cm,
                            fp->stone,
                            evgroup_format_list,
                            group_msg_handler,
                            adiosfile);

    EVassoc_raw_terminal_action(fp_read_data->fp_cm,
                                fp->stone,
                                raw_handler,
                                adiosfile);

    /* Gather the contact info from the other readers
       and write it to a file. Create a ready file so
       that the writer knows it can parse this file. */
    char *string_list;
    /* Zero-filled so the bytes past the terminator that MPI_Gather ships
       are defined. */
    char data_contact_info[CONTACT_LENGTH] = "";
    string_list = attr_list_to_string(CMget_contact_list(fp_read_data->fp_cm));
    snprintf(data_contact_info, sizeof data_contact_info, "%d:%s",
             fp->stone, string_list);
    free(string_list);

    /* Only the root needs a receive buffer. */
    char *recvbuf = NULL;
    if(fp->rank == 0){
        recvbuf = malloc(sizeof(char) * CONTACT_LENGTH * (size_t)fp->size);
        if(!recvbuf){
            adios_error(err_no_memory,
                        "Cannot allocate memory for reader contact info.\n");
            exit(1);
        }
    }

    MPI_Gather(data_contact_info, CONTACT_LENGTH, MPI_CHAR, recvbuf,
               CONTACT_LENGTH, MPI_CHAR, 0, fp->comm);

    if(fp->rank == 0){
        /* Write one "stone:contact" line per reader rank. */
        FILE * fp_out = fopen(reader_info_filename, "w");
        int i;
        if(!fp_out){
            adios_error(err_file_open_error,
                        "File for contact info could not be opened for writing.\n");
            exit(1);
        }
        for(i=0; i<fp->size; i++) {
            fprintf(fp_out,"%s\n", &recvbuf[i*CONTACT_LENGTH]);
        }
        fclose(fp_out);
        free(recvbuf);

        FILE * read_ready = fopen(reader_ready_filename, "w");
        if(!read_ready){
            adios_error(err_file_open_error,
                        "File for reader ready flag could not be opened for writing.\n");
            exit(1);
        }
        fprintf(read_ready, "ready");
        fclose(read_ready);
    }
    MPI_Barrier(fp->comm);

    /* Spin until the writer's ready file can be opened, then until its
       contact file can be opened. No back-off and no timeout. */
    FILE * fp_in = fopen(writer_ready_filename,"r");
    while(!fp_in) {
        fp_in = fopen(writer_ready_filename, "r");
    }
    fclose(fp_in);

    fp_in = fopen(writer_info_filename, "r");
    while(!fp_in){
        fp_in = fopen(writer_info_filename, "r");
    }

    char in_contact[CONTACT_LENGTH] = "";
    /* "%d:%<CONTACT_LENGTH-1>s": the width keeps fscanf inside in_contact. */
    char contact_fmt[32];
    snprintf(contact_fmt, sizeof contact_fmt, "%%d:%%%ds", (int)CONTACT_LENGTH - 1);

    int num_bridges = 0;
    int their_stone;
    int out_of_memory = 0;

    /* Requiring both conversions (== 2) ends the loop on EOF and on a
       malformed entry; comparing against EOF alone would loop forever on
       input fscanf cannot consume. */
    while(fscanf(fp_in, contact_fmt, &their_stone, in_contact) == 2){
        bridge_info *grown = realloc(fp->bridges,
                                     sizeof(bridge_info) * (num_bridges+1));
        char *contact_copy = grown ? strdup(in_contact) : NULL;
        if(grown){
            fp->bridges = grown;
        }
        if(!contact_copy){
            out_of_memory = 1;
            break;
        }
        fp->bridges[num_bridges].their_num = their_stone;
        fp->bridges[num_bridges].contact = contact_copy;
        fp->bridges[num_bridges].created = 0;
        fp->bridges[num_bridges].step = 0;
        fp->bridges[num_bridges].opened = 0;
        fp->bridges[num_bridges].scheduled = 0;
        num_bridges++;
    }
    fclose(fp_in);
    fp->num_bridges = num_bridges;

    // clean up of writer's files
    /* The barrier is reached even after a local read failure so the other
       ranks are not left waiting in it. */
    MPI_Barrier(fp->comm);
    if(fp->rank == 0){
        unlink(writer_info_filename);
        unlink(writer_ready_filename);
    }

    if(out_of_memory){
        adios_error(err_no_memory,
                    "Cannot allocate memory for writer contact info.\n");
        return NULL;
    }
    if(num_bridges == 0){
        /* Without this guard the modulo below would divide by zero. */
        adios_error(err_file_open_error,
                    "Writer contact file contains no contact information.\n");
        return NULL;
    }

    adiosfile->fh = (uint64_t)fp;
    adiosfile->current_step = 0;

    /* Init with a writer to get initial scalar
       data so we can handle inq_var calls and
       also populate the ADIOS_FILE struct. */
    if(fp->size < num_bridges){
        /* More writers than readers: each reader takes a contiguous slice
           of num_bridges/size writers and coordinates with the first. */
        int mystart = (num_bridges/fp->size) * fp->rank;
        int myend = (num_bridges/fp->size) * (fp->rank+1);
        fp->writer_coordinator = mystart;
        int z;
        for(z=mystart; z<myend; z++){
            build_bridge(&fp->bridges[z]);
        }
    }
    else{
        int writer_rank = fp->rank % num_bridges;
        build_bridge(&fp->bridges[writer_rank]);
        fp->writer_coordinator = writer_rank;
    }

    // requesting initial data.
    send_open_msg(fp, fp->writer_coordinator);

    fp->data_read = 0;
    send_flush_msg(fp, fp->writer_coordinator, DATA, 1);

    send_flush_msg(fp, fp->writer_coordinator, EVGROUP, 1);
    fp->data_read = 0;
    // this has to change. Writer needs to have some way of
    // taking the attributes out of the xml document
    // and sending them over ffs encoded. Not yet implemented.
    // the rest of this info for adiosfile gets filled in raw_handler.
    adiosfile->nattrs = 0;
    adiosfile->attr_namelist = NULL;
    // first step is at least one, otherwise raw_handler will not execute.
    // in reality, writer might be further along, so we might have to make
    // the writer explicitly send across messages each time it calls close, to
    // indicate which timesteps are available.
    adiosfile->last_step = 1;
    adiosfile->path = strdup(fname);
    // verifies these two fields. It's not BP, so no BP version.
    // It's a stream, so how can the file size be known?
    adiosfile->version = -1;
    adiosfile->file_size = 0;
    adios_errno = err_no_error;
    fp_log("FUNC", "leaving flexpath_open\n");
    return adiosfile;
}
int adios_read_icee_init_method (MPI_Comm comm, PairStruct* params) { log_debug ("%s\n", __FUNCTION__); int cm_port = 59997; char *cm_host = "localhost"; int cm_remote_port = 59999; char *cm_remote_host = "localhost"; char *cm_attr = NULL; //attr_list contact_list; icee_transport_t icee_transport_init = TCP; icee_transport_t icee_transport = TCP; icee_contactinfo_rec_t *remote_contact = NULL; int i; int use_single_remote_server = 1; char *remote_list_str = NULL; char *attr_list_str = NULL; int use_native_contact = 0; PairStruct * p = params; while (p) { if (!strcasecmp (p->name, "cm_attr")) { cm_attr = p->value; } else if (!strcasecmp (p->name, "cm_host")) { cm_host = p->value; } else if (!strcasecmp (p->name, "cm_port")) { cm_port = atoi(p->value); } else if (!strcasecmp (p->name, "cm_remote_host")) { cm_remote_host = p->value; } else if (!strcasecmp (p->name, "cm_remote_port")) { cm_remote_port = atoi(p->value); } else if (!strcasecmp (p->name, "remote_list")) { use_single_remote_server = 0; if (p->value) remote_list_str = strdup(p->value); } else if (!strcasecmp (p->name, "attr_list")) { use_single_remote_server = 0; if (p->value) attr_list_str = strdup(p->value); } else if (!strcasecmp (p->name, "transport")) { if (strcasecmp(p->value, "TCP") == 0) icee_transport = TCP; else if (strcasecmp(p->value, "ENET") == 0) icee_transport = ENET; else if (strcasecmp(p->value, "NNTI") == 0) icee_transport = NNTI; else if (strcasecmp(p->value, "IB") == 0) icee_transport = IB; else log_error ("No support: %s\n", p->value); } else if (!strcasecmp (p->name, "transport_init")) { if (strcasecmp(p->value, "TCP") == 0) icee_transport_init = TCP; else if (strcasecmp(p->value, "ENET") == 0) icee_transport_init = ENET; else if (strcasecmp(p->value, "NNTI") == 0) icee_transport_init = NNTI; else if (strcasecmp(p->value, "IB") == 0) icee_transport_init = IB; else log_error ("No support: %s\n", p->value); } else if (!strcasecmp (p->name, "num_parallel")) { icee_read_num_parallel = 
atoi(p->value); } else if (!strcasecmp (p->name, "is_passive")) { is_read_cm_passive = atoi(p->value); } else if (!strcasecmp (p->name, "use_native_contact")) { use_native_contact = atoi(p->value); } p = p->next; } pthread_mutex_init(&fileinfo_lock, NULL); if (use_single_remote_server) { num_remote_server = 1; attr_list contact_list = create_attr_list(); set_contact_list(contact_list, icee_transport_init, cm_remote_host, cm_remote_port); icee_contactinfo_rec_t *p; p = malloc(sizeof(icee_contactinfo_rec_t)); char *contact_string = attr_list_to_string(contact_list); p->contact_string = contact_string; p->stone_id = 0; // we assume. it can be wrong. p->next = NULL; remote_contact = p; } else { num_remote_server = 0; icee_contactinfo_rec_t *p; icee_contactinfo_rec_t *prev; char* token = strtok(remote_list_str, ","); while (token) { char host[256]; int port = 0; if (token[0] == ':') { strcpy(host, cm_remote_host); port = atoi(token+1); } else { char *pch = strchr(token, ':'); if (pch != NULL) { strncpy(host, token, pch - token); host[pch-token] = '\0'; port = atoi(pch+1); } else { int len = strlen(token); strncpy(host, token, len); assert(len < 256); host[len] = '\0'; port = cm_remote_port; } } log_debug("Remote server list: (%d) %s:%d\n", num_remote_server, host, port); p = malloc(sizeof(icee_contactinfo_rec_t)); attr_list contact_list; contact_list = create_attr_list(); set_contact_list(contact_list, icee_transport_init, host, port); p->contact_string = attr_list_to_string(contact_list); p->stone_id = 0; // we assume. it can be wrong. 
p->next = NULL; if (num_remote_server == 0) remote_contact = p; else prev->next = p; prev = p; num_remote_server++; token = strtok(NULL, ","); } } if (attr_list_str != NULL) { num_remote_server = 0; icee_contactinfo_rec_t *p; icee_contactinfo_rec_t *prev; char* token = strtok(attr_list_str, ","); while (token) { int remote_stone = 0; char string_list[256]; sscanf(token, "%d:%s", &remote_stone, &string_list[0]); p = malloc(sizeof(icee_contactinfo_rec_t)); attr_list contact_list; p->stone_id = remote_stone; p->contact_string = strdup(string_list); p->next = NULL; if (num_remote_server == 0) remote_contact = p; else prev->next = p; prev = p; num_remote_server++; token = strtok(NULL, ","); } } if (icee_read_num_parallel > ICEE_MAX_PARALLEL) { icee_read_num_parallel = ICEE_MAX_PARALLEL; log_info ("Max. number of threads is set to %d\n", icee_read_num_parallel); } log_debug ("transport : %s\n", icee_transport_name[icee_transport]); /* log_info ("cm_host : %s\n", cm_host); log_info ("cm_port : %d\n", cm_port); for (i = 0; i < num_remote_server; i++) { log_info ("remote_list : %s:%d\n", remote_server[i].client_host, remote_server[i].client_port); } */ if (!adios_read_icee_initialized) { if (is_read_cm_passive) { icee_contactinfo_rec_t *prev; for (i = 0; i < num_remote_server; i++) { attr_list contact_list; icee_contactinfo_rec_t *p = (i == 0)? 
remote_contact : prev->next; pcm[i] = CManager_create(); if (!CMfork_comm_thread(pcm[i])) printf("Fork of communication thread[%d] failed.\n", i); contact_list = attr_list_from_string(p->contact_string); log_debug("Passive remote contact: \"%s\"\n", attr_list_to_string(contact_list)); if (adios_verbose_level > 5) dump_attr_list(contact_list); /* attr_list contact_list = create_attr_list(); add_string_attr(contact_list, attr_atom_from_string("IP_HOST"), remote_server[i].client_host); add_int_attr(contact_list, attr_atom_from_string("IP_PORT"), remote_server[i].client_port); */ CMConnection conn = CMinitiate_conn(pcm[i], contact_list); int n = 0; while (conn == NULL) { log_error ("Passive connection failed (%d). Try again ...\n", i); dump_attr_list(contact_list); sleep(2); conn = CMinitiate_conn(pcm[i], contact_list); if (n > 5) break; n++; } if (conn == NULL) { log_error ("Initializing passive connection failed (%d)\n", i); } CMFormat fm_checkin, fm_fileinfo; fm_checkin = CMregister_format(pcm[i], icee_passivecheckin_format_list); CMregister_handler(fm_checkin, icee_passivecheckin_reply_handler, on_icee_passivecheckin_reply); fm_fileinfo = CMregister_format(pcm[i], icee_fileinfo_format_list); CMregister_handler(fm_fileinfo, icee_fileinfo_recv_handler, on_icee_fileinfo_recv); icee_passivecheckin_rec_t m; int condition = CMCondition_get(pcm[i], conn); CMCondition_set_client_data(pcm[i], condition, NULL); m.condition = condition; if (CMwrite(conn, fm_checkin, (void*)&m) != 1) log_error ("Passive check-in failed (%d)\n", i); prev = p; } log_debug("Passive connection established"); goto done; } EVstone stone[ICEE_MAX_PARALLEL], remote_stone; EVsource source; attr_list contact[ICEE_MAX_PARALLEL]; icee_contactinfo_rec_t contact_msg[ICEE_MAX_PARALLEL]; for (i=0; i<icee_read_num_parallel; i++) { icee_read_cm[i] = CManager_create(); contact[i] = create_attr_list(); set_contact_list(contact[i], icee_transport, cm_host, cm_port+i); if (CMlisten_specific(icee_read_cm[i], 
contact[i]) == 0) printf("Error: unable to initialize connection manager[%d].\n", i); if (!CMfork_comm_thread(icee_read_cm[i])) printf("Fork of communication thread[%d] failed.\n", i); stone[i] = EValloc_stone(icee_read_cm[i]); if (adios_verbose_level > 5) { log_debug("Reader contact: \"%d:%s\"\n", stone[i], attr_list_to_string(CMget_contact_list(icee_read_cm[i]))); dump_attr_list(CMget_contact_list(icee_read_cm[i])); } EVassoc_terminal_action(icee_read_cm[i], stone[i], icee_fileinfo_format_list, icee_fileinfo_handler, NULL); contact_msg[i].stone_id = stone[i]; attr_list contact_list; if (use_native_contact) contact_list = CMget_contact_list(icee_read_cm[i]); else contact_list = contact[i]; contact_msg[i].contact_string = attr_list_to_string(contact_list); contact_msg[i].next = NULL; if (i>0) contact_msg[i-1].next = &contact_msg[i]; } EVstone split_stone; EVaction split_action; split_stone = EValloc_stone(icee_read_cm[0]); split_action = EVassoc_split_action(icee_read_cm[0], split_stone, NULL); icee_contactinfo_rec_t *prev; for (i = 0; i < num_remote_server; i++) { attr_list contact_list; EVstone remote_stone, output_stone; output_stone = EValloc_stone(icee_read_cm[0]); icee_contactinfo_rec_t *p = (i == 0)? remote_contact : prev->next; remote_stone = p->stone_id; contact_list = attr_list_from_string(p->contact_string); EVaction action; action = EVassoc_bridge_action(icee_read_cm[0], output_stone, contact_list, remote_stone); int n = 0; while (action == -1) { log_error ("Connection failed (%d). 
Try again ...\n", i); dump_attr_list(contact_list); sleep(2); action = EVassoc_bridge_action(icee_read_cm[0], output_stone, contact_list, remote_stone); if (n > 5) break; n++; } EVaction_add_split_target(icee_read_cm[0], split_stone, split_action, output_stone); prev = p; log_debug("Remote contact: \"%d:%s\"\n", remote_stone, attr_list_to_string(contact_list)); if (adios_verbose_level > 5) dump_attr_list(contact_list); } source = EVcreate_submit_handle(icee_read_cm[0], split_stone, icee_contactinfo_format_list); //if (adios_verbose_level > 5) icee_contactinfo_print(contact_msg); EVsubmit(source, contact_msg, NULL); done: adios_read_icee_initialized = 1; } return 0; }