/*-------------------------------------------------------------------------*/
/**
  @brief      Writes one protected variable into the POSIX checkpoint file.
  @param      varID           Id of the dataset that has to be stored.
  @param      FTI_Conf        Configuration metadata (not used by this function).
  @param      FTI_Exec        Execution metadata (file handle, nbVar, iCP result).
  @param      FTI_Topo        Topology metadata (not used by this function).
  @param      FTI_Ckpt        Checkpoint metadata (not used by this function).
  @param      FTI_Data        Dataset metadata array (FTI_Exec->nbVar entries).
  @return     integer         FTI_SCES if successful, FTI_NSCS otherwise.

  Walks over all datasets and writes every dataset whose id equals varID.
  The file position of a dataset is the sum of count*eleSize of all the
  datasets that precede it in FTI_Data. When the write itself fails the
  stream is closed before returning. On success (also when no dataset
  matched) FTI_Exec->iCPInfo.result is set to FTI_SCES.
 **/
/*-------------------------------------------------------------------------*/
int FTI_WritePosixVar(int varID, FTIT_configuration* FTI_Conf, FTIT_execution* FTI_Exec,
        FTIT_topology* FTI_Topo, FTIT_checkpoint* FTI_Ckpt,
        FTIT_dataset* FTI_Data)
{
    FILE *fd;
    int res;
    char str[FTI_BUFS];
    long offset = 0;
    int i;

    // The stream pointer is kept as raw bytes inside the iCP info block.
    memcpy( &fd, FTI_Exec->iCPInfo.fh, sizeof(FTI_PO_FH) );

    // write data into ckpt file
    for (i = 0; i < FTI_Exec->nbVar; i++) {
        if ( FTI_Data[i].id == varID ) {
            clearerr(fd);
            if ( fseek( fd, offset, SEEK_SET ) == -1 ) {
                FTI_Print("Error on fseek in writeposixvar", FTI_EROR );
                return FTI_NSCS;
            }

            if ( !(FTI_Data[i].isDevicePtr) ) {
                // The message must be formatted before it is printed; the
                // buffer is otherwise uninitialised.
                snprintf(str, FTI_BUFS, "Dataset #%d Writing CPU Data.", FTI_Data[i].id);
                FTI_Print(str, FTI_INFO);
                if (( res = FTI_Try(write_posix(FTI_Data[i].ptr, FTI_Data[i].size, fd),
                                "Storing Data to Checkpoint file")) != FTI_SCES) {
                    snprintf(str, FTI_BUFS, "Dataset #%d could not be written.", FTI_Data[i].id);
                    FTI_Print(str, FTI_EROR);
                    fclose(fd);
                    return FTI_NSCS;
                }
            }
#ifdef GPUSUPPORT
            // if data are stored to the GPU move them from device
            // memory to cpu memory and store them.
            else {
                snprintf(str, FTI_BUFS, "Dataset #%d Writing GPU Data.", FTI_Data[i].id);
                FTI_Print(str, FTI_INFO);
                if ((res = FTI_Try(
                                FTI_TransferDeviceMemToFileAsync(&FTI_Data[i], write_posix, fd),
                                "moving data from GPU to storage")) != FTI_SCES) {
                    snprintf(str, FTI_BUFS, "Dataset #%d could not be written.", FTI_Data[i].id);
                    FTI_Print(str, FTI_EROR);
                    fclose(fd);
                    return FTI_NSCS;
                }
            }
#endif
            // Catch stream errors that the write helpers did not report.
            if (ferror(fd)) {
                return FTI_NSCS;
            }
        }
        // Advance past this dataset regardless of whether it was written.
        offset += FTI_Data[i].count*FTI_Data[i].eleSize;
    }

    FTI_Exec->iCPInfo.result = FTI_SCES;
    return FTI_SCES;
}
/*-------------------------------------------------------------------------*/
/**
  @brief      Writes one protected variable into the MPI-IO checkpoint file.
  @param      varID           Id of the dataset that has to be stored.
  @param      FTI_Conf        Configuration metadata (forwarded to write_mpi).
  @param      FTI_Exec        Execution metadata (file handle, offset, nbVar).
  @param      FTI_Topo        Topology metadata (not used by this function).
  @param      FTI_Ckpt        Checkpoint metadata (not used by this function).
  @param      FTI_Data        Dataset metadata array (FTI_Exec->nbVar entries).
  @return     integer         FTI_SCES if successful, otherwise the result
                              reported for the failed write.

  The write offset starts at FTI_Exec->iCPInfo.offset and is advanced by the
  size of every dataset in FTI_Data, so that each dataset whose id equals
  varID lands at its own position. When a write fails the MPI file is closed
  before returning. On success FTI_Exec->iCPInfo.result is set to FTI_SCES.
 **/
/*-------------------------------------------------------------------------*/
int FTI_WriteMpiVar(int varID, FTIT_configuration* FTI_Conf, FTIT_execution* FTI_Exec,
        FTIT_topology* FTI_Topo, FTIT_checkpoint* FTI_Ckpt,
        FTIT_dataset* FTI_Data)
{
    char str[FTI_BUFS];
    WriteMPIInfo_t write_info;
    int res;
    int i;

    // The MPI file handle is kept as raw bytes inside the iCP info block.
    memcpy( &write_info.pfh, FTI_Exec->iCPInfo.fh, sizeof(FTI_MI_FH) );
    write_info.offset = FTI_Exec->iCPInfo.offset;
    write_info.FTI_Conf = FTI_Conf;

    for (i = 0; i < FTI_Exec->nbVar; i++) {
        if ( FTI_Data[i].id == varID ) {
            if ( !(FTI_Data[i].isDevicePtr) ) {
                // Format the message first; the buffer is otherwise
                // uninitialised when it is printed.
                snprintf(str, FTI_BUFS, "Dataset #%d Writing CPU Data.", FTI_Data[i].id);
                FTI_Print(str, FTI_INFO);
                // The result of write_mpi has to go through FTI_Try. Without
                // the call, the comma operator made the condition compare the
                // message string instead of the write result, so the error
                // branch was always taken.
                if (( res = FTI_Try(write_mpi(FTI_Data[i].ptr, FTI_Data[i].size, &write_info),
                                "Storing Data to checkpoint file")) != FTI_SCES) {
                    snprintf(str, FTI_BUFS, "Dataset #%d could not be written.", FTI_Data[i].id);
                    FTI_Print(str, FTI_EROR);
                    MPI_File_close(&write_info.pfh);
                    return res;
                }
            }
#ifdef GPUSUPPORT
            // dowload data from the GPU if necessary
            // Data are stored in the GPU side.
            else {
                snprintf(str, FTI_BUFS, "Dataset #%d Writing GPU Data.", FTI_Data[i].id);
                FTI_Print(str, FTI_INFO);
                if ((res = FTI_Try(
                                FTI_TransferDeviceMemToFileAsync(&FTI_Data[i], write_mpi, &write_info),
                                "moving data from GPU to storage")) != FTI_SCES) {
                    snprintf(str, FTI_BUFS, "Dataset #%d could not be written.", FTI_Data[i].id);
                    FTI_Print(str, FTI_EROR);
                    MPI_File_close(&write_info.pfh);
                    return res;
                }
            }
#endif
        }
        // Advance past this dataset regardless of whether it was written.
        write_info.offset += FTI_Data[i].size;
    }

    FTI_Exec->iCPInfo.result = FTI_SCES;
    return FTI_SCES;
}
/*-------------------------------------------------------------------------*/
/**
  @brief      Writes all protected datasets into a checkpoint file.
  @param      FTI_Data        Dataset metadata array (FTI_Exec.nbVar entries).
  @return     integer         FTI_NSCS on any I/O failure, otherwise the
                              result of creating the metadata.

  The file is named "Ckpt<ckptID>-Rank<myRank>.fti". It is created in the
  global temporary directory when level 4 is inline and the current level
  is 4, and in the local temporary directory otherwise. The datasets are
  written back to back in array order. The stream is closed on every path
  that leaves the function after a successful fopen.
 **/
/*-------------------------------------------------------------------------*/
int FTI_WriteCkpt(FTIT_dataset* FTI_Data)
{
    int i, res, len;
    FILE *fd;
    double tt = MPI_Wtime();
    char fn[FTI_BUFS], str[FTI_BUFS];

    // Select the target directory once; the same flag is later handed to
    // the metadata creation.
    int globalTmp = (FTI_Ckpt[4].isInline && FTI_Exec.ckptLvel == 4) ? 1 : 0;
    const char *tmpDir = globalTmp ? FTI_Conf.gTmpDir : FTI_Conf.lTmpDir;

    snprintf(FTI_Exec.ckptFile, FTI_BUFS, "Ckpt%d-Rank%d.fti", FTI_Exec.ckptID, FTI_Topo.myRank);

    // Bounded formatting: directory plus file name may exceed the buffer.
    len = snprintf(fn, sizeof fn, "%s/%s", tmpDir, FTI_Exec.ckptFile);
    if (len < 0 || (size_t)len >= sizeof fn) {
        FTI_Print("FTI checkpoint file name is too long.", FTI_EROR);
        return FTI_NSCS;
    }

    // The return value is deliberately not checked (the directory may
    // already exist); a missing directory makes the fopen below fail.
    mkdir(tmpDir, 0777);

    fd = fopen(fn, "wb");
    if (fd == NULL) {
        FTI_Print("FTI checkpoint file could not be opened.", FTI_EROR);
        return FTI_NSCS;
    }

    for (i = 0; i < FTI_Exec.nbVar; i++) {
        if (fwrite(FTI_Data[i].ptr, FTI_Data[i].eleSize, FTI_Data[i].count, fd) != FTI_Data[i].count) {
            snprintf(str, sizeof str, "Dataset #%d could not be written.", FTI_Data[i].id);
            FTI_Print(str, FTI_EROR);
            fclose(fd);
            return FTI_NSCS;
        }
    }

    if (fflush(fd) != 0) {
        FTI_Print("FTI checkpoint file could not be flushed.", FTI_EROR);
        fclose(fd);
        return FTI_NSCS;
    }
    if (fclose(fd) != 0) {
        FTI_Print("FTI checkpoint file could not be closed.", FTI_EROR);
        return FTI_NSCS;
    }

    snprintf(str, sizeof str, "Time writing checkpoint file : %f seconds.", MPI_Wtime()-tt);
    FTI_Print(str, FTI_DBUG);

    res = FTI_Try(FTI_CreateMetadata(globalTmp), "create metadata.");
    return res;
}
/*-------------------------------------------------------------------------*/ int FTI_Listen() { MPI_Status status; char str[FTI_BUFS]; int i, buf, res, flags[7]; for (i = 0; i < 7; i++) { // Initialize flags flags[i] = 0; } FTI_Print("Head listening...", FTI_DBUG); for(i = 0; i < FTI_Topo.nbApprocs; i++) { // Iterate on the application processes in the node MPI_Recv(&buf, 1, MPI_INT, FTI_Topo.body[i], FTI_Conf.tag, FTI_Exec.globalComm, &status); sprintf(str, "The head received a %d message", buf); FTI_Print(str, FTI_DBUG); fflush(stdout); flags[buf-FTI_BASE] = flags[buf-FTI_BASE] + 1; } for (i = 1; i < 7; i++) { if (flags[i] == FTI_Topo.nbApprocs) { // Determining checkpoint level FTI_Exec.ckptLvel = i; } } if (flags[6] > 0) { FTI_Exec.ckptLvel = 6; } if (FTI_Exec.ckptLvel == 5) { // If we were asked to finalize return FTI_ENDW; } res = FTI_Try(FTI_PostCkpt(1, 0, FTI_Topo.nbApprocs), "postprocess the checkpoint."); if (res == FTI_SCES) { FTI_Exec.wasLastOffline = 1; FTI_Exec.lastCkptLvel = FTI_Exec.ckptLvel; res = FTI_Exec.ckptLvel; } for(i = 0; i < FTI_Topo.nbApprocs; i++) { // Send msg. to avoid checkpoint collision MPI_Send(&res, 1, MPI_INT, FTI_Topo.body[i], FTI_Conf.tag, FTI_Exec.globalComm); } return FTI_SCES; }
/*-------------------------------------------------------------------------*/
/**
  @brief      Finalizes an incremental FTI-FF checkpoint.
  @param      FTI_Conf        Configuration metadata.
  @param      FTI_Exec        Execution metadata (iCP status and file handle).
  @param      FTI_Topo        Topology metadata.
  @param      FTI_Ckpt        Checkpoint metadata (not used by this function).
  @param      FTI_Data        Dataset metadata.
  @return     integer         FTI_SCES if successful, FTI_NSCS otherwise.

  Returns FTI_NSCS right away when the incremental checkpoint is already
  marked as failed or when the metadata cannot be created. Otherwise the
  metadata is written, the file is synchronised to storage and closed.
  A failing fdatasync or close is reported as FTI_NSCS, because the
  checkpoint data may then not have reached the storage device.
 **/
/*-------------------------------------------------------------------------*/
int FTI_FinalizeFtiffICP(FTIT_configuration* FTI_Conf, FTIT_execution* FTI_Exec,
        FTIT_topology* FTI_Topo, FTIT_checkpoint* FTI_Ckpt,
        FTIT_dataset* FTI_Data)
{
    if ( FTI_Exec->iCPInfo.status == FTI_ICP_FAIL ) {
        return FTI_NSCS;
    }

    // The file descriptor is kept as raw bytes inside the iCP info block.
    int fd;
    memcpy( &fd, FTI_Exec->iCPInfo.fh, sizeof(FTI_FF_FH) );

    if ( FTI_Try( FTIFF_CreateMetadata( FTI_Exec, FTI_Topo, FTI_Data, FTI_Conf ),
                "Create FTI-FF meta data" ) != FTI_SCES ) {
        return FTI_NSCS;
    }

    // NOTE(review): the result of this call is not checked here; confirm
    // whether FTIFF_writeMetaDataFTIFF reports failures to its caller.
    FTIFF_writeMetaDataFTIFF( FTI_Exec, fd );

    int status = FTI_SCES;
    if ( fdatasync( fd ) != 0 ) {
        status = FTI_NSCS;
    }
    // Always close the descriptor, even if the sync failed.
    if ( close( fd ) != 0 ) {
        status = FTI_NSCS;
    }

    return status;
}
/*-------------------------------------------------------------------------*/
/**
  @brief      Reads one dataset from the HDF5 checkpoint file.
  @param      FTI_DataVar     Dataset to restore (group, name, type, target).
  @return     integer         FTI_SCES if successful, FTI_NSCS otherwise.

  Opens the dataset FTI_DataVar->name inside the dataset's HDF5 group.
  Without GPU support, or when the dataset is not a device pointer, the
  whole dataset is read into FTI_DataVar->ptr with one H5Dread call.
  With GPU support and a device pointer, the data is read chunk by chunk
  into host buffers and copied to FTI_DataVar->devicePtr, alternating
  between two CUDA streams and two host buffers.
  The HDF5 handles, the CUDA streams and the heap buffers acquired here are
  released on the error paths handled by this function.
 **/
/*-------------------------------------------------------------------------*/
int FTI_ReadHDF5Var(FTIT_dataset *FTI_DataVar)
{
    char str[FTI_BUFS];
    int res;

    hid_t dataset = H5Dopen(FTI_DataVar->h5group->h5groupID, FTI_DataVar->name, H5P_DEFAULT);
    if (dataset < 0) {
        snprintf(str, FTI_BUFS, "Dataset #%d could not be opened", FTI_DataVar->id);
        FTI_Print(str, FTI_EROR);
        return FTI_NSCS;
    }

    hid_t dataspace = H5Dget_space(dataset);
    if (dataspace < 0) {
        H5Dclose(dataset);
        snprintf(str, FTI_BUFS, "Dataset #%d could not be opened", FTI_DataVar->id);
        FTI_Print(str, FTI_EROR);
        return FTI_NSCS;
    }

    // If my data are stored in the CPU side
    // just read the data from the file and return.
#ifdef GPUSUPPORT
    if ( !FTI_DataVar->isDevicePtr ) {
#endif
        res = H5Dread(dataset, FTI_DataVar->type->h5datatype, H5S_ALL, H5S_ALL,
                H5P_DEFAULT, FTI_DataVar->ptr);
        if (res < 0) {
            H5Sclose(dataspace);
            H5Dclose(dataset);
            snprintf(str, FTI_BUFS, "Dataset #%d could not be read", FTI_DataVar->id);
            FTI_Print(str, FTI_EROR);
            return FTI_NSCS;
        }

        res = H5Dclose(dataset);
        if (res < 0) {
            H5Sclose(dataspace);
            snprintf(str, FTI_BUFS, "Dataset #%d could not be read", FTI_DataVar->id);
            FTI_Print(str, FTI_EROR);
            return FTI_NSCS;
        }

        res = H5Sclose(dataspace);
        if (res < 0) {
            snprintf(str, FTI_BUFS, "Dataset #%d could not be read", FTI_DataVar->id);
            FTI_Print(str, FTI_EROR);
            return FTI_NSCS;
        }
        return FTI_SCES;
#ifdef GPUSUPPORT
    }

    // This code is only executed in the GPU case.
    // All locals are declared before the first 'goto fail' so that the
    // cleanup code never sees an uninitialised value.
    hsize_t dimLength[32];
    hsize_t *count = NULL;
    hsize_t *offset = NULL;
    hsize_t seperator;
    hsize_t totalBytes = FTI_DataVar->size;
    size_t fetchBytes;
    size_t hostBufSize;
    unsigned char *basePtr = NULL;
    unsigned char *dPtr = FTI_DataVar->devicePtr;
    cudaStream_t streams[2];
    int streamsCreated = 0;
    int id = 0;
    int prevId = 1;
    int j;

    // dimLength is a fixed-size array; refuse ranks that do not fit.
    if (FTI_DataVar->rank > (int)(sizeof(dimLength) / sizeof(dimLength[0]))) {
        snprintf(str, FTI_BUFS, "Dataset #%d has an unsupported rank", FTI_DataVar->id);
        FTI_Print(str, FTI_EROR);
        goto fail;
    }
    for (j = 0; j < FTI_DataVar->rank; j++) {
        dimLength[j] = FTI_DataVar->dimLength[j];
    }

    count = (hsize_t*) malloc(sizeof(hsize_t) * FTI_DataVar->rank);
    offset = (hsize_t*) calloc(FTI_DataVar->rank, sizeof(hsize_t));
    if ( !count || !offset ) {
        FTI_Print("Could Not allocate count and offset regions", FTI_EROR);
        goto fail;
    }

    // Calculate how many dimensions can be processed per step and
    // how big the HOST-GPU communication buffer has to be.
    hostBufSize = FTI_getHostBuffSize();
    fetchBytes = FTI_calculateCountDim(FTI_DataVar->eleSize, hostBufSize, count,
            FTI_DataVar->rank, dimLength, &seperator);

    // If the buffer is smaller than the minimum amount
    // then a bigger one has to be allocated.
    if (hostBufSize < fetchBytes) {
        if ( FTI_Try( FTI_DestroyDevices(), "Deleting host buffers" ) != FTI_SCES ) {
            snprintf(str, FTI_BUFS, "Dataset #%d could not be read", FTI_DataVar->id);
            FTI_Print(str, FTI_EROR);
            goto fail;
        }
        if ( FTI_Try( FTI_InitDevices( fetchBytes ), "Allocating host buffers" ) != FTI_SCES ) {
            snprintf(str, FTI_BUFS, "Dataset #%d could not be read", FTI_DataVar->id);
            FTI_Print(str, FTI_EROR);
            goto fail;
        }
    }

    // Create the streams for the asynchronous data movement.
    // NOTE(review): if CUDA_ERROR_CHECK returns from the function on error,
    // the resources held at that point are not released - confirm the macro.
    CUDA_ERROR_CHECK(cudaStreamCreate(&(streams[0])));
    CUDA_ERROR_CHECK(cudaStreamCreate(&(streams[1])));
    streamsCreated = 1;

    // Perform the while loop until all data are processed.
    while ( totalBytes ) {
        // totalBytes is unsigned: a chunk of zero bytes would never
        // terminate and a chunk larger than the remainder would wrap around.
        if (fetchBytes == 0 || fetchBytes > totalBytes) {
            snprintf(str, FTI_BUFS, "Dataset #%d has an invalid transfer chunk size", FTI_DataVar->id);
            FTI_Print(str, FTI_EROR);
            goto fail;
        }

        basePtr = FTI_getHostBuffer(id);

        // Read file
        res = FTI_ReadElements( dataspace, FTI_DataVar->type->h5datatype, dataset,
                count, offset, FTI_DataVar->rank, basePtr);
        // Check the read before handing the buffer to the device copy.
        if (res != FTI_SCES) {
            snprintf(str, FTI_BUFS, "Dataset #%d could not be read", FTI_DataVar->id);
            FTI_Print(str, FTI_EROR);
            goto fail;
        }

        CUDA_ERROR_CHECK(cudaMemcpyAsync( dPtr, basePtr, fetchBytes,
                    cudaMemcpyHostToDevice, streams[id]));

        // Increase accordingly the file offset
        FTI_AdvanceOffset(seperator, offset, count, dimLength, FTI_DataVar->rank);

        // Wait for the copy issued in the previous iteration so that its
        // host buffer can be reused by the next read.
        CUDA_ERROR_CHECK(cudaStreamSynchronize(streams[prevId]));
        prevId = id;
        id = (id + 1) % 2;
        dPtr = dPtr + fetchBytes;
        totalBytes -= fetchBytes;
    }

    CUDA_ERROR_CHECK(cudaStreamSynchronize(streams[prevId]));
    CUDA_ERROR_CHECK(cudaStreamDestroy(streams[0]));
    CUDA_ERROR_CHECK(cudaStreamDestroy(streams[1]));
    free(offset);
    free(count);

    res = H5Dclose(dataset);
    if (res < 0) {
        H5Sclose(dataspace);
        snprintf(str, FTI_BUFS, "Dataset #%d could not be read", FTI_DataVar->id);
        FTI_Print(str, FTI_EROR);
        return FTI_NSCS;
    }
    res = H5Sclose(dataspace);
    if (res < 0) {
        snprintf(str, FTI_BUFS, "Dataset #%d could not be read", FTI_DataVar->id);
        FTI_Print(str, FTI_EROR);
        return FTI_NSCS;
    }
    return FTI_SCES;

fail:
    if (streamsCreated) {
        // Best-effort teardown: wait for copies that may still be in
        // flight, then release the streams. Errors are ignored because the
        // function is already failing.
        cudaStreamSynchronize(streams[0]);
        cudaStreamSynchronize(streams[1]);
        cudaStreamDestroy(streams[0]);
        cudaStreamDestroy(streams[1]);
    }
    free(offset);
    free(count);
    H5Sclose(dataspace);
    H5Dclose(dataset);
    return FTI_NSCS;
#endif
}
/**
  @brief      Writes one dataset into the HDF5 checkpoint file.
  @param      FTI_DataVar     Dataset to store (group, name, type, source).
  @return     integer         FTI_SCES if successful, FTI_NSCS otherwise.

  Creates a chunked dataset (one chunk spanning the full extent) with the
  Fletcher32 checksum filter enabled, named FTI_DataVar->name, inside the
  dataset's HDF5 group. Without GPU support, or when the dataset is not a
  device pointer, the data is written from FTI_DataVar->ptr with one
  H5Dwrite call. With GPU support and a device pointer, the data is fetched
  block by block through the prefetcher and written block by block.
  The property list, dataspace, dataset and heap buffers are released on
  every path, including the GPU path and all error paths.
 **/
int FTI_WriteHDF5Var(FTIT_dataset *FTI_DataVar)
{
    int j;
    hsize_t dimLength[32];
    char str[FTI_BUFS];
    int res;
    // Negative ids mark handles that are not (or no longer) open.
    hid_t dcpl = -1;
    hid_t dataspace = -1;
    hid_t dataset = -1;
#ifdef GPUSUPPORT
    hsize_t *count = NULL;
    hsize_t *offset = NULL;
#endif

    // dimLength is a fixed-size array; refuse ranks that do not fit.
    if (FTI_DataVar->rank > (int)(sizeof(dimLength) / sizeof(dimLength[0]))) {
        snprintf(str, FTI_BUFS, "Dataset #%d has an unsupported rank", FTI_DataVar->id);
        FTI_Print(str, FTI_EROR);
        goto fail;
    }
    for (j = 0; j < FTI_DataVar->rank; j++) {
        dimLength[j] = FTI_DataVar->dimLength[j];
    }

    dcpl = H5Pcreate(H5P_DATASET_CREATE);
    if (dcpl < 0) {
        goto fail_msg;
    }
    if (H5Pset_fletcher32(dcpl) < 0) {
        goto fail_msg;
    }
    if (H5Pset_chunk(dcpl, FTI_DataVar->rank, dimLength) < 0) {
        goto fail_msg;
    }

    dataspace = H5Screate_simple(FTI_DataVar->rank, dimLength, NULL);
    if (dataspace < 0) {
        goto fail_msg;
    }
    dataset = H5Dcreate2(FTI_DataVar->h5group->h5groupID, FTI_DataVar->name,
            FTI_DataVar->type->h5datatype, dataspace, H5P_DEFAULT, dcpl, H5P_DEFAULT);
    if (dataset < 0) {
        goto fail_msg;
    }

    // If my data are stored in the CPU side
    // just store the data to the file.
#ifdef GPUSUPPORT
    if ( !FTI_DataVar->isDevicePtr ) {
#endif
        res = H5Dwrite(dataset, FTI_DataVar->type->h5datatype, H5S_ALL, H5S_ALL,
                H5P_DEFAULT, FTI_DataVar->ptr);
        if (res < 0) {
            goto fail_msg;
        }
#ifdef GPUSUPPORT
    } else {
        // This code is only executed in the GPU case.
        hsize_t seperator;
        hsize_t fetchBytes;
        size_t bytesToWrite;
        unsigned char *basePtr = NULL;
        FTIT_data_prefetch prefetcher;

        count = (hsize_t*) malloc(sizeof(hsize_t) * FTI_DataVar->rank);
        offset = (hsize_t*) calloc(FTI_DataVar->rank, sizeof(hsize_t));
        if ( !count || !offset ) {
            FTI_Print("Could Not allocate count and offset regions", FTI_EROR);
            goto fail;
        }

        fetchBytes = FTI_getHostBuffSize();
        fetchBytes = FTI_calculateCountDim(FTI_DataVar->eleSize, fetchBytes, count,
                FTI_DataVar->rank, dimLength, &seperator);
        // hsize_t is printed through an explicit cast so that the format
        // specifier always matches the argument type.
        snprintf(str, FTI_BUFS, "GPU-Device Message: I Will Fetch %llu Bytes Per Stream Request",
                (unsigned long long) fetchBytes);
        FTI_Print(str, FTI_DBUG);

        prefetcher.fetchSize = fetchBytes;
        prefetcher.totalBytesToFetch = FTI_DataVar->size;
        prefetcher.isDevice = FTI_DataVar->isDevicePtr;
        prefetcher.dptr = FTI_DataVar->devicePtr;
        FTI_InitPrefetcher(&prefetcher);

        if ( FTI_Try(FTI_getPrefetchedData(&prefetcher, &bytesToWrite, &basePtr),
                    "Fetch next memory block from GPU to write to HDF5") != FTI_SCES ) {
            goto fail;
        }

        // The prefetcher signals the end of the data with a NULL block.
        while ( basePtr ) {
            res = FTI_WriteElements( dataspace, FTI_DataVar->type->h5datatype, dataset,
                    count, offset, FTI_DataVar->rank, basePtr);
            if (res != FTI_SCES) {
                goto fail_msg;
            }
            FTI_AdvanceOffset(seperator, offset, count, dimLength, FTI_DataVar->rank);
            if ( FTI_Try(FTI_getPrefetchedData(&prefetcher, &bytesToWrite, &basePtr),
                        "Fetch next memory block from GPU to write to HDF5") != FTI_SCES ) {
                goto fail;
            }
        }

        free(offset);
        offset = NULL;
        free(count);
        count = NULL;
    }
#endif

    // Common tear-down. Each id is invalidated right after its close call so
    // that the error path below does not close it a second time.
    res = H5Pclose(dcpl);
    dcpl = -1;
    if (res < 0) {
        goto fail_msg;
    }
    res = H5Dclose(dataset);
    dataset = -1;
    if (res < 0) {
        goto fail_msg;
    }
    res = H5Sclose(dataspace);
    dataspace = -1;
    if (res < 0) {
        goto fail_msg;
    }
    return FTI_SCES;

fail_msg:
    snprintf(str, FTI_BUFS, "Dataset #%d could not be written", FTI_DataVar->id);
    FTI_Print(str, FTI_EROR);
fail:
#ifdef GPUSUPPORT
    free(offset);
    free(count);
#endif
    if (dataset >= 0) {
        H5Dclose(dataset);
    }
    if (dataspace >= 0) {
        H5Sclose(dataspace);
    }
    if (dcpl >= 0) {
        H5Pclose(dcpl);
    }
    return FTI_NSCS;
}
/*-------------------------------------------------------------------------*/ int FTI_WriteHdf5Var(int varID, FTIT_configuration* FTI_Conf, FTIT_execution* FTI_Exec, FTIT_topology* FTI_Topo, FTIT_checkpoint* FTI_Ckpt, FTIT_dataset* FTI_Data) { if ( FTI_Exec->iCPInfo.status == FTI_ICP_FAIL ) { return FTI_NSCS; } char str[FTI_BUFS]; hid_t file_id; memcpy( &file_id, FTI_Exec->iCPInfo.fh, sizeof(FTI_H5_FH) ); FTIT_H5Group* rootGroup = FTI_Exec->H5groups[0]; // write data into ckpt file int i; for (i = 0; i < FTI_Exec->nbVar; i++) { if( FTI_Data[i].id == varID ) { int toCommit = 0; if (FTI_Data[i].type->h5datatype < 0) { toCommit = 1; } sprintf(str, "Calling CreateComplexType [%d] with hid_t %ld", FTI_Data[i].type->id, (long)FTI_Data[i].type->h5datatype); FTI_Print(str, FTI_DBUG); FTI_CreateComplexType(FTI_Data[i].type, FTI_Exec->FTI_Type); if (toCommit == 1) { char name[FTI_BUFS]; if (FTI_Data[i].type->structure == NULL) { //this is the array of bytes with no name sprintf(name, "Type%d", FTI_Data[i].type->id); } else { strncpy(name, FTI_Data[i].type->structure->name, FTI_BUFS); } herr_t res = H5Tcommit(FTI_Data[i].type->h5group->h5groupID, name, FTI_Data[i].type->h5datatype, H5P_DEFAULT, H5P_DEFAULT, H5P_DEFAULT); if (res < 0) { sprintf(str, "Datatype #%d could not be commited", FTI_Data[i].id); FTI_Print(str, FTI_EROR); int j; for (j = 0; j < FTI_Exec->H5groups[0]->childrenNo; j++) { FTI_CloseGroup(FTI_Exec->H5groups[rootGroup->childrenID[j]], FTI_Exec->H5groups); } H5Fclose(file_id); return FTI_NSCS; } } //convert dimLength array to hsize_t if ( FTI_Try(FTI_WriteHDF5Var(&FTI_Data[i]) , "Writing data to HDF5 filesystem") != FTI_SCES){ sprintf(str, "Dataset #%d could not be written", FTI_Data[i].id); FTI_Print(str, FTI_EROR); int j; for (j = 0; j < FTI_Exec->H5groups[0]->childrenNo; j++) { FTI_CloseGroup(FTI_Exec->H5groups[rootGroup->childrenID[j]], FTI_Exec->H5groups); } H5Fclose(file_id); return FTI_NSCS; } } } FTI_Exec->iCPInfo.result = FTI_SCES; return FTI_SCES; }