//
// Connect the two ends of this edge with a fifo channel.
//
// A fifo URI is built from fifoLength, a factory-supplied unique id and the
// source/destination vertex and port of this edge. A reader and a writer are
// then opened on that URI and both are started.
//
// fifoLength - embedded in the fifo URI
// workQueue  - handed to RChannelFactory::OpenReader
//
// Asserts that the edge has no reader or writer yet, and that neither open
// call reported an error.
//
void DryadSubGraphVertex::EdgeInfo::
    MakeFifo(UInt32 fifoLength, WorkQueue* workQueue)
{
    //
    // This edge must not have been connected already
    //
    LogAssert(m_reader == NULL);
    LogAssert(m_writer == NULL);

    //
    // Build the URI shared by both ends of the fifo
    //
    UInt32 fifoId = RChannelFactory::GetUniqueFifoId();
    DrStr64 channelUri;
    channelUri.SetF("fifo://%u/internal-%u-%u.%u--%u.%u",
                    fifoLength,
                    fifoId,
                    m_sourceVertex,
                    m_sourcePort,
                    m_destinationVertex,
                    m_destinationPort);

    DVErrorReporter errorReporter;

    //
    // Open the reading end
    //
    RChannelFactory::OpenReader(channelUri,
                                NULL,
                                NULL,
                                1,
                                NULL,
                                0,
                                0,
                                workQueue,
                                &errorReporter,
                                &m_reader,
                                NULL);
    LogAssert(errorReporter.NoError());

    //
    // Open the writing end
    //
    RChannelFactory::OpenWriter(channelUri,
                                NULL,
                                NULL,
                                1,
                                NULL,
                                0,
                                NULL,
                                &errorReporter,
                                &m_writer);
    LogAssert(errorReporter.NoError());

    //
    // Start the reader first, then the writer
    //
    m_reader->GetReader()->Start(NULL);
    m_writer->GetWriter()->Start();
}
// // Create files which contain information used to restart the upcoming vertex command // Used for post-mortem debugging. // void DVertexPnController::DumpRestartCommand(DVertexCommandBlock* commandBlock) { DrError err; // // Create temporary buffer // DrRef<DrSimpleHeapBuffer> buf; buf.Attach(new DrSimpleHeapBuffer()); // // Write command block into buffer // { DrMemoryBufferWriter writer(buf); err = commandBlock->Serialize(&writer); } // // If write fails, log failure and return // if (err != DrError_OK) { DrLogE("Can't serialize command block for restart --- %s", DRERRORSTRING(err)); return; } // // Get data reference and byte count // const void* serializedData; Size_t availableToRead; serializedData = buf->GetReadAddress(0, &availableToRead); LogAssert(availableToRead >= buf->GetAvailableSize()); // // Get the process information // DVertexProcessStatus* ps = commandBlock->GetProcessStatus(); // // Build file for data required for rerun, open it // DrStr64 restartBlockName; restartBlockName.SetF("vertex-%u-%u-rerun-data.dat", ps->GetVertexId(), ps->GetVertexInstanceVersion()); FILE* fData = fopen(restartBlockName, "wb"); if (fData == NULL) { // // If failed to open file, log and return // err = DrGetLastError(); DrLogE( "Can't open re-run command block file '%s' --- %s", restartBlockName.GetString(), DRERRORSTRING(err)); return; } // // Build file for original information required for rerun, open it // DrStr64 originalInfoName; originalInfoName.SetF("vertex-%u-%u-rerun-originalInfo.txt", ps->GetVertexId(), ps->GetVertexInstanceVersion()); FILE* fOriginalText = fopen(originalInfoName, "w"); if (fOriginalText == NULL) { // // If failed to open file, log and return // err = DrGetLastError(); DrLogE( "Can't open re-run original info file '%s' --- %s", originalInfoName.GetString(), DRERRORSTRING(err)); // // Close data file // fclose(fData); return; } // // Build file for rerun command line, open it // DrStr64 originalRestartCommand; 
originalRestartCommand.SetF("vertex-%u-%u-rerun.cmd", ps->GetVertexId(), ps->GetVertexInstanceVersion()); FILE* fOriginalRestart = fopen(originalRestartCommand, "w"); if (fOriginalRestart == NULL) { // // If failed to open file, log and return // err = DrGetLastError(); DrLogE( "Can't open re-run original command file '%s' --- %s", originalRestartCommand.GetString(), DRERRORSTRING(err)); // // Close data and original text files // fclose(fData); fclose(fOriginalText); return; } // // Open file for local info // /* BUG 16322: Do not create this for SP3, since it is currently broken. Consider fixing for v4. DrStr64 localInfoName; localInfoName.SetF("vertex-%u-%u-rerun-localInfo.txt", ps->GetVertexId(), ps->GetVertexInstanceVersion()); FILE* fLocalText = fopen(localInfoName, "w"); if (fLocalText == NULL) { // // If failed to open file, log and return // err = DrGetLastError(); DrLogE( "Can't open re-run local info file '%s' --- %s", localInfoName.GetString(), DRERRORSTRING(err)); // // Close data, cmd, and original text files // fclose(fData); fclose(fOriginalText); fclose(fOriginalRestart); return; } */ // // Open file for rerun with local inputs // /* BUG 16322: Do not create this for SP3, since it is currently broken. Consider fixing for v4. DrStr64 localRestartCommand; localRestartCommand.SetF("vertex-%u-%u-rerun-local-inputs.cmd", ps->GetVertexId(), ps->GetVertexInstanceVersion()); FILE* fLocalRestart = fopen(localRestartCommand, "w"); if (fLocalRestart == NULL) { // // If failed to open file, log and return // err = DrGetLastError(); DrLogE( "Can't open re-run local command file '%s' --- %s", localRestartCommand.GetString(), DRERRORSTRING(err)); // // Close data, cmd, original, and local text files // fclose(fData); fclose(fOriginalText); fclose(fOriginalRestart); fclose(fLocalText); return; } */ // // Open file for fetching inputs // /* BUG 16322: Do not create this for SP3, since it is currently broken. Consider fixing for v4. 
DrStr64 copyCommand; copyCommand.SetF("vertex-%u-%u-rerun-fetch-inputs.cmd", ps->GetVertexId(), ps->GetVertexInstanceVersion()); FILE* fCopyCommand = fopen(copyCommand, "w"); if (fCopyCommand == NULL) { // // If failed to open file, log and return // err = DrGetLastError(); DrLogE( "Can't open re-run copy command file '%s' --- %s", localRestartCommand.GetString(), DRERRORSTRING(err)); // // Close data, original and localcmd, and original and local text files // fclose(fData); fclose(fOriginalText); fclose(fOriginalRestart); fclose(fLocalText); fclose(fLocalRestart); return; } */ // // Write out data to data file, then close it. // size_t written = fwrite(serializedData, 1, buf->GetAvailableSize(), fData); fclose(fData); if (written != buf->GetAvailableSize()) { // // If failed to write all the data, log failure // err = DrGetLastError(); DrLogE( "Failed to write re-run command block file '%s': only %Iu of %Iu bytes written --- %s", restartBlockName.GetString(), written, (size_t) (buf->GetAvailableSize()), DRERRORSTRING(err)); } // // Write original restart command // fprintf(fOriginalRestart, "%s --cmd -dump %s -overridetext %s\n", m_parent->GetRunningExePathName(), restartBlockName.GetString(), originalInfoName.GetString()); // // Write local restart command // /* BUG 16322: Do not create this for SP3, since it is currently broken. Consider fixing for v4. fprintf(fLocalRestart, "%s --vertex --cmd -dump %s -overridetext %s\n", m_parent->GetRunningExePathName(), restartBlockName.GetString(), localInfoName.GetString()); */ // // Record number of input files // fprintf(fOriginalText, "%u # input files\n", ps->GetInputChannelCount()); /* BUG 16322: Do not create this for SP3, since it is currently broken. Consider fixing for v4. 
fprintf(fLocalText, "%u # input files\n", ps->GetInputChannelCount()); */ // // Get the input channels and foreach channel, add copy command to copy script // DryadInputChannelDescription* inputs = ps->GetInputChannels(); for (UInt32 i=0; i<ps->GetInputChannelCount(); ++i) { const char* uri = inputs[i].GetChannelURI(); /* BUG 16322: Do not create this for SP3, since it is currently broken. Consider fixing for v4. if (::_strnicmp(uri, "file://", 7) == 0) { // // If reading from file, copy command doesn't want "file://" prefix // todo: remove reference to cosmos // fprintf(fCopyCommand, "cosmos.exe copy %s v%u.%u-i%u\n", uri+7, ps->GetVertexId(), ps->GetVertexInstanceVersion(), i); } else if (::_strnicmp(uri, "cosmos://", 9) == 0) { // // If reading from cosmos path, copy directly // todo: remove cosmos code // fprintf(fCopyCommand, "cosmos.exe copy %s v%u.%u-i%u\n", uri, ps->GetVertexId(), ps->GetVertexInstanceVersion(), i); } else { // // Otherwise, unable to copy // fprintf(fCopyCommand, "echo can't copy URI %s to v%u.%u-i%u\n", uri, ps->GetVertexId(), ps->GetVertexInstanceVersion(), i); } */ // // At reference to this URI to original and relative reference to local // fprintf(fOriginalText, "%s\n", uri); /* BUG 16322: Do not create this for SP3, since it is currently broken. Consider fixing for v4. fprintf(fLocalText, "file://v%u.%u-i%u\n", ps->GetVertexId(), ps->GetVertexInstanceVersion(), i); */ } // // Record number of output files // fprintf(fOriginalText, "%u # output files\n", ps->GetOutputChannelCount()); /* BUG 16322: Do not create this for SP3, since it is currently broken. Consider fixing for v4. fprintf(fLocalText, "%u # output files\n", ps->GetOutputChannelCount()); */ // // Get the output channels and record each one // DryadOutputChannelDescription* outputs = ps->GetOutputChannels(); for (UInt32 i=0; i<ps->GetOutputChannelCount(); ++i) { const char* uri = outputs[i].GetChannelURI(); // // Check if uri is writting to DSC partition. 
// If it is, redirect to local temp file to avoid writing to sealed stream // DrStr uriMod(""); if(ConcreteRChannel::IsDscPartition(uri)) { uriMod.AppendF("file://hpcdscpt_redirect_%d.dtf", i); uri = uriMod.GetString(); } fprintf(fOriginalText, "%s\n", uri); /* BUG 16322: Do not create this for SP3, since it is currently broken. Consider fixing for v4. fprintf(fLocalText, "%s\n", uri); */ } // // Record number of arguments // fprintf(fOriginalText, "%u # arguments\n", commandBlock->GetArgumentCount()); /* BUG 16322: Do not create this for SP3, since it is currently broken. Consider fixing for v4. fprintf(fLocalText, "%u # arguments\n", commandBlock->GetArgumentCount()); */ // // Foreach argument, record its value // for (UInt32 i=0; i<commandBlock->GetArgumentCount(); ++i) { DrStr64 arg = commandBlock->GetArgumentVector()[i]; fprintf(fOriginalText, "%s\n", arg.GetString()); /* BUG 16322: Do not create this for SP3, since it is currently broken. Consider fixing for v4. fprintf(fLocalText, "%s\n", arg.GetString()); */ } // // Close all files // todo: fData closed above, remove duplicate // fclose(fData); fclose(fOriginalText); fclose(fOriginalRestart); /* BUG 16322: Do not create this for SP3, since it is currently broken. Consider fixing for v4. fclose(fLocalText); fclose(fLocalRestart); fclose(fCopyCommand); */ }