Exemplo n.º 1
0
//
// Connect the two ends of this edge through an internal FIFO channel.
//
// fifoLength - value embedded in the first component of the fifo:// name
// workQueue  - work queue handed to the reader when it is opened
//
// Both m_reader and m_writer must still be NULL on entry. On return both
// have been opened on the same FIFO name and started. Any error reported
// while opening either end trips a LogAssert.
//
void DryadSubGraphVertex::EdgeInfo::MakeFifo(UInt32 fifoLength,
                                             WorkQueue* workQueue)
{
    //
    // Neither end of the edge may have been opened already
    //
    LogAssert(m_reader == NULL);
    LogAssert(m_writer == NULL);

    //
    // Build the FIFO name from the length, a fresh unique id and the
    // source/destination vertex and port of this edge
    //
    const UInt32 fifoId = RChannelFactory::GetUniqueFifoId();
    DrStr64 channelName;
    channelName.SetF("fifo://%u/internal-%u-%u.%u--%u.%u",
                     fifoLength,
                     fifoId,
                     m_sourceVertex,
                     m_sourcePort,
                     m_destinationVertex,
                     m_destinationPort);

    //
    // Open the reading end first, then the writing end, checking the
    // shared error reporter after each step
    //
    DVErrorReporter reporter;

    RChannelFactory::OpenReader(channelName, NULL, NULL, 1, NULL, 0, 0,
                                workQueue, &reporter, &m_reader, NULL);
    LogAssert(reporter.NoError());

    RChannelFactory::OpenWriter(channelName, NULL, NULL, 1, NULL, 0, NULL,
                                &reporter, &m_writer);
    LogAssert(reporter.NoError());

    //
    // Start both ends, reader before writer
    //
    m_reader->GetReader()->Start(NULL);
    m_writer->GetWriter()->Start();
}
Exemplo n.º 2
0
//
// Create files which contain information used to restart the upcoming vertex command
// Used for post-mortem debugging.
//
// Three files are created, named relative to the current working directory,
// where <id> and <ver> are the vertex id and vertex instance version taken
// from the command block's process status:
//   vertex-<id>-<ver>-rerun-data.dat         - the serialized command block
//   vertex-<id>-<ver>-rerun-originalInfo.txt - input URIs, output URIs and arguments
//   vertex-<id>-<ver>-rerun.cmd              - command line that re-runs the vertex
//
// commandBlock - command block about to be executed; it is serialized and
//                its process status and argument vector are read
//
// Nothing is returned. If serialization fails or any file cannot be opened,
// the failure is logged, any files already opened are closed, and the
// function returns early. A short write of the data file is logged but does
// not stop the remaining files from being written.
//
void DVertexPnController::DumpRestartCommand(DVertexCommandBlock* commandBlock)
{
    DrError err;

    //
    // Create temporary buffer
    //
    DrRef<DrSimpleHeapBuffer> buf;
    buf.Attach(new DrSimpleHeapBuffer());

    //
    // Write command block into buffer
    // (the writer is scoped so that it is destroyed before the buffer is read)
    //
    {
        DrMemoryBufferWriter writer(buf);
        err = commandBlock->Serialize(&writer);
    }

    //
    // If write fails, log failure and return
    //
    if (err != DrError_OK)
    {
        DrLogE("Can't serialize command block for restart --- %s",
            DRERRORSTRING(err));
        return;
    }

    //
    // Get data reference and byte count
    //
    const void* serializedData;
    Size_t availableToRead;
    serializedData = buf->GetReadAddress(0, &availableToRead);
    LogAssert(availableToRead >= buf->GetAvailableSize());

    //
    // Get the process information 
    //
    DVertexProcessStatus* ps = commandBlock->GetProcessStatus();

    //
    // Build file for data required for rerun, open it
    //
    DrStr64 restartBlockName;
    restartBlockName.SetF("vertex-%u-%u-rerun-data.dat",
                          ps->GetVertexId(), ps->GetVertexInstanceVersion());
    FILE* fData = fopen(restartBlockName, "wb");
    if (fData == NULL)
    {
        //
        // If failed to open file, log and return
        //
        err = DrGetLastError();
        DrLogE(
            "Can't open re-run command block file '%s' --- %s",
            restartBlockName.GetString(), DRERRORSTRING(err));
        return;
    }

    //
    // Build file for original information required for rerun, open it
    //
    DrStr64 originalInfoName;
    originalInfoName.SetF("vertex-%u-%u-rerun-originalInfo.txt",
                          ps->GetVertexId(), ps->GetVertexInstanceVersion());
    FILE* fOriginalText = fopen(originalInfoName, "w");
    if (fOriginalText == NULL)
    {
        //
        // If failed to open file, log and return
        //
        err = DrGetLastError();
        DrLogE(
            "Can't open re-run original info file '%s' --- %s",
            originalInfoName.GetString(), DRERRORSTRING(err));

        //
        // Close data file
        //
        fclose(fData);
        return;
    }

    //
    // Build file for rerun command line, open it
    //
    DrStr64 originalRestartCommand;
    originalRestartCommand.SetF("vertex-%u-%u-rerun.cmd",
                                ps->GetVertexId(),
                                ps->GetVertexInstanceVersion());
    FILE* fOriginalRestart = fopen(originalRestartCommand, "w");
    if (fOriginalRestart == NULL)
    {
        //
        // If failed to open file, log and return
        //
        err = DrGetLastError();
        DrLogE(
            "Can't open re-run original command file '%s' --- %s",
            originalRestartCommand.GetString(), DRERRORSTRING(err));

        //
        // Close data and original text files
        //
        fclose(fData);
        fclose(fOriginalText);
        return;
    }

    //
    // Open file for local info
    //

    /* BUG 16322: Do not create this for SP3, since it is currently broken.
       Consider fixing for v4.
    DrStr64 localInfoName;
    localInfoName.SetF("vertex-%u-%u-rerun-localInfo.txt",
                       ps->GetVertexId(), ps->GetVertexInstanceVersion());
    FILE* fLocalText = fopen(localInfoName, "w");
    if (fLocalText == NULL)
    {
        //
        // If failed to open file, log and return
        //
        err = DrGetLastError();
        DrLogE(
            "Can't open re-run local info file '%s' --- %s",
            localInfoName.GetString(), DRERRORSTRING(err));

        //
        // Close data, cmd, and original text files
        //
        fclose(fData);
        fclose(fOriginalText);
        fclose(fOriginalRestart);
        return;
    }
    */


    //
    // Open file for rerun with local inputs
    //

    /* BUG 16322: Do not create this for SP3, since it is currently broken.
       Consider fixing for v4.
    DrStr64 localRestartCommand;
    localRestartCommand.SetF("vertex-%u-%u-rerun-local-inputs.cmd",
                             ps->GetVertexId(),
                             ps->GetVertexInstanceVersion());
    FILE* fLocalRestart = fopen(localRestartCommand, "w");
    if (fLocalRestart == NULL)
    {
        //
        // If failed to open file, log and return
        //
        err = DrGetLastError();
        DrLogE(
            "Can't open re-run local command file '%s' --- %s",
            localRestartCommand.GetString(), DRERRORSTRING(err));

        //
        // Close data, cmd, original, and local text files
        //
        fclose(fData);
        fclose(fOriginalText);
        fclose(fOriginalRestart);
        fclose(fLocalText);
        return;
    }
    */

    //
    // Open file for fetching inputs
    //
    /* BUG 16322: Do not create this for SP3, since it is currently broken.
       Consider fixing for v4.
    DrStr64 copyCommand;
    copyCommand.SetF("vertex-%u-%u-rerun-fetch-inputs.cmd",
                     ps->GetVertexId(), ps->GetVertexInstanceVersion());
    FILE* fCopyCommand = fopen(copyCommand, "w");
    if (fCopyCommand == NULL)
    {
        //
        // If failed to open file, log and return
        //
        err = DrGetLastError();
        DrLogE(
            "Can't open re-run copy command file '%s' --- %s",
            localRestartCommand.GetString(), DRERRORSTRING(err));

        //
        // Close data, original and localcmd, and original and local text files
        //
        fclose(fData);
        fclose(fOriginalText);
        fclose(fOriginalRestart);
        fclose(fLocalText);
        fclose(fLocalRestart);
        return;
    }
    */

    //
    // Write out data to data file.
    //
    const size_t bytesToWrite = static_cast<size_t>(buf->GetAvailableSize());
    const size_t written = fwrite(serializedData, 1, bytesToWrite, fData);
    if (written != bytesToWrite)
    {
        //
        // If failed to write all the data, log failure.
        // The error is queried before the file is closed so that the
        // fclose() below cannot disturb it.
        //
        err = DrGetLastError();
        DrLogE(
            "Failed to write re-run command block file '%s': only %Iu of %Iu bytes written --- %s",
            restartBlockName.GetString(),
            written, bytesToWrite,
            DRERRORSTRING(err));
    }

    //
    // The data file is complete: close it here, and only here. Closing the
    // same FILE* a second time is undefined behavior, so fData must not be
    // touched again below.
    //
    fclose(fData);
    fData = NULL;

    //
    // Write original restart command
    //
    fprintf(fOriginalRestart,
            "%s --cmd -dump %s -overridetext %s\n",
            m_parent->GetRunningExePathName(),
            restartBlockName.GetString(),
            originalInfoName.GetString());

    //
    // Write local restart command
    //
    /* BUG 16322: Do not create this for SP3, since it is currently broken.
       Consider fixing for v4.
    fprintf(fLocalRestart,
            "%s --vertex --cmd -dump %s -overridetext %s\n",
            m_parent->GetRunningExePathName(),
            restartBlockName.GetString(),
            localInfoName.GetString());
    */

    //
    // Record number of input files
    //
    fprintf(fOriginalText, "%u # input files\n", ps->GetInputChannelCount());

    /* BUG 16322: Do not create this for SP3, since it is currently broken.
       Consider fixing for v4.
    fprintf(fLocalText, "%u # input files\n", ps->GetInputChannelCount());
    */

    //
    // Get the input channels and foreach channel, record its URI
    //
    DryadInputChannelDescription* inputs = ps->GetInputChannels();
    for (UInt32 i=0; i<ps->GetInputChannelCount(); ++i)
    {
        const char* uri = inputs[i].GetChannelURI();

        /* BUG 16322: Do not create this for SP3, since it is currently broken.
           Consider fixing for v4.
        if (::_strnicmp(uri, "file://", 7) == 0)
        {
            //
            // If reading from file, copy command doesn't want "file://" prefix
            // todo: remove reference to cosmos
            //
            fprintf(fCopyCommand, "cosmos.exe copy %s v%u.%u-i%u\n",
                    uri+7,
                    ps->GetVertexId(), ps->GetVertexInstanceVersion(), i);
        }
        else if (::_strnicmp(uri, "cosmos://", 9) == 0)
        {
            //
            // If reading from cosmos path, copy directly
            // todo: remove cosmos code
            //
            fprintf(fCopyCommand, "cosmos.exe copy %s v%u.%u-i%u\n",
                    uri,
                    ps->GetVertexId(), ps->GetVertexInstanceVersion(), i);
        }
        else
        {
            //
            // Otherwise, unable to copy
            //
            fprintf(fCopyCommand, "echo can't copy URI %s to v%u.%u-i%u\n",
                    uri,
                    ps->GetVertexId(), ps->GetVertexInstanceVersion(), i);
        }
        */

        // 
        // Add reference to this URI to original and relative reference to local
        //
        fprintf(fOriginalText, "%s\n", uri);

        /* BUG 16322: Do not create this for SP3, since it is currently broken.
           Consider fixing for v4.
        fprintf(fLocalText, "file://v%u.%u-i%u\n",
                ps->GetVertexId(), ps->GetVertexInstanceVersion(), i);
        */
    }

    //
    // Record number of output files
    //
    fprintf(fOriginalText, "%u # output files\n", ps->GetOutputChannelCount());

    /* BUG 16322: Do not create this for SP3, since it is currently broken.
       Consider fixing for v4.
    fprintf(fLocalText, "%u # output files\n", ps->GetOutputChannelCount());
    */

    //
    // Get the output channels and record each one
    //
    DryadOutputChannelDescription* outputs = ps->GetOutputChannels();
    for (UInt32 i=0; i<ps->GetOutputChannelCount(); ++i)
    {
        const char* uri = outputs[i].GetChannelURI();

        //
        // Check if uri is writing to DSC partition. 
        // If it is, redirect to local temp file to avoid writing to sealed stream
        // (uriMod owns the replacement string and outlives the fprintf below)
        // 
        DrStr uriMod("");
        if(ConcreteRChannel::IsDscPartition(uri))
        {
            uriMod.AppendF("file://hpcdscpt_redirect_%d.dtf", i);
            uri = uriMod.GetString();
        }

        fprintf(fOriginalText, "%s\n", uri);

        /* BUG 16322: Do not create this for SP3, since it is currently broken.
           Consider fixing for v4.
        fprintf(fLocalText, "%s\n", uri);
        */
    }

    //
    // Record number of arguments
    //
    fprintf(fOriginalText, "%u # arguments\n",
            commandBlock->GetArgumentCount());

    /* BUG 16322: Do not create this for SP3, since it is currently broken.
       Consider fixing for v4.
    fprintf(fLocalText, "%u # arguments\n", commandBlock->GetArgumentCount());
    */

    //
    // Foreach argument, record its value
    //
    for (UInt32 i=0; i<commandBlock->GetArgumentCount(); ++i)
    {
        DrStr64 arg = commandBlock->GetArgumentVector()[i];
        fprintf(fOriginalText, "%s\n", arg.GetString());

        /* BUG 16322: Do not create this for SP3, since it is currently broken.
           Consider fixing for v4.
        fprintf(fLocalText, "%s\n", arg.GetString());
        */
    }

    //
    // Close the remaining files (fData was already closed right after its
    // contents were written)
    //
    fclose(fOriginalText);
    fclose(fOriginalRestart);

    /* BUG 16322: Do not create this for SP3, since it is currently broken.
       Consider fixing for v4.
    fclose(fLocalText);
    fclose(fLocalRestart);
    fclose(fCopyCommand);
    */
}