//
// Handle response from xcompute
//
// err is the outcome of the status send:
//   - DrError_OK: log it and, unless m_exitOnCompletion is
//     DrExitCode_StillActive, tell m_parentOuter that the vertex is exiting
//     (this may kill this process if all vertices are complete).
//   - a disconnect/connection failure while m_sendCount is below 4: hand this
//     request back to m_parent so it is sent again.
//   - anything else: log a warning when m_isAssert is set, otherwise log
//     through DrLogA.
//
void DVertexXComputeSetStatus::Process(DrError err)
{
    if (err != DrError_OK)
    {
        //
        // Only communication failures are worth another attempt
        //
        const bool isConnectionError =
            (err == DrError_RemoteDisconnected) ||
            (err == DrError_LocalDisconnected) ||
            (err == DrError_ConnectionFailed);

        if (isConnectionError && m_sendCount < 4)
        {
            DrLogW( "Retrying PN send. error %s", DRERRORSTRING(err));

            m_parent->SendSetStatusRequest(this);

            return;
        }

        //
        // Either the error is not retryable or the send budget is used up.
        // todo: the m_isAssert handling seems backwards; it is not clear from
        // here how m_isAssert is set.
        //
        if (!m_isAssert)
        {
            DrLogA(
                "Send to PN failed. done %u sends, error %s",
                m_sendCount, DRERRORSTRING(err));
        }
        else
        {
            DrLogW(
                "Send to PN failed: not asserting again. done %u sends, error %s",
                m_sendCount, DRERRORSTRING(err));
        }

        return;
    }

    //
    // Status was delivered
    //
    DrLogI( "PN send succeeded. label %s", m_label.GetString());

    if (m_exitOnCompletion != DrExitCode_StillActive)
    {
        m_parentOuter->VertexExiting(m_exitOnCompletion);
    }
}
Example #2
0
//
// Acquire the critical section, recording who entered and how long the
// acquire took.
//
// functionName / fileName / lineNumber describe the call site and are stored
// in _lastFunctionName / _lastFileName / _lastLineNumber. A warning is logged
// when the wait exceeds _logHeldTooLongTimeoutMs, and a debug line is logged
// on every entry when _logUsage is set.
//
void DrCriticalSectionBase::Enter( PCSTR functionName, PCSTR fileName, UINT lineNumber )
{
    // Sample the tick count on both sides of the acquire to measure the wait
    const UInt32 ticksBefore = GetTickCount();

    EnterCriticalSection( this );

    _enterTimeMs = GetTickCount();

    const UInt32 waitedMs = _enterTimeMs - ticksBefore;

    // The DebugInfo-based counters (DebugInfo->EntryCount and
    // DebugInfo->ContentionCount) are disabled, so both are reported as -1
    const Int32 entryCount = -1;
    const Int32 contentionCount = -1;

    _lastFunctionName = functionName;
    _lastFileName = fileName;
    _lastLineNumber = lineNumber;

    const bool callSiteKnown = (_lastFileName != NULL);

    if ( waitedMs > _logHeldTooLongTimeoutMs )
    {
        if (callSiteKnown)
        {
            DrLogW( "CritSect WAITED TO ENTER TOO LONG %s at %s %s(%u), entryCount=%d, contentionCount=%d, waited for %ums, addr=%08Ix",
                _name, _lastFunctionName, _lastFileName, _lastLineNumber, entryCount, contentionCount, waitedMs, this );
        }
        else
        {
            DrLogW( "CritSect WAITED TO ENTER TOO LONG %s, entryCount=%d, contentionCount=%d, waited for %ums, addr=%08Ix",
                _name, entryCount, contentionCount, waitedMs, this );
        }
    }

    if ( !_logUsage )
    {
        return;
    }

    if (callSiteKnown)
    {
        DrLogD( "CritSect ENTER %s at %s %s(%u), entryCount=%d, contentionCount=%d, waited for %ums, addr=%08Ix",
            _name, _lastFunctionName, _lastFileName, _lastLineNumber, entryCount, contentionCount, waitedMs, this );
    }
    else
    {
        DrLogD( "CritSect ENTER %s at %s, entryCount=%d, contentionCount=%d, waited for %ums, addr=%08Ix",
            _name, _lastFunctionName, entryCount, contentionCount, waitedMs, this );
    }
}
Example #3
0
//
// Parse one property (or aggregate) of a vertex status message.
//
// reader - property reader positioned at the item identified by enumID
// enumID - property identifier the reader has just seen
//
// DrProp_VertexState is read into m_state. For DrProp_BeginTag the aggregate
// tag is peeked: DrTag_VertexProcessStatus is read into m_processStatus and
// any other tag is skipped with a warning. Unknown properties are skipped
// with a warning. Returns the HRESULT of the last reader call made.
//
HRESULT DrVertexStatus::ParseProperty(DrPropertyReaderPtr reader, UINT16 enumID,
                                      UINT32 /* unused dataLen */)
{
    HRESULT hr;

    switch (enumID)
    {
    case DrProp_VertexState:
        hr = reader->ReadNextProperty(enumID, m_state);
        break;

    case DrProp_BeginTag:
        {
            UINT16 aggregateTag;
            hr = reader->PeekNextAggregateTag(&aggregateTag);
            if (hr != S_OK)
            {
                DrLogW("Error reading DrProp_BeginTag %d", hr);
            }
            else if (aggregateTag == DrTag_VertexProcessStatus)
            {
                hr = reader->ReadAggregate(aggregateTag, m_processStatus);
            }
            else
            {
                DrLogW("Unexpected tag %d", aggregateTag);
                hr = reader->SkipNextPropertyOrAggregate();
            }
        }
        break;

    default:
        DrLogW("Unknown property in vertex status message enumID %u", (UINT32) enumID);
        hr = reader->SkipNextPropertyOrAggregate();
        break;
    }

    return hr;
}
Example #4
0
//
// Parse one property (or aggregate) of a vertex command message.
//
// reader - property reader positioned at the item identified by enumID
// enumID - property identifier the reader has just seen
//
// Values that fail a range check are rejected with
// HRESULT_FROM_WIN32(ERROR_INVALID_PARAMETER) after a warning is logged.
// Unknown properties and unexpected aggregates are skipped with a warning.
// Returns the HRESULT of the last reader call, or the range-check failure.
//
HRESULT DrVertexCommandBlock::ParseProperty(DrPropertyReaderPtr reader, UINT16 enumID,
                                            UINT32 /* unused dataLen */)
{
    HRESULT hr;

    switch (enumID)
    {
    case DrProp_VertexCommand:
        {
            /* the command arrives as a raw integer and must be below DrVC_Max */
            UINT32 rawCommand;
            hr = reader->ReadNextProperty(enumID, rawCommand);
            if (hr == S_OK)
            {
                if (rawCommand >= DrVC_Max)
                {
                    DrLogW("Unknown vertex command %u", rawCommand);
                    hr = HRESULT_FROM_WIN32(ERROR_INVALID_PARAMETER);
                }
                else
                {
                    m_command = (DrVertexCommand) rawCommand;
                }
            }
        }
        break;

    case DrProp_VertexArgumentCount:
        {
            /* the count is cast to int below, so it must fit in 31 bits */
            UINT32 argumentCount;
            hr = reader->ReadNextProperty(enumID, argumentCount);
            if (hr == S_OK)
            {
                if (argumentCount >= 0x80000000)
                {
                    DrLogW("Too large argument count %u", argumentCount);
                    hr = HRESULT_FROM_WIN32(ERROR_INVALID_PARAMETER);
                }
                else
                {
                    SetArgumentCount((int) argumentCount);
                }
            }
        }
        break;

    case DrProp_VertexArgument:
        /* arguments fill m_argument in arrival order, up to its allocated size */
        if (m_nextArgumentToRead < m_argument->Allocated())
        {
            DrString argumentText;
            hr = reader->ReadNextProperty(enumID, argumentText);
            if (hr == S_OK)
            {
                m_argument[m_nextArgumentToRead] = argumentText;
                ++m_nextArgumentToRead;
            }
        }
        else
        {
            DrLogW("Too many arguments nextArgumentToRead=%d, nArguments=%d",
                   m_nextArgumentToRead, m_argument->Allocated());
            hr = HRESULT_FROM_WIN32(ERROR_INVALID_PARAMETER);
        }
        break;

    case DrProp_VertexSerializedBlock:
        {
            /* peek to learn the payload length, then read it into a new array */
            UINT32 payloadLength;
            hr = reader->PeekNextPropertyTag(&enumID, &payloadLength);
            if (hr == S_OK)
            {
                if (payloadLength >= 0x80000000)
                {
                    DrLogW("Block too large %u", payloadLength);
                    hr = HRESULT_FROM_WIN32(ERROR_INVALID_PARAMETER);
                }
                else
                {
                    DrByteArrayRef payload = DrNew DrByteArray((int) payloadLength);
                    {
                        /* NOTE(review): element 0 is addressed even when
                           payloadLength is 0 - confirm that is safe */
                        DRPIN(BYTE) data = &(payload[0]);
                        hr = reader->ReadNextProperty(enumID, (UINT32) payloadLength, data);
                    }
                    if (hr == S_OK)
                    {
                        m_serializedBlock = payload;
                    }
                }
            }
        }
        break;

    case DrProp_DebugBreak:
        hr = reader->ReadNextProperty(enumID, m_setBreakpointOnCommandArrival);
        break;

    case DrProp_BeginTag:
        {
            UINT16 aggregateTag;
            hr = reader->PeekNextAggregateTag(&aggregateTag);
            if (hr != S_OK)
            {
                DrLogW("Error reading DrProp_BeginTag %d", hr);
            }
            else if (aggregateTag == DrTag_VertexProcessStatus)
            {
                hr = reader->ReadAggregate(aggregateTag, m_processStatus);
            }
            else
            {
                DrLogW("Unexpected tag %d", aggregateTag);
                hr = reader->SkipNextPropertyOrAggregate();
            }
        }
        break;

    default:
        DrLogW("Unknown property in vertex command message enumID %u", (UINT32) enumID);
        hr = reader->SkipNextPropertyOrAggregate();
        break;
    }

    return hr;
}
Example #5
0
//
// Parse one property (or aggregate) of a vertex process status message.
//
// reader - property reader positioned at the item identified by enumID
// enumID - property identifier the reader has just seen
//
// Scalar properties are read into the matching member. Every UINT32 that is
// later stored or passed on as a signed value (ids, versions, channel counts)
// is rejected with HRESULT_FROM_WIN32(ERROR_INVALID_PARAMETER) when it is
// 0x80000000 or above. For DrProp_BeginTag the aggregate tag is peeked and
// the aggregate is read as an input channel description, an output channel
// description or vertex metadata; any other tag is skipped with a warning.
// Unknown properties are skipped with a warning.
//
// Returns the HRESULT of the last reader call, or the validation failure.
//
HRESULT DrVertexProcessStatus::ParseProperty(DrPropertyReaderPtr reader,
                                             UINT16 enumID, UINT32 /* unused dataLen */)
{
    HRESULT err;

    switch (enumID)
    {
    default:
        DrLogW("Unknown property in vertex status message enumID %u", (UINT32) enumID);
        err = reader->SkipNextPropertyOrAggregate();
        break;

    case DrProp_VertexId:
        UINT32 id;
        err = reader->ReadNextProperty(enumID, id);
        if (err == S_OK)
        {
            if (id >= 0x80000000)
            {
                DrLogW("Vertex ID out of range %u", id);
                err = HRESULT_FROM_WIN32(ERROR_INVALID_PARAMETER);
            }
            else
            {
                m_id = id;
            }
        }
        break;

    case DrProp_VertexVersion:
        UINT32 version;
        err = reader->ReadNextProperty(enumID, version);
        if (err == S_OK)
        {
            if (version >= 0x80000000)
            {
                DrLogW("Vertex version out of range %u", version);
                err = HRESULT_FROM_WIN32(ERROR_INVALID_PARAMETER);
            }
            else
            {
                m_version = version;
            }
        }
        break;

    case DrProp_VertexErrorCode:
        err = reader->ReadNextProperty(enumID, m_errorCode);
        break;

    case DrProp_VertexErrorString:
        {
            DrString errorString;
            err = reader->ReadNextProperty(enumID, errorString);
            if (err == S_OK)
            {
                SetVertexErrorString(errorString);
            }
        }
        break;

    case DrProp_VertexInputChannelCount:
        UINT32 nInputChannels;
        err = reader->ReadNextProperty(enumID, nInputChannels);
        if (err == S_OK)
        {
            if (nInputChannels >= 0x80000000)
            {
                DrLogW("Too many input channels %u", nInputChannels);
                err = HRESULT_FROM_WIN32(ERROR_INVALID_PARAMETER);
            }
            else
            {
                SetInputChannelCount((int) nInputChannels);
            }
        }
        break;

    case DrProp_VertexMaxOpenInputChannelCount:
        UINT32 maxInputChannels;
        err = reader->ReadNextProperty(enumID, maxInputChannels);
        if (err == S_OK)
        {
            if (maxInputChannels >= 0x80000000)
            {
                DrLogW("Too many max input channels %u", maxInputChannels);
                err = HRESULT_FROM_WIN32(ERROR_INVALID_PARAMETER);
            }
            else
            {
                m_maxInputChannels = (int) maxInputChannels;
            }
        }
        break;

    case DrProp_VertexOutputChannelCount:
        UINT32 nOutputChannels;
        err = reader->ReadNextProperty(enumID, nOutputChannels);
        if (err == S_OK)
        {
            if (nOutputChannels >= 0x80000000)
            {
                DrLogW("Too many output channels %u", nOutputChannels);
                err = HRESULT_FROM_WIN32(ERROR_INVALID_PARAMETER);
            }
            else
            {
                SetOutputChannelCount((int) nOutputChannels);
            }
        }
        break;

    case DrProp_VertexMaxOpenOutputChannelCount:
        UINT32 maxOutputChannels;
        err = reader->ReadNextProperty(enumID, maxOutputChannels);
        if (err == S_OK)
        {
            if (maxOutputChannels >= 0x80000000)
            {
                /* the value is unsigned and at least 0x80000000 here, so it
                   must be printed with %u; %d would show it as negative */
                DrLogW("Too many max output channels %u", maxOutputChannels);
                err = HRESULT_FROM_WIN32(ERROR_INVALID_PARAMETER);
            }
            else
            {
                m_maxOutputChannels = (int) maxOutputChannels;
            }
        }
        break;

    case DrProp_CanShareWorkQueue:
        err = reader->ReadNextProperty(enumID, m_canShareWorkQueue);
        break;

    case DrProp_BeginTag:
        UINT16 tagValue;
        err = reader->PeekNextAggregateTag(&tagValue);
        if (err != S_OK)
        {
            DrLogW("Error reading DrProp_BeginTag %d", err);
        }
        else
        {
            switch (tagValue)
            {
            case DrTag_InputChannelDescription:
                /* descriptions fill m_inputChannel in arrival order, up to
                   its allocated size */
                if (m_nextInputChannelToRead >= m_inputChannel->Allocated())
                {
                    DrLogW("Too many input channel descriptions nextInputChannelToRead=%d, nInputChannels=%d",
                           m_nextInputChannelToRead, m_inputChannel->Allocated());
                    err = HRESULT_FROM_WIN32(ERROR_INVALID_PARAMETER);
                }
                else
                {
                    err = reader->ReadAggregate(tagValue, m_inputChannel[m_nextInputChannelToRead]);
                    if (err == S_OK)
                    {
                        ++m_nextInputChannelToRead;
                    }
                }
                break;

            case DrTag_OutputChannelDescription:
                /* same scheme as the input channels, using m_outputChannel */
                if (m_nextOutputChannelToRead >= m_outputChannel->Allocated())
                {
                    DrLogW("Too many output channel descriptions nextOutputChannelToRead=%d, nOutputChannels=%d",
                           m_nextOutputChannelToRead, m_outputChannel->Allocated());
                    err = HRESULT_FROM_WIN32(ERROR_INVALID_PARAMETER);
                }
                else
                {
                    err = reader->ReadAggregate(tagValue, m_outputChannel[m_nextOutputChannelToRead]);
                    if (err == S_OK)
                    {
                        ++m_nextOutputChannelToRead;
                    }
                }
                break;

            case DrTag_VertexMetaData:
                {
                    /* m_metaData is only replaced after a successful read */
                    DrMetaDataRef metaData = DrNew DrMetaData();
                    err = reader->ReadAggregate(tagValue, metaData);
                    if (err == S_OK)
                    {
                        m_metaData = metaData;
                    }
                }
                break;

            default:
                DrLogW("Unexpected tag %d", tagValue);
                err = reader->SkipNextPropertyOrAggregate();
            }
        }
        break;
    }

    return err;
}
Example #6
0
//
// Parse one property (or aggregate) of a channel description.
//
// reader - property reader positioned at the item identified by enumID
// enumID - property identifier the reader has just seen
//
// Scalar properties are read into the matching member, or handed to the
// matching setter once read successfully. For DrProp_BeginTag the aggregate
// tag is peeked: DrTag_ChannelMetaData is read into a new DrMetaData and
// passed to SetChannelMetaData; any other aggregate is skipped with a
// warning. Unknown properties are skipped with a warning.
//
// Returns the HRESULT of the last reader call made.
//
HRESULT DrChannelDescription::ParseProperty(DrPropertyReaderPtr reader,
                                            UINT16 enumID, UINT32 /* unused dataLen */)
{
    HRESULT err;

    switch (enumID)
    {
    default:
        DrLogW("Unknown property in channel description enumID %u", (UINT32) enumID);
        err = reader->SkipNextPropertyOrAggregate();
        break;

    case DrProp_ChannelState:
        err = reader->ReadNextProperty(enumID, m_state);
        break;

    case DrProp_ChannelURI:
        {
            DrString URI;
            err = reader->ReadNextProperty(enumID, URI);
            if (err == S_OK)
            {
                SetChannelURI(URI);
            }
        }
        break;

    case DrProp_ChannelErrorCode:
        err = reader->ReadNextProperty(enumID, m_errorCode);
        break;

    case DrProp_ChannelErrorString:
        {
            DrString errorString;
            err = reader->ReadNextProperty(enumID, errorString);
            if (err == S_OK)
            {
                SetChannelErrorString(errorString);
            }
        }
        break;

    case DrProp_ChannelTotalLength:
        err = reader->ReadNextProperty(enumID, m_totalLength);
        break;

    case DrProp_ChannelProcessedLength:
        err = reader->ReadNextProperty(enumID, m_processedLength);
        break;

    case DrProp_BeginTag:
        {
            UINT16 tagID;
            err = reader->PeekNextAggregateTag(&tagID);
            if (err == S_OK)
            {
                if (tagID == DrTag_ChannelMetaData)
                {
                    /* the metadata is only installed after a successful read */
                    DrMetaDataRef mData = DrNew DrMetaData();
                    err = reader->ReadAggregate(tagID, mData);
                    if (err == S_OK)
                    {
                        SetChannelMetaData(mData);
                    }
                }
                else
                {
                    /* The tag was only peeked at, so skip the unrecognised
                       aggregate explicitly, just as the default branch does
                       for unknown properties. Previously this branch only
                       logged and returned S_OK with the aggregate unread. */
                    DrLogW("Unknown aggregate in channel description tagID %u", (UINT32) tagID);
                    err = reader->SkipNextPropertyOrAggregate();
                }
            }
        }
        break;
    }

    return err;
}
Example #7
0
/* Handle a process-info message for this cohort.

   The first message ever received is treated as the startup message: it
   either carries the process (stored in m_process) or a scheduling error
   (the gang version is cancelled). Later messages are ignored once m_process
   is null. Otherwise a state beyond DPS_Running is turned into a DrError and
   the gang version is cancelled, and the first DPS_Running message records
   the process handle and notifies the parent that the process has started. */
void DrCohortProcess::ReceiveMessage(DrProcessInfoRef message)
{
    if (m_receivedProcess == false)
    {
        /* when the CohortStarter helper below actually scheduled the process, it
           wasn't holding our lock. So the first thing it does is send us a
           message containing our DrProcess. Because of the ordering guarantees
           of the message queue, that is the first message we will receive. There
           are two cases: either it sent the process to be scheduled, in which
           case we will hear from a subsequent message whether it succeeded or not,
           or there was an unsatisfiable hard constraint in which case we hear with
           an error right now. */

        m_receivedProcess = true;

        /* this can only be the fake message from the Cohort Starter */
        DrAssert(message->m_state->m_state == DPS_NotStarted);

        if (!message->m_process.IsNull())
        {
            /* scheduling went ahead: remember the process and wait for news */
            DrAssert(message->m_state->m_status == DrNull);
            m_process = message->m_process;
            DrLogI("Cohort %s v.%d got startup message",
                   m_parent->GetDescription().GetChars(), m_version);
            return;
        }

        /* there was an error scheduling */
        DrAssert(message->m_state->m_status != DrNull);

        DrString schedulingError = DrError::ToShortText(message->m_state->m_status);
        DrLogI("Cohort %s v.%d got scheduling error message on cohort startup %s",
               m_parent->GetDescription().GetChars(), m_version, schedulingError.GetChars());

        m_parent->GetGang()->CancelVersion(m_version, message->m_state->m_status);
        return;
    }

    if (m_process.IsNull())
    {
        /* we have already finished so do nothing */
        return;
    }

    DrProcessStateRecordPtr state = message->m_state;

    DrLogI("Cohort %s v.%d got message state %d",
           m_parent->GetDescription().GetChars(), m_version,
           state->m_state);

    if (state->m_state > DPS_Running)
    {
        /* in the normal course of affairs, we should have already seen the process
           start running, in which case the vertices have all initiated their own
           message sends to the process which will also return informing them that
           it has finished, at which point we will be notified cleanly via NotifyVertexCompletion.
           The DrProcess machinery is supposed to have delayed the message we are now
           receiving in order to give the vertex messages a chance to arrive. So if we
           ever get here, something has gone wrong: either the process never started or
           the vertex messages didn't get sent.

           We are going to call Cancel below on the gang, which will result in all our vertices
           calling NotifyVertexCompletion and eventually us cleaning up once they all report.
        */

        const bool statusMissing = (state->m_status == DrNull);

        DrErrorRef error;
        DrString reason;

        if (state->m_state != DPS_Completed)
        {
            /* any state past DPS_Running other than DPS_Completed */
            if (statusMissing)
            {
                DrLogW("Empty status delivered with process info state %u", state->m_state);
                reason.SetF("Empty status with failed process state %u", state->m_state);
                error = DrNew DrError(DrError_Unexpected, "DrCohortProcess", reason);
            }
            else
            {
                reason.SetF("%s process code %s",
                            (state->m_state == DPS_Failed) ? "Failed" : "Zombie",
                            DRERRORSTRING(state->m_status->m_code));
                error = DrNew DrError(state->m_status->m_code, "DrCohortProcess", reason);
                error->AddProvenance(state->m_status);
            }
        }
        else if (m_processHandle == DrNull)
        {
            /* completed, but we never saw it start running */
            if (statusMissing)
            {
                reason.SetF("Process completed with no error without starting");
                error = DrNew DrError(DrError_VertexError, "DrCohortProcess", reason);
            }
            else
            {
                reason.SetF("Process completed with code %s without starting",
                            DRERRORSTRING(state->m_status->m_code));
                error = DrNew DrError(state->m_status->m_code, "DrCohortProcess", reason);
                error->AddProvenance(state->m_status);
            }
        }
        else
        {
            /* completed after starting, yet we are still waiting on vertices */
            if (statusMissing)
            {
                reason.SetF("Process completed with no error but vertex message was never delivered");
                error = DrNew DrError(DrError_VertexError, "DrCohortProcess", reason);
            }
            else
            {
                reason.SetF("Process completed with code %s but vertex message was never delivered",
                            DRERRORSTRING(state->m_status->m_code));
                error = DrNew DrError(state->m_status->m_code, "DrCohortProcess", reason);
                error->AddProvenance(state->m_status);
            }
        }

        DrString errorText = DrError::ToShortText(error);
        DrLogI("Cohort %s v.%d cancelling gang %s %s",
               m_parent->GetDescription().GetChars(), m_version, errorText.GetChars(),
               error->m_explanation.GetChars());

        m_parent->GetGang()->CancelVersion(m_version, error);
        return;
    }

    if (m_processHandle == DrNull && state->m_state == DPS_Running)
    {
        /* the process has started so tell everyone about it */
        m_processHandle = state->m_process;
        DrAssert(m_processHandle != DrNull);

        DrLogI("Cohort %s v.%d starting process",
               m_parent->GetDescription().GetChars(), m_version);

        m_parent->NotifyProcessHasStarted(m_version);
    }
}
Example #8
0
//
// Release the critical section, reporting how long it was held.
//
// functionName / fileName / lineNumber describe the leaving call site; the
// entering call site comes from _lastFunctionName / _lastFileName /
// _lastLineNumber. A warning is logged when the hold time exceeds
// _logHeldTooLongTimeoutMs, and a debug line is logged on every leave when
// _logUsage is set. Each message includes whichever call sites have a
// non-NULL file name.
//
void DrCriticalSectionBase::Leave( PCSTR functionName, PCSTR fileName, UINT lineNumber )
{
    DebugLogAssert( Aquired() );

    // An enter time of 0 is treated as "not recorded". That is technically
    // wrong, but it only fails to complain on 1 out of 4 billion slow locks.
    UInt32 heldMs = 0;
    if ( _enterTimeMs != 0 )
    {
        heldMs = GetTickCount() - _enterTimeMs;
        _enterTimeMs = 0;
    }

    // The DebugInfo-based counters (DebugInfo->EntryCount and
    // DebugInfo->ContentionCount) are disabled, so both are reported as -1
    const Int32 entryCount = -1;
    const Int32 contentionCount = -1;

    const bool leaveSiteKnown = (fileName != NULL);
    const bool enterSiteKnown = (_lastFileName != NULL);

    if ( heldMs > _logHeldTooLongTimeoutMs )
    {
        if (leaveSiteKnown && enterSiteKnown)
        {
            DrLogW( "CritSect LEAVE, HELD TOO LONG %s at %s %s(%u) entered at %s %s(%u), entryCount=%d, contentionCount=%d, time held=%ums, addr=%08Ix",
                _name, functionName, fileName, lineNumber, _lastFunctionName, _lastFileName, _lastLineNumber, entryCount, contentionCount, heldMs, this );
        }
        else if (leaveSiteKnown)
        {
            DrLogW( "CritSect LEAVE, HELD TOO LONG %s at %s %s(%u), entryCount=%d, contentionCount=%d, time held=%ums, addr=%08Ix",
                _name, functionName, fileName, lineNumber, entryCount, contentionCount, heldMs, this );
        }
        else if (enterSiteKnown)
        {
            DrLogW( "CritSect LEAVE, HELD TOO LONG %s entered at %s %s(%u), entryCount=%d, contentionCount=%d, time held=%ums, addr=%08Ix",
                _name, _lastFunctionName, _lastFileName, _lastLineNumber, entryCount, contentionCount, heldMs, this );
        }
        else
        {
            DrLogW( "CritSect LEAVE, HELD TOO LONG %s, entryCount=%d, contentionCount=%d, time held=%ums, addr=%08Ix",
                _name, entryCount, contentionCount, heldMs, this );
        }
    }

    if ( _logUsage )
    {
        if (leaveSiteKnown && enterSiteKnown)
        {
            DrLogD( "CritSect LEAVE %s at %s %s(%u) entered at %s %s(%u), entryCount=%d, contentionCount=%d, time held=%ums, addr=%08Ix",
                _name, functionName, fileName, lineNumber, _lastFunctionName, _lastFileName, _lastLineNumber, entryCount, contentionCount, heldMs, this );
        }
        else if (leaveSiteKnown)
        {
            DrLogD( "CritSect LEAVE %s at %s %s(%u), entryCount=%d, contentionCount=%d, time held=%ums, addr=%08Ix",
                _name, functionName, fileName, lineNumber, entryCount, contentionCount, heldMs, this );
        }
        else if (enterSiteKnown)
        {
            DrLogD( "CritSect LEAVE %s entered at %s %s(%u), entryCount=%d, contentionCount=%d, time held=%ums, addr=%08Ix",
                _name, _lastFunctionName, _lastFileName, _lastLineNumber, entryCount, contentionCount, heldMs, this );
        }
        else
        {
            DrLogD( "CritSect LEAVE %s, entryCount=%d, contentionCount=%d, time held=%ums, addr=%08Ix",
                _name, entryCount, contentionCount, heldMs, this );
        }
    }

    LeaveCriticalSection( this );
}
//
// Run in thread for each vertex
//
unsigned DVertexXComputePnController::CommandLoop()
{
    DrError err;
    UInt32 retries = 0;

    //
    // Get the vertex label
    //
    DrStr64 label;
    DVertexCommandBlock::GetPnPropertyLabel(&label,
                                            m_vertexId,
                                            m_vertexVersion);

    //
    // Wait for communication until error
    //
    do
    {
        //
        // Create request to get vertex version property
        //
        XC_SETANDGETPROCESSINFO_REQINPUT request;
        memset(&request, 0, sizeof(request));
        request.Size = sizeof(request);
        request.pBlockOnPropertyLabel = label.GetString();
        request.BlockOnPropertyversionLastSeen = m_currentCommandVersion;
        request.MaxBlockTime = XCTIMEINTERVAL_MINUTE;
        // XXXX
        request.pPropertyFetchTemplate = (char *) label.GetString();

        //
        // Send the request and check for errors
        //
        PXC_SETANDGETPROCESSINFO_REQRESULTS pResults = NULL;
        err = XcSetAndGetProcessInfo(NULL,//GetProcessHandle(),
                                     &request,
                                     &pResults,
                                     NULL);
        if (err == DrError_OK)
        {
            //
            // If request successfully sent, store process status and exit code
            //
            DrLogI( "Got command property");
            retries = 0;
            DrError processStatus = pResults->pProcessInfo->ProcessStatus;
            DrExitCode exitCode = pResults->pProcessInfo->ExitCode;

            if (processStatus == DrError_OK || exitCode != DrExitCode_StillActive)
            {
                //
                // If the PN thinks we have exited, so better make it so
                //
                err = DrError_Fail;
            }
        }

        //
        // If request was successful and other process doesn't think we're done
        //
        if (err == DrError_OK)
        {
            if (pResults->pProcessInfo->NumberofProcessProperties != 0)
            {
                //
                // Make sure there's only one property and it's the version
                //
                LogAssert(pResults->pProcessInfo->
                          NumberofProcessProperties == 1);
                PXC_PROCESSPROPERTY_INFO property =
                    pResults->pProcessInfo->ppProperties[0];
                LogAssert(::strcmp(property->pPropertyLabel, label) == 0);

                //
                // Update vertex version
                //
                UInt64 newVersion = property->PropertyVersion;
                if (newVersion < m_currentCommandVersion)
                {
                    //
                    // If vertex version is less than the current version, fail (logic error)
                    //
                    DrLogE(
                        "Property version went back in time. Property %s old version %I64u new version %I64u",
                        label.GetString(),
                        m_currentCommandVersion, newVersion);
                    err = DrError_ProcessPropertyVersionMismatch;
                }
                else if (newVersion == m_currentCommandVersion)
                {
                    //
                    // If version the same, report version the same
                    //
                    DrLogI(
                        "Command timeout with same version. Property %s version %I64u",
                        label.GetString(), m_currentCommandVersion);
                }
                else if (newVersion > m_currentCommandVersion)
                {
                    //
                    // If new vertex version, let GM know what process is handling it
                    //
                    DrLogI(
                        "Property got new version. Property %s old version %I64u new version %I64u",
                        label.GetString(),
                        m_currentCommandVersion, newVersion);

                    m_currentCommandVersion = newVersion;

                    DrRef<DVertexCommandBlock> newCommand;
                    newCommand.Attach(new DVertexCommandBlock());

                    DrRef<DryadXComputePnProcessPropertyResponse> response;
                    response.Attach(new DryadXComputePnProcessPropertyResponse(pResults->pProcessInfo));

                    //
                    // Get new vertex command
                    //
                    err = newCommand->ReadFromResponseMessage(response, m_vertexId, m_vertexVersion);

                    //
                    // If no errors in getting command, act on it. Log any failures below
                    //
                    if (err == DrError_OK)
                    {
                        err = ActOnCommand(newCommand);
                    }
                }
            }
        }
        else
        {
            //
            // Log error and continue
            //
            DrLogE( "XcSetAndGetProcessInfo got error: %s", DRERRORSTRING(err));
        }

        //
        // If the error is related to disconnection, retry up to 4 times
        //
        if (err == DrError_RemoteDisconnected ||
                err == DrError_LocalDisconnected ||
                err == DrError_ConnectionFailed ||
                err == DrError_ResponseDisconnect)
        {
            ++retries;
            // todo: move 4 to global
            if (retries < 4)
            {
                DrLogW( "Retrying get");
                err = DrError_OK;
            }
        }

        //
        // If result was allocated, free it before next iteration
        //
        if (pResults != NULL)
        {
            XCERROR freeError = XcFreeMemory(pResults);
            LogAssert(freeError == DrError_OK);
        }
    } while (err == DrError_OK);

    //
    // Close this controller and take no more requests
    //
    DrLogD( "About to terminate");
    Terminate(err, DrExitCode_Fail);

    //
    // Sleep forever, waiting for verticies to complete and take down the process
    //
    Sleep(INFINITE);

    return 0;
}
//
// Send updated status to vertex service
//
// r must actually be a DVertexXComputeSetStatus (asserted). The request is
// wrapped in an XComputeSetStatusOverlapped and issued asynchronously through
// XcSetAndGetProcessInfo, using the completion port of g_dryadNativePort.
// A return of ERROR_IO_PENDING is the expected outcome. Any other return is
// handled here: the outstanding-request count is rolled back, the failure is
// logged and the wrapper is deleted.
//
void DVertexXComputePnController::
SendSetStatusRequest(DryadPnProcessPropertyRequest* r)
{
    //
    // The request must be of the concrete set-status type
    //
    DVertexXComputeSetStatus* statusRequest =
        dynamic_cast<DVertexXComputeSetStatus*>(r);
    LogAssert(statusRequest != NULL);

    //
    // The wrapper supplies the operation state and OVERLAPPED for the call
    //
    XComputeSetStatusOverlapped* wrapper =
        new XComputeSetStatusOverlapped(statusRequest);

    XC_ASYNC_INFO async;
    memset(&async, 0, sizeof(async));
    async.cbSize = sizeof(async);
    async.pOperationState = wrapper->GetOperationState();
    async.IOCP = g_dryadNativePort->GetCompletionPort();
    async.pOverlapped = wrapper->GetOverlapped();

    //
    // Count the attempt and the outstanding request before issuing it
    //
    statusRequest->IncrementSendCount();
    g_dryadNativePort->IncrementOutstandingRequests();

    XCERROR sendResult =
        XcSetAndGetProcessInfo(NULL,//GetProcessHandle(),
                               statusRequest->MarshalProperty(),
                               statusRequest->GetResults(),
                               &async);

    //
    // The asynchronous call is never expected to report immediate success
    //
    LogAssert(sendResult != DrError_OK);

    if (sendResult == HRESULT_FROM_WIN32(ERROR_IO_PENDING))
    {
        //
        // Request is in flight; nothing more to do here
        //
        return;
    }

    //
    // Synchronous failure: the request is no longer outstanding
    //
    g_dryadNativePort->DecrementOutstandingRequests();

    //
    // When the request is flagged as an assert, report a warning; otherwise
    // log through DrLogA.
    // todo: this still seems backwards - need to figure out the rationale.
    // The request handles retries itself.
    //
    if (!statusRequest->IsAssert())
    {
        DrLogA(
            "Status request send failed synchronously. done %u send tries, error %s",
            statusRequest->GetSendCount(), DRERRORSTRING(sendResult));
    }
    else
    {
        DrLogW(
            "Status request send failed synchronously during assert: not asserting again. done %u send tries, error %s",
            statusRequest->GetSendCount(), DRERRORSTRING(sendResult));
    }

    delete wrapper;
}