コード例 #1
0
/* Wait for a thread to finish and reclaim its resources.
 *
 * thread    - the thread to join; its handle is closed and the struct freed
 *             on success.
 * value_ptr - if non-NULL, receives the thread's exit code on success.
 *
 * Returns 0 on success, -1 on failure (after reporting a fatal error).
 */
int tMPI_Thread_join(tMPI_Thread_t thread, void **value_ptr)
{
    DWORD ret,retval;

    ret = WaitForSingleObject(thread->th, INFINITE);

    if (ret != 0)
    {
        tMPI_Fatal_error(TMPI_FARGS,"Failed to join thread. error code=%d",
                         GetLastError());
        return -1;
    }

    if (value_ptr)
    {
        /* BUG FIX: GetExitCodeThread() takes the thread HANDLE; the old
           code passed the tMPI_Thread struct pointer itself. */
        if (!GetExitCodeThread(thread->th, &retval))
        {
            tMPI_Fatal_error(TMPI_FARGS,
                             "Failed to get thread exit code: error=%d",
                             GetLastError());
            return -1;
        }
        /* hand the exit code back to the caller (resolves the old TODO);
           the DWORD exit code round-trips through void* as on pthreads. */
        *value_ptr = (void *)(size_t)retval;
    }
    CloseHandle(thread->th);
    tMPI_Free(thread);

    return 0;
}
コード例 #2
0
ファイル: winthreads.c プロジェクト: yupinov/gromacs
/* Add an entry to the thread ID list; the caller must hold the list lock.
   Grows the backing array (doubling) as needed.
   Returns 0 on success, ENOMEM if the list could not be grown. */
static int tMPI_Thread_id_list_add_locked(DWORD               thread_id,
        struct tMPI_Thread *th)
{
    if (Nalloc_thread_id_list < N_thread_id_list + 1)
    {
        thread_id_list_t* new_list;
        int               i;
        /* BUG FIX: compute the doubled capacity in a local first; the old
           code updated the global Nalloc_thread_id_list before malloc(),
           so a failed allocation left the global claiming capacity that
           does not exist. */
        int               new_alloc = Nalloc_thread_id_list * 2;

        /* allocate the new, larger list */
        new_list = (thread_id_list_t*)malloc(sizeof(thread_id_list_t)*
                                             new_alloc);
        if (new_list == NULL)
        {
            return ENOMEM;
        }
        /* copy over all existing elements */
        for (i = 0; i < N_thread_id_list; i++)
        {
            new_list[i] = thread_id_list[i];
        }
        /* free the old list, then commit the new list and its capacity */
        tMPI_Free(thread_id_list);
        thread_id_list        = new_list;
        Nalloc_thread_id_list = new_alloc;
    }
    thread_id_list[ N_thread_id_list ].thread_id = thread_id;
    thread_id_list[ N_thread_id_list ].th        = th;
    N_thread_id_list++;

    return 0;
}
コード例 #3
0
/* Create a new thread, using NUMA/group-aware placement when the system
 * supports it.
 *
 * thread        - receives the newly allocated thread struct; reclaimed by
 *                 tMPI_Thread_join().
 * start_routine - function the new thread runs.
 * arg           - argument passed to start_routine.
 *
 * Returns 0 on success, EINVAL for a NULL thread pointer, -1 on
 * thread-creation failure.
 */
int tMPI_Thread_create_aff(tMPI_Thread_t *thread,
                           void *(*start_routine)(void *), void *arg)
{
    DWORD thread_id;
    struct tMPI_Thread_starter_param *prm;

    /* BUG FIX: validate the output pointer BEFORE writing through it; the
       old code assigned *thread first and then tested 'thread', which both
       dereferenced a possibly-NULL pointer and tested too late. */
    if (thread == NULL)
    {
        tMPI_Fatal_error(TMPI_FARGS, "Invalid thread pointer.");
        return EINVAL;
    }

    tMPI_Init_initers();
    tMPI_Set_main_thread_affinity();

    /* a small memory leak to be sure that it doesn't get deallocated
       once this function ends, before the newly created thread uses it. */
    prm = (struct tMPI_Thread_starter_param*)
              tMPI_Malloc(sizeof(struct tMPI_Thread_starter_param));
    prm->start_routine = start_routine;
    prm->param         = arg;

    *thread = (struct tMPI_Thread*)tMPI_Malloc(sizeof(struct tMPI_Thread)*1);

    if (g_ulHighestNumaNodeNumber != 0)
    {
        /* if running on a NUMA system, use the group and NUMA aware thread
           creation logic */
        (*thread)->th = tMPI_Thread_create_NUMA(NULL,
                                                0,
                                                tMPI_Win32_thread_starter,
                                                prm,
                                                0,
                                                &thread_id);
    }
    else
    {
        /* TODO: for now, non-NUMA systems don't set thread affinity. */
        (*thread)->th = CreateThread(NULL,
                                     0,
                                     tMPI_Win32_thread_starter,
                                     prm,
                                     0,
                                     &thread_id);
    }

    if ((*thread)->th == NULL)
    {
        /* BUG FIX: free the struct we allocated (*thread), not the caller's
           pointer-to-pointer; also release the starter params, which the
           never-started thread can no longer consume. */
        tMPI_Free(*thread);
        *thread = NULL;
        tMPI_Free(prm);
        tMPI_Fatal_error(TMPI_FARGS,"Failed to create thread, error code=%d",
                         GetLastError());
        return -1;
    }

    /* inherit the thread priority from the parent thread. */
    /* TODO: is there value in setting this, vs. just allowing it to default
       from the process?  currently, this limits the effectiveness of changing
       the priority in eg: TaskManager. */
    SetThreadPriority(((*thread)->th), GetThreadPriority(GetCurrentThread()));

    return 0;
}
コード例 #4
0
/* Delete a thread-specific-storage key: release the Win32 TLS slot it
   wraps, then free the heap-allocated key struct. Always returns 0. */
int tMPI_Thread_key_delete(tMPI_Thread_key_t key)
{
    /* release the OS TLS index first, then the wrapper that held it */
    TlsFree(key.key->wkey);
    tMPI_Free(key.key);
    return 0;
}
コード例 #5
0
/* Destroy a condition variable and free its backing storage.
   Returns 0 on success, EINVAL for a NULL argument. */
int tMPI_Thread_cond_destroy(tMPI_Thread_cond_t *cond)
{
    /* consistency fix: validate the argument the same way
       tMPI_Thread_mutex_destroy()/tMPI_Thread_barrier_destroy() do */
    if (cond == NULL)
    {
        return EINVAL;
    }

#if 0
    /* use this code once Vista is the minimum version required */
    /* windows doesn't have this function */
#else
    DeleteCriticalSection(&(cond->condp->wtr_lock));
    tMPI_Free(cond->condp);
#endif
    return 0;
}
コード例 #6
0
/* Tear down a mutex: delete the underlying Win32 critical section, then
   free the implementation struct.
   Returns 0 on success, EINVAL if mtx is NULL. */
int tMPI_Thread_mutex_destroy(tMPI_Thread_mutex_t *mtx)
{
    if (!mtx)
    {
        return EINVAL;
    }

    /* release the OS critical section before freeing its container */
    DeleteCriticalSection(&mtx->mutex->cs);
    tMPI_Free(mtx->mutex);
    return 0;
}
コード例 #7
0
/* Destroy a barrier: tear down its internal mutex and condition variable,
   then free the implementation struct.
   Returns 0 on success, EINVAL if barrier is NULL. */
int tMPI_Thread_barrier_destroy(tMPI_Thread_barrier_t *barrier)
{
    if (barrier == NULL)
    {
        return EINVAL;
    }

    /* cleanup: removed a dead '#if 0' branch that called
       DeleteCriticalSection() directly; 'cs' is a tMPI mutex here, so it
       must go through tMPI_Thread_mutex_destroy(). */
    tMPI_Thread_mutex_destroy(&(barrier->barrierp->cs));
    tMPI_Thread_cond_destroy(&(barrier->barrierp->cv));

    tMPI_Free(barrier->barrierp);

    return 0;
}
コード例 #8
0
ファイル: winthreads.c プロジェクト: yupinov/gromacs
/*  returns 0 on success.
    Success is returned if the system is non-NUMA, OR the system doesn't
    support appropriate NUMA APIs, OR the system is NUMA and we successfully
    initialized support.

    returns -1 on error.
    This can happen if an API returned an error, a memory allocation failed, or
    we failed to initialize affinity mapping information.
 */
int tMPI_Init_NUMA(void)
{
    /* module handle to kernel32.dll -- we already reference it, so it's already loaded */
    HMODULE hModKernel32 = NULL;
    /* 0-based NUMA node count -- does not imply all nodes have available (eg: hot-plug) processors */
    ULONG   ulHighestNumaNodeNumber;
    /* total number of processors available per affinity masks */
    DWORD   dwTotalProcessors = 0;
    ULONG   i                 = 0;

    /* calling thread PROCESSOR_NUMBER */
    PROCESSOR_NUMBER CurrentProcessorNumber;

    WORD wActiveGroupCount;
    WORD GroupIndex;

    /* array of processor information structures */
    MPI_NUMA_PROCESSOR_INFO *pMPI_ProcessorInfo = NULL;

    /* assume an error condition */
    int iRet = -1;

    hModKernel32 = GetModuleHandleA("kernel32.dll");

    if (hModKernel32 == NULL)
    {
        return 0;
    }

    /* obtain addresses of relevant NUMA functions, most of which are
       Windows 7 / Windows Server 2008R2 only functions
       this is done using GetProcAddress to enable the binary to run on older
       Windows versions.
     */

    func_GetNumaHighestNodeNumber = (func_GetNumaHighestNodeNumber_t) GetProcAddress( hModKernel32, "GetNumaHighestNodeNumber" );
    func_SetThreadIdealProcessor  = (func_SetThreadIdealProcessor_t) GetProcAddress( hModKernel32, "SetThreadIdealProcessor" );

    if (func_GetNumaHighestNodeNumber == NULL)
    {
        return 0;
    }

    /* determine if we're on a NUMA system and if so, determine the number of
       (potential) nodes */

    if (!func_GetNumaHighestNodeNumber( &ulHighestNumaNodeNumber ))
    {
        return -1;
    }

    func_SetThreadGroupAffinity            = (func_SetThreadGroupAffinity_t)GetProcAddress( hModKernel32, "SetThreadGroupAffinity" );
    func_SetThreadIdealProcessorEx         = (func_SetThreadIdealProcessorEx_t)GetProcAddress( hModKernel32, "SetThreadIdealProcessorEx" );
    func_CreateRemoteThreadEx              = (func_CreateRemoteThreadEx_t)GetProcAddress( hModKernel32, "CreateRemoteThreadEx" );
    func_GetNumaNodeProcessorMaskEx        = (func_GetNumaNodeProcessorMaskEx_t)GetProcAddress( hModKernel32, "GetNumaNodeProcessorMaskEx" );
    func_GetNumaProcessorNodeEx            = (func_GetNumaProcessorNodeEx_t)GetProcAddress( hModKernel32, "GetNumaProcessorNodeEx" );
    func_GetCurrentProcessorNumberEx       = (func_GetCurrentProcessorNumberEx_t)GetProcAddress( hModKernel32, "GetCurrentProcessorNumberEx" );
    func_GetActiveProcessorCount           = (func_GetActiveProcessorCount_t)GetProcAddress( hModKernel32, "GetActiveProcessorCount" );
    func_GetActiveProcessorGroupCount      = (func_GetActiveProcessorGroupCount_t)GetProcAddress( hModKernel32, "GetActiveProcessorGroupCount" );
    func_InitializeProcThreadAttributeList = (func_InitializeProcThreadAttributeList_t)GetProcAddress( hModKernel32, "InitializeProcThreadAttributeList" );
    func_UpdateProcThreadAttribute         = (func_UpdateProcThreadAttribute_t)GetProcAddress( hModKernel32, "UpdateProcThreadAttribute" );
    func_DeleteProcThreadAttributeList     = (func_DeleteProcThreadAttributeList_t)GetProcAddress( hModKernel32, "DeleteProcThreadAttributeList" );

    if ( (func_SetThreadGroupAffinity == NULL) ||
            (func_SetThreadIdealProcessorEx == NULL) ||
            (func_CreateRemoteThreadEx == NULL) ||
            (func_GetNumaNodeProcessorMaskEx == NULL) ||
            (func_GetNumaProcessorNodeEx == NULL) ||
            (func_GetCurrentProcessorNumberEx == NULL) ||
            (func_GetActiveProcessorCount == NULL) ||
            (func_GetActiveProcessorGroupCount == NULL) ||
            (func_InitializeProcThreadAttributeList == NULL) ||
            (func_UpdateProcThreadAttribute == NULL) ||
            (func_DeleteProcThreadAttributeList == NULL) )
    {
        /* if any addresses couldn't be located, assume NUMA functionality
           isn't supported */
        return 0;
    }
#if 0
    if (ulHighestNumaNodeNumber == 0)
    {
        /* system is not NUMA */
        return 0;
    }
#endif

    /* count the active processors across the groups */

    func_GetCurrentProcessorNumberEx(&CurrentProcessorNumber);

    wActiveGroupCount = func_GetActiveProcessorGroupCount();

    dwTotalProcessors = func_GetActiveProcessorCount( ALL_PROCESSOR_GROUPS );

#if !((defined WIN64 || defined _WIN64))
    /* WOW64 doesn't allow setting the affinity correctly beyond 32
       processors -- the KAFFINITY mask is only 32 bits wide
       This check is only here for completeness -- large systems should be
       running 64bit Gromacs code, where the processor quantity is not
       constrained.
       By failing here, the WOW64 32bit client will use normal CreateThread(),
       which can schedule up to 64 un-affinitized threads
     */

    if (dwTotalProcessors > 32)
    {
        return 0;
    }
#endif

    /* allocate array of processor info blocks */

    pMPI_ProcessorInfo = malloc( sizeof(MPI_NUMA_PROCESSOR_INFO) *
                                 dwTotalProcessors );
    if (pMPI_ProcessorInfo == NULL)
    {
        goto cleanup;
    }

    /* zero fill to cover reserved must-be-zero fields */
    memset(pMPI_ProcessorInfo, 0, sizeof(MPI_NUMA_PROCESSOR_INFO) * dwTotalProcessors);

    /* loop through each processor group, and for each group, capture the
       processor numbers and NUMA node information. */

    for (GroupIndex = 0; GroupIndex < wActiveGroupCount; GroupIndex++)
    {
        DWORD dwGroupProcessorCount;
        BYTE  ProcessorIndex;

        dwGroupProcessorCount = func_GetActiveProcessorCount( GroupIndex );

        for (ProcessorIndex = 0; ProcessorIndex < dwGroupProcessorCount;
                ProcessorIndex++)
        {
            PROCESSOR_NUMBER *pProcessorNumber;
            GROUP_AFFINITY   *pGroupAffinity;
            USHORT           *pNodeNumber;

            /* BUG FIX: bounds-check BEFORE writing. The old check ran after
               the store and used '>', so an out-of-bounds write at
               i == dwTotalProcessors went undetected one element too late. */
            if (i >= dwTotalProcessors)
            {
                goto cleanup;
            }

            pProcessorNumber = &(pMPI_ProcessorInfo[i].ProcessorNumber);
            pGroupAffinity   = &(pMPI_ProcessorInfo[i].GroupAffinity);
            pNodeNumber      = &(pMPI_ProcessorInfo[i].NumaNodeNumber);

            pProcessorNumber->Group  = GroupIndex;
            pProcessorNumber->Number = ProcessorIndex;

            /* save an index to the processor array entry for the current processor
               this is used to enable subsequent threads to be created in a round
               robin fashion starting at the next array entry
             */

            if ( (CurrentProcessorNumber.Group == pProcessorNumber->Group ) &&
                    (CurrentProcessorNumber.Number == pProcessorNumber->Number) )
            {
                /* set global: current thread index into processor array */
                g_ulThreadIndex = i;
            }

            /* capture the node number and group affinity associated with processor entry
               any failures here are assumed to be catastrophic and disable
               the group & NUMA aware thread support
             */

            if (!func_GetNumaProcessorNodeEx(pProcessorNumber, pNodeNumber))
            {
                goto cleanup;
            }

            if (!func_GetNumaNodeProcessorMaskEx(*pNodeNumber, pGroupAffinity))
            {
                goto cleanup;
            }

            /* future enhancement: construct GroupAffinity (single) processor
               mask within NUMA node for this processor entry */

            /* increment processor array index */
            i++;
        }
    }


    /* capture number of processors, highest NUMA node number, and processor
       array */
    g_ulTotalProcessors       = dwTotalProcessors;
    g_ulHighestNumaNodeNumber = ulHighestNumaNodeNumber;
    g_MPI_ProcessorInfo       = pMPI_ProcessorInfo;

    iRet = 0;

cleanup:

    if (iRet != 0)
    {
        if (pMPI_ProcessorInfo)
        {
            tMPI_Free( pMPI_ProcessorInfo );
        }
    }

    /* BUG FIX: return the tracked status; the old code returned 0
       unconditionally, so callers could never observe the documented -1
       error result. */
    return iRet;
}
コード例 #9
0
/* Create a thread with group- and NUMA-aware placement.

   Threads are placed round-robin over the processor entries captured by
   tMPI_Init_NUMA(), so successive calls spread across processor groups and
   NUMA nodes. Parameters mirror CreateThread()/CreateRemoteThreadEx().

   Returns the new thread HANDLE, or NULL on failure. */
HANDLE tMPI_Thread_create_NUMA(LPSECURITY_ATTRIBUTES lpThreadAttributes,
                               SIZE_T dwStackSize,
                               LPTHREAD_START_ROUTINE lpStartAddress,
                               LPVOID lpParameter,
                               DWORD dwCreationFlags,
                               LPDWORD lpThreadId)
{
    LPPROC_THREAD_ATTRIBUTE_LIST pAttributeList       = NULL;
    HANDLE                       hThread              = NULL;
    SIZE_T                       cbAttributeList      = 0;
    GROUP_AFFINITY               GroupAffinity;
    PROCESSOR_NUMBER             IdealProcessorNumber;
    ULONG                        CurrentProcessorIndex;
    BOOL                         bAttrListInitialized = FALSE;

    /* for each thread created, round-robin through the set of valid
       processors and affinity masks.
       the assumption is that callers of tMPI_Thread_create_NUMA are creating
       threads that saturate a given processor.
       for cases where threads are being created that rarely do work, standard
       thread creation (eg: CreateThread) should be invoked instead.
    */

    CurrentProcessorIndex = (ULONG)InterlockedIncrement((volatile LONG *)&g_ulThreadIndex);
    CurrentProcessorIndex = CurrentProcessorIndex % g_ulTotalProcessors;

    /* group, mask. */
    memcpy(&GroupAffinity,
           &(g_MPI_ProcessorInfo[CurrentProcessorIndex].GroupAffinity),
           sizeof(GROUP_AFFINITY));

    /* group, processor number */
    memcpy(&IdealProcessorNumber,
           &(g_MPI_ProcessorInfo[CurrentProcessorIndex].ProcessorNumber),
           sizeof(PROCESSOR_NUMBER));

    /* first call (list is still NULL) only queries the required allocation
       size for the AttributeList; ERROR_INSUFFICIENT_BUFFER is the expected
       outcome here, anything else is a real failure */
    if (!func_InitializeProcThreadAttributeList(pAttributeList,
                                                2,
                                                0,
                                                &cbAttributeList))
    {
        DWORD dwLastError = GetLastError();
        if (dwLastError != ERROR_INSUFFICIENT_BUFFER)
        {
            tMPI_Fatal_error(TMPI_FARGS,
                             "InitializeProcThreadAttributeList, error code=%d",
                             dwLastError);
            goto cleanup;
        }
    }

    pAttributeList = (LPPROC_THREAD_ATTRIBUTE_LIST)tMPI_Malloc( cbAttributeList );
    if (pAttributeList == NULL)
    {
        tMPI_Fatal_error(TMPI_FARGS,"Failed to allocate pAttributeList");
        goto cleanup;
    }

    memset( pAttributeList, 0, cbAttributeList );

    if (!func_InitializeProcThreadAttributeList(pAttributeList,
                                                2,
                                                0,
                                                &cbAttributeList))
    {
        tMPI_Fatal_error(TMPI_FARGS,
                         "InitializeProcThreadAttributeList, error code=%d",
                         GetLastError());
        goto cleanup;
    }
    bAttrListInitialized = TRUE;

    if (!func_UpdateProcThreadAttribute(pAttributeList,
                                        0,
                                        PROC_THREAD_ATTRIBUTE_GROUP_AFFINITY,
                                        &GroupAffinity,
                                        sizeof(GroupAffinity),
                                        NULL,
                                        NULL))
    {
        tMPI_Fatal_error(TMPI_FARGS,"UpdateProcThreadAttribute, error code=%d",
                         GetLastError());
        goto cleanup;
    }

    if (!func_UpdateProcThreadAttribute(pAttributeList,
                                        0,
                                        PROC_THREAD_ATTRIBUTE_IDEAL_PROCESSOR,
                                        &IdealProcessorNumber,
                                        sizeof(IdealProcessorNumber),
                                        NULL,
                                        NULL))
    {
        tMPI_Fatal_error(TMPI_FARGS,"UpdateProcThreadAttribute, error code=%d",
                         GetLastError());
        goto cleanup;
    }

    hThread = func_CreateRemoteThreadEx( GetCurrentProcess(),
                                         lpThreadAttributes,
                                         dwStackSize,
                                         lpStartAddress,
                                         lpParameter,
                                         dwCreationFlags,
                                         pAttributeList,
                                         lpThreadId);

    /* (removed a '#if 0' debug printf block that was self-marked
       "debug only or DISCARD") */

cleanup:

    /* BUG FIX: error paths taken after the attribute list was initialized
       previously skipped DeleteProcThreadAttributeList(), leaking whatever
       state the list holds internally; it is now deleted on every exit. */
    if (bAttrListInitialized)
    {
        func_DeleteProcThreadAttributeList( pAttributeList );
    }

    if (pAttributeList)
    {
        tMPI_Free( pAttributeList );
    }

    return hThread;
}