Example #1
static void _profile_put_message_block( uint32_t id, const char* message )
{
	profile_block_t* subblock = 0;
	int len = (int)string_length( message );

	//Allocate new master block
	profile_block_t* block = _profile_allocate_block();
	if( !block )
		return;
	block->data.id = id;
	block->data.processor = thread_hardware();
	block->data.thread = (uint32_t)thread_id();
	block->data.start  = time_current() - _profile_ground_time;
	block->data.end = atomic_add32( &_profile_counter, 1 );
	memcpy( block->data.name, message, ( len >= MAX_MESSAGE_LENGTH ) ? MAX_MESSAGE_LENGTH : len );

	len -= MAX_MESSAGE_LENGTH;
	message += MAX_MESSAGE_LENGTH;
	subblock = block;

	while( len > 0 )
	{
		//add subblock
		profile_block_t* cblock = _profile_allocate_block();
		uint16_t cblock_index;
		if( !cblock )
			return;
		cblock_index = BLOCK_INDEX( cblock );
		cblock->data.id = id + 1;
		cblock->data.parentid = (uint32_t)subblock->data.end;
		cblock->data.processor = block->data.processor;
		cblock->data.thread = block->data.thread;
		cblock->data.start  = block->data.start;
		cblock->data.end    = atomic_add32( &_profile_counter, 1 );
		memcpy( cblock->data.name, message, ( len >= MAX_MESSAGE_LENGTH ) ? MAX_MESSAGE_LENGTH : len );

		cblock->sibling = subblock->child;
		if( cblock->sibling )
			GET_BLOCK( cblock->sibling )->previous = cblock_index;
		subblock->child = cblock_index;
		cblock->previous = BLOCK_INDEX( subblock );
		subblock = cblock;

		len -= MAX_MESSAGE_LENGTH;
		message += MAX_MESSAGE_LENGTH;
	}

	_profile_put_simple_block( BLOCK_INDEX( block ) );
}
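In this example and the next, the value returned by atomic_add32( &_profile_counter, 1 ) doubles as a unique block id: every caller gets a distinct, increasing value even when several threads log at once, and the continuation blocks above link back to it through parentid. A minimal sketch of that idiom, using C11 atomics as a stand-in for the library's wrapper (names are illustrative; note that atomic_fetch_add returns the value before the addition, while some wrappers return the value after it):

#include <stdatomic.h>
#include <stdint.h>

static atomic_uint_fast32_t profile_counter;

static uint32_t next_profile_id(void)
{
	/* Fetch-and-add hands every concurrent caller a distinct value. */
	return (uint32_t)atomic_fetch_add(&profile_counter, 1);
}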
Example #2
void profile_begin_block( const char* message )
{
	uint32_t parent;
	if( !_profile_enable )
		return;

	parent = get_thread_profile_block();
	if( !parent )
	{
		//Allocate new master block
		profile_block_t* block = _profile_allocate_block();
		uint32_t blockindex;
		if( !block )
			return;
		blockindex = BLOCK_INDEX( block );
		block->data.id = atomic_add32( &_profile_counter, 1 );
		string_copy( block->data.name, message, MAX_MESSAGE_LENGTH );
		block->data.processor = thread_hardware();
		block->data.thread = (uint32_t)thread_id();
		block->data.start  = time_current() - _profile_ground_time;
		set_thread_profile_block( blockindex );
	}
	else
	{
		//Allocate new child block
		profile_block_t* parentblock;
		profile_block_t* subblock = _profile_allocate_block();
		uint32_t subindex;
		if( !subblock )
			return;
		subindex = BLOCK_INDEX( subblock );
		parentblock = GET_BLOCK( parent );
		subblock->data.id = atomic_add32( &_profile_counter, 1 );
		subblock->data.parentid = parentblock->data.id;
		string_copy( subblock->data.name, message, MAX_MESSAGE_LENGTH );
		subblock->data.processor = thread_hardware();
		subblock->data.thread = (uint32_t)thread_id();
		subblock->data.start  = time_current() - _profile_ground_time;
		subblock->previous = parent;
		subblock->sibling = parentblock->child;
		if( parentblock->child )
			GET_BLOCK( parentblock->child )->previous = subindex;
		parentblock->child = subindex;
		set_thread_profile_block( subindex );
	}
}
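The else branch above also shows how the profiler threads child blocks onto their parent: blocks are addressed by small indices rather than pointers, and a new child is pushed onto the head of the parent's sibling list while the old head's previous index is patched up. A minimal sketch of that insertion, assuming a flat block array (block_t and link_child are illustrative stand-ins for the library's BLOCK_INDEX/GET_BLOCK machinery, and the locking concerns of the real code are ignored):

#include <stdint.h>

typedef struct block {
	uint32_t child;     /* index of first child, 0 if none */
	uint32_t sibling;   /* index of next sibling, 0 if none */
	uint32_t previous;  /* index of previous sibling, or of the parent for the head child */
} block_t;

static void link_child(block_t* blocks, uint32_t parent, uint32_t child)
{
	blocks[child].previous = parent;
	blocks[child].sibling  = blocks[parent].child;
	if (blocks[parent].child)
		blocks[blocks[parent].child].previous = child;
	blocks[parent].child = child;   /* the new child becomes the list head */
}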
Example #3
sval
h_remove(struct cpu_thread *thread, uval flags, uval ptex, uval avpn)
{
	union pte *cur_htab = (union pte *)GET_HTAB(thread->cpu->os);
	union pte *pte;
	uval *shadow;

	if (check_index(thread->cpu->os, ptex))
		return H_Parameter;

	/*
	 * XXX acquire & release per-pte lock (bit 57)
	 * specified in 18.5.4.1.1
	 */

	pte = &cur_htab[ptex];
	shadow = &thread->cpu->os->htab.shadow[ptex];

	if ((flags & H_AVPN) && ((pte->bits.avpn << 7) != avpn))
		return H_Not_Found;

	if ((flags & H_ANDCOND) && ((avpn & pte->words.vsidWord) != 0))
		return H_Not_Found;

	/* return old PTE in regs 4 and 5 */
	save_pte(pte, shadow, thread, 4, 5);

	/* XXX - I'm very skeptical of doing ANYTHING if not bits.v */
	/* XXX - I think the spec should be questioned in this case (MFM) */
	if (pte->bits.v) {
		struct logical_chunk_info *lci;
		uval laddr = *shadow << LOG_PGSIZE;

		lci = laddr_to_lci(thread->cpu->os, laddr);

		if (!lci)
			return H_Parameter;

		if (lci->lci_arch_ops->amo_rem_ptep) {
			lci->lci_arch_ops->amo_rem_ptep(lci, laddr,
							pte->bits.rpn, pte);
		}
		atomic_add32(&lci->lci_ptecount, ~0);	/* subtract 1 */
	}

	/*
	 * invalidating the pte is the only update required,
	 * though the memory consistency requirements are large.
	 */
	pte->bits.v = 0;
	ptesync();
	do_tlbie(pte, ptex);

	return H_Success;
}
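One detail worth calling out above is atomic_add32(&lci->lci_ptecount, ~0): on a 32-bit two's-complement counter, ~0 has every bit set, so adding it wraps the value down by exactly one, i.e. it is an atomic decrement. A minimal sketch of the same arithmetic with C11 atomics (variable names are illustrative):

#include <assert.h>
#include <stdatomic.h>
#include <stdint.h>

int main(void)
{
	_Atomic uint32_t ptecount = 5;
	atomic_fetch_add(&ptecount, ~0u);   /* adds 0xFFFFFFFF, i.e. subtracts 1 modulo 2^32 */
	assert(atomic_load(&ptecount) == 4);
	return 0;
}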
Example #4
 void task_pool::join()
 {
    while (atomic_add32(&m_num_outstanding_tasks, 0) > 0)
    {
       task tsk;
       if (m_task_stack.pop(tsk))
       {
          process_task(tsk);
       }
       else
       {
          lzham_sleep(1);
       }
    }
 }
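The loop condition above relies on another common idiom: atomic_add32(&m_num_outstanding_tasks, 0) adds nothing, but returns the counter's current value as an atomic read (and, on typical implementations, with stronger ordering than a plain load). With C11 atomics the same read is simply an atomic_load; a minimal sketch with illustrative names:

#include <stdatomic.h>
#include <stdint.h>

static atomic_int num_outstanding_tasks;

static int32_t outstanding_tasks(void)
{
	/* Same observable value as atomic_add32(&num_outstanding_tasks, 0),
	   without paying for a read-modify-write. */
	return (int32_t)atomic_load(&num_outstanding_tasks);
}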
Example #5
    void task_pool::join()
    {
        // Try to steal any outstanding tasks. This could cause one or more worker threads to wake up and immediately go back to sleep, which is wasteful but should be harmless.
        task tsk;
        while (m_task_stack.pop(tsk))
            process_task(tsk);

        // At this point the task stack is empty.
        // Now wait for all concurrent tasks to complete. The m_all_tasks_completed semaphore has a max count of 1, so it's possible it could have saturated to 1 as the tasks
        // were issued and asynchronously completed, so this loop may iterate a few times.
        const int total_submitted_tasks = static_cast<int>(atomic_add32(&m_total_submitted_tasks, 0));
        while (m_total_completed_tasks != total_submitted_tasks)
        {
            // If the previous (m_total_completed_tasks != total_submitted_tasks) check failed the semaphore MUST be eventually signalled once the last task completes.
            // So I think this can actually be an INFINITE delay, but it shouldn't really matter if it's 1ms.
            m_all_tasks_completed.wait(1);
        }
    }
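The comments above describe the bookkeeping behind this second join() variant: one counter tracks tasks submitted, another tracks tasks completed, and the semaphore only exists so the joining thread can sleep instead of spinning; because it saturates at a count of one, the loop re-checks the counters after every wakeup. A minimal sketch of that counter check, with hypothetical names and the semaphore left out:

#include <stdatomic.h>
#include <stdbool.h>

static atomic_int total_submitted_tasks;
static atomic_int total_completed_tasks;

static bool all_tasks_done(void)
{
	/* Workers increment total_completed_tasks once per finished task;
	   join() keeps waiting until it matches the number submitted. */
	int submitted = atomic_load(&total_submitted_tasks);
	return atomic_load(&total_completed_tasks) == submitted;
}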
Example #6
static void*
add_thread(void* arg) {
	int loop = 0;
	int32_t icount = 0;
	FOUNDATION_UNUSED(arg);
	while (!thread_try_wait(0) && (loop < 65535)) {
		for (icount = 0; icount < 128; ++icount) {
			atomic_add32(&val_32, icount % 2 ? -icount : icount);
			atomic_exchange_and_add64(&val_64, icount % 2 ? -icount : icount);
		}
		for (icount = 0; icount < 128; ++icount) {
			atomic_exchange_and_add32(&val_32, icount % 2 ? icount : -icount);
			atomic_add64(&val_64, icount % 2 ? icount : -icount);
		}

		++loop;
		thread_yield();
	}
	return 0;
}
Example #7
void* add_thread( object_t thread, void* arg )
{
    int loop = 0;
    int32_t icount = 0;
    while( !thread_should_terminate( thread ) && ( loop < 65535 ) )
    {
        for( icount = 0; icount < 128; ++icount )
        {
            atomic_add32( &val_32, icount % 2 ? -icount : icount );
            atomic_exchange_and_add64( &val_64, icount % 2 ? -icount : icount );
        }
        for( icount = 0; icount < 128; ++icount )
        {
            atomic_exchange_and_add32( &val_32, icount % 2 ? icount : -icount );
            atomic_add64( &val_64, icount % 2 ? icount : -icount );
        }

        ++loop;
        thread_yield();
    }
    return 0;
}
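Both test variants above (Examples #6 and #7) rely on the same cancellation argument: within one pass of the while loop the two inner for loops add equal and opposite sums to val_32 and val_64, so once every add_thread worker has run to completion both counters should be back at their starting value. A minimal, single-threaded sketch of that arithmetic (plain ints stand in for the atomic types):

#include <assert.h>

int main(void)
{
	int icount, net = 0;
	for (icount = 0; icount < 128; ++icount)
		net += (icount % 2) ? -icount : icount;   /* mirrors the first inner loop */
	for (icount = 0; icount < 128; ++icount)
		net += (icount % 2) ? icount : -icount;   /* mirrors the second inner loop */
	assert(net == 0);   /* each pass through the outer loop cancels out */
	return 0;
}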
Example #8
osd_work_item *osd_work_item_queue_multiple(osd_work_queue *queue, osd_work_callback callback, INT32 numitems, void *parambase, INT32 paramstep, UINT32 flags)
{
	osd_work_item *itemlist = NULL, *lastitem = NULL;
	osd_work_item **item_tailptr = &itemlist;
	INT32 lockslot;
	int itemnum;

	// loop over items, building up a local list of work
	for (itemnum = 0; itemnum < numitems; itemnum++)
	{
		osd_work_item *item;

		// first allocate a new work item; try the free list first
		INT32 lockslot = osd_scalable_lock_acquire(queue->lock);
		do
		{
			item = (osd_work_item *)queue->free;
		} while (item != NULL && compare_exchange_ptr((PVOID volatile *)&queue->free, item, item->next) != item);
		osd_scalable_lock_release(queue->lock, lockslot);

		// if nothing, allocate something new
		if (item == NULL)
		{
			// allocate the item
			item = (osd_work_item *)osd_malloc(sizeof(*item));
			if (item == NULL)
				return NULL;
			item->event = NULL;
			item->queue = queue;
			item->done = FALSE;
		}
		else
		{
			atomic_exchange32(&item->done, FALSE); // needs to be set this way to prevent data race/usage of uninitialized memory on Linux
		}

		// fill in the basics
		item->next = NULL;
		item->callback = callback;
		item->param = parambase;
		item->result = NULL;
		item->flags = flags;

		// advance to the next
		lastitem = item;
		*item_tailptr = item;
		item_tailptr = &item->next;
		parambase = (UINT8 *)parambase + paramstep;
	}

	// enqueue the whole thing within the critical section
	lockslot = osd_scalable_lock_acquire(queue->lock);
	*queue->tailptr = itemlist;
	queue->tailptr = item_tailptr;
	osd_scalable_lock_release(queue->lock, lockslot);

	// increment the number of items in the queue
	atomic_add32(&queue->items, numitems);
	add_to_stat(&queue->itemsqueued, numitems);

	// look for free threads to do the work
	if (queue->livethreads < queue->threads)
	{
		int threadnum;

		// iterate over all the threads
		for (threadnum = 0; threadnum < queue->threads; threadnum++)
		{
			work_thread_info *thread = &queue->thread[threadnum];

			// if this thread is not active, wake it up
			if (!thread->active)
			{
				osd_event_set(thread->wakeevent);
				add_to_stat(&queue->setevents, 1);

				// for non-shared, the first one we find is good enough
				if (--numitems == 0)
					break;
			}
		}
	}

	// if no threads, run the queue now on this thread
	if (queue->threads == 0)
	{
		end_timing(queue->thread[0].waittime);
		worker_thread_process(queue, &queue->thread[0]);
		begin_timing(queue->thread[0].waittime);
	}
	// only return the item if it won't get released automatically
	return (flags & WORK_ITEM_FLAG_AUTO_RELEASE) ? NULL : lastitem;
}
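The free-list recycling at the top of the loop is a compare-and-swap pop: read the current head, then swap in head->next only if the head has not changed in the meantime. A minimal sketch of that loop with C11 atomics (types are illustrative; the real code also holds queue->lock around the loop, which among other things helps avoid the ABA hazard a completely lock-free pop could have):

#include <stdatomic.h>
#include <stddef.h>

typedef struct work_item {
	struct work_item* next;
} work_item;

static work_item* free_list_pop(work_item* _Atomic* freelist)
{
	work_item* item = atomic_load(freelist);
	/* Retry until the head we read is still the head when we swap it out;
	   a failed compare-exchange reloads item with the current head. */
	while (item && !atomic_compare_exchange_weak(freelist, &item, item->next))
		;
	return item;
}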