static TC_THREAD_PROC EncryptionThreadProc (void *threadArg)
{
	EncryptionThreadPoolWorkItem *workItem;

	while (!StopPending)
	{
		TC_ACQUIRE_MUTEX (&DequeueMutex);

		workItem = &WorkItemQueue[DequeuePosition++];

		if (DequeuePosition >= TC_ENC_THREAD_POOL_QUEUE_SIZE)
			DequeuePosition = 0;

		while (!StopPending && GetWorkItemState (workItem) != WorkItemReady)
		{
			TC_WAIT_EVENT (WorkItemReadyEvent);
		}

		SetWorkItemState (workItem, WorkItemBusy);

		TC_RELEASE_MUTEX (&DequeueMutex);

		if (StopPending)
			break;

		switch (workItem->Type)
		{
		case DecryptDataUnitsWork:
			DecryptDataUnitsCurrentThread (workItem->Encryption.Data, &workItem->Encryption.StartUnitNo, workItem->Encryption.UnitCount, workItem->Encryption.CryptoInfo);
			break;

		case EncryptDataUnitsWork:
			EncryptDataUnitsCurrentThread (workItem->Encryption.Data, &workItem->Encryption.StartUnitNo, workItem->Encryption.UnitCount, workItem->Encryption.CryptoInfo);
			break;

		case DeriveKeyWork:
			switch (workItem->KeyDerivation.Pkcs5Prf)
			{
			case RIPEMD160:
				derive_key_ripemd160 (workItem->KeyDerivation.Password, workItem->KeyDerivation.PasswordLength, workItem->KeyDerivation.Salt, PKCS5_SALT_SIZE,
					workItem->KeyDerivation.IterationCount, workItem->KeyDerivation.DerivedKey, GetMaxPkcs5OutSize());
				break;

			case SHA512:
				derive_key_sha512 (workItem->KeyDerivation.Password, workItem->KeyDerivation.PasswordLength, workItem->KeyDerivation.Salt, PKCS5_SALT_SIZE,
					workItem->KeyDerivation.IterationCount, workItem->KeyDerivation.DerivedKey, GetMaxPkcs5OutSize());
				break;

			case WHIRLPOOL:
				derive_key_whirlpool (workItem->KeyDerivation.Password, workItem->KeyDerivation.PasswordLength, workItem->KeyDerivation.Salt, PKCS5_SALT_SIZE,
					workItem->KeyDerivation.IterationCount, workItem->KeyDerivation.DerivedKey, GetMaxPkcs5OutSize());
				break;

			case SHA256:
				derive_key_sha256 (workItem->KeyDerivation.Password, workItem->KeyDerivation.PasswordLength, workItem->KeyDerivation.Salt, PKCS5_SALT_SIZE,
					workItem->KeyDerivation.IterationCount, workItem->KeyDerivation.DerivedKey, GetMaxPkcs5OutSize());
				break;

			case STREEBOG:
				derive_key_streebog(workItem->KeyDerivation.Password, workItem->KeyDerivation.PasswordLength, workItem->KeyDerivation.Salt, PKCS5_SALT_SIZE,
					workItem->KeyDerivation.IterationCount, workItem->KeyDerivation.DerivedKey, GetMaxPkcs5OutSize());
				break;

			default:
				TC_THROW_FATAL_EXCEPTION;
			}

			InterlockedExchange (workItem->KeyDerivation.CompletionFlag, TRUE);
			TC_SET_EVENT (*workItem->KeyDerivation.CompletionEvent);

			if (InterlockedDecrement (workItem->KeyDerivation.OutstandingWorkItemCount) == 0)
				TC_SET_EVENT (*workItem->KeyDerivation.NoOutstandingWorkItemEvent);

			SetWorkItemState (workItem, WorkItemFree);
			TC_SET_EVENT (WorkItemCompletedEvent);
			continue;

		default:
			TC_THROW_FATAL_EXCEPTION;
		}

		if (workItem != workItem->FirstFragment)
		{
			SetWorkItemState (workItem, WorkItemFree);
			TC_SET_EVENT (WorkItemCompletedEvent);
		}

		if (InterlockedDecrement (&workItem->FirstFragment->OutstandingFragmentCount) == 0)
			TC_SET_EVENT (workItem->FirstFragment->ItemCompletedEvent);
	}

#ifdef DEVICE_DRIVER
	PsTerminateSystemThread (STATUS_SUCCESS);
#else
	_endthreadex (0);
    return 0;
#endif
}
void EncryptionThreadPoolDoWork (EncryptionThreadPoolWorkType type, byte *data, const UINT64_STRUCT *startUnitNo, TC_LARGEST_COMPILER_UINT unitCount, PCRYPTO_INFO cryptoInfo)
{
	size_t fragmentCount;
	size_t unitsPerFragment;
	size_t remainder;

	byte *fragmentData;
	TC_LARGEST_COMPILER_UINT fragmentStartUnitNo;

	EncryptionThreadPoolWorkItem *workItem;
	EncryptionThreadPoolWorkItem *firstFragmentWorkItem;
	
	if (unitCount == 0)
		return;
	
	if (!ThreadPoolRunning || unitCount == 1)
	{
		switch (type)
		{
		case DecryptDataUnitsWork:
			DecryptDataUnitsCurrentThread (data, startUnitNo, unitCount, cryptoInfo);
			break;

		case EncryptDataUnitsWork:
			EncryptDataUnitsCurrentThread (data, startUnitNo, unitCount, cryptoInfo);
			break;

		default:
			TC_THROW_FATAL_EXCEPTION;
		}

		return;
	}

	if (unitCount <= ThreadCount)
	{
		fragmentCount = (size_t) unitCount;
		unitsPerFragment = 1;
		remainder = 0;
	}
	else
	{
		/* Note that it is not efficient to divide the data into fragments smaller than a few hundred bytes.
		The reason is that the overhead associated with thread handling would in most cases make a multi-threaded 
		process actually slower than a single-threaded process. */

		fragmentCount = ThreadCount;
		unitsPerFragment = (size_t) unitCount / ThreadCount;
		remainder = (size_t) unitCount % ThreadCount;

		if (remainder > 0)
			++unitsPerFragment;
	}
	
	fragmentData = data;
	fragmentStartUnitNo = startUnitNo->Value;

	TC_ACQUIRE_MUTEX (&EnqueueMutex);
	firstFragmentWorkItem = &WorkItemQueue[EnqueuePosition];

	while (GetWorkItemState (firstFragmentWorkItem) != WorkItemFree)
	{
		TC_WAIT_EVENT (WorkItemCompletedEvent);
	}

	firstFragmentWorkItem->OutstandingFragmentCount = fragmentCount;

	while (fragmentCount-- > 0)
	{
		workItem = &WorkItemQueue[EnqueuePosition++];
		if (EnqueuePosition >= TC_ENC_THREAD_POOL_QUEUE_SIZE)
			EnqueuePosition = 0;

		while (GetWorkItemState (workItem) != WorkItemFree)
		{
			TC_WAIT_EVENT (WorkItemCompletedEvent);
		}

		workItem->Type = type;
		workItem->FirstFragment = firstFragmentWorkItem;

		workItem->Encryption.CryptoInfo = cryptoInfo;
		workItem->Encryption.Data = fragmentData;
		workItem->Encryption.UnitCount = unitsPerFragment;
		workItem->Encryption.StartUnitNo.Value = fragmentStartUnitNo;

 		fragmentData += unitsPerFragment * ENCRYPTION_DATA_UNIT_SIZE;
		fragmentStartUnitNo += unitsPerFragment;

		if (remainder > 0 && --remainder == 0)
			--unitsPerFragment;

		SetWorkItemState (workItem, WorkItemReady);
		TC_SET_EVENT (WorkItemReadyEvent);
	}

	TC_RELEASE_MUTEX (&EnqueueMutex);

	TC_WAIT_EVENT (firstFragmentWorkItem->ItemCompletedEvent);
	SetWorkItemState (firstFragmentWorkItem, WorkItemFree);
	TC_SET_EVENT (WorkItemCompletedEvent);
}