static TC_THREAD_PROC EncryptionThreadProc (void *threadArg) { EncryptionThreadPoolWorkItem *workItem; while (!StopPending) { TC_ACQUIRE_MUTEX (&DequeueMutex); workItem = &WorkItemQueue[DequeuePosition++]; if (DequeuePosition >= TC_ENC_THREAD_POOL_QUEUE_SIZE) DequeuePosition = 0; while (!StopPending && GetWorkItemState (workItem) != WorkItemReady) { TC_WAIT_EVENT (WorkItemReadyEvent); } SetWorkItemState (workItem, WorkItemBusy); TC_RELEASE_MUTEX (&DequeueMutex); if (StopPending) break; switch (workItem->Type) { case DecryptDataUnitsWork: DecryptDataUnitsCurrentThread (workItem->Encryption.Data, &workItem->Encryption.StartUnitNo, workItem->Encryption.UnitCount, workItem->Encryption.CryptoInfo); break; case EncryptDataUnitsWork: EncryptDataUnitsCurrentThread (workItem->Encryption.Data, &workItem->Encryption.StartUnitNo, workItem->Encryption.UnitCount, workItem->Encryption.CryptoInfo); break; case DeriveKeyWork: switch (workItem->KeyDerivation.Pkcs5Prf) { case RIPEMD160: derive_key_ripemd160 (workItem->KeyDerivation.Password, workItem->KeyDerivation.PasswordLength, workItem->KeyDerivation.Salt, PKCS5_SALT_SIZE, workItem->KeyDerivation.IterationCount, workItem->KeyDerivation.DerivedKey, GetMaxPkcs5OutSize()); break; case SHA512: derive_key_sha512 (workItem->KeyDerivation.Password, workItem->KeyDerivation.PasswordLength, workItem->KeyDerivation.Salt, PKCS5_SALT_SIZE, workItem->KeyDerivation.IterationCount, workItem->KeyDerivation.DerivedKey, GetMaxPkcs5OutSize()); break; case WHIRLPOOL: derive_key_whirlpool (workItem->KeyDerivation.Password, workItem->KeyDerivation.PasswordLength, workItem->KeyDerivation.Salt, PKCS5_SALT_SIZE, workItem->KeyDerivation.IterationCount, workItem->KeyDerivation.DerivedKey, GetMaxPkcs5OutSize()); break; case SHA256: derive_key_sha256 (workItem->KeyDerivation.Password, workItem->KeyDerivation.PasswordLength, workItem->KeyDerivation.Salt, PKCS5_SALT_SIZE, workItem->KeyDerivation.IterationCount, workItem->KeyDerivation.DerivedKey, 
GetMaxPkcs5OutSize()); break; case STREEBOG: derive_key_streebog(workItem->KeyDerivation.Password, workItem->KeyDerivation.PasswordLength, workItem->KeyDerivation.Salt, PKCS5_SALT_SIZE, workItem->KeyDerivation.IterationCount, workItem->KeyDerivation.DerivedKey, GetMaxPkcs5OutSize()); break; default: TC_THROW_FATAL_EXCEPTION; } InterlockedExchange (workItem->KeyDerivation.CompletionFlag, TRUE); TC_SET_EVENT (*workItem->KeyDerivation.CompletionEvent); if (InterlockedDecrement (workItem->KeyDerivation.OutstandingWorkItemCount) == 0) TC_SET_EVENT (*workItem->KeyDerivation.NoOutstandingWorkItemEvent); SetWorkItemState (workItem, WorkItemFree); TC_SET_EVENT (WorkItemCompletedEvent); continue; default: TC_THROW_FATAL_EXCEPTION; } if (workItem != workItem->FirstFragment) { SetWorkItemState (workItem, WorkItemFree); TC_SET_EVENT (WorkItemCompletedEvent); } if (InterlockedDecrement (&workItem->FirstFragment->OutstandingFragmentCount) == 0) TC_SET_EVENT (workItem->FirstFragment->ItemCompletedEvent); } #ifdef DEVICE_DRIVER PsTerminateSystemThread (STATUS_SUCCESS); #else _endthreadex (0); return 0; #endif }
/*
 * Encrypts or decrypts unitCount data units starting at data, splitting the
 * work across the thread pool and returning only after all of it is done.
 *
 * type         DecryptDataUnitsWork or EncryptDataUnitsWork; any other value
 *              raises TC_THROW_FATAL_EXCEPTION on the single-threaded path.
 * data         Buffer holding the data units; fragments are laid out
 *              ENCRYPTION_DATA_UNIT_SIZE bytes per unit.
 * startUnitNo  Number of the first data unit in the buffer.
 * unitCount    Number of data units to process; 0 is a no-op.
 * cryptoInfo   Passed through unchanged to the per-thread encrypt/decrypt routine.
 *
 * When the pool is not running, or there is only one unit, the work is done
 * on the calling thread. Otherwise the units are divided into at most
 * ThreadCount fragments, which are queued for the worker threads.
 */
void EncryptionThreadPoolDoWork (EncryptionThreadPoolWorkType type, byte *data, const UINT64_STRUCT *startUnitNo, TC_LARGEST_COMPILER_UINT unitCount, PCRYPTO_INFO cryptoInfo)
{
	size_t fragmentsLeft;
	size_t fragmentUnits;
	size_t extraUnits;

	byte *cursor;
	TC_LARGEST_COMPILER_UINT nextUnitNo;

	EncryptionThreadPoolWorkItem *slot;
	EncryptionThreadPoolWorkItem *head;

	if (unitCount == 0)
		return;

	/* Single-threaded path: no pool available, or nothing worth splitting. */
	if (!ThreadPoolRunning || unitCount == 1)
	{
		if (type == DecryptDataUnitsWork)
		{
			DecryptDataUnitsCurrentThread (data, startUnitNo, unitCount, cryptoInfo);
		}
		else if (type == EncryptDataUnitsWork)
		{
			EncryptDataUnitsCurrentThread (data, startUnitNo, unitCount, cryptoInfo);
		}
		else
		{
			TC_THROW_FATAL_EXCEPTION;
		}

		return;
	}

	if (unitCount > ThreadCount)
	{
		/* One fragment per thread. Splitting the data into fragments smaller
		   than a few hundred bytes would not pay off: the thread handling
		   overhead would in most cases make the multi-threaded run slower
		   than a single-threaded one. */
		fragmentsLeft = ThreadCount;
		fragmentUnits = (size_t) unitCount / ThreadCount;
		extraUnits = (size_t) unitCount % ThreadCount;

		/* The first extraUnits fragments each carry one additional unit. */
		if (extraUnits != 0)
			fragmentUnits += 1;
	}
	else
	{
		/* No more units than threads: one unit per fragment. */
		fragmentsLeft = (size_t) unitCount;
		fragmentUnits = 1;
		extraUnits = 0;
	}

	cursor = data;
	nextUnitNo = startUnitNo->Value;

	TC_ACQUIRE_MUTEX (&EnqueueMutex);

	/* The first slot doubles as the completion tracker for the whole request. */
	head = &WorkItemQueue[EnqueuePosition];

	while (GetWorkItemState (head) != WorkItemFree)
	{
		TC_WAIT_EVENT (WorkItemCompletedEvent);
	}

	head->OutstandingFragmentCount = fragmentsLeft;

	for (; fragmentsLeft > 0; --fragmentsLeft)
	{
		slot = &WorkItemQueue[EnqueuePosition++];

		if (EnqueuePosition >= TC_ENC_THREAD_POOL_QUEUE_SIZE)
			EnqueuePosition = 0;

		/* Wait for the slot to be released by whoever used it last. */
		while (GetWorkItemState (slot) != WorkItemFree)
		{
			TC_WAIT_EVENT (WorkItemCompletedEvent);
		}

		slot->Type = type;
		slot->FirstFragment = head;

		slot->Encryption.Data = cursor;
		slot->Encryption.StartUnitNo.Value = nextUnitNo;
		slot->Encryption.UnitCount = fragmentUnits;
		slot->Encryption.CryptoInfo = cryptoInfo;

		/* Advance to the start of the next fragment. */
		cursor += fragmentUnits * ENCRYPTION_DATA_UNIT_SIZE;
		nextUnitNo += fragmentUnits;

		/* Once the leftover units are used up, later fragments shrink by one unit. */
		if (extraUnits > 0)
		{
			--extraUnits;

			if (extraUnits == 0)
				--fragmentUnits;
		}

		/* Publish the fragment only after all of its fields are filled in. */
		SetWorkItemState (slot, WorkItemReady);
		TC_SET_EVENT (WorkItemReadyEvent);
	}

	TC_RELEASE_MUTEX (&EnqueueMutex);

	/* Block until the last fragment has been processed, then release the
	   first fragment's slot, which the worker threads leave for the caller. */
	TC_WAIT_EVENT (head->ItemCompletedEvent);
	SetWorkItemState (head, WorkItemFree);
	TC_SET_EVENT (WorkItemCompletedEvent);
}