/**
 * Populates the fiber pool, converts the calling thread into a fiber, and
 * spawns the remaining worker threads.
 *
 * @param fiberPoolSize    Number of fibers to create and enqueue in m_fiberPool.
 * @param globalArgs       Forwarded as the start argument of every pooled fiber and
 *                         handed to every worker thread; its g_taskScheduler member
 *                         is passed by address to the helper fibers.
 * @return                 true when every worker thread was created; false as soon
 *                         as one FTLCreateThread call fails.
 *
 * NOTE(review): on failure, threads created by earlier loop iterations are left
 * running and nothing allocated here is rolled back.
 */
bool TaskScheduler::Initialize(uint fiberPoolSize, GlobalArgs *globalArgs) {
	// Fill the pool with fibers that all begin in FiberStart
	for (uint i = 0; i < fiberPoolSize; ++i) {
		FiberType newFiber = FTLCreateFiber(524288, FiberStart, reinterpret_cast<fiber_arg_t>(globalArgs));
		m_fiberPool.enqueue(newFiber);
	}

	// One thread slot per hardware thread; slot 0 is the calling thread,
	// so only (m_numThreads - 1) threads are counted as active workers.
	m_numThreads = FTLGetNumHardwareThreads();
	m_threads = new ThreadType[m_numThreads];
	m_numActiveWorkerThreads.store(static_cast<uint>(m_numThreads) - 1);

	// Create the helper fibers for this thread, keyed by its thread id
	const auto currentThreadId = FTLGetCurrentThreadId();
	m_fiberSwitchingFibers[currentThreadId] = FTLCreateFiber(FTL_HELPER_FIBER_STACK_SIZE, FiberSwitchStart, reinterpret_cast<fiber_arg_t>(&globalArgs->g_taskScheduler));
	m_counterWaitingFibers[currentThreadId] = FTLCreateFiber(FTL_HELPER_FIBER_STACK_SIZE, CounterWaitStart, reinterpret_cast<fiber_arg_t>(&globalArgs->g_taskScheduler));

	// Set the affinity for the current thread and convert it to a fiber
	FTLSetCurrentThreadAffinity(1);
	m_threads[0] = FTLGetCurrentThread();
	FiberType mainThreadFiber = FTLConvertThreadToFiber();

	FTLSetCurrentFiber(mainThreadFiber);

	// Create the remaining threads
	for (uint i = 1; i < m_numThreads; ++i) {
		ThreadStartArgs *threadArgs = new ThreadStartArgs();
		threadArgs->globalArgs = globalArgs;
		threadArgs->threadIndex = i;

		ThreadType threadHandle;
		if (!FTLCreateThread(&threadHandle, 524288, ThreadStart, threadArgs, i)) {
			// Assumes a failed FTLCreateThread never started ThreadStart, so
			// the args are still owned here and must be released to avoid a leak.
			delete threadArgs;
			return false;
		}
		m_threads[i] = threadHandle;
	}

	return true;
}
/**
 * Builds the fiber pool, starts the worker threads, runs the main task on a
 * pooled fiber, and joins the worker threads once control returns here.
 *
 * @param fiberPoolSize    Number of fibers to allocate; also sizes the free/waiting
 *                         flag arrays and m_waitingBundles.
 * @param mainTask         Task function handed to the main fiber via MainFiberStartArgs.
 * @param mainTaskArg      Opaque argument handed to the main fiber alongside mainTask.
 *
 * If a worker thread cannot be created, an error is printed and the function
 * returns early without running the main task.
 * NOTE(review): in that case the threads created so far are neither signalled
 * nor joined.
 */
void TaskScheduler::Run(uint fiberPoolSize, TaskFunction mainTask, void *mainTaskArg) {
	// Create and populate the fiber pool
	m_fiberPoolSize = fiberPoolSize;
	m_fibers = new Fiber[fiberPoolSize];
	m_freeFibers = new std::atomic<bool>[fiberPoolSize];
	m_waitingFibers = new std::atomic<bool>[fiberPoolSize];

	for (uint i = 0; i < fiberPoolSize; ++i) {
		// The temporary is already an rvalue, so it is move-assigned without std::move
		m_fibers[i] = Fiber(512000, FiberStart, reinterpret_cast<std::intptr_t>(this));
		// Every fiber starts out free and not waiting
		m_freeFibers[i].store(true, std::memory_order_release);
		m_waitingFibers[i].store(false, std::memory_order_release);
	}
	m_waitingBundles.resize(fiberPoolSize);

	// 1 thread for each logical processor
	m_numThreads = FTLGetNumHardwareThreads();

	// Reset the quit flag and size the per-thread storage
	m_quit.store(false, std::memory_order_release);
	m_threads.resize(m_numThreads);
	m_tls.resize(m_numThreads);

	// Set the properties for the current thread; it occupies slot 0
	FTLSetCurrentThreadAffinity(1);
	m_threads[0] = FTLGetCurrentThread();

	// Create the remaining threads
	for (uint i = 1; i < m_numThreads; ++i) {
		ThreadStartArgs *threadArgs = new ThreadStartArgs();
		threadArgs->taskScheduler = this;
		threadArgs->threadIndex = i;

		if (!FTLCreateThread(524288, ThreadStart, threadArgs, i, &m_threads[i])) {
			// Assumes a failed FTLCreateThread never started ThreadStart, so
			// the args are still owned here and must be released to avoid a leak.
			delete threadArgs;
			printf("Error: Failed to create all the worker threads");
			return;
		}
	}

	// Start the main task

	// Get a free fiber
	std::size_t freeFiberIndex = GetNextFreeFiberIndex();
	Fiber *freeFiber = &m_fibers[freeFiberIndex];

	// Repurpose it as the main task fiber and switch to it.
	// mainFiberArgs lives in this stack frame and is passed by address; the
	// frame is still alive while the main fiber runs because Run() has not returned.
	MainFiberStartArgs mainFiberArgs;
	mainFiberArgs.taskScheduler = this;
	mainFiberArgs.MainTask = mainTask;
	mainFiberArgs.Arg = mainTaskArg;

	freeFiber->Reset(MainFiberStart, reinterpret_cast<std::intptr_t>(&mainFiberArgs));
	m_tls[0].CurrentFiberIndex = freeFiberIndex;
	m_tls[0].ThreadFiber.SwitchToFiber(freeFiber);

	// And we're back
	// Wait for the worker threads to finish
	for (std::size_t i = 1; i < m_numThreads; ++i) {
		FTLJoinThread(m_threads[i]);
	}
}