/* PsychFixupNative10BitFramebufferEnableAfterEndOfSceneMarker()
 *
 * Undo changes made by the graphics driver to the framebuffer pixel format control register
 * as part of an OpenGL/Graphics op that marks "End of Scene", e.g., a glClear() command, that
 * would revert the framebuffers opmode to standard 8bpc mode and thereby kill our 10 bpc mode
 * setting.
 *
 * This routine *must* be called after each such problematic "End of scene" marker command like
 * glClear(). The routine does nothing if 10bpc mode is not enabled/requested for the corresponding
 * display head associated with the given onscreen window. It rewrites the control register on
 * 10 bpc configured windows to basically undo the unwanted change of the gfx-driver *before*
 * a vertical retrace cycle starts, ie., before that changes take effect (The register is double-
 * buffered and latched to update only at VSYNC time, so we just have to be quick enough).
 *
 *
 * Expected Sequence of operations is:
 * 1. Some EOS command like glClear() issued.
 * 2. EOS command schedules ctrl register update to "bad" value at next VSYNC.
 * 3. This routine gets called, detects need for fixup, glGetError() waits for "2." to finish.
 * 4. This routine undos the "bad" value change request by overwriting the latch with our
 *    "good" value --> Scheduled for next VSYNC. Then it returns...
 * 5. At next VSYNC or old "good" value is overwritten/latched with our new old "good" value,
 *    --> "good value" persists, framebuffer stays in 2101010 configuration --> All good.
 *
 * So far the theory, let's see if this really works in real world...
 *
 * This is not needed in Carbon+AGL windowed mode, as the driver doesnt' mess with the control
 * register there, but that mode has its own share of drawback, e.g., generally reduced performance
 * and less robust stimulus onset timing and timestamping... Life's full of tradeoffs...
 */
void PsychFixupNative10BitFramebufferEnableAfterEndOfSceneMarker(PsychWindowRecordType* windowRecord)
{
#if PSYCH_SYSTEM == PSYCH_OSX || PSYCH_SYSTEM == PSYCH_LINUX

	int i,si,ei, headid, screenId;
	unsigned int lutreg, ctlreg, value, status;

	// Fixup needed? Only if 10bpc mode is supposed to be active! Early exit if not:
	if (!(windowRecord->specialflags & kPsychNative10bpcFBActive)) return;

	// This command must be called with the OpenGL context of the given windowRecord active, so
	// we can rely on glGetError() waiting for the correct pipeline to settle! Wait via glGetError()
	// for the end-of-scene marker to finish completely, so our register write happens after
	// the "wrong" register write of that command. glFinish() doesn't work here for unknown
	// reasons - probably it waits too long or whatever. Pretty shaky this stuff...
	glGetError();
	
	// Ok, now rewrite the double-buffered (latched) register with our "good" value for keeping
	// the 10 bpc framebuffer online:
	
	// Map windows screen to gfx-headid aka register subset. TODO : We'll need something better,
	// more generic, abstracted out for the future, but as a starter this will do:
	screenId = windowRecord->screenNumber;
	headid = (screenId<=0) ? 0 : 1;
	ctlreg = (headid == 0) ? RADEON_D1GRPH_CONTROL : RADEON_D2GRPH_CONTROL;
	
	// One-liner read-modify-write op, which simply sets bit 8 of the register - the "Enable 2101010 mode" bit:
	PsychOSKDWriteRegister(screenId, ctlreg, (0x1 << 8) | PsychOSKDReadRegister(screenId, ctlreg, NULL), NULL);
	
	// Debug output, if wanted:
	if (PsychPrefStateGet_Verbosity() > 5) printf("PTB-DEBUG: PsychFixupNative10BitFramebufferEnableAfterEndOfSceneMarker(): ARGB2101010 bit set on screen %i, head %i.\n", screenId, headid);

#endif

	// Done.
	return;
}
PsychError SCREENGetWindowInfo(void) 
{
    const char *FieldNames[]={ "Beamposition", "LastVBLTimeOfFlip", "LastVBLTime", "VBLCount", "TimeAtSwapRequest", "TimePostSwapRequest", "RawSwapTimeOfFlip",
							   "VBLTimePostFlip", "OSSwapTimestamp", "GPULastFrameRenderTime", "StereoMode", "ImagingMode", "MultiSampling", "MissedDeadlines", "FlipCount", "StereoDrawBuffer",
							   "GuesstimatedMemoryUsageMB", "VBLStartline", "VBLEndline", "VideoRefreshFromBeamposition", "GLVendor", "GLRenderer", "GLVersion", "GPUCoreId", 
							   "GLSupportsFBOUpToBpc", "GLSupportsBlendingUpToBpc", "GLSupportsTexturesUpToBpc", "GLSupportsFilteringUpToBpc", "GLSupportsPrecisionColors",
							   "GLSupportsFP32Shading", "BitsPerColorComponent", "IsFullscreen", "SpecialFlags", "SwapGroup", "SwapBarrier" };
							   
	const int  fieldCount = 35;
	PsychGenericScriptType	*s;

    PsychWindowRecordType *windowRecord;
    double beamposition, lastvbl;
	int infoType = 0, retIntArg;
	double auxArg1, auxArg2, auxArg3;
	CGDirectDisplayID displayId;
	psych_uint64 postflip_vblcount;
	double vbl_startline;
	long scw, sch;
	psych_bool onscreen;
    
    //all subfunctions should have these two lines.  
    PsychPushHelp(useString, synopsisString, seeAlsoString);
    if(PsychIsGiveHelp()){PsychGiveHelp();return(PsychError_none);};
    
    PsychErrorExit(PsychCapNumInputArgs(5));     //The maximum number of inputs
    PsychErrorExit(PsychRequireNumInputArgs(1)); //The required number of inputs	
    PsychErrorExit(PsychCapNumOutputArgs(1));    //The maximum number of outputs

    // Query infoType flag: Defaults to zero.
    PsychCopyInIntegerArg(2, FALSE, &infoType);
	if (infoType < 0 || infoType > 5) PsychErrorExitMsg(PsychError_user, "Invalid 'infoType' argument specified! Valid are 0, 1, 2, 3, 4 and 5.");

	// Windowserver info requested?
	if (infoType == 2 || infoType == 3) {
		// Return info about WindowServer:
		#if PSYCH_SYSTEM == PSYCH_OSX

		const char *CoreGraphicsFieldNames[]={ "CGSFps", "CGSValue1", "CGSValue2", "CGSValue3", "CGSDebugOptions" };
		const int CoreGraphicsFieldCount = 5;
		float cgsFPS, val1, val2, val3;
		
		// This (undocumented) Apple call retrieves information about performance statistics of
		// the Core graphics server, also known as WindowServer or Quartz compositor:
		CGSGetPerformanceData(_CGSDefaultConnection(), &cgsFPS, &val1, &val2, &val3);
		if (CGSGetDebugOptions(&retIntArg)) {
			if (PsychPrefStateGet_Verbosity() > 1) printf("PTB-WARNING: GetWindowInfo: Call to CGSGetDebugOptions() failed!\n");
		}
		
		PsychAllocOutStructArray(1, FALSE, 1, CoreGraphicsFieldCount, CoreGraphicsFieldNames, &s);
		PsychSetStructArrayDoubleElement("CGSFps", 0   , cgsFPS, s);
		PsychSetStructArrayDoubleElement("CGSValue1", 0, val1, s);
		PsychSetStructArrayDoubleElement("CGSValue2", 0, val2, s);
		PsychSetStructArrayDoubleElement("CGSValue3", 0, val3, s);
		PsychSetStructArrayDoubleElement("CGSDebugOptions", 0, (double) retIntArg, s);
		
		if ( (infoType == 3) && PsychCopyInDoubleArg(3, FALSE, &auxArg1) ) {
			// Type 3 setup request with auxArg1 provided. Apple auxArg1 as debugFlag setting
			// for the CoreGraphics server: DANGEROUS!
			if (CGSSetDebugOptions((unsigned int) auxArg1)) {
				if (PsychPrefStateGet_Verbosity() > 1) printf("PTB-WARNING: GetWindowInfo: Call to CGSSetDebugOptions() failed!\n");
			}
		}

		#endif
		
		#if PSYCH_SYSTEM == PSYCH_WINDOWS
		psych_uint64 onsetVBLCount, frameId;
		double onsetVBLTime, compositionRate;
		psych_uint64 targetVBL;
		
		PsychAllocInWindowRecordArg(kPsychUseDefaultArgPosition, TRUE, &windowRecord);
		// Query all DWM presentation timing info, return full info as struct in optional return argument '1':
		if (PsychOSGetPresentationTimingInfo(windowRecord, TRUE, 0, &onsetVBLCount, &onsetVBLTime, &frameId, &compositionRate, 1)) {
			// Query success: Info struct has been created and returned by PsychOSGetPresentationTimingInfo()...
			auxArg1 = auxArg2 = 0;
			auxArg3 = 2;
			
			// Want us to change settings?
			if ( (infoType == 3) && PsychCopyInDoubleArg(3, FALSE, &auxArg1) && PsychCopyInDoubleArg(4, FALSE, &auxArg2) && PsychCopyInDoubleArg(5, FALSE, &auxArg3)) {
				if (auxArg1 < 0) auxArg1 = 0;
				targetVBL = auxArg1;
				if (PsychOSSetPresentParameters(windowRecord, targetVBL, (int) auxArg3, auxArg2)) {
					if (PsychPrefStateGet_Verbosity() > 5) printf("PTB-DEBUG: GetWindowInfo: Call to PsychOSSetPresentParameters(%i, %i, %f) SUCCESS!\n", (int) auxArg1, (int) auxArg3, auxArg2);
				}
				else {
					if (PsychPrefStateGet_Verbosity() > 1) printf("PTB-WARNING: GetWindowInfo: Call to PsychOSSetPresentParameters() failed!\n");
				}
			}
		}
		else {
			// Unsupported / Failed:
			PsychCopyOutDoubleArg(1, FALSE, -1);
		}

		#endif

		#if PSYCH_SYSTEM == PSYCH_LINUX
			if (infoType == 2) {
				// MMIO register Read for screenid "auxArg1", register offset "auxArg2":
				PsychCopyInDoubleArg(3, TRUE, &auxArg1);
				PsychCopyInDoubleArg(4, TRUE, &auxArg2);
				PsychCopyOutDoubleArg(1, FALSE, (double) PsychOSKDReadRegister((int) auxArg1, (unsigned int) auxArg2, NULL));
			}
			
			if (infoType == 3) {
				// MMIO register Write for screenid "auxArg1", register offset "auxArg2", to value "auxArg3":
				PsychCopyInDoubleArg(3, TRUE, &auxArg1);
				PsychCopyInDoubleArg(4, TRUE, &auxArg2);
				PsychCopyInDoubleArg(5, TRUE, &auxArg3);
				PsychOSKDWriteRegister((int) auxArg1, (unsigned int) auxArg2, (unsigned int) auxArg3, NULL);
			}
		#endif

		// Done.
		return(PsychError_none);
	}

    // Get the window record:
    PsychAllocInWindowRecordArg(kPsychUseDefaultArgPosition, TRUE, &windowRecord);
	onscreen = PsychIsOnscreenWindow(windowRecord);

	if (onscreen) {
		// Query rasterbeam position: Will return -1 if unsupported.
		PsychGetCGDisplayIDFromScreenNumber(&displayId, windowRecord->screenNumber);
		beamposition = (double) PsychGetDisplayBeamPosition(displayId, windowRecord->screenNumber);
	}
	else {
		beamposition = -1;
	}
	
	if (infoType == 1) {
		// Return the measured beamposition:
		PsychCopyOutDoubleArg(1, FALSE, beamposition);
	}
    else if (infoType == 4) {
        // Return async flip state: 1 = Active, 0 = Inactive.
        PsychCopyOutDoubleArg(1, FALSE, (((NULL != windowRecord->flipInfo) && (0 != windowRecord->flipInfo->asyncstate)) ? 1 : 0));
    }
	else if (infoType == 5) {
		// Create a GL_EXT_timer_query object for this window:
		if (glewIsSupported("GL_EXT_timer_query")) {
			// Pending queries finished?
			if (windowRecord->gpuRenderTimeQuery > 0) {
				PsychErrorExitMsg(PsychError_user, "Tried to create a new GPU rendertime query, but last query not yet finished! Call Screen('Flip') first!");
			}
			
			// Enable our rendering context by selecting this window as drawing target:
			PsychSetDrawingTarget(windowRecord);
			
			// Generate Query object:
			glGenQueries(1, &windowRecord->gpuRenderTimeQuery);
			
			// Emit Query: GPU will measure elapsed processing time in Nanoseconds, starting
			// with the first GL command executed after this command:
			glBeginQuery(GL_TIME_ELAPSED_EXT, windowRecord->gpuRenderTimeQuery);
			
			// Reset last measurement:
			windowRecord->gpuRenderTime = 0;
		}
		else {
			if (PsychPrefStateGet_Verbosity() > 4) printf("PTB-INFO: GetWindowInfo for infoType 5: GPU timer query objects are unsupported on this platform and GPU. Call ignored!\n");
		}
	}
	else {
		// Return all information:
		PsychAllocOutStructArray(1, FALSE, 1, fieldCount, FieldNames, &s);

		// Rasterbeam position:
		PsychSetStructArrayDoubleElement("Beamposition", 0, beamposition, s);

		// Time of last vertical blank when a double-buffer swap occured:
		if ((windowRecord->flipCount > 0) && (windowRecord->time_at_last_vbl == 0) && (PsychPrefStateGet_VBLTimestampingMode() == 4)) {
			// If time_at_last_vbl for an already finished or at least pending flip isn't available and
			// we have support for OS-Builtin timestamping enabled, we try to employ OS-Builtin timestamping
			// to get a timestamp for the most recent pending or finished flip. If this fails or is unsupported,
			// it will have no effect:
			PsychOSGetSwapCompletionTimestamp(windowRecord, 0, &(windowRecord->time_at_last_vbl));
		}

		// Return it - or the value zero if it is (still) undefined/unavailable:
		PsychSetStructArrayDoubleElement("LastVBLTimeOfFlip", 0, windowRecord->time_at_last_vbl, s);

		// Uncorrected timestamp of flip swap completion:
		PsychSetStructArrayDoubleElement("RawSwapTimeOfFlip", 0, windowRecord->rawtime_at_swapcompletion, s);

		// Timestamp immediately prior to call to PsychOSFlipWindowBuffers(), i.e., time at swap request submission:
		PsychSetStructArrayDoubleElement("TimeAtSwapRequest", 0, windowRecord->time_at_swaprequest, s);

		// Timestamp immediately after call to PsychOSFlipWindowBuffers() returns, i.e., time at swap request submission completion:
		PsychSetStructArrayDoubleElement("TimePostSwapRequest", 0, windowRecord->time_post_swaprequest, s);

		// Timestamp immediately after call to PsychOSFlipWindowBuffers() returns, i.e., time at swap request submission completion:
		PsychSetStructArrayDoubleElement("VBLTimePostFlip", 0, windowRecord->postflip_vbltimestamp, s);

		// Swap completion timestamp for most recently completed swap, according to OS-builtin PsychOSGetSwapCompletionTimestamp() method:
		PsychSetStructArrayDoubleElement("OSSwapTimestamp", 0, windowRecord->osbuiltin_swaptime, s);

		// Result from last GPU rendertime query as triggered by infoType 5: Zero if undefined.
		PsychSetStructArrayDoubleElement("GPULastFrameRenderTime", 0, windowRecord->gpuRenderTime, s);

		// Try to determine system time of last VBL on display, independent of any
		// flips / bufferswaps.
		lastvbl = -1;
		postflip_vblcount = 0;
		
		// On supported systems, we can query the OS for the system time of last VBL, so we can
		// use the most recent VBL timestamp as baseline for timing calculations, 
		// instead of one far in the past.
		if (onscreen) { lastvbl = PsychOSGetVBLTimeAndCount(windowRecord, &postflip_vblcount); }

		// If we couldn't determine this information we just set lastvbl to the last known
		// vbl timestamp of last flip -- better than nothing...
		if (lastvbl < 0) lastvbl = windowRecord->time_at_last_vbl;
		PsychSetStructArrayDoubleElement("LastVBLTime", 0, lastvbl, s);
		PsychSetStructArrayDoubleElement("VBLCount", 0, (double) (psych_int64) postflip_vblcount, s);
        
		// Misc. window parameters:
		PsychSetStructArrayDoubleElement("StereoMode", 0, windowRecord->stereomode, s);
		PsychSetStructArrayDoubleElement("ImagingMode", 0, windowRecord->imagingMode, s);
		PsychSetStructArrayDoubleElement("SpecialFlags", 0, windowRecord->specialflags, s);
		PsychSetStructArrayDoubleElement("IsFullscreen", 0, (windowRecord->specialflags & kPsychIsFullscreenWindow) ? 1 : 0, s);
		PsychSetStructArrayDoubleElement("MultiSampling", 0, windowRecord->multiSample, s);
		PsychSetStructArrayDoubleElement("MissedDeadlines", 0, windowRecord->nr_missed_deadlines, s);
		PsychSetStructArrayDoubleElement("FlipCount", 0, windowRecord->flipCount, s);
		PsychSetStructArrayDoubleElement("StereoDrawBuffer", 0, windowRecord->stereodrawbuffer, s);
		PsychSetStructArrayDoubleElement("GuesstimatedMemoryUsageMB", 0, (double) windowRecord->surfaceSizeBytes / 1024 / 1024, s);
		PsychSetStructArrayDoubleElement("BitsPerColorComponent", 0, (double) windowRecord->bpc, s);
		
		// Query real size of the underlying display in order to define the vbl_startline:
		PsychGetScreenSize(windowRecord->screenNumber, &scw, &sch);
		vbl_startline = (double) sch;
		PsychSetStructArrayDoubleElement("VBLStartline", 0, vbl_startline, s);

		// And VBL endline:
		PsychSetStructArrayDoubleElement("VBLEndline", 0, windowRecord->VBL_Endline, s);

		// Video refresh interval duration from beamposition method:
		PsychSetStructArrayDoubleElement("VideoRefreshFromBeamposition", 0, windowRecord->ifi_beamestimate, s);
    
		// Swap group assignment and swap barrier assignment, if any:
		PsychSetStructArrayDoubleElement("SwapGroup", 0, windowRecord->swapGroup, s);
		PsychSetStructArrayDoubleElement("SwapBarrier", 0, windowRecord->swapBarrier, s);
	
        // Which basic GPU architecture is this?
		PsychSetStructArrayStringElement("GPUCoreId", 0, windowRecord->gpuCoreId, s);
		
		// FBO's supported, and how deep?
		if (windowRecord->gfxcaps & kPsychGfxCapFBO) {
			if (windowRecord->gfxcaps & kPsychGfxCapFPFBO32) {
				PsychSetStructArrayDoubleElement("GLSupportsFBOUpToBpc", 0, 32, s);
			} else
			if (windowRecord->gfxcaps & kPsychGfxCapFPFBO16) {
				PsychSetStructArrayDoubleElement("GLSupportsFBOUpToBpc", 0, 16, s);
			} else PsychSetStructArrayDoubleElement("GLSupportsFBOUpToBpc", 0, 8, s);
		}
		else {
			PsychSetStructArrayDoubleElement("GLSupportsFBOUpToBpc", 0, 0, s);
		}

		// How deep is alpha blending supported?
		if (windowRecord->gfxcaps & kPsychGfxCapFPBlend32) {
			PsychSetStructArrayDoubleElement("GLSupportsBlendingUpToBpc", 0, 32, s);
		} else if (windowRecord->gfxcaps & kPsychGfxCapFPBlend16) {
			PsychSetStructArrayDoubleElement("GLSupportsBlendingUpToBpc", 0, 16, s);
		} else PsychSetStructArrayDoubleElement("GLSupportsBlendingUpToBpc", 0, 8, s);
		
		// How deep is texture mapping supported?
		if (windowRecord->gfxcaps & kPsychGfxCapFPTex32) {
			PsychSetStructArrayDoubleElement("GLSupportsTexturesUpToBpc", 0, 32, s);
		} else if (windowRecord->gfxcaps & kPsychGfxCapFPTex16) {
			PsychSetStructArrayDoubleElement("GLSupportsTexturesUpToBpc", 0, 16, s);
		} else PsychSetStructArrayDoubleElement("GLSupportsTexturesUpToBpc", 0, 8, s);
		
		// How deep is texture filtering supported?
		if (windowRecord->gfxcaps & kPsychGfxCapFPFilter32) {
			PsychSetStructArrayDoubleElement("GLSupportsFilteringUpToBpc", 0, 32, s);
		} else if (windowRecord->gfxcaps & kPsychGfxCapFPFilter16) {
			PsychSetStructArrayDoubleElement("GLSupportsFilteringUpToBpc", 0, 16, s);
		} else PsychSetStructArrayDoubleElement("GLSupportsFilteringUpToBpc", 0, 8, s);

		if (windowRecord->gfxcaps & kPsychGfxCapVCGood) {
			PsychSetStructArrayDoubleElement("GLSupportsPrecisionColors", 0, 1, s);
		} else PsychSetStructArrayDoubleElement("GLSupportsPrecisionColors", 0, 0, s);

		if (windowRecord->gfxcaps & kPsychGfxCapFP32Shading) {
			PsychSetStructArrayDoubleElement("GLSupportsFP32Shading", 0, 1, s);
		} else PsychSetStructArrayDoubleElement("GLSupportsFP32Shading", 0, 0, s);

		// Renderer information: This comes last, and would fail if async flips
        // are active, because it needs PsychSetDrawingTarget, which in turn needs async
        // flips to be inactive:
        PsychSetDrawingTarget(windowRecord);
        PsychSetStructArrayStringElement("GLVendor", 0, (char*) glGetString(GL_VENDOR), s);
        PsychSetStructArrayStringElement("GLRenderer", 0, (char*) glGetString(GL_RENDERER), s);
        PsychSetStructArrayStringElement("GLVersion", 0, (char*) glGetString(GL_VERSION), s);
    }
    
    // Done.
    return(PsychError_none);
}
/* PsychFixupNative10BitFramebufferEnableAfterEndOfSceneMarker()
 *
 * Undo changes made by the graphics driver to the framebuffer pixel format control register
 * as part of an OpenGL/Graphics op that marks "End of Scene", e.g., a glClear() command, that
 * would revert the framebuffers opmode to standard 8bpc mode and thereby kill our 10 bpc mode
 * setting.
 *
 * This routine *must* be called after each such problematic "End of scene" marker command like
 * glClear(). The routine does nothing if 10bpc mode is not enabled/requested for the corresponding
 * display head associated with the given onscreen window. It rewrites the control register on
 * 10 bpc configured windows to basically undo the unwanted change of the gfx-driver *before*
 * a vertical retrace cycle starts, ie., before that changes take effect (The register is double-
 * buffered and latched to update only at VSYNC time, so we just have to be quick enough).
 *
 *
 * Expected Sequence of operations is:
 * 1. Some EOS command like glClear() issued.
 * 2. EOS command schedules ctrl register update to "bad" value at next VSYNC.
 * 3. This routine gets called, detects need for fixup, glGetError() waits for "2." to finish.
 * 4. This routine undos the "bad" value change request by overwriting the latch with our
 *    "good" value --> Scheduled for next VSYNC. Then it returns...
 * 5. At next VSYNC or old "good" value is overwritten/latched with our new old "good" value,
 *    --> "good value" persists, framebuffer stays in 2101010 configuration --> All good.
 *
 * So far the theory, let's see if this really works in real world...
 *
 * This is not needed in Carbon+AGL windowed mode, as the driver doesnt' mess with the control
 * register there, but that mode has its own share of drawback, e.g., generally reduced performance
 * and less robust stimulus onset timing and timestamping... Life's full of tradeoffs...
 */
void PsychFixupNative10BitFramebufferEnableAfterEndOfSceneMarker(PsychWindowRecordType* windowRecord)
{
#if PSYCH_SYSTEM == PSYCH_OSX || PSYCH_SYSTEM == PSYCH_LINUX
    int headiter, headid, screenId;
    unsigned int ctlreg, lutreg, val1, val2;
    int gpuMaintype, gpuMinortype;

    // Fixup needed? Only if 10bpc mode is supposed to be active! Early exit if not:
    if (!(windowRecord->specialflags & kPsychNative10bpcFBActive)) return;

    // Map windows screen to gfx-headid aka register subset. TODO : We'll need something better,
    // more generic, abstracted out for the future, but as a starter this will do:
    screenId = windowRecord->screenNumber;

    // We only support Radeon GPU's, nothing else:
    if (!PsychGetGPUSpecs(screenId, &gpuMaintype, &gpuMinortype, NULL, NULL) ||
        (gpuMaintype != kPsychRadeon) || (gpuMinortype >= 0xffff)) {
        return;
    }

    // This command must be called with the OpenGL context of the given windowRecord active, so
    // we can rely on glGetError() waiting for the correct pipeline to settle! Wait via glGetError()
    // for the end-of-scene marker to finish completely, so our register write happens after
    // the "wrong" register write of that command. glFinish() doesn't work here for unknown
    // reasons - probably it waits too long or whatever. Pretty shaky this stuff...
    glGetError();

    // Ok, now rewrite the double-buffered (latched) register with our "good" value for keeping
    // the 10 bpc framebuffer online:

    // Iterate over range of all assigned heads for this screenId 'i' and reconfigure them:
    for (headiter = 0; headiter < kPsychMaxPossibleCrtcs; headiter++) {
        // Map screenid to headid for headiter'th head:
        headid = PsychScreenToCrtcId(screenId, headiter);

        // We're done as soon as we encounter invalid negative headid.
        if (headid < 0) break;

        // Select Radeon HW registers for corresponding heads:
        if (gpuMinortype < 0x40) {
            // DCE-3 and earlier:
            lutreg = (headid == 0) ? RADEON_D1GRPH_LUT_SEL : RADEON_D2GRPH_LUT_SEL;
            ctlreg = (headid == 0) ? RADEON_D1GRPH_CONTROL : RADEON_D2GRPH_CONTROL;
        }
        else {
            // DCE-4 and later:
            if (headid > DCE4_MAXHEADID) {
                printf("PTB-ERROR: Invalid headId %i (greater than max %i) provided for DCE-4+ display engine!\n", headid, DCE4_MAXHEADID);
                return;
            }

            lutreg = EVERGREEN_DC_LUT_10BIT_BYPASS + crtcoff[headid];
            ctlreg = EVERGREEN_GRPH_CONTROL + crtcoff[headid];
        }

        // Get current state of registers at high debug levels:
        if (PsychPrefStateGet_Verbosity() > 9) {
            val1 = PsychOSKDReadRegister(screenId, lutreg, NULL);
            val2 = PsychOSKDReadRegister(screenId, ctlreg, NULL);
            printf("PTB-DEBUG: PsychFixupNative10BitFramebufferEnableAfterEndOfSceneMarker(): Screen %i, head %i: LUT = %i [%i], GRPHCONT = %i [%i]\n", screenId, headid, val1 & (0x1 << 8), val1, val2 & (0x1 << 8), val2);
        }

        // One-liner read-modify-write op, which simply sets bit 8 of the register - the "10 bit LUT bypass" bit:
        PsychOSKDWriteRegister(screenId, lutreg, (0x1 << 8) | PsychOSKDReadRegister(screenId, lutreg, NULL), NULL);

        // One-liner read-modify-write op, which simply sets bit 8 of the register - the "Enable 2101010 mode" bit:
        PsychOSKDWriteRegister(screenId, ctlreg, (0x1 << 8) | PsychOSKDReadRegister(screenId, ctlreg, NULL), NULL);

        // Debug output, if wanted:
        if (PsychPrefStateGet_Verbosity() > 5) printf("PTB-DEBUG: PsychFixupNative10BitFramebufferEnableAfterEndOfSceneMarker(): ARGB2101010 bit set on screen %i, head %i.\n", screenId, headid);
    }
#endif

    // Done.
    return;
}
/* PsychEnableNative10BitFramebuffer()  - Enable/Disable native 10 bpc RGB framebuffer modes.
 *
 * This function enables or disables the native ARGB2101010 framebuffer readout mode of supported
 * graphics hardware. Currently the ATI Radeon X1000/HD2000/HD3000 and later cards should allow this.
 *
 * This needs support from the Psychtoolbox kernel level support driver for low-level register reads
 * and writes to the GPU registers.
 *
 * 'windowRecord'	Is used to find the Id of the screen for which mode should be changed, as well as enable
 *					flags to see if a change is required at all, and the OpenGL context for some specific
 *					fixups. A value of NULL will try to apply the operation to all heads, but may only work
 *					for *disabling* 10 bpc mode, not for enabling it -- Mostly useful for a master reset to
 *					system default, e.g., as part of error handling or Screen shutdown handling.
 * 'enable'   True = Enable ABGR2101010 support, False = Disable ARGB2101010 support, reenable ARGB8888 support. 
 *
 */
boolean	PsychEnableNative10BitFramebuffer(PsychWindowRecordType* windowRecord, boolean enable)
{
	int i,si,ei, headid, screenId;
	unsigned int lutreg, ctlreg, value, status;
	
	// Child protection:
	if (windowRecord && !PsychIsOnscreenWindow(windowRecord)) PsychErrorExitMsg(PsychError_internal, "Invalid non-onscreen windowRecord provided!!!");
	
	// Either screenid from windowRecord or our special -1 "all Screens" Id:
	screenId = (windowRecord) ? windowRecord->screenNumber : -1;
	
	// Define range of screens: Either a single specific one, or all:
	si = (screenId!=-1) ? screenId   : 0;
	ei = (screenId!=-1) ? screenId+1 : PsychGetNumDisplays();

#if PSYCH_SYSTEM == PSYCH_OSX || PSYCH_SYSTEM == PSYCH_LINUX
	// Loop over all target screens:
	for (i=si; i<ei; i++) {
		// Map screenid to headid: For now we only support 2 heads and assume
		// screenId 0 == head 0, all others equal head 1:
		headid = (i<=0) ? 0 : 1;
		
		// Select Radeon HW registers for corresponding heads:
		lutreg = (headid == 0) ? RADEON_D1GRPH_LUT_SEL : RADEON_D2GRPH_LUT_SEL;
		ctlreg = (headid == 0) ? RADEON_D1GRPH_CONTROL : RADEON_D2GRPH_CONTROL;

		// Enable or Disable?
		if (enable) {
			// Enable:
			
			// Switch hardware LUT's to bypass mode:
			// We set bit 8 to enable "bypass LUT in 2101010 mode":
			value = PsychOSKDReadRegister(screenId, lutreg, &status);
			if (status) {
				printf("PTB-ERROR: Failed to set 10 bit framebuffer mode (LUTReg read failed).\n");
				return(false);
			}

			// Set the bypass bit:
			value = value | 0x1 << 8;

			PsychOSKDWriteRegister(screenId, lutreg, value, &status);
			if (status) {
				printf("PTB-ERROR: Failed to set 10 bit framebuffer mode (LUTReg write failed).\n");
				return(false);
			}
			
			// Switch CRTC to ABGR2101010 readout mode:
			// We set bit 8 to enable that mode:
			value = PsychOSKDReadRegister(screenId, ctlreg, &status);
			if (status) {
				printf("PTB-ERROR: Failed to set 10 bit framebuffer mode (CTLReg read failed).\n");
				return(false);
			}

			// Set 2101010 moe bit:
			value = value | 0x1 << 8;

			PsychOSKDWriteRegister(screenId, ctlreg, value, &status);
			if (status) {
				printf("PTB-ERROR: Failed to set 10 bit framebuffer mode (CTLReg write failed).\n");
				return(false);
			}
			
			// Pipe should be in 10 bpc mode now...
			if (PsychPrefStateGet_Verbosity() > 2) printf("PTB-INFO: System framebuffer switched to ARGB2101010 mode for screen %i [head %i].\n", i, headid);
		}
		else {
			// Disable:

			// Switch CRTC to ABGR8888 readout mode:
			// We clear bit 8 to enable that mode:
			value = PsychOSKDReadRegister(screenId, ctlreg, &status);
			if (status) {
				// This codepath gets always called in PsychCloseWindow(), so we should skip it
				// silently if register read fails, as this is expected on MS-Windows and on all
				// non-Radeon hardware and if kernel driver isn't loaded:
				if (PsychPrefStateGet_Verbosity() > 5) printf("PTB-ERROR: Failed to set 8 bit framebuffer mode (CTLReg read failed).\n");
				return(false);
			}
			else if (PsychPrefStateGet_Verbosity() > 5) printf("PTB-DEBUG: In disable 10bpc: Readreg. ctlreg yields %lx\n", value);

			// Clear 2101010 mode bit:
			value = value & ~(0x1 << 8);

			PsychOSKDWriteRegister(screenId, ctlreg, value, &status);
			if (status) {
				printf("PTB-ERROR: Failed to set 8 bit framebuffer mode (CTLReg write failed).\n");
				return(false);
			}
			else if (PsychPrefStateGet_Verbosity() > 5) printf("PTB-DEBUG: In disable 10bpc: ctlreg reset\n");

			// Wait 500 msecs for GPU to settle:
			PsychWaitIntervalSeconds(0.5);

			// Switch hardware LUT's to standard mapping mode:
			// We clear bit 8 to disable "bypass LUT in 2101010 mode":
			value = PsychOSKDReadRegister(screenId, lutreg, &status);
			if (status) {
				printf("PTB-ERROR: Failed to set 8 bit framebuffer mode (LUTReg read failed).\n");
				return(false);
			}
			else if (PsychPrefStateGet_Verbosity() > 5) printf("PTB-DEBUG: In disable 10bpc: Readreg. lutreg yields %lx\n", value);

			// Clear LUT bypass bit:
			value = value & ~(0x1 << 8);

			PsychOSKDWriteRegister(screenId, lutreg, value, &status);
			if (status) {
				printf("PTB-ERROR: Failed to set 8 bit framebuffer mode (LUTReg write failed).\n");
				return(false);
			}
			else if (PsychPrefStateGet_Verbosity() > 5) printf("PTB-DEBUG: In disable 10bpc: lutreg reset\n");

			// Pipe should be in 8 bpc mode now...
			if (PsychPrefStateGet_Verbosity() > 2) printf("PTB-INFO: System framebuffer switched to standard ARGB8888 mode for screen %i [head %i].\n", i, headid);
		}

		// Next display head...
	}
#else
	// This cool stuff not supported on the uncool Windows OS:
	return(FALSE);
#endif

	// Done.
	return(TRUE);
}
/* PsychEnableNative10BitFramebuffer()  - Enable/Disable native 10 bpc RGB framebuffer modes.
 *
 * This function enables or disables the native ARGB2101010 framebuffer readout mode of supported
 * graphics hardware. Currently the ATI Radeon X1000/HD2000/HD3000 and later cards should allow this.
 *
 * This needs support from the Psychtoolbox kernel level support driver for low-level register reads
 * and writes to the GPU registers.
 *
 * 'windowRecord'	Is used to find the Id of the screen for which mode should be changed, as well as enable
 *					flags to see if a change is required at all, and the OpenGL context for some specific
 *					fixups. A value of NULL will try to apply the operation to all heads, but may only work
 *					for *disabling* 10 bpc mode, not for enabling it -- Mostly useful for a master reset to
 *					system default, e.g., as part of error handling or Screen shutdown handling.
 * 'enable'   True = Enable ABGR2101010 support, False = Disable ARGB2101010 support, reenable ARGB8888 support. 
 *
 */
psych_bool	PsychEnableNative10BitFramebuffer(PsychWindowRecordType* windowRecord, psych_bool enable)
{
#if PSYCH_SYSTEM == PSYCH_OSX || PSYCH_SYSTEM == PSYCH_LINUX
	int i, si, ei, headid, headiter, screenId;
	unsigned int lutreg, ctlreg, value, status;
	int gpuMaintype, gpuMinortype;

	// Child protection:
	if (windowRecord && !PsychIsOnscreenWindow(windowRecord)) PsychErrorExitMsg(PsychError_internal, "Invalid non-onscreen windowRecord provided!!!");
	
	// Either screenid from windowRecord or our special -1 "all Screens" Id:
	screenId = (windowRecord) ? windowRecord->screenNumber : -1;

	// We only support Radeon GPU's, nothing else:
	if (!PsychGetGPUSpecs(screenId, &gpuMaintype, &gpuMinortype, NULL, NULL) ||
	    (gpuMaintype != kPsychRadeon) || (gpuMinortype >= 0xffff)) {
	  return(FALSE);
	}
	
	// Define range of screens: Either a single specific one, or all:
	si = (screenId!=-1) ? screenId   : 0;
	ei = (screenId!=-1) ? screenId+1 : PsychGetNumDisplays();

	// Loop over all target screens:
	for (i = si; i < ei; i++) {
		// Iterate over range of all assigned heads for this screenId 'i' and reconfigure them:
		for (headiter = 0; headiter < kPsychMaxPossibleCrtcs; headiter++) {
            // Map screenid to headid for headiter'th head:
            headid = PsychScreenToCrtcId(i, headiter);

            // We're done as soon as we encounter invalid negative headid.
            if (headid < 0) break;

            // Select Radeon HW registers for corresponding heads:
            if (gpuMinortype < 0x40) {
                // DCE-3 and earlier:
                lutreg = (headid == 0) ? RADEON_D1GRPH_LUT_SEL : RADEON_D2GRPH_LUT_SEL;
                ctlreg = (headid == 0) ? RADEON_D1GRPH_CONTROL : RADEON_D2GRPH_CONTROL;
            }
            else {
                // DCE-4 and later:
                if (headid > DCE4_MAXHEADID) {
                    printf("PTB-ERROR: Invalid headId %i (greater than max %i) provided for DCE-4+ display engine!\n", headid, DCE4_MAXHEADID);
                    return(false);
                }

                lutreg = EVERGREEN_DC_LUT_10BIT_BYPASS + crtcoff[headid];
                ctlreg = EVERGREEN_GRPH_CONTROL + crtcoff[headid];
            }

			// Enable or Disable?
			if (enable) {
				// Enable:
			
				// Switch hardware LUT's to bypass mode:
				// We set bit 8 to enable "bypass LUT in 2101010 mode":
				value = PsychOSKDReadRegister(screenId, lutreg, &status);
				if (status) {
					printf("PTB-ERROR: Failed to set 10 bit framebuffer mode (LUTReg read failed).\n");
					return(false);
				}

				// Set the bypass bit:
				value = value | 0x1 << 8;

				PsychOSKDWriteRegister(screenId, lutreg, value, &status);
				if (status) {
					printf("PTB-ERROR: Failed to set 10 bit framebuffer mode (LUTReg write failed).\n");
					return(false);
				}

				// Only reconfigure framebuffer scanout if this is really our true Native10bpc hack:
				// This is usually skipped on FireGL/FirePro GPU's as their drivers do it already...
				if (windowRecord->specialflags & kPsychNative10bpcFBActive) {
					// Switch CRTC to ABGR2101010 readout mode:
					// We set bit 8 to enable that mode:
					value = PsychOSKDReadRegister(screenId, ctlreg, &status);
					if (status) {
						printf("PTB-ERROR: Failed to set 10 bit framebuffer mode (CTLReg read failed).\n");
						return(false);
					}
                
					// Set 2101010 mode bit:
					value = value | 0x1 << 8;
                
					PsychOSKDWriteRegister(screenId, ctlreg, value, &status);
					if (status) {
						printf("PTB-ERROR: Failed to set 10 bit framebuffer mode (CTLReg write failed).\n");
						return(false);
					}
				}
            
				// Pipe should be in 10 bpc mode now...
				if (PsychPrefStateGet_Verbosity() > 2) printf("PTB-INFO: System framebuffer switched to ARGB2101010 mode for screen %i [head %i].\n", i, headid);
			} else {
				// Disable:

				// Only reconfigure framebuffer scanout if this is really our true Native10bpc hack:
				// This is usually skipped on FireGL/FirePro GPU's as their drivers do it already...
				if (windowRecord->specialflags & kPsychNative10bpcFBActive) {
					// Switch CRTC to ABGR8888 readout mode:
					// We clear bit 8 to enable that mode:
					value = PsychOSKDReadRegister(screenId, ctlreg, &status);
					if (status) {
						// This codepath gets always called in PsychCloseWindow(), so we should skip it
						// silently if register read fails, as this is expected on MS-Windows and on all
						// non-Radeon hardware and if kernel driver isn't loaded:
						if (PsychPrefStateGet_Verbosity() > 5) printf("PTB-ERROR: Failed to set 8 bit framebuffer mode (CTLReg read failed).\n");
						return(false);
					}
					else if (PsychPrefStateGet_Verbosity() > 5) printf("PTB-DEBUG: In disable 10bpc: Readreg. ctlreg yields %lx\n", value);
                
					// Clear 2101010 mode bit:
					value = value & ~(0x1 << 8);
                
					PsychOSKDWriteRegister(screenId, ctlreg, value, &status);
					if (status) {
						printf("PTB-ERROR: Failed to set 8 bit framebuffer mode (CTLReg write failed).\n");
						return(false);
					}
					else if (PsychPrefStateGet_Verbosity() > 5) printf("PTB-DEBUG: In disable 10bpc: ctlreg reset\n");
                
					// Wait 500 msecs for GPU to settle:
					PsychWaitIntervalSeconds(0.5);
				}
            
				// Switch hardware LUT's to standard mapping mode:
				// We clear bit 8 to disable "bypass LUT in 2101010 mode":
				value = PsychOSKDReadRegister(screenId, lutreg, &status);
				if (status) {
					printf("PTB-ERROR: Failed to set 8 bit framebuffer mode (LUTReg read failed).\n");
					return(false);
				}
				else if (PsychPrefStateGet_Verbosity() > 5) printf("PTB-DEBUG: In disable 10bpc: Readreg. lutreg yields %lx\n", value);

				// Clear LUT bypass bit:
				value = value & ~(0x1 << 8);

				PsychOSKDWriteRegister(screenId, lutreg, value, &status);
				if (status) {
					printf("PTB-ERROR: Failed to set 8 bit framebuffer mode (LUTReg write failed).\n");
					return(false);
				}
				else if (PsychPrefStateGet_Verbosity() > 5) printf("PTB-DEBUG: In disable 10bpc: lutreg reset\n");

				// Pipe should be in 8 bpc mode now...
				if (PsychPrefStateGet_Verbosity() > 2) printf("PTB-INFO: System framebuffer switched to standard ARGB8888 mode for screen %i [head %i].\n", i, headid);
			}
		} // Next display head...
	} // Next screenId.

	// Done.
	return(TRUE);
	
#else
	// This cool stuff not supported on the uncool Windows OS:
	return(FALSE);
#endif
}