void CPushPinDesktop::reReadCurrentStartXY(int isReRead) {
	__int64 start = StartCounter();

	// assume 0 means not set...negative ignore :)
	// TODO no overflows, that's a bad value too... they cause a crash, I think! [position youtube too far bottom right, track it...]
	int old_x = m_rScreen.left;
	int old_y = m_rScreen.top;

	int config_start_x = read_config_setting(TEXT("start_x"), m_rScreen.left, true);
    m_rScreen.left = config_start_x;

	// is there a better way to do this registry stuff?
	int config_start_y = read_config_setting(TEXT("start_y"), m_rScreen.top, true);
	m_rScreen.top = config_start_y;
	if(old_x != m_rScreen.left || old_y != m_rScreen.top) {
	  if(isReRead) {
		m_rScreen.right = m_rScreen.left + m_iCaptureConfigWidth;
		m_rScreen.bottom = m_rScreen.top + m_iCaptureConfigHeight;
	  }
	}

	if(show_performance) {
      wchar_t out[1000];
	  swprintf(out, 1000, L"new screen pos from reg: %d %d\n", config_start_x, config_start_y);
	  LocalOutput("[re]readCurrentPosition (including swprintf call) took %.02fms", GetCounterSinceStartMillis(start)); // takes 0.42ms (2000 fps)
	  LocalOutput(out);
	}
}
// default child constructor...
CPushPinDesktop::CPushPinDesktop(HRESULT *phr, CPushSourceDesktop *pFilter)
    : CSourceStream(NAME("Push Source CPushPinDesktop child"), phr, pFilter, L"Capture"),
      m_FramesWritten(0),
      // m_bZeroMemory(0),
      m_iFrameNumber(0),
      //m_nCurrentBitDepth(32), // negotiated...
      m_pParent(pFilter),
      formatAlreadySet(false)
{

    // The main point of this sample is to demonstrate how to take a DIB
    // in host memory and insert it into a video stream.

    // To keep this sample as simple as possible, we just read the desktop image
    // from a file and copy it into every frame that we send downstream.
    //
    // In the filter graph, we connect this filter to the AVI Mux, which creates
    // the AVI file with the video frames we pass to it. In this case,
    // the end result is a screen capture video (GDI images only, with no
    // support for overlay surfaces).

    // Get the device context of the main display, just to get some metrics for it...
    globalStart = GetTickCount();

    hScrDc = CreateDC(TEXT("DISPLAY"), NULL, NULL, NULL); // SLOW for aero desktop ...
    ASSERT(hScrDc != 0);
    // Get the dimensions of the main desktop window
    m_rScreen.left   = m_rScreen.top = 0;
    m_rScreen.right  = GetDeviceCaps(hScrDc, HORZRES);
    m_rScreen.bottom = GetDeviceCaps(hScrDc, VERTRES);

    m_iScreenBitRate = GetTrueScreenDepth(hScrDc);// no 15/16 diff here -> GetDeviceCaps(hScrDc, BITSPIXEL); // http://us.generation-nt.com/answer/get-desktop-format-help-26384242.html

    // my custom config settings...

    WarmupCounter();
    // assume 0 means not set...negative ignore :)
    // TODO no overflows, that's a bad value too... they crash it, I think! [position youtube too far bottom right, run it...]
    int config_start_x = read_config_setting(TEXT("start_x"));
    if(config_start_x != 0) { // negatives allowed...
        m_rScreen.left = config_start_x;
    }

    // is there a better way to do this registry stuff?
    int config_start_y = read_config_setting(TEXT("start_y"));
    if(config_start_y != 0) {
        m_rScreen.top = config_start_y;
    }

    int config_width = read_config_setting(TEXT("width"));
    ASSERT(config_width >= 0); // negatives not allowed...
    if(config_width > 0) {
        int desired = m_rScreen.left + config_width; // using DWORD here makes the math wrong to allow for negative values [dual monitor...]
        int max_possible = m_rScreen.right;
        if(desired < max_possible)
            m_rScreen.right = desired;
        else
            m_rScreen.right = max_possible;
    }

    int config_height = read_config_setting(TEXT("height"));
    ASSERT(config_width >= 0);
    if(config_height > 0) {
        int desired = m_rScreen.top + config_height;
        int max_possible = m_rScreen.bottom;
        if(desired < max_possible)
            m_rScreen.bottom = desired;
        else
            m_rScreen.bottom = max_possible;
    }

    // Save dimensions for later use in FillBuffer() et al
    m_iImageWidth  = m_rScreen.right  - m_rScreen.left;
    m_iImageHeight = m_rScreen.bottom - m_rScreen.top;

    int config_max_fps = read_config_setting(TEXT("default_max_fps")); // LODO allow floats in here, too!
    ASSERT(config_max_fps >= 0);
    if(config_max_fps == 0) {
        // TODO my force_max_fps logic is "off" by one frame, assuming it ends up getting used at all :P
        config_max_fps = 30; // set a high default so that the "caller" application knows that we can serve 'em up fast if desired...of course, if they just never set it then we'll probably be flooding them but who's problem is that, eh?
        // LODO only do this on some special pin or something [?] this way seems ok...
    }
    m_fFps = config_max_fps;
    m_rtFrameLength = UNITS / config_max_fps;
    wchar_t out[1000];
    swprintf(out, 1000, L"default from reg: %d %d %d %d -> %d %d %d %d %dfps\n", config_start_x, config_start_y, config_height, config_width,
             m_rScreen.top, m_rScreen.bottom, m_rScreen.left, m_rScreen.right, config_max_fps);
    LocalOutput(out);
    // does this work with flash?
    // set_config_string_setting(L"last_set_it_to", out);
}
//
// DecideBufferSize
//
// This will always be called after the format has been sucessfully
// negotiated (this is negotiatebuffersize). So we have a look at m_mt to see what size image we agreed.
// Then we can ask for buffers of the correct size to contain them.
//
HRESULT CPushPinDesktop::DecideBufferSize(IMemAllocator *pAlloc,
                                      ALLOCATOR_PROPERTIES *pProperties)
{
    CheckPointer(pAlloc,E_POINTER);
    CheckPointer(pProperties,E_POINTER);

    CAutoLock cAutoLock(m_pFilter->pStateLock());
    HRESULT hr = NOERROR;

    VIDEOINFO *pvi = (VIDEOINFO *) m_mt.Format();
	BITMAPINFOHEADER header = pvi->bmiHeader;
	ASSERT_RETURN(header.biPlanes == 1); // sanity check
	// ASSERT_RAISE(header.biCompression == 0); // meaning "none" sanity check, unless we are allowing for BI_BITFIELDS [?] so leave commented out for now
	// now try to avoid this crash [XP, VLC 1.1.11]: vlc -vvv dshow:// :dshow-vdev="screen-capture-recorder" :dshow-adev --sout  "#transcode{venc=theora,vcodec=theo,vb=512,scale=0.7,acodec=vorb,ab=128,channels=2,samplerate=44100,audio-sync}:standard{access=file,mux=ogg,dst=test.ogv}" with 10x10 or 1000x1000
	// LODO check if biClrUsed is passed in right for 16 bit [I'd guess it is...]
	// pProperties->cbBuffer = pvi->bmiHeader.biSizeImage; // too small. Apparently *way* too small.
	
	int bytesPerLine;
	// there may be a windows method that would do this for us...GetBitmapSize(&header); but might be too small for VLC? LODO try it :)
	// some pasted code...
	int bytesPerPixel = (header.biBitCount/8);
	if(m_bConvertToI420) {
	  bytesPerPixel = 32/8; // we convert from a 32 bit to i420, so need more space in this case
	}

    bytesPerLine = header.biWidth * bytesPerPixel;
    /* round up to a dword boundary for stride */
    if (bytesPerLine & 0x0003) 
    {
      bytesPerLine |= 0x0003;
      ++bytesPerLine;
    }

	ASSERT_RETURN(header.biHeight > 0); // sanity check
	ASSERT_RETURN(header.biWidth > 0); // sanity check
	// NB that we are adding in space for a final "pixel array" (http://en.wikipedia.org/wiki/BMP_file_format#DIB_Header_.28Bitmap_Information_Header.29) even though we typically don't need it, this seems to fix the segfaults
	// maybe somehow down the line some VLC thing thinks it might be there...weirder than weird.. LODO debug it LOL.
	int bitmapSize = 14 + header.biSize + (long)(bytesPerLine)*(header.biHeight) + bytesPerLine*header.biHeight;
	pProperties->cbBuffer = bitmapSize;
	//pProperties->cbBuffer = max(pProperties->cbBuffer, m_mt.GetSampleSize()); // didn't help anything
	if(m_bConvertToI420) {
	  pProperties->cbBuffer = header.biHeight * header.biWidth*3/2; // necessary to prevent an "out of memory" error for FMLE. Yikes. Oh wow yikes.
	}

    pProperties->cBuffers = 1; // 2 here doesn't seem to help the crashes...

    // Ask the allocator to reserve us some sample memory. NOTE: the function
    // can succeed (return NOERROR) but still not have allocated the
    // memory that we requested, so we must check we got whatever we wanted.
    ALLOCATOR_PROPERTIES Actual;
    hr = pAlloc->SetProperties(pProperties,&Actual);
    if(FAILED(hr))
    {
        return hr;
    }

    // Is this allocator unsuitable?
    if(Actual.cbBuffer < pProperties->cbBuffer)
    {
        return E_FAIL;
    }

	// now some "once per run" setups
	
	// LODO reset aer with each run...somehow...somehow...Stop method or something...
	OSVERSIONINFOEX version;
    ZeroMemory(&version, sizeof(OSVERSIONINFOEX));
    version.dwOSVersionInfoSize = sizeof(OSVERSIONINFOEX);
	GetVersionEx((LPOSVERSIONINFO)&version);
	if(version.dwMajorVersion >= 6) { // meaning vista +
	  if(read_config_setting(TEXT("disable_aero_for_vista_plus_if_1"), 0, true) == 1) {
		printf("turning aero off/disabling aero");
	    turnAeroOn(false);
	  }
	  else {
		printf("leaving aero on");
	    turnAeroOn(true);
	  }
	}
	
	if(pOldData) {
		free(pOldData);
		pOldData = NULL;
	}
    pOldData = (BYTE *) malloc(max(pProperties->cbBuffer*pProperties->cBuffers, bitmapSize)); // we convert from a 32 bit to i420, so need more space, hence max
    memset(pOldData, 0, pProperties->cbBuffer*pProperties->cBuffers); // reset it just in case :P	
	
    // create a bitmap compatible with the screen DC
	if(hRawBitmap)
		DeleteObject (hRawBitmap); // delete the old one in case it exists...
	hRawBitmap = CreateCompatibleBitmap(hScrDc, getNegotiatedFinalWidth(), getNegotiatedFinalHeight());
	
	previousFrameEndTime = 0; // reset
	m_iFrameNumber = 0;

    return NOERROR;
} // DecideBufferSize
// the default child constructor...
CPushPinDesktop::CPushPinDesktop(HRESULT *phr, CPushSourceDesktop *pFilter)
        : CSourceStream(NAME("Push Source CPushPinDesktop child/pin"), phr, pFilter, L"Capture"),
		m_bReReadRegistry(0),
		m_bDeDupe(0),
        m_iFrameNumber(0),
		pOldData(NULL),
		m_bConvertToI420(false),
		m_pParent(pFilter),
		m_bFormatAlreadySet(false),
		hRawBitmap(NULL),
		m_bUseCaptureBlt(false),
		previousFrameEndTime(0)
{
	// Get the device context of the main display, just to get some metrics for it...
	globalStart = GetTickCount();

	m_iHwndToTrack = (HWND) read_config_setting(TEXT("hwnd_to_track"), NULL, false);
	if(m_iHwndToTrack) {
	  LocalOutput("using specified hwnd no decoration: %d", m_iHwndToTrack);
	  hScrDc = GetDC(m_iHwndToTrack); // using GetDC here seemingly allows you to capture "just a window" without decoration
	  m_bHwndTrackDecoration = false;
	} else {
      m_iHwndToTrack = (HWND) read_config_setting(TEXT("hwnd_to_track_with_window_decoration"), NULL, false);
	  if(m_iHwndToTrack) {
	    LocalOutput("using specified hwnd with decoration: %d", m_iHwndToTrack);
	    hScrDc = GetWindowDC(m_iHwndToTrack); 
	    m_bHwndTrackDecoration = true;
	  } else {
		int useForeGroundWindow = read_config_setting(TEXT("capture_foreground_window_if_1"), 0, true);
	    if(useForeGroundWindow) {
		  LocalOutput("using foreground window %d", GetForegroundWindow());
          hScrDc = GetDC(GetForegroundWindow());
	    } else {
		  // the default, just capture desktop
          // hScrDc = CreateDC(TEXT("DISPLAY"), NULL, NULL, NULL); // possibly better than GetDC(0), supposed to be multi monitor?
          // LocalOutput("using the dangerous CreateDC DISPLAY\n");
	      // danger, CreateDC DC is only good as long as this particular thread is still alive...hmm...is it better for directdraw
		  hScrDc = GetDC(NULL);
	    }
	  }
	}
	//m_iScreenBitDepth = GetTrueScreenDepth(hScrDc);
	ASSERT_RAISE(hScrDc != 0); // 0 implies failure... [if using hwnd, can mean the window is gone!]
	
    // Get the dimensions of the main desktop window as the default
    m_rScreen.left   = m_rScreen.top = 0;
    m_rScreen.right  = GetDeviceCaps(hScrDc, HORZRES); // NB this *fails* for dual monitor support currently... but we just get the wrong width by default, at least with aero windows 7 both can capture both monitors
    m_rScreen.bottom = GetDeviceCaps(hScrDc, VERTRES);

	// now read some custom settings...
	WarmupCounter();
	if(!m_iHwndToTrack) {
      reReadCurrentStartXY(0);
	} else {
	  LocalOutput("ignoring startx, starty since hwnd was specified");
	}

	int config_width = read_config_setting(TEXT("capture_width"), 0, false);
	ASSERT_RAISE(config_width >= 0); // negatives not allowed...
	int config_height = read_config_setting(TEXT("capture_height"), 0, false);
	ASSERT_RAISE(config_height >= 0); // negatives not allowed, if it's set :)

	if(config_width > 0) {
		int desired = m_rScreen.left + config_width;
		//int max_possible = m_rScreen.right; // disabled check until I get dual monitor working. or should I allow off screen captures anyway?
		//if(desired < max_possible)
			m_rScreen.right = desired;
		//else
		//	m_rScreen.right = max_possible;
	} else {
		// leave full screen
	}

	m_iCaptureConfigWidth = m_rScreen.right - m_rScreen.left;
	ASSERT_RAISE(m_iCaptureConfigWidth > 0);

	if(config_height > 0) {
		int desired = m_rScreen.top + config_height;
		//int max_possible = m_rScreen.bottom; // disabled, see above.
		//if(desired < max_possible)
			m_rScreen.bottom = desired;
		//else
		//	m_rScreen.bottom = max_possible;
	} else {
		// leave full screen
	}
	m_iCaptureConfigHeight = m_rScreen.bottom - m_rScreen.top;
	ASSERT_RAISE(m_iCaptureConfigHeight > 0);

	m_iStretchToThisConfigWidth = read_config_setting(TEXT("stretch_to_width"), 0, false);
	m_iStretchToThisConfigHeight = read_config_setting(TEXT("stretch_to_height"), 0, false);
	m_iStretchMode = read_config_setting(TEXT("stretch_mode_high_quality_if_1"), 0, true); // guess it's either stretch mode 0 or 1
	ASSERT_RAISE(m_iStretchToThisConfigWidth >= 0 && m_iStretchToThisConfigHeight >= 0 && m_iStretchMode >= 0); // sanity checks

	m_bUseCaptureBlt = read_config_setting(TEXT("capture_transparent_windows_including_mouse_in_non_aero_if_1_causes_annoying_mouse_flicker"), 0, true) == 1;
	m_bCaptureMouse = read_config_setting(TEXT("capture_mouse_default_1"), 1, true) == 1;

	// default 30 fps...hmm...
	int config_max_fps = read_config_setting(TEXT("default_max_fps"), 30, false); // TODO allow floats [?] when ever requested
	ASSERT_RAISE(config_max_fps > 0);	

	// m_rtFrameLength is also re-negotiated later...
  	m_rtFrameLength = UNITS / config_max_fps; 

	if(is_config_set_to_1(TEXT("track_new_x_y_coords_each_frame_if_1"))) {
		m_bReReadRegistry = 1; // takes 0.416880ms, but I thought it took more when I made it off by default :P
	}
	if(is_config_set_to_1(TEXT("dedup_if_1"))) {
		m_bDeDupe = 1; // takes 10 or 20ms...but useful to me! :)
	}
	m_millisToSleepBeforePollForChanges = read_config_setting(TEXT("millis_to_sleep_between_poll_for_dedupe_changes"), 10, true);

    wchar_t out[10000];
	swprintf(out, 10000, L"default/from reg read config as: %dx%d -> %dx%d (%d top %d bottom %d l %d r) %dfps, dedupe? %d, millis between dedupe polling %d, m_bReReadRegistry? %d hwnd:%d \n", 
	  m_iCaptureConfigHeight, m_iCaptureConfigWidth, getCaptureDesiredFinalHeight(), getCaptureDesiredFinalWidth(), m_rScreen.top, m_rScreen.bottom, m_rScreen.left, m_rScreen.right, config_max_fps, m_bDeDupe, m_millisToSleepBeforePollForChanges, m_bReReadRegistry, m_iHwndToTrack);

	// warmup the debugging message system
	__int64 measureDebugOutputSpeed = StartCounter();
	LocalOutput(out);
	LocalOutput("writing a large-ish debug itself took: %.02Lf ms", GetCounterSinceStartMillis(measureDebugOutputSpeed));
	set_config_string_setting(L"last_init_config_was", out);
}
// the default child constructor...
CPushPinDesktop::CPushPinDesktop(HRESULT *phr, CPushSourceDesktop *pFilter)
        : CSourceStream(NAME("Push Source CPushPinDesktop child/pin"), phr, pFilter, L"Capture"),
        m_FramesWritten(0),
		m_bReReadRegistry(0),
		m_bDeDupe(0),
        m_iFrameNumber(0),
		pOldData(NULL),
		m_bConvertToI420(false),
        //m_nCurrentBitDepth(32), // negotiated later...
		m_pParent(pFilter),
		m_bFormatAlreadySet(false),
		hRawBitmap(NULL)
{

    // Get the device context of the main display, just to get some metrics for it...
	globalStart = GetTickCount();

	m_iHwndToTrack = (HWND) read_config_setting(TEXT("hwnd_to_track"), NULL);
    hScrDc = GetDC(m_iHwndToTrack);
	m_iScreenBitDepth = GetTrueScreenDepth(hScrDc);
	ASSERT(hScrDc != 0);
	
	GdiSetBatchLimit(1); // disable any GDI...just in case this helps anybody...

    // Get the dimensions of the main desktop window as the default
    m_rScreen.left   = m_rScreen.top = 0;
    m_rScreen.right  = GetDeviceCaps(hScrDc, HORZRES); // NB this *fails* for dual monitor support currently... but we just get the wrong width by default, at least with aero windows 7 both can capture both monitors
    m_rScreen.bottom = GetDeviceCaps(hScrDc, VERTRES);

	// now read some custom settings...
	WarmupCounter();
    reReadCurrentPosition(0);

	int config_width = read_config_setting(TEXT("capture_width"), 0);
	ASSERT(config_width >= 0); // negatives not allowed...
	int config_height = read_config_setting(TEXT("capture_height"), 0);
	ASSERT(config_height >= 0); // negatives not allowed, if it's set :)

	if(config_width > 0) {
		int desired = m_rScreen.left + config_width;
		//int max_possible = m_rScreen.right; // disabled check until I get dual monitor working. or should I allow off screen captures anyway?
		//if(desired < max_possible)
			m_rScreen.right = desired;
		//else
		//	m_rScreen.right = max_possible;
	} else {
		// leave full screen
	}

	m_iCaptureConfigWidth = m_rScreen.right - m_rScreen.left;
	ASSERT(m_iCaptureConfigWidth  > 0);

	if(config_height > 0) {
		int desired = m_rScreen.top + config_height;
		//int max_possible = m_rScreen.bottom; // disabled, see above.
		//if(desired < max_possible)
			m_rScreen.bottom = desired;
		//else
		//	m_rScreen.bottom = max_possible;
	} else {
		// leave full screen
	}
	m_iCaptureConfigHeight = m_rScreen.bottom - m_rScreen.top;
	ASSERT(m_iCaptureConfigHeight > 0);	

	m_iStretchToThisConfigWidth = read_config_setting(TEXT("stretch_to_width"), 0);
	m_iStretchToThisConfigHeight = read_config_setting(TEXT("stretch_to_height"), 0);
	m_iStretchMode = read_config_setting(TEXT("stretch_mode_high_quality_if_1"), 0);
	ASSERT(m_iStretchToThisConfigWidth >= 0 && m_iStretchToThisConfigHeight >= 0 && m_iStretchMode >= 0); // sanity checks

	// default 30 fps...hmm...
	int config_max_fps = read_config_setting(TEXT("default_max_fps"), 30); // TODO allow floats [?] when ever requested
	ASSERT(config_max_fps >= 0);	

	// m_rtFrameLength is also re-negotiated later...
  	m_rtFrameLength = UNITS / config_max_fps; 

	if(is_config_set_to_1(TEXT("track_new_x_y_coords_each_frame_if_1"))) {
		m_bReReadRegistry = 1; // takes 0.416880ms, but I thought it took more when I made it off by default :P
	}
	if(is_config_set_to_1(TEXT("dedup_if_1"))) {
		m_bDeDupe = 1; // takes 10 or 20ms...but useful to me! :)
	}
	m_millisToSleepBeforePollForChanges = read_config_setting(TEXT("millis_to_sleep_between_poll_for_dedupe_changes"), 10);


    wchar_t out[1000];
	swprintf(out, 1000, L"default/from reg read config as: %dx%d -> %dx%d (%dtop %db %dl %dr) %dfps, dedupe? %d, millis between dedupe polling %d, m_bReReadRegistry? %d \n", 
	  m_iCaptureConfigHeight, m_iCaptureConfigWidth, getCaptureDesiredFinalHeight(), getCaptureDesiredFinalWidth(), m_rScreen.top, m_rScreen.bottom, m_rScreen.left, m_rScreen.right, config_max_fps, m_bDeDupe, m_millisToSleepBeforePollForChanges, m_bReReadRegistry);

	LocalOutput(out); // warmup for the below debug :)
	__int64 measureDebugOutputSpeed = StartCounter();
	LocalOutput(out);
	LocalOutput("writing a large-ish debug itself took: %.0Lf ms", GetCounterSinceStartMillis(measureDebugOutputSpeed));
	// does this work with flash?
	set_config_string_setting(L"last_init_config_was", out);
}