void CPushPinDesktop::CopyScreenToDataBlock(HDC hScrDC, BYTE *pData, BITMAPINFO *pHeader, IMediaSample *pSample) { HDC hMemDC; // screen DC and memory DC HBITMAP hOldBitmap; // handles to device-dependent bitmaps int nX, nY; // coordinates of rectangle to grab int iFinalStretchHeight = getNegotiatedFinalHeight(); int iFinalStretchWidth = getNegotiatedFinalWidth(); ASSERT(!IsRectEmpty(&m_rScreen)); // that would be unexpected // create a DC for the screen and create // a memory DC compatible to screen DC hMemDC = CreateCompatibleDC(hScrDC); // 0.02ms Anything else to reuse, this one's pretty fast...? // determine points of where to grab from it, though I think we control these with m_rScreen nX = m_rScreen.left; nY = m_rScreen.top; // sanity checks--except we don't want it apparently, to allow upstream to dynamically change the size? Can it do that? ASSERT(m_rScreen.bottom - m_rScreen.top == iFinalStretchHeight); ASSERT(m_rScreen.right - m_rScreen.left == iFinalStretchWidth); // select new bitmap into memory DC hOldBitmap = (HBITMAP) SelectObject(hMemDC, hRawBitmap); doJustBitBltOrScaling(hMemDC, m_iCaptureConfigWidth, m_iCaptureConfigHeight, iFinalStretchWidth, iFinalStretchHeight, hScrDC, nX, nY); AddMouse(hMemDC, &m_rScreen, hScrDC, m_iHwndToTrack); // select old bitmap back into memory DC and get handle to // bitmap of the capture...whatever this even means... HBITMAP hRawBitmap2 = (HBITMAP) SelectObject(hMemDC, hOldBitmap); BITMAPINFO tweakableHeader; memcpy(&tweakableHeader, pHeader, sizeof(BITMAPINFO)); if(m_bConvertToI420) { tweakableHeader.bmiHeader.biBitCount = 32; tweakableHeader.bmiHeader.biCompression = BI_RGB; tweakableHeader.bmiHeader.biHeight = -tweakableHeader.bmiHeader.biHeight; // prevent upside down conversion from i420... 
tweakableHeader.bmiHeader.biSizeImage = GetBitmapSize(&tweakableHeader.bmiHeader); } if(m_bConvertToI420) { // copy it to a temporary buffer first doDIBits(hScrDC, hRawBitmap2, iFinalStretchHeight, pOldData, &tweakableHeader); // memcpy(/* dest */ pOldData, pData, pSample->GetSize()); // 12.8ms for 1920x1080 desktop // TODO smarter conversion/memcpy's here [?] we could combine scaling with rgb32_to_i420 for instance... // or maybe we should integrate with libswscale here so they can request whatever they want LOL. (might be a higher quality i420 conversion...) // now convert it to i420 into the "real" buffer rgb32_to_i420(iFinalStretchWidth, iFinalStretchHeight, (const char *) pOldData, (char *) pData);// took 36.8ms for 1920x1080 desktop } else { doDIBits(hScrDC, hRawBitmap2, iFinalStretchHeight, pData, &tweakableHeader); } // clean up DeleteDC(hMemDC); }
// // DecideBufferSize // // This will always be called after the format has been sucessfully // negotiated (this is negotiatebuffersize). So we have a look at m_mt to see what size image we agreed. // Then we can ask for buffers of the correct size to contain them. // HRESULT CPushPinDesktop::DecideBufferSize(IMemAllocator *pAlloc, ALLOCATOR_PROPERTIES *pProperties) { CheckPointer(pAlloc,E_POINTER); CheckPointer(pProperties,E_POINTER); CAutoLock cAutoLock(m_pFilter->pStateLock()); HRESULT hr = NOERROR; VIDEOINFO *pvi = (VIDEOINFO *) m_mt.Format(); BITMAPINFOHEADER header = pvi->bmiHeader; ASSERT_RETURN(header.biPlanes == 1); // sanity check // ASSERT_RAISE(header.biCompression == 0); // meaning "none" sanity check, unless we are allowing for BI_BITFIELDS [?] so leave commented out for now // now try to avoid this crash [XP, VLC 1.1.11]: vlc -vvv dshow:// :dshow-vdev="screen-capture-recorder" :dshow-adev --sout "#transcode{venc=theora,vcodec=theo,vb=512,scale=0.7,acodec=vorb,ab=128,channels=2,samplerate=44100,audio-sync}:standard{access=file,mux=ogg,dst=test.ogv}" with 10x10 or 1000x1000 // LODO check if biClrUsed is passed in right for 16 bit [I'd guess it is...] // pProperties->cbBuffer = pvi->bmiHeader.biSizeImage; // too small. Apparently *way* too small. int bytesPerLine; // there may be a windows method that would do this for us...GetBitmapSize(&header); but might be too small for VLC? LODO try it :) // some pasted code... 
int bytesPerPixel = (header.biBitCount/8); if(m_bConvertToI420) { bytesPerPixel = 32/8; // we convert from a 32 bit to i420, so need more space in this case } bytesPerLine = header.biWidth * bytesPerPixel; /* round up to a dword boundary for stride */ if (bytesPerLine & 0x0003) { bytesPerLine |= 0x0003; ++bytesPerLine; } ASSERT_RETURN(header.biHeight > 0); // sanity check ASSERT_RETURN(header.biWidth > 0); // sanity check // NB that we are adding in space for a final "pixel array" (http://en.wikipedia.org/wiki/BMP_file_format#DIB_Header_.28Bitmap_Information_Header.29) even though we typically don't need it, this seems to fix the segfaults // maybe somehow down the line some VLC thing thinks it might be there...weirder than weird.. LODO debug it LOL. int bitmapSize = 14 + header.biSize + (long)(bytesPerLine)*(header.biHeight) + bytesPerLine*header.biHeight; pProperties->cbBuffer = bitmapSize; //pProperties->cbBuffer = max(pProperties->cbBuffer, m_mt.GetSampleSize()); // didn't help anything if(m_bConvertToI420) { pProperties->cbBuffer = header.biHeight * header.biWidth*3/2; // necessary to prevent an "out of memory" error for FMLE. Yikes. Oh wow yikes. } pProperties->cBuffers = 1; // 2 here doesn't seem to help the crashes... // Ask the allocator to reserve us some sample memory. NOTE: the function // can succeed (return NOERROR) but still not have allocated the // memory that we requested, so we must check we got whatever we wanted. ALLOCATOR_PROPERTIES Actual; hr = pAlloc->SetProperties(pProperties,&Actual); if(FAILED(hr)) { return hr; } // Is this allocator unsuitable? if(Actual.cbBuffer < pProperties->cbBuffer) { return E_FAIL; } // now some "once per run" setups // LODO reset aer with each run...somehow...somehow...Stop method or something... 
OSVERSIONINFOEX version; ZeroMemory(&version, sizeof(OSVERSIONINFOEX)); version.dwOSVersionInfoSize = sizeof(OSVERSIONINFOEX); GetVersionEx((LPOSVERSIONINFO)&version); if(version.dwMajorVersion >= 6) { // meaning vista + if(read_config_setting(TEXT("disable_aero_for_vista_plus_if_1"), 0, true) == 1) { printf("turning aero off/disabling aero"); turnAeroOn(false); } else { printf("leaving aero on"); turnAeroOn(true); } } if(pOldData) { free(pOldData); pOldData = NULL; } pOldData = (BYTE *) malloc(max(pProperties->cbBuffer*pProperties->cBuffers, bitmapSize)); // we convert from a 32 bit to i420, so need more space, hence max memset(pOldData, 0, pProperties->cbBuffer*pProperties->cBuffers); // reset it just in case :P // create a bitmap compatible with the screen DC if(hRawBitmap) DeleteObject (hRawBitmap); // delete the old one in case it exists... hRawBitmap = CreateCompatibleBitmap(hScrDc, getNegotiatedFinalWidth(), getNegotiatedFinalHeight()); previousFrameEndTime = 0; // reset m_iFrameNumber = 0; return NOERROR; } // DecideBufferSize
void CPushPinDesktop::CopyScreenToDataBlock(HDC hScrDC, BYTE *pData, BITMAPINFO *pHeader, IMediaSample *pSample) { HDC hMemDC; // screen DC and memory DC HBITMAP hOldBitmap; // handles to device-dependent bitmaps int nX, nY; // coordinates of rectangle to grab int iFinalStretchHeight = getNegotiatedFinalHeight(); int iFinalStretchWidth = getNegotiatedFinalWidth(); ASSERT_RAISE(!IsRectEmpty(&m_rScreen)); // that would be unexpected // create a DC for the screen and create // a memory DC compatible to screen DC hMemDC = CreateCompatibleDC(hScrDC); // 0.02ms Anything else to reuse, this one's pretty fast...? // determine points of where to grab from it, though I think we control these with m_rScreen nX = m_rScreen.left; nY = m_rScreen.top; // sanity checks--except we don't want it apparently, to allow upstream to dynamically change the size? Can it do that? ASSERT_RAISE(m_rScreen.bottom - m_rScreen.top == iFinalStretchHeight); ASSERT_RAISE(m_rScreen.right - m_rScreen.left == iFinalStretchWidth); // select new bitmap into memory DC hOldBitmap = (HBITMAP) SelectObject(hMemDC, hRawBitmap); doJustBitBltOrScaling(hMemDC, m_iCaptureConfigWidth, m_iCaptureConfigHeight, iFinalStretchWidth, iFinalStretchHeight, hScrDC, nX, nY); if(m_bCaptureMouse) AddMouse(hMemDC, &m_rScreen, hScrDC, m_iHwndToTrack); // select old bitmap back into memory DC and get handle to // bitmap of the capture...whatever this even means... HBITMAP hRawBitmap2 = (HBITMAP) SelectObject(hMemDC, hOldBitmap); BITMAPINFO tweakableHeader; memcpy(&tweakableHeader, pHeader, sizeof(BITMAPINFO)); if(m_bConvertToI420) { tweakableHeader.bmiHeader.biBitCount = 32; tweakableHeader.bmiHeader.biCompression = BI_RGB; tweakableHeader.bmiHeader.biHeight = -tweakableHeader.bmiHeader.biHeight; // prevent upside down conversion from i420... 
tweakableHeader.bmiHeader.biSizeImage = GetBitmapSize(&tweakableHeader.bmiHeader); } if(m_bConvertToI420) { // copy it to a temporary buffer first doDIBits(hScrDC, hRawBitmap2, iFinalStretchHeight, pOldData, &tweakableHeader); // memcpy(/* dest */ pOldData, pData, pSample->GetSize()); // 12.8ms for 1920x1080 desktop // TODO smarter conversion/memcpy's here [?] we could combine scaling with rgb32_to_i420 for instance... // or maybe we should integrate with libswscale here so they can request whatever they want LOL. (might be a higher quality i420 conversion...) // now convert it to i420 into the "real" buffer rgb32_to_i420(iFinalStretchWidth, iFinalStretchHeight, (const char *) pOldData, (char *) pData);// took 36.8ms for 1920x1080 desktop } else { doDIBits(hScrDC, hRawBitmap2, iFinalStretchHeight, pData, &tweakableHeader); // if we're on vlc work around for odd pixel widths and 24 bit...<sigh>, like a width of 134 breaks vlc with 24bit. wow. see also GetMediaType comments wchar_t buffer[MAX_PATH + 1]; // on the stack GetModuleFileName(NULL, buffer, MAX_PATH); if(wcsstr(buffer, L"vlc.exe") > 0) { int bitCount = tweakableHeader.bmiHeader.biBitCount; int stride = (iFinalStretchWidth * (bitCount / 8)) % 4; // see if lines have some padding at the end... //int stride2 = (tweakableHeader.bmiHeader.biWidth * (tweakableHeader.bmiHeader.biBitCount / 8) + 3) & ~3; // ?? if(stride > 0) { stride = 4 - stride; // they round up to 4 word boundary // don't need to copy the first line :P int lineSizeBytes = iFinalStretchWidth*(bitCount/8); int lineSizeTotal = lineSizeBytes + stride; for(int line = 1; line < iFinalStretchHeight; line++) { //*dst, *src, size // memmove required since these overlap... memmove(&pData[line*lineSizeBytes], &pData[line*lineSizeTotal], lineSizeBytes); } } } } // clean up DeleteDC(hMemDC); }
// Produces one video frame into pSample: waits (first call only) until the
// graph reports State_Running, captures the screen into the sample's buffer
// (re-capturing until the image changes when m_bDeDupe is set), sleeps until
// the previous frame's end time has passed, then stamps the sample with
// start/end times, sync-point and discontinuity flags.
//
//   pSample - media sample to fill
//
// Returns S_OK on success, E_POINTER for a NULL pSample (via CheckPointer),
// or the failure code of IMediaSample::GetPointer.
HRESULT CPushPinDesktop::FillBuffer(IMediaSample *pSample)
{
    LocalOutput("video frame requested");

    __int64 startThisRound = StartCounter();
    BYTE *pData = NULL;

    CheckPointer(pSample, E_POINTER);
    if(m_bReReadRegistry) {
        reReadCurrentStartXY(1);
    }

    if(!ever_started) {
        // allow it to startup until Run is called...so StreamTime can work, see
        // http://stackoverflow.com/questions/2469855/how-to-get-imediacontrol-run-to-start-a-file-playing-with-no-delay/2470548#2470548
        // since StreamTime anticipates that the graph's start time has already been set
        FILTER_STATE myState;
        CSourceStream::m_pFilter->GetState(INFINITE, &myState);
        while(myState != State_Running) {
            // TODO accomodate for pausing better, we're single run only currently [does VLC do pausing even?]
            Sleep(1);
            LocalOutput("sleeping till graph running for audio...");
            m_pParent->GetState(INFINITE, &myState);
        }
        ever_started = true;
    }

    // Access the sample's data buffer; bail out rather than capture into an
    // invalid pointer if the sample cannot provide one.
    HRESULT hr = pSample->GetPointer(&pData);
    if(FAILED(hr)) {
        return hr;
    }

    // Make sure that we're still using video format
    ASSERT_RETURN(m_mt.formattype == FORMAT_VideoInfo);

    VIDEOINFOHEADER *pVih = (VIDEOINFOHEADER*) m_mt.pbFormat;

    boolean gotNew = false; // dedupe stuff
    while(!gotNew) {

        CopyScreenToDataBlock(hScrDc, pData, (BITMAPINFO *) &(pVih->bmiHeader), pSample);

        if(m_bDeDupe) {
            if(memcmp(pData, pOldData, pSample->GetSize())==0) { // took desktop: 10ms for 640x1152, still 100 fps uh guess...
                Sleep(m_millisToSleepBeforePollForChanges);
            } else {
                gotNew = true;
                memcpy( /* dest */ pOldData, pData, pSample->GetSize()); // took 4ms for 640x1152, but it's worth it. LODO memcmp and memcpy in the same loop.
            }
        } else {
            // it's always new for everyone else (the typical case)
            gotNew = true;
        }
    }

    // capture some debug stats (how long it took) before we add in our own arbitrary delay to enforce fps...
    long double millisThisRoundTook = GetCounterSinceStartMillis(startThisRound);
    fastestRoundMillis = min(millisThisRoundTook, fastestRoundMillis); // keep stats :)
    sumMillisTook += millisThisRoundTook;

    CRefTime now;
    CRefTime endFrame;
    now = 0;
    CSourceStream::m_pFilter->StreamTime(now);
    if((now > 0) && (now < previousFrameEndTime)) { // now > 0 to accomodate for if there is no reference graph clock at all...also at boot strap time to ignore it XXXX can negatives even ever happen anymore though?
        while(now < previousFrameEndTime) { // guarantees monotonicity too :P
            LocalOutput("sleeping because %llu < %llu", now, previousFrameEndTime);
            Sleep(1);
            CSourceStream::m_pFilter->StreamTime(now);
        }
        // avoid a tidge of creep since we sleep until [typically] just past the previous end.
        endFrame = previousFrameEndTime + m_rtFrameLength;
        previousFrameEndTime = endFrame;

    } else {
        // if there's no reference clock, it will "always" think it missed a frame
        if(show_performance) {
            if(now == 0)
                LocalOutput("probable none reference clock, streaming fastly");
            else
                LocalOutput("it missed a frame--can't keep up %d %llu %llu", countMissed++, now, previousFrameEndTime); // we don't miss time typically I don't think, unless de-dupe is turned on, or aero, or slow computer, buffering problems downstream, etc.
        }
        // have to add a bit here, or it will always be "it missed a frame" for the next round...forever!
        endFrame = now + m_rtFrameLength;
        // most of this stuff I just made up because it "sounded right"
        //LocalOutput("checking to see if I can catch up again now: %llu previous end: %llu subtr: %llu %i", now, previousFrameEndTime, previousFrameEndTime - m_rtFrameLength, previousFrameEndTime - m_rtFrameLength);
        if(now > (previousFrameEndTime - (long long) m_rtFrameLength)) { // do I even need a long long cast?
            // let it pretend and try to catch up, it's not quite a frame behind
            previousFrameEndTime = previousFrameEndTime + m_rtFrameLength;
        } else {
            endFrame = now + m_rtFrameLength/2; // ?? seems to not hurt, at least...I guess
            previousFrameEndTime = endFrame;
        }
    }

    // accomodate for 0 to avoid startup negatives, which would kill our math on the next loop...
    previousFrameEndTime = max(0, previousFrameEndTime);

    pSample->SetTime((REFERENCE_TIME *) &now, (REFERENCE_TIME *) &endFrame);
    //pSample->SetMediaTime((REFERENCE_TIME *)&now, (REFERENCE_TIME *) &endFrame);
    LocalOutput("timestamping video packet as %lld -> %lld", now, endFrame);

    m_iFrameNumber++;

    // Set TRUE on every sample for uncompressed frames http://msdn.microsoft.com/en-us/library/windows/desktop/dd407021%28v=vs.85%29.aspx
    pSample->SetSyncPoint(TRUE);

    // only set discontinuous for the first...I think...
    pSample->SetDiscontinuity(m_iFrameNumber <= 1);

#ifdef _DEBUG
    // the swprintf costs like 0.04ms (25000 fps)
    double m_fFpsSinceBeginningOfTime = ((double) m_iFrameNumber)/(GetTickCount() - globalStart)*1000;
    // Both sizes are reported as width x height (the capture size used to be
    // passed height-first, unlike the negotiated size right after it).
    swprintf(out, L"done video frame! total frames: %d this one %dx%d -> (%dx%d) took: %.02Lfms, %.02f ave fps (%.02f is the theoretical max fps based on this round, ave. possible fps %.02f, fastest round fps %.02f, negotiated fps %.06f), frame missed %d",
        m_iFrameNumber, m_iCaptureConfigWidth, m_iCaptureConfigHeight, getNegotiatedFinalWidth(), getNegotiatedFinalHeight(), millisThisRoundTook, m_fFpsSinceBeginningOfTime, 1.0*1000/millisThisRoundTook,
        /* average */ 1.0*1000*m_iFrameNumber/sumMillisTook, 1.0*1000/fastestRoundMillis, GetFps(), countMissed);
    LocalOutput(out);
    set_config_string_setting(L"frame_stats", out);
#endif
    return S_OK;
}
// Produces one video frame into pSample: captures the screen into the
// sample's buffer (re-capturing until the image changes when m_bDeDupe is
// set), sleeps until the previous frame's end time has passed, then stamps
// the sample with start/end times, sync-point and discontinuity flags and
// publishes a statistics string under "frame_stats".
//
//   pSample - media sample to fill
//
// Returns S_OK on success, E_POINTER for a NULL pSample (via CheckPointer),
// or the failure code of IMediaSample::GetPointer.
HRESULT CPushPinDesktop::FillBuffer(IMediaSample *pSample)
{
    __int64 startThisRound = StartCounter();
    BYTE *pData = NULL;

    CheckPointer(pSample, E_POINTER);
    if(m_bReReadRegistry) {
        reReadCurrentPosition(1);
    }

    // Access the sample's data buffer; bail out rather than capture into an
    // invalid pointer if the sample cannot provide one.
    HRESULT hr = pSample->GetPointer(&pData);
    if(FAILED(hr)) {
        return hr;
    }

    // Make sure that we're still using video format
    ASSERT(m_mt.formattype == FORMAT_VideoInfo);

    VIDEOINFOHEADER *pVih = (VIDEOINFOHEADER*) m_mt.pbFormat;

    // for some reason the timings are messed up initially, as there's no start time at all for the first frame (?)
    // we don't start in State_Running ? race condition?
    // so don't count frames unless we're in State_Running
    FILTER_STATE myState;
    CSourceStream::m_pFilter->GetState(INFINITE, &myState);
    bool fullyStarted = myState == State_Running;

    boolean gotNew = false;
    while(!gotNew) {

        CopyScreenToDataBlock(hScrDc, pData, (BITMAPINFO *) &(pVih->bmiHeader), pSample);

        if(m_bDeDupe) {
            if(memcmp(pData, pOldData, pSample->GetSize())==0) { // took desktop: 10ms for 640x1152, still 100 fps uh guess...
                Sleep(m_millisToSleepBeforePollForChanges);
            } else {
                gotNew = true;
                memcpy( /* dest */ pOldData, pData, pSample->GetSize()); // took 4ms for 640x1152, but it's worth it. LODO memcmp and memcpy in the same loop.
            }
        } else {
            // it's always new for everyone else!
            gotNew = true;
        }
    }

    // capture how long it took before we add in our own arbitrary delay to enforce fps...
    long double millisThisRoundTook = GetCounterSinceStartMillis(startThisRound);
    fastestRoundMillis = min(millisThisRoundTook, fastestRoundMillis); // keep stats :)
    sumMillisTook += millisThisRoundTook;

    CRefTime now;
    CRefTime endFrame;
    CSourceStream::m_pFilter->StreamTime(now);

    // wait until we "should" send this frame out...
    if((now > 0) && (now < previousFrameEndTime)) { // now > 0 to accomodate for if there is no reference graph clock at all...also boot strap time ignore it :P
        while(now < previousFrameEndTime) { // guarantees monotonicity too :P
            Sleep(1);
            CSourceStream::m_pFilter->StreamTime(now);
        }
        // avoid a tidge of creep since we sleep until [typically] just past the previous end.
        endFrame = previousFrameEndTime + m_rtFrameLength;
        previousFrameEndTime = endFrame;

    } else {
        if(show_performance)
            LocalOutput("it missed a frame--can't keep up %d", countMissed++); // we don't miss time typically I don't think, unless de-dupe is turned on, or aero, or slow computer, buffering problems downstream, etc.
        // have to add a bit here, or it will always be "it missed some time" for the next round...forever!
        endFrame = now + m_rtFrameLength;
        // most of this stuff I just made up because it "sounded right"
        //LocalOutput("checking to see if I can catch up again now: %llu previous end: %llu subtr: %llu %i", now, previousFrameEndTime, previousFrameEndTime - m_rtFrameLength, previousFrameEndTime - m_rtFrameLength);
        if(now > (previousFrameEndTime - (long long) m_rtFrameLength)) { // do I need a long long cast?
            // let it pretend and try to catch up, it's not quite a frame behind
            previousFrameEndTime = previousFrameEndTime + m_rtFrameLength;
        } else {
            endFrame = now + m_rtFrameLength/2; // ?? seems to work...I guess...
            previousFrameEndTime = endFrame;
        }
    }
    previousFrameEndTime = max(0, previousFrameEndTime); // avoid startup negatives, which would kill our math on the next loop...

    // LocalOutput("marking frame with timestamps: %llu %llu", now, endFrame);

    pSample->SetTime((REFERENCE_TIME *) &now, (REFERENCE_TIME *) &endFrame);
    //pSample->SetMediaTime((REFERENCE_TIME *)&now, (REFERENCE_TIME *) &endFrame); //useless seemingly

    // frames handed out before the graph is running are not counted
    if(fullyStarted) {
        m_iFrameNumber++;
    }

    // Set TRUE on every sample for uncompressed frames http://msdn.microsoft.com/en-us/library/windows/desktop/dd407021%28v=vs.85%29.aspx
    pSample->SetSyncPoint(TRUE);

    // only set discontinuous for the first...I think...
    pSample->SetDiscontinuity(m_iFrameNumber <= 1);

    // the swprintf costs like 0.04ms (25000 fps)
    m_fFpsSinceBeginningOfTime = ((double) m_iFrameNumber)/(GetTickCount() - globalStart)*1000;
    // Both sizes are reported as width x height (the capture size used to be
    // passed height-first, unlike the negotiated size right after it).
    swprintf(out, L"done frame! total frames: %d this one %dx%d -> (%dx%d) took: %.02Lfms, %.02f ave fps (%.02f is the theoretical max fps based on this round, ave. possible fps %.02f, fastest round fps %.02f, negotiated fps %.06f), frame missed %d",
        m_iFrameNumber, m_iCaptureConfigWidth, m_iCaptureConfigHeight, getNegotiatedFinalWidth(), getNegotiatedFinalHeight(), millisThisRoundTook, m_fFpsSinceBeginningOfTime, 1.0*1000/millisThisRoundTook,
        /* average */ 1.0*1000*m_iFrameNumber/sumMillisTook, 1.0*1000/fastestRoundMillis, GetFps(), countMissed);
//#ifdef _DEBUG // probably not worth it but we do hit this a lot...hmm...
    LocalOutput(out);
    set_config_string_setting(L"frame_stats", out);
//#endif
    return S_OK;
}