Beispiel #1
0
/* Reports whether a lattice arc carries a real word rather than a lone
 * silence model (SENT_START and the like).  An arc qualifies when it has more
 * than one aligned phone, or exactly one aligned phone whose label is not
 * silence.  Returns TRUE for a word arc, FALSE otherwise. */
Boolean IsNonSilArc(LArc *larc)
{
   const int nPhones = larc->nAlign;

   /* several aligned phones: always counted as a word */
   if (nPhones > 1)
      return TRUE;

   /* a single aligned phone counts only when it is not silence */
   if (nPhones == 1 && !IsSilence(larc->lAlign[0].label->name))
      return TRUE;

   return FALSE;
}
Beispiel #2
0
/* Decides whether alignment entry i of the arc should be treated as the start
 * of a (non-silence) phone.
 *   - Quinphone off: TRUE for every entry except a silence in the final slot.
 *   - Quinphone on:  TRUE for every third entry (0, 3, 6, ...) other than the
 *     last one.  This assumes three entries per phone followed by one terminal
 *     sil/sp entry, a simplification that may not hold for every model set. */
Boolean NonSil_and_Quinphone_IsStartPhone(LArc *larc, int i){
  const int last = larc->nAlign-1;

  if(Quinphone){
    /* first entry of a three-entry group, and not the trailing sil/sp */
    return (Boolean)(i % 3 == 0 && i != last);
  }

  /* either not the final phone, or the final phone is non-silence */
  return (Boolean)(i < last || !IsSilence(larc->lAlign[last].label->name));
}
Beispiel #3
0
/* Returns the number of phones in the word carried by an arc.
 *   - Quinphone off: the number of alignment entries, not counting a silence
 *     in the final slot.  An arc with no alignment entries has 0 phones.
 *   - Quinphone on:  nAlign/3; the alignment is required to have the form
 *     3*k+1 entries, otherwise HError(1, ...) is raised. */
int GetNumPhones(LArc *larc){
  int nAlign = larc->nAlign;

  if(Quinphone){
    int i = nAlign/3;
    if(nAlign != (i*3)+1) HError(1, "Problem with quinphone code...");
    return i;
  }

  /* Without this guard an empty alignment would read lAlign[-1] below. */
  if(nAlign < 1) return 0;

  if(IsSilence(larc->lAlign[nAlign-1].label->name)) return nAlign-1;
  return nAlign;
}
void Converter::onMessageReceived(const sp<AMessage> &msg) {
    switch (msg->what()) {
        case kWhatMediaPullerNotify:
        {
            int32_t what;
            CHECK(msg->findInt32("what", &what));

            if (!mIsPCMAudio && mEncoder == NULL) {
                ALOGV("got msg '%s' after encoder shutdown.",
                      msg->debugString().c_str());

                if (what == MediaPuller::kWhatAccessUnit) {
                    sp<ABuffer> accessUnit;
                    CHECK(msg->findBuffer("accessUnit", &accessUnit));

                    accessUnit->setMediaBufferBase(NULL);
                }
                break;
            }

            if (what == MediaPuller::kWhatEOS) {
                mInputBufferQueue.push_back(NULL);

                feedEncoderInputBuffers();

                scheduleDoMoreWork();
            } else {
                CHECK_EQ(what, MediaPuller::kWhatAccessUnit);

                sp<ABuffer> accessUnit;
                CHECK(msg->findBuffer("accessUnit", &accessUnit));

                if (mNumFramesToDrop > 0 || mEncodingSuspended) {
                    if (mNumFramesToDrop > 0) {
                        --mNumFramesToDrop;
                        ALOGI("dropping frame.");
                    }

                    accessUnit->setMediaBufferBase(NULL);
                    break;
                }

#if 0
                MediaBuffer *mbuf =
                    (MediaBuffer *)(accessUnit->getMediaBufferBase());
                if (mbuf != NULL) {
                    ALOGI("queueing mbuf %p", mbuf);
                    mbuf->release();
                }
#endif

#if ENABLE_SILENCE_DETECTION
                if (!mIsVideo) {
                    if (IsSilence(accessUnit)) {
                        if (mInSilentMode) {
                            break;
                        }

                        int64_t nowUs = ALooper::GetNowUs();

                        if (mFirstSilentFrameUs < 0ll) {
                            mFirstSilentFrameUs = nowUs;
                        } else if (nowUs >= mFirstSilentFrameUs + 10000000ll) {
                            mInSilentMode = true;
                            ALOGI("audio in silent mode now.");
                            break;
                        }
                    } else {
                        if (mInSilentMode) {
                            ALOGI("audio no longer in silent mode.");
                        }
                        mInSilentMode = false;
                        mFirstSilentFrameUs = -1ll;
                    }
                }
#endif

                mInputBufferQueue.push_back(accessUnit);

                feedEncoderInputBuffers();

                scheduleDoMoreWork();
            }
            break;
        }

        case kWhatEncoderActivity:
        {
#if 0
            int64_t whenUs;
            if (msg->findInt64("whenUs", &whenUs)) {
                int64_t nowUs = ALooper::GetNowUs();
                ALOGI("[%s] kWhatEncoderActivity after %lld us",
                      mIsVideo ? "video" : "audio", nowUs - whenUs);
            }
#endif

            mDoMoreWorkPending = false;

            if (mEncoder == NULL) {
                break;
            }

            status_t err = doMoreWork();

            if (err != OK) {
                notifyError(err);
            } else {
                scheduleDoMoreWork();
            }
            break;
        }

        case kWhatRequestIDRFrame:
        {
            if (mEncoder == NULL) {
                break;
            }

            if (mIsVideo) {
                ALOGV("requesting IDR frame");
                mEncoder->requestIDRFrame();
            }
            break;
        }

        case kWhatShutdown:
        {
            ALOGI("shutting down %s encoder", mIsVideo ? "video" : "audio");

            releaseEncoder();

            AString mime;
            CHECK(mOutputFormat->findString("mime", &mime));
            ALOGI("encoder (%s) shut down.", mime.c_str());

            sp<AMessage> notify = mNotify->dup();
            notify->setInt32("what", kWhatShutdownCompleted);
            notify->post();
            break;
        }

        case kWhatDropAFrame:
        {
            ++mNumFramesToDrop;
            break;
        }

        case kWhatReleaseOutputBuffer:
        {
            if (mEncoder != NULL) {
                size_t bufferIndex;
                CHECK(msg->findInt32("bufferIndex", (int32_t*)&bufferIndex));
                CHECK(bufferIndex < mEncoderOutputBuffers.size());
                mEncoder->releaseOutputBuffer(bufferIndex);
            }
            break;
        }

        case kWhatSuspendEncoding:
        {
            int32_t suspend;
            CHECK(msg->findInt32("suspend", &suspend));

            mEncodingSuspended = suspend;

            if (mFlags & FLAG_USE_SURFACE_INPUT) {
                sp<AMessage> params = new AMessage;
                params->setInt32("drop-input-frames",suspend);
                mEncoder->setParameters(params);
            }
            break;
        }

        default:
            TRESPASS();
    }
}
void FVoiceCaptureWindows::ProcessData()
{
	DWORD CurrentCapturePos = 0;
	DWORD CurrentReadPos = 0;

	HRESULT hr = CV->VoiceCaptureBuffer8 ? CV->VoiceCaptureBuffer8->GetCurrentPosition(&CurrentCapturePos, &CurrentReadPos) : E_FAIL;
	if (FAILED(hr))
	{
		UE_LOG(LogVoiceCapture, Warning, TEXT("Failed to get voice buffer cursor position 0x%08x"), hr);
		VoiceCaptureState = EVoiceCaptureState::Error;
		return;
	}

	DWORD LockSize = ((CurrentReadPos - CV->NextCaptureOffset) + CV->VoiceCaptureBufferCaps8.dwBufferBytes) % CV->VoiceCaptureBufferCaps8.dwBufferBytes;
	if(LockSize != 0) 
	{ 
		DWORD CaptureFlags = 0;
		DWORD CaptureLength = 0;
		void* CaptureData = NULL;
		DWORD CaptureLength2 = 0;
		void* CaptureData2 = NULL;
		hr = CV->VoiceCaptureBuffer8->Lock(CV->NextCaptureOffset, LockSize,
			&CaptureData, &CaptureLength, 
			&CaptureData2, &CaptureLength2, CaptureFlags);
		if (SUCCEEDED(hr))
		{
			CaptureLength = FMath::Min(CaptureLength, (DWORD)MAX_UNCOMPRESSED_VOICE_BUFFER_SIZE);
			CaptureLength2 = FMath::Min(CaptureLength2, (DWORD)MAX_UNCOMPRESSED_VOICE_BUFFER_SIZE - CaptureLength);

			UncompressedAudioBuffer.Empty(MAX_UNCOMPRESSED_VOICE_BUFFER_SIZE);
			UncompressedAudioBuffer.AddUninitialized(CaptureLength + CaptureLength2);

			uint8* AudioBuffer = UncompressedAudioBuffer.GetData();
			FMemory::Memcpy(AudioBuffer, CaptureData, CaptureLength);

			if (CaptureData2 && CaptureLength2 > 0)
			{
				FMemory::Memcpy(AudioBuffer + CaptureLength, CaptureData2, CaptureLength2);
			}

			CV->VoiceCaptureBuffer8->Unlock(CaptureData, CaptureLength, CaptureData2, CaptureLength2);

			// Move the capture offset forward.
			CV->NextCaptureOffset = (CV->NextCaptureOffset + CaptureLength) % CV->VoiceCaptureBufferCaps8.dwBufferBytes;
			CV->NextCaptureOffset = (CV->NextCaptureOffset + CaptureLength2) % CV->VoiceCaptureBufferCaps8.dwBufferBytes;

			if (IsSilence((int16*)AudioBuffer, UncompressedAudioBuffer.Num()))
			{
				VoiceCaptureState = EVoiceCaptureState::NoData;
			}
			else
			{
				VoiceCaptureState = EVoiceCaptureState::Ok;
			}

#if !UE_BUILD_SHIPPING
			static double LastCapture = 0.0;
			double NewTime = FPlatformTime::Seconds();
			UE_LOG(LogVoiceCapture, VeryVerbose, TEXT("LastCapture: %f %s"), (NewTime - LastCapture) * 1000.0, EVoiceCaptureState::ToString(VoiceCaptureState));
			LastCapture = NewTime;
#endif
		}
		else
		{
			UE_LOG(LogVoiceCapture, Warning, TEXT("Failed to lock voice buffer 0x%08x"), hr);
			VoiceCaptureState = EVoiceCaptureState::Error;
		}
	}
}
// Runs one step of the export task, selected by miActionOrder:
//   geExportClip_Before - builds the output path, creates the encoder when
//                         encoding, prepares the progress text/counters and
//                         advances to the next step.
//   geExportClip_Action - performs one pass of DoEncode() or DoCopy(); those
//                         are responsible for advancing miActionOrder.
//   geExportClip_After  - marks the export as done and advances.
// Returns true when the step succeeded.  On failure (including an unexpected
// miActionOrder value) the task is moved to geExportClip_Done.
tbool CExportClipTask::DoWork()
{
	tbool bSuccess = false;

	switch (miActionOrder) {
		case geExportClip_Before:
			{
				sOut = sDestFolder + sDestNameAndExt;

				if (bDoEncode) {
					mpEncoder = ac::IEncoder::Create(meCodecDst);

					if (meCodecDst == ac::geAudioCodecWave)
						msProgress = "Exporting";
					else
						msProgress = "Compressing";
				}
				if (bDoCopy)
					msProgress = "Copying";
				if (IsSilence()) {
					msProgress += " silent bit";
				}
				else {
					msProgress += std::string(" '") + sClipName + "'";
					muiProgressIx = 0;
					muiProgressTarget = muiDuration - muiStartIx;
				}

				miActionOrder++;
				// An encoder is only needed (and only created above) on the
				// encode path.  Requiring it unconditionally made copy-only
				// tasks fail here, before DoCopy() ever ran.
				bSuccess = bDoEncode ? (mpEncoder != NULL) : true;
			}
			break;

		case geExportClip_Action:
			{
				if (bDoEncode) {
					bSuccess = DoEncode();
				}
				else if (bDoCopy) {
					bSuccess = DoCopy();
				}
			}
			break;

		case geExportClip_After:
			{
				msProgress = "Export done";
				muiProgressIx = muiProgressTarget = 1;

				miActionOrder++;
				bSuccess = true;
			}
			break;

		default:
			// Unexpected step; bSuccess stays false and the task is ended below.
			break;

	}

	if (!bSuccess) {
		miActionOrder = geExportClip_Done;
	}
	return bSuccess;
} // DoWork
// Encodes the next chunk (at most iMaxToProcess samples) of this clip or
// silent bit, and finishes the task once no samples remain.
//
// On the first call (mpfDst still NULL) DoEncode_FirstTimeHere() is run first.
// The source is chosen by iFiles: 0 feeds zeroed 24-bit mono raw data,
// 1 encodes pfWaveL only, anything else encodes pfWaveL/pfWaveR.
// When muiSamplesNeeded reaches 0 the step counter is advanced; the encoder
// and destination are then either handed to pConcatenateNextTask or finalized
// and destroyed.
// Returns true on success; on failure the encoder's message is stored in
// msExtendedError and false is returned.
tbool CExportClipTask::DoEncode()
{
	// First time here: one-off setup, which may fail.
	if (mpfDst == NULL) {
		if (!DoEncode_FirstTimeHere())
			return false;
	}

	// Silent bits need a zeroed buffer large enough for one full chunk.
	if (IsSilence() && (mpcSilence == NULL)) {
		tint32 iSilenceBytes = iMaxToProcess * (24 / 8);
		mpcSilence = new tchar[iSilenceBytes];
		memset(mpcSilence, '\0', iSilenceBytes);
	}

	if (muiSamplesNeeded > 0) {
		// Chunk size: the maximum, or whatever is left if that is less.
		tint32 iChunk = iMaxToProcess;
		if (muiSamplesNeeded < iChunk)
			iChunk = (tint32)muiSamplesNeeded;

		tint64 iDone = 0;
		tbool bChunkOk = false;

		if (iFiles == 0) {
			// silence: push raw zero bytes through the encoder
			tint64 iUnusedOut = 0;
			bChunkOk = mpEncoder->SetRawMode(true, 1, false, 24, iSampleRate);
			if (bChunkOk) {
				tint32 iChunkBytes = iChunk * (24 / 8);
				bChunkOk = mpEncoder->ProcessRaw(mpcSilence, NULL, iChunkBytes, &iUnusedOut);
				mpEncoder->SetRawMode(false, 0, false, 0, 0);
				iDone = iChunk;
			}
		}
		else if (iFiles == 1) {
			// mono
			bChunkOk = mpEncoder->Process(
				pfWaveL,
				(tint32)muiCurrEncodeIx, iChunk, &iDone);
		}
		else {
			// stereo
			bChunkOk = mpEncoder->Process(
				pfWaveL, pfWaveR,
				(tint32)muiCurrEncodeIx, iChunk, &iDone);
		}

		if (!bChunkOk) {
			tchar pszErr[1024];
			mpEncoder->GetErrMsg(pszErr, 1024, true);
			msExtendedError += pszErr;
			return false;
		}

		if (muiSamplesNeeded == (tuint64)-1) {
			// Length was unknown until now; take the real size from the
			// encoder so that progress is reported correctly.
			muiSamplesNeeded = mpEncoder->miInputSamples_IncludingSkipped;
			muiProgressTarget = muiSamplesNeeded;
		}

		muiProgressIx += iDone;
		muiCurrEncodeIx += iDone;
		muiSamplesNeeded -= iDone;
	}

	// More samples outstanding: come back for another chunk.
	if (muiSamplesNeeded != 0)
		return true;

	// This clip / silent bit is complete; move on to the next step.
	miActionOrder++;

	if (pConcatenateNextTask) {
		// The next clip continues in the same output: pass on the encoder
		// and destination without closing them.
		pConcatenateNextTask->mpEncoder = mpEncoder;
		mpEncoder = NULL;
		pConcatenateNextTask->mpfDst = mpfDst;
		mpfDst = NULL;
		pConcatenateNextTask->sOut = sOut;
		return true;
	}

	// Last clip for this output: finalize, then tear everything down.
	if (!mpEncoder->Finalize()) {
		tchar pszErr[1024];
		mpEncoder->GetErrMsg(pszErr, 1024, true);
		msExtendedError = pszErr;
		return false;
	}

	mpEncoder->Destroy();
	mpEncoder = NULL;

	mpfDst->Destroy();
	mpfDst = NULL;

	return true;
} // DoEncode
void Converter::onMessageReceived(const sp<AMessage> &msg) {
    switch (msg->what()) {
        case kWhatMediaPullerNotify:
        {
            int32_t what;
            CHECK(msg->findInt32("what", &what));

            if (!mIsPCMAudio && mEncoder == NULL) {
                ALOGV("got msg '%s' after encoder shutdown.",
                      msg->debugString().c_str());

                if (what == MediaPuller::kWhatAccessUnit) {
                    sp<ABuffer> accessUnit;
                    CHECK(msg->findBuffer("accessUnit", &accessUnit));

                    void *mbuf;
                    if (accessUnit->meta()->findPointer("mediaBuffer", &mbuf)
                            && mbuf != NULL) {
                        ALOGV("releasing mbuf %p", mbuf);

                        accessUnit->meta()->setPointer("mediaBuffer", NULL);

                        static_cast<MediaBuffer *>(mbuf)->release();
                        mbuf = NULL;
                    }
                }
                break;
            }

            if (what == MediaPuller::kWhatEOS) {
                mInputBufferQueue.push_back(NULL);

                feedEncoderInputBuffers();

                scheduleDoMoreWork();
            } else {
                CHECK_EQ(what, MediaPuller::kWhatAccessUnit);

                sp<ABuffer> accessUnit;
                CHECK(msg->findBuffer("accessUnit", &accessUnit));

#if 0
                void *mbuf;
                if (accessUnit->meta()->findPointer("mediaBuffer", &mbuf)
                        && mbuf != NULL) {
                    ALOGI("queueing mbuf %p", mbuf);
                }
#endif

#if ENABLE_SILENCE_DETECTION
                if (!mIsVideo) {
                    if (IsSilence(accessUnit)) {
                        if (mInSilentMode) {
                            break;
                        }

                        int64_t nowUs = ALooper::GetNowUs();

                        if (mFirstSilentFrameUs < 0ll) {
                            mFirstSilentFrameUs = nowUs;
                        } else if (nowUs >= mFirstSilentFrameUs + 10000000ll) {
                            mInSilentMode = true;
                            ALOGI("audio in silent mode now.");
                            break;
                        }
                    } else {
                        if (mInSilentMode) {
                            ALOGI("audio no longer in silent mode.");
                        }
                        mInSilentMode = false;
                        mFirstSilentFrameUs = -1ll;
                    }
                }
#endif

                mInputBufferQueue.push_back(accessUnit);

                feedEncoderInputBuffers();

                scheduleDoMoreWork();
            }
            break;
        }

        case kWhatEncoderActivity:
        {
#if 0
            int64_t whenUs;
            if (msg->findInt64("whenUs", &whenUs)) {
                int64_t nowUs = ALooper::GetNowUs();
                ALOGI("[%s] kWhatEncoderActivity after %lld us",
                      mIsVideo ? "video" : "audio", nowUs - whenUs);
            }
#endif

            mDoMoreWorkPending = false;

            if (mEncoder == NULL) {
                break;
            }

            status_t err = doMoreWork();

            if (err != OK) {
                notifyError(err);
            } else {
                scheduleDoMoreWork();
            }
            break;
        }

        case kWhatRequestIDRFrame:
        {
            if (mEncoder == NULL) {
                break;
            }

            if (mIsVideo) {
                ALOGI("requesting IDR frame");
                mEncoder->requestIDRFrame();
            }
            break;
        }

        case kWhatShutdown:
        {
            ALOGI("shutting down encoder");

            /*bugfix: release queue buffer,it may fall into blackhold.
             *		when 4kplayer is floating in the dynamic desktop,
             *		and someone disable wifi in the quicksetting, this will lead to UI deadlock.
             *   It mainly let the source emit onDisplayDisconnect msg to framework.
             */
            while (!mInputBufferQueue.empty()) {
                sp<ABuffer> accessUnit = *mInputBufferQueue.begin();
                mInputBufferQueue.erase(mInputBufferQueue.begin());
                void *mbuf = NULL;
                if (accessUnit->meta()->findPointer("mediaBuffer", &mbuf)
                        && mbuf != NULL) {
                    ALOGI(">>releasing mbuf %p", mbuf);
                    accessUnit->meta()->setPointer("mediaBuffer", NULL);
                    static_cast<MediaBuffer *>(mbuf)->release();
                    mbuf = NULL;
                }
            }

            if (mEncoder != NULL) {
                mEncoder->release();
                mEncoder.clear();
            }

            AString mime;
            CHECK(mInputFormat->findString("mime", &mime));
            ALOGI("encoder (%s) shut down.", mime.c_str());
            break;
        }

        default:
            TRESPASS();
    }
}
Beispiel #9
0
/* Builds the reference "sausage" (a linear sequence of positions) from the
 * lattice `lat`, then calls DoCorrectness on it and stores
 * fbInfo->MPEFileLength plus the returned value in fbInfo->AvgCorr.
 *
 *   - PhoneMEE unset: one sausage position per non-silence word on the path
 *     followed from lat->lnodes[0]; each position has exactly one entry.
 *   - PhoneMEE set:   each word position w gets maxNPhones[w] consecutive
 *     phone positions; a position lists the distinct phones that the arcs
 *     numbered w have in that slot, and is flagged in nonempty[] as 0 when
 *     some arc has fewer phones than maxNPhones[w].
 *
 * Side effects: overwrites larc->score on every arc of `lat` with the arc's
 * word position, zeroes mpe_occscale for entries 1..Q of fbInfo->aInfo->ac,
 * and allocates its working arrays from fbInfo->tempStack.
 *
 * minn_of_t / maxn_of_t are indexed 1..T (the base pointers are decremented
 * after allocation) and give, per time, the lowest / highest sausage position
 * considered active. */
void DoExactCorrectness(FBLatInfo *fbInfo, Lattice *lat){
  if(!PhoneMEE){ 
    /* Minimum Word Error (exact).  Get the "sausage" of correct words (this is a linear sausage, no alternatives) and call DoCorrectness. */

    LNode *node; LArc *larc;  int a,n,p;
    int i,nWords=0,w; int **iwords;
    short int *niwords; /* just 1's. */
    short int *minn_of_t, *maxn_of_t;
    unsigned char *nonempty;
	 
    /* NOTE(review): if HError(-1, ...) returns, lat is dereferenced below while NULL -- confirm HError does not return for this code. */
    if(!lat){ HError(-1, "No extraNumLat for MPE! "); }
	 
    /* Count words. */
    for(node=lat->lnodes+0; node->foll; node=node->foll->end) /*This code appears to rely on first node being silence. */
      if(node->foll->nAlign > 1 ||  (node->foll->nAlign==1 && ! IsSilence(node->foll->lAlign[0].label->name))) /* a word [ not sil. ]...*/
	nWords++; 

    /* for each word arc, number it in list of words. */
    /* The number n is the predecessor arc's number, plus one if that predecessor is a non-silence arc; it is kept in larc->score. */
    /* NOTE(review): this reads the predecessor's score, so it presumably relies on predecessors appearing earlier in lat->larcs -- confirm. */
    for(larc=lat->larcs,a=0;a<lat->na;larc++,a++){  
      if(larc->start->pred){ n=(int)larc->start->pred->score; }
      else n=0;
      if(larc->start->pred && IsNonSilArc(larc->start->pred))
	n++; /* prev is non-silence so increment.. */
      larc->score = (float) n;
      /* NOTE(review): the message below mentions PhoneMEE although this is the !PhoneMEE branch. */
      if((n>=nWords&&IsNonSilArc(larc)) || n<0) HError(1, "n out of range [0...nWords-1], PhoneMEE.");
    }
	 
    niwords = (short int*)New(&fbInfo->tempStack, sizeof(short int) * (nWords+1));	   
    iwords = (int**)New(&fbInfo->tempStack, sizeof(int*) * (nWords+1));
    for(w=0;w<=nWords;w++) niwords[w] = 1;
	 
    nonempty = (unsigned char*)New(&fbInfo->tempStack, sizeof(char) * (nWords+1));
    /* T entries each; the decrement makes valid indices run from 1 to T. */
    minn_of_t = (short int*)New(&fbInfo->tempStack, sizeof(short int) * fbInfo->T); minn_of_t--;
    maxn_of_t = (short int*)New(&fbInfo->tempStack, sizeof(short int) * fbInfo->T); maxn_of_t--;
    /* Start from the widest possible values so that MIN/MAX below can narrow them. */
    for(i=1;i<=fbInfo->T;i++){
      minn_of_t[i] = nWords;
      maxn_of_t[i] = 0;
    }
    for(p=0;p<nWords;p++){
      niwords[p] = 1; /*all 1 & will stay that way, for MWE case */
      nonempty[p] = 1;  /*all 1 & will stay that way, for MWE case */
      iwords[p] = (int*)New(&fbInfo->tempStack, sizeof(int) * 1);
    }
	 
    /* Get word list. */
    /* NOTE(review): wordName is converted to int with a cast; this looks like it assumes the value fits in an int -- confirm on 64-bit builds. */
    w=0;
    for(node=lat->lnodes+0; node->foll; node=node->foll->end)
      if(node->foll->nAlign > 1 ||  (node->foll->nAlign==1 && ! IsSilence(node->foll->lAlign[0].label->name))) /* a word [ not sil. ]...*/
	iwords[w++][0] = (int)node->foll->end->word->wordName; /* word is at the node at the end of the arc. */
	 
    /* For every word arc, record its word number against the times of its first phone:
       the minimum at the end time and the maximum at the start time. */
    for(larc=lat->larcs,a=0;a<lat->na;larc++,a++){   
      if(IsNonSilArc(larc)){  /* Is a word [not sil]*/
	int startT, endT;
	int w = (int) larc->score;
	if(w<0 || w>=nWords) HError(-1, "Problem with word numbering [2] (%d,%d)...",w,nWords);
	GetTimes(larc, 0, &startT, &endT); /* get times [of first phone]... */
	/* Report out-of-range times and clamp them into 1..T so the array accesses below stay in range. */
	if(startT<1){ HError(-1, "Invalid start time..."); startT=1;}
	if(endT>fbInfo->T){ HError(-1, "Invalid end time..."); endT=fbInfo->T; }
	if(startT>fbInfo->T){ HError(-1, "Invalid start time..."); startT=fbInfo->T;}
	if(endT<1){ HError(-1, "Invalid end time..."); endT=1; }
	minn_of_t[endT] = MIN(minn_of_t[endT], w);
	maxn_of_t[startT] = MAX(maxn_of_t[startT], w);
      }
    }
	 
    /* set {min,max}n_of_t */
    /* minn_of_t is swept from T down to 1 and maxn_of_t from 1 up to T, carrying the running min / max along. */
    { int min=nWords-1; for(i=fbInfo->T;i>=1;i--) if(minn_of_t[i]>min) minn_of_t[i]=min; else min=minn_of_t[i]; } /*make sure increasing.*/
    { int max=0; for(i=1;i<=fbInfo->T;i++) if(maxn_of_t[i]<max) maxn_of_t[i]=max; else max=maxn_of_t[i]; } /*make sure increasing.*/
    if(maxn_of_t[1]<0 || maxn_of_t[fbInfo->T]>=nWords || minn_of_t[1]<0 || minn_of_t[fbInfo->T]>=nWords)
      HError(1, "Problem with minn_of_t or maxn_of_t...");
	 
    /* Reset mpe_occscale for entries 1..Q before calling DoCorrectness. */
    for(i=1;i<=fbInfo->Q;i++) fbInfo->aInfo->ac[i].mpe_occscale = 0; 
    fbInfo->AvgCorr = fbInfo->MPEFileLength + 
      DoCorrectness(fbInfo, &fbInfo->tempStack, fbInfo->aInfo, EXACTCORR_PRUNE/*prune*/, PHONE_BEAM, 
		    minn_of_t, maxn_of_t, niwords, iwords, nonempty, fbInfo->T, nWords, InsCorrectness, FALSE/*Quinphone*/, fbInfo->pr);
	 
	 
  } else { 
    /* Minimum Phone Error (Exact).  Get the "sausage" of correct phones and call DoCorrectness. */
    LNode *node; LArc *larc; 
	 
    /* maxNPhones[w]: largest phone count of any arc numbered w; nArcs[w]: how many non-silence arcs are numbered w;
       maxNArcs: the largest nArcs[w], used as the capacity of every iphone[] row. */
    int nWords=0; short int *maxNPhones, *nArcs, *phoneStart;
    int maxNArcs=0,n,a,w,p,i;
    int **iphone;
    short int *niphones, nPhones; unsigned char *nonempty; /*doesnt contain empty phone...*/
    short int *minn_of_t, *maxn_of_t;
	 
    /* NOTE(review): if HError(-1, ...) returns, lat is dereferenced below while NULL -- confirm HError does not return for this code. */
    if(!lat){ HError(-1, "No extraNumLat for MPE! "); }

    /* Count the non-silence words along the path followed from the first node. */
    for(node=lat->lnodes+0; node->foll; node=node->foll->end){
      if(node->foll->nAlign > 1 ||  (node->foll->nAlign==1 && ! IsSilence(node->foll->lAlign[0].label->name))){ /* a word [ not sil. ]...*/
	nWords++; 
      }
    }
    maxNPhones = (short int*)New(&fbInfo->tempStack, sizeof(short int) * (nWords+1));
    for(w=0;w<=nWords;w++) maxNPhones[w]=0;
    nArcs = (short int*)New(&fbInfo->tempStack, sizeof(short int) * (nWords+1));
    for(w=0;w<=nWords;w++) nArcs[w]=0;
    phoneStart = (short int*)New(&fbInfo->tempStack, sizeof(short int) * (nWords+1)); /* phone no. at which word starts.. */
	   
    for(larc=lat->larcs,a=0;a<lat->na;larc++,a++){  /* for each word arc, number it in list of words, & get maxNPhones[ ].....*/
      int np;
      if(larc->start->pred){ n=(int)larc->start->pred->score; } 
      else n=0;
      if(larc->start->pred && IsNonSilArc(larc->start->pred))
	n++; /* prev is non-silence so increment.. */
      larc->score = (float) n;
      if(IsNonSilArc(larc)){
	np = GetNumPhones(larc); /*works for quinphones too...*/
	if(np > maxNPhones[n]){
	  maxNPhones[n] = np;
	}
	nArcs[n]++;
	if(nArcs[n]>maxNArcs) maxNArcs=nArcs[n];
	/* NOTE(review): this range check runs after maxNPhones[n] and nArcs[n] (nWords+1 entries each) were already indexed. */
	if(n >= nWords) HError(-1, "Problem with word numbering...");
      }
    }
    /* Lay the words out back to back: word w owns phone positions phoneStart[w] .. phoneStart[w]+maxNPhones[w]-1. */
    nPhones=0;
    for(w=0;w<nWords;w++){
      phoneStart[w] = nPhones;
      nPhones += maxNPhones[w];
    }
	   
    iphone = (int**)New(&fbInfo->tempStack, sizeof(int*) * (nPhones+1));
    niphones = (short int*)New(&fbInfo->tempStack, sizeof(short int) * (nPhones+1));
    nonempty = (unsigned char*)New(&fbInfo->tempStack, sizeof(char) * (nPhones+1));
    /* T entries each; the decrement makes valid indices run from 1 to T. */
    minn_of_t = (short int*)New(&fbInfo->tempStack, sizeof(short int) * fbInfo->T); minn_of_t--;
    maxn_of_t = (short int*)New(&fbInfo->tempStack, sizeof(short int) * fbInfo->T); maxn_of_t--;
    for(i=1;i<=fbInfo->T;i++){
      minn_of_t[i] = nPhones;
      maxn_of_t[i] = 0;
    }

    /* Every position starts with no phones listed and marked nonempty; each row can hold up to maxNArcs phones. */
    for(p=0;p<nPhones;p++){
      niphones[p] = 0;
      nonempty[p] = 1;
      iphone[p] = (int*)New(&fbInfo->tempStack, sizeof(int) * maxNArcs);
    }


    for(larc=lat->larcs,a=0;a<lat->na;larc++,a++){   /* set up the 'iphone[..]' and 'niphones' and 'nonempty' arrays.*/
      if(IsNonSilArc(larc)){  /* Is a word [not sil]*/
	int startPos,j;
	int w = (int) larc->score;
	if(w<0 || w>=nWords) HError(-1, "Problem with word numbering [2] (%d,%d)...",w,nWords);
	startPos = phoneStart[w];
	       
	p=0; /* indx of "real" phone [start of quinphone]. */
	for(j=0;j<larc->nAlign;j++){ 
	  if(NonSil_and_Quinphone_IsStartPhone(larc,j)){ /* Is a starting phone... ( x_nnn_2 or some such, I think...), and is not silence. */
	    int startT, endT;
	    int nStates_quinphone, state_quinphone=0,x; Boolean Found=FALSE;
	    int local_iphone = GetNoContextPhone(larc->lAlign[j].label,&nStates_quinphone, &state_quinphone,NULL,NULL); 
	    if(Quinphone && state_quinphone != 2) HError(1, "Quinphone problem... check code, may not be compat with this quinphone set.");
	    /* Add this phone to the position's list unless it is already there. */
	    for(x=0;x<niphones[startPos+p];x++)if(local_iphone==iphone[startPos+p][x]){ Found=TRUE; break; } 
	    if(!Found){ iphone[startPos+p][niphones[startPos+p]++] = local_iphone; }
	    GetTimes(larc, j, &startT, &endT); /* set times... */
	    /* Report out-of-range times and clamp them into 1..T so the array accesses below stay in range. */
	    if(startT<1){ HError(-1, "Invalid start time..."); startT=1;}
	    if(endT>fbInfo->T){ HError(-1, "Invalid end time..."); endT=fbInfo->T; }
	    if(startT>fbInfo->T){ HError(-1, "Invalid start time..."); startT=fbInfo->T;}
	    if(endT<1){ HError(-1, "Invalid end time..."); endT=1; }

	    minn_of_t[endT] = MIN(minn_of_t[endT], startPos+p);
	    maxn_of_t[startT] = MAX(maxn_of_t[startT], startPos+p);
	    p++;
	  }
	}
	/* This arc is shorter than the longest arc for word w: the remaining positions can be skipped. */
	for(;p<maxNPhones[w];p++){
	  nonempty[startPos+p] = 0; /* contains an empty phone. */
	}
      }
    }
    /* Every position must list at least one and at most maxNArcs phones. */
    for(n=0;n<nPhones;n++){ if(niphones[n]==0 || niphones[n]>maxNArcs) HError(1, "niphones > maxNArcs...."); }
	   
    /* minn_of_t is swept from T down to 1 and maxn_of_t from 1 up to T, carrying the running min / max along. */
    { int min=nPhones-1; for(i=fbInfo->T;i>=1;i--) if(minn_of_t[i]>min) minn_of_t[i]=min; else min=minn_of_t[i]; } /*make sure increasing.*/
    { int max=0; for(i=1;i<=fbInfo->T;i++) if(maxn_of_t[i]<max) maxn_of_t[i]=max; else max=maxn_of_t[i]; } /*make sure increasing.*/
    if(maxn_of_t[1]<0 || maxn_of_t[fbInfo->T]>=nPhones || minn_of_t[1]<0 || minn_of_t[fbInfo->T]>=nPhones)
      HError(1, "Problem with minn_of_t or maxn_of_t...");
	   
#ifdef DEBUG_MEE
    /* Debug dump: for each position print its index, its phones and its nonempty flag.
       Each phone int is printed as characters after skipping leading zero bytes. */
    for(i=0;i<nPhones;i++){ 
      int j; printf(" %d(", i);for(j=0;j<niphones[i];j++){ int phone=iphone[i][j];char *cphone=(char*)(&phone);while(!(*cphone))cphone++; printf("%s:",cphone); }
      printf("%d) ", nonempty[i]);
    } 
    printf("\n");
#endif	   
	   
    /* Reset mpe_occscale for entries 1..Q before calling DoCorrectness. */
    for(i=1;i<=fbInfo->Q;i++) fbInfo->aInfo->ac[i].mpe_occscale = 0; 
    fbInfo->AvgCorr = fbInfo->MPEFileLength + 
      DoCorrectness(fbInfo, &fbInfo->tempStack, fbInfo->aInfo, EXACTCORR_PRUNE/*prune*/, PHONE_BEAM, 
		    minn_of_t, maxn_of_t, niphones, iphone, nonempty, fbInfo->T, nPhones, InsCorrectness, Quinphone, fbInfo->pr);
	   
  }
}
Beispiel #10
0
/*
 * DoCorrectness
 *
 * Builds a doubly linked list of CorrN nodes over the lattice arcs that
 * survive occupancy pruning, connects them with transitions, and then runs a
 * forward pass and a backward pass over that list.  Each pass tracks both a
 * log likelihood (alpha / beta) and an occupancy-weighted "correctness"
 * (alphaCorr / betaCorr) indexed by sausage position.  Finally the difference
 * between each node's correctness and the average correctness is accumulated
 * into a->ac->mpe_occscale for the arcs that the node covers.
 *
 * Parameters:
 *   fbInfo         forward-backward info; fbInfo->pr and fbInfo->aInfo->ac[]
 *                  are read here.
 *   mem            heap from which every CorrN and per-node array is allocated.
 *   ai             arc info; the arc list ai->start .. (via ->foll) is walked.
 *   prune          log-occupancy threshold; arcs at or below it get no node.
 *   beamN          number of sausage positions added on each side of the
 *                  [minn_of_t, maxn_of_t] window of a node.
 *   minn_of_t      lowest sausage position active at time t.
 *   maxn_of_t      highest sausage position active at time t.
 *   niphones       number of alternatives at each sausage position.
 *   iphone         phone [0..N-1][0..niphones[n]-1].
 *   nonempty       if TRUE then no null transition at that sausage position.
 *   T              number of frames; valid time indices are 1..T.
 *   N              number of sausage positions; a node may sit at index N.
 *   InsCorrectness passed through unchanged to GetBestCorrectness.
 *   Quinphone      not referenced inside this function (the quinphone case is
 *                  detected per arc through GetNoContextPhone).
 *   pr_in          reference total log likelihood; only used to sanity-check
 *                  the likelihood recomputed over the pruned node list.
 *
 * Returns the average correctness: the occupancy-weighted sum of alphaCorr[N]
 * over the end nodes (nodes whose last arc has no following transition).
 *
 * Side effects: overwrites a->mpe->cn for every arc in ai, adds to
 * a->ac->mpe_occscale, decrements the file-level counter debug_bestcorr when
 * warning, and reports problems through HError.
 *
 * NOTE(review): the code relies on HError with a positive code not returning
 * (e.g. cn->me_end is used after "if(!b) HError(1, ...)"); confirm against
 * the HError implementation.
 */
float DoCorrectness(FBLatInfo *fbInfo, MemHeap *mem, ArcInfo *ai, float prune,
                    int beamN/*phones on either side...*/,
                    short int *minn_of_t,    /* lowest sausage position active at time t. */
                    short int *maxn_of_t,    /* highest ..*/
                    short int *niphones,     /* num alternative sausage positions */
                    int **iphone,            /* phone [0..N-1][0..niphones[n]-1] */
                    unsigned char *nonempty, /* if TRUE then no null transition at that sausage position */
                    int T, int N,
                    float InsCorrectness, Boolean Quinphone, float pr_in){
  HArc *a;
  CorrN *startNode = NULL, *endNode = NULL;   /* head and tail of the node list */

  double local_pr = LZERO;        /* total log likelihood, forward pass  */
  double local_pr_beta = LZERO;   /* total log likelihood, backward pass */
  double avg_correct = 0;         /* average correctness, forward pass   */
  double avg_correct_beta = 0;    /* average correctness, backward pass  */
  CorrN *cn;

  /* Clear any node pointers left on the arcs. */
  for(a=ai->start;a;a=a->foll) a->mpe->cn = NULL;

  /* ---- 1. Attach a 'cn' structure to every arc (group) above the prune threshold. ---- */
  for(a=ai->start;a;a=a->foll){
    float locc = a->alpha + a->betaPlus - fbInfo->pr - a->ac->aclike*latProbScale;

    if(locc > prune){
      if(!PhoneMEE && StartOfWord(a)/*expands to a->pos==0*/){
        /* MWE case: one cn per word, created at the first phone arc of the word. */
        LArc *la = a->parentLarc;
        /* NOTE(review): casts a LabId (presumably a pointer) to int; this
           would truncate where pointers are wider than int -- confirm. */
        int iword = (int)/*from LabId*/ la->end->word->wordName;
        int id = (a->calcArc ? a->calcArc->id : a->id);
        HArc *b,*lastArc; int x;

        cn = (CorrN*)New(mem, sizeof(CorrN));
        cn->next = NULL;
        if(endNode){ endNode->next=cn; cn->prev=endNode; endNode=cn; }
        else { startNode=cn; endNode=cn; cn->prev=NULL; }
        a->mpe->cn = cn;
        cn->me_start = a;
        cn->iphone = iword;
        cn->IsSilence = (Boolean)IsSilence(a->phone->name); /* First arc of word is sil->silence word. */
        cn->follTrans=cn->precTrans=NULL;
        cn->scaled_aclike = fbInfo->aInfo->ac[id].aclike * latProbScale;

        cn->nArcs = la->nAlign; x=1; /* x counts the arcs actually found in this word. */
        lastArc=a;
        if(a->follTrans){
          /* Walk the remaining arcs of this word, always following the first
             following transition. */
          for(b=a->follTrans->end; b->parentLarc==la; b=b->follTrans->end){
            HArc *cb = (b->calcArc ? b->calcArc : b);
            x++;
            b->mpe->cn = (CorrN*)(void*)-1; /* marks a non-primary arc of a node */
            cn->scaled_aclike += cb->ac->aclike * latProbScale
              + translm(b->precTrans->lmlike)/*should be zero unless inspen used in a funny way.*/;
            lastArc=b;
            /* Stop at an arc with no successor.  Without this the walk could
               never advance past the last arc of a word that ends the lattice
               and would loop forever. */
            if(!b->follTrans) break;
          }
        }
        if(x!=cn->nArcs) HError(1, "Problem with nArcs [wordMee]...");
        cn->me_end = lastArc;
      } else if(PhoneMEE && !a->mpe->cn){
        /* MPE case.  !a->mpe->cn rules out arcs already absorbed into an
           earlier node (they were marked with -1 below). */

        /* Quinphone stuff [only set if quinphone]; these defaults correspond
           to the non-quinphone case. */
        int Quinphone_NStates=1; int Quinphone_State=2;
        int phoneId;
        HArc *ca = (a->calcArc ? a->calcArc : a);
        HArc *b=a; int x;

        phoneId = GetNoContextPhone(a->phone,&Quinphone_NStates,&Quinphone_State,NULL,NULL);

        if(Quinphone_NStates>1 && Quinphone_State != 2){ /*not a start state.*/
          HError(-1, "Not a [quinphone] start state.  This should happen very rarely if at all. "/*due to pruning, in fact it shouldn't happen at all.*/);
          continue; /* skip this arc and carry on with the next one. */
        }

        cn = (CorrN*)New(mem, sizeof(CorrN));

        cn->next = NULL;
        if(endNode){ endNode->next=cn; cn->prev=endNode; endNode=cn; }
        else { startNode=cn; endNode=cn; cn->prev=NULL; }

        a->mpe->cn = cn;
        cn->me_start = a;
        cn->iphone = phoneId;
        cn->IsSilence = (Boolean)IsSilence(a->phone->name);
        cn->follTrans=cn->precTrans=NULL;

        /* Following code is the general case, for quinphones as well as triphones. */
        cn->nArcs = Quinphone_NStates; /* number of sequential phone arcs.*/
        cn->scaled_aclike = ca->ac->aclike * latProbScale;

        for(x=cn->nArcs;x>1;x--){ /* loop only happens in Quinphone case (when nArcs>1). */
          /* Step to the next arc of the phone.  If the chain ends early, b
             becomes NULL and is reported by the "Null arc" check below
             instead of dereferencing a NULL transition. */
          if(b) b = (b->follTrans ? b->follTrans->end : NULL);
          if(b){
            HArc *cb = (b->calcArc ? b->calcArc : b);
            b->mpe->cn = (CorrN*)(void*)-1; /* set to -1 for all others but the first...*/
            /* Read the acoustics through cb, as the other accumulation sites do. */
            cn->scaled_aclike += cb->ac->aclike * latProbScale
              + translm(b->precTrans->lmlike)/*should be zero unless inspen used in a funny way.*/;
          }
        }

        /* If the only continuation is a silence arc, fold it into this node. */
        if(b && b->follTrans && !b->follTrans->start_foll && IsSilence(b->follTrans->end->phone->name)){
          HArc *cb;
          b = b->follTrans->end;
          b->mpe->cn = (CorrN*)(void*)-1; /* set to -1 for all others but the first...*/
          cb = (b->calcArc ? b->calcArc : b);
          cn->scaled_aclike += cb->ac->aclike * latProbScale + translm(b->precTrans->lmlike)
            /*should be zero unless inspen used in a funny way.*/;
          cn->nArcs++;
        }

        if(!b) HError(1, "Null arc in DoCorrectness [code or possibly lattice error]...");
        cn->me_end = b;
      }
    }
  }

  /* ---- 2. Attach transitions to the cn structures. ---- */
  for(cn=startNode;cn;cn=cn->next){
    HArc *a = cn->me_start;   /* shadows the outer 'a' on purpose: scope is this loop only */
    ArcTrans *at;
    for(at=cn->me_end->follTrans;at;at=at->start_foll){
      if(at->end->mpe->cn){ /* If the next arc is also within the beam... */
        /* -1 only marks arcs which are not the primary arc of a node, i.e.
           later states of a quinphone, later arcs of a word, or a folded-in
           silence; a transition should never lead straight to one. */
        if(at->end->mpe->cn==(CorrN*)(void*)-1) HError(1, "Not expecting -1 for this node...");
        AddTrans(mem, a->mpe->cn, at->end->mpe->cn, translm(at->lmlike)); /* add transition. */
      }
    }
  }

  /* ---- 3. Recalculate alphas given the new pruning, and get pr. ---- */
  for(cn=startNode;cn;cn=cn->next){
    CorrA *ca;
    if(!cn->me_start->precTrans) cn->alpha = 0; else cn->alpha = LZERO;

    for(ca=cn->precTrans;ca;ca=ca->end_foll){
      cn->alpha = LAdd(cn->alpha, ca->sc_lmlike + ca->start->alpha);
    }
    cn->alpha += cn->scaled_aclike; /* acoustic likelihood. */
    if(! cn->me_end->follTrans) local_pr = LAdd(local_pr, cn->alpha);
  }

  /* check local_pr:  should be same as normal pr, bar pruning: */
  if(fabs(local_pr - pr_in) > 0.2) HError(-1, "DoCorrectness: possible problem with pr (%f != %f)...difference shouldnt be too large, decrease EXACTCORRPRUNE.",local_pr,pr_in);

  /* ---- 4. Set up the arrays attached to each cn structure. ---- */
  for(cn=startNode;cn;cn=cn->next){
    int i,ns,ne;
    int istart = cn->me_start->t_start, iend = cn->me_end->t_end;
    /* The sausage-position window is looked up at the node's mid-time. */
    i = (istart+iend)/2; if(i<1||i>T){  HError(1, "istart/iend out of range."); }
    ns = minn_of_t[i];
    ne = maxn_of_t[i];

    /*following may not be needed.*/
    if(!cn->me_start->precTrans) ns = 0; /*start node.*/
    if(!cn->me_end->follTrans) ne = N;   /*end node.*/

    /* A node can start at N although N-1 is the last phone, this may be
       necessary for silences not consuming any phone. */
    ns = MAX(0, ns - beamN); ne = MIN(N, ne + beamN);

    /* Each array holds ne-ns+1 entries; the pointer is shifted by ns so the
       arrays are indexed directly with sausage positions ns..ne. */
    cn->alphaCorr = (float*)New(mem, sizeof(float) * (ne-ns+1)); cn->alphaCorr -= ns;
    cn->betaCorr = (float*)New(mem, sizeof(float) * (ne-ns+1)); cn->betaCorr -= ns;
    cn->beta = (double*)New(mem, sizeof(double) * (ne-ns+1)); cn->beta -= ns;
    cn->starti = ns; cn->endi = ne;
    for(i=ns;i<=ne;i++){ cn->betaCorr[i]=0; cn->beta[i]=LZERO; }
  }

  /* ---- 5. Forward pass: set cn->alphaCorr[i] for each node cn, which is the
     average correctness of sentences leading up to reference phone cn where
     the last hypothesis sausage position is i. ---- */
  for(cn=startNode;cn;cn=cn->next){
    int i;
    if(!cn->me_start->precTrans){ /* start node... */
      if(cn->starti > 0) HError(1, "start node but doesn't include zero...");
      cn->alphaCorr[0]=0;
      for(i=1;i<=cn->endi;i++) cn->alphaCorr[i]=-10000;  /*very negative so wont be used.*/
    } else {   /* Not start node so sum over preceding nodes. */
      CorrA *ca; CorrN *cn_prev;
      for(i=cn->starti;i<=cn->endi;i++){
        cn->alphaCorr[i]=0;
        if(!cn->precTrans) /* has no preceding nodes-- may be the case due to pruning. */
          cn->alphaCorr[i]=-10000;

        /* Calculate the correctness of this cn at sausage position i from the
           preceding nodes, whose correctnesses are already known at all of
           their positions (they come earlier in the list). */
        for(ca=cn->precTrans;ca;ca=ca->end_foll){
          float BestCorr = -10000;
          float occ;
          cn_prev = ca->start;

          if(GetBestCorrectness(&BestCorr, NULL, NULL, i, cn_prev,
                                minn_of_t,maxn_of_t,niphones,iphone, nonempty,T,N,InsCorrectness)){

            occ = cn_prev->alpha+ca->sc_lmlike+cn->scaled_aclike - cn->alpha; /* lg(occ as fraction of total occ of cn). */
            if(occ<MINEARG) occ=0.0; else occ=exp(occ);
            cn->alphaCorr[i] += BestCorr * occ; /* these occs will sum to 1 over all preceding arcs. */

            /* Checking: warn a limited number of times (debug_bestcorr counts down). */
            if(BestCorr > 10000 || ((BestCorr < -500) && cn_prev->alpha>LSMALL)){
              if(debug_bestcorr > 0){
                debug_bestcorr--;
                HError(-1, "BestCorr too big (or this is a very long or strange file)... (%f)", BestCorr);
              } else if(!debug_bestcorr){
                HError(-1, "Not warning about this any more, BestCorr too big.");
                debug_bestcorr--;
              }
            }
          }
        }
      }
      if( (!cn->me_end->follTrans)) { /* end node, so get contribution to avg correctness... */
        double alpha = cn->alpha;
        double occ = alpha - local_pr;
        occ = (occ>MINEARG ? exp(occ) : 0.0);

        if(cn->endi < N) HError(1, "Last node of lattice doesn't include N in alphaCorr vector.");
        if(occ > 1.1) HError(1, "Occ too big!");

        avg_correct += occ * cn->alphaCorr[N];  /* was MAX(cn->alphaCorr[N], cn->alphaCorr[N-1]); */
        /* This is N+1 the way I've written it in my PhD, I start from 1 not 0 there. */
        /* Only works if last phone = silence or NULL, otherwise technique wont work!! */
      }
    }
  }

  /* ---- 6. Backward pass: set beta and betaCorr for all nodes and positions.
     This is a traceback of the procedure that sets alpha and alphaCorr. ---- */
  for(cn=endNode;cn;cn=cn->prev){
    int i;

    if(!cn->me_start->precTrans){ /* start node: collect the backward totals. */
      local_pr_beta = LAdd(local_pr_beta, cn->beta[0] + cn->scaled_aclike);
      avg_correct_beta = avg_correct_beta
        + cn->betaCorr[0] * (cn->beta[0]+cn->scaled_aclike-local_pr<MINEARG
                             ? 0.0
                             : exp(cn->beta[0]+cn->scaled_aclike-local_pr));
    } else {   /* Not start node so push contributions to preceding nodes. */
      CorrA *ca; CorrN *cn_prev;
      if(!cn->me_end->follTrans){ /* end node... */
        /* was: if(cn->alphaCorr[N] >  cn->alphaCorr[N-1])  N is time of this phone. */
        cn->beta[N] = 0.0; /* This is N+1 the way I've written it in my PhD, I start from 1 not 0 there. */
        /* was:   else      cn->beta[N-1] = 0.0;  */
        /* All other betas are previously initialised to LZERO and betaCorr to 0.0. */
      }
      for(i=cn->starti;i<=cn->endi;i++){
        if(cn->beta[i] + cn->alpha - local_pr > 0.001){
          HError(-1, "Too big pr!");
        }
        if(cn->beta[i] > LZERO+1000){ /* only positions that were actually reached */
          for(ca=cn->precTrans;ca;ca=ca->end_foll){
            float betaCorr,betaCorr_prev;
            float BestCorrPart; int bestj=-1;
            double beta_prev,beta_trans,beta_sum;
            cn_prev = ca->start;

            if( GetBestCorrectness(NULL, &BestCorrPart,  &bestj, i, cn_prev,
                                   minn_of_t,maxn_of_t,niphones,iphone, nonempty,T,N,InsCorrectness) ){ /* if there is nonzero likelihood to cn_prev.. */

              /* Add this contribution of beta to the previous beta, and set
                 the previous betaCorr to a weighted avg of the betaCorrs
                 (weighted by the betas). */

              beta_prev = cn_prev->beta[bestj];                              /* previous value of beta [beta is a likelihood] */
              beta_trans = cn->beta[i] + ca->sc_lmlike + cn->scaled_aclike;  /* a likelihood: beta due to this transition. */
              beta_sum = LAdd(beta_prev,beta_trans);                         /* the new value [the sum of old and added] */

              betaCorr = cn->betaCorr[i] + BestCorrPart; /*I.e, contribution from this phone and transition...*/
              betaCorr_prev = cn_prev->betaCorr[bestj];
              {
                double occ,occ_prev;
                occ = beta_trans - beta_sum;        /* lg(occ of new part as fraction of total occ) */
                occ_prev = beta_prev - beta_sum;    /* lg(occ of old part as fraction of total occ) */
                occ=(occ>MINEARG?exp(occ):0.0); occ_prev=(occ_prev>MINEARG?exp(occ_prev):0.0);
                cn_prev->betaCorr[bestj] = betaCorr*occ + betaCorr_prev*occ_prev;
              }
              cn_prev->beta[bestj] = beta_sum;
            }
          }
        }
      }
    }
  }

  /* check local_pr = local_pr_beta: forward and backward same. */
  if(fabs(local_pr - local_pr_beta) > 0.0001) HError(-1, "DoCorrectness: possible problem with pr (forward and backward %f,%f....) ",local_pr,local_pr_beta);

  /* check correctness when calculated forward and backward is the same. */
  if(fabs(avg_correct_beta - avg_correct) > 0.0001)
    HError(-1, "avg_correct{,beta} differ, %f,%f", avg_correct, avg_correct_beta);

  /* ---- 7. Set the "MPE occupancy" gamma_q^MPE = gamma_q ( corr_q - corr_avg ).
     Actually we set mpe_occscale to corr_q - corr_avg, and get gamma_q^MPE later. ---- */
  for(cn=startNode;cn;cn=cn->next){
    int i; float total_diff=0; /* equals the sum of: (corr-avgCorr)*occ. */
    HArc *a;
    for(i=cn->starti;i<=cn->endi;i++){  /* correctness of node is a sum over preceding transitions... */
      if(cn->beta[i] > LSMALL){         /* only one sausage-position i should have nonzero beta, I think */
        float locc,occ;
        float correctness_diff = cn->betaCorr[i] + cn->alphaCorr[i] - avg_correct; /* difference in correctness for this i.*/
        locc = cn->alpha + cn->beta[i] - local_pr; /* The occupation probability gamma_q due to this transition. */
        occ=(locc>MINEARG?exp(locc):0.0);
        total_diff += occ * correctness_diff;
      }
    }

    a=cn->me_start;
    for(i=1;i<=cn->nArcs;i++){ /* Iterates over the arcs of this node, see "a = a->follTrans->end". */
                               /* In [non-quinphone] MPE, this loop will only have 1 iteration. */
      HArc *ca = (a->calcArc ? a->calcArc : a);
      float total_occ = ca->ac->locc; /*occ of this group of arcs [sharing this start&end&name */
      if(total_occ > MINEARG+5){
        /* total_diff is  gamma_q ( corr_q - corr_avg ),
           total_occ is gamma_q (as a log),
           mpe_occscale = (corr_q - corr_avg) */
        a->ac->mpe_occscale += exp(-total_occ) * total_diff;
      }
      if(i!=cn->nArcs){
        if(!a->follTrans) HError(1, "Problem with quinphone-related code for exact correctness.");
        a = a->follTrans->end;
      }
    }
  }

  return avg_correct;
}
Beispiel #11
0
/* Tells whether a word arc carries real (non-silence) content.
 *   - more than one aligned phone          -> non-silence
 *   - exactly one aligned phone            -> non-silence iff that phone is not silence
 *   - no aligned phones                    -> not non-silence
 * (i.e. returns TRUE if this word is non SENT_START etc.) */
Boolean IsNonSilArc(LArc *larc){
  int phoneCount = larc->nAlign;
  int nonSil;

  if(phoneCount > 1){
    nonSil = 1;
  } else if(phoneCount == 1){
    nonSil = !IsSilence(larc->lAlign[0].label->name);
  } else {
    nonSil = 0;
  }
  return (Boolean)nonSil;
}
Beispiel #12
0
//---------------------------------------------------------------------------
// Reports the sound object as busy exactly when IsSilence() is false.
bool __fastcall TSound::ReadBusy(void)
{
  if (IsSilence()) {
    return false;
  }
  return true;
}