void TitleSubst::Load(const std::string &filename, const std::string §ion) { XNode xml; if( !XmlFileUtil::LoadFromFileShowErrors(xml,filename) ) { // LoadFromFile will show its own error //LOG->Trace("Error opening %s: %s", filename.c_str(), f.GetError().c_str() ); return; } XNode *pGroup = xml.GetChild( section ); if( pGroup == nullptr ) { return; } for (auto const *child: *pGroup) { if( child == nullptr || child->GetName() != "Translation" ) { continue; } TitleTrans tr; tr.LoadFromNode( child ); AddTrans(tr); } }
int CSQLiteDE::ProcessCSV2SQLite(int PMode, // currently just the one mode...default is to parse from CSV and create/populate SQLite database bool bSafe, // if true then use indexing on all tables whilst inserting... much slower but perhaps safer if multiple threads ... char *pszExprName, // name by which this experiment is identified char *pszExprDescr, // describes experiment char *pszCtrlConditions, // control conditions char *pszExprConditions, // experiment conditions char *pszInFile, // parse from this input CSV file char *pszDatabase) // SQLite database file { int Rslt; bool bExtdBins; // false if CSV file does not contain bins, true if determined that CSV contains bin counts int sqlite_error; sqlite3_stmt *prepstatement = NULL; int ExprID; // experiment identifier int TransID; int ExpresID; char *pszTransName; char *pszContOrExpr; int TransLen; int NumExons; int Class; int Score; int DECntsScore; int PearsonScore; int CtrlUniqueLoci; int ExprUniqueLoci; double CtrlExprLociRatio; double PValueMedian; double PValueLow95; double PValueHi95; int TotCtrlCnts; int TotExprCnts; int TotCtrlExprCnts; double ObsFoldChange; double FoldMedian; double FoldLow95; double FoldHi95; double ObsPearson; double PearsonMedian; double PearsonLow95; double PearsonHi95; int CtrlAndExprBins; int CtrlOnlyBins; int ExprOnlyBins; int TotBins; int BinIdx; int BinValues[100 * 2]; // sized to contain 100 bin counts for both control and experiment int *pBinValue; int NumFields; int NumElsRead; int NumBins; int BinID; int ExpNumBins; int ExpNumFields; // load CSV file and determine number of fields, from these it can be determined if the file also contains the bin counts CCSVFile *pCSV = new CCSVFile; if(pCSV == NULL) { gDiagnostics.DiagOut(eDLFatal,gszProcName,"Unable to instantiate CCSVfile"); CloseDatabase(true); return(eBSFerrObj); } pCSV->SetMaxFields(cDESummMaxBinFields + 10); // could be upto 100 bins, add 10 in case more fields than expected! 
if((Rslt=pCSV->Open(pszInFile))!=eBSFSuccess) { while(pCSV->NumErrMsgs()) gDiagnostics.DiagOut(eDLFatal,gszProcName,pCSV->GetErrMsg()); gDiagnostics.DiagOut(eDLFatal,gszProcName,"Unable to open file: %s",pszInFile); delete pCSV; return(Rslt); } if((Rslt=pCSV->NextLine()) < 1) // have to be able to read at least one! { gDiagnostics.DiagOut(eDLFatal,gszProcName,"Unable to read any lines from %s",pszInFile); delete pCSV; return(Rslt); } NumFields = pCSV->GetCurFields(); // expecting at least 24, if 24 then summary, if 27+ then summary plus bin counts if(NumFields < cDESummFields) { gDiagnostics.DiagOut(eDLFatal,gszProcName,"CSV transcipt expression file '%s' are expected to contain at least %d fields, parsed %d",pszInFile,cDESummFields,NumFields); delete pCSV; return(eBSFerrFieldCnt); } if(NumFields > cDESummFields && NumFields < cDESummMinBinFields) { gDiagnostics.DiagOut(eDLFatal,gszProcName,"Expected CSV file '%s' to contain either %d (no bins) or at least %d (with bins) fields, file has %d fields",pszInFile,cDESummFields,cDESummMinBinFields,NumFields); delete pCSV; return(eBSFerrFieldCnt); } if(NumFields > cDESummMaxBinFields) // if summary plus bins then expecting at most 100 bins { gDiagnostics.DiagOut(eDLFatal,gszProcName,"Expected CSV file '%s' to contain no more than 100 bins",pszInFile); delete pCSV; return(eBSFerrFieldCnt); } if(NumFields == cDESummFields) { bExtdBins = false; NumBins = 0; } else { bExtdBins = true; NumBins = NumBins = 5 + NumFields - cDESummMinBinFields; } pCSV->Close(); sqlite3_initialize(); if((CreateDatabase(bSafe,pszDatabase))==NULL) { sqlite3_shutdown(); return(eBSFerrInternal); } if((Rslt = CreateExperiment(bExtdBins ? 
1 : 0,pszInFile,pszExprName,pszExprDescr,pszCtrlConditions,pszExprConditions,NumBins)) < 1) { CloseDatabase(true); return(Rslt); } ExprID = Rslt; char *pszBeginTransaction = (char *)"BEGIN TRANSACTION"; char *pszEndTransaction = (char *)"END TRANSACTION"; char *pszDropIndexes = (char *)"DROP INDEX IF EXISTS 'Markers_LociID'"; char *pszPragmaSyncOff = (char *)"PRAGMA synchronous = OFF"; char *pszPragmaSyncOn = (char *)"PRAGMA synchronous = ON"; char *pszPragmaJournMem = (char *)"PRAGMA journal_mode = MEMORY"; gDiagnostics.DiagOut(eDLInfo,gszProcName,"sqlite - populating tables"); // synchronous writes off if((sqlite_error = sqlite3_exec(m_pDB,pszPragmaSyncOff,NULL,NULL,NULL))!=SQLITE_OK) { gDiagnostics.DiagOut(eDLFatal,gszProcName,"sqlite - can't turn synchronous writes off: %s", sqlite3_errmsg(m_pDB)); CloseDatabase(true); return(eBSFerrInternal); } // bracket inserts as a single transaction if((sqlite_error = sqlite3_exec(m_pDB,pszBeginTransaction,NULL,NULL,NULL))!=SQLITE_OK) { gDiagnostics.DiagOut(eDLFatal,gszProcName,"sqlite - can't begin transactions: %s",sqlite3_errmsg(m_pDB)); CloseDatabase(true); return(eBSFerrInternal); } // load CSV file and start populating the SQLite database if((Rslt=pCSV->Open(pszInFile))!=eBSFSuccess) { while(pCSV->NumErrMsgs()) gDiagnostics.DiagOut(eDLFatal,gszProcName,pCSV->GetErrMsg()); gDiagnostics.DiagOut(eDLFatal,gszProcName,"Unable to open file: %s",pszInFile); delete pCSV; CloseDatabase(true); return(Rslt); } bExtdBins = false; NumElsRead = 0; ExpNumBins = 0; ExpNumFields = 0; while((Rslt=pCSV->NextLine()) > 0) // onto next line containing fields { if(!(NumElsRead % (bSafe ? 
5000 : 100000)) && NumElsRead > 0) gDiagnostics.DiagOut(eDLInfo,gszProcName,"Parsed %d CSV lines - Transcripts: %d",NumElsRead, m_NumTrans); NumFields = pCSV->GetCurFields(); // expecting at least 24, if 24 then summary, if 27+ then summary plus bin counts if(ExpNumFields > 0 && NumFields != ExpNumFields) { gDiagnostics.DiagOut(eDLFatal,gszProcName,"CSV transcipt expression file '%s' has varying number of fields : expected %d, parsed %d at line %d",pszInFile,ExpNumFields,NumFields,NumElsRead); delete pCSV; CloseDatabase(true); return(eBSFerrFieldCnt); } if(ExpNumFields == 0) { ExpNumFields = NumFields; if(NumFields < cDESummFields) { gDiagnostics.DiagOut(eDLFatal,gszProcName,"CSV transcipt expression file '%s' are expected to contain at least %d fields, parsed %d at line %d",pszInFile,cDESummFields,NumFields,NumElsRead); delete pCSV; CloseDatabase(true); return(eBSFerrFieldCnt); } if(NumFields > cDESummFields) // summary plus bins? { if(NumFields < cDESummMinBinFields) // if summary plus bins then expecting at least 5 bins { gDiagnostics.DiagOut(eDLFatal,gszProcName,"Expected CSV file '%s' to contain at least 5 bins at line %d",pszInFile,NumElsRead); delete pCSV; CloseDatabase(true); return(eBSFerrFieldCnt); } if(NumFields > cDESummMaxBinFields) // if summary plus bins then expecting at most 100 bins { gDiagnostics.DiagOut(eDLFatal,gszProcName,"Expected CSV file '%s' to contain no more than 100 bins at line %d",pszInFile,NumElsRead); delete pCSV; CloseDatabase(true); return(eBSFerrFieldCnt); } NumBins = 5 + NumFields - cDESummMinBinFields; ExpNumBins = NumBins; bExtdBins = true; } } if(!NumElsRead && pCSV->IsLikelyHeaderLine()) continue; NumElsRead += 1; // first 20 fields are common to both CSV file types pCSV->GetInt(1,&Class); pCSV->GetText(2,&pszTransName); pCSV->GetInt(3,&TransLen); pCSV->GetInt(4,&NumExons); pCSV->GetInt(5,&Score); pCSV->GetInt(6,&DECntsScore); pCSV->GetInt(7,&PearsonScore); pCSV->GetInt(8,&CtrlUniqueLoci); pCSV->GetInt(9,&ExprUniqueLoci); 
pCSV->GetDouble(10,&CtrlExprLociRatio); pCSV->GetDouble(11,&PValueMedian); pCSV->GetDouble(12,&PValueLow95); pCSV->GetDouble(13,&PValueHi95); pCSV->GetInt(14,&TotCtrlCnts); pCSV->GetInt(15,&TotExprCnts); pCSV->GetInt(16,&TotCtrlExprCnts); pCSV->GetDouble(17,&ObsFoldChange); pCSV->GetDouble(18,&FoldMedian); pCSV->GetDouble(19,&FoldLow95); pCSV->GetDouble(20,&FoldHi95); // file formats diverge dependent on if containing bin counts if(!bExtdBins) { pszContOrExpr = NULL; pCSV->GetDouble(21,&ObsPearson); pCSV->GetDouble(22,&PearsonMedian); pCSV->GetDouble(23,&PearsonLow95); pCSV->GetDouble(24,&PearsonHi95); TotBins = 0; CtrlAndExprBins = 0; CtrlOnlyBins = 0; ExprOnlyBins = 0; } else { pCSV->GetText(21,&pszContOrExpr); pCSV->GetDouble(22,&ObsPearson); pCSV->GetDouble(23,&PearsonMedian); pCSV->GetDouble(24,&PearsonLow95); pCSV->GetDouble(25,&PearsonHi95); pCSV->GetInt(26,&TotBins); pCSV->GetInt(27,&CtrlAndExprBins); pCSV->GetInt(28,&CtrlOnlyBins); pCSV->GetInt(29,&ExprOnlyBins); if(!stricmp(pszContOrExpr,"Control")) pBinValue = &BinValues[0]; else pBinValue = &BinValues[1]; for(BinIdx = 0; BinIdx < NumBins; BinIdx++,pBinValue += 2) *pBinValue = pCSV->GetInt(BinIdx + 30,pBinValue); } if(!bExtdBins || (bExtdBins && stricmp(pszContOrExpr,"Control"))) { TransID = AddTrans(ExprID,pszTransName,NumExons,TransLen,(char *)"N/A"); ExpresID = AddExpres(ExprID,TransID,Class,Score,DECntsScore,PearsonScore,CtrlUniqueLoci,ExprUniqueLoci,CtrlExprLociRatio,PValueMedian,PValueLow95,PValueHi95, TotCtrlCnts,TotExprCnts,TotCtrlExprCnts,ObsFoldChange,FoldMedian,FoldLow95,FoldHi95,ObsPearson,PearsonMedian,PearsonLow95,PearsonHi95, CtrlAndExprBins,CtrlOnlyBins,ExprOnlyBins); } if(bExtdBins && stricmp(pszContOrExpr,"Control")) { pBinValue = &BinValues[0]; for(BinIdx = 1; BinIdx <= NumBins; BinIdx++,pBinValue += 2) BinID = AddBin(ExprID,TransID,ExpresID,BinIdx,*pBinValue,pBinValue[1]); } } gDiagnostics.DiagOut(eDLInfo,gszProcName,"Parsed %d CSV lines - transcripts: %d",NumElsRead, m_NumTrans); // 
end transaction if((sqlite_error = sqlite3_exec(m_pDB,pszEndTransaction,NULL,NULL,NULL))!=SQLITE_OK) { gDiagnostics.DiagOut(eDLFatal,gszProcName,"sqlite - can't end transactions on '%s': %s", "Markers",sqlite3_errmsg(m_pDB)); CloseDatabase(true); return(eBSFerrInternal); } gDiagnostics.DiagOut(eDLInfo,gszProcName,"Completed populating the sqlite database"); gDiagnostics.DiagOut(eDLInfo,gszProcName,"Generating indexes ..."); tsDEStmSQL *pStms; pStms = m_StmSQL; int TblIdx; for(TblIdx = 0; TblIdx < 4; TblIdx++,pStms++) { if(pStms->pszCreateIndexes == NULL) continue; gDiagnostics.DiagOut(eDLInfo,gszProcName,"Creating indexes on table %s ...", pStms->pTblName); if((sqlite_error = sqlite3_exec(m_pDB,pStms->pszCreateIndexes,0,0,0))!=SQLITE_OK) { gDiagnostics.DiagOut(eDLFatal,gszProcName,"sqlite - can't create indexes on table %s : %s", pStms->pTblName,sqlite3_errmsg(m_pDB)); gDiagnostics.DiagOut(eDLFatal,gszProcName,"sqlite - statement: %s",pStms->pszCreateIndexes); CloseDatabase(true); return(eBSFerrInternal); } } gDiagnostics.DiagOut(eDLInfo,gszProcName,"Indexes generated"); // synchronous writes off if((sqlite_error = sqlite3_exec(m_pDB,pszPragmaSyncOn,NULL,NULL,NULL))!=SQLITE_OK) { gDiagnostics.DiagOut(eDLFatal,gszProcName,"sqlite - can't turn synchronous writes on: %s", sqlite3_errmsg(m_pDB)); CloseDatabase(true); return(eBSFerrInternal); } CloseDatabase(); sqlite3_shutdown(); gDiagnostics.DiagOut(eDLInfo,gszProcName,"SQLite database ready for use"); return(eBSFSuccess); }
void TitleSubst::Load(const CString &filename, const CString §ion) { RageFile f; if( !f.Open(filename) ) { LOG->Trace("Error opening %s: %s", filename.c_str(), f.GetError().c_str() ); return; } CString CurrentSection; TitleTrans tr; while (!f.AtEOF()) { CString line; int ret = f.GetLine( line ); if( ret == 0 ) break; if( ret < 0 ) { LOG->Trace("Error reading %s: %s", filename.c_str(), f.GetError().c_str() ); break; } if(line.size() > 0 && utf8_get_char(line.c_str()) == 0xFEFF) { /* Annoying header that Windows puts on UTF-8 plaintext * files; remove it. */ line.erase(0, utf8_get_char_len(line[0])); } TrimLeft(line); TrimRight(line); if(line.size() == 0) continue; /* blank */ if(line[0] == '#') continue; /* comment */ if(!line.CompareNoCase("DontTransliterate")) { tr.translit = false; continue; } size_t pos = line.find_first_of(':'); if(pos != string::npos) { /* x: y */ CString id = line.substr(0, pos); CString txt = line.substr(pos+1); TrimLeft(txt); /* Surround each regex with ^(...)$, to force all comparisons to default * to being a full-line match. (Add ".*" manually if this isn't wanted.) */ if(!id.CompareNoCase("TitleFrom")) tr.TitleFrom = "^(" + txt + ")$"; else if(!id.CompareNoCase("ArtistFrom")) tr.ArtistFrom = "^(" + txt + ")$"; else if(!id.CompareNoCase("SubtitleFrom")) tr.SubFrom = "^(" + txt + ")$"; else if(!id.CompareNoCase("TitleTo")) tr.Dest.Title = txt; else if(!id.CompareNoCase("ArtistTo")) tr.Dest.Artist = txt; else if(!id.CompareNoCase("SubtitleTo")) tr.Dest.Subtitle = txt; else if(!id.CompareNoCase("TitleTransTo")) tr.Dest.TitleTranslit = txt; else if(!id.CompareNoCase("ArtistTransTo")) tr.Dest.ArtistTranslit = txt; else if(!id.CompareNoCase("SubtitleTransTo")) tr.Dest.SubtitleTranslit = txt; else LOG->Warn( "Unknown TitleSubst tag: \"%s\"", id.c_str() ); } /* Add the translation if this is a terminator (*) or section * marker ([foo]). */ if(line[0] == '*' || line[0] == '[') { if(!CurrentSection.CompareNoCase(section)) AddTrans(tr); /* Reset. 
*/ tr = TitleTrans(); } if(line[0] == '[' && line[line.size()-1] == ']') { CurrentSection = line.substr(1, line.size()-2); } } }
/* DoCorrectness
   Exact-correctness forward-backward over a pruned phone/word lattice,
   apparently part of MPE/MWE discriminative training (HTK-style code --
   TODO confirm provenance).  Builds a chain of CorrN nodes over lattice
   arcs above the pruning beam, runs a forward pass (alpha / alphaCorr),
   a backward pass (beta / betaCorr), cross-checks the two, and finally
   sets a->ac->mpe_occscale for each arc to (corr_q - corr_avg).
   Returns avg_correct, the occupancy-weighted average correctness.
   NOTE(review): relies on file-scope globals (PhoneMEE, latProbScale,
   debug_bestcorr) and helpers (translm, GetBestCorrectness, AddTrans)
   not visible in this chunk. */
float DoCorrectness(FBLatInfo *fbInfo, MemHeap *mem, ArcInfo *ai, float prune, int beamN/*phones on either side...*/,
                    short int *minn_of_t, /* lowest sausage position active at time t. */
                    short int *maxn_of_t, /* highest ..*/
                    short int *niphones,  /* num alternative sausage positions */
                    int **iphone,         /* phone [0..N-1][0..niphones[n]-1] */
                    unsigned char *nonempty, /* if TRUE then no null transition at that sausage position */
                    int T, int N, float InsCorrectness, Boolean Quinphone, float pr_in){
  HArc *a;
  CorrN *startNode = NULL, *endNode = NULL;   /* doubly-linked chain of CorrN nodes built below */
  double local_pr=LZERO;                      /* total lattice likelihood recomputed under the new pruning */
  double local_pr_beta=LZERO;                 /* same, from the backward pass (consistency check) */
  double avg_correct = 0;
  double avg_correct_beta = 0;
  CorrN *cn;

  /* Clear any previous cn attachments. */
  for(a=ai->start;a;a=a->foll) a->mpe->cn = NULL;

  for(a=ai->start;a;a=a->foll){ /* This loop attaches the 'cn' structure to the lattice */
    float locc = a->alpha + a->betaPlus - fbInfo->pr - a->ac->aclike*latProbScale;
    if(locc > prune){ /* ... if above prune threshold then attach the 'cn' structure */
      if(!PhoneMEE && StartOfWord(a)/*expands to a->pos==0*/){
        /* This is the MWE case.  Create a cn structure for the first phone of the word. */
        LArc *la = a->parentLarc;
        int iword = (int)/*from LabId*/ la->end->word->wordName;
        int id = (a->calcArc ? a->calcArc->id : a->id);
        HArc *b,*lastArc;
        int x;
        cn = (CorrN*)New(mem, sizeof(CorrN));
        cn->next = NULL;
        /* Append cn to the startNode..endNode chain. */
        if(endNode){ endNode->next=cn; cn->prev=endNode; endNode=cn;}
        else {startNode=cn;endNode=cn;cn->prev=NULL;}
        a->mpe->cn = cn;
        cn->me_start = a;
        cn->iphone = iword;
        cn->IsSilence = (Boolean)IsSilence(a->phone->name); /* First arc of word is sil->silence word. */
        cn->follTrans=cn->precTrans=NULL;
        cn->scaled_aclike = fbInfo->aInfo->ac[id].aclike * latProbScale;
        cn->nArcs = la->nAlign;
        x=1; /*n arcs in cn.*/
        lastArc=a;
        /* Walk the remaining phone arcs of this word; they belong to the same
           cn and are flagged with the sentinel -1. */
        if(a->follTrans)
          for(b=a->follTrans->end;b->parentLarc==la;b->follTrans&&(b=b->follTrans->end)){
            HArc *cb = (b->calcArc ? b->calcArc : b);
            x++;
            b->mpe->cn = (CorrN*)(void*)-1;
            cn->scaled_aclike += cb->ac->aclike * latProbScale + translm(b->precTrans->lmlike)/*should be zero unless inspen used in a funny way.*/;
            lastArc=b;
          }
        if(x!=cn->nArcs) HError(1, "Problem with nArcs [wordMee]...");
        cn->me_end = lastArc;
      }
      else if(PhoneMEE && !a->mpe->cn){
        /* This is the MPE case.  !a->mpe->cn is to rule out silence (see this
           block of code, in which cn is set to -1. */
        /* Quinphone stuff [only set if quinphone]: */
        int Quinphone_NStates=1;
        int Quinphone_State=2; /* these defaults correspond to the non-quinphone case. */
        int iphone;
        HArc *ca = (a->calcArc ? a->calcArc : a);
        HArc *b=a;
        int x;
        iphone = GetNoContextPhone(a->phone,&Quinphone_NStates,&Quinphone_State,NULL,NULL);
        if(Quinphone_NStates>1 && Quinphone_State != 2){ /*not a start state.*/
          HError(-1, "Not a [quinphone] start state. This should happen very rarely if at all. "/*due to pruning, in fact it shouldn't happen at all.*/);
          continue; /*continue with loop, don't do this one. */
        }
        cn = (CorrN*)New(mem, sizeof(CorrN));
        cn->next = NULL;
        /* Append cn to the chain (same as MWE case above). */
        if(endNode){ endNode->next=cn; cn->prev=endNode; endNode=cn;}
        else {startNode=cn;endNode=cn;cn->prev=NULL;}
        a->mpe->cn = cn;
        cn->me_start = a;
        cn->iphone = iphone;
        cn->IsSilence = (Boolean)IsSilence(a->phone->name);
        cn->follTrans=cn->precTrans=NULL;
        /* Following code is the general case, for quinphones as well as triphones. */
        cn->nArcs = Quinphone_NStates; /* number of sequential phone arcs.*/
        cn->scaled_aclike = ca->ac->aclike * latProbScale;
        for(x=cn->nArcs;x>1;x--){ /* loop only happens in Quinphone case (when nArcs>1). */
          if(b){
            HArc *cb;
            b=b->follTrans->end; /*so b is last one ... */
            b->mpe->cn = (CorrN*)(void*)-1; /* set to -1 for all others but the first...*/
            cb = (b->calcArc ? b->calcArc : b);
            /* NOTE(review): uses b->ac->aclike here but cb->ac->aclike in the
               otherwise-parallel silence branch below -- confirm intended. */
            cn->scaled_aclike += b->ac->aclike * latProbScale + translm(b->precTrans->lmlike)/*should be zero unless inspen used in a funny way.*/;
          } /* else will be error . */
        }
        /* Absorb a following end-of-word silence arc into this cn. */
        if(b && b->follTrans && !b->follTrans->start_foll && IsSilence(b->follTrans->end->phone->name)){
          /*might as well include b->foll as well since it's silence....*/
          HArc *cb;
          b = b->follTrans->end;
          b->mpe->cn = (CorrN*)(void*)-1; /* set to -1 for all others but the first...*/
          cb = (b->calcArc ? b->calcArc : b);
          cn->scaled_aclike += cb->ac->aclike * latProbScale + translm(b->precTrans->lmlike) /*should be zero unless inspen used in a funny way.*/;
          cn->nArcs++;
        }
        if(!b) HError(1, "Null arc in DoCorrectness [code or possibly lattice error]...");
        cn->me_end = b;
      }
    }
  }

  for(cn=startNode;cn;cn=cn->next){ /* Attach transitions to the cn structure. */
    HArc *a = cn->me_start;
    ArcTrans *at;
    for(at=cn->me_end->follTrans;at;at=at->start_foll){
      if(at->end->mpe->cn){ /* If the next arc is also within the beam... */
        if(at->end->mpe->cn==(CorrN*)(void*)-1)
          HError(1, "Not expecting -1 for this node..."); /* -1 only for nodes which are not the primary node of the arc, i.e. states>2 of quinphone, or end-of-word silence.*/
        AddTrans(mem, a->mpe->cn, at->end->mpe->cn, translm(at->lmlike)); /* add transition. */
      }
    }
  }

  /* Now recalculate alphas given new pruning, and get pr.... */
  for(cn=startNode;cn;cn=cn->next){
    CorrA *ca;
    if(!cn->me_start->precTrans) cn->alpha = 0;   /* start node: log-prob 0 */
    else cn->alpha = LZERO;
    for(ca=cn->precTrans;ca;ca=ca->end_foll){
      cn->alpha = LAdd(cn->alpha, ca->sc_lmlike + ca->start->alpha);
    }
    cn->alpha += cn->scaled_aclike; /* acoustic likelihood. */
    if(! cn->me_end->follTrans) local_pr = LAdd(local_pr, cn->alpha);
  }

  /* check local_pr: should be same as normal pr, bar pruning:*/
  if(fabs(local_pr - pr_in) > 0.2)
    HError(-1, "DoCorrectness: possible problem with pr (%f != %f)...difference shouldnt be too large, decrease EXACTCORRPRUNE.",local_pr,pr_in);

  /* Now set up the arrays attached to the cn structure... */
  for(cn=startNode;cn;cn=cn->next){
    int i,ns,ne;
    int istart = cn->me_start->t_start, iend = cn->me_end->t_end;
    i = (istart+iend)/2;  /* midpoint time of this node, used to index the per-time beam */
    if(i<1||i>T){ HError(1, "istart/iend out of range."); }
    ns = minn_of_t[i];
    ne = maxn_of_t[i];
    /*following may not be needed.*/
    if(!cn->me_start->precTrans) ns = 0; /*start node.*/
    if(!cn->me_end->follTrans) ne = N; /*end node.*/
    ns = MAX(0, ns - beamN);
    ne = MIN(N, ne + beamN); /*A node can start at N although N-1 is the last phone, this may be necessary for silences not consuming any phone.*/
    /* Arrays are offset by -ns so they can be indexed directly by sausage
       position in [ns..ne]. */
    cn->alphaCorr = (float*)New(mem, sizeof(float) * (ne-ns+1));
    cn->alphaCorr -= ns;
    cn->betaCorr = (float*)New(mem, sizeof(float) * (ne-ns+1));
    cn->betaCorr -= ns;
    cn->beta = (double*)New(mem, sizeof(double) * (ne-ns+1));
    cn->beta -= ns;
    cn->starti = ns;
    cn->endi = ne;
    for(i=ns;i<=ne;i++){ cn->betaCorr[i]=0; cn->beta[i]=LZERO; }
  }

  /* Now set cn->alphaCorr[i] for each node cn, which is the average correctness
     of sentences leading up to reference phone cn where the last hypothesis
     sausage position is i. */
  for(cn=startNode;cn;cn=cn->next){
    int i;
    if(!cn->me_start->precTrans){ /* start node... */
      if(cn->starti > 0) HError(1, "start node but doesn't include zero...");
      cn->alphaCorr[0]=0;
      for(i=1;i<=cn->endi;i++) cn->alphaCorr[i]=-10000; /*very negative so wont be used.*/
    } else {
      /* Not start node so sum over preceding nodes. */
      CorrA *ca;
      CorrN *cn_prev;
      for(i=cn->starti;i<=cn->endi;i++){
        cn->alphaCorr[i]=0;
        if(!cn->precTrans) /* has no preceding nodes-- may be the case due to pruning. */
          cn->alphaCorr[i]=-10000;
        for(ca=cn->precTrans;ca;ca=ca->end_foll){
          /* recursively calculate the correctness of this cn at this sausage-pos i,
             given that previous cn's will have their correctnesses calculated at all positions. */
          float BestCorr = -10000;
          float occ;
          cn_prev = ca->start;
          if(GetBestCorrectness(&BestCorr, NULL, NULL, i, cn_prev, minn_of_t,maxn_of_t,niphones,iphone, nonempty,T,N,InsCorrectness)){
            occ = cn_prev->alpha+ca->sc_lmlike+cn->scaled_aclike - cn->alpha; /* lg(occ as fraction of total occ of cn). */
            if(occ<MINEARG) occ=0.0;
            else occ=exp(occ);
            cn->alphaCorr[i] += BestCorr * occ; /* these occs will sum to 1 over all preceding arcs. */
            /* Checking: */
            if(BestCorr > 10000 || ((BestCorr < -500) && cn_prev->alpha>LSMALL)){
              if(debug_bestcorr > 0){
                debug_bestcorr--;
                HError(-1, "BestCorr too big (or this is a very long or strange file)... (%f)", BestCorr);
              } else if(!debug_bestcorr){
                HError(-1, "Not warning about this any more, BestCorr too big.");
                debug_bestcorr--;
              }
            }
          }
        }
      }
      if( (!cn->me_end->follTrans)) { /* end node, so get contribution to avg correctness... */
        double alpha = cn->alpha;
        double occ = alpha - local_pr;
        occ = (occ>MINEARG ? exp(occ) : 0.0);
        if(cn->endi < N) HError(1, "Last node of lattice doesn't include N in alphaCorr vector.");
        if(occ > 1.1) HError(1, "Occ too big!");
        avg_correct += occ * cn->alphaCorr[N];
        /* was MAX(cn->alphaCorr[N], cn->alphaCorr[N-1]); */
        /* This is N+1 the way I've written it in my PhD, I start from 1 not 0 there. */
        /* Only works if last phone = silence or NULL, otherwise technique wont work!! */
      }
    }
  }

  /* Now set beta and betaCorrect for all nodes and times. */
  /* This is a traceback of the procedure that sets alpha and alphaCorrect. */
  for(cn=endNode;cn;cn=cn->prev){
    int i;
    if(!cn->me_start->precTrans){ /* start node... */
      local_pr_beta = LAdd(local_pr_beta, cn->beta[0] + cn->scaled_aclike);
      avg_correct_beta = avg_correct_beta + cn->betaCorr[0] * (cn->beta[0]+cn->scaled_aclike-local_pr<MINEARG?0.0:exp(cn->beta[0]+cn->scaled_aclike-local_pr));
    } else {
      /* Not start node so sum over preceding nodes. */
      CorrA *ca;
      CorrN *cn_prev;
      if(!cn->me_end->follTrans){ /* end node... */
        /* was: if(cn->alphaCorr[N] > cn->alphaCorr[N-1])  N is time of this phone. */
        cn->beta[N] = 0.0; /* This is N+1 the way I've written it in my PhD, I start from 1 not 0 there. */
        /* was: else cn->beta[N-1] = 0.0; */
        /* All other betas are previously initialised to LZERO and betaCorr to 0.0. */
      }
      for(i=cn->starti;i<=cn->endi;i++){
        if(cn->beta[i] + cn->alpha - local_pr > 0.001){ HError(-1, "Too big pr!"); }
        if(cn->beta[i] > LZERO+1000){  /* skip positions with (effectively) zero beta */
          for(ca=cn->precTrans;ca;ca=ca->end_foll){
            float betaCorr,betaCorr_prev;
            float BestCorrPart;
            int bestj=-1;
            double beta_prev,beta_trans,beta_sum;
            cn_prev = ca->start;
            if( GetBestCorrectness(NULL, &BestCorrPart, &bestj, i, cn_prev, minn_of_t,maxn_of_t,niphones,iphone, nonempty,T,N,InsCorrectness) ){
              /* if there is nonzero likelihood to cn_prev.. */
              /* Add this contribution of beta to the previous beta, and set the
                 previous betaCorr to a weighted avg of the betaCorrs (weighted
                 by the betas. */
              beta_prev = cn_prev->beta[bestj]; /* previous value of beta [beta is a likelihood] */
              beta_trans = cn->beta[i] + ca->sc_lmlike + cn->scaled_aclike; /* a likelihood: beta due to this transition. */
              beta_sum = LAdd(beta_prev,beta_trans); /* the new value [the sum of old and added] */
              betaCorr = cn->betaCorr[i] + BestCorrPart; /*I.e, contribution from this phone and transition...*/
              betaCorr_prev = cn_prev->betaCorr[bestj];
              {
                double occ,occ_prev;
                occ = beta_trans - beta_sum; /* lg(occ of new part as fraction of total occ) */
                occ_prev = beta_prev - beta_sum; /* lg(occ of old part as fraction of total occ) */
                occ=(occ>MINEARG?exp(occ):0.0);
                occ_prev=(occ_prev>MINEARG?exp(occ_prev):0.0);
                cn_prev->betaCorr[bestj] = betaCorr*occ + betaCorr_prev*occ_prev;
              }
              cn_prev->beta[bestj] = beta_sum;
            }
          }
        }
      }
    }
  }

  /* check local_pr = local_pr_beta: forward and backward same. */
  if(fabs(local_pr - local_pr_beta) > 0.0001)
    HError(-1, "DoCorrectness: possible problem with pr (forward and backward %f,%f....) ",local_pr,local_pr_beta);
  /* check correctness when calculated forward and backward is the same. */
  if(fabs(avg_correct_beta - avg_correct) > 0.0001)
    HError(-1, "avg_correct{,beta} differ, %f,%f", avg_correct, avg_correct_beta);

  for(cn=startNode;cn;cn=cn->next){
    /* Now set the "MPE occupancy" gamma_q^MPE = gamma_q ( corr_q - corr_avg )
       actually we set mpe_occscale to corr_q - corr_avg, and get gamma_q^MPE later. */
    int i;
    float total_diff=0; /* equals the sum of: (corr-avgCorr)*occ. */
    HArc *a;
    /*float arc_occ;*/
    for(i=cn->starti;i<=cn->endi;i++){
      /* correctness of node is a sum over preceding transitions... */
      if(cn->beta[i] > LSMALL){ /* only one sausage-position i should have nonzero beta, I think */
        float locc,occ;
        float correctness_diff = cn->betaCorr[i] + cn->alphaCorr[i] - avg_correct; /* difference in correctness for this i.*/
        locc = cn->alpha + cn->beta[i] - local_pr; /* The occupation probability gamma_q due to this transition. */
        occ=(locc>MINEARG?exp(locc):0.0);
        total_diff += occ * correctness_diff;
      }
    }
    a=cn->me_start;
    for(i=1;i<=cn->nArcs;i++){
      /* This iterates over arcs a, see the line "a = a->follTrans->end". */
      /* In [non-quinphone] MPE, this loop will only have 1 iteration. */
      HArc *ca = (a->calcArc ? a->calcArc : a);
      float total_occ = ca->ac->locc; /*occ of this group of arcs [sharing this start&end&name */
      if(total_occ > MINEARG+5){
        a->ac->mpe_occscale += exp(-total_occ) * total_diff;
        /* total_diff is for this arc, summed over preceding arcs.
           total_occ is occupation probability gamma_q for this arc. */
        /* total_diff is gamma_q ( corr_q - corr_avg ), total_occ is gamma_q,
           mpe_occscale = (corr_q - corr_avg) */
      }
      if(i!=cn->nArcs){
        if(!a->follTrans) HError(1, "Problem with quinphone-related code for exact correctness.");
        a = a->follTrans->end;
      }
    }
  }
  return avg_correct;
}