// Information on the quantity of data available for each client // Outputs a list with the selected files for a defined quantity of data int ExtractTargetDataInfo(Config& config) { String inputClientListFileName = config.getParam("targetIdList"); bool fixedLabelSelectedFrame; String labelSelectedFrames; if (config.existsParam("useIdForSelectedFrame")) // the ID of each speaker is used as labelSelectedFrame fixedLabelSelectedFrame=false; else{ // the label is decided by the command line and is unique for the run labelSelectedFrames=config.getParam("labelSelectedFrames"); if (verbose) cout << "Computing on" << labelSelectedFrames << " label" << endl; fixedLabelSelectedFrame=true; } unsigned long maxFrame=config.getParam("maxFrame").toLong(); String outputFilename=config.getParam("outputFilename"); ofstream outputFile(outputFilename.c_str(),ios::out| ios::trunc); try{ XList inputClientList(inputClientListFileName,config); // read the Id + filenames for each client XLine * linep; if (verbose) cout << "InfoTarget" << endl; // *********** Target loop ***************** while ((linep=inputClientList.getLine()) != NULL){ // linep gives the XLine with the Id of a given client and the list of files String *id=linep->getElement(); // Get the Client ID (id) outputFile<<*id; String currentFile=""; XLine featureFileListp=linep->getElements(); // Get the list of feature file for the client (end of the line) if (verbose) cout << "Info model ["<<*id<<"]"<<endl; if (!fixedLabelSelectedFrame){ // the ID is used as label for selecting the frame labelSelectedFrames=*id; if (debug) cout <<*id<<" is used for label selected frames"<<endl; } // label files reading - It creates, for each file and each label, a cluster of segments - will be integrated witth the featre s - asap SegServer segmentsServer; // Reading the segmentation files for each feature input file LabelServer labelServer; initializeClusters(featureFileListp,segmentsServer,labelServer,config); // Reading the segmentation files 
for each feature input file unsigned long codeSelectedFrame=labelServer.getLabelIndexByString(labelSelectedFrames); // Get the index of the cluster with in interest audio segments SegCluster& selectedSegments=segmentsServer.getCluster(codeSelectedFrame); // Gives the cluster of the selected/used segments Seg *seg; // Will give the current segment unsigned long frameCount=0; selectedSegments.rewind(); // at the begin of the selected segments list while(((seg=selectedSegments.getSeg())!=NULL) && (frameCount<maxFrame)){ // For each of the selected segments until the amount of data is get frameCount+=seg->length(); cout << seg->sourceName()<<" "<<seg->begin()<<" "<<seg->length()<<" Total time="<<frameCount<<endl; if (seg->sourceName()!=currentFile){ outputFile<<" "<<seg->sourceName(); currentFile=seg->sourceName(); } } // end of the initial Train Iteration loop outputFile<<endl; if (verbose) cout << "Save info client ["<<*id<<"]" << endl; } // end of the the target loop } // fin try catch (Exception& e) { cout << e.toString().c_str() << endl; } return 0; }
/* void baggedSegments(SegCluster &selectedSegments,RefVector<SegCluster> &baggedA,double & baggedProbability, unsigned long minimumLength,unsigned long maximumLength){ Seg* seg; // reset the reader at the begin of the input stream selectedSegments.rewind(); seg=selectedSegments.getSeg(); bool end=(seg==NULL); unsigned long beginSeg=0,lengthSeg=0; if (!end){ beginSeg=seg->begin(); lengthSeg=seg->length(); } while(!end){ if (debug) cout << "bagged, current input seg ["<<beginSeg<<","<<lengthSeg<<"]"<<endl; unsigned long verifyLength=correctedLength(lengthSeg,minimumLength,maximumLength); bool moveSeg=true; unsigned long length=0; if (lengthSeg<=verifyLength){ moveSeg=true; length=lengthSeg; if (debug) cout <<"change seg"<<endl; } else{ moveSeg=false; length=verifyLength; } // for all cluster in baggedA if (length>0) for (unsigned long idx=0;idx<baggedA.size();idx++) // For each component if(baggedFrame(baggedProbability)){ SegServer &segServerOutput=baggedA[idx].getServer(); Seg &newSeg=segServerOutput.createSeg(beginSeg,length,0,seg->string(),seg->sourceName()); baggedA[idx].add(newSeg); if (debug) cout << "bagged - Adding in bagged["<<idx<<"] the seg ["<<seg->sourceName()<<"]"<<newSeg.begin()<<" "<<newSeg.length()<<endl; } if (moveSeg){ seg=selectedSegments.getSeg(); end=(seg==NULL); if (!end){ beginSeg=seg->begin(); lengthSeg=seg->length(); } } else{ lengthSeg-=length; beginSeg+=length; } } if ((debug) || (verboseLevel>3)){ cout <<"Bagged segments"<<endl; for (unsigned long idx=0;idx<baggedA.size();idx++){ cout << "Bagged cluster["<<idx<<"]"<<endl; showCluster(baggedA[idx]); } } if (verbose){ unsigned long total=totalFrame(selectedSegments); for (unsigned long idx=0;idx<baggedA.size();idx++){ unsigned long selected=totalFrame(baggedA[idx]); double percent=(double)selected*100/(double) total; cout <<"Bagged segments["<<idx<<"] Initial frames["<<total<<"] Selected frames["<<selected<<"] % selected["<<percent<<"]"<<endl; } } }*/ void baggedSegments(SegCluster 
&selectedSegments,SegCluster &baggedFrameSegment,double baggedProbability, unsigned long minimumLength,unsigned long maximumLength){ SegServer &segServerOutput=baggedFrameSegment.getServer(); Seg* seg; // reset the reader at the begin of the input stream selectedSegments.rewind(); seg=selectedSegments.getSeg(); bool end=(seg==NULL); unsigned long beginSeg=0,lengthSeg=0; if (!end){ beginSeg=seg->begin(); lengthSeg=seg->length(); } while(!end){ if (debug) cout << "bagged, current input seg ["<<beginSeg<<","<<lengthSeg<<"]"<<endl; unsigned long verifyLength=correctedLength(lengthSeg,minimumLength,maximumLength); double segBaggedProbability=baggedProbability; bool moveSeg=true; unsigned long length=0; if (lengthSeg<=verifyLength){ moveSeg=true; length=lengthSeg; if (debug) cout <<"change seg"<<endl; } else{ moveSeg=false; length=verifyLength; } if ((length>0) &&(baggedFrame(segBaggedProbability))){ Seg &newSeg=segServerOutput.createSeg(beginSeg,length,0,seg->string(),seg->sourceName()); baggedFrameSegment.add(newSeg); if (debug) cout << "bagged - Adding the seg ["<<seg->sourceName()<<"]"<<newSeg.begin()<<" "<<newSeg.length()<<endl; } if (moveSeg){ seg=selectedSegments.getSeg(); end=(seg==NULL); if (!end){ beginSeg=seg->begin(); lengthSeg=seg->length(); } } else{ lengthSeg-=length; beginSeg+=length; } } if ((debug) || (verboseLevel>3)){ cout <<"Bagged segments"<<endl; showCluster(baggedFrameSegment); } if (verbose){ unsigned long total=totalFrame(selectedSegments); unsigned long selected=totalFrame(baggedFrameSegment); double percent=(double)selected*100/(double) total; cout <<"Bagged segments, Initial frames["<<total<<"] Selected frames["<<selected<<"] % selected["<<percent<<"]"<<endl; } }
//------------------------------------------------------------------------- void computeLabelNGram(NGram & NG,SegCluster &cluster,SegCluster &clusterOut,ULongVector &tabS,unsigned long nbSym){ unsigned long begin[100]; // max order 100... short int sym[100]; unsigned long end[100]; SegServer & segServerOut=clusterOut.getServer(); // Get the clusterserver reelated to the output cluster.rewind(); Seg* seg; // Reset the reader at the begin of the input stream while((seg=cluster.getSeg())!=NULL){ // For each of the selected segments unsigned long idxFrame=seg->begin(); unsigned long endS=endSeg(seg); if (endS>=nbSym) endS=nbSym; // Just if there is less symbol in the file than in the label if (idxFrame>endS) idxFrame=endS; unsigned long beginOOV=idxFrame; bool oov=true; if (debug) cout <<"begin Seg["<<idxFrame<<"]"<<endl; for (unsigned long n=0;(idxFrame<endS) &&(n<NG.getOrder()-1);n++){ // Recognize the (n-1) first symbols begin[n]=idxFrame; sym[n]=recognizeSymbol(idxFrame,endS,tabS); end[n]=idxFrame-1; if (debug) cout <<"sym ["<<sym[n]<<"] begin["<<begin[n]<<"] end["<<end[n]<<"] idxframe["<<idxFrame<<"]"<<endl; } while(idxFrame<endS){ begin[NG.getOrder()-1]=idxFrame; sym[NG.getOrder()-1]=recognizeSymbol(idxFrame,endS,tabS); end[NG.getOrder()-1]=idxFrame-1; if (debug) cout <<"sym ["<<sym[NG.getOrder()-1]<<"] begin["<<begin[NG.getOrder()-1] <<"] end["<<end[NG.getOrder()-1]<<"] idxframe["<<idxFrame<<"]"<<endl; unsigned long tag; if (isNGram(sym,NG,tag)){ if ((oov)&&(beginOOV<begin[0])){ if (debug) cout <<"OOV1 begin["<<beginOOV <<"] end["<<begin[0]-1<<"]"<<endl; Seg &segTmp=segServerOut.createSeg(beginOOV,begin[0]-beginOOV,0,"oov",seg->sourceName()); clusterOut.add(segTmp); } if (debug) cout <<"NGRAM ["<<tag<<"] begin["<<begin[0] <<"] end["<<end[NG.getOrder()-1]<<"]"<<endl; Seg &segTmp=segServerOut.createSeg(begin[0],end[NG.getOrder()-1]-begin[0]+1,0,String::valueOf(tag),seg->sourceName()); clusterOut.add(segTmp); beginOOV=idxFrame; oov=false; } else oov=true; 
moveTab(begin,sym,end,NG.getOrder()); } if (oov){ Seg &segTmp=segServerOut.createSeg(beginOOV,idxFrame-beginOOV,0,"oov",seg->sourceName()); clusterOut.add(segTmp); if (debug) cout <<"OOV2 begin["<<beginOOV <<"] end["<<idxFrame-1<<"]"<<endl; } } }
void FactorAnalysisStat::computeAndAccumulateGeneralFAStats(SegCluster &selectedSegments,FeatureServer &fs,Config & config){ if (verbose) cout <<"(FactorAnalysisStat) Compute General FA Stats (Complete)" << endl; double *N_h, *N, *S_X_h, *S_X,*ff; _matN_h.setAllValues(0.0); _matN.setAllValues(0.0); _matS_X_h.setAllValues(0.0); _matS_X.setAllValues(0.0); N_h=_matN_h.getArray(); N=_matN.getArray(); S_X_h=_matS_X_h.getArray();S_X=_matS_X.getArray(); MixtureGD & UBM=_ms.getMixtureGD((unsigned long) 1); MixtureGDStat &acc=_ss.createAndStoreMixtureStat(UBM); // Compute Occupations and Statistics acc.resetOcc(); Seg *seg; selectedSegments.rewind(); String currentSource="";unsigned long loc=0;unsigned long sent=0; while((seg=selectedSegments.getSeg())!=NULL){ unsigned long begin=seg->begin()+fs.getFirstFeatureIndexOfASource(seg->sourceName()); // Idx of the first frame of the current file in the feature server if (currentSource!=seg->sourceName()) { currentSource=seg->sourceName(); loc=_ndxTable.locNb(currentSource); sent=_ndxTable.sessionNb(currentSource); if (verbose)cout << "Processing speaker["<<currentSource<<"]"<< endl; } fs.seekFeature(begin); Feature f; if (!_topGauss) { for (unsigned long idxFrame=0;idxFrame<seg->length();idxFrame++){ fs.readFeature(f); acc.computeAndAccumulateOcc(f); RealVector <double> aPost=acc.getOccVect(); ff=f.getDataVector(); for(unsigned long k=0;k<_mixsize;k++) { N_h[sent*_mixsize+k]+=aPost[k]; N[loc*_mixsize+k] +=aPost[k]; for (unsigned long i=0;i<_vsize;i++) { S_X_h[sent*_supervsize+(k*_vsize+i)]+=aPost[k]*ff[i]; S_X[loc*_supervsize+(k*_vsize+i)] +=aPost[k]*ff[i]; } } } } else throw Exception("ComputeGeneralStats TopGauss not done at this level",__FILE__,__LINE__); } };
/// Normalize features with a smooth mixture transformation o't=ot-sum(P(c|ot)Uc.x) void FactorAnalysisStat::normalizeFeatures(SegCluster &selectedSegments,FeatureServer &fs,Config & config){ if (verbose) cout << "(FactorAnalysisStat) Normalize Features" << endl; MixtureGD & clientMixture=_ms.getMixtureGD(1); // copy the UBM mixture unsigned long nt=0; RealVector <double> m_xh_1; m_xh_1.setSize(_supervsize); double *_m_xh_1=m_xh_1.getArray(); Seg *seg; // current selectd segment selectedSegments.rewind(); String currentSource=""; while((seg=selectedSegments.getSeg())!=NULL){ unsigned long begin=seg->begin()+fs.getFirstFeatureIndexOfASource(seg->sourceName()); if (currentSource!=seg->sourceName()) { currentSource=seg->sourceName(); this->getUX(m_xh_1,currentSource); this->getSpeakerModel(clientMixture,currentSource); if (verbose)cout << "Processing speaker["<<currentSource<<"]"<< endl; } fs.seekFeature(begin); Feature f; if (!_topGauss) { for (unsigned long idxFrame=0;idxFrame<seg->length();idxFrame++){ fs.readFeature(f,0); double *ff=f.getDataVector(); double sum=0.0; RealVector <double> P; P.setSize(_mixsize); double *Prob=P.getArray(); for(unsigned long k=0;k<_mixsize;k++) { Prob[k]=clientMixture.weight(k)*clientMixture.getDistrib(k).computeLK(f); sum+=Prob[k]; } for(unsigned long k=0;k<_mixsize;k++) Prob[k]/=sum; for(unsigned long k=0;k<_mixsize;k++) { for (unsigned long i=0;i<_vsize;i++) ff[i]-= Prob[k]*_m_xh_1[k*_vsize+i]; } fs.writeFeature(f); nt++; } } else { throw Exception("no topgauss yet",__FILE__,__LINE__); } } };
// Can use this function to get likelihood with a topgauss double TopGauss::get(MixtureGD & UBM,FeatureServer &fs,String & featureFilename,Config & config){ StatServer ss(config); String labelSelectedFrames =config.getParam("labelSelectedFrames"); unsigned long begin=fs.getFirstFeatureIndexOfASource(featureFilename); fs.seekFeature(begin); SegServer segmentsServer; LabelServer labelServer; initializeClusters(featureFilename,segmentsServer,labelServer,config); // __android_log_print(ANDROID_LOG_DEBUG, "TopGauss::get", " Feature file %s \n", featureFilename.c_str()); verifyClusterFile(segmentsServer,fs,config); unsigned long codeSelectedFrame=labelServer.getLabelIndexByString(labelSelectedFrames); SegCluster& selectedSegments=segmentsServer.getCluster(codeSelectedFrame); MixtureGDStat &acc=ss.createAndStoreMixtureStat(UBM); Seg *seg; // current selected segment selectedSegments.rewind(); unsigned long t=0; //cnt frames acc.resetLLK(); unsigned long idxBegin=0; while((seg=selectedSegments.getSeg())!=NULL){ unsigned long begin=seg->begin()+fs.getFirstFeatureIndexOfASource(seg->sourceName()); fs.seekFeature(begin); Feature f; idxBegin=this->frameToIdx(t); for (unsigned long idxFrame=0;idxFrame<seg->length();idxFrame++){ fs.readFeature(f); //unsigned long idx=this->frameToIdx(t); unsigned long nbg=_nbg[t]; ULongVector index; double sumNonSelectedWeights=_snsw[t]; double sumNonSelectedLLK=_snsl[t]; for (unsigned long i=0;i<nbg;i++) { index.addValue(_idx[idxBegin+i]); } char c[100]; sprintf(c,"%d",(int)index.size()); config.setParam("topDistribsCount",c); // this should be high enough if (t==0) {acc.computeAndAccumulateLLK(f,1.0,DETERMINE_TOP_DISTRIBS);acc.resetLLK();} // to remove in ALIZE, this is to init the LKvector ss.setTopDistribIndexVector(index, sumNonSelectedWeights, sumNonSelectedLLK); acc.computeAndAccumulateLLK(f,1.0,USE_TOP_DISTRIBS); idxBegin+=nbg; t++; } } //ss.deleteMixtureStat(acc); if (t!=_nt || idxBegin !=_nbgcnt) cout << "W: t("<<t<<") != _nt(" 
<<_nt<<")"<<"W: idxBegin("<<idxBegin<<") != _nbgcnt(" <<_nbgcnt<<")"<<endl; return acc.getMeanLLK(); }
/// Compute Log Likelihood of the Factor Analysis model double FactorAnalysisStat::getLLK(SegCluster &selectedSegments,MixtureGD &model,FeatureServer&fs,Config & config){ if (verbose) cout << "(FactorAnalysisStat) Compute Likelihood" << endl; double llk=0.0; MixtureGDStat &acc=_ss.createAndStoreMixtureStat(model); Seg *seg; selectedSegments.rewind(); while((seg=selectedSegments.getSeg())!=NULL){ unsigned long begin=seg->begin()+fs.getFirstFeatureIndexOfASource(seg->sourceName()); fs.seekFeature(begin); Feature f; for (unsigned long idxFrame=0;idxFrame<seg->length();idxFrame++){ fs.readFeature(f); acc.computeAndAccumulateLLK(f,1.0,TOP_DISTRIBS_NO_ACTION); } } llk= acc.getMeanLLK(); _ss.deleteMixtureStat(acc); return llk; };
// Computes the UBM occupations of the selected frames of each input feature file and rewrites
// the file through outputFeatureFile().
// Config parameters read: "writeAllFeatures" (optional, default true), "inputModelFilename"
// (read but not used), "inputFeatureFilename" (a feature file, or a list when it ends with
// ".lst"), "inputWorldFilename", "labelSelectedFrames".
// Always returns 0; an Exception raised inside the processing is printed on stdout.
// NOTE(review): this routine looks unfinished — see the notes in the frame loop below.
int saveApost(Config &config) {
  // Output a vector for all input vectors (selected and not selected) - DEFAULT=on
  bool writeAllFeature = true;
  if (config.existsParam("writeAllFeatures"))
    writeAllFeature = config.getParam("writeAllFeatures").toBool();

  String modelname = config.getParam("inputModelFilename");   // mandatory parameter, value unused
  String inputFeatureFileName = config.getParam("inputFeatureFilename");

  // Build the list of feature files: either the content of a ".lst" XList or the single file.
  XLine inputFeatureFileNameList;
  if (inputFeatureFileName.endsWith(".lst")) {
    XList inputFileNameXList(inputFeatureFileName,config);
    inputFeatureFileNameList = inputFileNameXList.getAllElements();
  }
  else {
    inputFeatureFileNameList.addElement(inputFeatureFileName);
  }

  try {
    // Read the UBM.
    MixtureServer _ms(config);
    StatServer _ss(config);
    _ms.loadMixtureGD(config.getParam("inputWorldFilename"));
    MixtureGD &UBM = _ms.getMixtureGD((unsigned long) 0);
    MixtureGDStat &acc = _ss.createAndStoreMixtureStat(UBM);
    unsigned long _vsize = UBM.getVectSize();        // not used below
    unsigned long _mixsize = UBM.getDistribCount();

    String labelSelectedFrames;
    unsigned long codeSelectedFrame;

    // Loop over the list of feature files.
    for (String *file = inputFeatureFileNameList.getElement(); file != NULL; file = inputFeatureFileNameList.getElement()) {
      String &featureFilename = (*file);
      FeatureServer fs(config,featureFilename);
      FeatureServer fs_out(config,featureFilename);

      SegServer segmentsServer;
      LabelServer labelServer;
      initializeClusters(featureFilename,segmentsServer,labelServer,config);
      verifyClusterFile(segmentsServer,fs,config);

      labelSelectedFrames = config.getParam("labelSelectedFrames");
      codeSelectedFrame = labelServer.getLabelIndexByString(labelSelectedFrames);
      SegCluster& selectedSegments = segmentsServer.getCluster(codeSelectedFrame);

      // Compute occupations and statistics.
      acc.resetOcc();
      String currentSource = "";
      selectedSegments.rewind();
      for (Seg *seg = selectedSegments.getSeg(); seg != NULL; seg = selectedSegments.getSeg()) {
        // Index of the first frame of the segment inside the feature server.
        unsigned long begin = seg->begin() + fs.getFirstFeatureIndexOfASource(seg->sourceName());
        if (currentSource != seg->sourceName()) {
          currentSource = seg->sourceName();
          if (verbose)
            cout << "Processing speaker["<<currentSource<<"]"<< endl;
        }

        fs.seekFeature(begin);
        Feature f;
        for (unsigned long idxFrame = 0; idxFrame < seg->length(); idxFrame++) {
          fs.readFeature(f);
          acc.computeAndAccumulateOcc(f);
          RealVector <double> aPost = acc.getOccVect();

          // NOTE(review): tmpF is default-constructed, then indexed up to _mixsize, and is never
          // stored; the input frame f is what gets added to fs_out. Presumably tmpF was meant to
          // be written instead — confirm the intent before changing.
          Feature tmpF;
          for (unsigned long k = 0; k < _mixsize; k++) {
            tmpF[k] = aPost[k];
          }
          fs_out.addFeature(f);
        }
      }

      // Writing apost probabilities to file.
      cout << "Writing to: " << featureFilename << endl;
      FeatureFileWriter w(featureFilename, config);
      SegServer fakeSegServer;                       // shadowed below; this instance stays unused
      if (!writeAllFeature) {
        // Output only the selected features - giving a shorter output.
        outputFeatureFile(config,fs_out,selectedSegments, w);
      }
      else {
        // Output all the features: one fake segment covering the whole of fs_out.
        SegServer fakeSegServer;
        fakeSegServer.createCluster(0);
        SegCluster& fakeSeg = fakeSegServer.getCluster(0);
        fakeSeg.add(fakeSegServer.createSeg(0,fs_out.getFeatureCount(),codeSelectedFrame,
                                            labelSelectedFrames,featureFilename));
        outputFeatureFile(config,fs_out,fakeSeg,w);
      }
    }
  }
  catch (Exception& e) {
    cout << e.toString().c_str() << endl;
  }
  return 0;
}
// Main init function double TopGauss::compute(MixtureGD & UBM,FeatureServer &fs,String & featureFilename,Config & config){ StatServer ss(config); MixtureGDStat &acc=ss.createAndStoreMixtureStat(UBM); unsigned long _mixsize=UBM.getDistribCount(); String labelSelectedFrames =config.getParam("labelSelectedFrames"); unsigned long begin=fs.getFirstFeatureIndexOfASource(featureFilename); fs.seekFeature(begin); SegServer segmentsServer; LabelServer labelServer; initializeClusters(featureFilename,segmentsServer,labelServer,config); // __android_log_print(ANDROID_LOG_DEBUG, "TopGauss::compute", " Feature file %s \n", featureFilename.c_str()); verifyClusterFile(segmentsServer,fs,config); unsigned long codeSelectedFrame=labelServer.getLabelIndexByString(labelSelectedFrames); SegCluster& selectedSegments=segmentsServer.getCluster(codeSelectedFrame); acc.resetLLK(); double topD=config.getParam("topGauss").toDouble(); if (verbose) {if(topD<1.0) cout << "LLK %="<< topD << "% ";else cout << "Top-"<<topD<<" ";} // Class values _nt=totalFrame(selectedSegments); _nbg.setSize(_nt); _idx.setSize(0);_snsw.setSize(0); _snsl.setSize(0); _nbg.setAllValues(0); _idx.setAllValues(0);_snsw.setAllValues(0.0);_snsl.setAllValues(0.0); _nbgcnt=0; Seg *seg; // current selected segment selectedSegments.rewind(); unsigned long t=0; //cnt frames while((seg=selectedSegments.getSeg())!=NULL){ unsigned long begin=seg->begin()+fs.getFirstFeatureIndexOfASource(seg->sourceName()); fs.seekFeature(begin); Feature f; for (unsigned long idxFrame=0;idxFrame<seg->length();idxFrame++){ fs.readFeature(f); double llk=acc.computeAndAccumulateLLK(f,1.0,DETERMINE_TOP_DISTRIBS); const LKVector &topV=ss.getTopDistribIndexVector(); double lk_tot=exp(llk); double val=0.0; if (topD<1.0) { for(unsigned long j=0;j<_mixsize;j++){ if (val > topD*lk_tot) break; val+=(topV[j].lk); _nbg[t]++; } } else _nbg[t]=(unsigned long)topD; _nbgcnt+=_nbg[t]; double snsw=1.0; double snsl=lk_tot; for(unsigned long j=0;j<_nbg[t];j++) { 
_idx.addValue(topV[j].idx); snsw -=UBM.weight(topV[j].idx); snsl -=topV[j].lk; } _snsw.addValue(snsw); if (snsl < EPS_LK) _snsl.addValue(EPS_LK); else _snsl.addValue(snsl); t++; } } if (t!=_nt) cout << "W: t("<<t<<") != _nt(" <<_nt<<")"<<endl; return acc.getMeanLLK(); }
//------------------------------------------------------------------------- int trainWorld(Config& config){ if (verbose) cout << "Begin world model training"<<endl; try{ // Reading the data, one or multiple separate streams unsigned long nbStream=0; // Number of Streams FeatureServer **fsTab=NULL; // Array of FeatureServer (address) - one by input stream SegServer **segServTab=NULL; // Array of segment server (address)- one by input stream SegCluster **segTab=NULL; // Array of selected segments cluster(address) - one by stream double *weightTab=NULL; // Array of weight of each stream. i.e influence of a stream on the final model String outputWorldFilename = config.getParam("outputWorldFilename"); // output worldmodel file filename bool fileInit=config.existsParam("inputWorldFilename"); // if a inputWorlFilename is given, init by file, else from scratch bool saveInitModel=true; if (config.existsParam("saveInitModel")) saveInitModel=config.getParam("saveInitModel").toBool(); String inputWorldFilename=""; if (fileInit) inputWorldFilename=config.getParam("inputWorldFilename"); // if file init, the initial model filename String labelSelectedFrames =config.getParam("labelSelectedFrames"); // label for selected frames TrainCfg trainCfg(config); // Get the training algo params // Reading the data if(config.existsParam("inputStreamList")){// We want to work on separated list XList tmp(config.getParam("inputStreamList"),config); // Each data set influence will be balanced during training XLine & listInputFilename=tmp.getAllElements(); // Read the list of (list) filenames in tmp -> listInputFilename nbStream=listInputFilename.getElementCount(); if (nbStream==0) throw Exception("TrainWorld error:no input stream" , __FILE__, __LINE__); reserveMem(fsTab,segServTab,segTab,weightTab,nbStream); for (unsigned i=0;i<nbStream;i++) featureStream(config,listInputFilename.getElement(i),fsTab[i],segServTab[i],segTab[i],labelSelectedFrames); if (config.existsParam("weightStreamList")){ // 
Read the weight of each stream, text file XList tmpW(config.getParam("weightStreamList"),config); XLine & listW=tmpW.getAllElements(); // Read the list of (list) filenames in tmp -> listInputFilename if (listW.getElementCount()!=nbStream) throw Exception("TrainWorld error: number of weigths differs than number of input streams" , __FILE__, __LINE__); for (unsigned i=0;i<nbStream;i++) weightTab[i]=listW.getElement(i).toDouble(); } } else{ // Only one input stream, no stream list nbStream=1; reserveMem(fsTab,segServTab,segTab,weightTab,nbStream); featureStream(config,config.getParam("inputFeatureFilename"),fsTab[0],segServTab[0],segTab[0],labelSelectedFrames); } unsigned long vectSize=fsTab[0]->getVectSize(); // size of the input vectors // Create stat server and mixture server MixtureServer ms(config); StatServer ss(config, ms); if (debug || verbose) cout << "Stream mode, nb Stream="<<nbStream<<endl; if (debug|| (verboseLevel>2)){ for (unsigned long i=0;i<nbStream;i++){ cout <<"Stream["<<i<<"]"<<endl; segTab[i]->rewind(); Seg *seg; // Reset to the first segment while((seg=segTab[i]->getSeg())!=NULL) // For each of the selected segments cout << "File["<<seg->sourceName()<<"] Segment begin["<< seg->begin()<<"] length["<<seg->length()<<"] index in the feature server["<<fsTab[i]->getFirstFeatureIndexOfASource(seg->sourceName())<<"]"<<endl; } } // Global mean and variance matrices initialisation (computed from dataa or set to 0,1) bool use01=false; if (config.existsParam("use01")) use01=config.getParam("use01").toBool(); if (verbose){ if (use01) cout<<"Use 0 mean, 1 cov "<<endl; else cout << "Compute global mean and cov"<<endl;} DoubleVector globalMean; DoubleVector globalCov; if (!use01){ FrameAccGD globalFrameAcc; unsigned long nbFrame=computeMeanCov(config,fsTab,segTab,nbStream,globalMean,globalCov); // Compute the global mean and covariance if (verboseLevel>1){ cout <<"global mean and cov of training data, number of frame= ["<<nbFrame<<"]"<<endl; for (unsigned i=0; i 
< vectSize; i++)cout << "mean[" << i << "=" << globalMean[i] << "]\tcov[" << globalCov[i] << "]" << endl; } } else initialize01(vectSize,globalMean,globalCov); MixtureGD &world=ms.createMixtureGD(); if (fileInit){ // Load or initialize the initial model if (verbose) cout << "Load initial world model ["<<inputWorldFilename<<"]" << endl; world=ms.loadMixtureGD(inputWorldFilename); // Load } else{ if (verbose) cout <<"World model init from scratch"<<endl; mixtureInit(ms,fsTab,segTab,weightTab,nbStream,world,globalCov,config,trainCfg); // Initialize if (saveInitModel) world.save(outputWorldFilename+"init", config); } MixtureGD *newWorld=&world; // TODO Verify and suppress... trainModelStream(config,ms,ss,fsTab,segTab,weightTab,nbStream,globalMean,globalCov,newWorld,trainCfg); if (verbose) cout << "Save world model ["<<outputWorldFilename<<"]" << endl; newWorld->save(outputWorldFilename, config); // Cleaning the memory freeMem(fsTab,segServTab,segTab,weightTab,nbStream); } catch (Exception& e){ cout << e.toString() << endl; } return 0; }