void KeyVertex::read2ndPass() { // Base classes Cell::read2ndPass(); KeyCell::read2ndPass(); VertexCell::read2ndPass(); // Tangent Edges for(int i=0; i<tangentEdges_.size(); ++i) { tangentEdges_[i].first.convertTempIdsToPointers(vac()); tangentEdges_[i].second.convertTempIdsToPointers(vac()); } }
void KeyFace::read2ndPass() { // Base classes Cell::read2ndPass(); KeyCell::read2ndPass(); FaceCell::read2ndPass(); // Cycles for(int i=0; i<cycles_.size(); ++i) cycles_[i].convertTempIdsToPointers(vac()); }
void InbetweenEdge::read2ndPass() { // Base classes Cell::read2ndPass(); InbetweenCell::read2ndPass(); EdgeCell::read2ndPass(); // Before Path beforePath_.convertTempIdsToPointers(vac()); // After Path afterPath_.convertTempIdsToPointers(vac()); // Start Animated Vertex startAnimatedVertex_.convertTempIdsToPointers(vac()); // End Animated Vertex endAnimatedVertex_.convertTempIdsToPointers(vac()); // Before Cycle beforeCycle_.convertTempIdsToPointers(vac()); // After Cycle afterCycle_.convertTempIdsToPointers(vac()); }
void transpair_model5::computeScores(const alignment&al,vector<double>&d)const { LogProb total1 = 1.0,total2=1.0,total3=1.0,total4=1.0 ; total1 *= pow(double(1-p1), m-2.0 * al.fert(0)) * pow(double(p1), double(al.fert(0))); for (WordIndex i = 1 ; i <= al.fert(0) ; i++) total1 *= double(m - al.fert(0) - i + 1) / i ; // IBM-5 is not deficient! for (WordIndex i = 1 ; i <= l ; i++) total2 *= get_fertility(i, al.fert(i)); for (WordIndex j = 1 ; j <= m ; j++) total3*= get_t(al(j), j) ; PositionIndex prev_cept=0; PositionIndex vac_all=m; Vector<char> vac(m+1,0); for(WordIndex i=1;i<=l;i++) { PositionIndex cur_j=al.als_i[i]; PositionIndex prev_j=0; PositionIndex k=0; if(cur_j) { // process first word of cept k++; total4*=d5m.getProb_first(vacancies(vac,cur_j),vacancies(vac,al.get_center(prev_cept)),d5m.fwordclasses->getClass(get_fs(cur_j)),l,m,vac_all-al.fert(i)+k); vac_all--; assert(vac[cur_j]==0); vac[cur_j]=1; prev_j=cur_j; cur_j=al.als_j[cur_j].next; } while(cur_j) { // process following words of cept k++; int vprev=vacancies(vac,prev_j); total4*=d5m.getProb_bigger(vacancies(vac,cur_j),vprev,d5m.fwordclasses->getClass(get_fs(cur_j)),l,m,vac_all-vprev/*war weg*/-al.fert(i)+k); vac_all--; vac[cur_j]=1; prev_j=cur_j; cur_j=al.als_j[cur_j].next; } assert(k==al.fert(i)); if( k ) prev_cept=i; } assert(vac_all==al.fert(0)); d.push_back(total1);//13 d.push_back(total2);//14 d.push_back(total3);//15 d.push_back(total4);//16 }
void threadmain(int argc, char **argv) { int i, j, fd, n, printstats; Dir *d; char *s; uint64_t u; VacFile *f, *fdiff; VacFs *fsdiff; int blocksize; int outfd; char *stdinname; char *diffvac; uint64_t qid; fmtinstall('F', vtfcallfmt); fmtinstall('H', encodefmt); fmtinstall('V', vtscorefmt); blocksize = BlockSize; stdinname = nil; printstats = 0; fsdiff = nil; diffvac = nil; ARGBEGIN{ case 'V': chattyventi++; break; case 'a': archivefile = EARGF(usage()); break; case 'b': u = unittoull(EARGF(usage())); if(u < 512) u = 512; if(u > VtMaxLumpSize) u = VtMaxLumpSize; blocksize = u; break; case 'd': diffvac = EARGF(usage()); break; case 'e': excludepattern(EARGF(usage())); break; case 'f': vacfile = EARGF(usage()); break; case 'h': host = EARGF(usage()); break; case 'i': stdinname = EARGF(usage()); break; case 'm': merge++; break; case 'q': qdiff++; break; case 's': printstats++; break; case 'v': verbose++; break; case 'x': loadexcludefile(EARGF(usage())); break; default: usage(); }ARGEND if(argc == 0 && !stdinname) usage(); if(archivefile && (vacfile || diffvac)){ fprint(2, "cannot use -a with -f, -d\n"); usage(); } z = vtdial(host); if(z == nil) sysfatal("could not connect to server: %r"); if(vtconnect(z) < 0) sysfatal("vtconnect: %r"); // Setup: // fs is the output vac file system // f is directory in output vac to write new files // fdiff is corresponding directory in existing vac if(archivefile){ VacFile *fp; char yyyy[5]; char mmdd[10]; char oldpath[40]; Tm tm; fdiff = nil; if((outfd = open(archivefile, ORDWR)) < 0){ if(access(archivefile, 0) >= 0) sysfatal("open %s: %r", archivefile); if((outfd = create(archivefile, OWRITE, 0666)) < 0) sysfatal("create %s: %r", archivefile); atexit(removevacfile); // because it is new if((fs = vacfscreate(z, blocksize, 512)) == nil) sysfatal("vacfscreate: %r"); }else{ if((fs = vacfsopen(z, archivefile, VtORDWR, 512)) == nil) sysfatal("vacfsopen %s: %r", archivefile); if((fdiff = recentarchive(fs, oldpath)) != nil){ if(verbose) 
fprint(2, "diff %s\n", oldpath); }else if(verbose) fprint(2, "no recent archive to diff against\n"); } // Create yyyy/mmdd. tm = *localtime(time(0)); snprint(yyyy, sizeof yyyy, "%04d", tm.year+1900); fp = vacfsgetroot(fs); if((f = vacfilewalk(fp, yyyy)) == nil && (f = vacfilecreate(fp, yyyy, ModeDir|0555)) == nil) sysfatal("vacfscreate %s: %r", yyyy); vacfiledecref(fp); fp = f; snprint(mmdd, sizeof mmdd, "%02d%02d", tm.mon+1, tm.mday); n = 0; while((f = vacfilewalk(fp, mmdd)) != nil){ vacfiledecref(f); n++; snprint(mmdd+4, sizeof mmdd-4, ".%d", n); } f = vacfilecreate(fp, mmdd, ModeDir|0555); if(f == nil) sysfatal("vacfscreate %s/%s: %r", yyyy, mmdd); vacfiledecref(fp); if(verbose) fprint(2, "archive %s/%s\n", yyyy, mmdd); }else{ if(vacfile == nil) outfd = 1; else if((outfd = create(vacfile, OWRITE, 0666)) < 0) sysfatal("create %s: %r", vacfile); atexit(removevacfile); if((fs = vacfscreate(z, blocksize, 512)) == nil) sysfatal("vacfscreate: %r"); f = vacfsgetroot(fs); fdiff = nil; if(diffvac){ if((fsdiff = vacfsopen(z, diffvac, VtOREAD, 128)) == nil) warn("vacfsopen %s: %r", diffvac); else fdiff = vacfsgetroot(fsdiff); } } if(stdinname) vacstdin(f, stdinname); for(i=0; i<argc; i++){ // We can't use / and . and .. and ../.. as valid archive // names, so expand to the list of files in the directory. 
if(argv[i][0] == 0){ warn("empty string given as command-line argument"); continue; } cleanname(argv[i]); if(strcmp(argv[i], "/") == 0 || strcmp(argv[i], ".") == 0 || strcmp(argv[i], "..") == 0 || (strlen(argv[i]) > 3 && strcmp(argv[i]+strlen(argv[i])-3, "/..") == 0)){ if((fd = open(argv[i], OREAD)) < 0){ warn("open %s: %r", argv[i]); continue; } while((n = dirread(fd, &d)) > 0){ for(j=0; j<n; j++){ s = vtmalloc(strlen(argv[i])+1+strlen(d[j].name)+1); strcpy(s, argv[i]); strcat(s, "/"); strcat(s, d[j].name); cleanname(s); vac(f, fdiff, s, &d[j]); } free(d); } close(fd); continue; } if((d = dirstat(argv[i])) == nil){ warn("stat %s: %r", argv[i]); continue; } vac(f, fdiff, argv[i], d); free(d); } if(fdiff) vacfiledecref(fdiff); /* * Record the maximum qid so that vacs can be merged * without introducing overlapping qids. Older versions * of vac arranged that the root would have the largest * qid in the file system, but we can't do that anymore * (the root gets created first!). */ if(_vacfsnextqid(fs, &qid) >= 0) vacfilesetqidspace(f, 0, qid); vacfiledecref(f); /* * Copy fsdiff's root block score into fs's slot for that, * so that vacfssync will copy it into root.prev for us. * Just nice documentation, no effect. */ if(fsdiff) memmove(fs->score, fsdiff->score, VtScoreSize); if(vacfssync(fs) < 0) fprint(2, "vacfssync: %r\n"); fprint(outfd, "vac:%V\n", fs->score); atexitdont(removevacfile); vacfsclose(fs); vthangup(z); if(printstats){ fprint(2, "%d files, %d files skipped, %d directories\n" "%lld data bytes written, %lld data bytes skipped\n", stats.nfile, stats.skipfiles, stats.ndir, stats.data, stats.skipdata); dup(2, 1); packetstats(); } threadexitsall(0); }
/*
 * Archive the file named name, which has stat info d,
 * into the vac directory fp (p = parent).
 *
 * If we're doing a vac -d against another archive, the
 * equivalent directory to fp in that archive is diffp.
 *
 * Directories are archived recursively.  For plain files, when a
 * matching file exists under diffp, the new file starts out with the
 * old file's entries and only blocks that differ are written.
 * Failures are reported with warn() and the file is skipped; nothing
 * is returned to the caller.
 */
void
vac(VacFile *fp, VacFile *diffp, char *name, Dir *d)
{
	char *elem, *s;
	static char buf[65536];
	int fd, i, n, bsize;
	int64_t off;
	Dir *dk;	// kids
	VacDir vd, vddiff;
	VacFile *f, *fdiff;
	VtEntry e;

	if(!includefile(name)){
		warn("excluding %s%s", name, (d->mode&DMDIR) ? "/" : "");
		return;
	}

	if(d->mode&DMDIR)
		stats.ndir++;
	else
		stats.nfile++;

	if(merge && vacmerge(fp, name) >= 0)
		return;

	if(verbose)
		fprint(2, "%s%s\n", name, (d->mode&DMDIR) ? "/" : "");

	if((fd = open(name, OREAD)) < 0){
		warn("open %s: %r", name);
		return;
	}

	/* elem is the last path component of name */
	elem = strrchr(name, '/');
	if(elem)
		elem++;
	else
		elem = name;

	plan9tovacdir(&vd, d);
	if((f = vacfilecreate(fp, elem, vd.mode)) == nil){
		warn("vacfilecreate %s: %r", name);
		/* fd is already open here; close it so the descriptor is not leaked */
		close(fd);
		return;
	}
	if(diffp)
		fdiff = vacfilewalk(diffp, elem);
	else
		fdiff = nil;

	if(vacfilesetdir(f, &vd) < 0)
		warn("vacfilesetdir %s: %r", name);

	if(d->mode&DMDIR){
		/* directory: archive every entry under name/ */
		while((n = dirread(fd, &dk)) > 0){
			for(i=0; i<n; i++){
				s = vtmalloc(strlen(name)+1+strlen(dk[i].name)+1);
				strcpy(s, name);
				strcat(s, "/");
				strcat(s, dk[i].name);
				vac(f, fdiff, s, &dk[i]);
				free(s);
			}
			free(dk);
		}
	}else{
		off = 0;
		bsize = fs->bsize;
		if(fdiff){
			/*
			 * Copy fdiff's contents into f by moving the score.
			 * We'll diff and update below.
			 */
			if(vacfilegetentries(fdiff, &e, nil) >= 0)
			if(vacfilesetentries(f, &e, nil) >= 0){
				bsize = e.dsize;

				/*
				 * Or if -q is set, and the metadata looks the same,
				 * don't even bother reading the file.
				 */
				if(qdiff && vacfilegetdir(fdiff, &vddiff) >= 0){
					if(vddiff.mtime == vd.mtime)
					if(vddiff.size == vd.size)
					if(!vddiff.plan9 || (/* vddiff.p9path == vd.p9path && */ vddiff.p9version == vd.p9version)){
						stats.skipfiles++;
						stats.nfile--;
						vdcleanup(&vddiff);
						goto Out;
					}

					/*
					 * Skip over presumably-unchanged prefix
					 * of an append-only file.  The unchanged
					 * prefix is what the old archive already
					 * holds, i.e. the first vddiff.size bytes,
					 * so round the OLD size down to a block
					 * boundary.  Using the new size here would
					 * skip the appended data without ever
					 * reading it.
					 */
					if(vd.mode&ModeAppend)
					if(vddiff.size < vd.size)
					if(vddiff.plan9 && vd.plan9)
					if(vddiff.p9path == vd.p9path){
						off = vddiff.size/bsize*bsize;
						if(seek(fd, off, 0) >= 0)
							stats.skipdata += off;
						else{
							seek(fd, 0, 0);	// paranoia
							off = 0;
						}
					}

					vdcleanup(&vddiff);
					// XXX different verbose chatty prints for kaminsky?
				}
			}
		}
		if(qdiff && verbose)
			fprint(2, "+%s\n", name);
		while((n = readn(fd, buf, bsize)) > 0){
			/* a block that matches what f already holds need not be rewritten */
			if(fdiff && sha1matches(f, off/bsize, (uint8_t*)buf, n)){
				off += n;
				stats.skipdata += n;
				continue;
			}
			if(vacfilewrite(f, buf, n, off) < 0){
				warn("venti write %s: %r", name);
				goto Out;
			}
			stats.data += n;
			off += n;
		}
		/*
		 * Since we started with fdiff's contents,
		 * set the size in case fdiff was bigger.
		 */
		if(fdiff && vacfilesetsize(f, off) < 0)
			warn("vtfilesetsize %s: %r", name);
	}

Out:
	vacfileflush(f, 1);
	vacfiledecref(f);
	if(fdiff)
		vacfiledecref(fdiff);
	close(fd);
}
// Returns the probability assigned to alignment `al`.
//
// When doModel4Scoring is set, the computation is delegated to
// transpair_model4. Otherwise `distortionType` is a bit mask:
//   bit 1 -> include the NULL-word, fertility and lexical (t) factors
//   bit 2 -> include the d5m distortion factors
// With `verb` set, every factor and the running product are written to
// cerr. A result that evaluates to zero is replaced by 1E-299.
LogProb transpair_model5::prob_of_target_and_alignment_given_source(const alignment&al, short distortionType,bool verb)const
{
  if( doModel4Scoring )
    return transpair_model4::prob_of_target_and_alignment_given_source(al,distortionType);

  static const LogProb almostZero = 1E-299 ;
  LogProb prob = 1.0 ;
  double factor;

  if( distortionType&1 )
    {
      // NULL-word term.
      prob *= pow(double(1-p1), m-2.0 * al.fert(0)) * pow(double(p1), double(al.fert(0)));
      if( verb)
	cerr << "IBM-5: (1-p1)^(m-2 f0)*p1^f0: " << prob << endl;
      for (WordIndex i = 1 ; i <= al.fert(0) ; i++)
	prob *= double(m - al.fert(0) - i + 1) / i ; // IBM-5 is not deficient!
      if( verb)
	cerr << "IBM-5: +NULL:binomial+distortion " << prob << endl;

      // Fertility factors.
      for (WordIndex i = 1 ; i <= l ; i++)
	{
	  prob *= get_fertility(i, al.fert(i));
	  if( verb)
	    cerr << "IBM-5: fertility of " << i << " " << get_fertility(i, al.fert(i)) << " -> " << prob << endl;
	}

      // Lexical translation factors.
      for (WordIndex j = 1 ; j <= m ; j++)
	{
	  prob*= get_t(al(j), j) ;
	  if( verb)
	    cerr << "IBM-5: t of j:" << j << " i:" << al(j) << ": " << get_t(al(j), j) << " -> " << prob << endl;
	}
    }

  if( distortionType&2 )
    {
      // `filled[j]` is set to 1 once position j has been placed;
      // `vacantLeft` counts the positions not yet placed.
      PositionIndex lastCept=0;
      PositionIndex vacantLeft=m;
      Vector<char> filled(m+1,0);
      for(WordIndex i=1;i<=l;i++)
	{
	  PositionIndex prevPos=0;
	  PositionIndex placed=0;
	  // Walk the chain of positions aligned to i.
	  for(PositionIndex pos=al.als_i[i]; pos; pos=al.als_j[pos].next)
	    {
	      placed++;
	      if( placed==1 )
		{
		  // First word of the cept: conditioned on the centre of
		  // the previous non-empty cept.
		  prob*= (factor=d5m.getProb_first(vacancies(filled,pos),vacancies(filled,al.get_center(lastCept)),d5m.fwordclasses->getClass(get_fs(pos)),l,m,vacantLeft-al.fert(i)+placed));
		  vacantLeft--;
		  assert(filled[pos]==0);
		  filled[pos]=1;
		  if( verb)
		    cerr << "IBM-5: d=1 of " << pos << ": " << factor << " -> " << prob << endl;
		}
	      else
		{
		  // Following words of the cept: conditioned on the
		  // previously placed position of the same cept.
		  int vprev=vacancies(filled,prevPos);
		  prob*= (factor=d5m.getProb_bigger(vacancies(filled,pos),vprev,d5m.fwordclasses->getClass(get_fs(pos)),l,m,vacantLeft-vprev/*war weg*/-al.fert(i)+placed));
		  vacantLeft--;
		  filled[pos]=1;
		  if( verb)
		    cerr << "IBM-5: d>1 of " << pos << ": " << factor << " -> " << prob << endl;
		}
	      prevPos=pos;
	    }
	  assert(placed==al.fert(i));
	  if( placed )
	    lastCept=i;
	}
      // Whatever is still vacant must be exactly the NULL-aligned words.
      assert(vacantLeft==al.fert(0));
    }

  // Never hand back an exact zero.
  return prob ? prob : almostZero;
}
void MaximumCompositeLikelihood::SetupTrainingData( const std::vector<labeled_instance_type>& training_data, const std::vector<InferenceMethod*> inference_methods) { assert(comp_training_data.size() == 0); assert(comp_inference_methods.size() == 0); assert(inference_methods.size() == training_data.size()); // Number of times each component will be covered unsigned int cover_count = 1; assert(decomp >= -1); if (decomp == DecomposePseudolikelihood) { cover_count = 1; } else if (decomp > 0) { cover_count = decomp; } // Produce composite factor graphs boost::timer decomp_timer; int training_data_size = static_cast<int>(training_data.size()); fg_cc_var_label.resize(cover_count * training_data_size); fg_cc_count.resize(cover_count * training_data_size); fg_orig_index.resize(cover_count * training_data_size); std::fill(fg_cc_count.begin(), fg_cc_count.end(), 0); unsigned int cn = 0; for (int n = 0; n < training_data_size; ++n) { FactorGraph* fg = training_data[n].first; size_t var_count = fg->Cardinalities().size(); // Get observation const FactorGraphObservation* obs = training_data[n].second; // Obtain one or more decomposition(s) for (unsigned int cover_iter = 0; cover_iter < cover_count; ++cover_iter) { VAcyclicDecomposition vac(fg); std::vector<bool> factor_is_removed; if (decomp == DecomposePseudolikelihood) { factor_is_removed.resize(fg->Factors().size()); std::fill(factor_is_removed.begin(), factor_is_removed.end(), true); } else { std::vector<double> factor_weight(fg->Factors().size(), 0.0); if (decomp == DecomposeUniform) { // Use constant weights std::fill(factor_weight.begin(), factor_weight.end(), 1.0); } else { // Use uniform random weights boost::uniform_real<double> uniform_dist(0.0, 1.0); boost::variate_generator<boost::mt19937&, boost::uniform_real<double> > rgen(RandomSource::GlobalRandomSampler(), uniform_dist); for (unsigned int fi = 0; fi < factor_weight.size(); ++fi) factor_weight[fi] = rgen(); } vac.ComputeDecompositionSP(factor_weight, 
factor_is_removed); } // Shatter factor graph into trees fg_cc_count[cn] += FactorGraphStructurizer::ConnectedComponents( fg, factor_is_removed, fg_cc_var_label[cn]); #if 0 std::cout << "MCL, instance " << n << " decomposed into " << cc_count << " components" << std::endl; #endif // Add each component as separate factor graph for (unsigned int ci = 0; ci < fg_cc_count[cn]; ++ci) { std::vector<unsigned int> cond_var_set; cond_var_set.reserve(var_count); // Add all variables not in this component to the conditioning set for (size_t vi = 0; vi < var_count; ++vi) { if (fg_cc_var_label[cn][vi] != ci) cond_var_set.push_back(static_cast<unsigned int>(vi)); } AddTrainingComponentCond(fg, obs, inference_methods[n], cond_var_set); } fg_orig_index[cn] = n; cn += 1; } } std::cout << "MCL, decomposed " << training_data.size() << " instances " << "into " << comp_training_data.size() << " instances " << (decomp == DecomposeUniform ? "(uniform)" : "(randomized)") << " in " << decomp_timer.elapsed() << "s." << std::endl; // Initialize MLE training data from created components SetupMLETrainingData(); }