Exemple #1
0
// Second read pass for a key vertex: runs the second pass of each base
// class, then calls convertTempIdsToPointers() on both members of every
// stored tangent-edge pair, handing each call the result of vac().
void KeyVertex::read2ndPass()
{
    // Base-class passes run first, in the same order as before
    Cell::read2ndPass();
    KeyCell::read2ndPass();
    VertexCell::read2ndPass();

    // Each tangent-edge entry is a pair; convert .first, then .second
    for(auto & edgePair : tangentEdges_)
    {
        edgePair.first.convertTempIdsToPointers(vac());
        edgePair.second.convertTempIdsToPointers(vac());
    }
}
Exemple #2
0
void KeyFace::read2ndPass()
{
    // Base classes
    Cell::read2ndPass();
    KeyCell::read2ndPass();
    FaceCell::read2ndPass();

    // Cycles
    for(int i=0; i<cycles_.size(); ++i)
        cycles_[i].convertTempIdsToPointers(vac());
}
Exemple #3
0
// Second read pass for an inbetween edge: runs the second pass of each
// base class, then calls convertTempIdsToPointers() on each of the six
// boundary members, handing each call the result of vac().
void InbetweenEdge::read2ndPass()
{
    // Base-class passes run first
    Cell::read2ndPass();
    InbetweenCell::read2ndPass();
    EdgeCell::read2ndPass();

    // Paths: before, then after
    beforePath_.convertTempIdsToPointers(vac());
    afterPath_.convertTempIdsToPointers(vac());

    // Animated vertices: start, then end
    startAnimatedVertex_.convertTempIdsToPointers(vac());
    endAnimatedVertex_.convertTempIdsToPointers(vac());

    // Cycles: before, then after
    beforeCycle_.convertTempIdsToPointers(vac());
    afterCycle_.convertTempIdsToPointers(vac());
}
// Computes four partial products for alignment `al` and appends them to `d`
// in the order total1, total2, total3, total4.
//   total1: (1-p1)^(m-2*fert(0)) * p1^fert(0), times the product over
//           i=1..fert(0) of (m - fert(0) - i + 1) / i
//   total2: product of get_fertility(i, al.fert(i)) for i=1..l
//   total3: product of get_t(al(j), j) for j=1..m
//   total4: product of the d5m.getProb_first / d5m.getProb_bigger values
//           collected while walking every cept's position list
// The same four factor groups are multiplied into a single value by
// prob_of_target_and_alignment_given_source().
void transpair_model5::computeScores(const alignment&al,vector<double>&d)const
{
  LogProb total1 = 1.0,total2=1.0,total3=1.0,total4=1.0 ;
  total1 *= pow(double(1-p1), m-2.0 * al.fert(0)) * pow(double(p1), double(al.fert(0)));
  for (WordIndex i = 1 ; i <= al.fert(0) ; i++)
    total1 *= double(m - al.fert(0) - i + 1) / i ; // IBM-5 is not deficient!
  for (WordIndex i = 1 ; i <= l ; i++)
    total2 *= get_fertility(i, al.fert(i));
  for (WordIndex j = 1 ; j <= m ; j++)
    total3*= get_t(al(j), j) ;
  // Bookkeeping for the walk below:
  //   prev_cept - index of the last i whose position list was non-empty
  //   vac_all   - starts at m and is decremented once per visited position
  //   vac       - one flag per position 0..m, set to 1 when that position is visited
  PositionIndex prev_cept=0;
  PositionIndex vac_all=m;
  Vector<char> vac(m+1,0);
  for(WordIndex i=1;i<=l;i++)
    {
      // al.als_i[i] is the head of i's position list; al.als_j[j].next links
      // to the following position and 0 ends the list.
      PositionIndex cur_j=al.als_i[i]; 
      PositionIndex prev_j=0;
      PositionIndex k=0;
      if(cur_j) { // process first word of cept
	k++;
	// The probability is evaluated BEFORE cur_j is flagged in vac, so the
	// vacancies() calls see the state without the current position.
	total4*=d5m.getProb_first(vacancies(vac,cur_j),vacancies(vac,al.get_center(prev_cept)),d5m.fwordclasses->getClass(get_fs(cur_j)),l,m,vac_all-al.fert(i)+k);
	vac_all--;
	assert(vac[cur_j]==0);
	vac[cur_j]=1;
	prev_j=cur_j;
	cur_j=al.als_j[cur_j].next;
      }
      while(cur_j) { // process following words of cept
	k++;
	// vprev is computed after prev_j has already been flagged in vac.
	int vprev=vacancies(vac,prev_j);
	total4*=d5m.getProb_bigger(vacancies(vac,cur_j),vprev,d5m.fwordclasses->getClass(get_fs(cur_j)),l,m,vac_all-vprev/*was removed*/-al.fert(i)+k);
	vac_all--;
	vac[cur_j]=1;
	prev_j=cur_j;
	cur_j=al.als_j[cur_j].next;
      }
      // The number of positions visited must equal the recorded fertility of i.
      assert(k==al.fert(i));
      if( k )
	prev_cept=i;
    }
  // Every position not visited above must be accounted for by fert(0).
  assert(vac_all==al.fert(0));
  // NOTE(review): the trailing numbers look like slot indices in the caller's
  // score vector - confirm against the callers.
  d.push_back(total1);//13
  d.push_back(total2);//14
  d.push_back(total3);//15
  d.push_back(total4);//16
}
Exemple #5
0
/*
 * Program entry point: parse the command line, connect to the
 * venti server, set up the output vac file system, archive
 * standard input (-i) and every named argument into it, sync,
 * print the resulting "vac:<score>" line and optionally statistics.
 *
 * Options handled here:
 *	-V	increment chattyventi
 *	-a file	archive mode: add a yyyy/mmdd directory to file
 *	-b size	block size, clamped to [512, VtMaxLumpSize]
 *	-d vac	existing vac to diff against
 *	-e pat	exclude pattern
 *	-f file	write the score to file instead of fd 1
 *	-h host	venti server to dial
 *	-i name	archive standard input under name
 *	-m	increment merge
 *	-q	increment qdiff
 *	-s	print statistics at the end
 *	-v	increment verbose
 *	-x file	load exclude patterns from file
 *
 * -a cannot be combined with -f or -d.  Fatal errors go through
 * sysfatal; the function ends with threadexitsall(0).
 */
void
threadmain(int argc, char **argv)
{
	int i, j, fd, n, printstats;
	Dir *d;
	char *s;
	uint64_t u;
	VacFile *f, *fdiff;
	VacFs *fsdiff;
	int blocksize;
	int outfd;
	char *stdinname;
	char *diffvac;
	uint64_t qid;


	fmtinstall('F', vtfcallfmt);
	fmtinstall('H', encodefmt);
	fmtinstall('V', vtscorefmt);

	blocksize = BlockSize;
	stdinname = nil;
	printstats = 0;
	fsdiff = nil;
	diffvac = nil;

	ARGBEGIN{
	case 'V':
		chattyventi++;
		break;
	case 'a':
		archivefile = EARGF(usage());
		break;
	case 'b':
		// Clamp the requested block size to the supported range.
		u = unittoull(EARGF(usage()));
		if(u < 512)
			u = 512;
		if(u > VtMaxLumpSize)
			u = VtMaxLumpSize;
		blocksize = u;
		break;
	case 'd':
		diffvac = EARGF(usage());
		break;
	case 'e':
		excludepattern(EARGF(usage()));
		break;
	case 'f':
		vacfile = EARGF(usage());
		break;
	case 'h':
		host = EARGF(usage());
		break;
	case 'i':
		stdinname = EARGF(usage());
		break;
	case 'm':
		merge++;
		break;
	case 'q':
		qdiff++;
		break;
	case 's':
		printstats++;
		break;
	case 'v':
		verbose++;
		break;
	case 'x':
		loadexcludefile(EARGF(usage()));
		break;
	default:
		usage();
	}ARGEND

	// There must be something to archive: a file argument or -i.
	if(argc == 0 && !stdinname)
		usage();

	if(archivefile && (vacfile || diffvac)){
		fprint(2, "cannot use -a with -f, -d\n");
		usage();
	}

	z = vtdial(host);
	if(z == nil)
		sysfatal("could not connect to server: %r");
	if(vtconnect(z) < 0)
		sysfatal("vtconnect: %r");

	// Setup:
	//	fs is the output vac file system
	//	f is directory in output vac to write new files
	//	fdiff is corresponding directory in existing vac
	if(archivefile){
		VacFile *fp;
		char yyyy[5];
		char mmdd[10];
		char oldpath[40];
		Tm tm;

		fdiff = nil;
		if((outfd = open(archivefile, ORDWR)) < 0){
			// Open failed: only fall back to creating the file
			// when it does not exist at all.
			if(access(archivefile, 0) >= 0)
				sysfatal("open %s: %r", archivefile);
			if((outfd = create(archivefile, OWRITE, 0666)) < 0)
				sysfatal("create %s: %r", archivefile);
			atexit(removevacfile);	// because it is new
			if((fs = vacfscreate(z, blocksize, 512)) == nil)
				sysfatal("vacfscreate: %r");
		}else{
			if((fs = vacfsopen(z, archivefile, VtORDWR, 512)) == nil)
				sysfatal("vacfsopen %s: %r", archivefile);
			if((fdiff = recentarchive(fs, oldpath)) != nil){
				if(verbose)
					fprint(2, "diff %s\n", oldpath);
			}else
				if(verbose)
					fprint(2, "no recent archive to diff against\n");
		}

		// Create yyyy/mmdd.
		tm = *localtime(time(0));
		snprint(yyyy, sizeof yyyy, "%04d", tm.year+1900);
		fp = vacfsgetroot(fs);
		if((f = vacfilewalk(fp, yyyy)) == nil
		&& (f = vacfilecreate(fp, yyyy, ModeDir|0555)) == nil)
			sysfatal("vacfscreate %s: %r", yyyy);
		vacfiledecref(fp);
		fp = f;

		// If mmdd already exists, try mmdd.1, mmdd.2, ... until
		// a name is free.
		snprint(mmdd, sizeof mmdd, "%02d%02d", tm.mon+1, tm.mday);
		n = 0;
		while((f = vacfilewalk(fp, mmdd)) != nil){
			vacfiledecref(f);
			n++;
			snprint(mmdd+4, sizeof mmdd-4, ".%d", n);
		}
		f = vacfilecreate(fp, mmdd, ModeDir|0555);
		if(f == nil)
			sysfatal("vacfscreate %s/%s: %r", yyyy, mmdd);
		vacfiledecref(fp);

		if(verbose)
			fprint(2, "archive %s/%s\n", yyyy, mmdd);
	}else{
		// Plain mode: the score goes to vacfile, or to fd 1 when
		// no -f was given.
		if(vacfile == nil)
			outfd = 1;
		else if((outfd = create(vacfile, OWRITE, 0666)) < 0)
			sysfatal("create %s: %r", vacfile);
		atexit(removevacfile);
		if((fs = vacfscreate(z, blocksize, 512)) == nil)
			sysfatal("vacfscreate: %r");
		f = vacfsgetroot(fs);

		fdiff = nil;
		if(diffvac){
			// Failing to open the diff vac is only a warning;
			// archiving continues without a diff base.
			if((fsdiff = vacfsopen(z, diffvac, VtOREAD, 128)) == nil)
				warn("vacfsopen %s: %r", diffvac);
			else
				fdiff = vacfsgetroot(fsdiff);
		}
	}

	if(stdinname)
		vacstdin(f, stdinname);
	for(i=0; i<argc; i++){
		// We can't use / and . and .. and ../.. as valid archive
		// names, so expand to the list of files in the directory.
		if(argv[i][0] == 0){
			warn("empty string given as command-line argument");
			continue;
		}
		cleanname(argv[i]);
		if(strcmp(argv[i], "/") == 0
		|| strcmp(argv[i], ".") == 0
		|| strcmp(argv[i], "..") == 0
		|| (strlen(argv[i]) > 3 && strcmp(argv[i]+strlen(argv[i])-3, "/..") == 0)){
			if((fd = open(argv[i], OREAD)) < 0){
				warn("open %s: %r", argv[i]);
				continue;
			}
			while((n = dirread(fd, &d)) > 0){
				for(j=0; j<n; j++){
					// Build "<arg>/<entry>" for each directory entry.
					s = vtmalloc(strlen(argv[i])+1+strlen(d[j].name)+1);
					strcpy(s, argv[i]);
					strcat(s, "/");
					strcat(s, d[j].name);
					cleanname(s);
					vac(f, fdiff, s, &d[j]);
					// The path is only needed for the call above;
					// release it as vac() does for its own children.
					free(s);
				}
				free(d);
			}
			close(fd);
			continue;
		}
		if((d = dirstat(argv[i])) == nil){
			warn("stat %s: %r", argv[i]);
			continue;
		}
		vac(f, fdiff, argv[i], d);
		free(d);
	}
	if(fdiff)
		vacfiledecref(fdiff);

	/*
	 * Record the maximum qid so that vacs can be merged
	 * without introducing overlapping qids.  Older versions
	 * of vac arranged that the root would have the largest
	 * qid in the file system, but we can't do that anymore
	 * (the root gets created first!).
	 */
	if(_vacfsnextqid(fs, &qid) >= 0)
		vacfilesetqidspace(f, 0, qid);
	vacfiledecref(f);

	/*
	 * Copy fsdiff's root block score into fs's slot for that,
	 * so that vacfssync will copy it into root.prev for us.
	 * Just nice documentation, no effect.
	 */
	if(fsdiff)
		memmove(fs->score, fsdiff->score, VtScoreSize);
	if(vacfssync(fs) < 0)
		fprint(2, "vacfssync: %r\n");

	// The score has been written; the output file must now survive exit.
	fprint(outfd, "vac:%V\n", fs->score);
	atexitdont(removevacfile);
	vacfsclose(fs);
	vthangup(z);

	if(printstats){
		fprint(2,
			"%d files, %d files skipped, %d directories\n"
			"%lld data bytes written, %lld data bytes skipped\n",
			stats.nfile, stats.skipfiles, stats.ndir, stats.data, stats.skipdata);
		dup(2, 1);
		packetstats();
	}
	threadexitsall(0);
}
Exemple #6
0
/*
 * Archive the file named name, which has stat info d,
 * into the vac directory fp (p = parent).
 *
 * If we're doing a vac -d against another archive, the
 * equivalent directory to fp in that archive is diffp
 * (nil when there is nothing to diff against).
 *
 * Excluded names are skipped with a warning.  When merge is
 * set and vacmerge succeeds nothing else is done.  Directories
 * are archived by recursing over their entries; plain files are
 * read block by block and written to the new vac file, skipping
 * blocks for which sha1matches reports a match with the diff copy.
 *
 * Failures are reported with warn and abandon this entry only;
 * nothing is returned to the caller.  Updates the global stats.
 */
void
vac(VacFile *fp, VacFile *diffp, char *name, Dir *d)
{
	char *elem, *s;
	static char buf[65536];
	int fd, i, n, bsize;
	int64_t off;
	Dir *dk;	// kids
	VacDir vd, vddiff;
	VacFile *f, *fdiff;
	VtEntry e;

	if(!includefile(name)){
		warn("excluding %s%s", name, (d->mode&DMDIR) ? "/" : "");
		return;
	}

	if(d->mode&DMDIR)
		stats.ndir++;
	else
		stats.nfile++;

	if(merge && vacmerge(fp, name) >= 0)
		return;

	if(verbose)
		fprint(2, "%s%s\n", name, (d->mode&DMDIR) ? "/" : "");

	if((fd = open(name, OREAD)) < 0){
		warn("open %s: %r", name);
		return;
	}

	// elem is the last path element of name.
	elem = strrchr(name, '/');
	if(elem)
		elem++;
	else
		elem = name;

	plan9tovacdir(&vd, d);
	if((f = vacfilecreate(fp, elem, vd.mode)) == nil){
		// Warn first so %r still reports the create error,
		// then release the descriptor opened above.
		warn("vacfilecreate %s: %r", name);
		close(fd);
		return;
	}
	if(diffp)
		fdiff = vacfilewalk(diffp, elem);
	else
		fdiff = nil;

	if(vacfilesetdir(f, &vd) < 0)
		warn("vacfilesetdir %s: %r", name);

	if(d->mode&DMDIR){
		while((n = dirread(fd, &dk)) > 0){
			for(i=0; i<n; i++){
				// Build "<name>/<entry>" and recurse.
				s = vtmalloc(strlen(name)+1+strlen(dk[i].name)+1);
				strcpy(s, name);
				strcat(s, "/");
				strcat(s, dk[i].name);
				vac(f, fdiff, s, &dk[i]);
				free(s);
			}
			free(dk);
		}
	}else{
		off = 0;
		bsize = fs->bsize;
		if(fdiff){
			/*
			 * Copy fdiff's contents into f by moving the score.
			 * We'll diff and update below.
			 */
			if(vacfilegetentries(fdiff, &e, nil) >= 0)
			if(vacfilesetentries(f, &e, nil) >= 0){
				// Compare in units of the diff file's block size.
				bsize = e.dsize;

				/*
				 * Or if -q is set, and the metadata looks the same,
				 * don't even bother reading the file.
				 */
				if(qdiff && vacfilegetdir(fdiff, &vddiff) >= 0){
					if(vddiff.mtime == vd.mtime)
					if(vddiff.size == vd.size)
					if(!vddiff.plan9 || (/* vddiff.p9path == vd.p9path && */ vddiff.p9version == vd.p9version)){
						// Counted as skipped, not as archived.
						stats.skipfiles++;
						stats.nfile--;
						vdcleanup(&vddiff);
						goto Out;
					}

					/*
					 * Skip over presumably-unchanged prefix
					 * of an append-only file.
					 *
					 * Only the old length (vddiff.size) is already
					 * present in f, so resume at the start of the
					 * block holding the old end of file.  Starting
					 * from the new length would leave the appended
					 * blocks before the last one unread and unwritten.
					 */
					if(vd.mode&ModeAppend)
					if(vddiff.size < vd.size)
					if(vddiff.plan9 && vd.plan9)
					if(vddiff.p9path == vd.p9path){
						off = vddiff.size/bsize*bsize;
						if(seek(fd, off, 0) >= 0)
							stats.skipdata += off;
						else{
							seek(fd, 0, 0);	// paranoia
							off = 0;
						}
					}

					vdcleanup(&vddiff);
					// XXX different verbose chatty prints for kaminsky?
				}
			}
		}
		if(qdiff && verbose)
			fprint(2, "+%s\n", name);
		while((n = readn(fd, buf, bsize)) > 0){
			// A block matching the diff copy is already in f.
			if(fdiff && sha1matches(f, off/bsize, (uint8_t*)buf, n)){
				off += n;
				stats.skipdata += n;
				continue;
			}
			if(vacfilewrite(f, buf, n, off) < 0){
				warn("venti write %s: %r", name);
				goto Out;
			}
			stats.data += n;
			off += n;
		}
		/*
		 * Since we started with fdiff's contents,
		 * set the size in case fdiff was bigger.
		 */
		if(fdiff && vacfilesetsize(f, off) < 0)
			warn("vtfilesetsize %s: %r", name);
	}

Out:
	// Common cleanup for every path that created f.
	vacfileflush(f, 1);
	vacfiledecref(f);
	if(fdiff)
		vacfiledecref(fdiff);
	close(fd);
}
// Returns the product of the factor groups selected by `distortionType`
// for alignment `al`:
//   bit 1 (distortionType&1): the p1 / fert(0) term, the get_fertility()
//                             values for i=1..l and the get_t() values for j=1..m
//   bit 2 (distortionType&2): the d5m.getProb_first / d5m.getProb_bigger
//                             values collected while walking every cept
// When doModel4Scoring is set the call is delegated to transpair_model4
// (note that `verb` is not forwarded there).
// With `verb` set, every intermediate factor is traced to cerr.
// A result that evaluates to zero is replaced by almostZero (1E-299).
LogProb transpair_model5::prob_of_target_and_alignment_given_source(const alignment&al, short distortionType,bool verb)const
{
  if( doModel4Scoring )
    return transpair_model4::prob_of_target_and_alignment_given_source(al,distortionType);
  LogProb total = 1.0 ;
  static const LogProb almostZero = 1E-299 ; 
  // x2 holds the most recent distortion factor so it can be traced below.
  double x2;
  if( distortionType&1 )
    {
      total *= pow(double(1-p1), m-2.0 * al.fert(0)) * pow(double(p1), double(al.fert(0)));
      if( verb) cerr << "IBM-5: (1-p1)^(m-2 f0)*p1^f0: " << total << endl;
      for (WordIndex i = 1 ; i <= al.fert(0) ; i++)
	total *= double(m - al.fert(0) - i + 1) / i ; // IBM-5 is not deficient!
      if( verb) cerr << "IBM-5: +NULL:binomial+distortion " << total << endl;
      for (WordIndex i = 1 ; i <= l ; i++)
	{
	  total *= get_fertility(i, al.fert(i));
	  if( verb) cerr << "IBM-5: fertility of " << i << " " << get_fertility(i, al.fert(i)) << " -> " << total << endl;
	}
      for (WordIndex j = 1 ; j <= m ; j++)
	{
	  total*= get_t(al(j), j) ;
	  if( verb) cerr << "IBM-5: t of j:" << j << " i:" << al(j) << ": " << get_t(al(j), j)  << " -> " << total << endl;
	}
    }
  if( distortionType&2 )
    {
      // Bookkeeping for the walk below:
      //   prev_cept - index of the last i whose position list was non-empty
      //   vac_all   - starts at m and is decremented once per visited position
      //   vac       - one flag per position 0..m, set to 1 when that position is visited
      PositionIndex prev_cept=0;
      PositionIndex vac_all=m;
      Vector<char> vac(m+1,0);
      for(WordIndex i=1;i<=l;i++)
	{
	  // al.als_i[i] is the head of i's position list; al.als_j[j].next
	  // links to the following position and 0 ends the list.
	  PositionIndex cur_j=al.als_i[i]; 
	  PositionIndex prev_j=0;
	  PositionIndex k=0;
	  if(cur_j) { // process first word of cept
	    k++;
	    // previous position
	    // The factor is evaluated BEFORE cur_j is flagged in vac.
	    total*= (x2=d5m.getProb_first(vacancies(vac,cur_j),vacancies(vac,al.get_center(prev_cept)),d5m.fwordclasses->getClass(get_fs(cur_j)),l,m,vac_all-al.fert(i)+k));
	    
	    vac_all--;
	    assert(vac[cur_j]==0);
	    vac[cur_j]=1;
	    
	    if( verb) cerr << "IBM-5: d=1 of " << cur_j << ": " << x2  << " -> " << total << endl;
	    prev_j=cur_j;
	    cur_j=al.als_j[cur_j].next;
	  }
	  while(cur_j) { // process following words of cept
	    k++;
	    // previous position
	    // vprev is computed after prev_j has already been flagged in vac.
	    int vprev=vacancies(vac,prev_j);
	    total*= (x2=d5m.getProb_bigger(vacancies(vac,cur_j),vprev,d5m.fwordclasses->getClass(get_fs(cur_j)),l,m,vac_all-vprev/*was removed*/-al.fert(i)+k));
	    
	    
	    vac_all--;
	    vac[cur_j]=1;
	    
	    
	    if( verb) cerr << "IBM-5: d>1 of " << cur_j << ": " << x2  << " -> " << total << endl;
	    prev_j=cur_j;
	    cur_j=al.als_j[cur_j].next;
	  }
	  // The number of positions visited must equal the recorded fertility of i.
	  assert(k==al.fert(i));
	  if( k )
	    prev_cept=i;
	}
      // Every position not visited above must be accounted for by fert(0).
      assert(vac_all==al.fert(0));
    }
  // Never hand back an exact zero; substitute the tiny constant instead.
  total = total?total:almostZero;
  return total;
}
void MaximumCompositeLikelihood::SetupTrainingData(
	const std::vector<labeled_instance_type>& training_data,
	const std::vector<InferenceMethod*> inference_methods) {
	assert(comp_training_data.size() == 0);
	assert(comp_inference_methods.size() == 0);
	assert(inference_methods.size() == training_data.size());

	// Number of times each component will be covered
	unsigned int cover_count = 1;
	assert(decomp >= -1);
	if (decomp == DecomposePseudolikelihood) {
		cover_count = 1;
	} else if (decomp > 0) {
		cover_count = decomp;
	}

	// Produce composite factor graphs
	boost::timer decomp_timer;
	int training_data_size = static_cast<int>(training_data.size());
	fg_cc_var_label.resize(cover_count * training_data_size);
	fg_cc_count.resize(cover_count * training_data_size);
	fg_orig_index.resize(cover_count * training_data_size);
	std::fill(fg_cc_count.begin(), fg_cc_count.end(), 0);
	unsigned int cn = 0;
	for (int n = 0; n < training_data_size; ++n) {
		FactorGraph* fg = training_data[n].first;
		size_t var_count = fg->Cardinalities().size();

		// Get observation
		const FactorGraphObservation* obs = training_data[n].second;

		// Obtain one or more decomposition(s)
		for (unsigned int cover_iter = 0; cover_iter < cover_count;
			++cover_iter) {
			VAcyclicDecomposition vac(fg);
			std::vector<bool> factor_is_removed;

			if (decomp == DecomposePseudolikelihood) {
				factor_is_removed.resize(fg->Factors().size());
				std::fill(factor_is_removed.begin(),
					factor_is_removed.end(), true);
			} else {
				std::vector<double> factor_weight(fg->Factors().size(), 0.0);
				if (decomp == DecomposeUniform) {
					// Use constant weights
					std::fill(factor_weight.begin(), factor_weight.end(), 1.0);
				} else {
					// Use uniform random weights
					boost::uniform_real<double> uniform_dist(0.0, 1.0);
					boost::variate_generator<boost::mt19937&,
						boost::uniform_real<double> >
						rgen(RandomSource::GlobalRandomSampler(), uniform_dist);

					for (unsigned int fi = 0; fi < factor_weight.size(); ++fi)
						factor_weight[fi] = rgen();
				}
				vac.ComputeDecompositionSP(factor_weight, factor_is_removed);
			}

			// Shatter factor graph into trees
			fg_cc_count[cn] += FactorGraphStructurizer::ConnectedComponents(
				fg, factor_is_removed, fg_cc_var_label[cn]);
#if 0
			std::cout << "MCL, instance " << n << " decomposed into " << cc_count
				<< " components" << std::endl;
#endif

			// Add each component as separate factor graph
			for (unsigned int ci = 0; ci < fg_cc_count[cn]; ++ci) {
				std::vector<unsigned int> cond_var_set;
				cond_var_set.reserve(var_count);

				// Add all variables not in this component to the conditioning set
				for (size_t vi = 0; vi < var_count; ++vi) {
					if (fg_cc_var_label[cn][vi] != ci)
						cond_var_set.push_back(static_cast<unsigned int>(vi));
				}
				AddTrainingComponentCond(fg, obs, inference_methods[n],
					cond_var_set);
			}
			fg_orig_index[cn] = n;
			cn += 1;
		}
	}
	std::cout << "MCL, decomposed " << training_data.size() << " instances "
		<< "into " << comp_training_data.size() << " instances "
		<< (decomp == DecomposeUniform ? "(uniform)" : "(randomized)")
		<< " in " << decomp_timer.elapsed() << "s." << std::endl;

	// Initialize MLE training data from created components
	SetupMLETrainingData();
}