void CTwoStateModel::reshape_emission_params(SGVector< float64_t >& emission_weights,
		SGVector< float64_t > w, int32_t num_feats, int32_t num_obs)
{
	emission_weights.zero();

	// Legend for state indices:
	// 0 -> start state
	// 1 -> stop state
	// 2 -> negative state (label == 0)
	// 3 -> positive state (label == 1)
	//
	// start and stop states have no emission scores
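	// (the emission block of w starts right after the transmission
	// parameters, hence the offset m_num_transmission_params below)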

	index_t em_idx, w_idx = m_num_transmission_params;
	for ( int32_t s = 2 ; s < m_num_states ; ++s )
	{
		for ( int32_t f = 0 ; f < num_feats ; ++f )
		{
			for ( int32_t o = 0 ; o < num_obs ; ++o )
			{
				em_idx = s*num_feats*num_obs + f*num_obs + o;
				emission_weights[em_idx] = w[w_idx++];
			}
		}
	}
}
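The loop above packs the emission scores into a flat vector indexed as s*num_feats*num_obs + f*num_obs + o. Below is a minimal standalone sketch of that layout, using hypothetical sizes; it only mirrors the index arithmetic, not Shogun's SGVector API.

#include <cstdio>
#include <vector>

int main()
{
	// hypothetical sizes for illustration only
	const int num_states = 4;   // 0=start, 1=stop, 2=negative, 3=positive
	const int num_feats  = 3;
	const int num_obs    = 4;

	std::vector<double> emission_weights(num_states*num_feats*num_obs, 0.0);

	// same flattening as reshape_emission_params: state-major, then
	// feature, then observation; start/stop states (0 and 1) stay zero
	for (int s = 2; s < num_states; ++s)
		for (int f = 0; f < num_feats; ++f)
			for (int o = 0; o < num_obs; ++o)
			{
				int em_idx = s*num_feats*num_obs + f*num_obs + o;
				emission_weights[em_idx] = 1.0;   // placeholder score
			}

	std::printf("index of (state=3, feat=1, obs=2): %d\n",
			3*num_feats*num_obs + 1*num_obs + 2);
	return 0;
}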
Example 2
void CLinearTimeMMD::compute_statistic_and_variance(
		SGVector<float64_t>& statistic, SGVector<float64_t>& variance,
		bool multiple_kernels)
{
	SG_DEBUG("entering %s::compute_statistic_and_variance()\n", get_name())

	REQUIRE(m_streaming_p, "%s::compute_statistic_and_variance: streaming "
			"features p required!\n", get_name());
	REQUIRE(m_streaming_q, "%s::compute_statistic_and_variance: streaming "
			"features q required!\n", get_name());

	REQUIRE(m_kernel, "%s::compute_statistic_and_variance: kernel needed!\n",
			get_name());

	/* make sure multiple_kernels flag is used only with a combined kernel */
	REQUIRE(!multiple_kernels || m_kernel->get_kernel_type()==K_COMBINED,
			"%s::compute_statistic_and_variance: multiple kernels specified,"
			"but underlying kernel is not of type K_COMBINED\n", get_name());

	/* m is number of samples from each distribution, m_2 is half of it
	 * using names from the JMLR paper (see class documentation) */
	index_t m_2=m_m/2;

	SG_DEBUG("m_m=%d\n", m_m)

	/* find out whether single or multiple kernels (cast is safe, check above) */
	index_t num_kernels=1;
	if (multiple_kernels)
	{
		num_kernels=((CCombinedKernel*)m_kernel)->get_num_subkernels();
		SG_DEBUG("computing MMD and variance for %d sub-kernels\n",
				num_kernels);
	}

	/* allocate memory for results if vectors are empty */
	if (!statistic.vector)
		statistic=SGVector<float64_t>(num_kernels);

	if (!variance.vector)
		variance=SGVector<float64_t>(num_kernels);

	/* ensure right dimensions */
	REQUIRE(statistic.vlen==num_kernels, "%s::compute_statistic_and_variance: "
			"statistic vector size (%d) does not match number of kernels (%d)\n",
			 get_name(), statistic.vlen, num_kernels);

	REQUIRE(variance.vlen==num_kernels, "%s::compute_statistic_and_variance: "
			"variance vector size (%d) does not match number of kernels (%d)\n",
			 get_name(), variance.vlen, num_kernels);

	/* temp variable in the algorithm */
	float64_t current;
	float64_t delta;

	/* initialise statistic and variance since they are cumulative */
	statistic.zero();
	variance.zero();

	/* needed for online mean and variance */
	SGVector<index_t> term_counters(num_kernels);
	term_counters.set_const(1);

	/* term counter to compute online mean and variance */
	index_t num_examples_processed=0;
	while (num_examples_processed<m_2)
	{
		/* number of examples to look at in this iteration */
		index_t num_this_run=CMath::min(m_blocksize,
				CMath::max(0, m_2-num_examples_processed));
		SG_DEBUG("processing %d more examples. %d so far processed. Blocksize "
				"is %d\n", num_this_run, num_examples_processed, m_blocksize);

		/* stream data from both distributions */
		CFeatures* p1=m_streaming_p->get_streamed_features(num_this_run);
		CFeatures* p2=m_streaming_p->get_streamed_features(num_this_run);
		CFeatures* q1=m_streaming_q->get_streamed_features(num_this_run);
		CFeatures* q2=m_streaming_q->get_streamed_features(num_this_run);

		/* check whether h0 should be simulated and permute if so */
		if (m_simulate_h0)
		{
			/* create merged copy of all feature instances to permute */
			CList* list=new CList();
			list->append_element(p2);
			list->append_element(q1);
			list->append_element(q2);
			CFeatures* merged=p1->create_merged_copy(list);
			SG_UNREF(list);

			/* permute */
			SGVector<index_t> inds(merged->get_num_vectors());
			inds.range_fill();
			inds.permute();
			merged->add_subset(inds);

			/* copy back, replacing old features */
			SG_UNREF(p1);
			SG_UNREF(p2);
			SG_UNREF(q1);
			SG_UNREF(q2);

			SGVector<index_t> copy(num_this_run);
			copy.range_fill();
			p1=merged->copy_subset(copy);
			copy.add(num_this_run);
			p2=merged->copy_subset(copy);
			copy.add(num_this_run);
			q1=merged->copy_subset(copy);
			copy.add(num_this_run);
			q2=merged->copy_subset(copy);

			/* clean up and note that copy_subset does a SG_REF */
			SG_UNREF(merged);
		}
		else
		{
			/* reference the produced features (only if copy_subset was not used) */
			SG_REF(p1);
			SG_REF(p2);
			SG_REF(q1);
			SG_REF(q2);
		}

		/* if multiple kernels are used, compute all of them on the streamed
		 * data; if the multiple kernels flag is false, the loop below is
		 * executed only once */
		CKernel* kernel=m_kernel;
		if (multiple_kernels)
		{
			SG_DEBUG("using multiple kernels\n");
		}

		/* iterate through all kernels for this data */
		for (index_t i=0; i<num_kernels; ++i)
		{
			/* if multiple kernels should be computed, set next kernel */
			if (multiple_kernels)
			{
				kernel=((CCombinedKernel*)m_kernel)->get_kernel(i);
			}

			/* compute kernel matrix diagonals */
			kernel->init(p1, p2);
			SGVector<float64_t> pp=kernel->get_kernel_diagonal();

			kernel->init(q1, q2);
			SGVector<float64_t> qq=kernel->get_kernel_diagonal();

			kernel->init(p1, q2);
			SGVector<float64_t> pq=kernel->get_kernel_diagonal();

			kernel->init(q1, p2);
			SGVector<float64_t> qp=kernel->get_kernel_diagonal();

			/* single variances for all kernels. Update mean and variance
			 * using Knuth's online variance algorithm,
			 * cf. for example Wikipedia */
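			/* per sample x: delta = x - mean; mean += delta/n;
			 * M2 (here: variance[i]) += delta*(x - mean); the division by
			 * (m_2-1) that yields the unbiased variance happens after the loop */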
			for (index_t j=0; j<num_this_run; ++j)
			{
				/* compute sum of current h terms for current kernel */
				current=pp[j]+qq[j]-pq[j]-qp[j];

				/* D. Knuth's online variance algorithm for current kernel */
				delta=current-statistic[i];
				statistic[i]+=delta/term_counters[i]++;
				variance[i]+=delta*(current-statistic[i]);

				SG_DEBUG("burst: current=%f, delta=%f, statistic=%f, "
						"variance=%f, kernel_idx=%d\n", current, delta,
						statistic[i], variance[i], i);
			}
			
			if (multiple_kernels)
			{
				SG_UNREF(kernel);
			}
		}

		/* clean up streamed data */
		SG_UNREF(p1);
		SG_UNREF(p2);
		SG_UNREF(q1);
		SG_UNREF(q2);

		/* add number of processed examples for this run */
		num_examples_processed+=num_this_run;
	}
	SG_DEBUG("Done compouting statistic, processed 2*%d examples.\n",
			num_examples_processed);

	/* the running mean of the h-terms is the linear time MMD, one entry
	 * per kernel */
	if (io->get_loglevel()==MSG_DEBUG || io->get_loglevel()==MSG_GCDEBUG)
		statistic.display_vector("statistics");

	/* the accumulated squared deviations are turned into the unbiased sample
	 * variance of the h-terms by dividing by (m_2-1); dividing once more by
	 * m_2 gives the variance of the statistic under the null distribution */
	for (index_t i=0; i<num_kernels; ++i)
		variance[i]=variance[i]/(m_2-1)/m_2;

	if (io->get_loglevel()==MSG_DEBUG || io->get_loglevel()==MSG_GCDEBUG)
		variance.display_vector("variances");

	SG_DEBUG("leaving %s::compute_statistic_and_variance()\n", get_name())
}
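The inner loop above is a straight application of Welford's (Knuth's) online mean/variance update. Below is a self-contained sketch of that update, written independently of Shogun's types; the sample values are hypothetical.

#include <cstdio>
#include <vector>

// online (single-pass) mean and unbiased variance, as used per kernel
// in compute_statistic_and_variance
struct OnlineMeanVar
{
	long   n = 0;
	double mean = 0.0;
	double m2 = 0.0;   // sum of squared deviations from the running mean

	void add(double x)
	{
		++n;
		double delta = x - mean;
		mean += delta/n;
		m2   += delta*(x - mean);
	}

	double variance() const { return n > 1 ? m2/(n - 1) : 0.0; }
};

int main()
{
	// hypothetical h-term values of one kernel
	std::vector<double> h = {0.3, -0.1, 0.4, 0.0, 0.2};

	OnlineMeanVar acc;
	for (double x : h)
		acc.add(x);

	std::printf("mean (MMD estimate) = %f, variance = %f\n",
			acc.mean, acc.variance());
	return 0;
}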
Example 3
void CLinearTimeMMD::compute_statistic_and_Q(
		SGVector<float64_t>& statistic, SGMatrix<float64_t>& Q)
{
	SG_DEBUG("entering %s::compute_statistic_and_Q()\n", get_name())

	REQUIRE(m_streaming_p, "%s::compute_statistic_and_Q: streaming "
			"features p required!\n", get_name());
	REQUIRE(m_streaming_q, "%s::compute_statistic_and_Q: streaming "
			"features q required!\n", get_name());

	REQUIRE(m_kernel, "%s::compute_statistic_and_Q: kernel needed!\n",
			get_name());

	/* make sure multiple_kernels flag is used only with a combined kernel */
	REQUIRE(m_kernel->get_kernel_type()==K_COMBINED,
			"%s::compute_statistic_and_Q: underlying kernel is not of "
			"type K_COMBINED\n", get_name());

	/* cast combined kernel */
	CCombinedKernel* combined=(CCombinedKernel*)m_kernel;

	/* m is number of samples from each distribution, m_4 is a quarter of it */
	REQUIRE(m_m>=4, "%s::compute_statistic_and_Q: Need at least m>=4\n",
			get_name());
	index_t m_4=m_m/4;

	SG_DEBUG("m_m=%d\n", m_m)

	/* number of subkernels in the combined kernel (cast is safe, see check above) */
	index_t num_kernels=combined->get_num_subkernels();
	REQUIRE(num_kernels>0, "%s::compute_statistic_and_Q: At least one kernel "
			"is needed\n", get_name());

	/* allocate memory for results if vectors are empty */
	if (!statistic.vector)
		statistic=SGVector<float64_t>(num_kernels);

	if (!Q.matrix)
		Q=SGMatrix<float64_t>(num_kernels, num_kernels);

	/* ensure right dimensions */
	REQUIRE(statistic.vlen==num_kernels, "%s::compute_statistic_and_variance: "
			"statistic vector size (%d) does not match number of kernels (%d)\n",
			 get_name(), statistic.vlen, num_kernels);

	REQUIRE(Q.num_rows==num_kernels, "%s::compute_statistic_and_variance: "
			"Q number of rows does (%d) not match number of kernels (%d)\n",
			 get_name(), Q.num_rows, num_kernels);

	REQUIRE(Q.num_cols==num_kernels, "%s::compute_statistic_and_variance: "
			"Q number of columns (%d) does not match number of kernels (%d)\n",
			 get_name(), Q.num_cols, num_kernels);

	/* initialise statistic and variance since they are cumulative */
	statistic.zero();
	Q.zero();

	/* produce two kernel lists to iterate doubly nested */
	CList* list_i=new CList();
	CList* list_j=new CList();
	
	for (index_t k_idx=0; k_idx<combined->get_num_kernels(); k_idx++)
	{
		CKernel* kernel = combined->get_kernel(k_idx);
		list_i->append_element(kernel);
		list_j->append_element(kernel);
		SG_UNREF(kernel);
	}

	/* needed for online mean and variance */
	SGVector<index_t> term_counters_statistic(num_kernels);
	SGMatrix<index_t> term_counters_Q(num_kernels, num_kernels);
	term_counters_statistic.set_const(1);
	term_counters_Q.set_const(1);

	index_t num_examples_processed=0;
	while (num_examples_processed<m_4)
	{
		/* number of examples to look at in this iteration */
		index_t num_this_run=CMath::min(m_blocksize,
				CMath::max(0, m_4-num_examples_processed));
		SG_DEBUG("processing %d more examples. %d so far processed. Blocksize "
				"is %d\n", num_this_run, num_examples_processed, m_blocksize);

		/* stream data from both distributions */
		CFeatures* p1a=m_streaming_p->get_streamed_features(num_this_run);
		CFeatures* p1b=m_streaming_p->get_streamed_features(num_this_run);
		CFeatures* p2a=m_streaming_p->get_streamed_features(num_this_run);
		CFeatures* p2b=m_streaming_p->get_streamed_features(num_this_run);
		CFeatures* q1a=m_streaming_q->get_streamed_features(num_this_run);
		CFeatures* q1b=m_streaming_q->get_streamed_features(num_this_run);
		CFeatures* q2a=m_streaming_q->get_streamed_features(num_this_run);
		CFeatures* q2b=m_streaming_q->get_streamed_features(num_this_run);

		/* check whether h0 should be simulated and permute if so */
		if (m_simulate_h0)
		{
			/* create merged copy of all feature instances to permute */
			CList* list=new CList();
			list->append_element(p1b);
			list->append_element(p2a);
			list->append_element(p2b);
			list->append_element(q1a);
			list->append_element(q1b);
			list->append_element(q2a);
			list->append_element(q2b);
			CFeatures* merged=p1a->create_merged_copy(list);
			SG_UNREF(list);

			/* permute */
			SGVector<index_t> inds(merged->get_num_vectors());
			inds.range_fill();
			inds.permute();
			merged->add_subset(inds);

			/* copy back, replacing old features */
			SG_UNREF(p1a);
			SG_UNREF(p1b);
			SG_UNREF(p2a);
			SG_UNREF(p2b);
			SG_UNREF(q1a);
			SG_UNREF(q1b);
			SG_UNREF(q2a);
			SG_UNREF(q2b);

			SGVector<index_t> copy(num_this_run);
			copy.range_fill();
			p1a=merged->copy_subset(copy);
			copy.add(num_this_run);
			p1b=merged->copy_subset(copy);
			copy.add(num_this_run);
			p2a=merged->copy_subset(copy);
			copy.add(num_this_run);
			p2b=merged->copy_subset(copy);
			copy.add(num_this_run);
			q1a=merged->copy_subset(copy);
			copy.add(num_this_run);
			q1b=merged->copy_subset(copy);
			copy.add(num_this_run);
			q2a=merged->copy_subset(copy);
			copy.add(num_this_run);
			q2b=merged->copy_subset(copy);

			/* clean up and note that copy_subset does a SG_REF */
			SG_UNREF(merged);
		}
		else
		{
			/* reference the produced features (only if copy_subset was not used) */
			SG_REF(p1a);
			SG_REF(p1b);
			SG_REF(p2a);
			SG_REF(p2b);
			SG_REF(q1a);
			SG_REF(q1b);
			SG_REF(q2a);
			SG_REF(q2b);
		}

		/* now for each of these streamed data instances, iterate through all
		 * kernels and update Q matrix while also computing MMD statistic */

		/* preallocate some memory for faster processing */
		SGVector<float64_t> pp(num_this_run);
		SGVector<float64_t> qq(num_this_run);
		SGVector<float64_t> pq(num_this_run);
		SGVector<float64_t> qp(num_this_run);
		SGVector<float64_t> h_i_a(num_this_run);
		SGVector<float64_t> h_i_b(num_this_run);
		SGVector<float64_t> h_j_a(num_this_run);
		SGVector<float64_t> h_j_b(num_this_run);

		/* iterate through Q matrix and update values, compute mmd */
		CKernel* kernel_i=(CKernel*)list_i->get_first_element();
		for (index_t i=0; i<num_kernels; ++i)
		{
			/* compute the h_delta terms (expression 7 of the NIPS paper) for
			 * this burst, for the outer kernel i: a-part and b-part */

			/* first kernel, a-part */
			kernel_i->init(p1a, p2a);
			pp=kernel_i->get_kernel_diagonal(pp);
			kernel_i->init(q1a, q2a);
			qq=kernel_i->get_kernel_diagonal(qq);
			kernel_i->init(p1a, q2a);
			pq=kernel_i->get_kernel_diagonal(pq);
			kernel_i->init(q1a, p2a);
			qp=kernel_i->get_kernel_diagonal(qp);
			for (index_t it=0; it<num_this_run; ++it)
				h_i_a[it]=pp[it]+qq[it]-pq[it]-qp[it];

			/* first kernel, b-part */
			kernel_i->init(p1b, p2b);
			pp=kernel_i->get_kernel_diagonal(pp);
			kernel_i->init(q1b, q2b);
			qq=kernel_i->get_kernel_diagonal(qq);
			kernel_i->init(p1b, q2b);
			pq=kernel_i->get_kernel_diagonal(pq);
			kernel_i->init(q1b, p2b);
			qp=kernel_i->get_kernel_diagonal(qp);
			for (index_t it=0; it<num_this_run; ++it)
				h_i_b[it]=pp[it]+qq[it]-pq[it]-qp[it];

			/* iterate through j, but use symmetry in order to save half of the
			 * computations */
			CKernel* kernel_j=(CKernel*)list_j->get_first_element();
			for (index_t j=0; j<=i; ++j)
			{
				/* compute the h_delta terms (expression 7 of the NIPS paper)
				 * for this burst, for the inner kernel j: a-part and b-part */

				/* second kernel, a-part */
				kernel_j->init(p1a, p2a);
				pp=kernel_j->get_kernel_diagonal(pp);
				kernel_j->init(q1a, q2a);
				qq=kernel_j->get_kernel_diagonal(qq);
				kernel_j->init(p1a, q2a);
				pq=kernel_j->get_kernel_diagonal(pq);
				kernel_j->init(q1a, p2a);
				qp=kernel_j->get_kernel_diagonal(qp);
				for (index_t it=0; it<num_this_run; ++it)
					h_j_a[it]=pp[it]+qq[it]-pq[it]-qp[it];

				/* second kernel, b-part */
				kernel_j->init(p1b, p2b);
				pp=kernel_j->get_kernel_diagonal(pp);
				kernel_j->init(q1b, q2b);
				qq=kernel_j->get_kernel_diagonal(qq);
				kernel_j->init(p1b, q2b);
				pq=kernel_j->get_kernel_diagonal(pq);
				kernel_j->init(q1b, p2b);
				qp=kernel_j->get_kernel_diagonal(qp);
				for (index_t it=0; it<num_this_run; ++it)
					h_j_b[it]=pp[it]+qq[it]-pq[it]-qp[it];

				float64_t term;
				for (index_t it=0; it<num_this_run; ++it)
				{
					/* current term of expression 7 of NIPS paper */
					term=(h_i_a[it]-h_i_b[it])*(h_j_a[it]-h_j_b[it]);

					/* update covariance element for the current burst. This is a
					 * running average of the product of the h_delta terms of each
					 * kernel */
					Q(i, j)+=(term-Q(i, j))/term_counters_Q(i, j)++;
				}

				/* use symmetry */
				Q(j, i)=Q(i, j);

				/* next kernel j */
				kernel_j=(CKernel*)list_j->get_next_element();
			}

			/* update MMD statistic online computation for kernel i, using
			 * vectors that were computed above */
			for (index_t it=0; it<num_this_run; ++it)
			{
				/* update statistic for kernel i (outer loop) using all
				 * elements of the h_i_a and h_i_b vectors */
				statistic[i]=statistic[i]+
						(h_i_a[it]-statistic[i])/term_counters_statistic[i]++;

				/* Make sure to use all data, i.e. part a and b */
				statistic[i]=statistic[i]+
						(h_i_b[it]-statistic[i])/(term_counters_statistic[i]++);
			}

			/* next kernel i */
			kernel_i=(CKernel*)list_i->get_next_element();
		}

		/* clean up streamed data */
		SG_UNREF(p1a);
		SG_UNREF(p1b);
		SG_UNREF(p2a);
		SG_UNREF(p2b);
		SG_UNREF(q1a);
		SG_UNREF(q1b);
		SG_UNREF(q2a);
		SG_UNREF(q2b);

		/* add number of processed examples for this run */
		num_examples_processed+=num_this_run;
	}

	/* clean up */
	SG_UNREF(list_i);
	SG_UNREF(list_j);

	SG_DEBUG("Done compouting statistic, processed 4*%d examples.\n",
			num_examples_processed);

	SG_DEBUG("leaving %s::compute_statistic_and_Q()\n", get_name())
}
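Both functions build on the same per-pair h-term of the linear-time MMD: for two fresh samples (x1, y1) and (x2, y2) from p and q, h = k(x1, x2) + k(y1, y2) - k(x1, y2) - k(x2, y1), and the statistic is the running mean of these terms. Below is a minimal sketch with a Gaussian kernel on scalars, written independently of Shogun; the samples and the bandwidth are hypothetical, and blocking and multiple kernels are left out.

#include <cmath>
#include <cstdio>
#include <vector>

// Gaussian kernel on scalars; bandwidth chosen arbitrarily for illustration
static double k(double a, double b, double sigma = 1.0)
{
	double d = a - b;
	return std::exp(-d*d/(2.0*sigma*sigma));
}

int main()
{
	// hypothetical streamed samples: x from p, y from q
	std::vector<double> x = {0.1, -0.3, 0.2, 0.5};
	std::vector<double> y = {1.2,  0.9, 1.1, 0.8};

	// pair up consecutive samples and keep a running mean of the h-terms,
	// as the linear-time estimator does (one kernel, no blocking)
	double statistic = 0.0;
	long   n = 0;
	for (size_t i = 0; i + 1 < x.size(); i += 2)
	{
		double h = k(x[i], x[i+1]) + k(y[i], y[i+1])
				- k(x[i], y[i+1]) - k(x[i+1], y[i]);
		statistic += (h - statistic)/++n;
	}

	std::printf("linear-time MMD estimate: %f\n", statistic);
	return 0;
}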