// Prunes the layout system's room-candidate list so that only candidates whose
// directional score is within the evaluated threshold of the best score remain.
// The direction name and the threshold are both obtained by evaluating this
// action's expressions against the layout system's free variables; a negative
// threshold is treated as 0.  Nothing happens if the list is empty or the
// direction string does not map to a valid exit direction (a warning is logged
// in the latter case).
void CTilegenAction_FilterCandidatesByDirection::Execute( CLayoutSystem *pLayoutSystem )
{
	CUtlVector< CRoomCandidate > *pCandidates = pLayoutSystem->GetRoomCandidateList();
	if ( pCandidates->Count() == 0 )
		return;

	// Evaluate the direction first, then the threshold.
	const char *pDirectionName = m_pDirectionExpression->Evaluate( pLayoutSystem->GetFreeVariables() );
	const int nRawThreshold = m_pThresholdExpression->Evaluate( pLayoutSystem->GetFreeVariables() );
	const int nSlack = MAX( 0, nRawThreshold );

	const ExitDirection_t exitDir = GetDirectionFromString( pDirectionName );
	const bool bValidDirection = ( exitDir >= EXITDIR_BEGIN ) && ( exitDir < EXITDIR_END );
	if ( !bValidDirection )
	{
		Log_Warning( LOG_TilegenLayoutSystem, "Invalid direction specified: %s.\n", pDirectionName );
		return;
	}

	// Pass 1: find the best score over all candidates.
	int nBestScore = INT_MIN;
	for ( int iCandidate = 0; iCandidate < pCandidates->Count(); ++iCandidate )
	{
		const CRoomCandidate &candidate = pCandidates->Element( iCandidate );
		const int nScore = ComputeScore( exitDir, candidate.m_iXPos, candidate.m_iYPos );
		if ( nBestScore < nScore )
		{
			nBestScore = nScore;
		}
	}

	// Pass 2: drop every candidate scoring below (best - slack).  Walk from the
	// back so a removal at index i is never followed by a visit to an index > i.
	// @TODO: allow for specifying a numerical range in which candidates are chosen
	const int nCutoff = nBestScore - nSlack;
	int iCandidate = pCandidates->Count();
	while ( --iCandidate >= 0 )
	{
		const CRoomCandidate &candidate = pCandidates->Element( iCandidate );
		const int nScore = ComputeScore( exitDir, candidate.m_iXPos, candidate.m_iYPos );
		if ( nScore >= nCutoff )
		{
			continue;
		}
		pCandidates->FastRemove( iCandidate );
	}
}
// does the two-step clustering algorithm:
// first make a subset of the data, to SubPoints points
// then run CEM on this
// then use these clusters to do a CEM on the full data
float KK::Cluster() {
	KK KKSub;
	int i, d, p;
	//float StepSize; // for resampling
	int sPoints; // number of points to subset to

	if (Subset<=1) { // don't subset
		Output("--- Clustering full data set of %d points ---\n", nPoints);
		return CEM(NULL, 1, 1);
	} else { // run on a subset of points

		sPoints = nPoints/Subset; // number of subset points - integer division will round down

		// set up KKSub object
		KKSub.nDims = nDims;
		KKSub.nPoints = sPoints;
		KKSub.penaltyMix = PenaltyMix;
		KKSub.nStartingClusters = nStartingClusters;
		KKSub.AllocateArrays();

		// fill KKSub with a subset of SubPoints from full data set.
		for (i=0; i<sPoints; i++) {
			// choose point to include, evenly spaced plus a random offset
			p= Subset*i + irand(0,Subset-1);

			// copy data
			for (d=0; d<nDims; d++) KKSub.Data[i*nDims + d] = Data[p*nDims + d];
		}

		// run CEM algorithm on KKSub
		Output("--- Running on subset of %d points ---\n", sPoints);
		KKSub.CEM(NULL, 1, 1);

		// now copy cluster shapes from KKSub to main KK
		Weight = KKSub.Weight;
		Mean = KKSub.Mean;
		Cov = KKSub.Cov;
		ClassAlive = KKSub.ClassAlive;
		nClustersAlive = KKSub.nClustersAlive;
		AliveIndex = KKSub.AliveIndex;

		// Run E and C steps on full data set
		Output("--- Evaluating fit on full set of %d points ---\n", nPoints);
		EStep();
		CStep();

		// compute score on full data set and leave
		return ComputeScore();
	}
}
// CEM(InputClass, Recurse, InitRand) - Does a whole CEM algorithm from a random start
//
// InputClass: optional; when non-NULL it is passed to LoadClu() to provide the
//             starting classification, and InitRand is then ignored.
// Recurse:    if 0, it will not try to split (TrySplits) or delete
//             (ConsiderDeletion) clusters, and verbose output is tab-indented.
// InitRand:   if 0 (and InputClass is NULL), use cluster assignments already in
//             the structure; otherwise assign starting classes at random.
//
// Each iteration runs MStep, EStep, CStep, then counts how many points changed
// class and recomputes the score.  Iteration stops once an iteration following a
// full step changes no point and produces no split, or when Iter exceeds MaxIter.
// Returns the ComputeScore() value from the last iteration executed.
float KK::CEM(const mxArray *InputClass/*= NULL*/, int Recurse /*=1*/, int InitRand /*=1*/)  {
	int p, c;
	int nChanged;
	int Iter;
	Array<int> OldClass(nPoints); // snapshot of Class[] taken at the start of each iteration
	float Score = HugeScore, OldScore; // OldScore is written below but only read by a disabled condition
	int LastStepFull; // stores whether the last step was a full one
    int DidSplit;

    if (InputClass!= NULL) LoadClu(InputClass);
	else if (InitRand) {
        // initialize data to random
        // (with more than one starting cluster the range passed to irand starts
        //  at 1 - presumably class 0 is reserved for the noise cluster; confirm)
        if (nStartingClusters>1)
    	    for(p=0; p<nPoints; p++) Class[p] = irand(1, nStartingClusters-1);
        else
            for(p=0; p<nPoints; p++) Class[p] = 0;

		// mark the first nStartingClusters classes alive, all others dead
		for(c=0; c<MaxPossibleClusters; c++) ClassAlive[c] = (c<nStartingClusters);
    }

	// rebuild the alive-cluster bookkeeping (see Reindex)
    Reindex();

	// main loop
	Iter = 0;
	FullStep = 1; // member flag: the first iteration is always a full step
	do {
		// Store old classifications
		for(p=0; p<nPoints; p++) OldClass[p] = Class[p];

		// M-step - calculate class weights, means, and covariance matrices for each class
		MStep();

		// E-step - calculate scores for each point to belong to each class
		EStep();

		// dump distances if required

		//if (DistDump) MatPrint(Distfp, LogP.m_Data, DistDump, MaxPossibleClusters);

		// C-step - choose best class for each
		CStep();

		// Would deleting any classes improve things?
		if(Recurse) ConsiderDeletion();

		// Calculate number changed
		nChanged = 0;
		for(p=0; p<nPoints; p++) nChanged += (OldClass[p] != Class[p]);

		// Calculate score
		OldScore = Score;
		Score = ComputeScore();

		// progress line; 'F' marks a full step, 'Q' a quick one
		if(Verbose>=1) {
            if(Recurse==0) Output("\t");
            Output("Iteration %d%c: %d clusters Score %.7g nChanged %d\n",
			    Iter, FullStep ? 'F' : 'Q', nClustersAlive, Score, nChanged);
        }

		Iter++;

		/*
		if (Debug) {
			for(p=0;p<nPoints;p++) BestClass[p] = Class[p];
			SaveOutput(BestClass);
			Output("Press return");
			getchar();
		}*/

		// Next step a full step?
		// Yes if many points changed, if none changed, or every FullStepEvery iterations.
		LastStepFull = FullStep;
		FullStep = (
						nChanged>ChangedThresh*nPoints
						|| nChanged == 0
						|| Iter%FullStepEvery==0
					//	|| Score > OldScore Doesn't help!
					//	Score decreases are not because of quick steps!
					) ;
		// hard cap on the number of iterations; leaves the loop without evaluating
		// the loop condition (so DidSplit is not read on this path)
		if (Iter>MaxIter) {
			Output("Maximum iterations exceeded\n");
			break;
		}

        // try splitting
        // only when recursion and splitting are enabled, and either it is the
        // scheduled iteration or the assignments have converged after a full step
        if ((Recurse && SplitEvery>0) && (Iter%SplitEvery==SplitEvery-1 || (nChanged==0 && LastStepFull))) {
            DidSplit = TrySplits();
        } else DidSplit = 0;

	// keep going while points are still changing, the last step was only a
	// quick one, or a split has just been made
	} while (nChanged > 0 || !LastStepFull || DidSplit);

	//if (DistDump) fprintf(Distfp, "\n");

	return Score;
}
// for each cluster, try to split it in two.  if that improves the score, do it.
// returns 1 if split was successful
//
// For every alive cluster (starting from alive index 1) the points of that
// cluster are copied into K2, which is clustered twice with CEM: once with
// nStartingClusters=2 and once with nStartingClusters=3.  If the second run
// scores lower and leaves at least 2 clusters alive, the proposed split is
// written into K3 (a copy of the full data), scored there, and accepted into
// this object only if the K3 score is lower than the score computed on entry.
int KK::TrySplits() {
    int i, c, cc, c2, p, p2, d, DidSplit = 0;
    float Score, NewScore, UnsplitScore, SplitScore;
    int UnusedCluster;
    KK K2; // second KK structure for sub-clustering
    KK K3; // third one for comparison

    // need room for at least one more cluster
    if(nClustersAlive>=MaxPossibleClusters-1) {
        Output("Won't try splitting - already at maximum number of clusters\n");
        return 0;
    }

    // set up K3
    // (same dimensions and a full copy of this object's data)
    K3.nDims = nDims; K3.nPoints = nPoints;
    K3.penaltyMix = PenaltyMix;
    K3.AllocateArrays();
    for(i=0; i<nDims*nPoints; i++) K3.Data[i] = Data[i];

    // baseline score; NOTE(review): this is not refreshed after a successful
    // split, so later candidates are compared against the pre-split score
    Score = ComputeScore();

    // loop thu clusters, trying to split
    // (starts at alive index 1 - presumably index 0 is the noise cluster; confirm)
    // NOTE(review): nClustersAlive/AliveIndex of this object are not updated in
    // this function after an accepted split (only K3.Reindex() is called)
    for (cc=1; cc<nClustersAlive; cc++) {
        c = AliveIndex[cc];

        // set up K2 strucutre to contain points of this cluster only

        // count number of points and allocate memory
        K2.nPoints = 0;
        K2.penaltyMix = PenaltyMix;
        for(p=0; p<nPoints; p++) if(Class[p]==c) K2.nPoints++;
        if(K2.nPoints==0) continue;
        K2.nDims = nDims;
        // NOTE(review): AllocateArrays is called on K2 once per candidate cluster;
        // whether it releases the previous arrays is not visible here
        K2.AllocateArrays();
        K2.NoisePoint = 0;

        // put data into K2
        // (p2 is the index within K2, p the index in the full data)
        p2=0;
        for(p=0; p<nPoints; p++) if(Class[p]==c) {
            for(d=0; d<nDims; d++) K2.Data[p2*nDims + d] = Data[p*nDims + d];
            p2++;
        }

        // find an unused cluster
        // (lowest-numbered class >= 1 that is not alive)
        UnusedCluster = -1;
        for(c2=1; c2<MaxPossibleClusters; c2++) {
             if (!ClassAlive[c2]) {
                 UnusedCluster = c2;
                 break;
             }
        }
        if (UnusedCluster==-1) {
            // NOTE(review): this message has no trailing newline, unlike the others
            Output("No free clusters, abandoning split");
            return DidSplit;
        }

        // do it
        // both runs use Recurse=0 (no nested splitting/deletion) and a random start
        if (Verbose>=1) Output("Trying to split cluster %d (%d points) \n", c, K2.nPoints);
        K2.nStartingClusters=2; // (2 = 1 clusters + 1 unused noise cluster)
        UnsplitScore = K2.CEM(NULL, 0, 1);
        K2.nStartingClusters=3; // (3 = 2 clusters + 1 unused noise cluster)
        SplitScore = K2.CEM(NULL, 0, 1);

        // Fix by Michaël Zugaro: replace next line with following two lines
        // if(SplitScore<UnsplitScore) {
        if(K2.nClustersAlive<2) Output("Split failed - leaving alone\n");
        if(SplitScore<UnsplitScore&&K2.nClustersAlive>=2) {
            // will splitting improve the score in the whole data set?

            // assign clusters to K3
            // points of cluster c go to c or UnusedCluster according to their K2
            // class (1 or 2); every other point keeps its current class
            for(c2=0; c2<MaxPossibleClusters; c2++) K3.ClassAlive[c2]=0;
            p2 = 0;
            for(p=0; p<nPoints; p++) {
                if(Class[p]==c) {
                    if(K2.Class[p2]==1) K3.Class[p] = c;
                    else if(K2.Class[p2]==2) K3.Class[p] = UnusedCluster;
                    // NOTE(review): if Error() returns, K3.Class[p] is not assigned
                    // on this path before being used as an index below
                    else Error("split should only produce 2 clusters");
                    p2++;
                } else K3.Class[p] = Class[p];
                K3.ClassAlive[K3.Class[p]] = 1;
            }
            K3.Reindex();

            // compute scores
            K3.MStep();
            K3.EStep();
            NewScore = K3.ComputeScore();
            Output("Splitting cluster %d changes total score from %f to %f\n", c, Score, NewScore);

            // lower score wins
            if (NewScore<Score) {
                DidSplit = 1;
                Output("So it's getting split into cluster %d.\n", UnusedCluster);
                // so put clusters from K3 back into main KK struct (K1)
                for(c2=0; c2<MaxPossibleClusters; c2++) ClassAlive[c2] = K3.ClassAlive[c2];
                for(p=0; p<nPoints; p++) Class[p] = K3.Class[p];
            } else {
                Output("So it's not getting split.\n");
            }
        }
    }
    return DidSplit;
}
Exemple #5
0
// Performs one margin-based training update for the triple (head, r, tail).
//
// Three corrupted triples are scored against the positive one: one with a
// sampled head, one with a sampled tail and one with a sampled relation.  For
// each corrupted triple whose score does not exceed the positive score by more
// than Opt::margin, ComputeGradient accumulates its gradients; the positive
// triple's gradients are then accumulated with weight effect_cnt (the number of
// violated margins).  Finally the embeddings (and, if Opt::update_mat is set,
// the relation matrices) are updated with step GetStepSize(head, r, tail),
// divided by effect_cnt when effect_cnt is non-zero, and ConstrainParameters is
// applied to every relation that was touched.
//
// If no margin is violated and Opt::sig_relat is false the function returns
// without changing anything.
//
// head, tail   : word indices of the positive triple
// head_is_word : not used by this function
// r            : relation index of the positive triple
//
//All the *_grad is the gradient of pos_score - neg_score w.r.t. the parameter *
void CRelation::TrainRelatTriple(int head, bool head_is_word, int r, int tail)
{
	real head_grads[MAX_EMBEDDING_SIZE], tail_grads[MAX_EMBEDDING_SIZE], grads_tmp[MAX_EMBEDDING_SIZE], relat_grads[MAX_EMBEDDING_SIZE], negh_grads[MAX_EMBEDDING_SIZE], negt_grads[MAX_EMBEDDING_SIZE], negr_grads[MAX_EMBEDDING_SIZE];
	real head_left_mat_grads[MAX_EMBEDDING_SIZE * MAX_RELAT_RANK], head_right_mat_grads[MAX_EMBEDDING_SIZE * MAX_RELAT_RANK], tail_left_mat_grads[MAX_EMBEDDING_SIZE * MAX_RELAT_RANK], tail_right_mat_grads[MAX_EMBEDDING_SIZE * MAX_RELAT_RANK];
	real negr_head_left_mat_grads[MAX_EMBEDDING_SIZE * MAX_RELAT_RANK], negr_head_right_mat_grads[MAX_EMBEDDING_SIZE * MAX_RELAT_RANK], negr_tail_left_mat_grads[MAX_EMBEDDING_SIZE * MAX_RELAT_RANK], negr_tail_right_mat_grads[MAX_EMBEDDING_SIZE * MAX_RELAT_RANK];
	
	real head_mat_grads[MAX_RELAT_RANK], tail_mat_grads[MAX_RELAT_RANK]; 
	real negr_head_mat_grads[MAX_RELAT_RANK], negr_tail_mat_grads[MAX_RELAT_RANK]; //Used for the diag case

	// Scratch outputs of ComputeScore that are fed back into ComputeGradient.
	real off_vec[MAX_EMBEDDING_SIZE], neg_off_vec[MAX_EMBEDDING_SIZE];
	real Qh_h[MAX_RELAT_RANK], Qh_negh[MAX_RELAT_RANK], Qt_t[MAX_RELAT_RANK], Qt_negt[MAX_RELAT_RANK];
	real PhT_offvec[MAX_RELAT_RANK], PtT_offvec[MAX_RELAT_RANK];
	
	// Zero every gradient accumulator that may be read below.
	memset(head_grads, 0, sizeof(real)* Opt::embeding_size);
	memset(tail_grads, 0, sizeof(real)* Opt::embeding_size);
	memset(relat_grads, 0, sizeof(real)* Opt::embeding_size);
	memset(negh_grads, 0, sizeof(real)* Opt::embeding_size);
	memset(negt_grads, 0, sizeof(real)* Opt::embeding_size);
	memset(negr_grads, 0, sizeof(real)* Opt::embeding_size);

	if (Opt::update_mat && !Opt::is_diag)
	{
		memset(head_left_mat_grads, 0, sizeof(real)* Opt::head_relat_rank * Opt::embeding_size);
		memset(head_right_mat_grads, 0, sizeof(real)* Opt::head_relat_rank * Opt::embeding_size);

		if (Opt::use_tail_mat)
		{
			memset(tail_left_mat_grads, 0, sizeof(real)* Opt::embeding_size * Opt::tail_relat_rank);
			memset(tail_right_mat_grads, 0, sizeof(real)* Opt::tail_relat_rank * Opt::embeding_size);
		}

		memset(negr_head_left_mat_grads, 0, sizeof(real)* Opt::embeding_size * Opt::head_relat_rank);
		memset(negr_head_right_mat_grads, 0, sizeof(real)* Opt::embeding_size * Opt::head_relat_rank);

		if (Opt::use_tail_mat)
		{
			memset(negr_tail_left_mat_grads, 0, sizeof(real)* Opt::embeding_size * Opt::tail_relat_rank);
			memset(negr_tail_right_mat_grads, 0, sizeof(real)* Opt::embeding_size * Opt::tail_relat_rank);
		}
	}

	if (Opt::update_mat && Opt::is_diag)
	{
		// NOTE(review): these four buffers are declared with MAX_RELAT_RANK
		// elements but cleared with Opt::embeding_size elements; this is only
		// in bounds if embeding_size <= MAX_RELAT_RANK - confirm.
		memset(head_mat_grads, 0, sizeof(real)* Opt::embeding_size);
		memset(tail_mat_grads, 0, sizeof(real)* Opt::embeding_size);
		memset(negr_head_mat_grads, 0, sizeof(real)* Opt::embeding_size);
		memset(negr_tail_mat_grads, 0, sizeof(real)* Opt::embeding_size);
	}

	//Sample neg head word and neg tail word
	// (resample until the corrupting word differs from both head and tail, and
	//  the corrupting relation differs from r)
	int neg_head, neg_tail, neg_r;
	do
	{
		neg_head = SampleWordIdx();
	} while (neg_head == head || neg_head == tail);

	do
	{
		neg_tail = SampleWordIdx();
	} while (neg_tail == head || neg_tail == tail);

	do
	{
		neg_r = SampleRelatIdx();
	} while (neg_r == r);

	real* head_embedding = WordParams::p_embedding[head];
	real* tail_embedding = WordParams::p_embedding[tail];
	real* relat_embedding = p_relat_emb[r];
	real* relat_act_embedding = Opt::act_relat ? p_relat_act_emb[r] : NULL;

	real* neg_head_embedding = WordParams::p_embedding[neg_head];
	real* neg_tail_embedding = WordParams::p_embedding[neg_tail];
	real* neg_r_embedding = p_relat_emb[neg_r];
	real* neg_r_act_embedding = Opt::act_relat ? p_relat_act_emb[neg_r] : NULL;

	real pos_score = ComputeScore(head, r, tail, Qh_h, Qt_t, off_vec); 
	real negh_score = ComputeScore(neg_head, r, tail, Qh_negh, Qt_t, neg_off_vec);
	
	// Corrupted head: accumulate gradients only if the margin is violated.
	const real hgap = negh_score - pos_score;
	const bool is_negh_margin_satisfied = hgap > Opt::margin;
	if (!is_negh_margin_satisfied) //margin is not satisfied
		ComputeGradient(1, Opt::relat_neg_weight, r, grads_tmp, negh_grads, tail_grads, relat_grads,
			head_left_mat_grads, head_right_mat_grads, tail_left_mat_grads, tail_right_mat_grads,
			neg_off_vec, Qh_negh, Qt_t, PhT_offvec, PtT_offvec, neg_head_embedding, tail_embedding,
			head_mat_grads, tail_mat_grads);
	
	//Begin updating the loss and grads for ||LR(h - t)||_2^2 - ||LR(h - negt)||_2^2
	real negt_score = ComputeScore(head, r, neg_tail, Qh_h, Qt_negt, neg_off_vec);

	const real tgap = negt_score - pos_score;
	const bool is_negt_margin_satisfied = tgap > Opt::margin;
	if (!is_negt_margin_satisfied)
		ComputeGradient(1, Opt::relat_neg_weight, r, grads_tmp, head_grads, negt_grads, relat_grads,
			head_left_mat_grads, head_right_mat_grads, tail_left_mat_grads, tail_right_mat_grads, neg_off_vec,
			Qh_h, Qt_negt, PhT_offvec, PtT_offvec, head_embedding, neg_tail_embedding,
			head_mat_grads, tail_mat_grads);
	
	// Corrupted relation.  Note this call reuses (overwrites) Qh_negh, Qt_negt
	// and neg_off_vec, whose earlier contents have already been consumed above.
	real negr_score = ComputeScore(head, neg_r, tail, Qh_negh, Qt_negt, neg_off_vec);

	const real rgap = negr_score - pos_score;
	const bool is_negr_margin_satisfied = rgap > Opt::margin;
	if (!is_negr_margin_satisfied)
		ComputeGradient(1, Opt::relat_neg_weight, neg_r, grads_tmp, head_grads, tail_grads, negr_grads,
			negr_head_left_mat_grads, negr_head_right_mat_grads, negr_tail_left_mat_grads, negr_tail_right_mat_grads, neg_off_vec,
			Qh_negh, Qt_negt, PhT_offvec, PtT_offvec, head_embedding, tail_embedding,
			negr_head_mat_grads, negr_tail_mat_grads);
	
	// Nothing violated and no sig_relat handling requested: no update needed.
	if (!Opt::sig_relat && is_negh_margin_satisfied && is_negt_margin_satisfied && is_negr_margin_satisfied)
		return;

	// Number of violated margins; used as the weight of the positive triple.
	int effect_cnt = (is_negh_margin_satisfied ? 0 : 1) + (is_negt_margin_satisfied ? 0 : 1) + (is_negr_margin_satisfied ? 0 : 1);
	ComputeGradient(-1, effect_cnt, r, grads_tmp, head_grads, tail_grads, relat_grads,
		head_left_mat_grads, head_right_mat_grads, tail_left_mat_grads, tail_right_mat_grads, off_vec,
		Qh_h, Qt_t, PhT_offvec, PtT_offvec, head_embedding, tail_embedding,
		head_mat_grads, tail_mat_grads);

	//Gradient Checking
	/*real gap = (is_negh_margin_satisfied ? 0 : negh_score - pos_score) + (is_negt_margin_satisfied ? 0 : negt_score - pos_score) + (is_negr_margin_satisfied ? 0 : negr_score - pos_score);

	
	if (rand() < 20)
	{
		const double epsilon = 1e-6;
		//head_embedding[idx] += epsilon;
		//p_head_left_mat[r][idx] += epsilon;
		//p_tail_right_mat[r][idx] += epsilon;
		//tail_embedding[idx] += epsilon;
		//p_actual_left_mat[r][idx] = 2 * Util::Sigmoid(p_left_mat[r][41]) - 1;
		//p_tail_left_mat[r][idx] += epsilon;
		//p_head_left_mat[neg_r][idx] += epsilon;
		//p_actual_right_mat[r][idx] = 2 * Util::Sigmoid(p_right_mat[r][idx]) - 1;
		//p_relat_emb[neg_r][idx] += epsilon;
		//p_relat_act_emb[neg_r][idx] = 2 * Util::Sigmoid(p_relat_emb[neg_r][idx]) - 1;

		idx = -1;
		for (auto x : tail_diag_mat_ele[r])
			idx = x.first;

		printf("\n");

		tail_diag_mat_ele[neg_r][idx] += epsilon;
		
		real new_gap = 0, pos_score = ComputeLoss(head, tail, r);
		//ComputeLoss(head, tail, neg_r) - ComputeLoss(head, tail, r);
		real neg_gap = ComputeLoss(neg_head, tail, r) - pos_score;
		if (neg_gap <= Opt::margin)
			new_gap += neg_gap;
		neg_gap = ComputeLoss(head, neg_tail, r) - pos_score;
		if (neg_gap <= Opt::margin)
			new_gap += neg_gap;
		neg_gap = ComputeLoss(head, tail, neg_r) - pos_score;
		if (neg_gap <= Opt::margin)
			new_gap += neg_gap;

		printf("real gradient: %.5f, our gradient %.5f, idx:%d\n", (new_gap - gap) / epsilon, negr_tail_mat_grads[idx], idx);
		//p_tail_right_mat[r][idx] -= epsilon;
		//p_head_left_mat[neg_r][idx] -= epsilon;
		//tail_embedding[idx] -= epsilon;
		//p_actual_right_mat[r][idx] = 2 * Util::Sigmoid(p_right_mat[r][idx]) - 1;
		//head_embedding[idx] -= epsilon;
		//p_relat_emb[neg_r][idx] -= epsilon;
		//p_relat_act_emb[neg_r][idx] = 2 * Util::Sigmoid(p_relat_emb[neg_r][idx]) - 1;
		//p_actual_left_mat[neg_r][idx] = 2 * Util::Sigmoid(p_left_mat[r][41]) - 1;
		tail_diag_mat_ele[neg_r][idx] -= epsilon;
	}*/

	double step_size = GetStepSize(head, r, tail);
	// effect_cnt is 0 when Opt::sig_relat is set and every margin is already
	// satisfied.  Dividing by it would turn step_size into inf/NaN and poison
	// every parameter updated below, so only normalise when at least one
	// margin was violated.
	if (effect_cnt > 0)
		step_size /= effect_cnt;

	// Apply the accumulated gradients to the embeddings.
	Util::MatPlusMat(relat_embedding, relat_grads, step_size, Opt::embeding_size, 1);
	
	Util::MatPlusMat(head_embedding, head_grads, step_size, Opt::embeding_size, 1);
	
	Util::MatPlusMat(tail_embedding, tail_grads, step_size, Opt::embeding_size, 1);

	// Corrupting entities are only touched if their margin was violated.
	if (!is_negt_margin_satisfied)
		Util::MatPlusMat(neg_tail_embedding, negt_grads, step_size, Opt::embeding_size, 1);

	if (!is_negh_margin_satisfied)
		Util::MatPlusMat(neg_head_embedding, negh_grads, step_size, Opt::embeding_size, 1);

	if (!is_negr_margin_satisfied)
		Util::MatPlusMat(neg_r_embedding, negr_grads, step_size, Opt::embeding_size, 1);

	if (Opt::update_mat && !Opt::is_diag)
	{
		// Low-rank relation matrices of r ...
		Util::MatPlusMat(p_head_left_mat[r], head_left_mat_grads, step_size, Opt::embeding_size, Opt::head_relat_rank);
		Util::MatPlusMat(p_head_right_mat[r], head_right_mat_grads, step_size, Opt::head_relat_rank, Opt::embeding_size);
		if (Opt::use_tail_mat)
		{
			Util::MatPlusMat(p_tail_left_mat[r], tail_left_mat_grads, step_size, Opt::embeding_size, Opt::tail_relat_rank);
			Util::MatPlusMat(p_tail_right_mat[r], tail_right_mat_grads, step_size, Opt::tail_relat_rank, Opt::embeding_size);
		}

		// ... and of the corrupting relation, if its margin was violated.
		if (!is_negr_margin_satisfied)
		{
			Util::MatPlusMat(p_head_left_mat[neg_r], negr_head_left_mat_grads, step_size, Opt::embeding_size, Opt::head_relat_rank);
			Util::MatPlusMat(p_head_right_mat[neg_r], negr_head_right_mat_grads, step_size, Opt::embeding_size, Opt::head_relat_rank);
			if (Opt::use_tail_mat)
			{
				Util::MatPlusMat(p_tail_left_mat[neg_r], negr_tail_left_mat_grads, step_size, Opt::embeding_size, Opt::tail_relat_rank);
				Util::MatPlusMat(p_tail_right_mat[neg_r], negr_tail_right_mat_grads, step_size, Opt::embeding_size, Opt::tail_relat_rank);
			}
		}
	}
	else if (Opt::update_mat && Opt::is_diag)
	{
		// Diagonal case: update only the stored diagonal entries.  The key of
		// each entry is used as the index into the matching gradient buffer.
		// Keys already exist, so operator[] does not insert while iterating.
		for (const auto &x : head_diag_mat_ele[r])
			head_diag_mat_ele[r][x.first] += step_size *  head_mat_grads[x.first];
		if (Opt::use_tail_mat)
			for (const auto &x : tail_diag_mat_ele[r])
				tail_diag_mat_ele[r][x.first] += step_size * tail_mat_grads[x.first];
		if (!is_negr_margin_satisfied)
		{
			for (const auto &x : head_diag_mat_ele[neg_r])
				head_diag_mat_ele[neg_r][x.first] += step_size *  negr_head_mat_grads[x.first];
			if (Opt::use_tail_mat)
				for (const auto &x : tail_diag_mat_ele[neg_r])
					tail_diag_mat_ele[neg_r][x.first] += step_size * negr_tail_mat_grads[x.first];
		}
	}

	// Re-apply parameter constraints to every relation that was modified.
	ConstrainParameters(r);
	if (!is_negr_margin_satisfied)
		ConstrainParameters(neg_r);
}
Exemple #6
0
/*
 * Interactive query loop.
 *
 * argv[1] is passed to ReadFile() to build the word hash table; argv[2] is
 * forwarded unchanged to BubbleSort() when results are ranked.
 *
 * Each line read from stdin is split into words.  Words separated by "AND" (or
 * by nothing) form a group whose documents must match every word; "OR" starts a
 * new group.  wordArray[a][o] holds the a-th word of the o-th OR group, and an
 * empty string marks the end of a group / of the groups.  Matching document ids
 * are collected in FinalDocMatchArray with their scores in scoreArray at the
 * same positions, then handed to BubbleSort().
 *
 * The loop ends when the user enters "QUIT" or stdin reaches end-of-file
 * (exit status 0); malformed queries terminate the program with status 1.
 */
int main(int argc, char* argv[]){
	/* capacity of the per-query document arrays */
	enum { QUERY_DOC_CAPACITY = 1705 };

	//call the check args function to check the input arguments
	checkArgs(argc, argv);

	//init the HashTable
	HashTable* Table = ReadFile(argv[1]);
	if (Table == NULL){
		fprintf(stderr, "Could not load index from %s\n", argv[1]);
		exit(1);
	}

	//init the array to hold all of the input words
	//(static so that this potentially large table does not live on the stack)
	static char wordArray[MAX_ROWS][MAX_ROWS][MAX_WORD_LENGTH + 1];

	//init keyboard input string; sized to match what fgets is allowed to write
	char line[MAX_LINE];

	while (1){	//main loop

		printf("\nEnter your string (enter \"QUIT\" to exit the function) \n");

		//accept user input; stop cleanly on end-of-file or read error instead
		//of looping forever on stale buffer contents
		if (fgets(line, sizeof(line), stdin) == NULL){
			CleanHashMemory(Table);
			exit(0);
		}
		//deal with user input longer than the max line
		if (NULL == strchr(line, '\n')){
			printf("Query only accepts 1000 characters\n");
			eat_extra(); //"eats" characters after 1000 characters are input then exits
			exit(1);
		}

		//handle when the user quits the program
		if (strcmp(line, "QUIT\n") == 0){
			printf("Exit command reached, Cleaning memory and quitting\n");
			CleanHashMemory(Table);
			exit(0);
		}

		//check if the inputted line ends with AND or OR
		EndsWithAND(line);
		EndsWithOR(line);

		char* argv2 = argv[2];

		//make sure the wordArray is cleared out between queries
		memset(wordArray, 0, sizeof(wordArray));

		int FinalDocMatchArray[QUERY_DOC_CAPACITY] = {0}; //keep the documents ids that have matched all the criteria
		int FinalArrayIndex = 0;
		int scoreArray[QUERY_DOC_CAPACITY] = {0}; //keep the scores of the FinalDocMatchArray in parallel positions
		int scoredCount = 0; //number of leading FinalDocMatchArray entries that already have a score

		//init variables for GetNextWord
		int pos = 0;
		int counter = 0;
		int andPos = 0;
		int andFlag = 0;
		int orFlag = 0;
		int orPos = 0;
		char* word = NULL;
		while ((pos = GetNextWord(line, pos, &word)) > 0){ //go through the words in the query
			//only consider words that fit in a wordArray slot
			if (word != NULL && strlen(word) < MAX_WORD_LENGTH){
				//check if it starts with AND or OR
				if (counter == 0 && (strcmp(word, "AND") == 0 || strcmp(word, "OR") == 0)){
					printf("Input cannot start or end with AND or OR\n");
					exit(1);
				}
				else if (strcmp(word, "AND") == 0){
					if (andFlag == 1){
						printf("Two ANDs in a row. Invalid input.\n");
						exit(1);
					}
					andFlag = 1;
				}
				//detect ORs and increment position in wordArray
				else if (strcmp(word, "OR") == 0){
					if (orFlag == 1){
						printf("Two ORs in a row. Invalid input.\n");
						exit(1);
					}
					orPos++;
					andPos = 0;
					orFlag = 1;
				}
				else{
					NormalizeWord(word);
					andFlag = 0;
					orFlag = 0;

					//always leave one empty slot in each dimension: the loops
					//below rely on an empty string to find the end of the data
					if (andPos >= MAX_ROWS - 1 || orPos >= MAX_ROWS - 1){
						printf("Query has too many terms. Invalid input.\n");
						exit(1);
					}

					//put the word in the wordArray at the appropriate place
					strncpy(wordArray[andPos][orPos], word, MAX_WORD_LENGTH);
					wordArray[andPos][orPos][MAX_WORD_LENGTH] = '\0';
					andPos++;
				}

				counter++;
			}
			free(word);
			word = NULL;
		}

		//k is incremented every time an OR group has been processed
		int k = 0;
		while (k < MAX_ROWS && strcmp(wordArray[0][k], "") != 0){

			int docMatchArray[QUERY_DOC_CAPACITY] = {0}; //temporary array of matching documents
			int docMatchArrayIndex = 0;

			char* firstWord = wordArray[0][k];

			//compute jenkins hash
			int hashResult = JenkinsHash(firstWord, MAX_HASH_SLOT);

			if (Table->table[hashResult] == NULL){
				printf("%s does not exist in hashTable database\n", firstWord );
				exit(1);
			}
			else{
				//go through the hash bucket until you find the word, then put
				//its documents into the temporary array to be matched against
				WordNode* dummyWord = Table->table[hashResult];
				while (dummyWord != NULL){ //go through all the linked words
					if (strcmp(dummyWord->word, firstWord) == 0){
						DocumentNode *dummy_doc = dummyWord->page;
						while (dummy_doc != NULL && docMatchArrayIndex < QUERY_DOC_CAPACITY){
							docMatchArray[docMatchArrayIndex] = dummy_doc->doc_id;
							docMatchArrayIndex++;
							dummy_doc = dummy_doc->next;
						}
						break; //you've found the word, no need to continue to other words
					}
					dummyWord = dummyWord->next;
				}
			}

			//if there's only 1 word to examine, no need to compare other words
			if (strcmp(wordArray[1][k], "") == 0){
				//add everything in the doc match array to the FinalDocMatchArray
				for (int i = 0; i < docMatchArrayIndex; i++){
					if (docMatchArray[i] == 0){
						continue; //id 0 doubles as the "empty" marker
					}
					int dupIndex = 0;
					int dupFlag = 0;
					while (dupIndex < QUERY_DOC_CAPACITY && FinalDocMatchArray[dupIndex] != 0){
						if (docMatchArray[i] == FinalDocMatchArray[dupIndex]){
							//a duplicate was found, compute the score for this
							//group and add it to that element
							dupFlag = 1;
							int finalScore = 0;
							int index3 = 0;
							while (index3 < MAX_ROWS && strcmp(wordArray[index3][k], "") != 0){ //for every word
								finalScore += ComputeScore(FinalDocMatchArray[dupIndex], Table, wordArray[index3][k]);
								index3++;
							}
							scoreArray[dupIndex] += finalScore;
							break;
						}
						dupIndex++;
					}
					//not seen before: append it to the final array
					if (dupFlag != 1 && FinalArrayIndex < QUERY_DOC_CAPACITY){
						FinalDocMatchArray[FinalArrayIndex] = docMatchArray[i];
						FinalArrayIndex++;
					}
				}
			}
			//if there's more than one word in this group, keep only documents
			//that every word of the group matches
			else{
				//entries past docMatchArrayIndex are always 0, so stop there
				for (int i = 0; i < docMatchArrayIndex; i++){
					if (docMatchArray[i] == 0){
						continue;
					}
					int result = 1;
					int m = 0;

					//test whether every word of the group contains this doc
					while (m < MAX_ROWS && strcmp(wordArray[m][k], "") != 0){
						result = findDocMatch(docMatchArray[i], Table, wordArray[m][k]);
						if (result != 0){
							break; //the document had no match, skip the rest
						}
						m++;
					}
					if (result != 0){
						continue;
					}

					//before you add it to the final array, check if you've already added it
					int dupIndex2 = 0;
					int dupFlag2 = 0;
					while (dupIndex2 < QUERY_DOC_CAPACITY && FinalDocMatchArray[dupIndex2] != 0){
						//if it's already in the list, then only increment the score
						if (docMatchArray[i] == FinalDocMatchArray[dupIndex2]){
							dupFlag2 = 1;
							int finalScore2 = 0;
							int index4 = 0;
							while (index4 < MAX_ROWS && strcmp(wordArray[index4][k], "") != 0){ //for every word
								finalScore2 += ComputeScore(FinalDocMatchArray[dupIndex2], Table, wordArray[index4][k]);
								index4++;
							}
							scoreArray[dupIndex2] += finalScore2; //increment the appropriate score
							break;
						}
						dupIndex2++;
					}
					//otherwise, add it to end of the Final Array
					if (dupFlag2 != 1 && FinalArrayIndex < QUERY_DOC_CAPACITY){
						FinalDocMatchArray[FinalArrayIndex] = docMatchArray[i];
						FinalArrayIndex++;
					}
				}
			}

			//compute the scores for all the non-duplicates toward the end of the array
			//(scoredCount carries over between groups, so only newly appended
			// documents are scored here)
			while (scoredCount < QUERY_DOC_CAPACITY && FinalDocMatchArray[scoredCount] != 0){
				int finalScore = 0;
				int index2 = 0;
				while (index2 < MAX_ROWS && strcmp(wordArray[index2][k], "") != 0){ //for every word
					finalScore += ComputeScore(FinalDocMatchArray[scoredCount], Table, wordArray[index2][k]);
					index2++;
				}
				//put it in the score array
				scoreArray[scoredCount] = finalScore;
				scoredCount++;
			}
			k++; //increment OR position
		}

		//sort the Final Array
		BubbleSort(FinalDocMatchArray, scoreArray, argv2);

	}//loop back to string entry

} //end main