示例#1
0
/*
 * Collects the "k-coverage" set of instance y.
 *
 * Every other instance yy (1..p->ny, yy != y) for which
 * FRIENDS(c->item[y], c->item[yy]) holds is examined: its neighbour list is
 * obtained from KNN_kNeighboursSkip (with y passed as the final argument),
 * and if y occurs in that list, yy is appended to `indices`.
 *
 * Returns the number of entries written to `indices` (0-based, contiguous).
 * Returns 0 when the feature weights or the scratch vector cannot be
 * allocated.
 *
 * The caller must supply an `indices` buffer large enough for every match;
 * at most p->ny - 1 entries can be produced.
 */
long KNN_prune_kCoverage
(
    ///////////////////////////////
    // Parameters                //
    ///////////////////////////////

    Pattern p,      // source
    //
    Categories c,   // source
    //
    long y,         // source instance index
    //
    long k,         // k(!)
    //
    long * indices  // Out: kCoverage set
    //
)

{
    Melder_assert(y <= p->ny);
    Melder_assert(k > 0 && k <= p->ny);

    FeatureWeights fws = FeatureWeights_create(p->nx);
    if (!fws)
        return 0;

    // Scratch buffer that receives the neighbour indices of each candidate.
    long *tempindices = NUMlvector (0, p->ny - 1);
    if (!tempindices)
    {
        // Without the scratch buffer the neighbour search would write
        // through a null pointer; give up with an empty coverage set.
        forget(fws);
        return 0;
    }

    long cc = 0;    // number of coverage entries written so far
    for (long yy = 1; yy <= p->ny; yy++)
    {
        if (y != yy && FRIENDS(c->item[y], c->item[yy]))
        {
            // The last argument was 0 before the .OS.081011 change; it is now y.
            long n = KNN_kNeighboursSkip(p, p, fws, yy, k, tempindices, y);

            // Walk the returned neighbour list backwards looking for y.
            while (n)
            {
                Melder_assert (n <= p->ny);
                if (tempindices[--n] == y)
                {
                    indices[cc++] = yy;
                    break;
                }
            }
        }
    }

    NUMlvector_free (tempindices, 0);
    forget(fws);
    return cc;
}
示例#2
0
/*
 * Decides whether instance y is "superfluous".
 *
 * The neighbours of y are obtained from KNN_kNeighboursSkip (passing
 * `skipper` as its final argument), converted to per-category frequencies
 * with KNN_kIndicesToFrequenciesAndDistances, and the category with the
 * highest frequency (per KNN_max) is compared with y's own category.
 *
 * Returns 1 when FRIENDS() reports that the winning category matches the
 * category of y, and 0 otherwise. 0 is also returned when no neighbours are
 * found or when any allocation fails.
 *
 * y and k are clamped to p->ny before use.
 */
int KNN_prune_superfluous
(
    ///////////////////////////////
    // Parameters                //
    ///////////////////////////////

    Pattern p,      // source
    //
    Categories c,   // source
    //
    long y,         // source instance index
    //
    long k,         // k(!)
    //
    long skipper    // Skipping instance skipper
    //
)

{
    if (y > p->ny) y = p->ny;   // safety belt
    if (k > p->ny) k = p->ny;

    FeatureWeights fws = FeatureWeights_create(p->nx);
    if (!fws)
        return 0;

    int result = 0;

    long *indices = NUMlvector (0, k - 1);
    long *freqindices = NUMlvector (0, k - 1);
    double *distances = NUMdvector (0, k - 1);
    double *freqs = NUMdvector (0, k - 1);

    // Only run the search when every scratch vector exists. A zero return
    // from KNN_kNeighboursSkip (.OS.081011) means "no neighbours", and the
    // function then falls through to the shared cleanup below instead of
    // returning early and leaking the vectors and the feature weights.
    if (indices && freqindices && distances && freqs &&
        KNN_kNeighboursSkip(p, p, fws, y, k, indices, skipper))
    {
        long ncategories = KNN_kIndicesToFrequenciesAndDistances(c, k, indices, distances, freqs, freqindices);

        int friends = FRIENDS(c->item[y], c->item[freqindices[KNN_max(freqs, ncategories)]]);
        if (friends)
            result = 1;
    }

    // Single cleanup path. The file already calls the NUM*vector_free
    // routines on possibly-NULL pointers elsewhere, so no guards are needed.
    NUMlvector_free (indices, 0);
    NUMlvector_free (freqindices, 0);
    NUMdvector_free (distances, 0);
    NUMdvector_free (freqs, 0);
    forget(fws);

    return result;
}
示例#3
0
文件: bti.c 项目: thesues/bti
/*
 * Program entry point.
 *
 * Sequence of work:
 *   1. allocate a session and record the current time in it;
 *   2. derive the default config-file path from $HOME (or the system-wide
 *      default when $HOME is unset) and parse it into an account;
 *   3. apply the command-line options to the session;
 *   4. optionally fork into the background;
 *   5. dispatch on session->action.
 *
 * Returns `retval`: 0 on success, non-zero when the selected action failed
 * or is not handled here. Help/version requests and an unknown action name
 * leave `retval` at 0.
 */
int main(int argc, char *argv[], char *envp[])
{
	//FIXME: the user should be able to choose a single website rather than all of them, i.e. --host should be available.

	static const struct option options[] = {
		{"debug", 0, NULL, 'd'},
		{"verbose", 0, NULL, 'V'},
		{"action", 1, NULL, 'A'},
		{"logfile", 1, NULL, 'L'},
		{"shrink-urls", 0, NULL, 's'},
		{"help", 0, NULL, 'h'},
		{"bash", 0, NULL, 'b'},
		{"background", 0, NULL, 'B'},
		{"dry-run", 0, NULL, 'n'},
		{"page", 1, NULL, 'g'},
		{"version", 0, NULL, 'v'},
		{"config", 1, NULL, 'c'},
		{"replyto", 1, NULL, 'r'},
		{"retweet", 1, NULL, 'w'},
		{NULL, 0, NULL, 0}	/* all-zero terminator required by getopt_long */
	};
	struct session *session;
	pid_t child;
	int retval = 0;
	int option;
	char *home;
	const char *config_file;
	const char *stamp;
	time_t t;
	int page_nr;
	char *tweet;

	debug = 0;

	session = session_alloc();
	if (!session) {
		fprintf(stderr, "no more memory...\n");
		return -1;
	}

	/* get the current time so that we can log it later */
	time(&t);
	stamp = ctime(&t);
	/* ctime() may return NULL; never hand that to strdup() */
	session->time = strdup(stamp ? stamp : "");
	if (session->time) {
		size_t len = strlen(session->time);

		/* drop the trailing newline that ctime() appends */
		if (len > 0)
			session->time[len - 1] = 0x00;
	}

	/*
	 * Get the home directory so we can try to find a config file.
	 * If we have no home dir set up, look in /etc/bti
	 */
	home = getenv("HOME");
	if (home) {
		/* We have a home dir, so this might be a user */
		session->homedir = strdup(home);
		config_file = config_user_default;
	} else {
		session->homedir = strdup("");
		config_file = config_default;
	}

	/* set up a default config file location (traditionally ~/.bti) */
	session->configfile =
	    zalloc(strlen(session->homedir) + strlen(config_file) + 7);
	sprintf(session->configfile, "%s/%s", session->homedir, config_file);

	session_readline_init(session);

	struct account *account = parse_configfile(session);
	if (account == NULL) {
		fprintf(stderr, "parse err, goto exit\n");
		exit(-1);
	}

	while (1) {
		option = getopt_long_only(argc, argv,
					  "dp:P:H:a:A:u:c:hg:G:sr:nVvw:",
					  options, NULL);
		if (option == -1)
			break;
		switch (option) {
		case 'd':
			debug = 1;
			break;
		case 'V':
			session->verbose = 1;
			break;
		case 'g':
			page_nr = atoi(optarg);
			dbg("page = %d\n", page_nr);
			session->page = page_nr;
			break;
		case 'r':
			session->replyto = strdup(optarg);
			dbg("in_reply_to_status_id = %s\n", session->replyto);
			break;
		case 'A':
			if (strcasecmp(optarg, "update") == 0)
				session->action = ACTION_UPDATE;
			else if (strcasecmp(optarg, "friends") == 0)
				session->action = ACTION_FRIENDS;
			else if (strcasecmp(optarg, "user") == 0)
				session->action = ACTION_USER;
			else if (strcasecmp(optarg, "replies") == 0)
				session->action = ACTION_REPLIES;
			else if (strcasecmp(optarg, "public") == 0)
				session->action = ACTION_PUBLIC;
			else if (strcasecmp(optarg, "group") == 0)
				session->action = ACTION_GROUP;
			else if (strcasecmp(optarg, "retweet") == 0)
				session->action = ACTION_RETWEET;
			else
				session->action = ACTION_UNKNOWN;
			dbg("action = %d\n", session->action);
			break;
		case 'u':
			if (session->user)
				free(session->user);
			session->user = strdup(optarg);
			dbg("user = %s\n", session->user);
			break;

		case 'G':
			if (session->group)
				free(session->group);
			session->group = strdup(optarg);
			dbg("group = %s\n", session->group);
			break;
		case 'L':
			if (session->logfile)
				free(session->logfile);
			session->logfile = strdup(optarg);
			dbg("logfile = %s\n", session->logfile);
			break;
		case 's':
			session->shrink_urls = 1;
			break;
		case 'b':
			session->bash = 1;
			/* fall-through intended */
		case 'B':
			session->background = 1;
			break;
		case 'c':
			if (session->configfile)
				free(session->configfile);
			session->configfile = strdup(optarg);
			dbg("configfile = %s\n", session->configfile);

			/*
			 * NOTE(review): the config file was already parsed
			 * before option processing started, and the re-parse
			 * below is disabled, so this only changes the stored
			 * path.
			 */
			//bti_parse_configfile(session);
			break;
		case 'h':
			display_help();
			goto exit;
		case 'n':
			session->dry_run = 1;
			break;
		case 'v':
			display_version();
			goto exit;
		default:
			/* also reached for options accepted by the option string but not handled above */
			display_help();
			goto exit;
		}
	}

	/*
	 * Show the version to make it easier to determine what
	 * is going on here
	 */
	if (debug)
		display_version();

	if (session->action == ACTION_UNKNOWN) {
		fprintf(stderr, "Unknown action, valid actions are:\n"
			"'update', 'friends', 'public', 'replies', 'group' or 'user'.\n");
		goto exit;
	}

	dbg("config file = %s\n", session->configfile);
	dbg("action = %d\n", session->action);

	/* fork ourself so that the main shell can get on
	 * with its life as we try to connect and handle everything
	 */
	if (session->background) {
		child = fork();
		if (child < 0) {
			/*
			 * fork() failed: there is no child to do the work, so
			 * report the failure instead of exiting with success.
			 */
			fprintf(stderr, "fork failed\n");
			retval = -1;
			goto exit;
		}
		if (child) {
			/* parent: the child carries on with the request */
			dbg("child is %d\n", child);
			exit(0);
		}
	}
	switch (session->action) {
	case ACTION_PUBLIC:
		PUBLIC(account, session, retval);
		break;
	case ACTION_UPDATE:
		if (session->background || !session->interactive)
			tweet = get_string_from_stdin();
		else
			tweet = session->readline("tweet: ");
		if (!tweet || strlen(tweet) == 0) {
			dbg("no tweet?\n");
			/* release the (possibly empty) input and use the common cleanup path */
			free(tweet);
			retval = -1;
			goto exit;
		}

		if (session->shrink_urls)
			tweet = shrink_urls(tweet);
		/* room for the optional two-character "$ " / "# " prefix plus the terminator */
		session->tweet = zalloc(strlen(tweet) + 10);
		if (session->bash)
			sprintf(session->tweet, "%c %s",
				getuid()? '$' : '#', tweet);
		else
			sprintf(session->tweet, "%s", tweet);
		free(tweet);
		dbg("tweet = %s\n", session->tweet);
		UPDATE(account, session, retval);
		break;
	case ACTION_FRIENDS:
		FRIENDS(account, session, retval);
		break;
	case ACTION_REPLIES:
		REPLIES(account, session, retval);
		break;
	default:
		/* actions that can be selected but have no handler here */
		retval = -1;
		break;
	}

	//      retval = send_request(session);

	if (retval && !session->background)
		fprintf(stderr, "operation failed\n");

	/* log_session(session, retval); */
	DESTORY(account);
 exit:
	session_readline_cleanup(session);
	session_free(session);
	return retval;
}
/*
 * Partitions the rows of `p` into k clusters and returns one category per
 * row of `p`.
 *
 * Outline of what the code does:
 *   - If k equals the number of rows, the sequential categories 1..k are
 *     returned directly.
 *   - Otherwise, up to `m` times: k distinct rows of `p` are drawn at random
 *     as initial centroids. The rows of `p` are then repeatedly classified
 *     against the centroids (KNN with k = 1, flat voting), and every centroid
 *     is replaced by the running mean of the rows assigned to it, until no
 *     centroid coordinate changes (delta == 0).
 *   - After each such run the ratio smallest/largest cluster size is stored
 *     in sizes[0]. Reseeding stops once that ratio reaches `s` or `m` is
 *     exhausted.
 *   - The final classification of `p` against the last centroids is returned.
 *
 * When p->ny is not a multiple of k, `s` is capped at
 * (ny / k) / (ny / k + 1) using integer division.
 *
 * Returns NULL if an error is pending at the end. The caller owns the
 * returned Categories object.
 */
Categories Pattern_to_Categories_cluster
(
    ///////////////////////////////
    // Parameters                //
    ///////////////////////////////

    Pattern p,              // source
                            //
    FeatureWeights fws,     // feature weights
                            //
    long k,                 // k(!)
                            //
    double s,               // clustersize constraint 0 < s <= 1
                            //
    long m                  // reseed maximum
                            //
)

{
    Categories categories = NULL, output = NULL;
    KNN knn = NULL;
    Pattern centroids = NULL;
    double *sizes = NULL, *beta = NULL;
    long *seeds = NULL;

    categories = Categories_sequentialNumbers (k); cherror
    if (k == p->ny)
        return categories;

    knn = KNN_create(); cherror
    if (p->ny % k)
    {
        // Cluster sizes cannot all be equal, so cap the requested ratio.
        if (s > (double) (p->ny / k) / (double) (p->ny / k + 1))
            s = (double) (p->ny / k) / (double) (p->ny / k + 1);
    }

    double progress = m;
    sizes = NUMdvector (0, k); cherror
    seeds = NUMlvector (0, k); cherror

    // Check these two allocations like the ones above: centroids->nx is
    // dereferenced on the very next line and beta is written in the loop.
    centroids = Pattern_create (k, p->nx); cherror
    beta = NUMdvector (0, centroids->nx); cherror

    do
    {
        double delta;
        long nfriends  = 0;     // number of seeds chosen so far in this round
        if (!Melder_progress1(1 - (progress - m) / progress, L"")) break;

        // Seed every centroid with a distinct, randomly chosen row of p.
        for (long y = 1; y <= centroids->ny; y++)
        {
            int taken = 1;
            long ys = (long) lround(NUMrandomUniform(1, p->ny));

            if (nfriends)
            {
                // Redraw until the candidate differs from every earlier seed.
                while (taken)
                {
                    ys = (long) lround(NUMrandomUniform(1, p->ny));
                    for (long fc = 0; fc < nfriends; fc++)
                    {
                        taken = 0;
                        Melder_assert (fc < k);
                        if (seeds [fc] == ys)
                        {
                            taken = 1;
                            break;
                        }
                    }
                }
            }
            Melder_assert (nfriends <= k);
            seeds [nfriends++] = ys;

            for (long x = 1; x <= centroids->nx; x++)
                centroids->z[y][x] = p->z[ys][x];
        }

        // Refine the centroids until they stop moving.
        do
        {
            delta = 0;
            KNN_learn (knn, centroids, categories, kOla_REPLACE, kOla_SEQUENTIAL);
            Categories interim = KNN_classifyToCategories (knn, p, fws, 1, kOla_FLAT_VOTING);
            if (!interim)
            {
                // Classification failed; interim->size below would be a
                // null dereference. Issue the same final progress call as
                // the normal path and leave through the shared cleanup.
                Melder_progress1(1.0, NULL);
                goto end;
            }

            for (long x = 1; x <= k; x++)
                sizes [x] = 0;

            for (long yp = 1; yp <= categories->size; yp++)
            {
                double alfa = 1;    // 1 + number of members folded into centroid yp so far
                Melder_assert (yp <= centroids->ny);

                // Remember the old centroid so its movement can be measured.
                for (long x = 1; x <= centroids->nx; x++)
                {
                    beta[x] = centroids->z[yp][x];
                }

                for (long ys = 1; ys <= interim->size; ys++)
                {
                    if (FRIENDS(categories->item[yp], interim->item[ys]))
                    {
                        for (long x = 1; x <= p->nx; x++)
                        {
                            Melder_assert (ys <= p->ny);
                            if (alfa == 1)
                            {
                                // First member: start the mean from this row.
                                centroids->z[yp][x] = p->z[ys][x];
                            }
                            else
                            {
                                // Incremental mean update.
                                centroids->z[yp][x] += (p->z[ys][x] - centroids->z[yp][x]) / alfa;
                            }
                        }
                        Melder_assert (yp <= k);
                        sizes [yp] ++;
                        alfa++;
                    }
                }

                for (long x = 1; x <= centroids->nx; x++)
                {
                    delta += fabs (beta[x] - centroids->z[yp][x]);
                }
            }
            forget (interim);
        }
        while (delta);

        double smax = sizes [1];
        double smin = sizes [1];

        for (long x = 1; x <= k; x++)
        {
            if (smax < sizes [x]) smax = sizes [x];
            if (smin > sizes [x]) smin = sizes [x];
        }

        // sizes[0] is not a cluster size; it holds the balance ratio.
        sizes [0] = smin / smax;
        --m;
    }
    while (sizes[0] < s && m > 0);

    Melder_progress1(1.0, NULL);

    output = KNN_classifyToCategories (knn, p, fws, 1, kOla_FLAT_VOTING); cherror

end:
    forget (centroids);
    forget (categories);
    forget (knn);
    NUMdvector_free (sizes, 0);
    NUMdvector_free (beta, 0);
    NUMlvector_free (seeds, 0);
    iferror return NULL;
    return output;
}