long KNN_prune_kCoverage ( /////////////////////////////// // Parameters // /////////////////////////////// Pattern p, // source // Categories c, // source // long y, // source instance index // long k, // k(!) // long * indices // Out: kCoverage set // ) { Melder_assert(y <= p->ny); Melder_assert(k > 0 && k <= p->ny); long cc = 0; FeatureWeights fws = FeatureWeights_create(p->nx); if (fws) { long *tempindices = NUMlvector (0, p->ny - 1); for (long yy = 1; yy <= p->ny; yy++) { if (y != yy && FRIENDS(c->item[y], c->item[yy])) { // long n = KNN_kNeighboursSkip(p, p, fws, yy, k, tempindices, 0); .OS.081011 long n = KNN_kNeighboursSkip(p, p, fws, yy, k, tempindices, y); while (n) { Melder_assert (n <= p->ny); if (tempindices[--n] == y) { indices[cc++] = yy; break; } } } } NUMlvector_free (tempindices, 0); forget(fws); } return(cc); }
int KNN_prune_superfluous ( /////////////////////////////// // Parameters // /////////////////////////////// Pattern p, // source // Categories c, // source // long y, // source instance index // long k, // k(!) // long skipper // Skipping instance skipper // ) { if (y > p->ny) y = p->ny; // safety belt if (k > p->ny) k = p->ny; FeatureWeights fws = FeatureWeights_create(p->nx); if (fws) { long *indices = NUMlvector (0, k - 1); long *freqindices = NUMlvector (0, k - 1); double *distances = NUMdvector (0, k - 1); double *freqs = NUMdvector (0, k - 1); // KNN_kNeighboursSkip(p, p, fws, y, k, indices, skipper); .OS.081011 -> if(!KNN_kNeighboursSkip(p, p, fws, y, k, indices, skipper)) return(0); // .OS.081011 <- long ncategories = KNN_kIndicesToFrequenciesAndDistances(c, k, indices, distances, freqs, freqindices); forget(fws); int result = FRIENDS(c->item[y], c->item[freqindices[KNN_max(freqs, ncategories)]]); NUMlvector_free (indices, 0); NUMlvector_free (freqindices, 0); NUMdvector_free (distances, 0); NUMdvector_free (freqs, 0); if (result) return 1; } return 0; }
int main(int argc, char *argv[], char *envp[]) { //FIXME:user could choose only a website no all website.which means --host is available. static const struct option options[] = { {"debug", 0, NULL, 'd'}, {"verbose", 0, NULL, 'V'}, {"action", 1, NULL, 'A'}, {"logfile", 1, NULL, 'L'}, {"shrink-urls", 0, NULL, 's'}, {"help", 0, NULL, 'h'}, {"bash", 0, NULL, 'b'}, {"background", 0, NULL, 'B'}, {"dry-run", 0, NULL, 'n'}, {"page", 1, NULL, 'g'}, {"version", 0, NULL, 'v'}, {"config", 1, NULL, 'c'}, {"replyto", 1, NULL, 'r'}, {"retweet", 1, NULL, 'w'}, {} }; struct session *session; pid_t child; int retval = 0; int option; char *home; const char *config_file; time_t t; int page_nr; char *tweet; debug = 0; session = session_alloc(); if (!session) { fprintf(stderr, "no more memory...\n"); return -1; } /* get the current time so that we can log it later */ time(&t); session->time = strdup(ctime(&t)); session->time[strlen(session->time) - 1] = 0x00; /* * Get the home directory so we can try to find a config file. * If we have no home dir set up, look in /etc/bti */ home = getenv("HOME"); if (home) { /* We have a home dir, so this might be a user */ session->homedir = strdup(home); config_file = config_user_default; } else { session->homedir = strdup(""); config_file = config_default; } /* set up a default config file location (traditionally ~/.bti) */ session->configfile = zalloc(strlen(session->homedir) + strlen(config_file) + 7); sprintf(session->configfile, "%s/%s", session->homedir, config_file); session_readline_init(session); struct account *account = parse_configfile(session); if (account == NULL) { fprintf(stderr, "parse err, goto exit\n"); exit(-1); } while (1) { option = getopt_long_only(argc, argv, "dp:P:H:a:A:u:c:hg:G:sr:nVvw:", options, NULL); if (option == -1) break; switch (option) { case 'd': debug = 1; break; case 'V': session->verbose = 1; break; case 'g': page_nr = atoi(optarg); dbg("page = %d\n", page_nr); session->page = page_nr; break; case 'r': session->replyto = strdup(optarg); dbg("in_reply_to_status_id = %s\n", session->replyto); break; case 'A': if (strcasecmp(optarg, "update") == 0) session->action = ACTION_UPDATE; else if (strcasecmp(optarg, "friends") == 0) session->action = ACTION_FRIENDS; else if (strcasecmp(optarg, "user") == 0) session->action = ACTION_USER; else if (strcasecmp(optarg, "replies") == 0) session->action = ACTION_REPLIES; else if (strcasecmp(optarg, "public") == 0) session->action = ACTION_PUBLIC; else if (strcasecmp(optarg, "group") == 0) session->action = ACTION_GROUP; else if (strcasecmp(optarg, "retweet") == 0) session->action = ACTION_RETWEET; else session->action = ACTION_UNKNOWN; dbg("action = %d\n", session->action); break; case 'u': if (session->user) free(session->user); session->user = strdup(optarg); dbg("user = %s\n", session->user); break; case 'G': if (session->group) free(session->group); session->group = strdup(optarg); dbg("group = %s\n", session->group); break; case 'L': if (session->logfile) free(session->logfile); session->logfile = strdup(optarg); dbg("logfile = %s\n", session->logfile); break; case 's': session->shrink_urls = 1; break; case 'b': session->bash = 1; /* fall-through intended */ case 'B': session->background = 1; break; case 'c': if (session->configfile) free(session->configfile); session->configfile = strdup(optarg); dbg("configfile = %s\n", session->configfile); /* * read the config file now. Yes, this could override * previously set options from the command line, but * the user asked for it... */ //bti_parse_configfile(session); break; case 'h': display_help(); goto exit; case 'n': session->dry_run = 1; break; case 'v': display_version(); goto exit; default: display_help(); goto exit; } } /* * Show the version to make it easier to determine what * is going on here */ if (debug) display_version(); if (session->action == ACTION_UNKNOWN) { fprintf(stderr, "Unknown action, valid actions are:\n" "'update', 'friends', 'public', 'replies', 'group' or 'user'.\n"); goto exit; } dbg("config file = %s\n", session->configfile); dbg("action = %d\n", session->action); /* fork ourself so that the main shell can get on * with it's life as we try to connect and handle everything */ if (session->background) { child = fork(); if (child) { dbg("child is %d\n", child); exit(0); } } switch (session->action) { case ACTION_PUBLIC: PUBLIC(account, session, retval); break; case ACTION_UPDATE: if (session->background || !session->interactive) tweet = get_string_from_stdin(); else tweet = session->readline("tweet: "); if (!tweet || strlen(tweet) == 0) { dbg("no tweet?\n"); return -1; } if (session->shrink_urls) tweet = shrink_urls(tweet); session->tweet = zalloc(strlen(tweet) + 10); if (session->bash) sprintf(session->tweet, "%c %s", getuid()? '$' : '#', tweet); else sprintf(session->tweet, "%s", tweet); if (tweet) free(tweet); dbg("tweet = %s\n", session->tweet); UPDATE(account, session, retval); break; case ACTION_FRIENDS: FRIENDS(account, session, retval); break; case ACTION_REPLIES: REPLIES(account, session, retval); break; default: retval = -1; break; } // retval = send_request(session); if (retval && !session->background) fprintf(stderr, "operation failed\n"); /* log_session(session, retval); */ DESTORY(account); exit: session_readline_cleanup(session); session_free(session); return retval;; }
Categories Pattern_to_Categories_cluster ( /////////////////////////////// // Parameters // /////////////////////////////// Pattern p, // source // FeatureWeights fws, // feature weights // long k, // k(!) // double s, // clustersize constraint 0 < s <= 1 // long m // reseed maximum // ) { Categories categories = NULL, output = NULL; KNN knn = NULL; Pattern centroids = NULL; double *sizes = NULL, *beta = NULL; long *seeds = NULL; categories = Categories_sequentialNumbers (k); cherror if (k == p->ny) return categories; knn = KNN_create(); cherror if(p->ny % k) if (s > (double) (p->ny / k) / (double) (p->ny / k + 1)) s = (double) (p->ny / k) / (double) (p->ny / k + 1); double progress = m; sizes = NUMdvector (0, k); cherror seeds = NUMlvector (0, k); cherror centroids = Pattern_create (k, p->nx); beta = NUMdvector (0, centroids->nx); do { double delta; long nfriends = 0; if (!Melder_progress1(1 - (progress - m) / progress, L"")) break; for (long y = 1; y <= centroids->ny; y++) { int friend = 1; long ys = (long) lround(NUMrandomUniform(1, p->ny)); if (nfriends) { while (friend) { ys = (long) lround(NUMrandomUniform(1, p->ny)); for (long fc = 0; fc < nfriends; fc++) { friend = 0; Melder_assert (fc < k); if (seeds [fc] == ys) { friend = 1; break; } } } } Melder_assert (nfriends <= k); seeds [nfriends++] = ys; for (long x = 1; x <= centroids->nx; x++) centroids->z[y][x] = p->z[ys][x]; } do { delta = 0; KNN_learn (knn, centroids, categories, kOla_REPLACE, kOla_SEQUENTIAL); Categories interim = KNN_classifyToCategories (knn, p, fws, 1, kOla_FLAT_VOTING); for (long x = 1; x <= k; x++) sizes [x] = 0; for (long yp = 1; yp <= categories->size; yp++) { double alfa = 1; Melder_assert (yp <= centroids->ny); for (long x = 1; x <= centroids->nx; x++) { beta[x] = centroids->z[yp][x]; } for (long ys = 1; ys <= interim->size; ys++) { if (FRIENDS(categories->item[yp], interim->item[ys])) { for (long x = 1; x <= p->nx; x++) { Melder_assert (ys <= p->ny); if (alfa == 1) { centroids->z[yp][x] = p->z[ys][x]; } else { centroids->z[yp][x] += (p->z[ys][x] - centroids->z[yp][x]) / alfa; } } Melder_assert (yp <= k); sizes [yp] ++; alfa++; } } for (long x = 1; x <= centroids->nx; x++) { delta += fabs (beta[x] - centroids->z[yp][x]); } } forget (interim); } while (delta); double smax = sizes [1]; double smin = sizes [1]; for (long x = 1; x <= k; x++) { if (smax < sizes [x]) smax = sizes [x]; if (smin > sizes [x]) smin = sizes [x]; } sizes [0] = smin / smax; --m; } while (sizes[0] < s && m > 0); Melder_progress1(1.0, NULL); output = KNN_classifyToCategories (knn, p, fws, 1, kOla_FLAT_VOTING); cherror end: forget (centroids); forget (categories); forget (knn); NUMdvector_free (sizes, 0); NUMdvector_free (beta, 0); NUMlvector_free (seeds, 0); iferror return NULL; return output; }