Пример #1
0
/**
 * Program entry point.
 *
 * Calls Welcome, builds the stop-word set, creates the word index and the
 * seen-articles set, hands all three to BuildIndices and QueryIndices, and
 * finally disposes every set it created.
 *
 * argv[1], when supplied, is used as the feeds file; with no command-line
 * argument kDefaultFeedsFile is used instead.  Always returns 0.
 */
int main(int argc, char **argv)
{
  hashset stopWords;
  hashset wordHash;
  hashset articlesSeen;
  const char *feedsFile = kDefaultFeedsFile;

  if (argc != 1) {
    feedsFile = argv[1];
  }

  Welcome(kWelcomeTextFile);
  BuildStopWordsHashset(&stopWords, kDefaultStopWordsFile);

  /* Both sets are created with 10007 buckets. */
  HashSetNew(&wordHash, sizeof(currWord), 10007,
             WordHashFn, WordCompare, WordFree);
  HashSetNew(&articlesSeen, sizeof(article), 10007,
             ArticleHashFn, ArticleCompare, ArticleFree);

  BuildIndices(feedsFile, &stopWords, &wordHash, &articlesSeen);
  QueryIndices(&stopWords, &wordHash, &articlesSeen);

  HashSetDispose(&stopWords);
  HashSetDispose(&wordHash);
  HashSetDispose(&articlesSeen);
  return 0;
}
Пример #2
0
/**
 * Program entry point.
 *
 * Creates the stop-word set, the index set and the explored-articles vector
 * inside one rssData record, then calls Welcome, ReadStopwords, BuildIndices
 * and QueryIndices in that order.  The number of index entries is printed
 * after the indices are built.
 *
 * argv[1], when supplied, is passed to BuildIndices as the feeds file;
 * otherwise kDefaultFeedsFile is used.  Always returns 0.
 *
 * NOTE(review): none of the containers created here is disposed before
 * returning - presumably process exit is relied upon; confirm.
 */
int main(int argc, char **argv)
{
  static const char *stopwordFilename = "/home/compilers/media/assn-4-rss-news-search-data/stop-words.txt";
  enum { kStopwordBuckets = 1009, kIndexNumBuckets = 10007 };

  rssData allData;
  const char *feedsFile = (argc == 1) ? kDefaultFeedsFile : argv[1];

  HashSetNew(&allData.stopwords, sizeof(char*), kStopwordBuckets,
             StringHash, StringCmp, StringFree);
  HashSetNew(&allData.indices, sizeof(indexEntry), kIndexNumBuckets,
             IndexHash, IndexCmp, IndexFree);
  /* Vector of article records, created with an initial allocation of 10. */
  VectorNew(&allData.explored, sizeof(article), ArticleFree, 10);

  Welcome(kWelcomeTextFile);
  ReadStopwords(&allData.stopwords, stopwordFilename);
  BuildIndices(feedsFile, &allData);

  printf("hcount: %d\n", HashSetCount(&allData.indices));
  printf("Finished BuildIndices\n");

  QueryIndices(&allData);
  return 0;
}
Пример #3
0
/**
 * Function: TestHashTable
 * -----------------------
 * Exercises the hashset using struct frequency as the element type.
 * BuildTableOfLetterCounts fills the table (per the original notes, by
 * counting how often each character of a file occurs).  The table is then
 * printed through HashSetMap, copied element by element into a vector,
 * and that vector is printed twice: once sorted with CompareLetter and
 * once sorted with CompareOccurrences.
 *
 * The vector is deliberately created with an initialAllocation of 0, which
 * the vector implementation is required to handle gracefully.
 */
static void TestHashTable(void)
{
  hashset letterTable;
  vector ordered;
  FILE *out = stdout;

  HashSetNew(&letterTable, sizeof(struct frequency), kNumBuckets,
             HashFrequency, CompareLetter, NULL);

  fputs("\n\n ------------------------- Starting the HashTable test\n", out);
  BuildTableOfLetterCounts(&letterTable);

  /* Dump the table in whatever order the hashset visits it. */
  fputs("Here is the unordered contents of the table:\n", out);
  HashSetMap(&letterTable, PrintFrequency, out);

  /* Copy every frequency record into the vector. */
  VectorNew(&ordered, sizeof(struct frequency), NULL, 0);
  HashSetMap(&letterTable, AddFrequency, &ordered);

  /* First ordering: by character. */
  VectorSort(&ordered, CompareLetter);
  fputs("\nHere are the trials sorted by char: \n", out);
  VectorMap(&ordered, PrintFrequency, out);

  /* Second ordering: by number of occurrences. */
  VectorSort(&ordered, CompareOccurrences);
  fputs("\nHere are the trials sorted by occurrence & char: \n", out);
  VectorMap(&ordered, PrintFrequency, out);

  /* Release all storage. */
  VectorDispose(&ordered);
  HashSetDispose(&letterTable);
}
Пример #4
0
/**
 * Function: BuildIndices
 * ----------------------
 * Opens a connection to feedsFileURL.  When the response code is in the
 * 3xx range the function calls itself with the connection's newUrl.
 * Otherwise it creates db->indices and db->previouslySeenArticles, then
 * reads the response stream: for each line, everything up to and including
 * the first colon (plus any spaces after it) is skipped, and the next
 * token is handed to ProcessFeed.
 *
 * The connection and the url are disposed on both paths.
 *
 * NOTE(review): any non-3xx response is treated as readable data; a failed
 * connection is not distinguished here - confirm dataStream is valid then.
 */
static void BuildIndices(rssDatabase *db, const char *feedsFileURL)
{
  url feedsUrl;
  urlconnection conn;

  URLNewAbsolute(&feedsUrl, feedsFileURL);
  URLConnectionNew(&conn, &feedsUrl);

  if (conn.responseCode / 100 != 3) {
    streamtokenizer tokenizer;
    char feedLocation[2048];

    HashSetNew(&db->indices, sizeof(rssIndexEntry), kNumIndexEntryBuckets,
               IndexEntryHash, IndexEntryCompare, IndexEntryFree);
    VectorNew(&db->previouslySeenArticles, sizeof(rssNewsArticle),
              NewsArticleFree, 0);

    STNew(&tokenizer, conn.dataStream, kNewLineDelimiters, true);
    for (;;) {
      /* Ignore everything up to the first colon of the line. */
      if (STSkipUntil(&tokenizer, ":") == EOF) {
        break;
      }
      /* Step over the colon and any spaces directly after it. */
      STSkipOver(&tokenizer, ": ");
      STNextToken(&tokenizer, feedLocation, sizeof(feedLocation));
      ProcessFeed(db, feedLocation);
    }

    printf("\n");
    STDispose(&tokenizer);
  } else {
    /* Redirect: retry with the location supplied by the server. */
    BuildIndices(db, conn.newUrl);
  }

  URLConnectionDispose(&conn);
  URLDispose(&feedsUrl);
}
Пример #5
0
/**
 * Function: LoadStopWords
 * -----------------------
 * Opens a connection to stopWordsURL.  A 3xx response makes the function
 * call itself with the connection's newUrl.  Otherwise it creates the
 * stopWords hashset (elements are char * values released with StringFree)
 * and enters a heap copy of every token read from the response stream,
 * using kNewLineDelimiters as the token delimiters.
 *
 * The connection and the url are disposed on both paths.
 */
static void LoadStopWords(hashset *stopWords, const char *stopWordsURL)
{
  url listUrl;
  urlconnection conn;
  int redirected;

  URLNewAbsolute(&listUrl, stopWordsURL);
  URLConnectionNew(&conn, &listUrl);
  redirected = (conn.responseCode / 100 == 3);

  if (!redirected) {
    streamtokenizer tokenizer;
    char word[4096];

    HashSetNew(stopWords, sizeof(char *), kNumStopWordsBuckets,
               StringHash, StringCompare, StringFree);
    STNew(&tokenizer, conn.dataStream, kNewLineDelimiters, true);
    for (;;) {
      char *copy;

      if (!STNextToken(&tokenizer, word, sizeof(word))) {
        break;
      }
      /* The set stores the pointer itself, so each word needs its own copy. */
      copy = strdup(word);
      HashSetEnter(stopWords, &copy);
    }
    STDispose(&tokenizer);
  } else {
    LoadStopWords(stopWords, conn.newUrl);
  }

  URLConnectionDispose(&conn);
  URLDispose(&listUrl);
}
Пример #6
0
/**
 * Function: CreateDataStructure
 * -----------------------------
 * Initialises the three hashsets held by data (stopWords, articles and
 * indices) and zero-fills data->item.  Each hashset is first created in a
 * local variable and then copied into its field of data.
 */
static void CreateDataStructure(rssFeedData* data){
  hashset stopWordSet;
  hashset articleSet;
  hashset indexSet;

  HashSetNew(&stopWordSet, sizeof(char**), kNumStopWordsBuckets,
             StringHash, StringCmp, StringFree);
  data->stopWords = stopWordSet;

  /*
   * The article set stores pointers to articles rather than the articles
   * themselves, so the hashset can reallocate its storage without
   * invalidating article pointers held elsewhere.
   */
  HashSetNew(&articleSet, sizeof(articleData*), kNumStopWordsBuckets,
             ArticleHash, ArticleCmp, ArticleFree);
  data->articles = articleSet;

  HashSetNew(&indexSet, sizeof(indexData), kNumIndexBuckets,
             IndexHash, IndexCmp, IndexFree);
  data->indices = indexSet;

  memset(&data->item, 0, sizeof(rssFeedItem));
}
/**
 * Program entry point for the thesaurus client.
 *
 * Creates the thesaurus hashset, fills it with ReadThesaurus, runs
 * QueryThesaurus and disposes the set.  The thesaurus file is argv[1] when
 * an argument is given; otherwise a built-in default path is used.
 * Always returns 0.
 */
int main(int argc, const char *argv[])
{
  const char *thesaurusFileName;
  hashset thesaurus;

  HashSetNew(&thesaurus, sizeof(thesaurusEntry), kApproximateWordCount,
             StringHash, StringCompare, ThesEntryFree);

  if (argc == 1) {
    thesaurusFileName =
      "/Users/shankuljain/Documents/Programming WorkSpace/CS107/assn 3/assn-3-vector-hashset-data/thesaurus.txt";
  } else {
    thesaurusFileName = argv[1];
  }

  ReadThesaurus(&thesaurus, thesaurusFileName);
  QueryThesaurus(&thesaurus);
  HashSetDispose(&thesaurus);
  return 0;
}
Пример #8
0
/**
 * Function: initThreadsData
 * -------------------------
 * Sets up the threading-related members of db: the threads vector, the
 * limitConnToServerLock hashset, four mutexes (server data, articles
 * vector, indices hashset, stop-words hashset) with default attributes,
 * and the connectionsLock semaphore, whose initial value is
 * kNumOfConnections.
 */
static void initThreadsData(rssDatabase *db)
{
  /* Mutexes are initialised in the order listed here. */
  pthread_mutex_t *const mutexes[] = {
    &db->locks.serverDataLock,
    &db->locks.articlesVectorLock,
    &db->locks.indicesHashSetLock,
    &db->locks.stopWordsHashSetLock
  };
  size_t i;

  VectorNew(&db->threads, sizeof(threadData), ThreadDataFree, 0);
  HashSetNew(&db->locks.limitConnToServerLock, sizeof(serverLockData),
             kNumOfServersBuckets, ConnectionsLockHash,
             ConnectionsLockCompare, ConnectionsLockFree);

  for (i = 0; i < sizeof(mutexes) / sizeof(mutexes[0]); i++) {
    pthread_mutex_init(mutexes[i], NULL);
  }

  /* Second argument 0: the semaphore is not shared between processes. */
  sem_init(&db->locks.connectionsLock, 0, kNumOfConnections);
}
Пример #9
0
/**
 * Function: loadStopWords
 * -----------------------
 * Creates the hashset s (1009 buckets, char * elements released with
 * StringFree) and enters a heap copy of every token found in the file
 * named by kStopListFile, tokenised with kNewLineDelimiters.
 *
 * Asserts that the file could be opened.
 */
static void loadStopWords(hashset *s)
{
    enum { kBufferSize = 1024 };
    streamtokenizer tokens;
    char word[kBufferSize];
    FILE *infile;

    HashSetNew(s, sizeof(char *), 1009, StringHash, StringCmp, StringFree);

    infile = fopen(kStopListFile, "r");
    assert(infile != NULL);

    for (STNew(&tokens, infile, kNewLineDelimiters, true);
         STNextToken(&tokens, word, sizeof(word)); ) {
        /* The set keeps the pointer, so every word gets its own copy. */
        char *copy = strdup(word);
        HashSetEnter(s, &copy);
    }

    STDispose(&tokens);
    fclose(infile);
}
Пример #10
0
					       //stop words, so we choose
					       //the first prime > 1000.
/**
 * Function: BuildStopWordsHashset
 * -------------------------------
 * Opens stopWordsFileName for reading (asserting success), creates the
 * stopWords hashset with kApproximateWordCount buckets and char * elements
 * released with StringFree, and enters a heap copy of each token read from
 * the file using kNewLineDelimiters.
 */
static void BuildStopWordsHashset(hashset *stopWords, const char *stopWordsFileName)
{
  char line[1024];
  streamtokenizer tokenizer;
  FILE *infile = fopen(stopWordsFileName, "r");

  assert(infile != NULL);

  HashSetNew(stopWords, sizeof(char*), kApproximateWordCount,
             StringHash, StringCompare, StringFree);
  STNew(&tokenizer, infile, kNewLineDelimiters, true);

  for (;;) {
    char *stored;

    if (!STNextToken(&tokenizer, line, sizeof(line))) {
      break;
    }
    /* The hashset holds the pointer; the buffer is reused per token. */
    stored = strdup(line);
    HashSetEnter(stopWords, &stored);
  }

  STDispose(&tokenizer);
  fclose(infile);
}
Пример #11
0
/**
 * Function: calculateNext
 * -----------------------
 * Reads the text file f in chunks of up to k characters, stores every
 * chunk in keys, builds in hash a table that maps each chunk to the chunks
 * seen directly after it (with occurrence counters), and then prints
 * "Generated text:" followed by a randomly chosen starting chunk and
 * further chunks until size chunks containing a space or a newline have
 * been printed.
 *
 * Parameters:
 *   hash - uninitialised hashset; created here with keysv elements.
 *   keys - uninitialised vector; created here, one element per chunk.
 *   k    - chunk length in characters.
 *   f    - path of the input file (must be openable; asserted).
 *   size - number of space/newline-bearing chunks to print.
 *
 * On return both hash and keys are initialised.  If the file is empty, or
 * no starting chunk can be selected, a message is written to stderr and
 * nothing is generated.
 *
 * NOTE(review): k < 1 does not appear to be supported - with k == 0 fgets
 * is asked for zero characters per read.  The k == 0 branches below are
 * kept exactly as they were.
 */
void calculateNext(hashset *hash, vector*  keys, int k, char* f, int size) {
  int currK, i, count, elems = 0, seedNumber;
  char *nxt, *cnt, *key2, *storage;
  FILE *fileR;
  vectorv keyNext;
  keysv *rs, key;

  k += 1;  /* one extra byte per chunk for the terminating NUL */
  fileR = fopen(f, "r");
  assert(fileR != NULL && "Cannot open the file");

  VectorNew(keys, sizeof(char*) * k, NULL, 10);

  /*
   * The read buffer is as large as one vector element: fgets may write k
   * bytes into it and VectorAppend copies a whole element out of it, so a
   * smaller buffer would be overrun on both the write and the copy.
   */
  storage = calloc((size_t)k, sizeof(char*));
  assert(storage != NULL && "Out of memory");

  while (fgets(storage, k, fileR) != NULL) {
    currK = strlen(storage);
    /* A chunk cut short by a newline is topped up from the next line. */
    if (currK > 0 && currK < k && storage[currK - 1] == '\n') {
      fgets(&storage[currK], k - currK, fileR);
    }
    VectorAppend(keys, storage);
  }
  fclose(fileR);
  free(storage);  /* every chunk has been copied into keys */

  count = keys->currentPosition;
  if (count == 0) {
    /* Leave the caller with a valid, empty table. */
    HashSetNew(hash, sizeof(keysv), 1, hashVector, cmpVector, NULL);
    fprintf(stderr, "calculateNext: \"%s\" is empty, nothing to generate\n", f);
    return;
  }

  HashSetNew(hash, sizeof(keysv), count * 3, hashVector, cmpVector, NULL);
  for (i = 0; i < (count - 1); i++) {
    cnt = VectorNth(keys, i);
    nxt = VectorNth(keys, i + 1);
    keyNext.string = nxt;
    key.string = cnt;
    /* Probe with the stack record; no heap-allocated probe is needed. */
    rs = (keysv*)HashSetLookup(hash, &key);
    if (rs == NULL) {
      /* First sighting of this chunk: start its successor list. */
      vector nexts;
      keyNext.frecuency = 1;
      VectorNew(&nexts, sizeof(vectorv), NULL, 1);
      VectorAppend(&nexts, &keyNext);
      key.frecuency = 1;
      key.vectorv = nexts;
      key.amount = 1;
      HashSetEnter(hash, &key);
    } else {
      vectorv* rSucessor;
      int idx;
      rs->frecuency++;
      rs->amount++;
      idx = VectorSearch(&rs->vectorv, &keyNext, cmpvct, 0, false);
      if (idx >= 0) {
        rSucessor = VectorNth(&rs->vectorv, idx);
        rSucessor->frecuency++;
      } else {
        keyNext.frecuency = 1;
        VectorAppend(&rs->vectorv, &keyNext);
      }
    }
  }

  /*
   * The last chunk has no successor and is entered with amount 0.
   * NOTE(review): key.vectorv still holds whatever the loop left in it,
   * and if this chunk was already entered the call may replace that
   * entry - confirm against the hashset contract and ran().
   */
  key.string = VectorNth(keys, count - 1);
  key.frecuency = 1;
  key.amount = 0;
  HashSetEnter(hash, &key);

  if (k == 0) {
    elems = count;
  } else {
    HashSetMap(hash, mapFn, &elems);
  }
  if (elems <= 0) {
    /* rand() % 0 is undefined; there is no starting chunk to pick. */
    fprintf(stderr, "calculateNext: no starting chunk available in \"%s\"\n", f);
    return;
  }

  seedNumber = rand() % elems;
  key2 = (char*)VectorNth(keys, seedNumber);
  printf("Generated text:\n");
  printf("%s", key2);
  if (k > 0) {
    /* Follow the successor table; i advances only on word boundaries. */
    for (i = 0; i < size;) {
      key2 = ran(hash, keys, key2);
      printf("%s", key2);
      if (strstr(key2, " ") != NULL || strstr(key2, "\n") != NULL) {
        i++;
      }
    }
  }
  else {
    for (i = 0; i < size;) {
      seedNumber = rand() % elems;
      key2 = (char*)VectorNth(keys, seedNumber);
      printf("%s", key2);
      if (strstr(key2, " ") != NULL || strstr(key2, "\n") != NULL) {
        i++;
      }
    }
  }
  printf("\n");
}
Пример #12
0
/**
 * Function: CreateDataStructure
 * -----------------------------
 * Creates the three hashsets stored in data - stopWords, articles and
 * indices - directly in place.  All three are created with
 * kNumStopWordsBuckets buckets.
 */
static void CreateDataStructure(rssFeedData *data) {
    HashSetNew(&data->stopWords, sizeof(char **), kNumStopWordsBuckets,
               StringHash, StringCmp, StringFree);

    HashSetNew(&data->articles, sizeof(articleData*), kNumStopWordsBuckets,
               ArticleHash, ArticleCmp, ArticleFree);

    HashSetNew(&data->indices, sizeof(indexData), kNumStopWordsBuckets,
               IndexHash, IndexCmp, IndexFree);
}
Пример #13
0
/**
 * Function: InitializeWordCounts
 * ------------------------------
 * Creates wordCount as a hashset of wordSet elements with 10007 buckets,
 * using wordHashFn, wordCmpFn and wordSetFreeFn as its callbacks.
 */
void InitializeWordCounts(hashset *wordCount)
{
    enum { kWordCountBuckets = 10007 };

    HashSetNew(wordCount, sizeof(wordSet), kWordCountBuckets,
               wordHashFn, wordCmpFn, wordSetFreeFn);
}