Esempio n. 1
0
/* Upper bound on consecutive 301/302 hops followed for a single article. */
enum { kMaxArticleRedirects = 10 };

/**
 * Worker behind ParseArticle: fetches articleURL and, on a 200 response,
 * records the article in db->previouslySeenArticles and feeds its text to
 * ScanArticle.
 *
 * @param db             database holding the seen-articles vector, the
 *                       indices, the stop words and their locks
 * @param articleTitle   title used for logging and for the article record
 * @param articleURL     absolute URL to download
 * @param redirectsLeft  how many more 301/302 responses may be followed
 *                       before the article is abandoned
 */
static void ParseArticleFollowingRedirects(rssDatabase *db, const char *articleTitle,
                                           const char *articleURL, int redirectsLeft)
{
  url u;
  urlconnection urlconn;
  streamtokenizer st;
  int articleID;

  URLNewAbsolute(&u, articleURL);
  // Shallow record that only borrows the strings; used as the search key.
  rssNewsArticle newsArticle = { articleTitle, u.serverName, u.fullName };

  pthread_mutex_t *articlesLock = &(db->locks.articlesVectorLock);

  // Cheap early exit: skip the download entirely when the article is already known.
  pthread_mutex_lock(articlesLock);
  if (VectorSearch(&db->previouslySeenArticles, &newsArticle, NewsArticleCompare, 0, false) >= 0) {
    pthread_mutex_unlock(articlesLock);
    printf("[Ignoring \"%s\": we've seen it before.]\n", articleTitle);
    URLDispose(&u);
    return;
  }
  pthread_mutex_unlock(articlesLock);

  // NOTE(review): lockConnection presumably limits concurrent connections
  // to the same server -- confirm against its definition.
  lockConnection(db, u.serverName);
  URLConnectionNew(&urlconn, &u);
  switch (urlconn.responseCode) {
    case 0:
      printf("Unable to connect to \"%s\".  Domain name or IP address is nonexistent.\n", articleURL);
      break;

    case 200:
      pthread_mutex_lock(articlesLock);
      // The articles lock was released while connecting, so another thread
      // may have registered this same article in the meantime. Check again
      // under the lock that guards the append to avoid indexing it twice.
      if (VectorSearch(&db->previouslySeenArticles, &newsArticle, NewsArticleCompare, 0, false) >= 0) {
        pthread_mutex_unlock(articlesLock);
        printf("[Ignoring \"%s\": we've seen it before.]\n", articleTitle);
        break;
      }
      printf("[%s] Indexing \"%s\"\n", u.serverName, articleTitle);
      // NOTE(review): NewsArticleClone presumably replaces the borrowed
      // strings with owned copies so the stored record survives
      // URLDispose(&u) -- confirm against its definition.
      NewsArticleClone(&newsArticle, articleTitle, u.serverName, u.fullName);

      VectorAppend(&db->previouslySeenArticles, &newsArticle);
      // The article's ID is its position in the seen-articles vector.
      articleID = VectorLength(&db->previouslySeenArticles) - 1;
      pthread_mutex_unlock(articlesLock);

      STNew(&st, urlconn.dataStream, kTextDelimiters, false);
      ScanArticle(&st, articleID, &db->indices, &db->stopWords,
                  &(db->locks.indicesHashSetLock), &(db->locks.stopWordsHashSetLock));
      STDispose(&st);
      break;

    case 301:
    case 302: {
      // Pretend we had the redirected URL all along, but index using the
      // new URL and not the old one.
      if (redirectsLeft <= 0) {
        // A redirect cycle would otherwise recurse until the stack overflows.
        printf("Unable to pull \"%s\" from \"%s\". Too many redirects. Punting...\n", articleTitle, u.serverName);
        break;
      }

      // Copy the target before the connection that owns it is disposed.
      size_t newURLLength = strlen(urlconn.newUrl) + 1;
      char newURLBuffer[newURLLength];
      strcpy(newURLBuffer, urlconn.newUrl);
      URLConnectionDispose(&urlconn);
      unlockConnection(db, u.serverName);
      URLDispose(&u);

      ParseArticleFollowingRedirects(db, articleTitle, newURLBuffer, redirectsLeft - 1);
      return;
    }

    default:
      printf("Unable to pull \"%s\" from \"%s\". [Response code: %d] Punting...\n", articleTitle, u.serverName, urlconn.responseCode);
      break;
  }

  URLConnectionDispose(&urlconn);
  unlockConnection(db, u.serverName);
  URLDispose(&u);
}

/**
 * Downloads the article at articleURL and indexes its contents unless an
 * equal article is already present in db->previouslySeenArticles.
 * Redirects (301/302) are followed up to kMaxArticleRedirects times.
 *
 * @param db            database holding the seen-articles vector, the
 *                      indices, the stop words and their locks
 * @param articleTitle  title used for logging and for the article record
 * @param articleURL    absolute URL to download
 */
static void ParseArticle(rssDatabase *db, const char *articleTitle, const char *articleURL)
{
  ParseArticleFollowingRedirects(db, articleTitle, articleURL, kMaxArticleRedirects);
}
Esempio n. 2
0
/*
 * Reports how many bytes sit between the connection's current buffer
 * position (bufferPos) and the end of its buffered data (bufferEnd).
 * The two fields are sampled between lockConnection/unLockConnection.
 *
 * connection must not be NULL.
 */
unsigned long long int getConnectionDataLen(struct CONNECTION_T *connection)
{
	unsigned long long int pending;

	lockConnection(connection);
	pending = (connection->bufferEnd - connection->bufferPos);
	unLockConnection(connection);

	return pending;
}
Esempio n. 3
0
/*
 * Releases the connection's data buffer and resets all of its buffer
 * bookkeeping (length, position, end) to zero. A NULL connection is
 * silently ignored.
 */
void clearConnectionBuffer(struct CONNECTION_T *connection)
{
	if (connection != NULL) {
		lockConnection(connection);

		/* free() accepts NULL, so an unallocated buffer needs no special case. */
		free(connection->buffer);
		connection->buffer = NULL;

		connection->bufferEnd = 0;
		connection->bufferPos = 0;
		connection->bufferLen = 0;

		unLockConnection(connection);
	}
}
Esempio n. 4
0
/*
 * Copies up to len bytes of buffered data, starting at the connection's
 * current buffer position, into dstBuffer. bufferPos is left untouched,
 * so the same bytes remain available afterwards.
 *
 * connection  connection whose buffer is inspected
 * dstBuffer   destination; must have room for at least len bytes
 * len         maximum number of bytes to copy
 *
 * Returns the number of bytes actually copied. Returns 0 when connection
 * or dstBuffer is NULL, when len is not positive, or when no data is
 * buffered.
 */
unsigned long long int peekConnectionBuffer(struct CONNECTION_T *connection, char *dstBuffer, int len)
{
	/* A negative len would otherwise convert to a huge unsigned count. */
	if (connection == NULL || dstBuffer == NULL || len <= 0) return 0;

	lockConnection(connection);

	/*
	 * Measure the available data under the same lock that protects the
	 * copy, so the length cannot go stale (e.g. through a concurrent
	 * clearConnectionBuffer) between the measurement and the memcpy.
	 */
	unsigned long long int available = (connection->bufferEnd - connection->bufferPos);
	unsigned long long int rlen = (unsigned long long int)len;

	if (available < rlen) {
		rlen = available;
	}

	if (rlen > 0 && connection->buffer != NULL) {
		memcpy(dstBuffer, connection->buffer + connection->bufferPos, rlen);
	}
	else {
		/* Nothing buffered: never hand memcpy a NULL source. */
		rlen = 0;
	}

	unLockConnection(connection);

	return rlen;
}
Esempio n. 5
0
unsigned long long int fillConnectionBuffer(struct CONNECTION_T *connection, unsigned long long int responseLength, int readFull)
{
	if (responseLength == 0) return 0;

	lockConnection(connection);

	char *response = malloc(responseLength+1);

	logInfo( LOG_CONNECTION_DEBUG,"fillBuffer: responseLength=%lld\n", responseLength);
	
	ssize_t readLen = recv(connection->socket, response, responseLength, 0);

	logInfo( LOG_CONNECTION_DEBUG,"fillBuffer: readLen=%zd, response=%s\n", readLen, response);

        if (readLen == -1) {
                logInfo( LOG_CONNECTION,"recv");
		unLockConnection(connection);
		free(response);
                return readLen;
        }

//	pthread_mutex_lock(&connection->readWriteLock);
//	lockConnection(connection);

	// Get memory for received data.
	if (connection->buffer == NULL) {

		logInfo( LOG_CONNECTION_DEBUG,"fillBuffer: Going to allocate memory for the first time.\n");

		connection->bufferLen = 0;
		connection->bufferPos = 0;
		connection->bufferEnd = 0;

		connection->buffer = malloc(readLen);
		if (connection->buffer == NULL) {
			logInfo( LOG_CONNECTION,"fillBuffer: connection buffer malloc. Tried to allocate %lld bytes.\n", (long long int)readLen);
			unLockConnection(connection);
			//pthread_mutex_unlock(&connection->readWriteLock);
			free(response);
			return -1;
		}
		connection->bufferLen = readLen;
	}
	else {
		if ((connection->bufferLen - connection->bufferEnd) < responseLength) {
			// Will not fit in the buffer 

			logInfo( LOG_CONNECTION_DEBUG,"fillBuffer: Going to create new memory allocation for buffer. bufferEnd=%d, bufferPos=%d, bufferLen=%d.\n", connection->bufferEnd, connection->bufferPos, connection->bufferLen);

			unsigned int newSize = readLen + (connection->bufferEnd - connection->bufferPos);

			logInfo( LOG_CONNECTION_DEBUG,"fillBuffer: newSize=%d.\n", newSize);

			char *newBuffer = malloc(newSize);
			if (newBuffer == NULL) {
				logInfo( LOG_CONNECTION,"fillBuffer: newBuffer malloc. Tried to allocate %d bytes.\n", newSize);
				unLockConnection(connection);
				//pthread_mutex_unlock(&connection->readWriteLock);
				free(response);
				return -1;
			}
			// Copy old buffer to new buffer
			unsigned int newBufferPos = 0;
			unsigned int oldBufferPos = connection->bufferPos;
			char *tmpNewBuffer;
			char *tmpOldBuffer;

			// Need to replace this with a memcpy
			while (oldBufferPos < connection->bufferEnd) {
				tmpNewBuffer = newBuffer + newBufferPos;
				tmpOldBuffer = connection->buffer + oldBufferPos;

				*tmpNewBuffer = *tmpOldBuffer;

				oldBufferPos++;
				newBufferPos++;
			}

			logInfo( LOG_CONNECTION_DEBUG,"fillBuffer: newBuffer=%s.\n", newBuffer);

			free(connection->buffer);

			connection->buffer = newBuffer;
			connection->bufferEnd = connection->bufferEnd - connection->bufferPos;
			connection->bufferPos = 0;
			connection->bufferLen = newSize;
		}
	}

	logInfo( LOG_CONNECTION_DEBUG,"fillBuffer: Buffer is ready. bufferEnd=%d, bufferPos=%d, bufferLen=%d.\n", connection->bufferEnd, connection->bufferPos, connection->bufferLen);

	// Add new received data to connection buffer
	char *tmpBuffer;
	tmpBuffer = connection->buffer + connection->bufferEnd;
	memcpy(tmpBuffer, response, readLen);
	connection->bufferEnd += readLen;
	
	free(response);

	logInfo( LOG_CONNECTION_DEBUG,"fillBuffer: new Buffer=%s.\n", connection->buffer);
	logInfo( LOG_CONNECTION_DEBUG,"fillBuffer: Buffer details. bufferEnd=%d, bufferPos=%d, bufferLen=%d.\n", connection->bufferEnd, connection->bufferPos, connection->bufferLen);

	unLockConnection(connection);
	//pthread_mutex_unlock(&connection->readWriteLock);

	if ((readFull > 0) && (readLen != responseLength)) {
		readLen += fillConnectionBuffer(connection, responseLength - readLen, readFull);
	}

	return readLen;
}