/*
 * ParseArticle
 * ------------
 * Pulls the article at articleURL and, when the server answers with
 * response code 200, records it in db->previouslySeenArticles and hands its
 * data stream to ScanArticle for indexing.
 *
 * - An article that VectorSearch already finds in previouslySeenArticles is
 *   skipped with a message.
 * - Response codes 301/302 are followed by calling ParseArticle again with
 *   urlconn.newUrl; all resources held for the old URL are released first.
 * - Response code 0 and every other code only print a diagnostic.
 *
 * The articles vector is touched only while articlesVectorLock is held, and
 * the connection is opened between lockConnection/unlockConnection calls
 * keyed on the server name.
 */
static void ParseArticle(rssDatabase *db, const char *articleTitle, const char *articleURL)
{
  url u;
  urlconnection urlconn;
  streamtokenizer st;
  int articleID;

  URLNewAbsolute(&u, articleURL);
  rssNewsArticle newsArticle = { articleTitle, u.serverName, u.fullName };
  pthread_mutex_t *articlesLock = &(db->locks.articlesVectorLock);

  // Look the article up under the lock; the lock is released on both outcomes.
  pthread_mutex_lock(articlesLock);
  int alreadySeen =
    (VectorSearch(&db->previouslySeenArticles, &newsArticle, NewsArticleCompare, 0, false) >= 0);
  pthread_mutex_unlock(articlesLock);

  if (alreadySeen) {
    printf("[Ignoring \"%s\": we've seen it before.]\n", articleTitle);
    URLDispose(&u);
    return;
  }

  lockConnection(db, u.serverName);
  URLConnectionNew(&urlconn, &u);

  if (urlconn.responseCode == 301 || urlconn.responseCode == 302) {
    // Redirect: copy the new URL onto the stack before the connection that
    // owns it is disposed, then start over with the redirected address.
    int redirectLength = strlen(urlconn.newUrl) + 1;
    char redirectURL[redirectLength];
    strcpy(redirectURL, urlconn.newUrl);

    URLConnectionDispose(&urlconn);
    unlockConnection(db, u.serverName);
    URLDispose(&u);

    ParseArticle(db, articleTitle, redirectURL);
    return;
  }

  if (urlconn.responseCode == 0) {
    printf("Unable to connect to \"%s\". Domain name or IP address is nonexistent.\n", articleURL);
  } else if (urlconn.responseCode == 200) {
    // Register the article (with cloned strings) and derive its ID from its
    // position in the vector, all under the articles lock.
    pthread_mutex_lock(articlesLock);
    printf("[%s] Indexing \"%s\"\n", u.serverName, articleTitle);
    NewsArticleClone(&newsArticle, articleTitle, u.serverName, u.fullName);
    VectorAppend(&db->previouslySeenArticles, &newsArticle);
    articleID = VectorLength(&db->previouslySeenArticles) - 1;
    pthread_mutex_unlock(articlesLock);

    STNew(&st, urlconn.dataStream, kTextDelimiters, false);
    ScanArticle(&st, articleID, &db->indices, &db->stopWords,
                &(db->locks.indicesHashSetLock), &(db->locks.stopWordsHashSetLock));
    STDispose(&st);
  } else {
    printf("Unable to pull \"%s\" from \"%s\". [Response code: %d] Punting...\n",
           articleTitle, u.serverName, urlconn.responseCode);
  }

  URLConnectionDispose(&urlconn);
  unlockConnection(db, u.serverName);
  URLDispose(&u);
}
/*
 * Returns the difference between the connection's bufferEnd and bufferPos,
 * read while the connection lock is held.
 */
unsigned long long int getConnectionDataLen(struct CONNECTION_T *connection)
{
	unsigned long long int pending;

	lockConnection(connection);
	pending = (connection->bufferEnd - connection->bufferPos);
	unLockConnection(connection);

	return pending;
}
void clearConnectionBuffer(struct CONNECTION_T *connection) { if (connection == NULL) return; //pthread_mutex_lock(&connection->readWriteLock); lockConnection(connection); if (connection->buffer) { free(connection->buffer); } connection->buffer = NULL; connection->bufferLen = 0; connection->bufferPos = 0; connection->bufferEnd = 0; unLockConnection(connection); // pthread_mutex_unlock(&connection->readWriteLock); }
unsigned long long int peekConnectionBuffer(struct CONNECTION_T *connection, char *dstBuffer, int len) { unsigned long long int rlen = len; unsigned long long int tmpLen = getConnectionDataLen(connection); lockConnection(connection); //pthread_mutex_lock(&connection->readWriteLock); if (tmpLen < len) { rlen = tmpLen; } char *tmpBuffer = connection->buffer + connection->bufferPos; memcpy(dstBuffer, tmpBuffer, rlen); unLockConnection(connection); //pthread_mutex_unlock(&connection->readWriteLock); return rlen; }
unsigned long long int fillConnectionBuffer(struct CONNECTION_T *connection, unsigned long long int responseLength, int readFull) { if (responseLength == 0) return 0; lockConnection(connection); char *response = malloc(responseLength+1); logInfo( LOG_CONNECTION_DEBUG,"fillBuffer: responseLength=%lld\n", responseLength); ssize_t readLen = recv(connection->socket, response, responseLength, 0); logInfo( LOG_CONNECTION_DEBUG,"fillBuffer: readLen=%zd, response=%s\n", readLen, response); if (readLen == -1) { logInfo( LOG_CONNECTION,"recv"); unLockConnection(connection); free(response); return readLen; } // pthread_mutex_lock(&connection->readWriteLock); // lockConnection(connection); // Get memory for received data. if (connection->buffer == NULL) { logInfo( LOG_CONNECTION_DEBUG,"fillBuffer: Going to allocate memory for the first time.\n"); connection->bufferLen = 0; connection->bufferPos = 0; connection->bufferEnd = 0; connection->buffer = malloc(readLen); if (connection->buffer == NULL) { logInfo( LOG_CONNECTION,"fillBuffer: connection buffer malloc. Tried to allocate %lld bytes.\n", (long long int)readLen); unLockConnection(connection); //pthread_mutex_unlock(&connection->readWriteLock); free(response); return -1; } connection->bufferLen = readLen; } else { if ((connection->bufferLen - connection->bufferEnd) < responseLength) { // Will not fit in the buffer logInfo( LOG_CONNECTION_DEBUG,"fillBuffer: Going to create new memory allocation for buffer. bufferEnd=%d, bufferPos=%d, bufferLen=%d.\n", connection->bufferEnd, connection->bufferPos, connection->bufferLen); unsigned int newSize = readLen + (connection->bufferEnd - connection->bufferPos); logInfo( LOG_CONNECTION_DEBUG,"fillBuffer: newSize=%d.\n", newSize); char *newBuffer = malloc(newSize); if (newBuffer == NULL) { logInfo( LOG_CONNECTION,"fillBuffer: newBuffer malloc. 
Tried to allocate %d bytes.\n", newSize); unLockConnection(connection); //pthread_mutex_unlock(&connection->readWriteLock); free(response); return -1; } // Copy old buffer to new buffer unsigned int newBufferPos = 0; unsigned int oldBufferPos = connection->bufferPos; char *tmpNewBuffer; char *tmpOldBuffer; // Need to replace this with a memcpy while (oldBufferPos < connection->bufferEnd) { tmpNewBuffer = newBuffer + newBufferPos; tmpOldBuffer = connection->buffer + oldBufferPos; *tmpNewBuffer = *tmpOldBuffer; oldBufferPos++; newBufferPos++; } logInfo( LOG_CONNECTION_DEBUG,"fillBuffer: newBuffer=%s.\n", newBuffer); free(connection->buffer); connection->buffer = newBuffer; connection->bufferEnd = connection->bufferEnd - connection->bufferPos; connection->bufferPos = 0; connection->bufferLen = newSize; } } logInfo( LOG_CONNECTION_DEBUG,"fillBuffer: Buffer is ready. bufferEnd=%d, bufferPos=%d, bufferLen=%d.\n", connection->bufferEnd, connection->bufferPos, connection->bufferLen); // Add new received data to connection buffer char *tmpBuffer; tmpBuffer = connection->buffer + connection->bufferEnd; memcpy(tmpBuffer, response, readLen); connection->bufferEnd += readLen; free(response); logInfo( LOG_CONNECTION_DEBUG,"fillBuffer: new Buffer=%s.\n", connection->buffer); logInfo( LOG_CONNECTION_DEBUG,"fillBuffer: Buffer details. bufferEnd=%d, bufferPos=%d, bufferLen=%d.\n", connection->bufferEnd, connection->bufferPos, connection->bufferLen); unLockConnection(connection); //pthread_mutex_unlock(&connection->readWriteLock); if ((readFull > 0) && (readLen != responseLength)) { readLen += fillConnectionBuffer(connection, responseLength - readLen, readFull); } return readLen; }