bool Images::makeThumb ( ) { // did it have an error? if ( g_errno ) { // just give up on all of them if one has an error log ( "image: had error downloading image on page %s: %s. " "Not downloading any more.", m_pageUrl->getUrl(),mstrerror(g_errno)); // stop it m_stopDownloading = true; return true; } char *buf; long bufLen, bufMaxLen; HttpMime mime; m_imgData = NULL; m_imgDataSize = 0; log( LOG_DEBUG, "image: gotImage() entered." ); // . if there was a problem, just ignore, don't let it stop getting // the real page. if ( g_errno ) { log( "ERROR? g_errno puked: %s", mstrerror(g_errno) ); //g_errno = 0; return true; } //if ( ! slot ) return true; // extract image data from the socket buf = m_msg13.m_replyBuf; bufLen = m_msg13.m_replyBufSize; bufMaxLen = m_msg13.m_replyBufAllocSize; // no image? if ( ! buf || bufLen <= 0 ) { g_errno = EBADIMG; return true; } // we are image candidate #i //long i = m_j - 1; // get img tag node // get the url of the image long srcLen; char *src = NULL; if ( m_xd->m_isDiffbotJSONObject ) { src = *m_xd->getDiffbotPrimaryImageUrl(); srcLen = gbstrlen(src); } else { long node = m_imageNodes[m_j]; src = m_xml->getString(node,"src",&srcLen); } // set it to the full url Url iu; // use "pageUrl" as the baseUrl iu.set ( m_pageUrl , src , srcLen ); // get the mime if ( ! mime.set ( buf, bufLen, &iu ) ) { log ( "image: MIME.set() failed in gotImage()" ); // give up on the remaining images then m_stopDownloading = true; g_errno = EBADIMG; return true; } // set the status so caller can see long httpStatus = mime.getHttpStatus(); // check the status if ( httpStatus != 200 ) { log( LOG_DEBUG, "image: http status of img download is %li.", m_httpStatus); // give up on the remaining images then m_stopDownloading = true; g_errno = EBADIMG; return true; } // make sure this is an image m_imgType = mime.getContentType(); if ( m_imgType < CT_GIF || m_imgType > CT_TIFF ) { log( LOG_DEBUG, "image: gotImage() states that this image is " "not in a format we currently handle." ); // try the next image if any g_errno = EBADIMG; return true; } // get the content m_imgData = buf + mime.getMimeLen(); m_imgDataSize = bufLen - mime.getMimeLen(); // Reset socket, so socket doesn't free the data, now we own // We must free the buf after thumbnail is inserted in TitleRec m_imgReply = buf;//slot->m_readBuf; m_imgReplyLen = bufLen;//slot->m_readBufSize; m_imgReplyMaxLen = bufMaxLen;//slot->m_readBufMaxSize; // do not let UdpServer free the reply, we own it now //slot->m_readBuf = NULL; if ( ! m_imgReply || m_imgReplyLen == 0 ) { log( LOG_DEBUG, "image: Returned empty image reply!" ); g_errno = EBADIMG; return true; } // get next if too small if ( m_imgDataSize < 20 ) { g_errno = EBADIMG; return true; } long imageType; getImageInfo ( m_imgData, m_imgDataSize, &m_dx, &m_dy, &imageType ); // log the image dimensions log( LOG_DEBUG,"image: Image Link: %s", iu.getUrl() ); log( LOG_DEBUG,"image: Max Buffer Size: %lu bytes.",m_imgReplyMaxLen); log( LOG_DEBUG,"image: Image Original Size: %lu bytes.",m_imgReplyLen); log( LOG_DEBUG,"image: Image Buffer @ 0x%lx - 0x%lx",(long)m_imgReply, (long)m_imgReply+m_imgReplyMaxLen ); log( LOG_DEBUG, "image: Size: %lupx x %lupx", m_dx, m_dy ); // what is this? if ( m_dx <= 0 || m_dy <= 0 ) { log(LOG_DEBUG, "image: Image has bad dimensions."); g_errno = EBADIMG; return true; } // skip if bad dimensions if( ((m_dx < 50) || (m_dy < 50)) && ((m_dx > 0) && (m_dy > 0)) ) { log(LOG_DEBUG, "image: Image is too small to represent a news article." ); g_errno = EBADIMG; return true; } // skip if bad aspect ratio. 5x1 or 1x5 is bad i guess if ( m_dx > 0 && m_dy > 0 ) { float aspect = (float)m_dx / (float)m_dy; if ( aspect < .2 || aspect > 5.0 ) { log(LOG_DEBUG, "image: Image aspect ratio is worse that 5 to 1"); g_errno = EBADIMG; return true; } } // update status if ( m_xd ) m_xd->setStatus ( "making thumbnail" ); // log it log ( LOG_DEBUG, "image: gotImage() thumbnailing image." ); // create the thumbnail... // reset this... why? g_errno = 0; // reset this since filterStart_r() will set it on error m_errno = 0; // callThread returns true on success, in which case we block if ( g_threads.call ( FILTER_THREAD , MAX_NICENESS , this , makeThumbWrapper , thumbStartWrapper_r ) ) return false; // threads might be off logf ( LOG_DEBUG, "image: Calling thumbnail gen without thread."); thumbStartWrapper_r ( this , NULL ); return true; }
bool Images::makeThumb ( ) { // did it have an error? if ( g_errno ) { // just give up on all of them if one has an error log ( "image: had error downloading image on page %s: %s. " "Not downloading any more.", m_pageUrl->getUrl(),mstrerror(g_errno)); // stop it m_stopDownloading = true; return true; } char *buf; int32_t bufLen, bufMaxLen; HttpMime mime; m_imgData = NULL; m_imgDataSize = 0; log( LOG_DEBUG, "image: gotImage() entered." ); // . if there was a problem, just ignore, don't let it stop getting // the real page. if ( g_errno ) { log( "ERROR? g_errno puked: %s", mstrerror(g_errno) ); //g_errno = 0; return true; } //if ( ! slot ) return true; // extract image data from the socket buf = m_msg13.m_replyBuf; bufLen = m_msg13.m_replyBufSize; bufMaxLen = m_msg13.m_replyBufAllocSize; // no image? if ( ! buf || bufLen <= 0 ) { g_errno = EBADIMG; return true; } // we are image candidate #i //int32_t i = m_j - 1; // get img tag node // get the url of the image int32_t srcLen; char *src = getImageUrl ( m_j , &srcLen ); // set it to the full url Url iu; // use "pageUrl" as the baseUrl iu.set( m_pageUrl, src, srcLen ); // get the mime if ( ! mime.set ( buf, bufLen, &iu ) ) { log ( "image: MIME.set() failed in gotImage()" ); // give up on the remaining images then m_stopDownloading = true; g_errno = EBADIMG; return true; } // set the status so caller can see int32_t httpStatus = mime.getHttpStatus(); // check the status if ( httpStatus != 200 ) { log( LOG_DEBUG, "image: http status of img download is %" PRId32".", m_httpStatus); // give up on the remaining images then m_stopDownloading = true; g_errno = EBADIMG; return true; } // make sure this is an image m_imgType = mime.getContentType(); if ( m_imgType < CT_GIF || m_imgType > CT_TIFF ) { log( LOG_DEBUG, "image: gotImage() states that this image is " "not in a format we currently handle." ); // try the next image if any g_errno = EBADIMG; return true; } // get the content m_imgData = buf + mime.getMimeLen(); m_imgDataSize = bufLen - mime.getMimeLen(); // Reset socket, so socket doesn't free the data, now we own // We must free the buf after thumbnail is inserted in TitleRec m_imgReply = buf;//slot->m_readBuf; m_imgReplyLen = bufLen;//slot->m_readBufSize; m_imgReplyMaxLen = bufMaxLen;//slot->m_readBufMaxSize; // do not let UdpServer free the reply, we own it now //slot->m_readBuf = NULL; if ( ! m_imgReply || m_imgReplyLen == 0 ) { log( LOG_DEBUG, "image: Returned empty image reply!" ); g_errno = EBADIMG; return true; } // get next if too small if ( m_imgDataSize < 20 ) { g_errno = EBADIMG; return true; } int32_t imageType; getImageInfo ( m_imgData, m_imgDataSize, &m_dx, &m_dy, &imageType ); // log the image dimensions log( LOG_DEBUG,"image: Image Link: %s", iu.getUrl() ); log( LOG_DEBUG,"image: Max Buffer Size: %" PRIu32" bytes.",m_imgReplyMaxLen); log( LOG_DEBUG,"image: Image Original Size: %" PRIu32" bytes.",m_imgReplyLen); log( LOG_DEBUG,"image: Image Buffer @ 0x%" PTRFMT" - 0x%" PTRFMT"",(PTRTYPE)m_imgReply, (PTRTYPE)(m_imgReply+m_imgReplyMaxLen) ); log( LOG_DEBUG, "image: Size: %" PRIu32"px x %" PRIu32"px", m_dx, m_dy ); // what is this? if ( m_dx <= 0 || m_dy <= 0 ) { log(LOG_DEBUG, "image: Image has bad dimensions."); g_errno = EBADIMG; return true; } // skip if bad dimensions if( ((m_dx < 50) || (m_dy < 50)) && ((m_dx > 0) && (m_dy > 0)) ) { log(LOG_DEBUG, "image: Image is too small to represent a news article." ); g_errno = EBADIMG; return true; } // skip if bad aspect ratio. 5x1 or 1x5 is bad i guess if ( m_dx > 0 && m_dy > 0 ) { float aspect = (float)m_dx / (float)m_dy; if ( aspect < .2 || aspect > 5.0 ) { log(LOG_DEBUG, "image: Image aspect ratio is worse that 5 to 1"); g_errno = EBADIMG; return true; } } CollectionRec *cr = g_collectiondb.getRec(m_collnum); if ( ! cr ) { g_errno = ENOCOLLREC; return true; } // save how big of thumbnails we should make. user can change // this in the 'spider controls' m_xysize = cr->m_thumbnailMaxWidthHeight ; // make it 250 pixels if no decent value provided if ( m_xysize <= 0 ) m_xysize = 250; // and keep it sane if ( m_xysize > 2048 ) m_xysize = 2048; // update status if ( m_xd ) m_xd->setStatus ( "making thumbnail" ); // log it log ( LOG_DEBUG, "image: gotImage() thumbnailing image." ); // create the thumbnail... // reset this... why? g_errno = 0; // reset this since filterStart_r() will set it on error m_errno = 0; // callThread returns true on success, in which case we block if ( g_jobScheduler.submit(thumbStartWrapper_r, makeThumbWrapper, this, thread_type_generate_thumbnail, MAX_NICENESS) ) return false; // threads might be off logf ( LOG_DEBUG, "image: Calling thumbnail gen without thread."); thumbStartWrapper_r ( this ); return true; }