/*---------------------------------------------------------------------------
 * Copyright (C) 2008, 2009, 2010, 2011 - Emanuele Bovisio
 *
 * This file is part of Mulk.
 *
 * Mulk is free software: you can redistribute it and/or modify
 * it under the terms of the GNU Lesser General Public License as published by
 * the Free Software Foundation, either version 3 of the License, or
 * (at your option) any later version.
 *
 * Mulk is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 * GNU Lesser General Public License for more details.
 *
 * You should have received a copy of the GNU Lesser General Public License
 * along with Mulk.  If not, see <http://www.gnu.org/licenses/>.
 *
 * In addition, as a special exception, the copyright holders give
 * permission to link the code of portions of this program with the
 * OpenSSL library under certain conditions as described in each
 * individual source file, and distribute linked combinations
 * including the two.
 * You must obey the GNU Lesser General Public License in all respects
 * for all of the code used other than OpenSSL.  If you modify
 * file(s) with this exception, you may extend this exception to your
 * version of the file(s), but you are not obligated to do so.  If you
 * do not wish to do so, delete this exception statement from your
 * version.  If you delete this exception statement from all source
 * files in the program, then also delete it here.
 *---------------------------------------------------------------------------*/

#include "buffer_array.h"
#include "jpg_obj.h"
#include "gif_obj.h"
#include "png_obj.h"
#include "url_list.h"
#include "file_obj.h"
#include "string_obj.h"
#include "mime_types.h"
#include "parse.h"
#ifdef ENABLE_CHECKSUM
#include "checksum.h"
#endif

#ifdef ENABLE_METALINK
#include "chunk_list.h"
#endif

/* One slot per simultaneous connection; allocated by create_buffer_array(). */
buffer_t *buffer_array = NULL;

/*
 * Allocate the slot array with option_values.max_sim_conns entries.
 */
void create_buffer_array(void)
{
	buffer_array = m_calloc(option_values.max_sim_conns, sizeof(buffer_t));
}

/*
 * Return the index of the first slot whose id equals `id`, or MULK_RET_ERR
 * when no slot matches.  open_buffer() calls this with NULL to find a slot
 * that has no handle assigned.
 */
int get_buffer(CURL *id)
{
	int i;

	/* The bound is tested before the slot is read, so a miss never reads
	 * past the end of the array. */
	for (i = 0; i < option_values.max_sim_conns; i++) {
		if (buffer_array[i].id == id)
			return i;
	}

	return MULK_RET_ERR;
}

/*
 * Return the index of a slot, other than `id`, whose url has the same id as
 * `url`.  Returns MULK_RET_ERR when `url` is NULL or no such slot exists.
 */
static int get_buffer_by_url(url_list_t *url, int id)
{
	int i;

	if (!url)
		return MULK_RET_ERR;

	for (i = 0; i < option_values.max_sim_conns; i++)
		if (i != id && buffer_array[i].url && buffer_array[i].url->id == url->id)
			break;

	if (i >= option_values.max_sim_conns)
		return MULK_RET_ERR;

	return i;
}

/*
 * Count the slots whose uri has the same host as `uri` (as decided by
 * are_hosts_equal()).  Returns MULK_RET_ERR when `uri` is NULL.
 */
int count_number_of_hostname(UriUriA *uri)
{
	int i, count = 0;

	if (!uri)
		return MULK_RET_ERR;

	for (i = 0; i < option_values.max_sim_conns; i++)
		if (buffer_array[i].uri && are_hosts_equal(buffer_array[i].uri, uri))
			count++;

	return count;
}

/*
 * Return non-zero when the number of slots already using the host of `uri`
 * is non-negative and below the limit.  The limit is `maxconns` when it is
 * positive, otherwise option_values.max_sim_conns_per_host.
 */
int is_uri_compatible(UriUriA *uri, int maxconns)
{
	int count = count_number_of_hostname(uri);

	return count >= 0 && count < (maxconns > 0 ? maxconns : option_values.max_sim_conns_per_host);
}

#ifdef ENABLE_METALINK
/*
 * Count the slots whose chunk belongs to `file`.
 * Returns MULK_RET_ERR when `file` is NULL.
 */
int count_number_of_chunks(metalink_file_list_t *file)
{
	int i, count = 0;

	if (!file)
		return MULK_RET_ERR;

	for (i = 0; i < option_values.max_sim_conns; i++) {
		if (buffer_array[i].chunk && buffer_array[i].chunk->file == file)
			count++;
	}

	return count;
}

/*
 * (Re)create the temporary file of `buffer` for a metalink download.
 *
 * Any stream already open on the buffer is closed.  If the metalink file has
 * a resume file that exists, it is renamed onto buffer->filename.  The file
 * is then passed to create_truncated_file() with the metalink size and
 * reopened in "rb+" mode.
 *
 * Returns MULK_RET_OK on success, MULK_RET_ERR when `metalink_file` is NULL,
 * MULK_RET_FILE_ERR when the file cannot be prepared or opened.
 */
static mulk_type_return_t init_buffer_file(buffer_t *buffer, metalink_file_list_t *metalink_file)
{
	if (buffer->file_pt) {
		fclose(buffer->file_pt);
		/* Do not leave a closed stream reachable on the error paths below. */
		buffer->file_pt = NULL;
	}

	if (!metalink_file)
		return MULK_RET_ERR;

	/* The result of rename() is not checked here. */
	if (metalink_file->resume_filename && is_file_exist(metalink_file->resume_filename))
		rename(metalink_file->resume_filename, buffer->filename);

	if (create_truncated_file(buffer->filename, metalink_file->size))
		return MULK_RET_FILE_ERR;

	if (!(buffer->file_pt = fopen(buffer->filename, "rb+")))
		return MULK_RET_FILE_ERR;

	buffer->url->tmp_file_created = 1;

	return MULK_RET_OK;
}

/*
 * Claim the first slot without a handle for the transfer `id` and open its
 * temporary file.
 *
 * Returns the slot index, or the negative value from get_buffer() when every
 * slot is in use.  A failure to open the temporary file is not reported:
 * buffer->file_pt is simply left as it was.
 */
int open_buffer(CURL *id, url_list_t *url, UriUriA *uri, chunk_t *chunk,
	metalink_resource_list_t *resource, int header)
#else /* not ENABLE_METALINK */
/*
 * Claim the first slot without a handle for the transfer `id` and open its
 * temporary file.
 *
 * Returns the slot index, or the negative value from get_buffer() when every
 * slot is in use.  A failure to open the temporary file is not reported:
 * buffer->file_pt is simply left as it was.
 */
int open_buffer(CURL *id, url_list_t *url, UriUriA *uri)
#endif
{
	int i;
	buffer_t *buffer;

	if ((i = get_buffer(NULL)) < 0)
		return i;

	buffer = buffer_array + i;

	buffer->id = id;
	buffer->url = url;
	buffer->uri = uri;

#ifdef ENABLE_METALINK
	buffer->chunk = chunk;
	buffer->used_res = resource;

	if (chunk || header) {
		int id_file;

		/* Metalink temp files are named after the url id, so every chunk
		 * of the same url resolves to the same file name. */
		string_printf(&buffer->filename, "%smetalink-mulktmp%05d", option_values.temp_directory, url->id);

		if (header || !is_file_exist(buffer->filename) || !url->tmp_file_created) {
			if (!make_dir_pathname(buffer->filename)) {
				if (is_file_exist(buffer->filename))
					buffer->file_pt = fopen(buffer->filename, "rb+");
				else
					buffer->file_pt = fopen(buffer->filename, "wb");

				if (!header && url->metalink_uri->size > 0)
					init_buffer_file(buffer, url->metalink_uri);
			}
		}
		else if ((id_file = get_buffer_by_url(url, i)) < 0) {
			/* No other slot is working on this url: open our own stream. */
			if (!make_dir_pathname(buffer->filename))
				buffer->file_pt = fopen(buffer->filename, "rb+");
		}
		else {
			/* Another slot already has this url's file open: share its stream. */
			buffer->file_pt = buffer_array[id_file].file_pt;
		}
	}
	else
#endif /* ENABLE_METALINK */
	{
		/* Plain downloads use a temp file named after the slot index. */
		string_printf(&buffer->filename, "%smulktmp%05d", option_values.temp_directory, i);

		if (!make_dir_pathname(buffer->filename))
			buffer->file_pt = fopen(buffer->filename, "wb");
	}

	if (is_printf(MINFO)) {
		char *uri_str = uri2string(uri);

		MULK_INFO((_("Open link #%d, url: %s, tmp file: %s\n"), i, uri_str ? uri_str : "",
			buffer->filename ? buffer->filename : ""));
		string_free(&uri_str);
	}

	return i;
}
/*
 * Build an output file name for `uri`.
 *
 * The URI string is rewritten with string_replace_with_char() ("//" and "/"
 * become the first character of DIR_SEPAR_STR; on Windows ":" becomes '_'),
 * then prefixed with option_values.file_output_directory.  When the result
 * ends with the directory separator, "index.html" is appended.
 *
 * Returns a newly built string owned by the caller, or NULL when the URI
 * cannot be converted to a string.
 */
char *uri2filename(UriUriA *uri)
{
	char *furi_str = uri2string(uri);
	char *newfilename = NULL;
	size_t len;

	if (!furi_str)
		return NULL;

	string_replace_with_char(furi_str, "//", *DIR_SEPAR_STR);
	string_replace_with_char(furi_str, "/", *DIR_SEPAR_STR);
#ifdef _WIN32
	string_replace_with_char(furi_str, ":", '_');
#endif

	string_printf(&newfilename, "%s%s", option_values.file_output_directory, furi_str);

	/* Only look at the last character when there is one: a NULL or empty
	 * result must not be dereferenced or indexed at -1. */
	len = newfilename ? strlen(newfilename) : 0;
	if (len > 0 && newfilename[len - 1] == *DIR_SEPAR_STR)
		string_cat(&newfilename, "index.html");

	string_free(furi_str);

	return newfilename;
}
/*
 * Run the configured external filter on *uri and replace *uri with the
 * URI built from the filter's output.
 *
 * Returns 0 when no filter is configured (the URI is left untouched) or when
 * the filtered URI was created; returns -1 otherwise.  Once the URI has been
 * converted to a string the original *uri is freed, so on a later failure
 * *uri is NULL.
 */
int filter_uri(UriUriA **uri, int level)
{
	char *filtered;
	int filter_failed;

	if (!option_values.exec_filter)
		return 0;

	if (!uri)
		return -1;

	filtered = uri2string(*uri);
	if (filtered == NULL)
		return -1;

	/* From here on the old URI is gone, whatever the filter decides. */
	uri_free(*uri);
	*uri = NULL;

	filter_failed = execute_filter(option_values.exec_filter, &filtered, level);
	if (!filter_failed)
		*uri = create_absolute_uri(NULL, filtered);

	string_free(filtered);

	return *uri ? 0 : -1;
}
/*
 * Express `src_filename` relative to `base_filename`.
 *
 * Both names are converted with filename2absolute_uri(), the base is removed
 * with uriRemoveBaseUriA(), and the resulting URI is returned as a string.
 * Returns NULL when an argument is NULL, when either conversion fails, or
 * when the base cannot be removed.
 */
char *extract_relative_url(const char *src_filename, const char *base_filename)
{
	UriUriA relative;
	UriUriA *src_uri, *base_uri;
	char *result = NULL;
	int have_both;

	if (!src_filename || !base_filename)
		return NULL;

	src_uri = filename2absolute_uri(src_filename);
	base_uri = filename2absolute_uri(base_filename);
	have_both = src_uri && base_uri;

	if (have_both
		&& uriRemoveBaseUriA(&relative, src_uri, base_uri, URI_FALSE) == URI_SUCCESS)
		result = uri2string(&relative);

	uri_free(src_uri);
	uri_free(base_uri);

	/* `relative` is only touched by uriparser when both inputs existed. */
	if (have_both)
		uriFreeUriMembersA(&relative);

	return result;
}
/* Append one entry to uxDropFileNames, growing the array by one slot.
 * With USE_FILE_URIs the name is duplicated as-is, otherwise it is passed
 * through uri2string() first.
 */
static void addDropFile(char *fileName)
{
  char *entry;

  if (!uxDropFileCount)
    uxDropFileNames= (char **)xcalloc(1, sizeof(char *));
  else
    uxDropFileNames= (char **)xrealloc(uxDropFileNames, (uxDropFileCount + 1) * sizeof(char *));

#if USE_FILE_URIs
  entry= strdup(fileName);
#else
  entry= uri2string(fileName);
#endif

  uxDropFileNames[uxDropFileCount]= entry;
  uxDropFileCount += 1;
}
/*
 * Print one line per chunk of `file` to stdout: a running index, the chunk
 * position, its byte range, its length and the URI of the resource in use
 * ("NULL" when the chunk has no resource).
 *
 * Does nothing when `file` is NULL or MINFO output is not enabled.
 */
void print_chunks(metalink_file_list_t *file)
{
	chunk_t *elem;
	int i = 0;

	if (!file || !is_printf(MINFO))
		return;

	for (elem = file->chunk; elem; elem = elem->next) {
		char *uri_str = elem->used_res ? uri2string(elem->used_res->uri) : NULL;
		const char *shown;

		/* uri2string() may yield NULL; never hand NULL to "%s". */
		if (!elem->used_res)
			shown = "NULL";
		else
			shown = uri_str ? uri_str : "";

		printf("%d: %" PRIdMAX " %" PRIdMAX "-%" PRIdMAX "/%" PRIdMAX ", %s\n", ++i,
			(intmax_t) elem->pos, (intmax_t) elem->start,
			(intmax_t) elem->start+elem->length-1, (intmax_t) elem->length, shown);

		if (uri_str)
			string_free(&uri_str);
	}
}
/*
 * Convert an absolute URI into a native file name, using
 * uriUriStringToWindowsFilenameA() on Windows and
 * uriUriStringToUnixFilenameA() elsewhere.
 *
 * Returns a newly allocated string owned by the caller, or NULL when the URI
 * cannot be turned into a string, the buffer cannot be allocated, or the
 * conversion fails.
 */
char *uri2absolute_filename(UriUriA* abs_uri)
{
	char *abs_filename = uri2string(abs_uri);
	char *dst_filename = NULL;
	int length;

	/* strlen() must not see a NULL pointer. */
	if (!abs_filename)
		return NULL;

	length = (int) strlen(abs_filename);

	/* uriparser documents that the destination may need up to
	 * strlen(uriString) + 1 chars; ask for the terminator explicitly
	 * instead of assuming string_alloc() adds it. */
	dst_filename = string_alloc(length + 1);

	if (dst_filename) {
#ifdef _WIN32
		if (uriUriStringToWindowsFilenameA(abs_filename, dst_filename) != URI_SUCCESS)
#else
		if (uriUriStringToUnixFilenameA(abs_filename, dst_filename) != URI_SUCCESS)
#endif
		{
			/* string_free() is relied on to reset the pointer, as in the
			 * original code, so that NULL is returned on failure. */
			string_free(dst_filename);
		}
	}

	string_free(abs_filename);

	return dst_filename;
}
/*
 * Log one MINFO line per slot of buffer_array: whether a handle is assigned
 * and, with metalink support, the URI of the resource in use and whether a
 * chunk is attached.
 */
void print_buffers(void)
{
	int i;
	buffer_t *buffer;

	if (is_printf(MINFO)) {
		for (i = 0, buffer = buffer_array; i < option_values.max_sim_conns; i++, buffer++) {
#ifdef ENABLE_METALINK
			/* A slot may have no resource: only dereference used_res
			 * after checking it. */
			char *uri_str = buffer->used_res ? uri2string(buffer->used_res->uri) : NULL;

			MULK_INFO(("%d: %s, %s, %s", i, buffer->id ? _("PRESENT") : _("EMPTY"),
				buffer->used_res ? (uri_str ? uri_str : "") : _("NULL"),
				buffer->chunk ? _("CHUNK") : _("NO CHUNK")));

			if (uri_str)
				string_free(&uri_str);
#else /* not ENABLE_METALINK */
			MULK_INFO(("%d: %s", i, buffer->id ? _("PRESENT") : _("EMPTY")));
#endif /* not ENABLE_METALINK */
			MULK_INFO(("\n"));
		}
	}
}
/* Read the dropped selection from `property` on `owner` (the property is
 * deleted by the request), split it on CR/LF and append every item that
 * starts with "file:" to uxDropFileNames after passing it through
 * uri2string().  A DragDrop event is recorded when the list is non-empty.
 */
static void dndGetSelection(Window owner, Atom property)
{
  unsigned long remaining;
  unsigned char *data= 0;
  Atom actual;
  int format;
  unsigned long count;

  if (Success != XGetWindowProperty(stDisplay, owner, property, 0, 65536, 1, AnyPropertyType,
				    &actual, &format, &count, &remaining, &data))
    fprintf(stderr, "dndGetSelection: XGetWindowProperty failed\n");
  else if (remaining)
    /* a little violent perhaps */
    fprintf(stderr, "dndGetSelection: XGetWindowProperty has more than 64K (why?)\n");
  else
    {
      /* The request can succeed without returning any data; strtok() must
       * not be started on a NULL string. */
      if (data)
	{
	  char *item;

	  for (item= strtok((char *)data, "\n\r");  item;  item= strtok(0, "\n\r"))
	    {
	      dprintf((stderr, "got URI <%s>\n", item));
	      if (!strncmp(item, "file:", 5)) /*** xxx BOGUS -- just while image is broken ***/
		{
		  if (uxDropFileCount)
		    uxDropFileNames= (char **)xrealloc(uxDropFileNames, (uxDropFileCount + 1) * sizeof(char *));
		  else
		    uxDropFileNames= (char **)xcalloc(1, sizeof(char *));
		  uxDropFileNames[uxDropFileCount++]= uri2string(item);
		}
	    }
	}
      if (uxDropFileCount)
	recordDragEvent(DragDrop, uxDropFileCount);
      dprintf((stderr, "+++ DROP %d\n", uxDropFileCount));
    }

  /* XFree() is documented as not accepting a NULL pointer. */
  if (data)
    XFree(data);
}
void report_urls(void) { url_list_t *elem; FILE *textfile = NULL, *csvfile = NULL; char *uri_str, *rep_uri, *rep_mime, *rep_file, *rep_mimefile; int update_pointer = 1; static int write_header = 1; char *text_filename = option_values.report_filename; char *csv_filename = option_values.report_csv_filename; if (text_filename && *text_filename) textfile = fopen(text_filename, "a"); if (csv_filename && *csv_filename) csvfile = fopen(csv_filename, "a"); if (!textfile && !csvfile) return; if (csvfile && write_header) { fprintf(csvfile, "ID,Depth,Error Code,HTTP Code,URL,Mime-Type,Filename,Mime-Type Filename\n"); write_header = 0; } for (elem = report_ptr; elem; elem = elem->next) { if (elem->err_code == ERR_CODE_NOT_ASSIGNED) { update_pointer = 0; continue; } if (elem->reported) continue; uri_str = elem->uri ? uri2string(elem->uri) : NULL; #ifdef ENABLE_METALINK rep_uri = elem->uri ? uri_str : (elem->metalink_uri ? elem->metalink_uri->file->name : REPORT_TEXT_NONE); rep_mime = elem->metalink_uri ? REPORT_TEXT_EMPTY : (elem->mimetype ? elem->mimetype : REPORT_TEXT_EMPTY); #else rep_uri = elem->uri ? uri_str : REPORT_TEXT_NONE; rep_mime = elem->mimetype ? elem->mimetype : REPORT_TEXT_EMPTY; #endif rep_file = elem->filename ? elem->filename : REPORT_TEXT_EMPTY; rep_mimefile = elem->mimefilename ? elem->mimefilename : REPORT_TEXT_EMPTY; if (textfile) fprintf(textfile, "ID:%d Depth:%d Error Code:%ld HTTP Code:%ld URL:\"%s\" Mime-Type:\"%s\" " "Filename:\"%s\" Mime-Type Filename:\"%s\"\n", elem->id, elem->level, elem->err_code, elem->http_code, rep_uri, rep_mime, rep_file, rep_mimefile); if (csvfile) fprintf(csvfile, "%d,%d,%ld,%ld,\"%s\",\"%s\",\"%s\",\"%s\"\n", elem->id, elem->level, elem->err_code, elem->http_code, rep_uri, rep_mime, rep_file, rep_mimefile); elem->reported = 1; if (update_pointer) report_ptr = elem; string_free(uri_str); } if (textfile) fclose(textfile); if (csvfile) fclose(csvfile); }
/*
 * Start the next pending download: pick the next url, create a curl easy
 * handle for it, claim a buffer slot, configure the handle from
 * option_values and add it to the multi handle `cm`.
 *
 * Returns MULK_RET_OK when the handle was added, MULK_RET_ERR when no slot is
 * free, no url is pending, the easy handle or the buffer cannot be set up, or
 * the handle cannot be added to `cm`.
 */
mulk_type_return_t init_url(CURLM *cm)
{
	int i;
	url_list_t *url;
	UriUriA *uri;
#ifdef ENABLE_METALINK
	int header;
	chunk_t *chunk;
	metalink_resource_list_t *resource;
#endif
	char *str_url = NULL;
	CURL *eh;

	/* we need at least a free buffer */
	if (get_buffer(NULL) < 0)
		return MULK_RET_ERR;

#ifdef ENABLE_METALINK
	if ((url = search_next_url(&uri, &chunk, &resource, &header)) == NULL)
#else
	if ((url = search_next_url(&uri)) == NULL)
#endif
		return MULK_RET_ERR;

	if ((eh = curl_easy_init()) == NULL)
		return MULK_RET_ERR;

#ifdef ENABLE_METALINK
	i = open_buffer(eh, url, uri, chunk, resource, header);
#else
	i = open_buffer(eh, url, uri);
#endif

	/* open_buffer() reports failure with a negative value; it must never be
	 * used as an index into buffer_array.  No slot was claimed in that case,
	 * so only the easy handle has to be released. */
	if (i < 0) {
		curl_easy_cleanup(eh);
		return MULK_RET_ERR;
	}

	/* NOTE(review): str_url is not checked for NULL before being handed to
	 * CURLOPT_URL below. */
	str_url = uri2string(uri);

#ifdef ENABLE_METALINK
	if (header) {
		/* Header-only request: no body is downloaded or written. */
		curl_easy_setopt(eh, CURLOPT_WRITEFUNCTION, NULL);
		curl_easy_setopt(eh, CURLOPT_WRITEDATA, NULL);
		curl_easy_setopt(eh, CURLOPT_NOBODY, 1L);
	}
	else
#endif /* ENABLE_METALINK */
	{
		curl_easy_setopt(eh, CURLOPT_WRITEFUNCTION, write_data_cb);
		curl_easy_setopt(eh, CURLOPT_WRITEDATA, &(buffer_array[i]));
		curl_easy_setopt(eh, CURLOPT_NOBODY, 0L);

#ifdef ENABLE_METALINK
		if (chunk) {
			char *range = NULL;

			/* Inclusive byte range "start-end" covering this chunk. */
			string_printf(&range, "%" PRIdMAX "-%" PRIdMAX, (intmax_t) chunk->start,
				(intmax_t) (chunk->start + chunk->length - 1));

			curl_easy_setopt(eh, CURLOPT_RANGE, range);

			string_free(range);
		}
#endif /* ENABLE_METALINK */
	}

	curl_easy_setopt(eh, CURLOPT_HEADER, 0L);
	curl_easy_setopt(eh, CURLOPT_USERAGENT,
		option_values.user_agent ? option_values.user_agent : "Mulk/" VERSION);
	curl_easy_setopt(eh, CURLOPT_URL, str_url);
	/* str_url is attached to the handle and not freed in this function;
	 * presumably whoever reads CURLINFO_PRIVATE later releases it - confirm. */
	curl_easy_setopt(eh, CURLOPT_PRIVATE, str_url);
	curl_easy_setopt(eh, CURLOPT_ENCODING, "");
	curl_easy_setopt(eh, CURLOPT_VERBOSE, 0L);
	curl_easy_setopt(eh, CURLOPT_NOPROGRESS, 1L);
	curl_easy_setopt(eh, CURLOPT_CONNECTTIMEOUT, 30L);
	curl_easy_setopt(eh, CURLOPT_LOW_SPEED_LIMIT, 100L);
	curl_easy_setopt(eh, CURLOPT_LOW_SPEED_TIME, 30L);
	/* NOTE(review): TLS peer and host verification are switched off for
	 * every transfer. */
	curl_easy_setopt(eh, CURLOPT_SSL_VERIFYPEER, 0L);
	curl_easy_setopt(eh, CURLOPT_SSL_VERIFYHOST, 0L);

	if (option_values.user || option_values.password) {
		char *usr_pwd = NULL;

		string_printf(&usr_pwd, "%s:%s", option_values.user ? option_values.user : "",
			option_values.password ? option_values.password : "");

		curl_easy_setopt(eh, CURLOPT_USERPWD, usr_pwd);

		string_free(usr_pwd);
	}

	if (option_values.proxy)
		curl_easy_setopt(eh, CURLOPT_PROXY, option_values.proxy);

	if (option_values.cookie)
		curl_easy_setopt(eh, CURLOPT_COOKIE, option_values.cookie);

	if (option_values.load_cookies)
		curl_easy_setopt(eh, CURLOPT_COOKIEFILE, option_values.load_cookies);

	if (option_values.save_cookies)
		curl_easy_setopt(eh, CURLOPT_COOKIEJAR, option_values.save_cookies);

	/* NOTE(review): on failure the easy handle, str_url and the claimed
	 * buffer slot are left as they are; releasing them needs the slot
	 * teardown routine, which is not visible from here. */
	if (curl_multi_add_handle(cm, eh) != CURLM_OK)
		return MULK_RET_ERR;

	return MULK_RET_OK;
}