int start_elem(const std::string &elem){
    // new tag, clean content
    current.clear();

    // check XML nesting level, security protection
    if(stack_status.size() < 200){
        stack_status.push(elem);
    }else{
        throw DavixException(davix_scope_xml_parser(), StatusCode::ParsingError, "Impossible to parse S3 content, corrupted XML");
    }

    // check element, if it is "Deleted" this resource has been deleted successfully,
    // or the resource did not exist in the first place; either way, log it
    if( StrUtil::compare_ncase(delete_prop, elem) ==0){
        DAVIX_SLOG(DAVIX_LOG_TRACE, DAVIX_LOG_XML, "deleted entry found {}", elem.c_str());
        status.clear();
        entry_count = 0;
    }

    // check element, if "Error" there has been a problem with deleting this resource;
    // the code returned will have to be mapped to an HTTP code
    if( StrUtil::compare_ncase(error_prop, elem) ==0){
        DAVIX_SLOG(DAVIX_LOG_TRACE, DAVIX_LOG_XML, "error entry found {}", elem.c_str());
        status.clear();
        status.error = true;
        entry_count = 0;
    }

    return 1;
}
dav_ssize_t HttpIOVecOps::parseMultipartRequest(HttpRequest & _req,
                                                const IntervalTree<ElemChunk> & tree,
                                                DavixError** err) {
    std::string boundary;
    dav_ssize_t ret = 0, tmp_ret = 0;
    DAVIX_SLOG(DAVIX_LOG_TRACE, DAVIX_LOG_CHAIN, "Davix::parseMultipartRequest multi part parsing");

    if(get_multi_part_info(_req, boundary, err) != 0){
        DAVIX_SLOG(DAVIX_LOG_TRACE, DAVIX_LOG_CHAIN, "Invalid Header Content info for multi part request");
        return -1;
    }
    DAVIX_SLOG(DAVIX_LOG_DEBUG, DAVIX_LOG_CHAIN, "Davix::parseMultipartRequest multi-part boundary {}", boundary);

    while(1) {
        DAVIX_SLOG(DAVIX_LOG_DEBUG, DAVIX_LOG_CHAIN, "Davix::parseMultipartRequest parsing a new chunk");
        ChunkInfo infos;
        int n_try = 0;

        tmp_ret = parse_multi_part_header(_req, boundary, infos, n_try, err);
        if(tmp_ret == -2) break;     // terminating boundary
        if(tmp_ret == -1) return -1; // error

        if( (tmp_ret = copyChunk(_req, tree, infos.offset, infos.size, err)) < 0 )
            return -1;

        ret += tmp_ret;
        DAVIX_SLOG(DAVIX_LOG_DEBUG, DAVIX_LOG_CHAIN, "Davix::parseMultipartRequest chunk parsed with success, next chunk..");
    }

    // finished with success; drain the remaining part of the answer to end the request properly
    char buffer[255];
    while( _req.readBlock(buffer, 255, NULL) > 0)
        ;
    return ret;
}
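// For reference, a 206 multipart/byteranges payload as consumed above looks
// roughly like this (boundary, offsets and sizes are illustrative):
//
//   --3d6b6a416f9b5
//   Content-Type: application/octet-stream
//   Content-Range: bytes 0-99/2048
//
//   <100 bytes of data>
//   --3d6b6a416f9b5
//   Content-Range: bytes 500-999/2048
//
//   <500 bytes of data>
//   --3d6b6a416f9b5--
//
// parse_multi_part_header() consumes one per-part header block at a time and
// reports -2 once it reaches the terminating "--boundary--" marker.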
int PutOp::executeOp() {
    DavixError* tmp_err=NULL;
    int fd = -1;

    if( (fd = getInFd(&tmp_err)) < 0) {
        if(tmp_err)
            Tool::errorPrint(&tmp_err);
        return -1;
    }

    TRY_DAVIX{
        DavFile f(_c, _destination_url);
        DAVIX_SLOG(DAVIX_LOG_DEBUG, DAVIX_LOG_CORE, "{} executing op on {}", _scope, _destination_url);
        f.put(&_opts.params, fd, _file_size);
        close(fd);
        return 0;
    }CATCH_DAVIX(&tmp_err);

    if(fd != -1)
        close(fd);

    if(tmp_err->getStatus() == StatusCode::FileExist) {
        std::cerr << std::endl << _scope << " " << _destination_url << " already exists, continuing..." << std::endl;
    }
    else
        std::cerr << std::endl << _scope << " Failed to PUT " << _target_url << std::endl;

    Tool::errorPrint(&tmp_err);
    return -1;
}
int end_elem(const std::string &elem){
    StrUtil::trim(current);

    // if "Delete", end of a successful delete entry for that resource, push it
    if( StrUtil::compare_ncase(delete_prop, elem) ==0){
        DAVIX_SLOG(DAVIX_LOG_TRACE, DAVIX_LOG_XML, "push deleted status for {}", status.filename.c_str());
        del_status.push_back(status);
        entry_count++;
    }

    // if "Error", end of an error entry for that resource, push it
    if( StrUtil::compare_ncase(error_prop, elem) ==0){
        DAVIX_SLOG(DAVIX_LOG_TRACE, DAVIX_LOG_XML, "push error status for {}", status.filename.c_str());
        del_status.push_back(status);
        entry_count++;
    }

    // if "Key", current is the file name
    if( StrUtil::compare_ncase(key_prop, elem) ==0){
        DAVIX_SLOG(DAVIX_LOG_TRACE, DAVIX_LOG_XML, "key found for {}", current);
        status.filename = current;
    }

    // if "Code", current is the error code
    if( StrUtil::compare_ncase(code_prop, elem) ==0){
        DAVIX_SLOG(DAVIX_LOG_TRACE, DAVIX_LOG_XML, "code found {}", current);
        status.error_code = current;
    }

    // if "Message", current is the error message
    if( StrUtil::compare_ncase(message_prop, elem) ==0){
        DAVIX_SLOG(DAVIX_LOG_TRACE, DAVIX_LOG_XML, "error message found {}", current);
        status.message = current;
    }

    // reduce stack size
    if(stack_status.size() > 0)
        stack_status.pop();
    current.clear();
    return 0;
}
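// The two handlers above walk an S3 multi-object delete response. For
// reference, the DeleteResult body being parsed looks like this (key names
// are illustrative):
//
//   <DeleteResult>
//     <Deleted><Key>photos/cat.jpg</Key></Deleted>
//     <Error>
//       <Key>photos/dog.jpg</Key>
//       <Code>AccessDenied</Code>
//       <Message>Access Denied</Message>
//     </Error>
//   </DeleteResult>
//
// Each closing Deleted or Error tag pushes one accumulated status entry
// onto del_status.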
int DeleteOp::executeOp() {
    DavixError* tmp_err=NULL;

    DAVIX_SLOG(DAVIX_LOG_DEBUG, DAVIX_LOG_CORE, "{} executing op on {}", _scope, _destination_url);

    if(_opts.params.getProtocol() == RequestProtocol::AwsS3) {
        _destination_url += "/?delete";
        PostRequest req(_c, _destination_url, &tmp_err);
        if(tmp_err) {
            Tool::errorPrint(&tmp_err);
            return -1;
        }
        req.setParameters(_opts.params);

        std::ostringstream ss;
        ss << _buf.size();

        // calculate the MD5 of the body and set the header fields,
        // these are required for S3 multi-objects delete
        std::string md5;
        S3::calculateMD5(_buf, md5);

        req.addHeaderField("Content-MD5", md5);
        req.addHeaderField("Content-Length", ss.str());

        req.setRequestBody(_buf);
        req.executeRequest(&tmp_err);
        if(tmp_err) {
            Tool::errorPrint(&tmp_err);
            return -1;
        }

        // check the response code
        int code = req.getRequestCode();
        if(!httpcodeIsValid(code)) {
            httpcodeToDavixError(req.getRequestCode(), davix_scope_http_request(), "during S3 multi-objects delete operation", &tmp_err);
            if(tmp_err) {
                Tool::errorPrint(&tmp_err);
                return -1;
            }
        }

        std::vector<char> body = req.getAnswerContentVec();
        TRY_DAVIX{
            parse_deletion_result(code, Uri(_destination_url), _scope, body);
        }CATCH_DAVIX(&tmp_err);

        if(tmp_err) {
            Tool::errorPrint(&tmp_err);
            return -1;
        }
    }
    // (non-S3 deletion path omitted)
    return 0;
}
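// For reference, _buf holds an S3 multi-object delete request body of the
// form below; S3 mandates the Content-MD5 header for this operation, which
// is why it is computed above (key names are illustrative):
//
//   <Delete>
//     <Object><Key>photos/cat.jpg</Key></Object>
//     <Object><Key>photos/dog.jpg</Key></Object>
//   </Delete>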
static void check_status(DavDeleteXMLParser::DavxDeleteXmlIntern & par,
                         const std::string & name){
    DAVIX_SLOG(DAVIX_LOG_DEBUG, DAVIX_LOG_XML, " status found -> parse it");
    std::string str_status(name);
    ltrim(str_status, StrUtil::isSpace());

    // the status line looks like "HTTP/1.1 <code> <reason>":
    // extract the token between the first two spaces and convert it
    std::string::iterator it1, it2;
    it1 = std::find(str_status.begin(), str_status.end(), ' ');
    if( it1 != str_status.end()){
        it2 = std::find(it1+1, str_status.end(), ' ');
        std::string str_status_parsed(it1+1, it2);
        unsigned long res = strtoul(str_status_parsed.c_str(), NULL, 10);
        if(res != ULONG_MAX){
            DAVIX_SLOG(DAVIX_LOG_DEBUG, DAVIX_LOG_XML, " status value : {}", res);
            par._last_response_status = res;
            return;
        }
    }
    DAVIX_SLOG(DAVIX_LOG_VERBOSE, DAVIX_LOG_XML, "Invalid dav status field value");
    errno = 0;
}
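// Example: for a WebDAV status element of "HTTP/1.1 404 Not Found", the code
// above skips to the first space, reads up to the second, and stores 404 in
// par._last_response_status.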
dav_ssize_t HttpIOVecOps::preadVec(IOChainContext & iocontext,
                                   const DavIOVecInput * input_vec,
                                   DavIOVecOuput * output_vec,
                                   const dav_size_t count_vec){
    if(count_vec == 0)
        return 0;

    for(dav_size_t i = 0; i < count_vec; i++) {
        output_vec[i].diov_size = 0;
    }

    // size of the merge window
    dav_size_t mergewindow = 2000;
    if(iocontext._uri.fragmentParamExists("mergewindow")) {
        mergewindow = atoi(iocontext._uri.getFragmentParam("mergewindow").c_str());
        DAVIX_SLOG(DAVIX_LOG_DEBUG, DAVIX_LOG_CHAIN, "Setting mergewindow to {}", mergewindow);
    }

    // number of parallel connections in case of a simulation
    uint nconnections = 3;
    if(iocontext._uri.fragmentParamExists("nconnections")) {
        nconnections = atoi(iocontext._uri.getFragmentParam("nconnections").c_str());
        DAVIX_SLOG(DAVIX_LOG_DEBUG, DAVIX_LOG_CHAIN, "Setting number of desired parallel connections to {}", nconnections);
    }

    IntervalTree<ElemChunk> tree = buildIntervalTree(input_vec, output_vec, count_vec);

    // a lot of servers do not support multi-range... should we even try?
    if(count_vec == 1 || iocontext._uri.getFragmentParam("multirange") == "false") {
        SortedRanges sorted = partialMerging(tree, mergewindow);
        return simulateMultirange(iocontext, tree, sorted, nconnections);
    }

    SortedRanges sorted = partialMerging(tree, mergewindow);
    MultirangeResult res = performMultirange(iocontext, tree, sorted);

    if(res.res == MultirangeResult::SUCCESS || res.res == MultirangeResult::SUCCESS_BUT_NO_MULTIRANGE) {
        return res.size_bytes;
    }
    else {
        DAVIX_SLOG(DAVIX_LOG_DEBUG, DAVIX_LOG_CHAIN, "Multi-range request has failed, attempting to recover by using multiple single-range requests");
        sorted = partialMerging(tree, mergewindow);
        return simulateMultirange(iocontext, tree, sorted, nconnections);
    }
}
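// A minimal sketch of how a caller might drive preadVec (illustrative only:
// the object and buffer names are invented, error handling is elided, and
// the field names assume the DavIOVecInput/DavIOVecOuput layout from davix.hpp):
//
//   char buf_a[100], buf_b[200];
//   DavIOVecInput in[2];
//   DavIOVecOuput out[2];
//   in[0].diov_buffer = buf_a; in[0].diov_offset = 0;    in[0].diov_size = sizeof(buf_a);
//   in[1].diov_buffer = buf_b; in[1].diov_offset = 4096; in[1].diov_size = sizeof(buf_b);
//   dav_ssize_t total = io.preadVec(iocontext, in, out, 2);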
dav_ssize_t HttpIOVecOps::simulateMultiPartRequest(HttpRequest & _req,
                                                   const IntervalTree<ElemChunk> & tree,
                                                   DavixError** err) {
    DAVIX_SLOG(DAVIX_LOG_TRACE, DAVIX_LOG_CHAIN, " -> Davix vec : 200 full file, simulate vec io");
    char buffer[DAVIX_READ_BLOCK_SIZE+1];
    dav_ssize_t partial_read_size = 0, total_read_size = 0;

    while( (partial_read_size = _req.readBlock(buffer, DAVIX_READ_BLOCK_SIZE, err)) > 0) {
        fillChunks(buffer, tree, total_read_size, partial_read_size);
        total_read_size += partial_read_size;
    }
    return total_read_size;
}
static void check_href(DavDeleteXMLParser::DavxDeleteXmlIntern & par,
                       const std::string & name){
    std::string _href(name);
    rtrim(_href, isSlash()); // remove any trailing slash

    std::string::reverse_iterator it = std::find(_href.rbegin(), _href.rend(), '/');
    if( it == _href.rend()){
        par._last_filename.assign(_href);
    }else{
        par._last_filename.assign(it.base(), _href.end());
    }
    DAVIX_SLOG(DAVIX_LOG_DEBUG, DAVIX_LOG_XML, " href/filename parsed -> {} ", par._last_filename.c_str());
}
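// Example: for <D:href>/dav/dir/file.txt/</D:href> the trailing slash is
// stripped first, then everything after the last remaining '/' is kept,
// leaving "file.txt" as the filename.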
// find all matching chunks in tree and fill them
static void fillChunks(const char *source, const IntervalTree<ElemChunk> &tree,
                       dav_off_t offset, dav_size_t size) {
    std::vector<Interval<ElemChunk> > matches;
    tree.findOverlapping(offset, offset+size-1, matches);

    for(std::vector<Interval<ElemChunk> >::iterator it = matches.begin(); it != matches.end(); it++) {
        copyBytes(source, offset, size, it->value);
    }

    if(matches.size() == 0) {
        DAVIX_SLOG(DAVIX_LOG_DEBUG, DAVIX_LOG_CHAIN, "WARNING: Received byte-range from server does not match any in the interval tree");
    }
}
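// Example: a block received at offset=1000 with size=500 covers bytes
// [1000, 1499]; findOverlapping() returns every requested chunk whose range
// intersects that window, and copyBytes() copies the intersecting portion of
// the source buffer into each of those chunks.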
int start_elem(const std::string &elem){
    // new tag, clean content
    current.clear();

    // check XML nesting level, security protection
    if(stack_status.size() < 200){
        stack_status.push(elem);
    }else{
        throw DavixException(davix_scope_xml_parser(), StatusCode::ParsingError, "Impossible to parse S3 content, corrupted XML");
    }

    // check element, if it is the collection name, add a first entry
    if( StrUtil::compare_ncase(col_prop, elem) ==0){
        DAVIX_SLOG(DAVIX_LOG_TRACE, DAVIX_LOG_XML, "collection found {}", elem.c_str());
        property.clear();
        prop_count = 0;
    }

    // check element, if a new entry, clear the current one
    if( StrUtil::compare_ncase(delimiter_prop, elem) ==0){
        DAVIX_SLOG(DAVIX_LOG_TRACE, DAVIX_LOG_XML, "new element found {}", elem.c_str());
        property.clear();
    }

    // check element, if common prefixes, set the flag
    if( (_s3_listing_mode == S3ListingMode::Hierarchical) && StrUtil::compare_ncase(com_prefix_prop, elem) ==0){
        DAVIX_SLOG(DAVIX_LOG_TRACE, DAVIX_LOG_XML, "common prefixes found {}", elem.c_str());
        inside_com_prefix = true;
    }

    // check element, if prefix, clear the current entry
    if( (_s3_listing_mode == S3ListingMode::Hierarchical) && StrUtil::compare_ncase(prefix_prop, elem) ==0){
        DAVIX_SLOG(DAVIX_LOG_TRACE, DAVIX_LOG_XML, "prefix found {}", elem.c_str());
        property.clear();
    }

    return 1;
}
int http_extract_boundary_from_content_type(const std::string & buffer,
                                            std::string & boundary,
                                            DavixError** err){
    dav_size_t pos_bound;
    static const std::string delimiter = "\";";
    if( (pos_bound = buffer.find(ans_header_boundary_field)) != std::string::npos){
        std::vector<std::string> tokens = tokenSplit(buffer.substr(pos_bound + ans_header_boundary_field.size()), delimiter);

        if( tokens.size() >= 1 && tokens[0].size() > 0 && tokens[0].size() <= 70){
            // swap first, so that the boundary is set before being logged
            std::swap(boundary, tokens[0]);
            DAVIX_SLOG(DAVIX_LOG_TRACE, DAVIX_LOG_CHAIN, "Multi part boundary: {}", boundary);
            return 0;
        }
    }
    return -1;
}
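// Example of the header value being scanned (per RFC 2046 a boundary is at
// most 70 characters long, hence the size check above; the boundary string
// is illustrative):
//
//   Content-Type: multipart/byteranges; boundary="Xq0aVbF3"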
dav_ssize_t copyChunk(HttpRequest & req, const IntervalTree<ElemChunk> &tree,
                      dav_off_t offset, dav_size_t size, DavixError** err){
    DavixError* tmp_err=NULL;
    dav_ssize_t ret;

    DAVIX_SLOG(DAVIX_LOG_DEBUG, DAVIX_LOG_CHAIN, "Davix::parseMultipartRequest::copyChunk copy {} bytes with offset {}", size, offset);

    std::vector<char> buffer;
    buffer.resize(size+1);
    ret = req.readSegment(&buffer[0], size, &tmp_err);

    if(ret != (dav_ssize_t) size || tmp_err) {
        DavixError::propagateError(err, tmp_err);
    }
    else {
        fillChunks(&buffer[0], tree, offset, size);
    }
    return ret;
}
int GetOp::executeOp() {
    int ret = -1;
    int fd = -1;
    DavixError* tmp_err=NULL;

    DavFile f(_c, _target_url);

    if((fd = getOutFd()) > 0) {
        DAVIX_SLOG(DAVIX_LOG_DEBUG, DAVIX_LOG_CORE, "{} executing op on {}", _scope, _target_url);
        ret = f.getToFd(&_opts.params, fd, &tmp_err);

        // if getToFd failed, remove the blank local file that was just created
        if(tmp_err) {
            std::cerr << std::endl << _scope << " Failed to GET " << _target_url << std::endl;
            Tool::errorPrint(&tmp_err);
            remove(_destination_url.c_str());
        }
        close(fd);
    }
    return ret;
}
dav_ssize_t HttpIOVecOps::simulateMultirange(IOChainContext & iocontext,
                                             const IntervalTree<ElemChunk> & tree,
                                             const SortedRanges & ranges,
                                             const uint nconnections) {
    DAVIX_SLOG(DAVIX_LOG_DEBUG, DAVIX_LOG_CHAIN, "Simulating a multi-range request with {} vectors", ranges.size());
    dav_ssize_t size = 0;

    uint num_threads = nconnections;
    if(num_threads > ranges.size()) {
        num_threads = ranges.size();
    }
    uint queries_per_thread = ranges.size() / num_threads;

    std::vector<pthread_t> threads(num_threads);
    std::vector<thdata> data(num_threads);

    for(uint i = 0; i < num_threads; i++) {
        data[i].ptr = this;
        data[i].thread_no = i;
        data[i].ranges = &ranges;
        data[i].tree = &tree;
        data[i].iocontext = &iocontext;

        data[i].start = i*queries_per_thread;
        data[i].end = data[i].start + queries_per_thread;
        // the last thread picks up any remainder
        if(i == num_threads - 1) {
            data[i].end = ranges.size();
        }
        pthread_create(&threads[i], NULL, parallelSingleRange, (void*) &data[i]);
    }

    for(uint i = 0; i < num_threads; i++) {
        pthread_join(threads[i], NULL);
        size += data[i].size;
    }
    return size;
}
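// Example of the work partitioning: with 10 merged ranges and nconnections = 3,
// queries_per_thread = 10 / 3 = 3, so threads 0 and 1 process ranges [0, 3)
// and [3, 6), while the last thread picks up the remainder and processes [6, 10).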
// do a multi-range request on the selected ranges
MultirangeResult HttpIOVecOps::performMultirange(IOChainContext & iocontext,
                                                 const IntervalTree<ElemChunk> &tree,
                                                 const SortedRanges & ranges) {
    DavixError * tmp_err=NULL;
    dav_ssize_t tmp_ret=-1, ret = 0;
    ptrdiff_t p_diff=0;
    dav_size_t counter = 0;
    MultirangeResult::OperationResult opresult = MultirangeResult::SUCCESS;

    // calculate the total bytes to be read (approximate, since ranges could overlap)
    dav_ssize_t bytes_to_read = 0;
    for(dav_size_t i = 0; i < ranges.size(); i++) {
        bytes_to_read += (ranges[i].second - ranges[i].first + 1);
    }

    std::function<int (dav_off_t &, dav_off_t &)> offsetProvider =
        std::bind(&davIOVecProvider, ranges, std::ref(counter),
                  std::placeholders::_1, std::placeholders::_2);

    // a header line must stay under 8K on Apache2 / nginx;
    // in addition, some S3 implementations limit the total header size to 4K,
    // so a maximum of 3900 bytes for the range header seems to be a good compromise
    std::vector< std::pair<dav_size_t, std::string> > vecRanges = generateRangeHeaders(3900, offsetProvider);

    DAVIX_SLOG(DAVIX_LOG_DEBUG, DAVIX_LOG_CHAIN, " -> getPartialVec operation for {} vectors", ranges.size());

    for(std::vector< std::pair<dav_size_t, std::string> >::iterator it = vecRanges.begin(); it < vecRanges.end(); ++it){
        DAVIX_SLOG(DAVIX_LOG_DEBUG, DAVIX_LOG_CHAIN, " -> getPartialVec request for {} chunks", it->first);

        if(it->first == 1){ // one chunk only: no need for multi-part
            ret += singleRangeRequest(iocontext, tree, ranges[p_diff].first,
                                      ranges[p_diff].second - ranges[p_diff].first + 1);
            p_diff += 1;
        }else{
            GetRequest req (iocontext._context, iocontext._uri, &tmp_err);
            if(tmp_err == NULL){
                RequestParams request_params(iocontext._reqparams);
                req.setParameters(request_params);
                req.addHeaderField(req_header_byte_range, it->second);

                if( req.beginRequest(&tmp_err) == 0){
                    const int retcode = req.getRequestCode();

                    // looks like the server supports multi-range requests.. yay
                    if(retcode == 206) {
                        tmp_ret = parseMultipartRequest(req, tree, &tmp_err);

                        // could not parse the multipart response - server's broken?
                        // known to happen with ceph - the return code is 206, but
                        // only the first range is returned
                        if(tmp_ret == -1) {
                            opresult = MultirangeResult::NOMULTIRANGE;
                            req.endRequest(&tmp_err);
                            break;
                        }
                    }
                    // no multi-range.. bad server, bad
                    else if(retcode == 200) {
                        DAVIX_SLOG(DAVIX_LOG_DEBUG, DAVIX_LOG_CHAIN, "Multi-range request resulted in getting the whole file.");

                        // we have two options: read the entire file, or abort the
                        // current request and start a multi-range simulation;
                        // if this is a huge file, reading the entire contents is
                        // definitely not an option
                        if(req.getAnswerSize() > 1000000 && req.getAnswerSize() > 2*bytes_to_read) {
                            DAVIX_SLOG(DAVIX_LOG_DEBUG, DAVIX_LOG_CHAIN, "File is too large; will not waste bandwidth, bailing out");
                            opresult = MultirangeResult::NOMULTIRANGE;
                            req.endRequest(&tmp_err);
                        }
                        else {
                            DAVIX_SLOG(DAVIX_LOG_DEBUG, DAVIX_LOG_CHAIN, "Simulating multi-part response from the contents of the entire file");
                            opresult = MultirangeResult::SUCCESS_BUT_NO_MULTIRANGE;
                            ret = simulateMultiPartRequest(req, tree, &tmp_err);
                        }
                        break;
                    }
                    else if(retcode == 416) {
                        // requested range not satisfiable: nothing to read for this batch
                        tmp_ret = 0;
                        DavixError::clearError(&tmp_err);
                    }
                    else {
                        httpcodeToDavixError(req.getRequestCode(), davix_scope_http_request(), ", ", &tmp_err);
                        ret = -1;
                        break;
                    }

                    p_diff += it->first;
                    ret += tmp_ret;
                } else {
                    ret = -1;
                    break;
                }
            }
        }
    }

    DAVIX_SLOG(DAVIX_LOG_DEBUG, DAVIX_LOG_CHAIN, " <- getPartialVec operation for {} vectors", ranges.size());
    checkDavixError(&tmp_err);
    return MultirangeResult(opresult, ret);
}
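// Example of one generated range header line as sent above (offsets are
// illustrative; each line is kept under the 3900-byte budget so the 8K
// Apache/nginx and ~4K S3 header limits are respected):
//
//   Range: bytes=0-99,4096-8191,16384-16895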
inline void update_elem(){
    DAVIX_SLOG(DAVIX_LOG_DEBUG, DAVIX_LOG_XML, " updating property's info ");
    _current_props.filename = _last_filename;
    _current_props.req_status = _last_response_status;
}
inline void store_new_elem(){
    DAVIX_SLOG(DAVIX_LOG_DEBUG, DAVIX_LOG_XML, " end of properties... ");
    _props.push_back(_current_props);
}
inline void add_new_elem(){
    DAVIX_SLOG(DAVIX_LOG_DEBUG, DAVIX_LOG_XML, " properties detected ");
    _current_props.clear();
    _current_props.filename = _last_filename; // setup the current filename
    _current_props.req_status = _last_response_status;
}
int end_elem(const std::string &elem){
    StrUtil::trim(current);

    // found a prefix
    if( (_s3_listing_mode == S3ListingMode::Hierarchical) && StrUtil::compare_ncase(prefix_prop, elem) ==0 && !current.empty()){
        DAVIX_SLOG(DAVIX_LOG_TRACE, DAVIX_LOG_XML, "new prefix {}", current.c_str());
        prefix = current;
        if(inside_com_prefix){
            // all keys have been processed by now, only common prefixes are left, use them as directories
            DAVIX_SLOG(DAVIX_LOG_TRACE, DAVIX_LOG_XML, "push new common prefix {}", current.c_str());
            current = current.erase(current.size()-1,1);
            property.filename = current.erase(0, prefix_to_remove.size());
            property.info.mode = 0755 | S_IFDIR;
            property.info.mode &= ~(S_IFREG);
            props.push_back(property);
            prop_count++;
        }
    }

    // new name, new fileprop
    if( StrUtil::compare_ncase(name_prop, elem) ==0){
        if(_s3_listing_mode == S3ListingMode::Flat){ // flat mode
            property.filename = current.erase(0, prefix.size());
        }
        else if(prefix.empty()){ // at root level
            property.filename = current;
        }
        else if(!prefix.empty()){
            if(prefix.compare(prefix.size()-1, 1, "/")){ // prefix doesn't end with '/', file
                property.filename = current;
            }
            else if(!(StrUtil::compare_ncase(prefix, current) ==0)){ // folder
                property.filename = current.erase(0, prefix_to_remove.size());
            }
        }
        if(!property.filename.empty())
            property.info.mode = 0755;
    }

    if( StrUtil::compare_ncase(size_prop, elem) ==0){
        try{
            dav_size_t size = toType<dav_size_t, std::string>()(current);
            DAVIX_SLOG(DAVIX_LOG_TRACE, DAVIX_LOG_XML, "element size {}", size);
            property.info.size = size;
        }catch(...){
            DAVIX_SLOG(DAVIX_LOG_TRACE, DAVIX_LOG_XML, "Unable to parse element size");
        }
    }

    if( StrUtil::compare_ncase(last_modified_prop, elem) ==0){
        try{
            time_t mtime = S3::s3TimeConverter(current);
            DAVIX_SLOG(DAVIX_LOG_TRACE, DAVIX_LOG_XML, "element LastModified {}", current);
            property.info.mtime = mtime;
            property.info.ctime = mtime;
        }catch(...){
            DAVIX_SLOG(DAVIX_LOG_TRACE, DAVIX_LOG_XML, "Unable to parse element LastModified");
        }
    }

    // found the bucket name,
    // push it as the first item to identify the bucket
    if( StrUtil::compare_ncase(col_prop, elem) ==0){
        DAVIX_SLOG(DAVIX_LOG_TRACE, DAVIX_LOG_XML, "push collection {}", elem.c_str());
        property.filename = current;
        property.info.mode |= S_IFDIR;
        property.info.mode &= ~(S_IFREG);
        props.push_back(property);
    }

    // check element, if end of entry, push the new entry
    if( StrUtil::compare_ncase(delimiter_prop, elem) ==0){
        DAVIX_SLOG(DAVIX_LOG_TRACE, DAVIX_LOG_XML, "push new element {}", elem.c_str());
        props.push_back(property);
        prop_count++;
    }

    // check element, if end of common prefixes, reset the flag
    if( (_s3_listing_mode == S3ListingMode::Hierarchical) && StrUtil::compare_ncase(com_prefix_prop, elem) ==0){
        inside_com_prefix = false;
    }

    // end of the XML response and still no property: the requested key exists but isn't a directory
    if( (_s3_listing_mode == S3ListingMode::Hierarchical) && (StrUtil::compare_ncase(listbucketresult_prop, elem) ==0) && (prop_count == 0) ){
        throw DavixException(davix_scope_directory_listing_str(), StatusCode::IsNotADirectory, "Not a S3 directory");
    }

    // reduce stack size
    if(stack_status.size() > 0)
        stack_status.pop();
    current.clear();
    return 0;
}
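// For reference, a hierarchical S3 listing response as parsed above looks
// roughly like this (bucket and key names are illustrative):
//
//   <ListBucketResult>
//     <Name>mybucket</Name>
//     <Prefix>photos/</Prefix>
//     <Contents>
//       <Key>photos/cat.jpg</Key>
//       <Size>1024</Size>
//       <LastModified>2013-01-10T12:00:00.000Z</LastModified>
//     </Contents>
//     <CommonPrefixes><Prefix>photos/2013/</Prefix></CommonPrefixes>
//   </ListBucketResult>
//
// Contents entries become files, CommonPrefixes entries become directories,
// and an empty result at the end of the response raises IsNotADirectory.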