int mi_extra(MI_INFO *info, enum ha_extra_function function, void *extra_arg) { int error=0; ulong cache_size; MYISAM_SHARE *share=info->s; DBUG_ENTER("mi_extra"); DBUG_PRINT("enter",("function: %d",(int) function)); switch (function) { case HA_EXTRA_RESET_STATE: /* Reset state (don't free buffers) */ info->lastinx= 0; /* Use first index as def */ info->last_search_keypage=info->lastpos= HA_OFFSET_ERROR; info->page_changed=1; /* Next/prev gives first/last */ if (info->opt_flag & READ_CACHE_USED) { reinit_io_cache(&info->rec_cache,READ_CACHE,0, (pbool) (info->lock_type != F_UNLCK), (pbool) test(info->update & HA_STATE_ROW_CHANGED) ); } info->update= ((info->update & HA_STATE_CHANGED) | HA_STATE_NEXT_FOUND | HA_STATE_PREV_FOUND); break; case HA_EXTRA_CACHE: if (info->lock_type == F_UNLCK && (share->options & HA_OPTION_PACK_RECORD)) { error=1; /* Not possibly if not locked */ my_errno=EACCES; break; } if (info->s->file_map) /* Don't use cache if mmap */ break; #if defined(HAVE_MMAP) && defined(HAVE_MADVISE) if ((share->options & HA_OPTION_COMPRESS_RECORD)) { mysql_mutex_lock(&share->intern_lock); if (_mi_memmap_file(info)) { /* We don't nead MADV_SEQUENTIAL if small file */ madvise((char*) share->file_map, share->state.state.data_file_length, share->state.state.data_file_length <= RECORD_CACHE_SIZE*16 ? MADV_RANDOM : MADV_SEQUENTIAL); mysql_mutex_unlock(&share->intern_lock); break; } mysql_mutex_unlock(&share->intern_lock); } #endif if (info->opt_flag & WRITE_CACHE_USED) { info->opt_flag&= ~WRITE_CACHE_USED; if ((error=end_io_cache(&info->rec_cache))) break; } if (!(info->opt_flag & (READ_CACHE_USED | WRITE_CACHE_USED | MEMMAP_USED))) { cache_size= (extra_arg ? *(ulong*) extra_arg : my_default_record_cache_size); if (!(init_io_cache(&info->rec_cache,info->dfile, (uint) MY_MIN(info->state->data_file_length + 1, cache_size), READ_CACHE,0L,(pbool) (info->lock_type != F_UNLCK), MYF(share->write_flag & MY_WAIT_IF_FULL)))) { info->opt_flag|=READ_CACHE_USED; info->update&= ~HA_STATE_ROW_CHANGED; } if (share->concurrent_insert) info->rec_cache.end_of_file=info->state->data_file_length; } break; case HA_EXTRA_REINIT_CACHE: if (info->opt_flag & READ_CACHE_USED) { reinit_io_cache(&info->rec_cache,READ_CACHE,info->nextpos, (pbool) (info->lock_type != F_UNLCK), (pbool) test(info->update & HA_STATE_ROW_CHANGED)); info->update&= ~HA_STATE_ROW_CHANGED; if (share->concurrent_insert) info->rec_cache.end_of_file=info->state->data_file_length; } break; case HA_EXTRA_WRITE_CACHE: if (info->lock_type == F_UNLCK) { error=1; /* Not possibly if not locked */ break; } cache_size= (extra_arg ? 
*(ulong*) extra_arg : my_default_record_cache_size); if (!(info->opt_flag & (READ_CACHE_USED | WRITE_CACHE_USED | OPT_NO_ROWS)) && !share->state.header.uniques) if (!(init_io_cache(&info->rec_cache,info->dfile, cache_size, WRITE_CACHE,info->state->data_file_length, (pbool) (info->lock_type != F_UNLCK), MYF(share->write_flag & MY_WAIT_IF_FULL)))) { info->opt_flag|=WRITE_CACHE_USED; info->update&= ~(HA_STATE_ROW_CHANGED | HA_STATE_WRITE_AT_END | HA_STATE_EXTEND_BLOCK); } break; case HA_EXTRA_PREPARE_FOR_UPDATE: if (info->s->data_file_type != DYNAMIC_RECORD) break; /* Remove read/write cache if dynamic rows */ case HA_EXTRA_NO_CACHE: if (info->opt_flag & (READ_CACHE_USED | WRITE_CACHE_USED)) { info->opt_flag&= ~(READ_CACHE_USED | WRITE_CACHE_USED); error=end_io_cache(&info->rec_cache); /* Sergei will insert full text index caching here */ } #if defined(HAVE_MMAP) && defined(HAVE_MADVISE) if (info->opt_flag & MEMMAP_USED) madvise((char*) share->file_map, share->state.state.data_file_length, MADV_RANDOM); #endif break; case HA_EXTRA_FLUSH_CACHE: if (info->opt_flag & WRITE_CACHE_USED) { if ((error=flush_io_cache(&info->rec_cache))) { mi_print_error(info->s, HA_ERR_CRASHED); mi_mark_crashed(info); /* Fatal error found */ } } break; case HA_EXTRA_NO_READCHECK: info->opt_flag&= ~READ_CHECK_USED; /* No readcheck */ break; case HA_EXTRA_READCHECK: info->opt_flag|= READ_CHECK_USED; break; case HA_EXTRA_KEYREAD: /* Read only keys to record */ case HA_EXTRA_REMEMBER_POS: info->opt_flag |= REMEMBER_OLD_POS; bmove((uchar*) info->lastkey+share->base.max_key_length*2, (uchar*) info->lastkey,info->lastkey_length); info->save_update= info->update; info->save_lastinx= info->lastinx; info->save_lastpos= info->lastpos; info->save_lastkey_length=info->lastkey_length; if (function == HA_EXTRA_REMEMBER_POS) break; /* fall through */ case HA_EXTRA_KEYREAD_CHANGE_POS: info->opt_flag |= KEY_READ_USED; info->read_record=_mi_read_key_record; break; case HA_EXTRA_NO_KEYREAD: case HA_EXTRA_RESTORE_POS: if (info->opt_flag & REMEMBER_OLD_POS) { bmove((uchar*) info->lastkey, (uchar*) info->lastkey+share->base.max_key_length*2, info->save_lastkey_length); info->update= info->save_update | HA_STATE_WRITTEN; info->lastinx= info->save_lastinx; info->lastpos= info->save_lastpos; info->lastkey_length=info->save_lastkey_length; } info->read_record= share->read_record; info->opt_flag&= ~(KEY_READ_USED | REMEMBER_OLD_POS); break; case HA_EXTRA_NO_USER_CHANGE: /* Database is somehow locked agains changes */ info->lock_type= F_EXTRA_LCK; /* Simulate as locked */ break; case HA_EXTRA_WAIT_LOCK: info->lock_wait=0; break; case HA_EXTRA_NO_WAIT_LOCK: info->lock_wait=MY_DONT_WAIT; break; case HA_EXTRA_NO_KEYS: if (info->lock_type == F_UNLCK) { error=1; /* Not possibly if not lock */ break; } if (mi_is_any_key_active(share->state.key_map)) { MI_KEYDEF *key=share->keyinfo; uint i; for (i=0 ; i < share->base.keys ; i++,key++) { if (!(key->flag & HA_NOSAME) && info->s->base.auto_key != i+1) { mi_clear_key_active(share->state.key_map, i); info->update|= HA_STATE_CHANGED; } } if (!share->changed) { share->state.changed|= STATE_CHANGED | STATE_NOT_ANALYZED; share->changed=1; /* Update on close */ if (!share->global_changed) { share->global_changed=1; share->state.open_count++; } } share->state.state= *info->state; error=mi_state_info_write(share->kfile,&share->state,1 | 2); } break; case HA_EXTRA_FORCE_REOPEN: mysql_mutex_lock(&THR_LOCK_myisam); share->last_version= 0L; /* Impossible version */ mysql_mutex_unlock(&THR_LOCK_myisam); break; case 
HA_EXTRA_PREPARE_FOR_DROP: mysql_mutex_lock(&THR_LOCK_myisam); share->last_version= 0L; /* Impossible version */ #ifdef __WIN__REMOVE_OBSOLETE_WORKAROUND /* Close the isam and data files as Win32 can't drop an open table */ mysql_mutex_lock(&share->intern_lock); if (flush_key_blocks(share->key_cache, share->kfile, (function == HA_EXTRA_FORCE_REOPEN ? FLUSH_RELEASE : FLUSH_IGNORE_CHANGED))) { error=my_errno; share->changed=1; mi_print_error(info->s, HA_ERR_CRASHED); mi_mark_crashed(info); /* Fatal error found */ } if (info->opt_flag & (READ_CACHE_USED | WRITE_CACHE_USED)) { info->opt_flag&= ~(READ_CACHE_USED | WRITE_CACHE_USED); error=end_io_cache(&info->rec_cache); } if (info->lock_type != F_UNLCK && ! info->was_locked) { info->was_locked=info->lock_type; if (mi_lock_database(info,F_UNLCK)) error=my_errno; info->lock_type = F_UNLCK; } if (share->kfile >= 0) _mi_decrement_open_count(info); if (share->kfile >= 0 && mysql_file_close(share->kfile, MYF(0))) error=my_errno; { LIST *list_element ; for (list_element=myisam_open_list ; list_element ; list_element=list_element->next) { MI_INFO *tmpinfo=(MI_INFO*) list_element->data; if (tmpinfo->s == info->s) { if (tmpinfo->dfile >= 0 && mysql_file_close(tmpinfo->dfile, MYF(0))) error = my_errno; tmpinfo->dfile= -1; } } } share->kfile= -1; /* Files aren't open anymore */ mysql_mutex_unlock(&share->intern_lock); #endif mysql_mutex_unlock(&THR_LOCK_myisam); break; case HA_EXTRA_FLUSH: if (!share->temporary) flush_key_blocks(share->key_cache, share->kfile, FLUSH_KEEP); #ifdef HAVE_PWRITE _mi_decrement_open_count(info); #endif if (share->not_flushed) { share->not_flushed=0; if (mysql_file_sync(share->kfile, MYF(0))) error= my_errno; if (mysql_file_sync(info->dfile, MYF(0))) error= my_errno; if (error) { share->changed=1; mi_print_error(info->s, HA_ERR_CRASHED); mi_mark_crashed(info); /* Fatal error found */ } } if (share->base.blobs) mi_alloc_rec_buff(info, -1, &info->rec_buff); break; case HA_EXTRA_NORMAL: /* Theese isn't in use */ info->quick_mode=0; break; case HA_EXTRA_QUICK: info->quick_mode=1; break; case HA_EXTRA_NO_ROWS: if (!share->state.header.uniques) info->opt_flag|= OPT_NO_ROWS; break; case HA_EXTRA_PRELOAD_BUFFER_SIZE: info->preload_buff_size= *((ulong *) extra_arg); break; case HA_EXTRA_CHANGE_KEY_TO_UNIQUE: case HA_EXTRA_CHANGE_KEY_TO_DUP: mi_extra_keyflag(info, function); break; case HA_EXTRA_MMAP: #ifdef HAVE_MMAP mysql_mutex_lock(&share->intern_lock); /* Memory map the data file if it is not already mapped. It is safe to memory map a file while other threads are using file I/O on it. Assigning a new address to a function pointer is an atomic operation. intern_lock prevents that two or more mappings are done at the same time. */ if (!share->file_map) { if (mi_dynmap_file(info, share->state.state.data_file_length)) { DBUG_PRINT("warning",("mmap failed: errno: %d",errno)); error= my_errno= errno; } } mysql_mutex_unlock(&share->intern_lock); #endif break; case HA_EXTRA_MARK_AS_LOG_TABLE: mysql_mutex_lock(&share->intern_lock); share->is_log_table= TRUE; mysql_mutex_unlock(&share->intern_lock); break; case HA_EXTRA_KEY_CACHE: case HA_EXTRA_NO_KEY_CACHE: default: break; } { char tmp[1]; tmp[0]=function; myisam_log_command(MI_LOG_EXTRA,info,(uchar*) tmp,1,error); } DBUG_RETURN(error); } /* mi_extra */
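/*
 * Illustrative sketch, not part of MyISAM: the HA_EXTRA_CACHE branch above
 * picks MADV_RANDOM for data files small enough to stay cached and
 * MADV_SEQUENTIAL for larger ones.  The helper advise_mapped_file() and the
 * CACHE_THRESHOLD cut-off are hypothetical names chosen for the example.
 */
#define _DEFAULT_SOURCE
#include <sys/mman.h>
#include <stddef.h>

#define CACHE_THRESHOLD (16UL * 256 * 1024)   /* hypothetical cut-off */

static int advise_mapped_file(void *map, size_t length)
{
  /* Small files fit in the cache anyway, so aggressive read-ahead is not needed. */
  int advice = (length <= CACHE_THRESHOLD) ? MADV_RANDOM : MADV_SEQUENTIAL;
  return madvise(map, length, advice);
}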
static filemap_s * util_map_file(const char *path, filemap_s *fmp, int writeable) { #if defined(_WIN32) HANDLE hMap; DWORD protection; protection = writeable ? PAGE_WRITECOPY : PAGE_READONLY; fmp->fm_fh = CreateFile(path, GENERIC_READ, FILE_SHARE_READ, NULL, OPEN_EXISTING, FILE_ATTRIBUTE_NORMAL, NULL); if (fmp->fm_fh == INVALID_HANDLE_VALUE) { perror(path); return NULL; } if ((fmp->fm_fsize = GetFileSize(fmp->fm_fh, NULL)) == INVALID_FILE_SIZE) { perror(path); return NULL; } if (!(hMap = CreateFileMapping(fmp->fm_fh, NULL, protection, 0, 0, NULL))) { perror(path); return NULL; } if (!(fmp->fm_fdata = (unsigned char *)MapViewOfFile(hMap, FILE_MAP_COPY, 0, 0, fmp->fm_fsize))) { perror(path); return NULL; } #else struct stat stbuf; int prot; prot = writeable ? PROT_READ|PROT_WRITE : PROT_READ; if ((fmp->fm_fd = open(path, O_RDONLY)) == -1) { perror(path); return NULL; } if (fstat(fmp->fm_fd, &stbuf)) { perror(path); return NULL; } fmp->fm_fsize = stbuf.st_size; // This could fail if the file is too big for available swap... fmp->fm_fdata = mmap64(0, fmp->fm_fsize, prot, MAP_PRIVATE, fmp->fm_fd, 0); if (fmp->fm_fdata == MAP_FAILED) { perror(path); return NULL; } // A potential optimization. if (madvise(fmp->fm_fdata, fmp->fm_fsize, MADV_SEQUENTIAL)) { perror(path); return NULL; } #endif fmp->fm_path = path; return fmp; }
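/*
 * Hedged usage sketch for util_map_file() above (POSIX path only): map a
 * file read-only, walk its bytes, then release the mapping.  The original
 * snippet has no matching unmap helper, so plain munmap()/close() are used
 * here; checksum_file() is an illustrative name, not part of the original.
 */
#ifndef _WIN32
#include <sys/mman.h>
#include <unistd.h>

static unsigned long checksum_file(const char *path, filemap_s *fmp)
{
  unsigned long sum = 0;
  size_t i;
  if (!util_map_file(path, fmp, 0))        /* 0 = read-only mapping */
    return 0;
  for (i = 0; i < (size_t)fmp->fm_fsize; i++)
    sum += fmp->fm_fdata[i];
  munmap(fmp->fm_fdata, (size_t)fmp->fm_fsize);
  close(fmp->fm_fd);
  return sum;
}
#endif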
int network_write_chunkqueue_writev(server *srv, connection *con, int fd, chunkqueue *cq, off_t max_bytes) { chunk *c; for(c = cq->first; (max_bytes > 0) && (NULL != c); c = c->next) { int chunk_finished = 0; switch(c->type) { case MEM_CHUNK: { char * offset; off_t toSend; ssize_t r; size_t num_chunks, i; struct iovec *chunks; chunk *tc; size_t num_bytes = 0; /* build writev list * * 1. limit: num_chunks < MAX_CHUNKS * 2. limit: num_bytes < max_bytes */ for (num_chunks = 0, tc = c; tc && tc->type == MEM_CHUNK && num_chunks < MAX_CHUNKS; num_chunks++, tc = tc->next); chunks = calloc(num_chunks, sizeof(*chunks)); for(tc = c, i = 0; i < num_chunks; tc = tc->next, i++) { if (buffer_string_is_empty(tc->mem)) { chunks[i].iov_base = tc->mem->ptr; chunks[i].iov_len = 0; } else { offset = tc->mem->ptr + tc->offset; toSend = buffer_string_length(tc->mem) - tc->offset; chunks[i].iov_base = offset; /* protect the return value of writev() */ if (toSend > max_bytes || (off_t) num_bytes + toSend > max_bytes) { chunks[i].iov_len = max_bytes - num_bytes; num_chunks = i + 1; break; } else { chunks[i].iov_len = toSend; } num_bytes += toSend; } } if ((r = writev(fd, chunks, num_chunks)) < 0) { switch (errno) { case EAGAIN: case EINTR: r = 0; break; case EPIPE: case ECONNRESET: free(chunks); return -2; default: log_error_write(srv, __FILE__, __LINE__, "ssd", "writev failed:", strerror(errno), fd); free(chunks); return -1; } } cq->bytes_out += r; max_bytes -= r; /* check which chunks have been written */ for(i = 0, tc = c; i < num_chunks; i++, tc = tc->next) { if (r >= (ssize_t)chunks[i].iov_len) { /* written */ r -= chunks[i].iov_len; tc->offset += chunks[i].iov_len; if (chunk_finished) { /* skip the chunks from further touches */ c = c->next; } else { /* chunks_written + c = c->next is done in the for()*/ chunk_finished = 1; } } else { /* partially written */ tc->offset += r; chunk_finished = 0; break; } } free(chunks); break; } case FILE_CHUNK: { ssize_t r; off_t abs_offset; off_t toSend; stat_cache_entry *sce = NULL; #define KByte * 1024 #define MByte * 1024 KByte #define GByte * 1024 MByte const off_t we_want_to_mmap = 512 KByte; char *start = NULL; if (HANDLER_ERROR == stat_cache_get_entry(srv, con, c->file.name, &sce)) { log_error_write(srv, __FILE__, __LINE__, "sb", strerror(errno), c->file.name); return -1; } abs_offset = c->file.start + c->offset; if (abs_offset > sce->st.st_size) { log_error_write(srv, __FILE__, __LINE__, "sb", "file was shrinked:", c->file.name); return -1; } /* mmap the buffer * - first mmap * - new mmap as the we are at the end of the last one */ if (c->file.mmap.start == MAP_FAILED || abs_offset == (off_t)(c->file.mmap.offset + c->file.mmap.length)) { /* Optimizations for the future: * * adaptive mem-mapping * the problem: * we mmap() the whole file. If someone has alot large files and 32bit * machine the virtual address area will be unrun and we will have a failing * mmap() call. * solution: * only mmap 16M in one chunk and move the window as soon as we have finished * the first 8M * * read-ahead buffering * the problem: * sending out several large files in parallel trashes the read-ahead of the * kernel leading to long wait-for-seek times. * solutions: (increasing complexity) * 1. use madvise * 2. use a internal read-ahead buffer in the chunk-structure * 3. 
use non-blocking IO for file-transfers * */ /* all mmap()ed areas are 512kb expect the last which might be smaller */ off_t we_want_to_send; size_t to_mmap; /* this is a remap, move the mmap-offset */ if (c->file.mmap.start != MAP_FAILED) { munmap(c->file.mmap.start, c->file.mmap.length); c->file.mmap.offset += we_want_to_mmap; } else { /* in case the range-offset is after the first mmap()ed area we skip the area */ c->file.mmap.offset = 0; while (c->file.mmap.offset + we_want_to_mmap < c->file.start) { c->file.mmap.offset += we_want_to_mmap; } } /* length is rel, c->offset too, assume there is no limit at the mmap-boundaries */ we_want_to_send = c->file.length - c->offset; to_mmap = (c->file.start + c->file.length) - c->file.mmap.offset; /* we have more to send than we can mmap() at once */ if (abs_offset + we_want_to_send > c->file.mmap.offset + we_want_to_mmap) { we_want_to_send = (c->file.mmap.offset + we_want_to_mmap) - abs_offset; to_mmap = we_want_to_mmap; } if (-1 == c->file.fd) { /* open the file if not already open */ if (-1 == (c->file.fd = open(c->file.name->ptr, O_RDONLY))) { log_error_write(srv, __FILE__, __LINE__, "sbs", "open failed for:", c->file.name, strerror(errno)); return -1; } fd_close_on_exec(c->file.fd); } if (MAP_FAILED == (c->file.mmap.start = mmap(NULL, to_mmap, PROT_READ, MAP_SHARED, c->file.fd, c->file.mmap.offset))) { log_error_write(srv, __FILE__, __LINE__, "ssbd", "mmap failed:", strerror(errno), c->file.name, c->file.fd); return -1; } c->file.mmap.length = to_mmap; #ifdef LOCAL_BUFFERING buffer_copy_string_len(c->mem, c->file.mmap.start, c->file.mmap.length); #else #ifdef HAVE_MADVISE /* don't advise files < 64Kb */ if (c->file.mmap.length > (64 KByte)) { /* darwin 7 is returning EINVAL all the time and I don't know how to * detect this at runtime.i * * ignore the return value for now */ madvise(c->file.mmap.start, c->file.mmap.length, MADV_WILLNEED); } #endif #endif /* chunk_reset() or chunk_free() will cleanup for us */ } /* to_send = abs_mmap_end - abs_offset */ toSend = (c->file.mmap.offset + c->file.mmap.length) - (abs_offset); if (toSend < 0) { log_error_write(srv, __FILE__, __LINE__, "soooo", "toSend is negative:", toSend, c->file.mmap.length, abs_offset, c->file.mmap.offset); force_assert(toSend < 0); } if (toSend > max_bytes) toSend = max_bytes; #ifdef LOCAL_BUFFERING start = c->mem->ptr; #else start = c->file.mmap.start; #endif if ((r = write(fd, start + (abs_offset - c->file.mmap.offset), toSend)) < 0) { switch (errno) { case EAGAIN: case EINTR: r = 0; break; case EPIPE: case ECONNRESET: return -2; default: log_error_write(srv, __FILE__, __LINE__, "ssd", "write failed:", strerror(errno), fd); return -1; } } c->offset += r; cq->bytes_out += r; max_bytes -= r; if (c->offset == c->file.length) { chunk_finished = 1; /* we don't need the mmaping anymore */ if (c->file.mmap.start != MAP_FAILED) { munmap(c->file.mmap.start, c->file.mmap.length); c->file.mmap.start = MAP_FAILED; } } break; } default: log_error_write(srv, __FILE__, __LINE__, "ds", c, "type not known"); return -1; } if (!chunk_finished) { /* not finished yet */ break; } } return 0; }
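/*
 * Minimal sketch of the read-ahead idea from the comment above (solution 1,
 * "use madvise"): send a file through a sliding 512 KiB mmap() window and
 * hint each window with MADV_WILLNEED.  send_file_mmap(), out_fd and the
 * reduced error handling are illustrative assumptions, not lighttpd API.
 */
#define _DEFAULT_SOURCE
#include <sys/mman.h>
#include <sys/stat.h>
#include <fcntl.h>
#include <unistd.h>

static int send_file_mmap(int out_fd, const char *path)
{
	int fd = open(path, O_RDONLY);
	struct stat st;
	off_t off;

	if (fd < 0 || fstat(fd, &st) < 0) {
		if (fd >= 0) close(fd);
		return -1;
	}
	for (off = 0; off < st.st_size; off += 512 * 1024) {
		size_t len = (size_t)(st.st_size - off < 512 * 1024 ? st.st_size - off : 512 * 1024);
		size_t done = 0;
		char *p = mmap(NULL, len, PROT_READ, MAP_SHARED, fd, off);
		if (p == MAP_FAILED) { close(fd); return -1; }
		madvise(p, len, MADV_WILLNEED);   /* ask the kernel to prefetch this window */
		while (done < len) {
			ssize_t w = write(out_fd, p + done, len - done);
			if (w <= 0) { munmap(p, len); close(fd); return -1; }
			done += (size_t)w;
		}
		munmap(p, len);
	}
	close(fd);
	return 0;
}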
int main(int argc, char *argv[]) { void *addr; int i; int ret; int fd = 0; int semid; int semaphore; int inject = 0; int madvise_code = MADV_HWPOISON; int early_kill = 0; int avoid_touch = 0; int anonflag = 0; int shmflag = 0; int shmkey = 0; int forkflag = 0; int privateflag = 0; int cowflag = 0; char c; pid_t pid = 0; void *expected_addr = NULL; struct sembuf sembuffer; PS = getpagesize(); HPS = HPAGE_SIZE; file_size = 1; corrupt_page = -1; if (argc == 1) { usage(); exit(EXIT_FAILURE); } while ((c = getopt_long(argc, argv, "m:o:xOeSAaFpcf:h", opts, NULL)) != -1) { switch (c) { case 'm': file_size = strtol(optarg, NULL, 10); break; case 'o': corrupt_page = strtol(optarg, NULL, 10); break; case 'x': inject = 1; break; case 'O': madvise_code = MADV_SOFT_OFFLINE; break; case 'e': early_kill = 1; break; case 'S': shmflag = 1; break; case 'A': anonflag = 1; break; case 'a': avoid_touch = 1; break; case 'F': forkflag = 1; break; case 'p': privateflag = 1; break; case 'c': cowflag = 1; break; case 'f': strcat(filename, optarg); shmkey = strtol(optarg, NULL, 10); break; case 'h': usage(); exit(EXIT_SUCCESS); default: usage(); exit(EXIT_FAILURE); } } if (inject && corrupt_page * PS > file_size * HPAGE_SIZE) errmsg("Target page is out of range.\n"); if (avoid_touch && corrupt_page == -1) errmsg("Avoid which page?\n"); /* Construct file name */ if (access(argv[argc - 1], F_OK) == -1) { usage(); exit(EXIT_FAILURE); } else { strcpy(filepath, argv[argc - 1]); strcat(filepath, filename); } if (shmflag) { addr = alloc_shm_hugepage(&shmkey, file_size * HPAGE_SIZE); if (!addr) errmsg("Failed in alloc_shm_hugepage()"); } else if (anonflag) { addr = alloc_anonymous_hugepage(file_size * HPAGE_SIZE, privateflag); if (!addr) errmsg("Failed in alloc_anonymous_hugepage()"); } else { addr = alloc_filebacked_hugepage(filepath, file_size * HPAGE_SIZE, privateflag, &fd); if (!addr) errmsg("Failed in alloc_filebacked_hugepage()"); } if (corrupt_page != -1 && avoid_touch) expected_addr = (void *)(addr + corrupt_page / 512 * HPAGE_SIZE); if (forkflag) { semid = semget(IPC_PRIVATE, 1, 0666|IPC_CREAT); if (semid == -1) { perror("semget"); goto cleanout; } semaphore = semctl(semid, 0, SETVAL, 1); if (semaphore == -1) { perror("semctl"); goto cleanout; } if (get_semaphore(semid, &sembuffer)) { perror("get_semaphore"); goto cleanout; } } write_hugepage(addr, file_size, 0); read_hugepage(addr, file_size, 0); if (early_kill) prctl(PR_MCE_KILL, PR_MCE_KILL_SET, PR_MCE_KILL_EARLY, NULL, NULL); /* * Intended order: * 1. Child COWs * 2. Parent madvise()s * 3. 
Child exit()s */ if (forkflag) { pid = fork(); if (!pid) { /* Semaphore is already held */ if (cowflag) { write_hugepage(addr, file_size, 0); read_hugepage(addr, file_size, 0); } if (put_semaphore(semid, &sembuffer)) err("put_semaphore"); usleep(1000); /* Wait for madvise() to be done */ if (get_semaphore(semid, &sembuffer)) err("put_semaphore"); if (put_semaphore(semid, &sembuffer)) err("put_semaphore"); return 0; } } /* Wait for COW */ if (forkflag && get_semaphore(semid, &sembuffer)) { perror("get_semaphore"); goto cleanout; } if (inject && corrupt_page != -1) { ret = madvise(addr + corrupt_page * PS, PS, madvise_code); if (ret) { printf("madivise return %d :", ret); perror("madvise"); goto cleanout; } } if (forkflag && put_semaphore(semid, &sembuffer)) { perror("put_semaphore"); goto cleanout; } if (madvise_code != MADV_SOFT_OFFLINE); write_hugepage(addr, file_size, expected_addr); read_hugepage(addr, file_size, expected_addr); if (forkflag) { if (wait(&i) == -1) err("wait"); if (semctl(semid, 0, IPC_RMID) == -1) err("semctl(IPC_RMID)"); } cleanout: if (shmflag) { if (free_shm_hugepage(shmkey, addr) == -1) exit(2); } else if (anonflag) { if (free_anonymous_hugepage(addr, file_size * HPAGE_SIZE) == -1) exit(2); } else { if (free_filebacked_hugepage(addr, file_size * HPAGE_SIZE, fd, filepath) == -1) exit(2); } return 0; }
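/*
 * Minimal sketch of the injection step above (assumes a Linux kernel built
 * with CONFIG_MEMORY_FAILURE and root privileges; not part of the test):
 * soft-offline a single anonymous page.  Unlike MADV_HWPOISON, the page
 * contents are migrated first, so the mapping stays usable afterwards.
 */
#define _GNU_SOURCE
#include <stdio.h>
#include <sys/mman.h>
#include <unistd.h>

int main(void)
{
	size_t ps = (size_t)sysconf(_SC_PAGESIZE);
	char *p = mmap(NULL, ps, PROT_READ | PROT_WRITE,
		       MAP_PRIVATE | MAP_ANONYMOUS, -1, 0);
	if (p == MAP_FAILED) {
		perror("mmap");
		return 1;
	}
	p[0] = 'x';			/* fault the page in first */
#ifdef MADV_SOFT_OFFLINE
	if (madvise(p, ps, MADV_SOFT_OFFLINE))
		perror("madvise(MADV_SOFT_OFFLINE)");
	else
		printf("page offlined, data preserved: %c\n", p[0]);
#else
	fprintf(stderr, "MADV_SOFT_OFFLINE not available on this system\n");
#endif
	return 0;
}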
void* alloc_huge(size_t size) {
    void* mem = mmap(NULL, size, PROT_READ | PROT_WRITE,
                     MAP_NORESERVE | MAP_PRIVATE | MAP_ANONYMOUS, -1, 0);
    /* MADV_HUGEPAGE is only a hint; a failed mmap() is passed back unchanged. */
    if (mem != MAP_FAILED)
        madvise(mem, size, MADV_HUGEPAGE);
    return mem;
}
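/*
 * Hedged usage sketch for alloc_huge() above: allocate a scratch buffer and
 * touch it so the kernel can back it with transparent huge pages.
 * MADV_HUGEPAGE is only a hint; the allocation still works with 4 KiB pages
 * when THP is disabled.  make_scratch_buffer() and HUGE_REGION are
 * illustrative names, not part of the original code.
 */
#include <string.h>
#include <sys/mman.h>

enum { HUGE_REGION = 64 * 1024 * 1024 };

static void *make_scratch_buffer(void)
{
    void *buf = alloc_huge(HUGE_REGION);
    if (buf == MAP_FAILED)          /* alloc_huge() passes mmap()'s result through */
        return NULL;
    memset(buf, 0, HUGE_REGION);    /* faulting the pages lets khugepaged collapse them */
    return buf;
}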
extern cl_fmap_t *cl_fmap_open_handle(void *handle, size_t offset, size_t len, clcb_pread pread_cb, int use_aging) { unsigned int pages, mapsz, hdrsz; cl_fmap_t *m; int pgsz = cli_getpagesize(); if((off_t)offset < 0 || offset != fmap_align_to(offset, pgsz)) { cli_warnmsg("fmap: attempted mapping with unaligned offset\n"); return NULL; } if(!len) { cli_dbgmsg("fmap: attempted void mapping\n"); return NULL; } if (offset >= len) { cli_warnmsg("fmap: attempted oof mapping\n"); return NULL; } pages = fmap_align_items(len, pgsz); hdrsz = fmap_align_to(sizeof(fmap_t) + (pages-1) * sizeof(uint32_t), pgsz); /* fmap_t includes 1 bitmap slot, hence (pages-1) */ mapsz = pages * pgsz + hdrsz; #ifndef ANONYMOUS_MAP use_aging = 0; #endif #ifdef ANONYMOUS_MAP if (use_aging) { fmap_lock; if ((m = (fmap_t *)mmap(NULL, mapsz, PROT_READ | PROT_WRITE, MAP_PRIVATE|/*FIXME: MAP_POPULATE is ~8% faster but more memory intensive */ANONYMOUS_MAP, -1, 0)) == MAP_FAILED) { m = NULL; } else { #if HAVE_MADVISE madvise((void *)m, mapsz, MADV_RANDOM|MADV_DONTFORK); #endif /* madvise */ /* fault the header while we still have the lock - we DO context switch here a lot here :@ */ memset(fmap_bitmap, 0, sizeof(uint32_t) * pages); } fmap_unlock; } #endif /* ANONYMOUS_MAP */ if (!use_aging) { m = (fmap_t *)cli_malloc(mapsz); if (!(m)) { cli_warnmsg("fmap: map allocation failed\n"); return NULL; } memset(m, 0, hdrsz); } if(!m) { cli_warnmsg("fmap: map allocation failed\n"); return NULL; } m->handle = handle; m->pread_cb = pread_cb; m->aging = use_aging; m->offset = offset; m->nested_offset = 0; m->len = len;/* m->nested_offset + m->len = m->real_len */ m->real_len = len; m->pages = pages; m->hdrsz = hdrsz; m->pgsz = pgsz; m->paged = 0; m->dont_cache_flag = 0; m->unmap = use_aging ? unmap_mmap : unmap_malloc; m->need = handle_need; m->need_offstr = handle_need_offstr; m->gets = handle_gets; m->unneed_off = handle_unneed_off; return m; }
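/*
 * Small MADV_DONTFORK sketch (Linux-specific, illustrative only, not ClamAV
 * code): the anonymous map above combines MADV_RANDOM with MADV_DONTFORK so
 * the cache is not duplicated into children.  Here the child probes the
 * range with msync(), which fails with ENOMEM because the pages were not
 * inherited.
 */
#define _GNU_SOURCE
#include <errno.h>
#include <stdio.h>
#include <sys/mman.h>
#include <sys/types.h>
#include <sys/wait.h>
#include <unistd.h>

int main(void)
{
    size_t len = (size_t)sysconf(_SC_PAGESIZE);
    pid_t pid;
    char *p = mmap(NULL, len, PROT_READ | PROT_WRITE,
                   MAP_PRIVATE | MAP_ANONYMOUS, -1, 0);
    if (p == MAP_FAILED) { perror("mmap"); return 1; }
    if (madvise(p, len, MADV_DONTFORK)) { perror("madvise(MADV_DONTFORK)"); return 1; }

    pid = fork();
    if (pid == 0) {
        /* In the child the range no longer exists, so msync() reports ENOMEM. */
        int r = msync(p, len, MS_ASYNC);
        printf("child: msync=%d errno=%d (%s)\n", r, errno,
               r ? "range not inherited" : "range inherited");
        _exit(0);
    }
    waitpid(pid, NULL, 0);
    return 0;
}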
int main(int argc, char *argv[]) { int lc, fd, pagesize; int i; unsigned long len; char *file, *low, *high; struct stat stat; char *ptr_memory_allocated = NULL; char *tmp_memory_allocated = NULL; char *msg = NULL; char filename[64]; char *progname = NULL; char *str_for_file = "abcdefghijklmnopqrstuvwxyz12345\n"; msg = parse_opts(argc, argv, NULL, NULL); if (msg) tst_brkm(TBROK, NULL, "OPTION PARSING ERROR - %s", msg); setup(); progname = *argv; sprintf(filename, "%s-out.%d", progname, getpid()); for (lc = 0; TEST_LOOPING(lc); lc++) { Tst_count = 0; fd = open(filename, O_RDWR|O_CREAT, 0664); if (fd < 0) tst_brkm(TBROK, cleanup, "open failed"); #ifdef MM_DEBUG tst_resm(TINFO, "filename = %s opened successfully", filename); #endif pagesize = getpagesize(); /* Writing 16 pages of random data into this file */ for (i = 0; i < (pagesize / 2); i++) if (write(fd, str_for_file, strlen(str_for_file)) == -1) tst_brkm(TBROK|TERRNO, cleanup, "write failed"); if (fstat(fd, &stat) == -1) tst_brkm(TBROK, cleanup, "fstat failed"); file = mmap(NULL, stat.st_size, PROT_READ, MAP_SHARED, fd, 0); if (file == MAP_FAILED) tst_brkm(TBROK|TERRNO, cleanup, "mmap failed"); #ifdef MM_DEBUG tst_resm(TINFO, "The Page size is %d", pagesize); #endif /* Test Case 1 */ TEST(madvise(file + 100, stat.st_size, MADV_NORMAL)); check_and_print(EINVAL); /* Test Case 2 */ TEST(madvise(file, stat.st_size, 1212)); check_and_print(EINVAL); #if !defined(UCLINUX) /* Test Case 3 */ if (mlock((void *)file, stat.st_size) < 0) tst_brkm(TBROK, cleanup, "mlock failed"); TEST(madvise(file, stat.st_size, MADV_DONTNEED)); check_and_print(EINVAL); if (munmap(file, stat.st_size) == -1) tst_brkm(TBROK|TERRNO, cleanup, "munmap failed"); #endif /* if !defined(UCLINUX) */ /* Test Case 4 */ /* We cannot be sure, which region is mapped, which is * not, at runtime. * So, we will create two maps(of the same file), * unmap the map at higher address. * Now issue an madvise() on a region covering the * region which we unmapped. */ low = mmap(NULL, stat.st_size / 2, PROT_READ, MAP_SHARED, fd, 0); if (low == MAP_FAILED) tst_brkm(TBROK, cleanup, "mmap [low] failed"); high = mmap(NULL, stat.st_size / 2, PROT_READ, MAP_SHARED, fd, stat.st_size / 2); if (high == MAP_FAILED) tst_brkm(TBROK, cleanup, "mmap [high] failed"); /* Swap if necessary to make low < high */ if (low > high) { char *tmp; tmp = high; high = low; low = tmp; } len = (high - low) + pagesize; if (munmap(high, stat.st_size / 2) < 0) tst_brkm(TBROK|TERRNO, cleanup, "munmap [high] failed"); TEST(madvise(low, len, MADV_NORMAL)); check_and_print(ENOMEM); /* Test Case 5 */ /* Unmap the file map from low */ if (munmap(low, stat.st_size / 2) < 0) tst_brkm(TBROK|TERRNO, cleanup, "munmap [low] failed"); /* Create one memory segment using malloc */ ptr_memory_allocated = (char *)malloc(5 * pagesize); /* * Take temporary pointer for later use, freeing up the * original one. */ tmp_memory_allocated = ptr_memory_allocated; tmp_memory_allocated = (char *)(((unsigned long)tmp_memory_allocated + pagesize - 1) & ~(pagesize - 1)); TEST(madvise (tmp_memory_allocated, 5 * pagesize, MADV_WILLNEED)); check_and_print(EBADF); free((void *)ptr_memory_allocated); close(fd); } cleanup(); tst_exit(); }
/* * Periodically pat the watchdog, preventing it from firing. */ int main(int argc, char *argv[]) { struct rtprio rtp; struct pidfh *pfh; pid_t otherpid; if (getuid() != 0) errx(EX_SOFTWARE, "not super user"); parseargs(argc, argv); if (do_syslog) openlog("watchdogd", LOG_CONS|LOG_NDELAY|LOG_PERROR, LOG_DAEMON); rtp.type = RTP_PRIO_REALTIME; rtp.prio = 0; if (rtprio(RTP_SET, 0, &rtp) == -1) err(EX_OSERR, "rtprio"); if (!is_dry_run && watchdog_init() == -1) errx(EX_SOFTWARE, "unable to initialize watchdog"); if (is_daemon) { if (watchdog_onoff(1) == -1) err(EX_OSERR, "patting the dog"); pfh = pidfile_open(pidfile, 0600, &otherpid); if (pfh == NULL) { if (errno == EEXIST) { watchdog_onoff(0); errx(EX_SOFTWARE, "%s already running, pid: %d", getprogname(), otherpid); } warn("Cannot open or create pidfile"); } if (debugging == 0 && daemon(0, 0) == -1) { watchdog_onoff(0); pidfile_remove(pfh); err(EX_OSERR, "daemon"); } signal(SIGHUP, SIG_IGN); signal(SIGINT, sighandler); signal(SIGTERM, sighandler); pidfile_write(pfh); if (madvise(0, 0, MADV_PROTECT) != 0) warn("madvise failed"); if (mlockall(MCL_CURRENT | MCL_FUTURE) != 0) warn("mlockall failed"); watchdog_loop(); /* exiting */ pidfile_remove(pfh); return (EX_OK); } else { if (passive) timeout |= WD_PASSIVE; else timeout |= WD_ACTIVE; if (watchdog_patpat(timeout) < 0) err(EX_OSERR, "patting the dog"); return (EX_OK); } }
dict_t parse_database(const char *filename) { RECDB recdb; int res; dict_t db; struct stat statinfo; recdb.source = filename; if (!(recdb.f = fopen(filename, "r"))) { log_module(MAIN_LOG, LOG_ERROR, "Unable to open database file '%s' for reading: %s", filename, strerror(errno)); return NULL; } if (fstat(fileno(recdb.f), &statinfo)) { log_module(MAIN_LOG, LOG_ERROR, "Unable to fstat database file '%s': %s", filename, strerror(errno)); fclose(recdb.f); return NULL; } recdb.length = (size_t)statinfo.st_size; if (recdb.length == 0) { fclose(recdb.f); return alloc_database(); } #ifdef HAVE_MMAP /* Try mmap */ if (!mmap_error && (recdb.s = mmap(NULL, recdb.length, PROT_READ|PROT_WRITE, MAP_PRIVATE, fileno(recdb.f), 0)) != MAP_FAILED) { recdb.type = RECDB_MMAP; madvise(recdb.s, recdb.length, MADV_SEQUENTIAL); } else { /* Fall back to stdio */ if (!mmap_error) { log_module(MAIN_LOG, LOG_WARNING, "Unable to mmap database file '%s' (falling back to stdio): %s", filename, strerror(errno)); mmap_error = 1; } #else if (1) { #endif recdb.s = NULL; recdb.type = RECDB_FILE; } recdb.ctx.line = recdb.ctx.col = 1; recdb.pos = 0; if ((res = setjmp(recdb.env)) == 0) { db = parse_database_int(&recdb); } else { explain_failure(&recdb, res); _exit(1); } switch (recdb.type) { case RECDB_MMAP: #ifdef HAVE_MMAP munmap(recdb.s, recdb.length); #endif /* fall through */ case RECDB_FILE: fclose(recdb.f); break; /* Appease gcc */ default: break; } return db; }
int main(int argc, char *argv[]) { int lc, fd; char *file = NULL; struct stat stat; void *addr1; long shm_size = 0; char *msg = NULL; char filename[64]; char *progname = NULL; char *str_for_file = "abcdefghijklmnopqrstuvwxyz12345\n"; /* 32-byte string */ if ((msg = parse_opts(argc, argv, NULL, NULL)) != NULL) tst_brkm(TBROK, NULL, "OPTION PARSING ERROR - %s", msg); setup(); progname = *argv; sprintf(filename, "%s-out.%d", progname, getpid()); for (lc = 0; TEST_LOOPING(lc); lc++) { Tst_count = 0; if ((fd = open(filename, O_RDWR | O_CREAT, 0664)) < 0) tst_brkm(TBROK, cleanup, "open failed"); #ifdef MM_DEBUG tst_resm(TINFO, "filename = %s opened successfully", filename); #endif /* Writing 40 KB of random data into this file [32 * 1280 = 40960] */ for (i = 0; i < 1280; i++) if (write(fd, str_for_file, strlen(str_for_file)) == -1) tst_brkm(TBROK|TERRNO, cleanup, "write failed"); if (fstat(fd, &stat) == -1) tst_brkm(TBROK, cleanup, "fstat failed"); if ((file = mmap(NULL, stat.st_size, PROT_READ, MAP_SHARED, fd, 0)) == MAP_FAILED) tst_brkm(TBROK|TERRNO, cleanup, "mmap failed"); /* Allocate shared memory segment */ shm_size = get_shmmax(); #define min(a, b) ((a) < (b) ? (a) : (b)) if ((shmid1 = shmget(IPC_PRIVATE, min(1024*1024*1024, shm_size), IPC_CREAT|IPC_EXCL|0701)) == -1) tst_brkm(TBROK, cleanup, "shmget failed"); /* Attach shared memory segment to 0x22000000 address */ if ((addr1 = shmat(shmid1, (void *)0x22000000, 0)) == (void *) -1) tst_brkm(TBROK, cleanup, "shmat error"); /*(1) Test case for MADV_REMOVE */ TEST(madvise((void *)0x22000000, 4096, MADV_REMOVE)); check_and_print("MADV_REMOVE"); /*(2) Test case for MADV_DONTFORK */ TEST(madvise(file, (stat.st_size / 2), MADV_DONTFORK)); check_and_print("MADV_DONTFORK"); /*(3) Test case for MADV_DOFORK */ TEST(madvise(file, (stat.st_size / 2), MADV_DOFORK)); check_and_print("MADV_DOFORK"); /* Finally Unmapping the whole file */ if (munmap(file, stat.st_size) < 0) tst_brkm(TBROK|TERRNO, cleanup, "munmap failed"); close(fd); } cleanup(); tst_exit(); }
void hh_shared_init( value global_size_val, value heap_size_val ) { CAMLparam2(global_size_val, heap_size_val); global_size_b = Long_val(global_size_val); heap_size = Long_val(heap_size_val); /* MAP_NORESERVE is because we want a lot more virtual memory than what * we are actually going to use. */ int flags = MAP_SHARED | MAP_ANON | MAP_NORESERVE | MAP_FIXED; int prot = PROT_READ | PROT_WRITE; int page_size = getpagesize(); /* The total size of the shared memory. Most of it is going to remain * virtual. */ size_t shared_mem_size = global_size_b + 2 * DEP_SIZE_B + HASHTBL_SIZE_B + heap_size; char* shared_mem = (char*)mmap((void*)SHARED_MEM_INIT, page_size + shared_mem_size, prot, flags, 0, 0); if(shared_mem == MAP_FAILED) { printf("Error initializing: %s\n", strerror(errno)); exit(2); } #ifdef MADV_DONTDUMP // We are unlikely to get much useful information out of the shared heap in // a core file. Moreover, it can be HUGE, and the extensive work done dumping // it once for each CPU can mean that the user will reboot their machine // before the much more useful stack gets dumped! madvise(shared_mem, page_size + shared_mem_size, MADV_DONTDUMP); #endif // Keeping the pids around to make asserts. master_pid = getpid(); my_pid = master_pid; char* bottom = shared_mem; init_shared_globals(shared_mem); // Checking that we did the maths correctly. assert(*heap + heap_size == bottom + shared_mem_size + page_size); // Uninstall ocaml's segfault handler. It's supposed to throw an exception on // stack overflow, but we don't actually handle that exception, so what // happens in practice is we terminate at toplevel with an unhandled exception // and a useless ocaml backtrace. A core dump is actually more useful. Sigh. struct sigaction sigact; sigact.sa_handler = SIG_DFL; sigemptyset(&sigact.sa_mask); sigact.sa_flags = 0; sigaction(SIGSEGV, &sigact, NULL); set_priorities(); CAMLreturn0; }
value hh_shared_init( value global_size_val, value heap_size_val ) { CAMLparam2(global_size_val, heap_size_val); global_size_b = Long_val(global_size_val); heap_size = Long_val(heap_size_val); char* shared_mem; size_t page_size = getpagesize(); /* The total size of the shared memory. Most of it is going to remain * virtual. */ size_t shared_mem_size = global_size_b + 2 * DEP_SIZE_B + HASHTBL_SIZE_B + heap_size + page_size; #ifdef _WIN32 /* We create an anonymous memory file, whose `handle` might be inherited by slave processes. This memory file is tagged "reserved" but not "committed". This means that the memory space will be reserved in the virtual memory table but the pages will not be bound to any physical memory yet. Further calls to 'VirtualAlloc' will "commit" pages, meaning they will be bound to physical memory. This is behavior that should reflect the 'MAP_NORESERVE' flag of 'mmap' on Unix. But, on Unix, the "commit" is implicit. Committing the whole shared heap at once would require the same amount of free space in memory (or in swap file). */ HANDLE handle = CreateFileMapping( INVALID_HANDLE_VALUE, NULL, PAGE_READWRITE | SEC_RESERVE, shared_mem_size >> 32, shared_mem_size & ((1ll << 32) - 1), NULL); if (handle == NULL) { win32_maperr(GetLastError()); uerror("CreateFileMapping", Nothing); } if (!SetHandleInformation(handle, HANDLE_FLAG_INHERIT, HANDLE_FLAG_INHERIT)) { win32_maperr(GetLastError()); uerror("SetHandleInformation", Nothing); } shared_mem = MapViewOfFileEx( handle, FILE_MAP_ALL_ACCESS, 0, 0, 0, (char *)SHARED_MEM_INIT); if (shared_mem != (char *)SHARED_MEM_INIT) { shared_mem = NULL; win32_maperr(GetLastError()); uerror("MapViewOfFileEx", Nothing); } #else /* _WIN32 */ /* MAP_NORESERVE is because we want a lot more virtual memory than what * we are actually going to use. */ int flags = MAP_SHARED | MAP_ANON | MAP_NORESERVE | MAP_FIXED; int prot = PROT_READ | PROT_WRITE; shared_mem = (char*)mmap((void*)SHARED_MEM_INIT, shared_mem_size, prot, flags, 0, 0); if(shared_mem == MAP_FAILED) { printf("Error initializing: %s\n", strerror(errno)); exit(2); } #ifdef MADV_DONTDUMP // We are unlikely to get much useful information out of the shared heap in // a core file. Moreover, it can be HUGE, and the extensive work done dumping // it once for each CPU can mean that the user will reboot their machine // before the much more useful stack gets dumped! madvise(shared_mem, shared_mem_size, MADV_DONTDUMP); #endif // Keeping the pids around to make asserts. master_pid = getpid(); my_pid = master_pid; #endif /* _WIN32 */ char* bottom = shared_mem; init_shared_globals(shared_mem); // Checking that we did the maths correctly. assert(*heap + heap_size == bottom + shared_mem_size); #ifndef _WIN32 // Uninstall ocaml's segfault handler. It's supposed to throw an exception on // stack overflow, but we don't actually handle that exception, so what // happens in practice is we terminate at toplevel with an unhandled exception // and a useless ocaml backtrace. A core dump is actually more useful. Sigh. struct sigaction sigact; sigact.sa_handler = SIG_DFL; sigemptyset(&sigact.sa_mask); sigact.sa_flags = 0; sigaction(SIGSEGV, &sigact, NULL); #endif set_priorities(); CAMLreturn(Val_unit); }
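/*
 * Minimal sketch of the MADV_DONTDUMP idea above (Linux-specific; names are
 * illustrative, not part of the shared-heap code): reserve a large, mostly
 * virtual region with MAP_NORESERVE and exclude it from core dumps so a
 * crash does not write gigabytes of untouched heap to disk.
 */
#define _GNU_SOURCE
#include <stddef.h>
#include <sys/mman.h>

static void *reserve_shared_heap(size_t size)
{
  void *mem = mmap(NULL, size, PROT_READ | PROT_WRITE,
                   MAP_SHARED | MAP_ANON | MAP_NORESERVE, -1, 0);
  if (mem == MAP_FAILED)
    return NULL;
#ifdef MADV_DONTDUMP
  /* Hint only: a failure here should not abort initialization. */
  (void)madvise(mem, size, MADV_DONTDUMP);
#endif
  return mem;
}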
void cpu_invltlb(void) { madvise((void *)KvaStart, KvaEnd - KvaStart, MADV_INVAL); }
void cpu_invlpg(void *addr) { madvise(addr, PAGE_SIZE, MADV_INVAL); }
static void child(void) { size_t i; char *ptr; unsigned int usage, old_limit, old_memsw_limit; int status, pid, retries = 0; SAFE_MKDIR(cgroup_path, 0777); SAFE_FILE_PRINTF(tasks_path, "%i", getpid()); ptr = SAFE_MMAP(NULL, PAGES * page_size, PROT_READ | PROT_WRITE, MAP_PRIVATE | MAP_ANONYMOUS, -1, 0); for (i = 0; i < PAGES * page_size; i++) ptr[i] = 'a'; if (madvise(ptr, PAGES * page_size, MADV_FREE)) { if (errno == EINVAL) tst_brk(TCONF | TERRNO, "MADV_FREE is not supported"); tst_brk(TBROK | TERRNO, "MADV_FREE failed"); } if (ptr[page_size] != 'a') tst_res(TFAIL, "MADV_FREE pages were freed immediatelly"); else tst_res(TPASS, "MADV_FREE pages were not freed immediatelly"); ptr[TOUCHED_PAGE1 * page_size] = 'b'; ptr[TOUCHED_PAGE2 * page_size] = 'b'; usage = 8 * 1024 * 1024; tst_res(TINFO, "Setting memory limits to %u %u", usage, 2 * usage); SAFE_FILE_SCANF(limit_in_bytes_path, "%u", &old_limit); if (swap_accounting_enabled) SAFE_FILE_SCANF(memsw_limit_in_bytes_path, "%u", &old_memsw_limit); SAFE_FILE_PRINTF(limit_in_bytes_path, "%u", usage); if (swap_accounting_enabled) SAFE_FILE_PRINTF(memsw_limit_in_bytes_path, "%u", 2 * usage); do { sleep_between_faults++; pid = SAFE_FORK(); if (!pid) memory_pressure_child(); tst_res(TINFO, "Memory hungry child %i started, try %i", pid, retries); SAFE_WAIT(&status); } while (retries++ < 10 && count_freed(ptr) == 0); char map[PAGES+1]; unsigned int freed = 0; unsigned int corrupted = 0; for (i = 0; i < PAGES; i++) { char exp_val; if (ptr[i * page_size]) { exp_val = 'a'; map[i] = 'p'; } else { exp_val = 0; map[i] = '_'; freed++; } if (i != TOUCHED_PAGE1 && i != TOUCHED_PAGE2) { if (check_page(ptr + i * page_size, exp_val)) { map[i] = '?'; corrupted++; } } else { if (check_page_baaa(ptr + i * page_size)) { map[i] = '?'; corrupted++; } } } map[PAGES] = '\0'; tst_res(TINFO, "Memory map: %s", map); if (freed) tst_res(TPASS, "Pages MADV_FREE were freed on low memory"); else tst_res(TFAIL, "No MADV_FREE page was freed on low memory"); if (corrupted) tst_res(TFAIL, "Found corrupted page"); else tst_res(TPASS, "All pages have expected content"); if (swap_accounting_enabled) SAFE_FILE_PRINTF(memsw_limit_in_bytes_path, "%u", old_memsw_limit); SAFE_FILE_PRINTF(limit_in_bytes_path, "%u", old_limit); SAFE_MUNMAP(ptr, PAGES); exit(0); }
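/*
 * Minimal MADV_FREE sketch (Linux 4.5+, guarded because older headers lack
 * the flag; standalone, not part of the cgroup test above): freed pages keep
 * their contents until the kernel actually needs the memory, and writing to
 * a page cancels the "free" for that page.
 */
#define _GNU_SOURCE
#include <stdio.h>
#include <string.h>
#include <sys/mman.h>
#include <unistd.h>

int main(void)
{
	size_t len = 16 * (size_t)sysconf(_SC_PAGESIZE);
	char *p = mmap(NULL, len, PROT_READ | PROT_WRITE,
		       MAP_PRIVATE | MAP_ANONYMOUS, -1, 0);
	if (p == MAP_FAILED) { perror("mmap"); return 1; }
	memset(p, 'a', len);
#ifdef MADV_FREE
	if (madvise(p, len, MADV_FREE)) { perror("madvise(MADV_FREE)"); return 1; }
	/* Still 'a' unless the kernel reclaimed the page in the meantime. */
	printf("after MADV_FREE, first byte: %c\n", p[0]);
	p[0] = 'b';	/* the store makes this page live again */
#else
	fprintf(stderr, "MADV_FREE not defined on this system\n");
#endif
	munmap(p, len);
	return 0;
}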
static int display_folder(const char *fn, int skip, int last_only) { const char new_mail_str[] = "From "; const size_t new_mail_str_len = sizeof(new_mail_str)-1; char *from, *subject; char *p, *q, *b, *block, *body, *head; struct stat st; ssize_t left; size_t llen, hsize, fsize; off_t off; int fd; fd = open(fn, O_RDONLY); if (fd < 0) { perror(fn); return 1; } if (fstat(fd, &st) < 0) { perror(fn); return 1; } subject = NULL; from = NULL; fsize = st.st_size; /* New mails start with "\n\r?From ", look for those blocks * and make sure we won't miss mails that are split in * two blocks (look at fsize-7 bytes on all but the last block) */ block = mmap(NULL, fsize, PROT_READ, MAP_PRIVATE, fd, 0); if (block == MAP_FAILED) block = NULL; head = block; /* special case a bit here to seeking backwards for the last header */ if (block && last_only) { for (b = block+fsize-0xffff, left=0xffff; b > block; b-= 0xffff, left += 0xffff) { p = memmem(b, left, new_mail_str, new_mail_str_len); // check for \n\r if (p && (*(p-1) == '\n' || (*(p-2) == '\n' && *(p-1) == '\r'))) { head = p; break; } } } /* Look first for "From ", and then make sure that it is * preceded by possibly a \r and a \n. */ for (off=0, b=head; b; ) { llen = b-block; left = fsize-llen; if (left <= 0) break; if (llen-off > MAP_RECYCLE) { /* drop some pages from the cache */ madvise(block, llen, MADV_DONTNEED); off = llen; } p = memmem(b, left, new_mail_str, new_mail_str_len); // check for \n\r if (p && p != block) if (! (*(p-1) == '\n' || (p > block+1 && *(p-2) == '\n' && *(p-1) == '\r'))) { /* From was not preceded by \n or \n\r */ b = p + new_mail_str_len; continue; } if (p == NULL) { /* No new mail was found in this block. Increment "pos" and start again. We need * to make sure that we don't increment "pos" too much, if there was a header * split on block boundary we want to catch it */ break; } /* So we found a header. Yay. */ head = p; left = fsize-(head-block); /* Look for an end-of-header. */ for (body = NULL; body == NULL; p = q+1) { if ((q = memchr(p, '\n', left)) == NULL) break; left = fsize-(q-block); if (left >= 0 && *(q+1) == '\n') body = q+2; else if (left >= 1 && *(q+1) == '\r' && *(q+2) == '\n') body = q+3; else left = left-1; } if (body == NULL) break; /* invalid mail / end of mbox */ b = body; /* for next block */ hsize = body-head; /* size of header */ /* read the header, look for subject: and from: */ if (from) { free(from); from = 0; } if (subject) { free(subject); subject = 0; } for (p=head; hsize>0; ) { q = memchr(p, '\n', hsize); if (q == NULL) /* end of header, apparently. */ break; llen = q-p-1; if (llen > 5 && strncasecmp(p, "from:", 5) == 0) from = read_header(p, llen, hsize); else if (llen > 8 && strncasecmp(p, "subject:", 8) == 0) subject = read_header(p, llen, hsize); if (from && subject) { skip --; if (!last_only && skip <= 0) display_mail(from, subject); break; } hsize -= llen+2; p = q+1; if (*p == '\r') { hsize--; p++; } } /* read next mail */ } if (block) munmap(block, fsize); close(fd); if (last_only && from && subject) display_mail(from, subject); /* show the last mail */ return 0; }
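/*
 * Sketch of the page-recycling idea above (illustrative names, not part of
 * the mail reader): while scanning a read-only file mapping front to back,
 * periodically madvise(MADV_DONTNEED) the part already consumed so a huge
 * mailbox does not push everything else out of the page cache.
 */
#define _DEFAULT_SOURCE
#include <stddef.h>
#include <sys/mman.h>

#define RECYCLE_STEP (4UL * 1024 * 1024)   /* hypothetical 4 MiB threshold */

static unsigned long scan_and_recycle(char *map, size_t size)
{
	unsigned long newlines = 0;
	size_t i, dropped = 0;

	for (i = 0; i < size; i++) {
		if (map[i] == '\n')
			newlines++;
		if (i - dropped > RECYCLE_STEP) {
			/* Everything below `i` was already read; let the kernel drop it. */
			madvise(map, i, MADV_DONTNEED);
			dropped = i;
		}
	}
	return newlines;
}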
static void * mozload(const char * path, void *zip, struct cdir_entry *cdir_start, uint16_t cdir_entries) { #ifdef DEBUG struct timeval t0, t1; gettimeofday(&t0, 0); #endif struct cdir_entry *entry = find_cdir_entry(cdir_start, cdir_entries, path); struct local_file_header *file = (struct local_file_header *)((char *)zip + letoh32(entry->offset)); void * data = ((char *)&file->data) + letoh16(file->filename_size) + letoh16(file->extra_field_size); void * handle; if (extractLibs) { char fullpath[PATH_MAX]; snprintf(fullpath, PATH_MAX, "%s/%s", getenv("CACHE_PATH"), path); __android_log_print(ANDROID_LOG_ERROR, "GeckoLibLoad", "resolved %s to %s", path, fullpath); extractFile(fullpath, entry, data); handle = __wrap_dlopen(fullpath, RTLD_LAZY); if (!handle) __android_log_print(ANDROID_LOG_ERROR, "GeckoLibLoad", "Couldn't load %s because %s", fullpath, __wrap_dlerror()); #ifdef DEBUG gettimeofday(&t1, 0); __android_log_print(ANDROID_LOG_ERROR, "GeckoLibLoad", "%s: spent %d", path, (((long long)t1.tv_sec * 1000000LL) + (long long)t1.tv_usec) - (((long long)t0.tv_sec * 1000000LL) + (long long)t0.tv_usec)); #endif return handle; } size_t offset = letoh32(entry->offset) + sizeof(*file) + letoh16(file->filename_size) + letoh16(file->extra_field_size); bool skipLibCache = false; int fd = zip_fd; void * buf = NULL; uint32_t lib_size = letoh32(entry->uncompressed_size); int cache_fd = 0; if (letoh16(file->compression) == DEFLATE) { cache_fd = lookupLibCacheFd(path); fd = cache_fd; if (fd < 0) fd = createAshmem(lib_size, path); #ifdef DEBUG else __android_log_print(ANDROID_LOG_ERROR, "GeckoLibLoad", "Loading %s from cache", path); #endif if (fd < 0) { __android_log_print(ANDROID_LOG_ERROR, "GeckoLibLoad", "Couldn't open " ASHMEM_NAME_DEF ", Error %d, %s, bailing out", errno, strerror(errno)); return NULL; } buf = mmap(NULL, lib_size, PROT_READ | PROT_WRITE, MAP_SHARED, fd, 0); if (buf == (void *)-1) { __android_log_print(ANDROID_LOG_ERROR, "GeckoLibLoad", "Couldn't mmap decompression buffer"); close(fd); return NULL; } offset = 0; if (cache_fd < 0) { extractLib(entry, data, buf); #ifdef ANDROID_ARM_LINKER /* We just extracted data that is going to be executed in the future. * We thus need to ensure Instruction and Data cache coherency. */ cacheflush((unsigned) buf, (unsigned) buf + entry->uncompressed_size, 0); #endif addLibCacheFd(path, fd, lib_size, buf); } // preload libxul, to avoid slowly demand-paging it if (!strcmp(path, "libxul.so")) madvise(buf, entry->uncompressed_size, MADV_WILLNEED); data = buf; } #ifdef DEBUG __android_log_print(ANDROID_LOG_ERROR, "GeckoLibLoad", "Loading %s with len %d (0x%08x) and offset %d (0x%08x)", path, lib_size, lib_size, offset, offset); #endif handle = moz_mapped_dlopen(path, RTLD_LAZY, fd, data, lib_size, offset); if (!handle) __android_log_print(ANDROID_LOG_ERROR, "GeckoLibLoad", "Couldn't load %s because %s", path, __wrap_dlerror()); if (buf) munmap(buf, lib_size); #ifdef DEBUG gettimeofday(&t1, 0); __android_log_print(ANDROID_LOG_ERROR, "GeckoLibLoad", "%s: spent %d", path, (((long long)t1.tv_sec * 1000000LL) + (long long)t1.tv_usec) - (((long long)t0.tv_sec * 1000000LL) + (long long)t0.tv_usec)); #endif return handle; }
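/*
 * Small sketch of the "preload to avoid demand paging" idea above
 * (illustrative only, not Gecko code): after mapping a file that is about to
 * be used heavily, MADV_WILLNEED asks the kernel to start reading it in the
 * background instead of paging it in one fault at a time.
 */
#define _DEFAULT_SOURCE
#include <sys/mman.h>
#include <sys/stat.h>
#include <fcntl.h>
#include <unistd.h>

static void *map_and_prefetch(const char *path, size_t *out_len)
{
    int fd = open(path, O_RDONLY);
    struct stat st;
    void *p = MAP_FAILED;

    if (fd >= 0 && fstat(fd, &st) == 0 && st.st_size > 0)
        p = mmap(NULL, (size_t)st.st_size, PROT_READ, MAP_PRIVATE, fd, 0);
    if (fd >= 0)
        close(fd);                          /* the mapping stays valid after close */
    if (p == MAP_FAILED)
        return NULL;
    madvise(p, (size_t)st.st_size, MADV_WILLNEED);  /* kick off read-ahead */
    *out_len = (size_t)st.st_size;
    return p;
}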
/*!
 * \brief Clear bitmap flag.
 */
void TBitMapMarker::clear(void) {
  /* Temporarily advise sequential access for the zero-clearing pass. */
  madvise(this->bitmapAddr, this->bitmapSize, MADV_SEQUENTIAL);
  memset(this->bitmapAddr, 0, this->bitmapSize);
  madvise(this->bitmapAddr, this->bitmapSize, MADV_RANDOM);
}
void mmapzero(void) { int n; if (opt_anon) { x = mmap(NULL, SIZE + SIZE - ps, PROT_READ | PROT_WRITE, MAP_PRIVATE | MAP_ANONYMOUS, -1, 0); } else { if ((fd = open("/dev/zero", O_RDWR, 0666)) < 0) tst_brkm(TBROK | TERRNO, cleanup, "open"); x = mmap(NULL, SIZE + SIZE - ps, PROT_READ | PROT_WRITE, MAP_PRIVATE, fd, 0); } if (x == MAP_FAILED) tst_brkm(TFAIL | TERRNO, cleanup, "mmap"); #ifdef HAVE_MADV_MERGEABLE if (opt_ksm) { if (madvise(x, SIZE + SIZE - ps, MADV_MERGEABLE) == -1) tst_brkm(TBROK | TERRNO, cleanup, "madvise"); } #endif x[SIZE] = 0; switch (n = fork()) { case -1: tst_brkm(TBROK | TERRNO, cleanup, "fork"); case 0: if (munmap(x + SIZE + ps, SIZE - ps - ps) == -1) tst_brkm(TFAIL | TERRNO, cleanup, "munmap"); exit(0); default: break; } switch (n = fork()) { case -1: tst_brkm(TBROK | TERRNO, cleanup, "fork"); case 0: if (munmap(x + SIZE + ps, SIZE - ps - ps) == -1) tst_brkm(TFAIL | TERRNO, cleanup, "subsequent munmap #1"); exit(0); default: switch (n = fork()) { case -1: tst_brkm(TBROK | TERRNO, cleanup, "fork"); case 0: if (munmap(x + SIZE + ps, SIZE - ps - ps) == -1) tst_brkm(TFAIL | TERRNO, cleanup, "subsequent munmap #2"); exit(0); default: break; } break; } if (munmap(x, SIZE + SIZE - ps) == -1) tst_resm(TFAIL | TERRNO, "munmap all"); while (waitpid(-1, &n, WUNTRACED | WCONTINUED) > 0) if (WEXITSTATUS(n) != 0) tst_resm(TFAIL, "child exit status is %d", WEXITSTATUS(n)); }
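/*
 * Minimal KSM sketch (needs a kernel with CONFIG_KSM and ksmd enabled via
 * /sys/kernel/mm/ksm/run; standalone, not part of the LTP case above): two
 * anonymous regions with identical contents are registered with
 * MADV_MERGEABLE so ksmd can merge their pages copy-on-write.
 */
#define _GNU_SOURCE
#include <stdio.h>
#include <string.h>
#include <sys/mman.h>

int main(void)
{
	size_t len = 4 * 1024 * 1024;
	char *a = mmap(NULL, len, PROT_READ | PROT_WRITE,
		       MAP_PRIVATE | MAP_ANONYMOUS, -1, 0);
	char *b = mmap(NULL, len, PROT_READ | PROT_WRITE,
		       MAP_PRIVATE | MAP_ANONYMOUS, -1, 0);
	if (a == MAP_FAILED || b == MAP_FAILED) { perror("mmap"); return 1; }
	memset(a, 0x5a, len);
	memset(b, 0x5a, len);		/* same contents as `a` */
#ifdef MADV_MERGEABLE
	if (madvise(a, len, MADV_MERGEABLE) || madvise(b, len, MADV_MERGEABLE)) {
		perror("madvise(MADV_MERGEABLE)");
		return 1;
	}
	puts("regions registered; watch /sys/kernel/mm/ksm/pages_shared");
#else
	fputs("MADV_MERGEABLE not defined on this system\n", stderr);
#endif
	return 0;
}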
int network_write_chunkqueue_writev(server *srv, connection *con, int fd, chunkqueue *cq) { chunk *c; size_t chunks_written = 0; for(c = cq->first; c; c = c->next) { int chunk_finished = 0; switch(c->type) { case MEM_CHUNK: { char * offset; size_t toSend; ssize_t r; size_t num_chunks, i; struct iovec *chunks; chunk *tc; size_t num_bytes = 0; #if defined(_SC_IOV_MAX) /* IRIX, MacOS X, FreeBSD, Solaris, ... */ const size_t max_chunks = sysconf(_SC_IOV_MAX); #elif defined(IOV_MAX) /* Linux x86 (glibc-2.3.6-3) */ const size_t max_chunks = IOV_MAX; #elif defined(MAX_IOVEC) /* Linux ia64 (glibc-2.3.3-98.28) */ const size_t max_chunks = MAX_IOVEC; #elif defined(UIO_MAXIOV) /* Linux x86 (glibc-2.2.5-233) */ const size_t max_chunks = UIO_MAXIOV; #elif (defined(__FreeBSD__) && __FreeBSD_version < 500000) || defined(__DragonFly__) || defined(__APPLE__) /* - FreeBSD 4.x * - MacOS X 10.3.x * (covered in -DKERNEL) * */ const size_t max_chunks = 1024; /* UIO_MAXIOV value from sys/uio.h */ #else #error "sysconf() doesnt return _SC_IOV_MAX ..., check the output of 'man writev' for the EINVAL error and send the output to [email protected]" #endif /* we can't send more then SSIZE_MAX bytes in one chunk */ /* build writev list * * 1. limit: num_chunks < max_chunks * 2. limit: num_bytes < SSIZE_MAX */ for (num_chunks = 0, tc = c; tc && tc->type == MEM_CHUNK && num_chunks < max_chunks; num_chunks++, tc = tc->next); chunks = calloc(num_chunks, sizeof(*chunks)); for(tc = c, i = 0; i < num_chunks; tc = tc->next, i++) { if (tc->mem->used == 0) { chunks[i].iov_base = tc->mem->ptr; chunks[i].iov_len = 0; } else { offset = tc->mem->ptr + tc->offset; toSend = tc->mem->used - 1 - tc->offset; chunks[i].iov_base = offset; /* protect the return value of writev() */ if (toSend > SSIZE_MAX || num_bytes + toSend > SSIZE_MAX) { chunks[i].iov_len = SSIZE_MAX - num_bytes; num_chunks = i + 1; break; } else { chunks[i].iov_len = toSend; } num_bytes += toSend; } } if ((r = writev(fd, chunks, num_chunks)) < 0) { switch (errno) { case EAGAIN: case EINTR: r = 0; break; case EPIPE: case ECONNRESET: free(chunks); return -2; default: log_error_write(srv, __FILE__, __LINE__, "ssd", "writev failed:", strerror(errno), fd); free(chunks); return -1; } } cq->bytes_out += r; /* check which chunks have been written */ for(i = 0, tc = c; i < num_chunks; i++, tc = tc->next) { if (r >= (ssize_t)chunks[i].iov_len) { /* written */ r -= chunks[i].iov_len; tc->offset += chunks[i].iov_len; if (chunk_finished) { /* skip the chunks from further touches */ chunks_written++; c = c->next; } else { /* chunks_written + c = c->next is done in the for()*/ chunk_finished++; } } else { /* partially written */ tc->offset += r; chunk_finished = 0; break; } } free(chunks); break; } case FILE_CHUNK: { ssize_t r; off_t abs_offset; off_t toSend; stat_cache_entry *sce = NULL; #define KByte * 1024 #define MByte * 1024 KByte #define GByte * 1024 MByte const off_t we_want_to_mmap = 512 KByte; char *start = NULL; if (HANDLER_ERROR == stat_cache_get_entry(srv, con, c->file.name, &sce)) { log_error_write(srv, __FILE__, __LINE__, "sb", strerror(errno), c->file.name); return -1; } abs_offset = c->file.start + c->offset; if (abs_offset > sce->st.st_size) { log_error_write(srv, __FILE__, __LINE__, "sb", "file was shrinked:", c->file.name); return -1; } /* mmap the buffer * - first mmap * - new mmap as the we are at the end of the last one */ if (c->file.mmap.start == MAP_FAILED || abs_offset == (off_t)(c->file.mmap.offset + c->file.mmap.length)) { /* Optimizations for the future: 
* * adaptive mem-mapping * the problem: * we mmap() the whole file. If someone has alot large files and 32bit * machine the virtual address area will be unrun and we will have a failing * mmap() call. * solution: * only mmap 16M in one chunk and move the window as soon as we have finished * the first 8M * * read-ahead buffering * the problem: * sending out several large files in parallel trashes the read-ahead of the * kernel leading to long wait-for-seek times. * solutions: (increasing complexity) * 1. use madvise * 2. use a internal read-ahead buffer in the chunk-structure * 3. use non-blocking IO for file-transfers * */ /* all mmap()ed areas are 512kb expect the last which might be smaller */ off_t we_want_to_send; size_t to_mmap; /* this is a remap, move the mmap-offset */ if (c->file.mmap.start != MAP_FAILED) { munmap(c->file.mmap.start, c->file.mmap.length); c->file.mmap.offset += we_want_to_mmap; } else { /* in case the range-offset is after the first mmap()ed area we skip the area */ c->file.mmap.offset = 0; while (c->file.mmap.offset + we_want_to_mmap < c->file.start) { c->file.mmap.offset += we_want_to_mmap; } } /* length is rel, c->offset too, assume there is no limit at the mmap-boundaries */ we_want_to_send = c->file.length - c->offset; to_mmap = (c->file.start + c->file.length) - c->file.mmap.offset; /* we have more to send than we can mmap() at once */ if (abs_offset + we_want_to_send > c->file.mmap.offset + we_want_to_mmap) { we_want_to_send = (c->file.mmap.offset + we_want_to_mmap) - abs_offset; to_mmap = we_want_to_mmap; } if (-1 == c->file.fd) { /* open the file if not already open */ if (-1 == (c->file.fd = open(c->file.name->ptr, O_RDONLY))) { log_error_write(srv, __FILE__, __LINE__, "sbs", "open failed for:", c->file.name, strerror(errno)); return -1; } #ifdef FD_CLOEXEC fcntl(c->file.fd, F_SETFD, FD_CLOEXEC); #endif } if (MAP_FAILED == (c->file.mmap.start = mmap(0, to_mmap, PROT_READ, MAP_SHARED, c->file.fd, c->file.mmap.offset))) { /* close it here, otherwise we'd have to set FD_CLOEXEC */ log_error_write(srv, __FILE__, __LINE__, "ssbd", "mmap failed:", strerror(errno), c->file.name, c->file.fd); return -1; } c->file.mmap.length = to_mmap; #ifdef LOCAL_BUFFERING buffer_copy_string_len(c->mem, c->file.mmap.start, c->file.mmap.length); #else #ifdef HAVE_MADVISE /* don't advise files < 64Kb */ if (c->file.mmap.length > (64 KByte)) { /* darwin 7 is returning EINVAL all the time and I don't know how to * detect this at runtime.i * * ignore the return value for now */ madvise(c->file.mmap.start, c->file.mmap.length, MADV_WILLNEED); } #endif #endif /* chunk_reset() or chunk_free() will cleanup for us */ } /* to_send = abs_mmap_end - abs_offset */ toSend = (c->file.mmap.offset + c->file.mmap.length) - (abs_offset); if (toSend < 0) { log_error_write(srv, __FILE__, __LINE__, "soooo", "toSend is negative:", toSend, c->file.mmap.length, abs_offset, c->file.mmap.offset); assert(toSend < 0); } #ifdef LOCAL_BUFFERING start = c->mem->ptr; #else start = c->file.mmap.start; #endif if ((r = write(fd, start + (abs_offset - c->file.mmap.offset), toSend)) < 0) { switch (errno) { case EAGAIN: case EINTR: r = 0; break; case EPIPE: case ECONNRESET: return -2; default: log_error_write(srv, __FILE__, __LINE__, "ssd", "write failed:", strerror(errno), fd); return -1; } } c->offset += r; cq->bytes_out += r; if (c->offset == c->file.length) { chunk_finished = 1; /* we don't need the mmaping anymore */ if (c->file.mmap.start != MAP_FAILED) { munmap(c->file.mmap.start, c->file.mmap.length); 
c->file.mmap.start = MAP_FAILED; } } break; } case SMB_CHUNK: { ssize_t r; off_t offset; size_t toSend; off_t rest_len; stat_cache_entry *sce = NULL; //#define BUFF_SIZE 2048 //- 256K #define BUFF_SIZE 256*1024 char buff[BUFF_SIZE]={0}; // memset(buff,0,BUFF_SIZE); // char *buff=NULL; int ifd; if (HANDLER_ERROR == stat_cache_get_entry(srv, con, c->file.name, &sce)) { log_error_write(srv, __FILE__, __LINE__, "sb", strerror(errno), c->file.name); Cdbg(DBE,"stat cache get entry failed"); return -1; } offset = c->file.start + c->offset; toSend = (c->file.length - c->offset>BUFF_SIZE)? BUFF_SIZE : c->file.length - c->offset ; // rest_len = c->file.length - c->offset; // toSend = Cdbg(DBE,"offset =%lli, toSend=%d, sce->st.st_size=%lli", offset, toSend, sce->st.st_size); if (offset > sce->st.st_size) { log_error_write(srv, __FILE__, __LINE__, "sb", "file was shrinked:", c->file.name); Cdbg(DBE,"offset > size"); return -1; } // if (-1 == (ifd = open(c->file.name->ptr, O_RDONLY))) { if (-1 == (ifd = smbc_wrapper_open(con,c->file.name->ptr, O_RDONLY, 0755))) { log_error_write(srv, __FILE__, __LINE__, "ss", "open failed: ", strerror(errno)); Cdbg(DBE,"wrapper open failed,ifd=%d, fn =%s, open failed =%s, errno =%d",ifd, c->file.name->ptr,strerror(errno),errno); return -1; } Cdbg(DBE,"ifd =%d, toSend=%d",ifd, toSend); smbc_wrapper_lseek(con, ifd, offset, SEEK_SET ); if (-1 == (toSend = smbc_wrapper_read(con, ifd, buff, toSend ))) { log_error_write(srv, __FILE__, __LINE__, "ss", "read: ", strerror(errno)); smbc_wrapper_close(con, ifd); Cdbg(DBE,"ifd =%d,toSend =%d, errno=%s",ifd,toSend, strerror(errno)); return -1; } Cdbg(DBE,"close ifd=%d, toSend=%d",ifd,toSend); smbc_wrapper_close(con, ifd); Cdbg(DBE,"write socket fd=%d",fd); if ((r = write(fd, buff, toSend)) < 0) { switch (errno) { case EAGAIN: case EINTR: r = 0; break; case EPIPE: case ECONNRESET: return -2; default: log_error_write(srv, __FILE__, __LINE__, "ssd", "write failed:", strerror(errno), fd); return -1; } } c->offset += r; cq->bytes_out += r; Cdbg(DBE,"r =%d",r); if (c->offset == c->file.length) { chunk_finished = 1; } break; } default: log_error_write(srv, __FILE__, __LINE__, "ds", c, "type not known"); return -1; } if (!chunk_finished) { /* not finished yet */ break; } chunks_written++; } return chunks_written; }
void test_ksm_merge_across_nodes(unsigned long nr_pages) { char **memory; int i, ret; int num_nodes, *nodes; unsigned long length; unsigned long pagesize; #if HAVE_NUMA_H && HAVE_LINUX_MEMPOLICY_H && HAVE_NUMAIF_H \ && HAVE_MPOL_CONSTANTS unsigned long nmask[MAXNODES / BITS_PER_LONG] = { 0 }; #endif ret = get_allowed_nodes_arr(NH_MEMS|NH_CPUS, &num_nodes, &nodes); if (ret != 0) tst_brkm(TBROK|TERRNO, cleanup, "get_allowed_nodes_arr"); if (num_nodes < 2) { tst_resm(TINFO, "need NUMA system support"); free(nodes); return; } pagesize = sysconf(_SC_PAGE_SIZE); length = nr_pages * pagesize; memory = malloc(num_nodes * sizeof(char *)); for (i = 0; i < num_nodes; i++) { memory[i] = mmap(NULL, length, PROT_READ|PROT_WRITE, MAP_ANONYMOUS|MAP_PRIVATE, -1, 0); if (memory[i] == MAP_FAILED) tst_brkm(TBROK|TERRNO, tst_exit, "mmap"); #ifdef HAVE_MADV_MERGEABLE if (madvise(memory[i], length, MADV_MERGEABLE) == -1) tst_brkm(TBROK|TERRNO, tst_exit, "madvise"); #endif #if HAVE_NUMA_H && HAVE_LINUX_MEMPOLICY_H && HAVE_NUMAIF_H \ && HAVE_MPOL_CONSTANTS clean_node(nmask); set_node(nmask, nodes[i]); /* * Use mbind() to make sure each node contains * length size memory. */ ret = mbind(memory[i], length, MPOL_BIND, nmask, MAXNODES, 0); if (ret == -1) tst_brkm(TBROK|TERRNO, tst_exit, "mbind"); #endif memset(memory[i], 10, length); } SAFE_FILE_PRINTF(cleanup, PATH_KSM "sleep_millisecs", "0"); SAFE_FILE_PRINTF(cleanup, PATH_KSM "pages_to_scan", "%ld", nr_pages * num_nodes); /* * merge_across_nodes setting can be changed only when there * are no ksm shared pages in system, so set run 2 to unmerge * pages first, then to 1 after changing merge_across_nodes, * to remerge according to the new setting. */ SAFE_FILE_PRINTF(cleanup, PATH_KSM "run", "2"); wait_ksmd_done(); tst_resm(TINFO, "Start to test KSM with merge_across_nodes=1"); SAFE_FILE_PRINTF(cleanup, PATH_KSM "merge_across_nodes", "1"); SAFE_FILE_PRINTF(cleanup, PATH_KSM "run", "1"); group_check(1, 1, nr_pages * num_nodes - 1, 0, 0, 0, nr_pages * num_nodes); SAFE_FILE_PRINTF(cleanup, PATH_KSM "run", "2"); wait_ksmd_done(); tst_resm(TINFO, "Start to test KSM with merge_across_nodes=0"); SAFE_FILE_PRINTF(cleanup, PATH_KSM "merge_across_nodes", "0"); SAFE_FILE_PRINTF(cleanup, PATH_KSM "run", "1"); group_check(1, num_nodes, nr_pages * num_nodes - num_nodes, 0, 0, 0, nr_pages * num_nodes); SAFE_FILE_PRINTF(cleanup, PATH_KSM "run", "2"); wait_ksmd_done(); }
static void springfield_load(springfield_t *r) { struct stat st; r->mapfd = open(r->path, O_CREAT | O_RDWR, S_IRUSR | S_IWUSR); assert(r->mapfd > -1); int s = fstat(r->mapfd, &st); if (!s) { r->eof = st.st_size >= 4 ? st.st_size : 0; } r->map = NULL; if (!r->eof) { r->offsets = malloc(r->num_buckets * sizeof(uint64_t)); memset(r->offsets, 0xff, r->num_buckets * sizeof(uint64_t)); } else { r->mmap_alloc = r->eof; r->map = (uint8_t *)mmap( NULL, r->mmap_alloc, PROT_READ, MAP_PRIVATE, r->mapfd, 0); assert(r->map != MAP_FAILED); s = madvise(r->map, r->mmap_alloc, MADV_SEQUENTIAL); assert(!s); uint8_t *p = r->map; r->num_buckets = *(uint32_t *)p; r->offsets = malloc(r->num_buckets * sizeof(uint64_t)); memset(r->offsets, 0xff, r->num_buckets * sizeof(uint64_t)); uint64_t off = 4; p += 4; while (1) { if (off + 8 > r->eof) { r->eof = off; break; } springfield_header_v1 *h = (springfield_header_v1 *)p; if (h->version == 0) { r->eof = off; break; } assert(h->version == 1); if (off + HEADER_SIZE > r->eof) { r->eof = off; break; } if (h->klen == 0) { r->eof = off; break; } assert(h->vlen <= MAX_VLEN); uint32_t jump = h->vlen + h->klen + HEADER_SIZE; if (off + jump > r->eof) { r->eof = off; break; } /* Check CRC32 */ if (crc32(0, p + 4, HEADER_SIZE_MINUS_CRC + h->klen + h->vlen) != h->crc) { r->eof = off; break; } char *key = (char *)(p + HEADER_SIZE); uint64_t prev = springfield_index_keyval(r, key, off); assert(prev == h->last); off += jump; p += jump; } munmap(r->map, r->mmap_alloc); } r->mmap_alloc = r->eof + MMAP_OVERFLOW; s = ftruncate(r->mapfd, (off_t)r->mmap_alloc); assert(!s); r->map = (uint8_t *)mmap( NULL, r->mmap_alloc, PROT_READ | PROT_WRITE, MAP_SHARED, r->mapfd, 0); assert(r->map != MAP_FAILED); s = madvise(r->map, r->mmap_alloc, MADV_RANDOM); assert(!s); uint32_t buckets_on_record = *(uint32_t *)r->map; if (buckets_on_record) { assert(buckets_on_record == r->num_buckets); } else { *(uint32_t *)r->map = r->num_buckets; assert(r->eof == 0); r->eof = 4; } }
void runtime·SysUnused(void *v, uintptr n) { runtime·madvise(v, n, MADV_FREE); }
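/*
 * Hedged illustrative sketch (not from the runtime source above): giving pages
 * back to the kernel the way the runtime·SysUnused wrapper does, using
 * MADV_FREE where it exists and falling back to MADV_DONTNEED. All names here
 * are invented for this example; assumes a POSIX system.
 */
#include <sys/mman.h>
#include <stdio.h>

static void release_pages(void *addr, size_t len)
{
#ifdef MADV_FREE
    /* MADV_FREE: pages may be reclaimed lazily; their contents become undefined. */
    if (madvise(addr, len, MADV_FREE) == 0)
        return;
#endif
    /* MADV_DONTNEED: pages are dropped now and refault as zero-fill (anon, private). */
    if (madvise(addr, len, MADV_DONTNEED) != 0)
        perror("madvise");
}

int main(void)
{
    size_t len = 1 << 20;
    void *p = mmap(NULL, len, PROT_READ | PROT_WRITE,
                   MAP_PRIVATE | MAP_ANONYMOUS, -1, 0);
    if (p == MAP_FAILED) { perror("mmap"); return 1; }
    ((char *)p)[0] = 1;     /* touch a page so there is something to release */
    release_pages(p, len);  /* hint that the range is no longer needed */
    munmap(p, len);
    return 0;
}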
int copy_file(FTSENT *entp, int dne) { static char *buf; static char *zeroes; struct stat to_stat, *fs; int ch, checkch, from_fd, rcount, rval, to_fd, wcount; #ifdef VM_AND_BUFFER_CACHE_SYNCHRONIZED char *p; #endif if (!buf) { buf = malloc(MAXBSIZE); if (!buf) err(1, "malloc"); } if (!zeroes) { zeroes = calloc(1, MAXBSIZE); if (!zeroes) err(1, "calloc"); } if ((from_fd = open(entp->fts_path, O_RDONLY, 0)) == -1) { warn("%s", entp->fts_path); return (1); } fs = entp->fts_statp; /* * In -f (force) mode, we always unlink the destination first * if it exists. Note that -i and -f are mutually exclusive. */ if (!dne && fflag) (void)unlink(to.p_path); /* * If the file exists and we're interactive, verify with the user. * If the file DNE, set the mode to be the from file, minus setuid * bits, modified by the umask; arguably wrong, but it makes copying * executables work right and it's been that way forever. (The * other choice is 666 or'ed with the execute bits on the from file * modified by the umask.) */ if (!dne && !fflag) { if (iflag) { (void)fprintf(stderr, "overwrite %s? ", to.p_path); checkch = ch = getchar(); while (ch != '\n' && ch != EOF) ch = getchar(); if (checkch != 'y' && checkch != 'Y') { (void)close(from_fd); return (0); } } to_fd = open(to.p_path, O_WRONLY | O_TRUNC, 0); } else to_fd = open(to.p_path, O_WRONLY | O_TRUNC | O_CREAT, fs->st_mode & ~(S_ISTXT | S_ISUID | S_ISGID)); if (to_fd == -1) { warn("%s", to.p_path); (void)close(from_fd); return (1); } rval = 0; /* * Mmap and write if less than 8M (the limit is so we don't totally * trash memory on big files. This is really a minor hack, but it * wins some CPU back. */ #ifdef VM_AND_BUFFER_CACHE_SYNCHRONIZED /* XXX broken for 0-size mmap */ if (fs->st_size <= 8 * 1048576) { if ((p = mmap(NULL, (size_t)fs->st_size, PROT_READ, MAP_FILE|MAP_SHARED, from_fd, (off_t)0)) == MAP_FAILED) { warn("mmap: %s", entp->fts_path); rval = 1; } else { madvise(p, fs->st_size, MADV_SEQUENTIAL); if (write(to_fd, p, fs->st_size) != fs->st_size) { warn("%s", to.p_path); rval = 1; } /* Some systems don't unmap on close(2). */ if (munmap(p, fs->st_size) < 0) { warn("%s", entp->fts_path); rval = 1; } } } else #endif { int skipholes = 0; struct stat tosb; if (!fstat(to_fd, &tosb) && S_ISREG(tosb.st_mode)) skipholes = 1; while ((rcount = read(from_fd, buf, MAXBSIZE)) > 0) { if (skipholes && memcmp(buf, zeroes, rcount) == 0) wcount = lseek(to_fd, rcount, SEEK_CUR) == -1 ? -1 : rcount; else wcount = write(to_fd, buf, rcount); if (rcount != wcount || wcount == -1) { warn("%s", to.p_path); rval = 1; break; } } if (skipholes && rcount >= 0) rcount = ftruncate(to_fd, lseek(to_fd, 0, SEEK_CUR)); if (rcount < 0) { warn("%s", entp->fts_path); rval = 1; } } if (rval == 1) { (void)close(from_fd); (void)close(to_fd); return (1); } if (pflag && setfile(fs, to_fd)) rval = 1; /* * If the source was setuid or setgid, lose the bits unless the * copy is owned by the same user and group. */ #define RETAINBITS \ (S_ISUID | S_ISGID | S_ISVTX | S_IRWXU | S_IRWXG | S_IRWXO) if (!pflag && dne && fs->st_mode & (S_ISUID | S_ISGID) && fs->st_uid == myuid) { if (fstat(to_fd, &to_stat)) { warn("%s", to.p_path); rval = 1; } else if (fs->st_gid == to_stat.st_gid && fchmod(to_fd, fs->st_mode & RETAINBITS & ~myumask)) { warn("%s", to.p_path); rval = 1; } } (void)close(from_fd); if (close(to_fd)) { warn("%s", to.p_path); rval = 1; } return (rval); }
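/*
 * Hedged illustrative sketch (not taken from cp itself): the mmap + MADV_SEQUENTIAL
 * fast path used in copy_file() above, reduced to a standalone helper with minimal
 * error handling and without the size cutoff. All identifiers are invented for this
 * example; assumes a POSIX system.
 */
#include <sys/mman.h>
#include <sys/stat.h>
#include <fcntl.h>
#include <unistd.h>

static int copy_small_file_mmap(const char *from, const char *to)
{
    int rval = -1;
    int from_fd = open(from, O_RDONLY);
    int to_fd = open(to, O_WRONLY | O_CREAT | O_TRUNC, 0644);
    struct stat st;
    void *p = MAP_FAILED;

    if (from_fd < 0 || to_fd < 0 || fstat(from_fd, &st) < 0 || st.st_size <= 0)
        goto out;

    p = mmap(NULL, (size_t)st.st_size, PROT_READ, MAP_PRIVATE, from_fd, 0);
    if (p == MAP_FAILED)
        goto out;

    /* Hint that the mapping will be read front to back so read-ahead kicks in. */
    madvise(p, (size_t)st.st_size, MADV_SEQUENTIAL);

    if (write(to_fd, p, (size_t)st.st_size) == st.st_size)
        rval = 0;

    munmap(p, (size_t)st.st_size);
out:
    if (from_fd >= 0) close(from_fd);
    if (to_fd >= 0) close(to_fd);
    return rval;
}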
int main() { unsigned long long i; unsigned long long failed = 0, success = 0; void **page_list; page_list = (void **)malloc(sizeof(void *) * PAGE_LIST_SZ); if (page_list == NULL) { printf("page_list allocation failed\n"); exit(-1); } for (i = 0; i < PAGE_LIST_SZ; ++i) { page_list[i] = valloc(PAGE_4KB); if (page_list[i] == NULL) { printf("page %llu failed\n", i); exit(-1); } memset(page_list[i], 0, PAGE_4KB); int ret = madvise(page_list[i], PAGE_4KB, MADV_MERGEABLE); if (ret != 0) { ++failed; printf("madvise for mergeable failed: %llu, succeeded %llu\n", failed, success); exit(-1); } else { ++success; } } printf("madvise for mergeable failed: %llu, succeeded %llu\n", failed, success); (void)getchar(); /* pause so the KSM state can be inspected before the pages are freed */ for (i = 0; i < PAGE_LIST_SZ; ++i) { free(page_list[i]); } free(page_list); return 0; }
/* * Map a shared object into memory. The "fd" argument is a file descriptor, * which must be open on the object and positioned at its beginning. * The "path" argument is a pathname that is used only for error messages. * * The return value is a pointer to a newly-allocated Obj_Entry structure * for the shared object. Returns NULL on failure. */ Obj_Entry * map_object(int fd, const char *path, const struct stat *sb) { Obj_Entry *obj; Elf_Ehdr *hdr; int i; Elf_Phdr *phdr; Elf_Phdr *phlimit; Elf_Phdr **segs; int nsegs; Elf_Phdr *phdyn; Elf_Phdr *phinterp; Elf_Phdr *phtls; caddr_t mapbase; size_t mapsize; Elf_Off base_offset; Elf_Addr base_vaddr; Elf_Addr base_vlimit; caddr_t base_addr; Elf_Off data_offset; Elf_Addr data_vaddr; Elf_Addr data_vlimit; caddr_t data_addr; int data_prot; int data_flags; Elf_Addr clear_vaddr; caddr_t clear_addr; caddr_t clear_page; Elf_Addr phdr_vaddr; size_t nclear, phsize; Elf_Addr bss_vaddr; Elf_Addr bss_vlimit; caddr_t bss_addr; hdr = get_elf_header(fd, path); if (hdr == NULL) return (NULL); /* * Scan the program header entries, and save key information. * * We expect that the loadable segments are ordered by load address. */ phdr = (Elf_Phdr *) ((char *)hdr + hdr->e_phoff); phsize = hdr->e_phnum * sizeof (phdr[0]); phlimit = phdr + hdr->e_phnum; nsegs = -1; phdyn = phinterp = phtls = NULL; phdr_vaddr = 0; segs = alloca(sizeof(segs[0]) * hdr->e_phnum); while (phdr < phlimit) { switch (phdr->p_type) { case PT_INTERP: phinterp = phdr; break; case PT_LOAD: segs[++nsegs] = phdr; if ((segs[nsegs]->p_align & (PAGE_SIZE - 1)) != 0) { _rtld_error("%s: PT_LOAD segment %d not page-aligned", path, nsegs); return NULL; } break; case PT_PHDR: phdr_vaddr = phdr->p_vaddr; phsize = phdr->p_memsz; break; case PT_DYNAMIC: phdyn = phdr; break; case PT_TLS: phtls = phdr; break; } ++phdr; } if (phdyn == NULL) { _rtld_error("%s: object is not dynamically-linked", path); return NULL; } if (nsegs < 0) { _rtld_error("%s: too few PT_LOAD segments", path); return NULL; } /* * Map the entire address space of the object, to stake out our * contiguous region, and to establish the base address for relocation. */ base_offset = trunc_page(segs[0]->p_offset); base_vaddr = trunc_page(segs[0]->p_vaddr); base_vlimit = round_page(segs[nsegs]->p_vaddr + segs[nsegs]->p_memsz); mapsize = base_vlimit - base_vaddr; base_addr = hdr->e_type == ET_EXEC ? (caddr_t) base_vaddr : NULL; mapbase = mmap(base_addr, mapsize, PROT_NONE, MAP_ANON | MAP_PRIVATE | MAP_NOCORE, -1, 0); if (mapbase == (caddr_t) -1) { _rtld_error("%s: mmap of entire address space failed: %s", path, strerror(errno)); return NULL; } if (base_addr != NULL && mapbase != base_addr) { _rtld_error("%s: mmap returned wrong address: wanted %p, got %p", path, base_addr, mapbase); munmap(mapbase, mapsize); return NULL; } for (i = 0; i <= nsegs; i++) { /* Overlay the segment onto the proper region. */ data_offset = trunc_page(segs[i]->p_offset); data_vaddr = trunc_page(segs[i]->p_vaddr); data_vlimit = round_page(segs[i]->p_vaddr + segs[i]->p_filesz); data_addr = mapbase + (data_vaddr - base_vaddr); data_prot = convert_prot(segs[i]->p_flags); data_flags = convert_flags(segs[i]->p_flags) | MAP_FIXED; if (mmap(data_addr, data_vlimit - data_vaddr, data_prot, data_flags, fd, data_offset) == (caddr_t) -1) { _rtld_error("%s: mmap of data failed: %s", path, strerror(errno)); return NULL; } /* Do BSS setup */ if (segs[i]->p_filesz != segs[i]->p_memsz) { /* Clear any BSS in the last page of the segment. 
*/ clear_vaddr = segs[i]->p_vaddr + segs[i]->p_filesz; clear_addr = mapbase + (clear_vaddr - base_vaddr); clear_page = mapbase + (trunc_page(clear_vaddr) - base_vaddr); if ((nclear = data_vlimit - clear_vaddr) > 0) { /* Make sure the end of the segment is writable */ if ((data_prot & PROT_WRITE) == 0) { if (mprotect(clear_page, PAGE_SIZE, data_prot|PROT_WRITE) < 0) { _rtld_error("%s: mprotect failed: %s", path, strerror(errno)); return NULL; } } memset(clear_addr, 0, nclear); /* * reset the data protection back, enable the segment to be * coredumped since we modified it. */ if ((data_prot & PROT_WRITE) == 0) { madvise(clear_page, PAGE_SIZE, MADV_CORE); mprotect(clear_page, PAGE_SIZE, data_prot); } } /* Overlay the BSS segment onto the proper region. */ bss_vaddr = data_vlimit; bss_vlimit = round_page(segs[i]->p_vaddr + segs[i]->p_memsz); bss_addr = mapbase + (bss_vaddr - base_vaddr); if (bss_vlimit > bss_vaddr) { /* There is something to do */ if (mprotect(bss_addr, bss_vlimit - bss_vaddr, data_prot) == -1) { _rtld_error("%s: mprotect of bss failed: %s", path, strerror(errno)); return NULL; } } } if (phdr_vaddr == 0 && data_offset <= hdr->e_phoff && (data_vlimit - data_vaddr + data_offset) >= (hdr->e_phoff + hdr->e_phnum * sizeof (Elf_Phdr))) { phdr_vaddr = data_vaddr + hdr->e_phoff - data_offset; } } obj = obj_new(); if (sb != NULL) { obj->dev = sb->st_dev; obj->ino = sb->st_ino; } obj->mapbase = mapbase; obj->mapsize = mapsize; obj->textsize = round_page(segs[0]->p_vaddr + segs[0]->p_memsz) - base_vaddr; obj->vaddrbase = base_vaddr; obj->relocbase = mapbase - base_vaddr; obj->dynamic = (const Elf_Dyn *) (obj->relocbase + phdyn->p_vaddr); if (hdr->e_entry != 0) obj->entry = (caddr_t) (obj->relocbase + hdr->e_entry); if (phdr_vaddr != 0) { obj->phdr = (const Elf_Phdr *) (obj->relocbase + phdr_vaddr); } else { obj->phdr = malloc(phsize); if (obj->phdr == NULL) { obj_free(obj); _rtld_error("%s: cannot allocate program header", path); return NULL; } memcpy((char *)obj->phdr, (char *)hdr + hdr->e_phoff, phsize); obj->phdr_alloc = true; } obj->phsize = phsize; if (phinterp != NULL) obj->interp = (const char *) (obj->relocbase + phinterp->p_vaddr); if (phtls != NULL) { tls_dtv_generation++; obj->tlsindex = ++tls_max_index; obj->tlssize = phtls->p_memsz; obj->tlsalign = phtls->p_align; obj->tlsinitsize = phtls->p_filesz; obj->tlsinit = mapbase + phtls->p_vaddr; } return obj; }
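/*
 * Hedged illustrative sketch (not part of the rtld code above): the basic
 * reserve-then-overlay pattern that map_object() relies on, shown with two fake
 * "segments". A PROT_NONE anonymous mapping stakes out a contiguous region, and
 * MAP_FIXED mappings are then placed inside it. All sizes and names are invented
 * for this example.
 */
#include <sys/mman.h>
#include <stdio.h>
#include <unistd.h>

int main(void)
{
    size_t page = (size_t)sysconf(_SC_PAGESIZE);
    size_t reserve = 16 * page;

    /* Reserve the whole address range without backing it with usable pages. */
    char *base = mmap(NULL, reserve, PROT_NONE,
                      MAP_PRIVATE | MAP_ANONYMOUS, -1, 0);
    if (base == MAP_FAILED) { perror("mmap reserve"); return 1; }

    /* Overlay a "text"-like read-only segment at the start of the region... */
    if (mmap(base, 4 * page, PROT_READ,
             MAP_PRIVATE | MAP_ANONYMOUS | MAP_FIXED, -1, 0) == MAP_FAILED) {
        perror("mmap text"); return 1;
    }
    /* ...and a writable "data"-like segment further in, still inside the reservation. */
    if (mmap(base + 8 * page, 4 * page, PROT_READ | PROT_WRITE,
             MAP_PRIVATE | MAP_ANONYMOUS | MAP_FIXED, -1, 0) == MAP_FAILED) {
        perror("mmap data"); return 1;
    }

    base[8 * page] = 42;    /* the data segment is usable; unmapped gaps still fault */
    munmap(base, reserve);  /* one munmap of the reservation tears everything down */
    return 0;
}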
MAdvise::~MAdvise() { madvise(_p,_len,MADV_NORMAL); }
static bool ProbeRegion(uintptr_t aPage) { return !!madvise(reinterpret_cast<void*>(aPage), PageSize(), MADV_NORMAL); }
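/*
 * Hedged illustrative sketch (separate from the ProbeRegion() helper above):
 * madvise(MADV_NORMAL) fails with ENOMEM when the range is not mapped, so it can
 * serve as a cheap "is this page mapped?" probe without touching the memory.
 * The helper name is invented for this example.
 */
#include <sys/mman.h>
#include <errno.h>
#include <stdbool.h>
#include <stdio.h>
#include <unistd.h>

static bool page_is_mapped(void *page, size_t pagesize)
{
    /* 0 => mapped; -1 with ENOMEM => unmapped; other errors are treated as mapped. */
    if (madvise(page, pagesize, MADV_NORMAL) == 0)
        return true;
    return errno != ENOMEM;
}

int main(void)
{
    size_t pagesize = (size_t)sysconf(_SC_PAGESIZE);
    void *p = mmap(NULL, pagesize, PROT_READ | PROT_WRITE,
                   MAP_PRIVATE | MAP_ANONYMOUS, -1, 0);
    if (p == MAP_FAILED) { perror("mmap"); return 1; }
    printf("mapped page: %d\n", page_is_mapped(p, pagesize));   /* expect 1 */
    munmap(p, pagesize);
    printf("unmapped page: %d\n", page_is_mapped(p, pagesize)); /* expect 0 */
    return 0;
}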
FileMap::FileMap(const char *fileName) : fd(0), ptr(NULL) { #ifdef WIN32 #ifdef UNICODE int stringLength = (int)strlen(fileName)+1; int bufferLength = MultiByteToWideChar(CP_ACP, 0, fileName, stringLength, 0, 0); wchar_t *buf = new wchar_t[bufferLength]; MultiByteToWideChar(CP_ACP,0, fileName, stringLength, buf, bufferLength); wstring fileNameString(buf); delete [] buf; #else string fileNameString(fileName); #endif // Open file fd = CreateFile( (WCHAR*)fileNameString.c_str(), GENERIC_READ, FILE_SHARE_READ, NULL, OPEN_EXISTING, FILE_ATTRIBUTE_NORMAL,NULL); if (fd==INVALID_HANDLE_VALUE) { throw "Error creating file for mapping."; } // Guess size DWORD lower, upper; lower = GetFileSize(fd,&upper); mappedSize = lower | ((uint64_t)upper<<32); // Create mapping h = CreateFileMapping (fd, NULL, PAGE_READONLY, upper, lower, NULL); if (h==NULL) { CloseHandle(fd); throw "Error creating mapping on file"; } // Get pointer ptr = (unsigned char*) MapViewOfFile (h, FILE_MAP_READ, 0, 0, 0); if (ptr==NULL) { CloseHandle(fd); CloseHandle(h); DWORD err = GetLastError(); cerr << "Error getting pointer: " << err << endl; throw "Error getting pointer of the mapped file"; } #else // Linux and OSX // Open file fd = open(fileName, O_RDONLY); if(fd<0) { perror("open"); throw "Error opening HDT file for mapping."; } // Guess size struct stat statbuf; if(stat(fileName,&statbuf)!=0) { perror("Error on stat()"); throw "Error trying to guess the file size"; } mappedSize = statbuf.st_size; // Do mmap ptr = (unsigned char *) mmap(0, mappedSize, PROT_READ, MAP_PRIVATE, fd, 0); if(ptr==MAP_FAILED) { perror("Error on mmap"); throw "Error trying to mmap HDT file"; } // Mark as needed so the OS keeps as much as possible in memory madvise((void*)ptr, mappedSize, MADV_WILLNEED); #endif }
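/*
 * Hedged illustrative sketch (not part of the FileMap class above): prefetching a
 * read-only file mapping with MADV_WILLNEED so a later sequential pass hits memory
 * instead of faulting one page at a time. The file name and structure of this
 * program are invented for the example.
 */
#include <sys/mman.h>
#include <sys/stat.h>
#include <fcntl.h>
#include <stdio.h>
#include <unistd.h>

int main(int argc, char **argv)
{
    const char *path = argc > 1 ? argv[1] : "data.hdt"; /* assumed input file */
    int fd = open(path, O_RDONLY);
    struct stat st;
    if (fd < 0 || fstat(fd, &st) < 0 || st.st_size == 0) {
        perror(path);
        return 1;
    }
    unsigned char *ptr = mmap(NULL, (size_t)st.st_size, PROT_READ,
                              MAP_PRIVATE, fd, 0);
    if (ptr == MAP_FAILED) { perror("mmap"); return 1; }

    /* Ask the kernel to start paging the whole mapping in ahead of use. */
    if (madvise(ptr, (size_t)st.st_size, MADV_WILLNEED) != 0)
        perror("madvise");

    unsigned long sum = 0;
    for (off_t i = 0; i < st.st_size; i++)   /* sequential pass over the data */
        sum += ptr[i];
    printf("checksum: %lu\n", sum);

    munmap(ptr, (size_t)st.st_size);
    close(fd);
    return 0;
}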
ucs_status_t uct_mem_alloc(void *addr, size_t min_length, unsigned flags, uct_alloc_method_t *methods, unsigned num_methods, uct_md_h *mds, unsigned num_mds, const char *alloc_name, uct_allocated_memory_t *mem) { uct_alloc_method_t *method; uct_md_attr_t md_attr; ucs_status_t status; size_t alloc_length; unsigned md_index; uct_mem_h memh; uct_md_h md; void *address; int shmid; unsigned map_flags; if (min_length == 0) { ucs_error("Allocation length cannot be 0"); return UCS_ERR_INVALID_PARAM; } if (num_methods == 0) { ucs_error("No allocation methods provided"); return UCS_ERR_INVALID_PARAM; } if ((flags & UCT_MD_MEM_FLAG_FIXED) && (!addr || ((uintptr_t)addr % ucs_get_page_size()))) { ucs_debug("UCT_MD_MEM_FLAG_FIXED requires valid page size aligned address"); return UCS_ERR_INVALID_PARAM; } for (method = methods; method < methods + num_methods; ++method) { ucs_debug("trying allocation method %s", uct_alloc_method_names[*method]); switch (*method) { case UCT_ALLOC_METHOD_MD: /* Allocate with one of the specified memory domains */ for (md_index = 0; md_index < num_mds; ++md_index) { md = mds[md_index]; status = uct_md_query(md, &md_attr); if (status != UCS_OK) { ucs_error("Failed to query MD"); return status; } /* Check if MD supports allocation */ if (!(md_attr.cap.flags & UCT_MD_FLAG_ALLOC)) { continue; } /* Check if MD supports allocation with fixed address * if it's requested */ if ((flags & UCT_MD_MEM_FLAG_FIXED) && !(md_attr.cap.flags & UCT_MD_FLAG_FIXED)) { continue; } /* Allocate memory using the MD. * If the allocation fails, it's considered an error and we don't * fall-back, because this MD already exposed support for memory * allocation. */ alloc_length = min_length; address = addr; status = uct_md_mem_alloc(md, &alloc_length, &address, flags, alloc_name, &memh); if (status != UCS_OK) { ucs_error("failed to allocate %zu bytes using md %s: %s", alloc_length, md->component->name, ucs_status_string(status)); return status; } ucs_assert(memh != UCT_MEM_HANDLE_NULL); mem->md = md; mem->memh = memh; goto allocated; } break; case UCT_ALLOC_METHOD_THP: #ifdef MADV_HUGEPAGE if (!ucs_is_thp_enabled()) { break; } /* Fixed option is not supported for thp allocation*/ if (flags & UCT_MD_MEM_FLAG_FIXED) { break; } alloc_length = ucs_align_up(min_length, ucs_get_huge_page_size()); address = ucs_memalign(ucs_get_huge_page_size(), alloc_length UCS_MEMTRACK_VAL); if (address != NULL) { status = madvise(address, alloc_length, MADV_HUGEPAGE); if (status != UCS_OK) { ucs_error("madvise failure status (%d) address(%p) len(%zu):" " %m", status, address, alloc_length); ucs_free(address); break; } else { goto allocated_without_md; } } ucs_debug("failed to allocate by thp %zu bytes: %m", alloc_length); #endif break; case UCT_ALLOC_METHOD_HEAP: /* Allocate aligned memory using libc allocator */ /* Fixed option is not supported for heap allocation*/ if (flags & UCT_MD_MEM_FLAG_FIXED) { break; } alloc_length = min_length; address = ucs_memalign(UCS_SYS_CACHE_LINE_SIZE, alloc_length UCS_MEMTRACK_VAL); if (address != NULL) { goto allocated_without_md; } ucs_debug("failed to allocate %zu bytes from the heap", alloc_length); break; case UCT_ALLOC_METHOD_MMAP: map_flags = uct_mem_get_mmap_flags(flags); /* Request memory from operating system using mmap() */ alloc_length = ucs_align_up_pow2(min_length, ucs_get_page_size()); address = ucs_mmap(addr, alloc_length, PROT_READ | PROT_WRITE, map_flags, -1, 0 UCS_MEMTRACK_VAL); if (address != MAP_FAILED) { goto allocated_without_md; } ucs_debug("failed to mmap %zu bytes: 
%m", alloc_length); break; case UCT_ALLOC_METHOD_HUGE: /* Allocate huge pages */ alloc_length = min_length; address = (flags & UCT_MD_MEM_FLAG_FIXED) ? addr : NULL; status = ucs_sysv_alloc(&alloc_length, &address, SHM_HUGETLB, &shmid UCS_MEMTRACK_VAL); if (status == UCS_OK) { goto allocated_without_md; } ucs_debug("failed to allocate %zu bytes from hugetlb: %s", min_length, ucs_status_string(status)); break; default: ucs_error("Invalid allocation method %d", *method); return UCS_ERR_INVALID_PARAM; } } ucs_debug("Could not allocate memory with any of the provided methods"); return UCS_ERR_NO_MEMORY; allocated_without_md: mem->md = NULL; mem->memh = UCT_MEM_HANDLE_NULL; allocated: ucs_debug("allocated %zu bytes at %p using %s", alloc_length, address, (mem->md == NULL) ? uct_alloc_method_names[*method] : mem->md->component->name); mem->address = address; mem->length = alloc_length; mem->method = *method; return UCS_OK; }