/**
 * Single-threaded request loop for a session.
 *
 * Allocates one receive buffer sized for the session's first channel, then
 * keeps receiving and processing requests until the session is flagged as
 * exited or a receive returns 0 or an error other than -EINTR.  The buffer
 * is released and the session reset before returning.
 *
 * @param se the session to serve
 * @return 0 on a clean stop, -1 if the buffer could not be allocated or the
 *         last receive result was negative
 */
int fuse_session_loop(struct fuse_session *se)
{
	struct fuse_chan *chan = fuse_session_next_chan(se, NULL);
	size_t len = fuse_chan_bufsize(chan);
	char *rbuf = malloc(len);
	int rc = 0;

	if (rbuf == NULL) {
		fprintf(stderr, "fuse: failed to allocate read buffer\n");
		return -1;
	}

	for (;;) {
		struct fuse_chan *cur;

		if (fuse_session_exited(se))
			break;

		/* the receive call may hand back a different channel */
		cur = chan;
		rc = fuse_chan_recv(&cur, rbuf, len);
		if (rc == -EINTR)
			continue;	/* interrupted: re-check exit flag, retry */
		if (rc <= 0)
			break;

		fuse_session_process(se, rbuf, rc, cur);
	}

	free(rbuf);
	fuse_session_reset(se);

	if (rc < 0)
		return -1;
	return 0;
}
/**
 * RozoFS replacement for libfuse's fuse_kern_chan_send().
 *
 * RozoFS drives the fuse device in non-blocking mode, so it cannot rely on
 * the default fuse_kern_chan_send(): when the fuse device is congested the
 * write fails, and the caller releases the resources backing the response as
 * soon as this function returns, so the response or notification would be
 * lost.  To avoid that, RozoFS must keep track of every response that could
 * not be sent and save it in internal buffers.
 *
 * NOTE(review): the EAGAIN branch below only reports success; the code that
 * saves the reply and raises the congestion flag is not present in this
 * function - confirm where (or whether) that is done.
 *
 * @param ch    fuse channel (holds the file descriptor to write to)
 * @param iov   list of the vectors to send; nothing is written when NULL
 * @param count number of vectors to send
 *
 * @retval 0    on success (also returned when writev() fails with EAGAIN)
 * @retval < 0  negated errno value on error
 */
int rozofs_fuse_kern_chan_send(struct fuse_chan *ch, const struct iovec iov[],
                               size_t count)
{
    struct fuse_session *se;
    ssize_t res;
    int err;

    if (iov == NULL)
        return 0;

    res = writev(fuse_chan_fd(ch), iov, count);
    err = errno;   /* capture before any other call can overwrite it */
    if (res != -1)
        return 0;

    se = fuse_chan_session(ch);
    assert(se != NULL);

    if (err == EAGAIN) {
        /*
        ** fuse device is congested: the reply is expected to be stored and
        ** the congestion flag asserted in the rozofs_fuse context
        */
        return 0;
    }

    /* ENOENT means the operation was interrupted */
    if (!fuse_session_exited(se) && err != ENOENT) {
        /* perror() reads errno: make sure it reports the writev() failure */
        errno = err;
        perror("fuse: writing device");
    }
    return -err;
}
static Bool fuseProcessMessages(void *data) { CompDisplay *d = (CompDisplay *)data; struct fuse_chan *channel; size_t bufferSize; int res = 0; FUSE_DISPLAY(d); channel = fuse_session_next_chan(fd->session, NULL); bufferSize = fuse_chan_bufsize(channel); if (fuse_session_exited(fd->session)) return FALSE; for (;; ) { struct fuse_chan *tmpch = channel; res = fuse_chan_recv(&tmpch, fd->buffer, bufferSize); if (res == -EINTR) continue; if (res > 0) fuse_session_process(fd->session, fd->buffer, res, tmpch); break; } return TRUE; }
void rozofs_fuse_show(char * argv[], uint32_t tcpRef, void *bufRef) { uint32_t buffer_count=0; char status[16]; char *pChar = localBuf; buffer_count = ruc_buf_getFreeBufferCount(rozofs_fuse_ctx_p->fuseReqPoolRef); /* ** check if the session has been exited */ if (fuse_session_exited(rozofs_fuse_ctx_p->se)) sprintf(status,"exited"); else sprintf(status,"running"); pChar += sprintf(pChar,"FUSE %8s - %d/%d ctx remaining\n", status, buffer_count, rozofs_fuse_ctx_p->initBufCount); int i; for (i = 0; i < RZ_FUSE_WRITE_MAX; i++) { pChar +=sprintf(pChar,"cpt_%d: %8llu\n",i,(long long unsigned int)rozofs_write_merge_stats_tab[i]); } /** * clear the stats */ memset(rozofs_write_merge_stats_tab,0,sizeof(uint64_t)*RZ_FUSE_WRITE_MAX); /** * read/write statistics */ pChar +=sprintf(pChar,"flush buf. count: %8llu\n",(long long unsigned int)rozofs_fuse_read_write_stats_buf.flush_buf_cpt); pChar +=sprintf(pChar,"readahead count: %8llu\n",(long long unsigned int)rozofs_fuse_read_write_stats_buf.readahead_cpt); pChar +=sprintf(pChar,"read req. 
count: %8llu\n",(long long unsigned int)rozofs_fuse_read_write_stats_buf.read_req_cpt); pChar +=sprintf(pChar,"read fuse count: %8llu\n",(long long unsigned int)rozofs_fuse_read_write_stats_buf.read_fuse_cpt); memset(&rozofs_fuse_read_write_stats_buf,0,sizeof(rozofs_fuse_read_write_stats)); /* ** Per array statistics */ pChar +=sprintf(pChar,"Per Read Array statitics:\n" ); for (i = 0; i < 32; i++) { if (rozofs_read_buf_section_table[i]!= 0) pChar +=sprintf(pChar," %6d: %8llu\n",(i+1)*ROZOFS_BSIZE,(long long unsigned int)rozofs_read_buf_section_table[i]); } pChar +=sprintf(pChar,"Per Write Array statitics:\n" ); for (i = 0; i < 32; i++) { if (rozofs_write_buf_section_table[i]!= 0) pChar +=sprintf(pChar," %6d: %8llu\n",(i+1)*ROZOFS_BSIZE,(long long unsigned int)rozofs_write_buf_section_table[i]); } memset (rozofs_write_buf_section_table,0,sizeof(uint64_t)*ROZOFS_FUSE_NB_OF_BUSIZE_SECTION_MAX); memset (rozofs_read_buf_section_table,0,sizeof(uint64_t)*ROZOFS_FUSE_NB_OF_BUSIZE_SECTION_MAX); uma_dbg_send(tcpRef, bufRef, TRUE, localBuf); }
/** * internal function that is called from processing a message that has been queued on the /dev/fuse socket. That function is inherited from fuse_kern_chan_receive @param chp : pointer to the channel @param buf: pointer to the buffer where data will be copied @param size : max size of the receive buffer @retval > 0 : number of byte read @retval = 0 : session has been exited @retval < 0 : error */ int rozofs_fuse_kern_chan_receive(struct fuse_chan **chp, char *buf, size_t size) { struct fuse_chan *ch = *chp; int err; ssize_t res; struct fuse_session *se = fuse_chan_session(ch); assert(se != NULL); restart: res = read(fuse_chan_fd(ch), buf, size); if (fuse_session_exited(se)) return 0; if (res == -1) { /* ENOENT means the operation was interrupted, it's safe to restart */ err = errno; if (err == ENOENT) { rozofs_fuse_req_enoent_count++; goto restart; } if (err == ENODEV) { severe("Exit from RozofsMount required!!!"); fuse_session_exit(se); rozofs_exit(); return 0; } /* Errors occurring during normal operation: EINTR (read interrupted), EAGAIN (nonblocking I/O), ENODEV (filesystem umounted) */ if (err != EINTR && err != EAGAIN) severe("fuse: reading device"); if ((err == EAGAIN)|| (err == EINTR)) rozofs_fuse_req_eagain_count++; return -err; } #if 0 if ((size_t) res < sizeof(struct fuse_in_header)) { fprintf(stderr, "short read on fuse device\n"); return -EIO; } #endif rozofs_fuse_req_count++; rozofs_fuse_req_byte_in+=res; return res; }
/**
 * Write a reply made of `count` vectors to the channel's file descriptor.
 *
 * @param ch    channel whose file descriptor is written to
 * @param iov   vectors to send; nothing is written when NULL
 * @param count number of vectors in iov
 * @return 0 on success (or when iov is NULL), the negated errno value of the
 *         failed writev() otherwise
 */
static int fuse_kern_chan_send(struct fuse_chan *ch, const struct iovec iov[],
			       size_t count)
{
	if (iov) {
		ssize_t res = writev(fuse_chan_fd(ch), iov, count);
		int err = errno;

		if (res == -1) {
			struct fuse_session *se = fuse_chan_session(ch);

			assert(se != NULL);

			/* ENOENT means the operation was interrupted */
			if (!fuse_session_exited(se) && err != ENOENT) {
				/*
				 * perror() reads errno; restore the value
				 * saved right after writev() so that the
				 * calls made since cannot change the report.
				 */
				errno = err;
				perror("fuse: writing device");
			}
			return -err;
		}
	}
	return 0;
}
int fuse_session_loop_mt(struct fuse_session *se) { int err; struct fuse_mt mt; struct fuse_worker *w; memset(&mt, 0, sizeof(struct fuse_mt)); mt.se = se; mt.prevch = fuse_session_next_chan(se, NULL); mt.error = 0; mt.numworker = 0; mt.numavail = 0; mt.main.thread_id = pthread_self(); mt.main.prev = mt.main.next = &mt.main; sem_init(&mt.finish, 0, 0); fuse_mutex_init(&mt.lock); pthread_mutex_lock(&mt.lock); err = fuse_start_thread(&mt); pthread_mutex_unlock(&mt.lock); if (!err) { /* sem_wait() is interruptible */ while (!fuse_session_exited(se)) sem_wait(&mt.finish); for (w = mt.main.next; w != &mt.main; w = w->next) pthread_cancel(w->thread_id); mt.exit = 1; pthread_mutex_unlock(&mt.lock); while (mt.main.next != &mt.main) fuse_join_worker(&mt, mt.main.next); err = mt.error; } pthread_mutex_destroy(&mt.lock); sem_destroy(&mt.finish); fuse_session_reset(se); return err; }
uint32_t rozofs_fuse_rcvReadysock(void * rozofs_fuse_ctx_p,int socketId) { rozofs_fuse_ctx_t *ctx_p; uint32_t buffer_count; ctx_p = (rozofs_fuse_ctx_t*)rozofs_fuse_ctx_p; /* ** check if the session has been exited */ if (fuse_session_exited(ctx_p->se)) { /* ** session is dead, so stop receiving fuse request */ return FALSE; } /* ** There is no specific buffer pool needed for receiving the fuse request ** since the fuse library allocates memory to store the incoming request. ** The only element that can prevent a fuse request to be processed is the ** amount of transaction context. So the system has to check how many transaction ** contexts are remaining in the transaction context buffer pool. ** When there is no enough contexts, then the system stops looking at the ** fuse "socket". */ buffer_count = ruc_buf_getFreeBufferCount(ctx_p->fuseReqPoolRef); /* ** 2 fuse contexts are required : ** - 1 to process the incoming request ** - 1 to eventualy process an internal asynchronous flush */ if (buffer_count < 2) { rozofs_fuse_buffer_depletion_count++; return FALSE; } return TRUE; }
int fuse_session_loop(struct fuse_session *se) { int res = 0; struct fuse_buf fbuf = { .mem = NULL, }; while (!fuse_session_exited(se)) { res = fuse_session_receive_buf_int(se, &fbuf, NULL); if (res == -EINTR) continue; if (res <= 0) break; fuse_session_process_buf_int(se, &fbuf, NULL); } free(fbuf.mem); fuse_session_reset(se); return res < 0 ? -1 : 0; }
/**
 * Read one raw request from the channel's file descriptor.
 *
 * @param chp  pointer to the channel to read from
 * @param buf  destination buffer
 * @param size capacity of buf
 * @return the number of bytes read; 0 when the session has exited (also
 *         after ENODEV, which flags the session as exited); the negated
 *         errno value on a read error; -EIO when fewer bytes than a
 *         struct fuse_in_header were read
 */
static int fuse_kern_chan_receive(struct fuse_chan **chp, char *buf,
				  size_t size)
{
	struct fuse_chan *ch = *chp;
	int err;
	ssize_t res;
	struct fuse_session *se = fuse_chan_session(ch);
	assert(se != NULL);

	for (;;) {
		res = read(fuse_chan_fd(ch), buf, size);
		err = errno;

		if (fuse_session_exited(se))
			return 0;
		if (res != -1)
			break;

		/* ENOENT means the operation was interrupted, it's safe
		   to restart */
		if (err == ENOENT)
			continue;

		if (err == ENODEV) {
			fuse_session_exit(se);
			return 0;
		}

		/* Errors occurring during normal operation: EINTR (read
		   interrupted), EAGAIN (nonblocking I/O), ENODEV (filesystem
		   umounted) */
		if (err != EINTR && err != EAGAIN) {
			/*
			 * perror() reads errno; restore the value saved
			 * right after read() so that the calls made since
			 * cannot change the report.
			 */
			errno = err;
			perror("fuse: reading device");
		}
		return -err;
	}

	if ((size_t) res < sizeof(struct fuse_in_header)) {
		fprintf(stderr, "short read on fuse device\n");
		return -EIO;
	}
	return res;
}
/*
 * Worker loop: repeatedly receives a request on mt->prevch into the
 * worker's own buffer and processes it, until the session is flagged as
 * exited or a receive fails.
 *
 * data is the struct fuse_worker describing this worker.  Always returns
 * NULL.
 */
static void *fuse_do_work(void *data)
{
	struct fuse_worker *w = (struct fuse_worker *) data;
	struct fuse_mt *mt = w->mt;

	while (!fuse_session_exited(mt->se)) {
		int isforget = 0;
		struct fuse_chan *ch = mt->prevch;
		int res;

		/* cancellation is enabled only for the duration of the receive */
		pthread_setcancelstate(PTHREAD_CANCEL_ENABLE, NULL);
		res = fuse_chan_recv(&ch, w->buf, w->bufsize);
		pthread_setcancelstate(PTHREAD_CANCEL_DISABLE, NULL);
		if (res == -EINTR)
			continue;
		if (res <= 0) {
			/* a negative result also ends the session and records
			 * an error for the whole loop */
			if (res < 0) {
				fuse_session_exit(mt->se);
				mt->error = -1;
			}
			break;
		}

		pthread_mutex_lock(&mt->lock);
		/* exit flag raised: drop the received request and leave */
		if (mt->exit) {
			pthread_mutex_unlock(&mt->lock);
			return NULL;
		}

		/*
		 * This disgusting hack is needed so that zillions of threads
		 * are not created on a burst of FORGET messages: a FORGET
		 * request does not change the available-worker count.
		 */
		if (((struct fuse_in_header *) w->buf)->opcode == FUSE_FORGET)
			isforget = 1;

		if (!isforget)
			mt->numavail--;
		/* no available worker left: start another one */
		if (mt->numavail == 0)
			fuse_start_thread(mt);
		pthread_mutex_unlock(&mt->lock);

		/* the request is processed without holding the lock */
		fuse_session_process(mt->se, w->buf, res, ch);

		pthread_mutex_lock(&mt->lock);
		if (!isforget)
			mt->numavail++;
		/* more than 10 available workers: this one retires, unlinking
		 * itself from the list and freeing its own resources */
		if (mt->numavail > 10) {
			if (mt->exit) {
				pthread_mutex_unlock(&mt->lock);
				return NULL;
			}
			list_del_worker(w);
			mt->numavail--;
			mt->numworker--;
			pthread_mutex_unlock(&mt->lock);

			pthread_detach(w->thread_id);
			free(w->buf);
			free(w);
			return NULL;
		}
		pthread_mutex_unlock(&mt->lock);
	}

	/* post the semaphore, then block in a cancellable wait */
	sem_post(&mt->finish);
#ifdef __APPLE__
	{
		sigset_t set;
		(void) sigprocmask(0, NULL, &set);
		(void) sigsuspend(&set); /* want cancelable */
	}
#else /* !__APPLE__ */
	pthread_setcancelstate(PTHREAD_CANCEL_ENABLE, NULL);
	pause();
#endif /* __APPLE__ */

	return NULL;
}
void rozofs_fuse_show(char * argv[], uint32_t tcpRef, void *bufRef) { uint32_t buffer_count=0; char status[16]; char *pChar = uma_dbg_get_buffer(); buffer_count = ruc_buf_getFreeBufferCount(rozofs_fuse_ctx_p->fuseReqPoolRef); /* ** check if the session has been exited */ if (fuse_session_exited(rozofs_fuse_ctx_p->se)) sprintf(status,"exited"); else sprintf(status,"running"); pChar += sprintf(pChar,"FUSE %8s - %d/%d ctx remaining\n", status, buffer_count, rozofs_fuse_ctx_p->initBufCount); /* ** display the cache mode */ pChar += sprintf(pChar,"FS Mode : "); if (rozofs_mode== 0) { pChar += sprintf(pChar,"standard\n"); } else { pChar += sprintf(pChar,"Block\n"); } pChar += sprintf(pChar,"cache Mode : "); switch (rozofs_cache_mode) { default: case 0: pChar += sprintf(pChar,"default\n"); break; case 1: pChar += sprintf(pChar,"direct_io\n"); break; case 2: pChar += sprintf(pChar,"keep_cache\n"); break; } int i; for (i = 0; i < RZ_FUSE_WRITE_MAX; i++) { pChar +=sprintf(pChar,"cpt_%d: %8llu\n",i,(long long unsigned int)rozofs_write_merge_stats_tab[i]); } /** * clear the stats */ memset(rozofs_write_merge_stats_tab,0,sizeof(uint64_t)*RZ_FUSE_WRITE_MAX); /** * read/write statistics */ pChar +=sprintf(pChar,"flush buf. count: %8llu\n",(long long unsigned int)rozofs_fuse_read_write_stats_buf.flush_buf_cpt); pChar +=sprintf(pChar,"readahead count: %8llu\n",(long long unsigned int)rozofs_fuse_read_write_stats_buf.readahead_cpt); pChar +=sprintf(pChar,"read req. 
count: %8llu\n",(long long unsigned int)rozofs_fuse_read_write_stats_buf.read_req_cpt); pChar +=sprintf(pChar,"read fuse count: %8llu\n",(long long unsigned int)rozofs_fuse_read_write_stats_buf.read_fuse_cpt); memset(&rozofs_fuse_read_write_stats_buf,0,sizeof(rozofs_fuse_read_write_stats)); /* ** Per array statistics */ pChar +=sprintf(pChar,"Per Read Array statitics:\n" ); for (i = 0; i < 32; i++) { if (rozofs_read_buf_section_table[i]!= 0) pChar +=sprintf(pChar," %6d: %8llu\n",(i+1)*ROZOFS_PAGE_SZ,(long long unsigned int)rozofs_read_buf_section_table[i]); } pChar +=sprintf(pChar,"Per Write Array statitics:\n" ); for (i = 0; i < 32; i++) { if (rozofs_write_buf_section_table[i]!= 0) pChar +=sprintf(pChar," %6d: %8llu\n",(i+1)*ROZOFS_PAGE_SZ,(long long unsigned int)rozofs_write_buf_section_table[i]); } memset (rozofs_write_buf_section_table,0,sizeof(uint64_t)*ROZOFS_FUSE_NB_OF_BUSIZE_SECTION_MAX); memset (rozofs_read_buf_section_table,0,sizeof(uint64_t)*ROZOFS_FUSE_NB_OF_BUSIZE_SECTION_MAX); uma_dbg_send(tcpRef, bufRef, TRUE, uma_dbg_get_buffer()); }
void rozofs_fuse_show(char * argv[], uint32_t tcpRef, void *bufRef) { uint32_t buffer_count=0; char status[16]; int new_val; char *pChar = uma_dbg_get_buffer(); if (argv[1] != NULL) { if (strcmp(argv[1],"loop")==0) { errno = 0; if (argv[2] == NULL) { pChar += sprintf(pChar, "argument is missing\n"); uma_dbg_send(tcpRef, bufRef, TRUE, uma_dbg_get_buffer()); return; } new_val = (int) strtol(argv[2], (char **) NULL, 10); if (errno != 0) { pChar += sprintf(pChar, "bad value %s\n",argv[2]); uma_dbg_send(tcpRef, bufRef, TRUE, uma_dbg_get_buffer()); return; } /* ** */ if (new_val == 0) { pChar += sprintf(pChar, "unsupported value %s\n",argv[2]); uma_dbg_send(tcpRef, bufRef, TRUE, uma_dbg_get_buffer()); return; } rozofs_fuse_loop_count = new_val; } } uint64_t old_ticker = rozofs_fuse_req_tic; rozofs_fuse_get_ticker(); buffer_count = ruc_buf_getFreeBufferCount(rozofs_fuse_ctx_p->fuseReqPoolRef); /* ** check if the session has been exited */ if (fuse_session_exited(rozofs_fuse_ctx_p->se)) sprintf(status,"exited"); else sprintf(status,"running"); pChar += sprintf(pChar,"FUSE %8s - %d/%d ctx remaining\n", status, buffer_count, rozofs_fuse_ctx_p->initBufCount); /* ** display the cache mode */ pChar += sprintf(pChar,"poll count : %d\n",rozofs_fuse_loop_count); pChar += sprintf(pChar,"FS Mode : "); if (rozofs_mode== 0) { pChar += sprintf(pChar,"standard\n"); } else { pChar += sprintf(pChar,"Block\n"); } pChar += sprintf(pChar,"FS Xattr : %s\n",(rozofs_xattr_disable==1)?"Disabled":"Enabled"); pChar += sprintf(pChar,"cache Mode : "); switch (rozofs_cache_mode) { default: case 0: pChar += sprintf(pChar,"default\n"); break; case 1: pChar += sprintf(pChar,"direct_io\n"); break; case 2: pChar += sprintf(pChar,"keep_cache\n"); break; } int i; for (i = 0; i < RZ_FUSE_WRITE_MAX; i++) { pChar +=sprintf(pChar,"cpt_%d: %8llu\n",i,(long long unsigned int)rozofs_write_merge_stats_tab[i]); } /** * clear the stats */ uint64_t delay = rozofs_fuse_req_tic-old_ticker; 
memset(rozofs_write_merge_stats_tab,0,sizeof(uint64_t)*RZ_FUSE_WRITE_MAX); pChar +=sprintf(pChar,"fuse req_in (count/bytes): %8llu/%llu\n",(long long unsigned int)rozofs_fuse_req_count, (long long unsigned int)rozofs_fuse_req_byte_in); if (delay) { pChar +=sprintf(pChar,"fuse req_in/s : %8llu/%llu\n",(long long unsigned int)(rozofs_fuse_req_count*1000000/delay), (long long unsigned int)(rozofs_fuse_req_byte_in*1000000/delay)); } pChar +=sprintf(pChar,"fuse req_in EAGAIN/ENOENT: %8llu/%llu\n",(long long unsigned int)rozofs_fuse_req_eagain_count, (long long unsigned int)rozofs_fuse_req_enoent_count); pChar +=sprintf(pChar,"fuse buffer depletion : %8llu\n",(long long unsigned int)rozofs_fuse_buffer_depletion_count); rozofs_fuse_buffer_depletion_count =0; rozofs_fuse_req_count = 0; rozofs_fuse_req_byte_in = 0; rozofs_fuse_req_eagain_count = 0; rozofs_fuse_req_enoent_count = 0; /** * read/write statistics */ pChar +=sprintf(pChar,"flush buf. count : %8llu\n",(long long unsigned int)rozofs_fuse_read_write_stats_buf.flush_buf_cpt); pChar +=sprintf(pChar," start aligned/unaligned : %8llu/%llu\n", (long long unsigned int)rozofs_aligned_write_start[0], (long long unsigned int)rozofs_aligned_write_start[1] ); pChar +=sprintf(pChar," end aligned/unaligned : %8llu/%llu\n", (long long unsigned int)rozofs_aligned_write_end[0], (long long unsigned int)rozofs_aligned_write_end[1] ); pChar +=sprintf(pChar,"readahead count : %8llu\n",(long long unsigned int)rozofs_fuse_read_write_stats_buf.readahead_cpt); pChar +=sprintf(pChar,"read req. 
count : %8llu\n",(long long unsigned int)rozofs_fuse_read_write_stats_buf.read_req_cpt); pChar +=sprintf(pChar,"read fuse count : %8llu\n",(long long unsigned int)rozofs_fuse_read_write_stats_buf.read_fuse_cpt); memset(&rozofs_fuse_read_write_stats_buf,0,sizeof(rozofs_fuse_read_write_stats)); { int k; for (k= 0;k< 2;k++) { rozofs_aligned_write_start[k] = 0; rozofs_aligned_write_end[k] = 0; } } /* ** Per array statistics */ pChar +=sprintf(pChar,"Per Read Array statitics:\n" ); for (i = 0; i < 32; i++) { if (rozofs_read_buf_section_table[i]!= 0) pChar +=sprintf(pChar," %6d: %8llu\n",(i+1)*ROZOFS_PAGE_SZ,(long long unsigned int)rozofs_read_buf_section_table[i]); } pChar +=sprintf(pChar,"Per Write Array statitics:\n" ); for (i = 0; i < 32; i++) { if (rozofs_write_buf_section_table[i]!= 0) pChar +=sprintf(pChar," %6d: %8llu\n",(i+1)*ROZOFS_PAGE_SZ,(long long unsigned int)rozofs_write_buf_section_table[i]); } memset (rozofs_write_buf_section_table,0,sizeof(uint64_t)*ROZOFS_FUSE_NB_OF_BUSIZE_SECTION_MAX); memset (rozofs_read_buf_section_table,0,sizeof(uint64_t)*ROZOFS_FUSE_NB_OF_BUSIZE_SECTION_MAX); uma_dbg_send(tcpRef, bufRef, TRUE, uma_dbg_get_buffer()); }
uint32_t rozofs_fuse_rcvReadysock(void * rozofs_fuse_ctx_p,int socketId) { rozofs_fuse_ctx_t *ctx_p; uint32_t buffer_count; uint32_t status; ctx_p = (rozofs_fuse_ctx_t*)rozofs_fuse_ctx_p; /* ** check if the session has been exited */ if (fuse_session_exited(ctx_p->se)) { /* ** session is dead, so stop receiving fuse request */ return FALSE; } /* ** There is no specific buffer pool needed for receiving the fuse request ** since the fuse library allocates memory to store the incoming request. ** The only element that can prevent a fuse request to be processed is the ** amount of transaction context. So the system has to check how many transaction ** contexts are remaining in the transaction context buffer pool. ** When there is no enough contexts, then the system stops looking at the ** fuse "socket". */ buffer_count = ruc_buf_getFreeBufferCount(ctx_p->fuseReqPoolRef); /* ** 2 fuse contexts are required : ** - 1 to process the incoming request ** - 1 to eventualy process an internal asynchronous flush */ if (buffer_count < 2) { rozofs_fuse_buffer_depletion_count++; return FALSE; } /* ** check the number of requests towards the storcli */ if (rozofs_storcli_pending_req_count >= rozofs_max_storcli_tx) { status = rozofs_xoff(); rozofs_storcli_buffer_depletion_count++; return status; } /* ** Check the amount of read buffer (shared pool) */ buffer_count = rozofs_get_shared_storcli_buf_free(SHAREMEM_IDX_READ); if (buffer_count < 2) { status = rozofs_xoff(); rozofs_storcli_buffer_depletion_count++; return status; } /* ** Check the amount of read buffer (shared pool) */ buffer_count = rozofs_get_shared_storcli_buf_free(SHAREMEM_IDX_WRITE); if (buffer_count < 2) { status = rozofs_xoff(); rozofs_storcli_buffer_depletion_count++; return status; } rozofs_xon(); return TRUE; }
/*
**__________________________________________________________________________
*/
/**
 * Receive at most one request from the fuse kernel channel and process it.
 *
 * The request is read into the current receive buffer
 * (rozofs_fuse_cur_rcv_buf); when there is none, one is first obtained from
 * rozofs_fuse_alloc_rcv_buffer_pool().
 *
 * @param ctx_p fuse context (session and receive buffer size)
 * @param empty set to 1 when nothing could be received (no receive buffer
 *              available, or the fuse queue is empty), 0 otherwise
 *
 * @retval 0  a request was processed, the queue was empty, no buffer was
 *            available, or the session has been exited
 * @retval -1 the receive failed with an error other than EINTR/EAGAIN
 */
int rozofs_fuse_session_loop(rozofs_fuse_ctx_t *ctx_p, int * empty)
{
    int res;
    char *buf;
    struct fuse_buf fbuf;
    struct fuse_session *se = ctx_p->se;
    struct fuse_chan *ch = fuse_session_next_chan(se, NULL);
    struct fuse_chan *tmpch;

    *empty = 0;

    if (rozofs_fuse_cur_rcv_buf == NULL) {
        /*
        ** Allocate a buffer for receiving message from fuse kernel
        */
        rozofs_fuse_cur_rcv_buf = rozofs_fuse_alloc_rcv_buffer_pool();
        if (rozofs_fuse_cur_rcv_buf == NULL) {
            /*
            ** force empty in order to exit from the polling loop
            */
            *empty = 1;
            return 0;
        }
    }
    buf = rozofs_fuse_cur_rcv_buf->buf;

    /*
    ** The receive function reports failures as a negated errno value, so
    ** the decision is taken on its return code rather than on the global
    ** errno, which may hold the result of an unrelated call.
    */
    do {
        tmpch = ch;
        /*
        ** set the reference of the buffer that will be used by fuse
        */
        fbuf.mem = buf;
        fbuf.flags = 0;
        fbuf.size = ctx_p->bufsize;
        res = fuse_session_receive_buf(se, &fbuf, &tmpch);
    } while (res == -EINTR);

    if (res == -EAGAIN) {
        /*
        ** the fuse queue is empty
        */
        *empty = 1;
        return 0;
    }

    /*
    ** res == 0 : session has been exited; res < 0 : fatal error.
    ** Only a valid message is handed over to fuse.
    */
    if (res > 0)
        fuse_session_process_buf(se, &fbuf, tmpch);

    return res < 0 ? -1 : 0;
}
void rozofs_fuse_show(char * argv[], uint32_t tcpRef, void *bufRef) { uint32_t buffer_count=0; char status[16]; int new_val; int ret; char *pChar = uma_dbg_get_buffer(); if (argv[1] != NULL) { if (strcmp(argv[1],"kernel")==0) { if (rozofs_fuse_ctx_p->ioctl_supported) { ioctl(rozofs_fuse_ctx_p->fd,100,NULL); pChar += sprintf(pChar, "check result in dmesg: ROZOFS_FUSE...\n"); } else { pChar += sprintf(pChar, "ioctl not supported with that fuse kernel version\n"); } uma_dbg_send(tcpRef, bufRef, TRUE, uma_dbg_get_buffer()); return; } if (strcmp(argv[1],"loop")==0) { errno = 0; if (argv[2] == NULL) { pChar += sprintf(pChar, "argument is missing\n"); rozofs_fuse_show_usage(pChar); uma_dbg_send(tcpRef, bufRef, TRUE, uma_dbg_get_buffer()); return; } new_val = (int) strtol(argv[2], (char **) NULL, 10); if (errno != 0) { pChar += sprintf(pChar, "bad value %s\n",argv[2]); rozofs_fuse_show_usage(pChar); uma_dbg_send(tcpRef, bufRef, TRUE, uma_dbg_get_buffer()); return; } /* ** */ if (new_val == 0) { pChar += sprintf(pChar, "unsupported value %s\n",argv[2]); rozofs_fuse_show_usage(pChar); uma_dbg_send(tcpRef, bufRef, TRUE, uma_dbg_get_buffer()); return; } rozofs_fuse_loop_count = new_val; pChar += sprintf(pChar, "new polling request count %d\n",rozofs_fuse_loop_count); uma_dbg_send(tcpRef, bufRef, TRUE, uma_dbg_get_buffer()); return; } if (strcmp(argv[1],"dir")==0) { if (rozofs_fuse_ctx_p->ioctl_supported==0) { pChar += sprintf(pChar, "ioctl not supported with that fuse kernel version\n"); uma_dbg_send(tcpRef, bufRef, TRUE, uma_dbg_get_buffer()); return; } errno = 0; if (argv[2] == NULL) { pChar += sprintf(pChar, "argument is missing\n"); rozofs_fuse_show_usage(pChar); uma_dbg_send(tcpRef, bufRef, TRUE, uma_dbg_get_buffer()); return; } if (strcmp(argv[2],"enable")==0) { ret = ioctl(rozofs_fuse_ctx_p->fd,4,NULL); if (ret < 0) { pChar += sprintf(pChar, "ioctl failed %s\n",strerror(errno)); } else { pChar += sprintf(pChar, "directory attributes are invalidated on 
mknod/rmdir/unlink...\n"); rozofs_fuse_ctx_p->dir_attr_invalidate = 1; } uma_dbg_send(tcpRef, bufRef, TRUE, uma_dbg_get_buffer()); return; } if (strcmp(argv[2],"disable")==0) { ret = ioctl(rozofs_fuse_ctx_p->fd,3,NULL); if (ret < 0) { pChar += sprintf(pChar, "ioctl failed %s\n",strerror(errno)); } else { pChar += sprintf(pChar, "directory attributes are not invalidated on mknod/rmdir/unlink...\n"); rozofs_fuse_ctx_p->dir_attr_invalidate = 0; } uma_dbg_send(tcpRef, bufRef, TRUE, uma_dbg_get_buffer()); return; } pChar += sprintf(pChar, "unsupported argument %s\n",argv[2]); rozofs_fuse_show_usage(pChar); uma_dbg_send(tcpRef, bufRef, TRUE, uma_dbg_get_buffer()); return; } pChar += sprintf(pChar, "unsupported command %s\n",argv[1]); rozofs_fuse_show_usage(pChar); uma_dbg_send(tcpRef, bufRef, TRUE, uma_dbg_get_buffer()); return; } uint64_t old_ticker = rozofs_fuse_req_tic; rozofs_fuse_get_ticker(); buffer_count = ruc_buf_getFreeBufferCount(rozofs_fuse_ctx_p->fuseReqPoolRef); /* ** check if the session has been exited */ if (fuse_session_exited(rozofs_fuse_ctx_p->se)) sprintf(status,"exited"); else sprintf(status,"running"); pChar += sprintf(pChar,"FUSE %8s - %d/%d ctx remaining\n", status, buffer_count, rozofs_fuse_ctx_p->initBufCount); /* ** display the cache mode */ pChar += sprintf(pChar,"buffer sz : %d\n",rozofs_fuse_ctx_p->bufsize); pChar += sprintf(pChar,"poll count : %d\n",rozofs_fuse_loop_count); pChar += sprintf(pChar,"dir attr : %s\n",(rozofs_fuse_ctx_p->dir_attr_invalidate)?"INVALIDATE":"KEEP"); pChar += sprintf(pChar,"FS Mode : "); if (rozofs_mode== 0) { pChar += sprintf(pChar,"standard\n"); } else { pChar += sprintf(pChar,"Block\n"); } pChar += sprintf(pChar,"FS Xattr : %s\n",(rozofs_xattr_disable==1)?"Disabled":"Enabled"); pChar += sprintf(pChar,"cache Mode : "); switch (rozofs_cache_mode) { default: case 0: pChar += sprintf(pChar,"default\n"); break; case 1: pChar += sprintf(pChar,"direct_io\n"); break; case 2: pChar += sprintf(pChar,"keep_cache\n"); 
break; } int i; for (i = 0; i < RZ_FUSE_WRITE_MAX; i++) { pChar +=sprintf(pChar,"cpt_%d: %8llu\n",i,(long long unsigned int)rozofs_write_merge_stats_tab[i]); } /** * clear the stats */ uint64_t delay = rozofs_fuse_req_tic-old_ticker; memset(rozofs_write_merge_stats_tab,0,sizeof(uint64_t)*RZ_FUSE_WRITE_MAX); pChar +=sprintf(pChar,"fuse req_in (count/bytes): %8llu/%llu\n",(long long unsigned int)rozofs_fuse_req_count, (long long unsigned int)rozofs_fuse_req_byte_in); pChar +=sprintf(pChar,"fuse time :%8llu (%llu)\n", (long long unsigned int)(fuse_profile[P_COUNT]?fuse_profile[P_ELAPSE]/fuse_profile[P_COUNT]:0), (long long unsigned int)fuse_profile[P_COUNT]); if (delay) { pChar +=sprintf(pChar,"fuse req_in/s : %8llu/%llu\n",(long long unsigned int)(rozofs_fuse_req_count*1000000/delay), (long long unsigned int)(rozofs_fuse_req_byte_in*1000000/delay)); } pChar +=sprintf(pChar,"fuse req_in EAGAIN/ENOENT: %8llu/%llu\n",(long long unsigned int)rozofs_fuse_req_eagain_count, (long long unsigned int)rozofs_fuse_req_enoent_count); pChar +=sprintf(pChar,"fuse buffer depletion : %8llu\n",(long long unsigned int)rozofs_fuse_buffer_depletion_count); pChar +=sprintf(pChar,"storcli buffer depletion : %8llu\n",(long long unsigned int)rozofs_storcli_buffer_depletion_count); pChar +=sprintf(pChar,"pending storcli requests : %8d\n",rozofs_storcli_pending_req_count); pChar +=sprintf(pChar,"fuse kernel xoff/xon : %8llu/%llu\n",(long long unsigned int)rozofs_storcli_xoff_count, (long long unsigned int)rozofs_storcli_xon_count); rozofs_storcli_buffer_depletion_count =0; rozofs_fuse_buffer_depletion_count =0; rozofs_fuse_req_count = 0; rozofs_fuse_req_byte_in = 0; rozofs_fuse_req_eagain_count = 0; rozofs_fuse_req_enoent_count = 0; rozofs_storcli_xoff_count = 0; rozofs_storcli_xon_count = 0; /** * read/write statistics */ pChar +=sprintf(pChar,"big write count : %8llu\n",(long long unsigned int)rozofs_fuse_read_write_stats_buf.big_write_cpt); pChar +=sprintf(pChar,"flush buf. 
count : %8llu\n",(long long unsigned int)rozofs_fuse_read_write_stats_buf.flush_buf_cpt); pChar +=sprintf(pChar," start aligned/unaligned : %8llu/%llu\n", (long long unsigned int)rozofs_aligned_write_start[0], (long long unsigned int)rozofs_aligned_write_start[1] ); pChar +=sprintf(pChar," end aligned/unaligned : %8llu/%llu\n", (long long unsigned int)rozofs_aligned_write_end[0], (long long unsigned int)rozofs_aligned_write_end[1] ); pChar +=sprintf(pChar,"readahead count : %8llu\n",(long long unsigned int)rozofs_fuse_read_write_stats_buf.readahead_cpt); pChar +=sprintf(pChar,"read req. count : %8llu\n",(long long unsigned int)rozofs_fuse_read_write_stats_buf.read_req_cpt); pChar +=sprintf(pChar,"read fuse count : %8llu\n",(long long unsigned int)rozofs_fuse_read_write_stats_buf.read_fuse_cpt); memset(&rozofs_fuse_read_write_stats_buf,0,sizeof(rozofs_fuse_read_write_stats)); { int k; for (k= 0;k< 2;k++) { rozofs_aligned_write_start[k] = 0; rozofs_aligned_write_end[k] = 0; } } /* ** Per array statistics */ pChar +=sprintf(pChar,"Per Read Array statitics:\n" ); for (i = 0; i < ROZOFS_FUSE_NB_OF_BUSIZE_SECTION_MAX; i++) { if (rozofs_read_buf_section_table[i]!= 0) pChar +=sprintf(pChar," %6d: %8llu\n",(i+1)*ROZOFS_PAGE_SZ,(long long unsigned int)rozofs_read_buf_section_table[i]); } pChar +=sprintf(pChar,"Per Write Array statitics:\n" ); for (i = 0; i < ROZOFS_FUSE_NB_OF_BUSIZE_SECTION_MAX; i++) { if (rozofs_write_buf_section_table[i]!= 0) pChar +=sprintf(pChar," %6d: %8llu\n",(i+1)*ROZOFS_PAGE_SZ,(long long unsigned int)rozofs_write_buf_section_table[i]); } memset (rozofs_write_buf_section_table,0,sizeof(uint64_t)*ROZOFS_FUSE_NB_OF_BUSIZE_SECTION_MAX); memset (rozofs_read_buf_section_table,0,sizeof(uint64_t)*ROZOFS_FUSE_NB_OF_BUSIZE_SECTION_MAX); uma_dbg_send(tcpRef, bufRef, TRUE, uma_dbg_get_buffer()); }
static void *zfsfuse_listener_loop(void *arg) { size_t bufsize = 0; char *buf = NULL; VERIFY(pthread_mutex_lock(&mtx) == 0); fuse_listeners_count++; while(!exit_fuse_listener) { int ret = poll(fds, nfds, 1000); if(ret == 0 || (ret == -1 && errno == EINTR)) continue; if(ret == -1) { perror("poll"); continue; } int oldfds = nfds; for(int i = 0; i < oldfds; i++) { short rev = fds[i].revents; if(rev == 0) continue; fds[i].revents = 0; if (rev & POLLNVAL) { // already closed // fuse_unmount_all triggers this fds[i].fd = -1; continue; } if(!(rev & POLLIN) && !(rev & POLLERR) && !(rev & POLLHUP)) continue; if(i == 0) { new_fs(); } else { /* Handle request */ if(fsinfo[i].bufsize > bufsize) { char *new_buf = realloc(buf, fsinfo[i].bufsize); if(new_buf == NULL) { fprintf(stderr, "Warning: out of memory!\n"); continue; } buf = new_buf; bufsize = fsinfo[i].bufsize; } if (!fsinfo[i].se) { destroy_fs(i); continue; } int res = fuse_chan_recv(&fsinfo[i].ch, buf, fsinfo[i].bufsize); if(res == -1 || fuse_session_exited(fsinfo[i].se)) { destroy_fs(i); continue; } if(res == 0) continue; struct fuse_session *se = fsinfo[i].se; struct fuse_chan *ch = fsinfo[i].ch; /* * While we process this request, we let another * thread receive new events */ VERIFY(pthread_mutex_unlock(&mtx) == 0); fuse_session_process(se, buf, res, ch); /* Acquire the mutex before proceeding */ VERIFY(pthread_mutex_lock(&mtx) == 0); /* * At this point, we can no longer trust oldfds * to be accurate, so we exit this loop * * Also, exit_fuse_listener might have been set in the mean * time */ break; } } /* Free the closed file descriptors entries */ int write_ptr = 0; for(int read_ptr = 0; read_ptr < nfds; read_ptr++) { if(fds[read_ptr].fd == -1) continue; if(read_ptr != write_ptr) { fds[write_ptr] = fds[read_ptr]; fsinfo[write_ptr] = fsinfo[read_ptr]; mountpoints[write_ptr] = mountpoints[read_ptr]; } write_ptr++; } nfds = write_ptr; } fuse_listeners_count--; VERIFY(0 == pthread_cond_signal(&exiting_fuse_listener)); 
VERIFY(pthread_mutex_unlock(&mtx) == 0); return NULL; }
/*
**__________________________________________________________________________
*/
/**
 * Receive at most one request from the fuse kernel channel and process it.
 *
 * The request is read into the buffer referenced by the rozofs_fuse context
 * (ctx_p->buf_fuse_req_p).
 *
 * @param ctx_p fuse context (session, receive buffer and its size)
 *
 * @retval 0  a request was processed, the fuse queue was empty, or the
 *            session has been exited
 * @retval -1 the receive failed with an error other than EINTR/EAGAIN
 */
int rozofs_fuse_session_loop(rozofs_fuse_ctx_t *ctx_p)
{
    int res;
    struct fuse_buf fbuf;
    struct fuse_session *se = ctx_p->se;
    struct fuse_chan *ch = fuse_session_next_chan(se, NULL);
    struct fuse_chan *tmpch;
    char *buf = ctx_p->buf_fuse_req_p;

    /*
    ** The receive function reports failures as a negated errno value, so
    ** the decision is taken on its return code rather than on the global
    ** errno, which may hold the result of an unrelated call.
    */
    do {
        tmpch = ch;
        /*
        ** set the reference of the buffer that will be used by fuse
        */
        fbuf.mem = buf;
        fbuf.flags = 0;
        fbuf.size = ctx_p->bufsize;
        res = fuse_session_receive_buf(se, &fbuf, &tmpch);
    } while (res == -EINTR);

    if (res == -EAGAIN) {
        /*
        ** the fuse queue is empty
        */
        return 0;
    }

    /*
    ** res == 0 : session has been exited; res < 0 : fatal error.
    ** Only a valid message is handed over to fuse.
    */
    if (res > 0)
        fuse_session_process_buf(se, &fbuf, tmpch);

    return res < 0 ? -1 : 0;
}
static void *fuse_do_work(void *data) { struct fuse_worker *w = (struct fuse_worker *) data; struct fuse_mt *mt = w->mt; while (!fuse_session_exited(mt->se)) { int isforget = 0; struct fuse_chan *ch = mt->prevch; struct fuse_buf fbuf = { .mem = w->buf, .size = w->bufsize, }; int res; pthread_setcancelstate(PTHREAD_CANCEL_ENABLE, NULL); res = fuse_session_receive_buf(mt->se, &fbuf, &ch); pthread_setcancelstate(PTHREAD_CANCEL_DISABLE, NULL); if (res == -EINTR) continue; if (res <= 0) { if (res < 0) { fuse_session_exit(mt->se); mt->error = -1; } break; } pthread_mutex_lock(&mt->lock); if (mt->exit) { pthread_mutex_unlock(&mt->lock); return NULL; } /* * This disgusting hack is needed so that zillions of threads * are not created on a burst of FORGET messages */ if (!(fbuf.flags & FUSE_BUF_IS_FD)) { struct fuse_in_header *in = fbuf.mem; if (in->opcode == FUSE_FORGET || in->opcode == FUSE_BATCH_FORGET) isforget = 1; } if (!isforget) mt->numavail--; if (mt->numavail == 0) fuse_loop_start_thread(mt); pthread_mutex_unlock(&mt->lock); fuse_session_process_buf(mt->se, &fbuf, ch); pthread_mutex_lock(&mt->lock); if (!isforget) mt->numavail++; if (mt->numavail > 10) { if (mt->exit) { pthread_mutex_unlock(&mt->lock); return NULL; } list_del_worker(w); mt->numavail--; mt->numworker--; pthread_mutex_unlock(&mt->lock); pthread_detach(w->thread_id); free(w->buf); free(w); return NULL; } pthread_mutex_unlock(&mt->lock); } sem_post(&mt->finish); return NULL; } int fuse_start_thread(pthread_t *thread_id, void *(*func)(void *), void *arg) { sigset_t oldset; sigset_t newset; int res; pthread_attr_t attr; char *stack_size; /* Override default stack size */ pthread_attr_init(&attr); stack_size = getenv(ENVNAME_THREAD_STACK); if (stack_size && pthread_attr_setstacksize(&attr, atoi(stack_size))) fprintf(stderr, "fuse: invalid stack size: %s\n", stack_size); /* Disallow signal reception in worker threads */ sigemptyset(&newset); sigaddset(&newset, SIGTERM); sigaddset(&newset, SIGINT); 
sigaddset(&newset, SIGHUP); sigaddset(&newset, SIGQUIT); pthread_sigmask(SIG_BLOCK, &newset, &oldset); res = pthread_create(thread_id, &attr, func, arg); pthread_sigmask(SIG_SETMASK, &oldset, NULL); pthread_attr_destroy(&attr); if (res != 0) { fprintf(stderr, "fuse: error creating thread: %s\n", strerror(res)); return -1; } return 0; }
/*
 * "exited" callback: reports the exit state of the wrapped session.
 *
 * data is the struct procdata whose prevse session is queried.
 * Returns whatever fuse_session_exited() returns for that session.
 */
static int mt_session_exited(void *data)
{
	struct procdata *proc = data;
	struct fuse_session *wrapped = proc->prevse;

	return fuse_session_exited(wrapped);
}
// Return the amount of time to wait between sched_run_callbacks() calls static struct timeval fuse_serve_timeout(void) { struct timeval tv = { .tv_sec = 0, .tv_usec = 1000000/HZ }; return tv; } struct callback_list { unlock_callback_t callback; void * data; int count; struct callback_list * next; }; static struct callback_list * callbacks; int fstitchd_unlock_callback(unlock_callback_t callback, void * data) { if(callbacks && callbacks->callback == callback && callbacks->data == data) callbacks->count++; else { struct callback_list * list = malloc(sizeof(*list)); if(!list) return -ENOMEM; list->callback = callback; list->data = data; list->count = 1; list->next = callbacks; callbacks = list; } return 0; } // Adapted from FUSE's lib/fuse_loop.c to support sched callbacks and multiple mounts int fuse_serve_loop(void) { struct timeval tv; mount_t ** mp; int r; Dprintf("%s()\n", __FUNCTION__); if (!root_cfs) { fprintf(stderr, "%s(): no root cfs was specified; not running.\n", __FUNCTION__); return -1; } if ((r = fuse_serve_mount_load_mounts()) < 0) { fprintf(stderr, "%s(): fuse_serve_load_mounts: %d\n", __FUNCTION__, r); return r; } serving = 1; tv = fuse_serve_timeout(); while ((mp = fuse_serve_mounts()) && mp && mp[0]) { fd_set rfds; int max_fd = 0; struct timeval it_start, it_end; FD_ZERO(&rfds); if (shutdown_pipe[0] != -1) { FD_SET(shutdown_pipe[0], &rfds); if (shutdown_pipe[0] > max_fd) max_fd = shutdown_pipe[0]; } FD_SET(remove_activity, &rfds); if (remove_activity > max_fd) max_fd = remove_activity; for (mp = fuse_serve_mounts(); mp && *mp; mp++) { if ((*mp)->mounted && !fuse_session_exited((*mp)->session)) { //printf("[\"%s\"]", mount->fstitch_path); fflush(stdout); // debug int mount_fd = fuse_chan_fd((*mp)->channel); FD_SET(mount_fd, &rfds); if (mount_fd > max_fd) max_fd = mount_fd; } } r = select(max_fd+1, &rfds, NULL, NULL, &tv); if (r == 0) { //printf("."); fflush(stdout); // debugging output sched_run_callbacks(); tv = fuse_serve_timeout(); } else if (r 
< 0) { if (errno != EINTR) perror("select"); //printf("!\n"); fflush(stdout); // debugging output tv = fuse_serve_timeout(); // tv may have become undefined } else { if (gettimeofday(&it_start, NULL) == -1) { perror("gettimeofday"); break; } for (mp = fuse_serve_mounts(); mp && *mp; mp++) { if ((*mp)->mounted && FD_ISSET((*mp)->channel_fd, &rfds)) { r = fuse_chan_receive((*mp)->channel, channel_buf, channel_buf_len); if(r <= 0) fprintf(stderr, "fuse_chan_receive() returned %d, ignoring!\n", r); //assert(r > 0); // this happens during shutdown on MacFUSE... Dprintf("fuse_serve: request for mount \"%s\"\n", (*mp)->fstitch_path); fuse_session_process((*mp)->session, channel_buf, r, (*mp)->channel); sched_run_cleanup(); } } if (shutdown_pipe[0] != -1 && FD_ISSET(shutdown_pipe[0], &rfds)) { // Start unmounting all filesystems // Looping will stop once all filesystems are unmounted ignore_shutdown_signals(); if (fuse_serve_mount_start_shutdown() < 0) { fprintf(stderr, "fuse_serve_mount_start_shutdown() failed, exiting fuse_serve_loop()\n"); return -1; } } if (FD_ISSET(remove_activity, &rfds)) { if (fuse_serve_mount_step_remove() < 0) { fprintf(stderr, "fuse_serve_mount_step_remove() failed, exiting fuse_serve_loop()\n"); return -1; } } if (gettimeofday(&it_end, NULL) == -1) { perror("gettimeofday"); break; } tv = time_subtract(tv, time_elapsed(it_start, it_end)); } while(callbacks) { struct callback_list * first = callbacks; callbacks = first->next; first->callback(first->data, first->count); free(first); } } serving = 0; return 0; }