/* * LAMEbus interrupt handling function. (Machine-independent!) */ void lamebus_interrupt(struct lamebus_softc *lamebus) { /* * Note that despite the fact that "spl" stands for "set * priority level", we don't actually support interrupt * priorities. When an interrupt happens, we look through the * slots to find the first interrupting device and call its * interrupt routine, no matter what that device is. * * Note that the entire LAMEbus uses only one on-cpu interrupt line. * Thus, we do not use any on-cpu interrupt priority system either. */ int slot; uint32_t mask; uint32_t irqs; void (*handler)(void *); void *data; /* For keeping track of how many bogus things happen in a row. */ static int duds = 0; int duds_this_time = 0; /* and we better have a valid bus instance. */ KASSERT(lamebus != NULL); /* Lock the softc */ spinlock_acquire(&lamebus->ls_lock); /* * Read the LAMEbus controller register that tells us which * slots are asserting an interrupt condition. */ irqs = read_ctl_register(lamebus, CTLREG_IRQS); if (irqs == 0) { /* * Huh? None of them? Must be a glitch. */ kprintf("lamebus: stray interrupt on cpu %u\n", curcpu->c_number); duds++; duds_this_time++; /* * We could just return now, but instead we'll * continue ahead. Because irqs == 0, nothing in the * loop will execute, and passing through it gets us * to the code that checks how many duds we've * seen. This is important, because we just might get * a stray interrupt that latches itself on. If that * happens, we're pretty much toast, but it's better * to panic and hopefully reset the system than to * loop forever printing "stray interrupt". */ } /* * Go through the bits in the value we got back to see which * ones are set. */ for (mask=1, slot=0; slot<LB_NSLOTS; mask<<=1, slot++) { if ((irqs & mask) == 0) { /* Nope. */ continue; } /* * This slot is signalling an interrupt. */ if ((lamebus->ls_slotsinuse & mask)==0) { /* * No device driver is using this slot. 
*/ duds++; duds_this_time++; continue; } if (lamebus->ls_irqfuncs[slot]==NULL) { /* * The device driver hasn't installed an interrupt * handler. */ duds++; duds_this_time++; continue; } /* * Call the interrupt handler. Release the spinlock * while we do so, in case other CPUs are handling * interrupts on other devices. */ handler = lamebus->ls_irqfuncs[slot]; data = lamebus->ls_devdata[slot]; spinlock_release(&lamebus->ls_lock); handler(data); spinlock_acquire(&lamebus->ls_lock); /* * Reload the mask of pending IRQs - if we just called * hardclock, we might not have come back to this * context for some time, and it might have changed. */ irqs = read_ctl_register(lamebus, CTLREG_IRQS); } /* * If we get interrupts for a slot with no driver or no * interrupt handler, it's fairly serious. Because LAMEbus * uses level-triggered interrupts, if we don't shut off the * condition, we'll keep getting interrupted continuously and * the system will make no progress. But we don't know how to * do that if there's no driver or no interrupt handler. * * So, if we get too many dud interrupts, panic, since it's * better to panic and reset than to hang. * * If we get through here without seeing any duds this time, * the condition, whatever it was, has gone away. It might be * some stupid device we don't have a driver for, or it might * have been an electrical transient. In any case, warn and * clear the dud count. */ if (duds_this_time == 0 && duds > 0) { kprintf("lamebus: %d dud interrupts\n", duds); duds = 0; } if (duds > 10000) { panic("lamebus: too many (%d) dud interrupts\n", duds); } /* Unlock the softc */ spinlock_release(&lamebus->ls_lock); }
/**
 * We have a registered slave that is behind the current leading edge of the
 * binlog. We must replay the log entries to bring this node up to speed.
 *
 * There may be a large number of records to send to the slave. The process
 * is triggered by the slave COM_BINLOG_DUMP message and all the events must
 * be sent without receiving any new event. This means there is no trigger into
 * MaxScale other than this initial message. However, if we simply send all the
 * events we end up with an extremely long write queue on the DCB and risk
 * running the server out of resources.
 *
 * The slave catchup routine sends a burst of replication events per single
 * call. The parameter "large" controls the number of events in the burst. The
 * short burst is intended to be used when the master receives an event and
 * needs to put the slave into catchup mode. This prevents the slave taking
 * too much time away from the thread that is processing the master events.
 * A burst also ends once router->burst_size bytes of events have been sent.
 *
 * At the end of the burst a fake EPOLLOUT event is added to the poll event
 * queue. This ensures that the slave callback for processing DCB write drain
 * will be called and future catchup requests will be handled on another thread.
 *
 * Only one thread at a time runs the catchup for a given slave: the CS_BUSY
 * bit in slave->cstate (guarded by slave->catch_lock) is used as the gate.
 *
 * @param router	The binlog router
 * @param slave		The slave that is behind
 * @param large		Send a long or short burst of events
 * @return		0 if the slave was already busy or the binlog file
 *			could not be opened; otherwise the result of the last
 *			DCB write performed (1 if nothing was written)
 */
int
blr_slave_catchup(ROUTER_INSTANCE *router, ROUTER_SLAVE *slave, bool large)
{
GWBUF		*head, *record = NULL;	/* NULL so the post-loop test is defined
					 * even if the loop never reads a record */
REP_HEADER	hdr;
int		written, rval = 1, burst;
int		rotating = 0;
unsigned long	burst_size;
uint8_t		*ptr;

	burst = large ? router->long_burst : router->short_burst;
	burst_size = router->burst_size;

	/* Claim the slave; give up if another thread is already catching up. */
	spinlock_acquire(&slave->catch_lock);
	if (slave->cstate & CS_BUSY)
	{
		spinlock_release(&slave->catch_lock);
		return 0;
	}
	slave->cstate |= CS_BUSY;
	spinlock_release(&slave->catch_lock);

	if (slave->file == NULL)
	{
		/* Sample the rotate state before the open attempt. */
		rotating = router->rotating;
		if ((slave->file = blr_open_binlog(router, slave->binlogfile)) == NULL)
		{
			if (rotating)
			{
				/*
				 * The master is rotating; the file may not
				 * exist yet. Retry via a fake write event.
				 */
				spinlock_acquire(&slave->catch_lock);
				slave->cstate |= CS_EXPECTCB;
				slave->cstate &= ~CS_BUSY;
				spinlock_release(&slave->catch_lock);
				poll_fake_write_event(slave->dcb);
				return rval;
			}
			LOGIF(LE, (skygw_log_write(
				LOGFILE_ERROR,
				"blr_slave_catchup failed to open binlog file %s",
					slave->binlogfile)));
			/* cstate is only ever modified under catch_lock. */
			spinlock_acquire(&slave->catch_lock);
			slave->cstate &= ~CS_BUSY;
			spinlock_release(&slave->catch_lock);
			slave->state = BLRS_ERRORED;
			dcb_close(slave->dcb);
			return 0;
		}
	}
	slave->stats.n_bursts++;

	while (burst-- && burst_size > 0 &&
		(record = blr_read_binlog(router, slave->file,
				slave->binlog_pos, &hdr)) != NULL)
	{
		/* 5 byte header: 3 byte length, sequence number, OK byte. */
		head = gwbuf_alloc(5);
		ptr = GWBUF_DATA(head);
		encode_value(ptr, hdr.event_size + 1, 24);
		ptr += 3;
		*ptr++ = slave->seqno++;
		*ptr++ = 0;		// OK
		head = gwbuf_append(head, record);
		if (hdr.event_type == ROTATE_EVENT)
		{
			unsigned long beat1 = hkheartbeat;

			blr_close_binlog(router, slave->file);
			if (hkheartbeat - beat1 > 1)
				LOGIF(LE, (skygw_log_write(
					LOGFILE_ERROR,
					"blr_close_binlog took %lu beats",
					(unsigned long)(hkheartbeat - beat1))));
			blr_slave_rotate(slave, GWBUF_DATA(record));
			beat1 = hkheartbeat;
			/*
			 * Re-sample the rotate state: the value taken at
			 * entry is stale, or was never taken at all if the
			 * file was already open when we were called.
			 */
			rotating = router->rotating;
			if ((slave->file = blr_open_binlog(router, slave->binlogfile)) == NULL)
			{
				/*
				 * NOTE(review): 'head' (which now owns
				 * 'record') is neither written nor freed on
				 * either of the exits below - confirm
				 * whether it should be released here.
				 */
				if (rotating)
				{
					spinlock_acquire(&slave->catch_lock);
					slave->cstate |= CS_EXPECTCB;
					slave->cstate &= ~CS_BUSY;
					spinlock_release(&slave->catch_lock);
					poll_fake_write_event(slave->dcb);
					return rval;
				}
				LOGIF(LE, (skygw_log_write(
					LOGFILE_ERROR,
					"blr_slave_catchup failed to open binlog file %s",
					slave->binlogfile)));
				/*
				 * Mirror the open failure handling above and
				 * leave immediately: falling out of the loop
				 * would queue a fake write event on the DCB
				 * that is being closed here.
				 */
				spinlock_acquire(&slave->catch_lock);
				slave->cstate &= ~CS_BUSY;
				spinlock_release(&slave->catch_lock);
				slave->state = BLRS_ERRORED;
				dcb_close(slave->dcb);
				return 0;
			}
			if (hkheartbeat - beat1 > 1)
				LOGIF(LE, (skygw_log_write(
					LOGFILE_ERROR,
					"blr_open_binlog took %lu beats",
					(unsigned long)(hkheartbeat - beat1))));
		}
		slave->stats.n_bytes += gwbuf_length(head);
		written = slave->dcb->func.write(slave->dcb, head);
		if (written && hdr.event_type != ROTATE_EVENT)
		{
			slave->binlog_pos = hdr.next_pos;
		}
		rval = written;
		slave->stats.n_events++;
		/*
		 * burst_size is unsigned: clamp at zero rather than letting
		 * it wrap, otherwise the byte budget never ends the burst.
		 */
		if (hdr.event_size >= burst_size)
			burst_size = 0;
		else
			burst_size -= hdr.event_size;
	}
	if (record == NULL)
		slave->stats.n_failed_read++;

	spinlock_acquire(&slave->catch_lock);
	slave->cstate &= ~CS_BUSY;
	spinlock_release(&slave->catch_lock);

	if (record)
	{
		/*
		 * The burst ended because a limit was reached, not because
		 * the read failed: schedule another burst.
		 */
		slave->stats.n_flows++;
		spinlock_acquire(&slave->catch_lock);
		slave->cstate |= CS_EXPECTCB;
		spinlock_release(&slave->catch_lock);
		poll_fake_write_event(slave->dcb);
	}
	else if (slave->binlog_pos == router->binlog_position &&
			strcmp(slave->binlogfile, router->binlog_name) == 0)
	{
		int state_change = 0;

		spinlock_acquire(&router->binlog_lock);
		spinlock_acquire(&slave->catch_lock);

		/*
		 * Now check again since we hold the router->binlog_lock
		 * and slave->catch_lock.
		 */
		if (slave->binlog_pos != router->binlog_position ||
			strcmp(slave->binlogfile, router->binlog_name) != 0)
		{
			slave->cstate &= ~CS_UPTODATE;
			slave->cstate |= CS_EXPECTCB;
			spinlock_release(&slave->catch_lock);
			spinlock_release(&router->binlog_lock);
			poll_fake_write_event(slave->dcb);
		}
		else
		{
			if ((slave->cstate & CS_UPTODATE) == 0)
			{
				slave->stats.n_upd++;
				slave->cstate |= CS_UPTODATE;
				state_change = 1;
			}
			/*
			 * Release on every path: previously both locks stayed
			 * held when the slave was already marked up to date.
			 */
			spinlock_release(&slave->catch_lock);
			spinlock_release(&router->binlog_lock);
		}

		if (state_change)
		{
			slave->stats.n_caughtup++;
			/* Log the first catch-up and then every 50th. */
			if (slave->stats.n_caughtup == 1)
			{
				LOGIF(LM, (skygw_log_write(LOGFILE_MESSAGE,
					"%s: Slave %s is up to date %s, %u.",
					router->service->name,
					slave->dcb->remote,
					slave->binlogfile, slave->binlog_pos)));
			}
			else if ((slave->stats.n_caughtup % 50) == 0)
			{
				LOGIF(LM, (skygw_log_write(LOGFILE_MESSAGE,
					"%s: Slave %s is up to date %s, %u.",
					router->service->name,
					slave->dcb->remote,
					slave->binlogfile, slave->binlog_pos)));
			}
		}
	}
	else
	{
		if (slave->binlog_pos >= blr_file_size(slave->file)
				&& router->rotating == 0
				&& strcmp(router->binlog_name, slave->binlogfile) != 0
				&& blr_master_connected(router))
		{
			/* We may have reached the end of file of a non-current
			 * binlog file.
			 *
			 * Note if the master is rotating there is a window during
			 * which the rotate event has been written to the old binlog
			 * but the new binlog file has not yet been created. Therefore
			 * we ignore these issues during the rotate processing.
			 */
			LOGIF(LE, (skygw_log_write(LOGFILE_ERROR,
				"Slave reached end of file for binlong file %s at %u "
				"which is not the file currently being downloaded. "
				"Master binlog is %s, %lu.",
				slave->binlogfile, slave->binlog_pos,
				router->binlog_name, router->binlog_position)));
			if (blr_slave_fake_rotate(router, slave))
			{
				spinlock_acquire(&slave->catch_lock);
				slave->cstate |= CS_EXPECTCB;
				spinlock_release(&slave->catch_lock);
				poll_fake_write_event(slave->dcb);
			}
			else
			{
				slave->state = BLRS_ERRORED;
				dcb_close(slave->dcb);
			}
		}
		else
		{
			spinlock_acquire(&slave->catch_lock);
			slave->cstate |= CS_EXPECTCB;
			spinlock_release(&slave->catch_lock);
			poll_fake_write_event(slave->dcb);
		}
	}
	return rval;
}
/**
 * The routeQuery entry point. This is passed the query buffer
 * to which the filter should be applied. Once processed the
 * query is passed to the downstream component
 * (filter or router) in the filter chain.
 *
 * The function tries to extract a SQL query out of the query buffer,
 * prefixes it with a timestamp ("<seconds>|<sql>") and publishes the
 * resulting string on the exchange. The message is tagged with the
 * session's unique identifier as its correlation id.
 *
 * If the broker connection is marked as failed, a reconnect is attempted
 * at most once per rconn_intv seconds; while the connection is down the
 * query is not published but is still routed downstream.
 *
 * @param instance	The filter instance data
 * @param session	The filter session
 * @param queue		The query data
 * @return		The return value of the downstream routeQuery call
 */
static int
routeQuery(FILTER *instance, void *session, GWBUF *queue)
{
  MQ_SESSION	*my_session = (MQ_SESSION *)session;
  MQ_INSTANCE	*my_instance = (MQ_INSTANCE *)instance;
  char		*ptr, t_buf[128], *combined;
  int		length, err_code = AMQP_STATUS_OK;
  amqp_basic_properties_t prop;

  /* Check the connection state and, if it is time, try to reconnect. */
  spinlock_acquire(my_instance->rconn_lock);
  if(my_instance->conn_stat != AMQP_STATUS_OK){

    if(difftime(time(NULL),my_instance->last_rconn) > my_instance->rconn_intv){

      my_instance->last_rconn = time(NULL);

      if(init_conn(my_instance,my_session)){
	my_instance->rconn_intv = 1.0;
	my_instance->conn_stat = AMQP_STATUS_OK;
      }else{
	/* Back off: wait five seconds longer before the next attempt. */
	my_instance->rconn_intv += 5.0;
	skygw_log_write(LOGFILE_ERROR,
			"Error : Failed to reconnect to the MQRabbit server ");
      }
      err_code = my_instance->conn_stat;
    }
  }
  spinlock_release(my_instance->rconn_lock);

  /* Lazily create the per-session unique identifier. */
  if(modutil_is_SQL(queue)){

    if(my_session->uid == NULL){

      my_session->uid = calloc(33,sizeof(char));

      if(!my_session->uid){
	skygw_log_write(LOGFILE_ERROR,"Error : Out of memory.");
      }else{
	genkey(my_session->uid,32);
      }

    }

  }

  if (err_code == AMQP_STATUS_OK && modutil_extract_SQL(queue, &ptr, &length)){

    int		tlen;
    size_t	qlen;

    my_session->was_query = 1;

    memset(&prop, 0, sizeof(prop));
    prop._flags = AMQP_BASIC_CONTENT_TYPE_FLAG |
      AMQP_BASIC_DELIVERY_MODE_FLAG |
      AMQP_BASIC_MESSAGE_ID_FLAG;
    prop.content_type = amqp_cstring_bytes("text/plain");
    prop.delivery_mode = AMQP_DELIVERY_PERSISTENT;
    prop.message_id = amqp_cstring_bytes("query");

    /*
     * The uid allocation above may have failed; only tag the message
     * with a correlation id when there is one to send.
     */
    if(my_session->uid != NULL){
      prop._flags |= AMQP_BASIC_CORRELATION_ID_FLAG;
      prop.correlation_id = amqp_cstring_bytes(my_session->uid);
    }

    /* Build "<timestamp>|<sql>" in a freshly allocated buffer. */
    tlen = snprintf(t_buf, sizeof(t_buf), "%lu|", (unsigned long)time(NULL));
    if(tlen < 0){
      tlen = 0;
    }else if((size_t)tlen >= sizeof(t_buf)){
      tlen = (int)sizeof(t_buf) - 1;
    }
    if(length < 0){
      length = 0;
    }
    qlen = (size_t)tlen + (size_t)length;

    if((combined = malloc(qlen + 1)) == NULL){
      /* Cannot publish without the buffer; still route downstream. */
      skygw_log_write_flush(LOGFILE_ERROR,
			    "Error : Out of memory");
    }else{
      memcpy(combined, t_buf, (size_t)tlen);
      memcpy(combined + tlen, ptr, (size_t)length);
      combined[qlen] = '\0';

      if((err_code = amqp_basic_publish(my_session->conn,my_session->channel,
					amqp_cstring_bytes(my_instance->exchange),
					amqp_cstring_bytes(my_instance->key),
					0,0,&prop,amqp_cstring_bytes(combined))
	  ) != AMQP_STATUS_OK){
	/* Remember the failure so the next query triggers a reconnect. */
	spinlock_acquire(my_instance->rconn_lock);
	my_instance->conn_stat = err_code;
	spinlock_release(my_instance->rconn_lock);

	skygw_log_write_flush(LOGFILE_ERROR,
			      "Error : Failed to publish message to MQRabbit server: "
			      "%s",amqp_error_string2(err_code));

      }

      /* The publish call has returned; the body buffer is ours to free. */
      free(combined);
    }

  }

  /** Pass the query downstream */
  return my_session->down.routeQuery(my_session->down.instance,
				     my_session->down.session, queue);
}
static int subpage_kfree(void *ptr) { int blktype; // index into sizes[] that we're using vaddr_t ptraddr; // same as ptr struct pageref *pr; // pageref for page we're freeing in vaddr_t prpage; // PR_PAGEADDR(pr) vaddr_t fla; // free list entry address struct freelist *fl; // free list entry vaddr_t offset; // offset into page ptraddr = (vaddr_t)ptr; spinlock_acquire(&kmalloc_spinlock); checksubpages(); for (pr = allbase; pr; pr = pr->next_all) { prpage = PR_PAGEADDR(pr); blktype = PR_BLOCKTYPE(pr); /* check for corruption */ KASSERT(blktype>=0 && blktype<NSIZES); checksubpage(pr); if (ptraddr >= prpage && ptraddr < prpage + PAGE_SIZE) { break; } } if (pr==NULL) { /* Not on any of our pages - not a subpage allocation */ spinlock_release(&kmalloc_spinlock); return -1; } offset = ptraddr - prpage; /* Check for proper positioning and alignment */ if (offset >= PAGE_SIZE || offset % sizes[blktype] != 0) { panic("kfree: subpage free of invalid addr %p\n", ptr); } /* * Clear the block to 0xdeadbeef to make it easier to detect * uses of dangling pointers. */ fill_deadbeef(ptr, sizes[blktype]); /* * We probably ought to check for free twice by seeing if the block * is already on the free list. But that's expensive, so we don't. */ fla = prpage + offset; fl = (struct freelist *)fla; if (pr->freelist_offset == INVALID_OFFSET) { fl->next = NULL; } else { fl->next = (struct freelist *)(prpage + pr->freelist_offset); } pr->freelist_offset = offset; pr->nfree++; KASSERT(pr->nfree <= PAGE_SIZE / sizes[blktype]); if (pr->nfree == PAGE_SIZE / sizes[blktype]) { /* Whole page is free. */ remove_lists(pr, blktype); freepageref(pr); /* Call free_kpages without kmalloc_spinlock. */ spinlock_release(&kmalloc_spinlock); free_kpages(prpage); } else { spinlock_release(&kmalloc_spinlock); } #ifdef SLOWER /* Don't get the lock unless checksubpages does something. */ spinlock_acquire(&kmalloc_spinlock); checksubpages(); spinlock_release(&kmalloc_spinlock); #endif return 0; }
/**
 * Print all servers in JSON format to a DCB.
 *
 * The output is a JSON array with one object per entry of the allServers
 * list. The list is walked while holding server_spin.
 *
 * Designed to be called within a debugger session in order
 * to display all active servers within the gateway.
 *
 * @param dcb	The DCB to print to
 */
void
dprintAllServersJson(DCB *dcb)
{
SERVER	*srv;
char	*status_str;
int	idx;

	spinlock_acquire(&server_spin);

	dcb_printf(dcb, "[\n");
	for (srv = allServers; srv != NULL; srv = srv->next)
	{
		dcb_printf(dcb, " {\n \"server\": \"%s\",\n",
								srv->name);

		/* server_status() hands back a string that we must free */
		status_str = server_status(srv);
		dcb_printf(dcb, " \"status\": \"%s\",\n",
								status_str);
		free(status_str);

		dcb_printf(dcb, " \"protocol\": \"%s\",\n",
								srv->protocol);
		dcb_printf(dcb, " \"port\": \"%d\",\n",
								srv->port);
		if (srv->server_string)
		{
			dcb_printf(dcb, " \"version\": \"%s\",\n",
							srv->server_string);
		}
		dcb_printf(dcb, " \"nodeId\": \"%d\",\n",
								srv->node_id);
		dcb_printf(dcb, " \"masterId\": \"%d\",\n",
								srv->master_id);

		if (srv->slaves)
		{
			/* The slave id array is terminated by a zero entry */
			dcb_printf(dcb, " \"slaveIds\": [ ");
			idx = 0;
			while (srv->slaves[idx])
			{
				if (idx > 0)
					dcb_printf(dcb, ", %li ", srv->slaves[idx]);
				else
					dcb_printf(dcb, "%li", srv->slaves[idx]);
				idx++;
			}
			dcb_printf(dcb, "],\n");
		}

		dcb_printf(dcb, " \"replDepth\": \"%d\",\n",
								srv->depth);

		if ((SERVER_IS_SLAVE(srv) || SERVER_IS_RELAY_SERVER(srv))
			&& srv->rlag >= 0)
		{
			dcb_printf(dcb, " \"slaveDelay\": \"%d\",\n",
								srv->rlag);
		}

		if (srv->node_ts > 0)
		{
			dcb_printf(dcb, " \"lastReplHeartbeat\": \"%lu\",\n",
								srv->node_ts);
		}

		dcb_printf(dcb, " \"totalConnections\": \"%d\",\n",
						srv->stats.n_connections);
		dcb_printf(dcb, " \"currentConnections\": \"%d\",\n",
						srv->stats.n_current);
		dcb_printf(dcb, " \"currentOps\": \"%d\"\n",
						srv->stats.n_current_ops);

		/* Every object but the last is followed by a comma */
		if (srv->next != NULL)
			dcb_printf(dcb, " },\n");
		else
			dcb_printf(dcb, " }\n");
	}
	dcb_printf(dcb, "]\n");

	spinlock_release(&server_spin);
}
/** * The main polling loop * * This routine does the polling and despatches of IO events * to the DCB's. It may be called either directly or as the entry point * of a polling thread within the gateway. * * The routine will loop as long as the variable "shutdown" is set to zero, * setting this to a non-zero value will cause the polling loop to return. * * There are two options for the polling, a debug option that is only useful if * you have a single thread. This blocks in epoll_wait until an event occurs. * * The non-debug option does an epoll with a time out. This allows the checking of * shutdown value to be checked in all threads. The algorithm for polling in this * mode is to do a poll with no-wait, if no events are detected then the poll is * repeated with a time out. This allows for a quick check before making the call * with timeout. The call with the timeout differs in that the Linux scheduler may * deschedule a process if a timeout is included, but will not do this if a 0 timeout * value is given. this improves performance when the gateway is under heavy load. * * In order to provide a fairer means of sharing the threads between the different * DCB's the poll mechanism has been decoupled from the processing of the events. * The events are now recieved via the epoll_wait call, a queue of DCB's that have * events pending is maintained and as new events arrive the DCB is added to the end * of this queue. If an eent arrives for a DCB alreayd in the queue, then the event * bits are added to the DCB but the DCB mantains the same point in the queue unless * the original events are already being processed. If they are being processed then * the DCB is moved to the back of the queue, this means that a DCB that is receiving * events at a high rate will not block the execution of events for other DCB's and * should result in a fairer polling strategy. 
* * The introduction of the ability to inject "fake" write events into the event queue meant * that there was a possibility to "starve" new events sicne the polling loop would * consume the event queue before looking for new events. If the DCB that inject * the fake event then injected another fake event as a result of the first it meant * that new events did not get added to the queue. The strategy has been updated to * not consume the entire event queue, but process one event before doing a non-blocking * call to add any new events before processing any more events. A blocking call to * collect events is only made if there are no pending events to be processed on the * event queue. * * Also introduced a "timeout bias" mechanism. This mechansim control the length of * of timeout passed to epoll_wait in blocking calls based on previous behaviour. * The initial call will block for 10% of the define timeout peroid, this will be * increased in increments of 10% until the full timeout value is used. If at any * point there is an event to be processed then the value will be reduced to 10% again * for the next blocking call. 
* * @param arg The thread ID passed as a void * to satisfy the threading package */ void poll_waitevents(void *arg) { struct epoll_event events[MAX_EVENTS]; int i, nfds, timeout_bias = 1; intptr_t thread_id = (intptr_t)arg; DCB *zombies = NULL; int poll_spins = 0; /** Add this thread to the bitmask of running polling threads */ bitmask_set(&poll_mask, thread_id); if (thread_data) { thread_data[thread_id].state = THREAD_IDLE; } /** Init mysql thread context for use with a mysql handle and a parser */ mysql_thread_init(); while (1) { if (pollStats.evq_pending == 0 && timeout_bias < 10) { timeout_bias++; } atomic_add(&n_waiting, 1); #if BLOCKINGPOLL nfds = epoll_wait(epoll_fd, events, MAX_EVENTS, -1); atomic_add(&n_waiting, -1); #else /* BLOCKINGPOLL */ #if MUTEX_EPOLL simple_mutex_lock(&epoll_wait_mutex, TRUE); #endif if (thread_data) { thread_data[thread_id].state = THREAD_POLLING; } atomic_add(&pollStats.n_polls, 1); if ((nfds = epoll_wait(epoll_fd, events, MAX_EVENTS, 0)) == -1) { atomic_add(&n_waiting, -1); int eno = errno; errno = 0; LOGIF(LD, (skygw_log_write( LOGFILE_DEBUG, "%lu [poll_waitevents] epoll_wait returned " "%d, errno %d", pthread_self(), nfds, eno))); atomic_add(&n_waiting, -1); } /* * If there are no new descriptors from the non-blocking call * and nothing to process on the event queue then for do a * blocking call to epoll_wait. 
* * We calculate a timeout bias to alter the length of the blocking * call based on the time since we last received an event to process */ else if (nfds == 0 && pollStats.evq_pending == 0 && poll_spins++ > number_poll_spins) { atomic_add(&pollStats.blockingpolls, 1); nfds = epoll_wait(epoll_fd, events, MAX_EVENTS, (max_poll_sleep * timeout_bias) / 10); if (nfds == 0 && pollStats.evq_pending) { atomic_add(&pollStats.wake_evqpending, 1); poll_spins = 0; } } else { atomic_add(&n_waiting, -1); } if (n_waiting == 0) atomic_add(&pollStats.n_nothreads, 1); #if MUTEX_EPOLL simple_mutex_unlock(&epoll_wait_mutex); #endif #endif /* BLOCKINGPOLL */ if (nfds > 0) { timeout_bias = 1; if (poll_spins <= number_poll_spins + 1) atomic_add(&pollStats.n_nbpollev, 1); poll_spins = 0; LOGIF(LD, (skygw_log_write( LOGFILE_DEBUG, "%lu [poll_waitevents] epoll_wait found %d fds", pthread_self(), nfds))); atomic_add(&pollStats.n_pollev, 1); if (thread_data) { thread_data[thread_id].n_fds = nfds; thread_data[thread_id].cur_dcb = NULL; thread_data[thread_id].event = 0; thread_data[thread_id].state = THREAD_PROCESSING; } pollStats.n_fds[(nfds < MAXNFDS ? (nfds - 1) : MAXNFDS - 1)]++; load_average = (load_average * load_samples + nfds) / (load_samples + 1); atomic_add(&load_samples, 1); atomic_add(&load_nfds, nfds); /* * Process every DCB that has a new event and add * it to the poll queue. * If the DCB is currently being processed then we * or in the new eent bits to the pending event bits * and leave it in the queue. * If the DCB was not already in the queue then it was * idle and is added to the queue to process after * setting the event bits. 
*/ for (i = 0; i < nfds; i++) { DCB *dcb = (DCB *)events[i].data.ptr; __uint32_t ev = events[i].events; spinlock_acquire(&pollqlock); if (DCB_POLL_BUSY(dcb)) { if (dcb->evq.pending_events == 0) { pollStats.evq_pending++; dcb->evq.inserted = hkheartbeat; } dcb->evq.pending_events |= ev; } else { dcb->evq.pending_events = ev; if (eventq) { dcb->evq.prev = eventq->evq.prev; eventq->evq.prev->evq.next = dcb; eventq->evq.prev = dcb; dcb->evq.next = eventq; } else { eventq = dcb; dcb->evq.prev = dcb; dcb->evq.next = dcb; } pollStats.evq_length++; pollStats.evq_pending++; dcb->evq.inserted = hkheartbeat; if (pollStats.evq_length > pollStats.evq_max) { pollStats.evq_max = pollStats.evq_length; } } spinlock_release(&pollqlock); } } /* * Process of the queue of waiting requests * This is done without checking the evq_pending count as a * precautionary measure to avoid issues if the house keeping * of the count goes wrong. */ if (process_pollq(thread_id)) timeout_bias = 1; if (thread_data) thread_data[thread_id].state = THREAD_ZPROCESSING; zombies = dcb_process_zombies(thread_id); if (thread_data) thread_data[thread_id].state = THREAD_IDLE; if (do_shutdown) { /*< * Remove the thread from the bitmask of running * polling threads. */ if (thread_data) { thread_data[thread_id].state = THREAD_STOPPED; } bitmask_clear(&poll_mask, thread_id); /** Release mysql thread context */ mysql_thread_end(); return; } if (thread_data) { thread_data[thread_id].state = THREAD_IDLE; } } /*< while(1) */ }
/* * VOP_RECLAIM * * Reclaim should make an effort to returning errors other than EBUSY. */ static int emufs_reclaim(struct vnode *v) { struct emufs_vnode *ev = v->vn_data; struct emufs_fs *ef = v->vn_fs->fs_data; unsigned ix, i, num; int result; /* * Need both of these locks: e_lock to protect the device, * and vn_countlock for the reference count. */ lock_acquire(ef->ef_emu->e_lock); spinlock_acquire(&ev->ev_v.vn_countlock); if (ev->ev_v.vn_refcount > 1) { /* consume the reference VOP_DECREF passed us */ ev->ev_v.vn_refcount--; spinlock_release(&ev->ev_v.vn_countlock); lock_release(ef->ef_emu->e_lock); return EBUSY; } KASSERT(ev->ev_v.vn_refcount == 1); /* * Since we hold e_lock and are the last ref, nobody can increment * the refcount, so we can release vn_countlock. */ spinlock_release(&ev->ev_v.vn_countlock); /* emu_close retries on I/O error */ result = emu_close(ev->ev_emu, ev->ev_handle); if (result) { lock_release(ef->ef_emu->e_lock); return result; } num = vnodearray_num(ef->ef_vnodes); ix = num; for (i=0; i<num; i++) { struct vnode *vx; vx = vnodearray_get(ef->ef_vnodes, i); if (vx == v) { ix = i; break; } } if (ix == num) { panic("emu%d: reclaim vnode %u not in vnode pool\n", ef->ef_emu->e_unit, ev->ev_handle); } vnodearray_remove(ef->ef_vnodes, ix); vnode_cleanup(&ev->ev_v); lock_release(ef->ef_emu->e_lock); kfree(ev); return 0; }
usr_sem_t* syscall_sem_open(char const *name, int value) { usr_sem_t *usr_sem = NULL; interrupt_status_t intr_status; int i; if (strlen(name) > MAX_NAME_LEN) { return NULL; } intr_status = _interrupt_disable(); spinlock_acquire(&usr_semaphore_table_slock); // Get an existing userland semaphore (value < 0) if (value < 0) { for (i = 0; i < MAX_USR_SEMAPHORES; i++) { if(stringcmp(usr_semaphore_table[i].name, name) == 0) { usr_sem = &usr_semaphore_table[i]; spinlock_release(&usr_semaphore_table_slock); _interrupt_set_state(intr_status); return usr_sem; } } // No semaphore with given name exists spinlock_release(&usr_semaphore_table_slock); _interrupt_set_state(intr_status); return NULL; } // Create new userland semaphore (value >= 0) else { int sem_id = MAX_USR_SEMAPHORES; for (i = 0; i < MAX_USR_SEMAPHORES; i++) { // Semaphore already exists if(stringcmp(usr_semaphore_table[i].name, name) == 0) { spinlock_release(&usr_semaphore_table_slock); _interrupt_set_state(intr_status); return NULL; } // Find an available spot in the userland semaphore table if (i < sem_id && usr_semaphore_table[i].sem == NULL) { sem_id = i; } } /* If there is no more space in the userland semaphore table, * return an error. This should never happen, since the actual * kernel semaphore table would be full before this could occur */ if (sem_id == MAX_USR_SEMAPHORES) { return NULL; } // Create the actual userland semaphore usr_semaphore_table[sem_id].sem = semaphore_create(value); stringcopy(usr_semaphore_table[sem_id].name, name, MAX_NAME_LEN); usr_sem = &usr_semaphore_table[sem_id]; spinlock_release(&usr_semaphore_table_slock); _interrupt_set_state(intr_status); return usr_sem; } // Something went wrong spinlock_release(&usr_semaphore_table_slock); _interrupt_set_state(intr_status); return NULL; }
/** Creates a new thread. A free slot is allocated from the thread * table for the new thread and its content is initialized to 'nil' * values. The new thread will call function 'func' with the argument * 'arg' when the thread is run by thread_run(). * * @param func Function pointer to the threads 'main' function. * @param arg Argument to pass to 'func' (meaning defined by 'func'). * * @return The thread ID of the created thread, or negative if * creation failed (thread table is full). */ TID_t thread_create(void (*func)(uint32_t), uint32_t arg) { static TID_t next_tid = 0; TID_t i, tid = -1; func = func; arg = arg; interrupt_status_t intr_status; intr_status = _interrupt_disable(); spinlock_acquire(&thread_table_slock); /* Find the first free thread table entry starting from 'next_tid' */ for (i=0; i<CONFIG_MAX_THREADS; i++) { TID_t t = (i + next_tid) % CONFIG_MAX_THREADS; if(t == IDLE_THREAD_TID) continue; if (thread_table[t].state == THREAD_FREE) { tid = t; break; } } /* Is the thread table full? 
*/ if (tid < 0) { spinlock_release(&thread_table_slock); _interrupt_set_state(intr_status); return tid; } next_tid = (tid+1) % CONFIG_MAX_THREADS; thread_table[tid].state = THREAD_NONREADY; spinlock_release(&thread_table_slock); _interrupt_set_state(intr_status); thread_table[tid].context = (context_t *) (thread_stack_areas +CONFIG_THREAD_STACKSIZE*tid + CONFIG_THREAD_STACKSIZE - sizeof(context_t)); for (i=0; i< (int) sizeof(context_t)/4; i++) { *(((virtaddr_t*) thread_table[tid].context) + i) = 0; } thread_table[tid].user_context = NULL; thread_table[tid].pagetable = NULL; thread_table[tid].sleeps_on = 0; thread_table[tid].attribs = 0; thread_table[tid].process_id = -1; thread_table[tid].next = -1; /* Make sure that we always have a valid back reference on context chain */ thread_table[tid].context->prev_context = thread_table[tid].context; /* This functions magically sets up context to new * working state */ _context_init(thread_table[tid].context, (virtaddr_t)func, (virtaddr_t)thread_finish, (virtaddr_t)thread_stack_areas + (CONFIG_THREAD_STACKSIZE * tid) + CONFIG_THREAD_STACKSIZE - 4 - sizeof(context_t), arg); return tid; }
/**
 * The clientReply entry point. This is passed the response buffer
 * to which the filter should be applied. Once processed the
 * query is passed to the upstream component
 * (filter or router) in the filter chain.
 *
 * Replies arrive from two branches: the parent (instance != NULL) and the
 * child (instance == NULL). Parent data is accumulated in tee_replybuf and
 * child data is discarded; the accumulated parent reply is only routed
 * upstream once the per-branch waiting/eof state below allows it. The whole
 * function runs with the session's tee_lock held.
 *
 * @param instance	The filter instance data
 * @param session	The filter session
 * @param reply		The response data
 * @return		0 if the session is closed or the child session has gone
 *			away, the upstream clientReply result if the reply was
 *			routed, otherwise 1
 */
static int
clientReply (FILTER* instance, void *session, GWBUF *reply)
{
    int rc, branch, eof;
    TEE_SESSION *my_session = (TEE_SESSION *) session;
    bool route = false,mpkt;
    GWBUF *complete = NULL;
    unsigned char *ptr;
    uint16_t flags = 0;
    /* Number of EOF packets that ends a result set for this command */
    int min_eof = my_session->command != 0x04 ? 2 : 1;
    int more_results = 0;
#ifdef SS_DEBUG
    ptr = (unsigned char*) reply->start;
    skygw_log_write(LOGFILE_TRACE,"Tee clientReply [%s] [%s] [%s]: %d",
		    instance ? "parent":"child",
		    my_session->active ? "open" : "closed",
		    PTR_IS_ERR(ptr) ? "ERR" : PTR_IS_OK(ptr) ? "OK" : "RSET",
		    atomic_add(&debug_seq,1));
#endif

    spinlock_acquire(&my_session->tee_lock);

    if(!my_session->active)
    {
	skygw_log_write(LOGFILE_TRACE,"Tee: Failed to return reply, session is closed");
	gwbuf_free(reply);
	rc = 0;
	/* If the client is still waiting on the parent, send it an error */
	if(my_session->waiting[PARENT])
	{
	    GWBUF* errbuf = modutil_create_mysql_err_msg(1,0,1,"0000","Session closed.");
	    my_session->waiting[PARENT] = false;
	    my_session->up.clientReply (my_session->up.instance,
				        my_session->up.session,
				        errbuf);
	}
	goto retblock;
    }

    branch = instance == NULL ? CHILD : PARENT;

    /* Add the new data to this branch's partial buffer and split off
     * whatever complete packets it now contains. */
    my_session->tee_partials[branch] = gwbuf_append(my_session->tee_partials[branch], reply);
    my_session->tee_partials[branch] = gwbuf_make_contiguous(my_session->tee_partials[branch]);
    complete = modutil_get_complete_packets(&my_session->tee_partials[branch]);

    if(complete == NULL)
    {
	/** Incomplete packet */
	skygw_log_write(LOGFILE_DEBUG,"tee.c: Incomplete packet, "
		"waiting for a complete packet before forwarding.");
	rc = 1;
	goto retblock;
    }

    complete = gwbuf_make_contiguous(complete);

    if(my_session->tee_partials[branch] &&
       GWBUF_EMPTY(my_session->tee_partials[branch]))
    {
	gwbuf_free(my_session->tee_partials[branch]);
	my_session->tee_partials[branch] = NULL;
    }

    ptr = (unsigned char*) complete->start;

    if(my_session->replies[branch] == 0)
    {
	skygw_log_write(LOGFILE_TRACE,"Tee: First reply to a query for [%s].",branch == PARENT ? "PARENT":"CHILD");
	/* Reply is in a single packet if it is an OK, ERR or LOCAL_INFILE packet.
	 * Otherwise the reply is a result set and the amount of packets is unknown.
	 */
	if(PTR_IS_ERR(ptr) || PTR_IS_LOCAL_INFILE(ptr) ||
	   PTR_IS_OK(ptr) || !my_session->multipacket[branch] )
	{
	    my_session->waiting[branch] = false;
	    my_session->multipacket[branch] = false;
	    if(PTR_IS_OK(ptr))
	    {
		flags = get_response_flags(ptr,true);
		more_results = (flags & 0x08) && my_session->client_multistatement;
		if(more_results)
		{
		    skygw_log_write(LOGFILE_TRACE,
			     "Tee: [%s] waiting for more results.",branch == PARENT ? "PARENT":"CHILD");
		}
	    }
	}
#ifdef SS_DEBUG
	else
	{
	    skygw_log_write_flush(LOGFILE_DEBUG,"tee.c: [%d] Waiting for a result set from %s session.",
		     my_session->d_id,
		     branch == PARENT?"parent":"child");
	}
#endif
    }

    if(my_session->waiting[branch])
    {
	eof = modutil_count_signal_packets(complete,my_session->use_ok,my_session->eof[branch] > 0,&more_results);
	more_results &= my_session->client_multistatement;
	my_session->eof[branch] += eof;

	if(my_session->eof[branch] >= min_eof)
	{
#ifdef SS_DEBUG
	    skygw_log_write_flush(LOGFILE_DEBUG,"tee.c [%d] %s received last EOF packet",
		     my_session->d_id,
		     branch == PARENT?"parent":"child");
#endif
	    /* Keep waiting only if further result sets follow */
	    my_session->waiting[branch] = more_results;
	    if(more_results)
	    {
		my_session->eof[branch] = 0;
	    }
	}
    }

    /* Only the parent's data is kept for the client */
    if(branch == PARENT)
    {
	my_session->tee_replybuf = gwbuf_append(my_session->tee_replybuf,complete);
    }
    else
    {
	gwbuf_free(complete);
    }

    my_session->replies[branch]++;
    rc = 1;
    mpkt = my_session->multipacket[PARENT] || my_session->multipacket[CHILD];

    if(my_session->tee_replybuf != NULL)
    {
	if(my_session->branch_session == NULL)
	{
	    rc = 0;
	    gwbuf_free(my_session->tee_replybuf);
	    my_session->tee_replybuf = NULL;
	    skygw_log_write_flush(LOGFILE_ERROR,"Error : Tee child session was closed.");
	    /*
	     * The reply buffer has just been freed, so the routing
	     * decision below must be skipped; otherwise a NULL buffer
	     * could be handed to the upstream component.
	     */
	}
	else if(mpkt)
	{
	    if(my_session->waiting[PARENT])
	    {
		route = true;
	    }
	    else if(my_session->eof[PARENT] >= min_eof &&
		    my_session->eof[CHILD] >= min_eof)
	    {
		route = true;
#ifdef SS_DEBUG
		skygw_log_write_flush(LOGFILE_DEBUG,"tee.c:[%d] Routing final packet of response set.",my_session->d_id);
#endif
	    }
	}
	else if(!my_session->waiting[PARENT] &&
		!my_session->waiting[CHILD])
	{
#ifdef SS_DEBUG
	    skygw_log_write_flush(LOGFILE_DEBUG,"tee.c:[%d] Routing single packet response.",my_session->d_id);
#endif
	    route = true;
	}
    }

    if(route)
    {
#ifdef SS_DEBUG
	skygw_log_write_flush(LOGFILE_DEBUG, "tee.c:[%d] Routing buffer '%p' parent(waiting [%s] replies [%d] eof[%d])"
		" child(waiting [%s] replies[%d] eof [%d])",
		 my_session->d_id,
		 my_session->tee_replybuf,
		 my_session->waiting[PARENT] ? "true":"false",
		 my_session->replies[PARENT],
		 my_session->eof[PARENT],
		 my_session->waiting[CHILD]?"true":"false",
		 my_session->replies[CHILD],
		 my_session->eof[CHILD]);
#endif

	rc = my_session->up.clientReply (my_session->up.instance,
					 my_session->up.session,
					 my_session->tee_replybuf);
	my_session->tee_replybuf = NULL;
    }

    /* Both branches are done: send the next queued query, if any */
    if(my_session->queue &&
       !my_session->waiting[PARENT] &&
       !my_session->waiting[CHILD])
    {
	GWBUF* buffer = modutil_get_next_MySQL_packet(&my_session->queue);
	GWBUF* clone = clone_query(my_session->instance,my_session,buffer);
	reset_session_state(my_session,buffer);
	route_single_query(my_session->instance,my_session,buffer,clone);
	LOGIF(LT,(skygw_log_write(LT,"tee: routing queued query")));

    }

retblock:

    spinlock_release(&my_session->tee_lock);

    return rc;
}
void signal_handle_pending() { int i, rc; sigset_t pending, mask; struct sigaction *action; spinlock_acquire(&CURR_THREAD->lock); pending = CURR_THREAD->pending_signals; pending &= ~(CURR_THREAD->signal_mask | CURR_PROC->signal_mask); if (!pending) { return; } for (i = 0; i < NSIG; i++) { if (!(pending & (1 << i))) { continue; } CURR_THREAD->pending_signals &= ~(1 << i); /* Check if the signal is ignored */ action = &CURR_PROC->signal_act[i]; if (action->sa_handler == SIG_IGN || (action->sa_handler == SIG_DFT && SIG_DFT_IGNORE(i))) { continue; } /* If not the default action, we must execute a user-mode * signal handler. */ if (action->sa_handler != SIG_DFT) { /* Save the current mask, and apply a new mask. */ mask = CURR_PROC->signal_mask; CURR_PROC->signal_mask |= action->sa_mask; rc = arch_signal_setup_frame(action, &CURR_THREAD->signal_info[i], mask); if (rc != 0) { signal_force(CURR_THREAD, SIGSEGV, i); } break; } /* Release the lock while handling default action in case * we need to kill the process. */ spinlock_release(&CURR_THREAD->lock); /* Handle the default action */ if (SIG_DFT_TERM(i)) { process_exit(i); } else if (SIG_DFT_CORE(i)) { // TODO: Core dump process_exit(i); } else if (SIG_DFT_STOP(i)) { // TODO: Stop process process_exit(i); } else if (SIG_DFT_CONT(i)) { break; } spinlock_acquire(&CURR_THREAD->lock); } spinlock_release(&CURR_THREAD->lock); }
/**
 * Associate a new session with this instance of the filter.
 *
 * The session is refused (NULL is returned) when the tee's target service is
 * the service the client session belongs to, or when detect_loops() reports
 * a loop. Otherwise the per-session state is allocated. If the filter is
 * active for this client (it is deactivated when the configured source
 * address or user name does not match), the client DCB is cloned and a
 * branch session is allocated on the tee's service, with a dummy filter
 * definition installed as the upstream of that branch session.
 *
 * @param instance	The filter instance data
 * @param session	The session itself
 * @return Session specific data for this session, or NULL on failure
 */
static	void	*
newSession(FILTER *instance, SESSION *session)
{
	TEE_INSTANCE	*my_instance = (TEE_INSTANCE *)instance;
	TEE_SESSION	*my_session;
	char		*remote, *userName;
	HASHTABLE	*ht;
	bool		is_loop;

	if (strcmp(my_instance->service->name, session->service->name) == 0)
	{
		LOGIF(LE, (skygw_log_write_flush(
			LOGFILE_ERROR,
			"Error : %s: Recursive use of tee filter in service.",
			session->service->name)));
		my_session = NULL;
		goto retblock;
	}

	ht = hashtable_alloc(100,simple_str_hash,strcmp);
	is_loop = detect_loops(my_instance,ht,session->service);
	hashtable_free(ht);

	if (is_loop)
	{
		LOGIF(LE, (skygw_log_write_flush(
			LOGFILE_ERROR,
			"Error : %s: Recursive use of tee filter in service.",
			session->service->name)));
		my_session = NULL;
		goto retblock;
	}

	if ((my_session = calloc(1, sizeof(TEE_SESSION))) == NULL)
	{
		goto retblock;
	}

	my_session->active = 1;
	my_session->residual = 0;
	my_session->tee_replybuf = NULL;
	my_session->client_dcb = session->client;
	my_session->instance = my_instance;
	my_session->client_multistatement = false;
	my_session->queue = NULL;
	spinlock_init(&my_session->tee_lock);

	/* Deactivate the tee when the client address does not match. */
	if (my_instance->source &&
	    (remote = session_get_remote(session)) != NULL)
	{
		if (strcmp(remote, my_instance->source))
		{
			my_session->active = 0;

			LOGIF(LE, (skygw_log_write_flush(
				LOGFILE_ERROR,
				"Warning : Tee filter is not active.")));
		}
	}

	/* Deactivate the tee when the user name does not match. */
	userName = session_getUser(session);

	if (my_instance->userName &&
	    userName &&
	    strcmp(userName, my_instance->userName))
	{
		my_session->active = 0;

		LOGIF(LE, (skygw_log_write_flush(
			LOGFILE_ERROR,
			"Warning : Tee filter is not active.")));
	}

	if (my_session->active)
	{
		DCB		*dcb;
		SESSION		*ses;
		FILTER_DEF	*dummy;
		UPSTREAM	*dummy_upstream;

		if ((dcb = dcb_clone(session->client)) == NULL)
		{
			freeSession(instance, (void *)my_session);
			my_session = NULL;

			LOGIF(LE, (skygw_log_write_flush(
				LOGFILE_ERROR,
				"Error : Creating client DCB for Tee "
				"filter failed. Terminating session.")));

			goto retblock;
		}

		if ((dummy = filter_alloc("tee_dummy","tee_dummy")) == NULL)
		{
			dcb_close(dcb);
			freeSession(instance, (void *)my_session);
			my_session = NULL;

			LOGIF(LE, (skygw_log_write_flush(
				LOGFILE_ERROR,
				"Error : tee: Allocating memory for "
				"dummy filter definition failed."
				" Terminating session.")));

			goto retblock;
		}

		if ((ses = session_alloc(my_instance->service, dcb)) == NULL)
		{
			/* NOTE(review): `dummy` is not released on this path;
			 * looks like a leak - confirm which routine frees a
			 * FILTER_DEF obtained from filter_alloc(). */
			dcb_close(dcb);
			freeSession(instance, (void *)my_session);
			my_session = NULL;

			LOGIF(LE, (skygw_log_write_flush(
				LOGFILE_ERROR,
				"Error : Creating client session for Tee "
				"filter failed. Terminating session.")));

			goto retblock;
		}

		ss_dassert(ses->ses_is_child);

		dummy->obj = GetModuleObject();
		dummy->filter = NULL;

		if ((dummy_upstream = filterUpstream(
			dummy, my_session, &ses->tail)) == NULL)
		{
			/* Tear the half-built branch session down by hand.
			 * NOTE(review): `dummy` is not released here either;
			 * confirm as above. */
			spinlock_acquire(&ses->ses_lock);
			ses->state = SESSION_STATE_STOPPING;
			spinlock_release(&ses->ses_lock);

			ses->service->router->closeSession(
				ses->service->router_instance,
				ses->router_session);

			ses->client = NULL;
			dcb->session = NULL;
			session_free(ses);
			dcb_close(dcb);
			freeSession(instance, (void *) my_session);
			my_session = NULL;

			/* A space was missing between "for" and "dummy". */
			LOGIF(LE, (skygw_log_write_flush(
				LOGFILE_ERROR,
				"Error : tee: Allocating memory for "
				"dummy upstream failed."
				" Terminating session.")));

			goto retblock;
		}

		/* The upstream is copied by value, so the allocation can be
		 * freed once the remaining fields are filled in. */
		ses->tail = *dummy_upstream;
		my_session->branch_session = ses;
		my_session->branch_dcb = dcb;
		my_session->dummy_filterdef = dummy;

		MySQLProtocol* protocol = (MySQLProtocol*)session->client->protocol;
		my_session->use_ok = protocol->client_capabilities & (1 << 6);
		free(dummy_upstream);
	}

retblock:
	return my_session;
}
static void orphan_free(void* data) { spinlock_acquire(&orphanLock); orphan_session_t *ptr = allOrphans, *finished = NULL, *tmp = NULL; #ifdef SS_DEBUG int o_stopping = 0, o_ready = 0, o_freed = 0; #endif while(ptr) { if(ptr->session->state == SESSION_STATE_TO_BE_FREED) { if(ptr == allOrphans) { tmp = ptr; allOrphans = ptr->next; } else { tmp = allOrphans; while(tmp && tmp->next != ptr) tmp = tmp->next; if(tmp) { tmp->next = ptr->next; tmp = ptr; } } } /* * The session has been unlinked from all the DCBs and it is ready to be freed. */ if(ptr->session->state == SESSION_STATE_STOPPING && ptr->session->refcount == 0 && ptr->session->client == NULL) { ptr->session->state = SESSION_STATE_TO_BE_FREED; } #ifdef SS_DEBUG else if(ptr->session->state == SESSION_STATE_STOPPING) { o_stopping++; } else if(ptr->session->state == SESSION_STATE_ROUTER_READY) { o_ready++; } #endif ptr = ptr->next; if(tmp) { tmp->next = finished; finished = tmp; tmp = NULL; } } spinlock_release(&orphanLock); #ifdef SS_DEBUG if(o_stopping + o_ready > 0) skygw_log_write(LOGFILE_DEBUG, "tee.c: %d orphans in " "SESSION_STATE_STOPPING, %d orphans in " "SESSION_STATE_ROUTER_READY. ", o_stopping, o_ready); #endif while(finished) { #ifdef SS_DEBUG o_freed++; #endif tmp = finished; finished = finished->next; tmp->session->service->router->freeSession( tmp->session->service->router_instance, tmp->session->router_session); tmp->session->state = SESSION_STATE_FREE; free(tmp->session); free(tmp); } #ifdef SS_DEBUG skygw_log_write(LOGFILE_DEBUG, "tee.c: %d orphans freed.", o_freed); #endif }
// Bottom half of the page-fault handler.
//
// te.data[0] is used as the faulting address and te.data[1] as the fault
// error code. The memory area covering the address is looked up in the
// memory contract of the current thread's parent process (under the process'
// contract spinlock) and copied out. Access violations panic. For a
// not-present page the backing is then established:
//   - private + ALLOC_IMMEDIATE : every page of the area is allocated;
//   - private + anonymous       : the faulting page is allocated;
//   - private + file            : the faulting page is allocated and one
//                                 page is read from the file into it;
//   - shared  + file            : one page is read into the page cache and
//                                 the cache buffer is mapped at the address;
//   - shared  + anonymous       : not supported, panics.
void page_fault_bottom(thread_exception te)
{
	thread_exception_print(&te);
	uint32& addr = te.data[0];
	uint32& code = te.data[1];

	// 4KB-aligned base of the page that faulted
	const uint32 page_addr = addr & (~0xFFF);

	serial_printf("PAGE_FAULT: PROC: %u ADDRESS: %h, THREAD: %u, CODE: %h\n", process_get_current()->id, addr, thread_get_current()->id, code);

	if (process_get_current()->contract_spinlock == 1)
		PANIC("Page fault spinlock is already reserved\n");

	spinlock_acquire(&process_get_current()->contract_spinlock);
	vm_area* p_area = vm_contract_find_area(&thread_get_current()->parent->memory_contract, addr);

	if (p_area == 0)
	{
		serial_printf("could not find address %h in memory contract\n", addr);
		PANIC("");		// terminate thread and process with SIGSEGV
	}

	// work on a copy so the contract lock can be dropped right away
	vm_area area = *p_area;
	spinlock_release(&process_get_current()->contract_spinlock);

	// tried to access an inaccessible page
	if ((area.flags & MMAP_PROTECTION) == MMAP_NO_ACCESS)
	{
		serial_printf("address: %h is inaccessible\n", addr);
		PANIC("");
	}

	// tried to write to a read-only or inaccessible page
	if (page_fault_error_is_write(code) && (area.flags & MMAP_WRITE) != MMAP_WRITE)
	{
		serial_printf("cannot write to address: %h\n", addr);
		PANIC("");
	}

	// NOTE: read permission (MMAP_READ) is not enforced here.

	// if the page is present then a violation happened (we do not implement swap out/shared anonymous yet)
	if (page_fault_error_is_page_present(code) == true)
	{
		serial_printf("memory violation at address: %h with code: %h\n", addr, code);
		serial_printf("area flags: %h\n", area.flags);
		PANIC("");
	}

	// here we found out that the page is not present, so we need to allocate it properly
	if (CHK_BIT(area.flags, MMAP_PRIVATE))
	{
		if (CHK_BIT(area.flags, MMAP_ALLOC_IMMEDIATE))
		{
			// loop through all addresses and map them
			// (ALLOC_IMMEDIATE works only for anonymous areas; imposed in mmap)
			for (virtual_addr address = area.start_addr; address < area.end_addr; address += 4096)
				page_fault_alloc_page(area.flags, address);
		}
		else if (CHK_BIT(area.flags, MMAP_ANONYMOUS))
		{
			page_fault_alloc_page(area.flags, page_addr);
		}
		else
		{
			uint32 flags = page_fault_calculate_present_flags(area.flags);
			vmmngr_alloc_page_f(page_addr, flags);

			uint32 read_start = area.offset + ((addr - area.start_addr) / PAGE_SIZE) * PAGE_SIZE;		// file read start
			uint32 read_size = PAGE_SIZE;		// we read one page at a time (not the whole area as this may not be necessary)

			serial_printf("gfd: %u, reading at mem: %h, phys: %h file: %h, size: %u\n", area.fd, page_addr, vmmngr_get_phys_addr(page_addr),
				read_start, read_size);

			gfe* entry = gft_get(area.fd);
			if (entry == 0)
			{
				serial_printf("area.fd = %u\n", area.fd);
				PANIC("page fault gfd entry = 0");
			}

			// read one page from the file offset given at the 4KB-aligned fault address
			if (read_file_global(area.fd, read_start, read_size, page_addr, VFS_CAP_READ | VFS_CAP_CACHE) != read_size)
			{
				serial_printf("read fd: %u\n", area.fd);
				PANIC("mmap private file read less bytes than expected");
			}
		}
	}
	else		// MMAP_SHARED
	{
		if (CHK_BIT(area.flags, MMAP_ANONYMOUS))
			PANIC("A shared area cannot be marked as anonymous yet.");
		else
		{
			// in the shared file mapping the address to read is ignored as data are read only to page cache.
			uint32 read_start = area.offset + (page_addr - area.start_addr);

			// same sanity check as the private file path
			gfe* entry = gft_get(area.fd);
			if (entry == 0)
			{
				serial_printf("area.fd = %u\n", area.fd);
				PANIC("page fault gfd entry = 0");
			}

			if (read_file_global(area.fd, read_start, PAGE_SIZE, -1, VFS_CAP_READ | VFS_CAP_CACHE) != PAGE_SIZE)
				PANIC("mmap shared file failed");

			virtual_addr used_cache = page_cache_get_buffer(area.fd, read_start / PAGE_SIZE);

			// map the cache buffer's physical frame at the faulting page
			uint32 flags = page_fault_calculate_present_flags(area.flags);
			vmmngr_map_page(vmmngr_get_directory(), vmmngr_get_phys_addr(used_cache), page_addr, flags);
		}
	}
}
// Handle the GET system call: retrieve state from one of the calling
// process's children.
//
// tf  - trap frame of the caller; the child number is taken from the low
//       byte of edx, the register-state destination from ebx, and the memory
//       operation's source/destination/size from esi/edi/ecx.
// cmd - command word; the SYS_REGS, SYS_FPU, SYS_MEMOP, SYS_PERM, SYS_RW and
//       SYS_SNAP bits are examined below.
//
// Finishes through trap_return(tf); invalid arguments are reported with
// systrap(tf, T_GPFLT, 0).
static void
do_get(trapframe *tf, uint32_t cmd)
{
	proc *p = proc_cur();
	assert(p->state == PROC_RUN && p->runcpu == cpu_cur());
	//cprintf("GET proc %x eip %x esp %x cmd %x\n", p, tf->eip, tf->esp, cmd);

	spinlock_acquire(&p->lock);

	// Find the named child process; DON'T create if it doesn't exist.
	// A missing child is replaced by the shared proc_null placeholder.
	uint32_t cn = tf->regs.edx & 0xff;
	proc *cp = p->child[cn];
	if (!cp)
		cp = &proc_null;

	// Synchronize with child if necessary.
	if (cp->state != PROC_STOP)
		proc_wait(p, cp, tf);

	// Since the child is now stopped, it's ours to control;
	// we no longer need our process lock -
	// and we don't want to be holding it if usercopy() below aborts.
	spinlock_release(&p->lock);

	// Get child's general register state
	if (cmd & SYS_REGS) {
		int len = offsetof(procstate, fx);	// just integer regs
		if (cmd & SYS_FPU) len = sizeof(procstate); // whole shebang
		usercopy(tf, 1, &cp->sv, tf->regs.ebx, len);
		// Copy the child's saved state to the address in ebx.
		// NOTE(review): this looks redundant with the usercopy() call
		// above and writes through the user-supplied address directly
		// - confirm whether it can be dropped.
		procstate *cs = (procstate*) tf->regs.ebx;
		memcpy(cs, &cp->sv, len);
	}

	uint32_t sva = tf->regs.esi;
	uint32_t dva = tf->regs.edi;
	uint32_t size = tf->regs.ecx;
	switch (cmd & SYS_MEMOP) {
	case 0:	// no memory operation
		break;
	case SYS_COPY:
	case SYS_MERGE:
		// validate source region: aligned per PTOFF and inside
		// [VM_USERLO, VM_USERHI]
		if (PTOFF(sva) || PTOFF(size)
				|| sva < VM_USERLO || sva > VM_USERHI
				|| size > VM_USERHI-sva)
			systrap(tf, T_GPFLT, 0);
		// fall thru...
	case SYS_ZERO:
		// validate destination region (same rules as the source)
		if (PTOFF(dva) || PTOFF(size)
				|| dva < VM_USERLO || dva > VM_USERHI
				|| size > VM_USERHI-dva)
			systrap(tf, T_GPFLT, 0);

		switch (cmd & SYS_MEMOP) {
		case SYS_ZERO:	// zero memory and clear permissions
			pmap_remove(p->pdir, dva, size);
			break;
		case SYS_COPY:
			// The child's page directory is passed with sva and
			// the caller's with dva - presumably child source to
			// caller destination; confirm against pmap_copy().
			pmap_copy(cp->pdir, sva, p->pdir, dva, size);
			break;
		case SYS_MERGE:
			// The child's rpdir and pdir are passed with sva and
			// the caller's pdir with dva - presumably merging the
			// child's changes into the caller; confirm against
			// pmap_merge().
			pmap_merge(cp->rpdir, cp->pdir, sva,
					p->pdir, dva, size);
			break;
		}
		break;
	default:
		systrap(tf, T_GPFLT, 0);
	}

	if (cmd & SYS_PERM) {
		// validate destination region (PGOFF alignment this time)
		if (PGOFF(dva) || PGOFF(size)
				|| dva < VM_USERLO || dva > VM_USERHI
				|| size > VM_USERHI-dva)
			systrap(tf, T_GPFLT, 0);
		if (!pmap_setperm(p->pdir, dva, size, cmd & SYS_RW))
			panic("pmap_get: no memory to set permissions");
	}

	if (cmd & SYS_SNAP)
		systrap(tf, T_GPFLT, 0);	// only valid for PUT

	trap_return(tf);	// syscall completed
}
// Function that simply recurses to a specified depth. // The useless return value and volatile parameter are // so GCC doesn't collapse it via tail-call elimination. int gcc_noinline spinlock_godeep(volatile int depth, spinlock* lk) { if (depth==0) { spinlock_acquire(lk); return 1; } else return spinlock_godeep(depth-1, lk) * depth; }
void scheduler_schedule(void) { TID_t t; TID_t lowestDL = -1; thread_table_t *current_thread; int this_cpu; this_cpu = _interrupt_getcpu(); spinlock_acquire(&thread_table_slock); current_thread = &(thread_table[scheduler_current_thread[this_cpu]]); if(current_thread->state == THREAD_DYING) { current_thread->state = THREAD_FREE; } else if(current_thread->sleeps_on != 0) { current_thread->state = THREAD_SLEEPING; } else { if(scheduler_current_thread[this_cpu] != IDLE_THREAD_TID) scheduler_add_to_ready_list(scheduler_current_thread[this_cpu]); current_thread->state = THREAD_READY; } t = scheduler_ready_to_run.head; /* Initialize the TID lowestDL to a thread with deadline larger than 0 */ /* if possible */ while (lowestDL == -1 && t != -1) { if (thread_table[t].deadline > 0) { lowestDL = t; break; } t = thread_table[t].next; } t = scheduler_ready_to_run.head; if (lowestDL != -1) { /* If a deadline was found, find the lowest deadline */ while (t != -1) { if (thread_table[t].deadline < thread_table[lowestDL].deadline && thread_table[t].deadline > 0) { lowestDL = t; } t = thread_table[t].next; } /* Set the thread with this deadline to the head */ if (lowestDL == scheduler_ready_to_run.tail && lowestDL != scheduler_ready_to_run.head) { scheduler_ready_to_run.tail = thread_table[lowestDL].previous; thread_table[scheduler_ready_to_run.tail].next = -1; thread_table[lowestDL].next = scheduler_ready_to_run.head; thread_table[scheduler_ready_to_run.head].previous = lowestDL; thread_table[lowestDL].previous = -1; scheduler_ready_to_run.head = lowestDL; } else if (lowestDL != scheduler_ready_to_run.head) { thread_table[thread_table[lowestDL].previous].next = thread_table[lowestDL].next; thread_table[thread_table[lowestDL].next].previous = thread_table[lowestDL].previous; thread_table[lowestDL].next = scheduler_ready_to_run.head; thread_table[scheduler_ready_to_run.head].previous = lowestDL; thread_table[lowestDL].previous = -1; scheduler_ready_to_run.head = lowestDL; } } /* Remove 
the head and run it */ t = scheduler_remove_first_ready(); thread_table[t].state = THREAD_RUNNING; spinlock_release(&thread_table_slock); scheduler_current_thread[this_cpu] = t; /* Schedule timer interrupt to occur after thread timeslice is spent */ timer_set_ticks(_get_rand(CONFIG_SCHEDULER_TIMESLICE) + CONFIG_SCHEDULER_TIMESLICE / 2); }
/**
 * Build the router instance for one service.
 *
 * Allocates the instance, a NULL-terminated array holding one BACKEND per
 * server of the service (connection count starting at 0), translates the
 * router options into the status bitmask/bitvalue pair, and links the new
 * instance at the head of the module's instance list.
 *
 * @param service	The service this router is being created for
 * @param options	A NULL-terminated array of router options, or NULL
 *
 * @return The new instance, or NULL if a memory allocation failed
 */
static	ROUTER	*
createInstance(SERVICE *service, char **options)
{
	ROUTER_INSTANCE	*inst;
	SERVER		*server;
	char		**opt;
	int		i, n;

	inst = calloc(1, sizeof(ROUTER_INSTANCE));
	if (inst == NULL)
	{
		return NULL;
	}

	inst->service = service;
	spinlock_init(&inst->lock);

	/*
	 * The instance keeps its own array of backends so that a connection
	 * count can be maintained per server. Count the servers first.
	 */
	n = 0;
	server = service->databases;
	while (server)
	{
		n++;
		server = server->nextdb;
	}

	inst->servers = (BACKEND **)calloc(n + 1, sizeof(BACKEND *));
	if (inst->servers == NULL)
	{
		free(inst);
		return NULL;
	}

	n = 0;
	for (server = service->databases; server; server = server->nextdb)
	{
		inst->servers[n] = malloc(sizeof(BACKEND));
		if (inst->servers[n] == NULL)
		{
			/* Undo everything allocated so far. */
			for (i = 0; i < n; i++)
			{
				free(inst->servers[i]);
			}
			free(inst->servers);
			free(inst);
			return NULL;
		}
		inst->servers[n]->server = server;
		inst->servers[n]->current_connection_count = 0;
		n++;
	}
	inst->servers[n] = NULL;

	/*
	 * Translate the options into the mask of status bits to examine and
	 * the value those bits must have.
	 */
	inst->bitmask = 0;
	inst->bitvalue = 0;
	for (opt = options; opt != NULL && *opt != NULL; opt++)
	{
		if (strcasecmp(*opt, "master") == 0)
		{
			inst->bitmask |= (SERVER_MASTER|SERVER_SLAVE);
			inst->bitvalue |= SERVER_MASTER;
		}
		else if (strcasecmp(*opt, "slave") == 0)
		{
			inst->bitmask |= (SERVER_MASTER|SERVER_SLAVE);
			inst->bitvalue |= SERVER_SLAVE;
		}
		else if (strcasecmp(*opt, "synced") == 0)
		{
			inst->bitmask |= (SERVER_JOINED);
			inst->bitvalue |= SERVER_JOINED;
		}
		else
		{
			LOGIF(LE, (skygw_log_write(
				LOGFILE_ERROR,
				"Warning : Unsupported router "
				"option %s for readconnroute.",
				*opt)));
		}
	}

	/*
	 * The instance is complete: publish it on the list of instances
	 * created with this module.
	 */
	spinlock_acquire(&instlock);
	inst->next = instances;
	instances = inst;
	spinlock_release(&instlock);

	return (ROUTER *)inst;
}
/** * Process of the queue of DCB's that have outstanding events * * The first event on the queue will be chosen to be executed by this thread, * all other events will be left on the queue and may be picked up by other * threads. When the processing is complete the thread will take the DCB off the * queue if there are no pending events that have arrived since the thread started * to process the DCB. If there are pending events the DCB will be moved to the * back of the queue so that other DCB's will have a share of the threads to * execute events for them. * * Including session id to log entries depends on this function. Assumption is * that when maxscale thread starts processing of an event it processes one * and only one session until it returns from this function. Session id is * read to thread's local storage in macro LOGIF_MAYBE(...) and reset back * to zero just before returning in LOGIF(...) macro. * Thread local storage (tls_log_info_t) follows thread and is accessed every * time log is written to particular log. 
* * @param thread_id The thread ID of the calling thread * @return 0 if no DCB's have been processed */ static int process_pollq(int thread_id) { DCB *dcb; int found = 0; uint32_t ev; unsigned long qtime; spinlock_acquire(&pollqlock); if (eventq == NULL) { /* Nothing to process */ spinlock_release(&pollqlock); return 0; } dcb = eventq; if (dcb->evq.next == dcb->evq.prev && dcb->evq.processing == 0) { found = 1; dcb->evq.processing = 1; } else if (dcb->evq.next == dcb->evq.prev) { /* Only item in queue is being processed */ spinlock_release(&pollqlock); return 0; } else { do { dcb = dcb->evq.next; } while (dcb != eventq && dcb->evq.processing == 1); if (dcb->evq.processing == 0) { /* Found DCB to process */ dcb->evq.processing = 1; found = 1; } } if (found) { ev = dcb->evq.pending_events; dcb->evq.processing_events = ev; dcb->evq.pending_events = 0; pollStats.evq_pending--; ss_dassert(pollStats.evq_pending >= 0); } spinlock_release(&pollqlock); if (found == 0) return 0; #if PROFILE_POLL memlog_log(plog, hkheartbeat - dcb->evq.inserted); #endif qtime = hkheartbeat - dcb->evq.inserted; dcb->evq.started = hkheartbeat; if (qtime > N_QUEUE_TIMES) queueStats.qtimes[N_QUEUE_TIMES]++; else queueStats.qtimes[qtime]++; if (qtime > queueStats.maxqtime) queueStats.maxqtime = qtime; CHK_DCB(dcb); if (thread_data) { thread_data[thread_id].state = THREAD_PROCESSING; thread_data[thread_id].cur_dcb = dcb; thread_data[thread_id].event = ev; } #if defined(FAKE_CODE) if (dcb_fake_write_ev[dcb->fd] != 0) { LOGIF(LD, (skygw_log_write( LOGFILE_DEBUG, "%lu [poll_waitevents] " "Added fake events %d to ev %d.", pthread_self(), dcb_fake_write_ev[dcb->fd], ev))); ev |= dcb_fake_write_ev[dcb->fd]; dcb_fake_write_ev[dcb->fd] = 0; } #endif /* FAKE_CODE */ ss_debug(spinlock_acquire(&dcb->dcb_initlock);)
/**
 * Associate a new session with this instance of the router.
 *
 * Chooses a backend among the servers that are running and whose status,
 * masked with the instance bitmask, equals the instance bitvalue. The server
 * with the fewest current connections wins; ties are broken by the lower
 * total connection count. When the instance is configured for slaves only
 * and none qualifies, the first master seen is used instead. A connection to
 * the chosen backend is opened and the session is linked into the instance's
 * list of sessions.
 *
 * @param instance	The router instance data
 * @param session	The session itself
 * @return Session specific data for this session, or NULL if allocation
 *         failed, no server qualified or the backend connection failed
 */
static	void	*
newSession(ROUTER *instance, SESSION *session)
{
	ROUTER_INSTANCE		*inst = (ROUTER_INSTANCE *)instance;
	ROUTER_CLIENT_SES	*client_rses;
	BACKEND			*candidate = NULL;
	int			i;
	int			master_host = -1;

	LOGIF(LD, (skygw_log_write_flush(
		LOGFILE_DEBUG,
		"%lu [newSession] new router session with session "
		"%p, and inst %p.",
		pthread_self(),
		session,
		inst)));

	client_rses = (ROUTER_CLIENT_SES *)calloc(1, sizeof(ROUTER_CLIENT_SES));

	if (client_rses == NULL) {
		return NULL;
	}

#if defined(SS_DEBUG)
	client_rses->rses_chk_top = CHK_NUM_ROUTER_SES;
	client_rses->rses_chk_tail = CHK_NUM_ROUTER_SES;
#endif

	/**
	 * Find a backend server to connect to. This is the extent of the
	 * load balancing algorithm we need to implement for this simple
	 * connection router.
	 */

	/*
	 * Loop over all the servers and find any that have fewer connections
	 * than the candidate server.
	 *
	 * If a server has less connections than the current candidate we mark
	 * this as the new candidate to connect to.
	 *
	 * If a server has the same number of connections currently as the
	 * candidate and has had less connections over time than the candidate
	 * it will also become the new candidate. This has the effect of
	 * spreading the connections over different servers during periods of
	 * very low load.
	 *
	 * The loop condition guarantees inst->servers[i] is non-NULL inside
	 * the body.
	 */
	for (i = 0; inst->servers[i]; i++) {
		BACKEND *backend = inst->servers[i];

		/* The value logged as "inst->bitvalue" is now really the
		 * bitvalue (the mask used to be passed here). */
		LOGIF(LD, (skygw_log_write(
			LOGFILE_DEBUG,
			"%lu [newSession] Examine server in port %d with "
			"%d connections. Status is %d, "
			"inst->bitvalue is %d",
			pthread_self(),
			backend->server->port,
			backend->current_connection_count,
			backend->server->status,
			inst->bitvalue)));

		/*
		 * If router_options=slave, get the running master
		 * It will be used if there are no running slaves at all
		 */
		if (inst->bitvalue == SERVER_SLAVE) {
			if (master_host < 0 && (SERVER_IS_MASTER(backend->server))) {
				master_host = i;
			}
		}

		if (SERVER_IS_RUNNING(backend->server) &&
			(backend->server->status & inst->bitmask) ==
			inst->bitvalue)
		{
			/* If no candidate set, set first running server as
			our initial candidate server */
			if (candidate == NULL)
			{
				candidate = backend;
			}
			else if (backend->current_connection_count <
				candidate->current_connection_count)
			{
				/* This running server has fewer
				connections, set it as a new candidate */
				candidate = backend;
			}
			else if (backend->current_connection_count ==
				candidate->current_connection_count &&
				backend->server->stats.n_connections <
				candidate->server->stats.n_connections)
			{
				/* This running server has the same number
				of connections currently as the candidate
				but has had fewer connections over time
				than candidate, set this server to candidate*/
				candidate = backend;
			}
		}
	}

	/* There is no candidate server here!
	 * With router_option=slave a master_host could be set, so route traffic there.
	 * Otherwise, just clean up and return NULL
	 */
	if (!candidate) {
		if (master_host >= 0) {
			candidate = inst->servers[master_host];
		} else {
			LOGIF(LE, (skygw_log_write_flush(
				LOGFILE_ERROR,
				"Error : Failed to create new routing session. "
				"Couldn't find eligible candidate server. Freeing "
				"allocated resources.")));
			free(client_rses);
			return NULL;
		}
	}

	client_rses->rses_capabilities = RCAP_TYPE_PACKET_INPUT;

	/*
	 * We now have the server with the least connections.
	 * Bump the connection count for this server
	 */
	atomic_add(&candidate->current_connection_count, 1);
	client_rses->backend = candidate;
	LOGIF(LD, (skygw_log_write(
		LOGFILE_DEBUG,
		"%lu [newSession] Selected server in port %d. "
		"Connections : %d\n",
		pthread_self(),
		candidate->server->port,
		candidate->current_connection_count)));

	/*
	 * Open a backend connection, putting the DCB for this
	 * connection in the client_rses->backend_dcb
	 */
	client_rses->backend_dcb = dcb_connect(candidate->server,
					session,
					candidate->server->protocol);
	if (client_rses->backend_dcb == NULL)
	{
		/* Give the connection slot back before bailing out. */
		atomic_add(&candidate->current_connection_count, -1);
		free(client_rses);
		return NULL;
	}

	/**
	 * Add this session to the list of active sessions. The session
	 * counter is bumped under the same lock so that concurrent calls do
	 * not lose increments.
	 */
	spinlock_acquire(&inst->lock);
	inst->stats.n_sessions++;
	client_rses->next = inst->connections;
	inst->connections = client_rses;
	spinlock_release(&inst->lock);

	CHK_CLIENT_RSES(client_rses);

	return (void *)client_rses;
}
/*
 * Allocate one block from the subpage allocator.
 *
 * The request is rounded up to sizes[blocktype(sz)]. Existing pages of that
 * block type are searched for a free block; if none has one, a fresh page is
 * obtained from alloc_kpages, carved into a free list and used.
 *
 * Returns a pointer to the block, or NULL if either a page or a pageref
 * could not be allocated. kmalloc_spinlock is not held on return.
 */
static
void *
subpage_kmalloc(size_t sz)
{
	unsigned blktype;	// index into sizes[] that we're using
	struct pageref *pr;	// pageref for page we're allocating from
	vaddr_t prpage;		// PR_PAGEADDR(pr)
	vaddr_t fla;		// free list entry address
	struct freelist *volatile fl;	// free list entry
	void *retptr;		// our result

	volatile int i;

	blktype = blocktype(sz);
	sz = sizes[blktype];

	spinlock_acquire(&kmalloc_spinlock);

	checksubpages();

	for (pr = sizebases[blktype]; pr != NULL; pr = pr->next_samesize) {

		/* check for corruption */
		KASSERT(PR_BLOCKTYPE(pr) == blktype);
		checksubpage(pr);

		if (pr->nfree > 0) {

		doalloc: /* comes here after getting a whole fresh page */

			KASSERT(pr->freelist_offset < PAGE_SIZE);
			prpage = PR_PAGEADDR(pr);
			fla = prpage + pr->freelist_offset;
			fl = (struct freelist *)fla;

			/* Pop the head of the page's free list. */
			retptr = fl;
			fl = fl->next;
			pr->nfree--;

			if (fl != NULL) {
				/* Record the new head as an offset in the page. */
				KASSERT(pr->nfree > 0);
				fla = (vaddr_t)fl;
				KASSERT(fla - prpage < PAGE_SIZE);
				pr->freelist_offset = fla - prpage;
			}
			else {
				/* That was the last free block on this page. */
				KASSERT(pr->nfree == 0);
				pr->freelist_offset = INVALID_OFFSET;
			}

			checksubpages();

			spinlock_release(&kmalloc_spinlock);
			return retptr;
		}
	}

	/*
	 * No page of the right size available.
	 * Make a new one.
	 *
	 * We release the spinlock while calling alloc_kpages. This
	 * avoids deadlock if alloc_kpages needs to come back here.
	 * Note that this means things can change behind our back...
	 */

	spinlock_release(&kmalloc_spinlock);
	prpage = alloc_kpages(1);
	if (prpage==0) {
		/* Out of memory. */
		kprintf("kmalloc: Subpage allocator couldn't get a page\n");
		return NULL;
	}
	spinlock_acquire(&kmalloc_spinlock);

	pr = allocpageref();
	if (pr==NULL) {
		/* Couldn't allocate accounting space for the new page. */
		spinlock_release(&kmalloc_spinlock);
		free_kpages(prpage);
		kprintf("kmalloc: Subpage allocator couldn't get pageref\n");
		return NULL;
	}

	pr->pageaddr_and_blocktype = MKPAB(prpage, blktype);
	pr->nfree = PAGE_SIZE / sizes[blktype];

	/*
	 * Note: fl is volatile because the MIPS toolchain we were
	 * using in spring 2001 attempted to optimize this loop and
	 * blew it. Making fl volatile inhibits the optimization.
	 */

	/*
	 * Thread the blocks of the page into a list: block 0 terminates
	 * the list and every later block points at the block before it,
	 * so the last block of the page ends up as the list head.
	 */
	fla = prpage;
	fl = (struct freelist *)fla;
	fl->next = NULL;
	for (i=1; i<pr->nfree; i++) {
		fl = (struct freelist *)(fla + i*sizes[blktype]);
		fl->next = (struct freelist *)(fla + (i-1)*sizes[blktype]);
		KASSERT(fl != fl->next);
	}
	fla = (vaddr_t) fl;
	pr->freelist_offset = fla - prpage;
	KASSERT(pr->freelist_offset == (pr->nfree-1)*sizes[blktype]);

	/* Link the new page into the per-size list and the list of all pages. */
	pr->next_samesize = sizebases[blktype];
	sizebases[blktype] = pr;

	pr->next_all = allbase;
	allbase = pr;

	/* This is kind of cheesy, but avoids duplicating the alloc code. */
	goto doalloc;
}
/*
 * lpage_lock & lpage_unlock
 *
 * A logical page may be accessed by more than one thread: not only
 * the thread that owns it, but also the pager thread if such a thing
 * should exist, plus anyone else who might be swapping the page out.
 *
 * Therefore, it needs to be locked for usage. We use a spinlock; to
 * avoid ballooning memory usage, it might be more desirable to use a
 * bare spinlock_data_t.
 *
 * Because this is a spinlock, it is more or less incorrect to wait on
 * it for any great length of time.
 *
 * lpage_lock: acquires the lock on an lpage.
 * lpage_unlock: releases the lock on an lpage.
 *
 * lp must not be NULL: it is dereferenced without a check.
 */
void
lpage_lock(struct lpage *lp)
{
	spinlock_acquire(&lp->lp_spinlock);
}
/** * The entry point for the monitoring module thread * * @param arg The handle of the monitor */ static void monitorMain(void *arg) { MONITOR* mon = (MONITOR*) arg; GALERA_MONITOR *handle; MONITOR_SERVERS *ptr; size_t nrounds = 0; MONITOR_SERVERS *candidate_master = NULL; int master_stickiness; int is_cluster = 0; int log_no_members = 1; monitor_event_t evtype; spinlock_acquire(&mon->lock); handle = (GALERA_MONITOR *) mon->handle; spinlock_release(&mon->lock); master_stickiness = handle->disableMasterFailback; if (mysql_thread_init()) { MXS_ERROR("mysql_thread_init failed in monitor module. Exiting."); return; } handle->status = MONITOR_RUNNING; while (1) { if (handle->shutdown) { handle->status = MONITOR_STOPPING; mysql_thread_end(); handle->status = MONITOR_STOPPED; return; } /** Wait base interval */ thread_millisleep(MON_BASE_INTERVAL_MS); /** * Calculate how far away the monitor interval is from its full * cycle and if monitor interval time further than the base * interval, then skip monitoring checks. Excluding the first * round. 
*/ if (nrounds != 0 && ((nrounds * MON_BASE_INTERVAL_MS) % mon->interval) >= MON_BASE_INTERVAL_MS) { nrounds += 1; continue; } nrounds += 1; /* reset cluster members counter */ is_cluster = 0; ptr = mon->databases; while (ptr) { ptr->mon_prev_status = ptr->server->status; monitorDatabase(mon, ptr); /* Log server status change */ if (mon_status_changed(ptr)) { MXS_DEBUG("Backend server %s:%d state : %s", ptr->server->name, ptr->server->port, STRSRVSTATUS(ptr->server)); } if (!(SERVER_IS_RUNNING(ptr->server)) || !(SERVER_IS_IN_CLUSTER(ptr->server))) { dcb_hangup_foreach(ptr->server); } if (SERVER_IS_DOWN(ptr->server)) { /** Increase this server'e error count */ dcb_hangup_foreach(ptr->server); ptr->mon_err_count += 1; } else { /** Reset this server's error count */ ptr->mon_err_count = 0; } ptr = ptr->next; } /* * Let's select a master server: * it could be the candidate master following MIN(node_id) rule or * the server that was master in the previous monitor polling cycle * Decision depends on master_stickiness value set in configuration */ /* get the candidate master, following MIN(node_id) rule */ candidate_master = get_candidate_master(mon); /* Select the master, based on master_stickiness */ if (1 == handle->disableMasterRoleSetting) { handle->master = NULL; } else { handle->master = set_cluster_master(handle->master, candidate_master, master_stickiness); } ptr = mon->databases; while (ptr) { const int repl_bits = (SERVER_SLAVE | SERVER_MASTER | SERVER_MASTER_STICKINESS); if (SERVER_IS_JOINED(ptr->server)) { if (handle->master) { if (ptr != handle->master) { /* set the Slave role and clear master stickiness */ server_clear_set_status(ptr->server, repl_bits, SERVER_SLAVE); } else { if (candidate_master && handle->master->server->node_id != candidate_master->server->node_id) { /* set master role and master stickiness */ server_clear_set_status(ptr->server, repl_bits, (SERVER_MASTER | SERVER_MASTER_STICKINESS)); } else { /* set master role and clear master 
stickiness */ server_clear_set_status(ptr->server, repl_bits, SERVER_MASTER); } } } is_cluster++; } else { server_clear_set_status(ptr->server, repl_bits, 0); } ptr = ptr->next; } if (is_cluster == 0 && log_no_members) { MXS_ERROR("There are no cluster members"); log_no_members = 0; } else { if (is_cluster > 0 && log_no_members == 0) { MXS_NOTICE("Found cluster members"); log_no_members = 1; } } ptr = mon->databases; while (ptr) { /** Execute monitor script if a server state has changed */ if (mon_status_changed(ptr)) { evtype = mon_get_event_type(ptr); if (isGaleraEvent(evtype)) { MXS_NOTICE("Server changed state: %s[%s:%u]: %s", ptr->server->unique_name, ptr->server->name, ptr->server->port, mon_get_event_name(ptr)); if (handle->script && handle->events[evtype]) { monitor_launch_script(mon, ptr, handle->script); } } } ptr = ptr->next; } } }
/*
 * lpage_fault - handle a fault on a specific lpage. If the page has
 * no frame, get a physical page from the coremap and swap it in.
 *
 * Read-only and write faults are not yet distinguished beyond the
 * dirty-flag bookkeeping below; that changes once pages can be shared.
 *
 * Synchronization: the lpage is locked while we check whether it is
 * in memory. If it is not, the lpage is unlocked while a frame is
 * allocated and the contents are read back, which only works because
 * lpages are not currently sharable. The lpage is locked again right
 * after the contents are loaded; be careful about interactions with
 * other locks while the coremap is being modified.
 *
 * The frame has to be pinned by the time the TLB is updated so it is
 * not evicted in between; both paths below are expected to leave it
 * pinned, and this is asserted before mapping.
 *
 * Returns 0 on success, or ENOMEM if no frame could be allocated.
 */
int
lpage_fault(struct lpage *lp, struct addrspace *as, int faulttype, vaddr_t va)
{
	paddr_t pa;
	int map_writable;	/* 0: map read-only, 1: map writable */

	/* kernel pages never get paged out, thus never fault */
	KASSERT(lp != NULL);

	/*
	 * Take the lpage lock under the global paging lock. A page that
	 * already has a frame is pinned at the same time.
	 */
	lock_acquire(global_paging_lock);
	if ((lp->lp_paddr & PAGE_FRAME) == INVALID_PADDR) {
		lpage_lock(lp);
	}
	else {
		lpage_lock_and_pin(lp);
	}
	lock_release(global_paging_lock);

	KASSERT(lp->lp_swapaddr != INVALID_SWAPADDR);

	pa = lp->lp_paddr;

	if ((pa & PAGE_FRAME) == INVALID_PADDR) {
		/*
		 * Major fault: the page has no frame and must be read
		 * back from swap.
		 */
		KASSERT(pa == INVALID_PADDR);

		/* The lpage lock must not be held when entering the coremap. */
		lpage_unlock(lp);

		/* Per the original notes: evicts if needed and returns pinned. */
		pa = coremap_allocuser(lp);
		if ((pa & PAGE_FRAME) == INVALID_PADDR) {
			DEBUG(DB_VM, "lpage_fault: ENOMEM: va=0x%x\n", va);
			return ENOMEM;
		}
		KASSERT(coremap_pageispinned(pa));

		/* swap_pagein needs the global paging lock. */
		lock_acquire(global_paging_lock);
		swap_pagein((pa & PAGE_FRAME), lp->lp_swapaddr);
		lpage_lock(lp);
		lock_release(global_paging_lock);

		/* Nobody else may have paged it in meanwhile. */
		KASSERT((lp->lp_paddr & PAGE_FRAME) == INVALID_PADDR);

		/* Record the new frame; the page starts out clean. */
		lp->lp_paddr = pa;

		/*
		 * Map read-only so the first write to the page faults
		 * again and can be detected.
		 */
		map_writable = 0;

		spinlock_acquire(&stats_spinlock);
		ct_majfaults++;
		DEBUG(DB_VM, "\nlpage_fault: MAJOR faults = %d", ct_majfaults);
		spinlock_release(&stats_spinlock);
	}
	else {
		/* Minor fault: the frame is still in memory. */
		KASSERT(pa != INVALID_PADDR);

		/* Anything other than a read fault gets a writable mapping. */
		map_writable = (faulttype == VM_FAULT_READ) ? 0 : 1;

		spinlock_acquire(&stats_spinlock);
		ct_minfaults++;
		DEBUG(DB_VM, "\nlpage_fault: minor faults = %d.", ct_minfaults);
		spinlock_release(&stats_spinlock);
	}

	/* Preconditions for touching the TLB / page entry. */
	KASSERT(coremap_pageispinned(lp->lp_paddr));
	KASSERT(spinlock_do_i_hold(&lp->lp_spinlock));

	/* A writable mapping means the page must be treated as dirty. */
	if (map_writable) {
		LP_SET(lp, LPF_DIRTY);
	}

	/*
	 * Install the translation. Per the original notes, mmu_map also
	 * unpins the frame.
	 *
	 * NOTE(review): lp->lp_paddr is passed unmasked; presumably it can
	 * carry flag bits below PAGE_FRAME (e.g. after LP_SET above) --
	 * confirm that coremap_pageispinned() and mmu_map() tolerate that.
	 */
	KASSERT(coremap_pageispinned(lp->lp_paddr));
	mmu_map(as, va, lp->lp_paddr, map_writable);

	lpage_unlock(lp);
	return 0;
}
/**
 * Process a COM_BINLOG_DUMP message from the slave. This is the
 * final step in the process of registration. The new master, MaxScale,
 * must send a response packet and generate a fake BINLOG_ROTATE event
 * with the binlog file requested by the slave, and then send a
 * FORMAT_DESCRIPTION_EVENT that has been saved from the real master.
 *
 * Once sent, MaxScale must continue to send binlog events to the slave.
 *
 * The binlog file name length is derived from the packet length supplied
 * by the slave, so it is validated against the size of the destination
 * buffer before anything is copied. A malformed packet is logged and
 * rejected with a return value of 0, as is a failure to allocate the
 * response buffer.
 *
 * @param	router		The router instance
 * @param	slave		The slave server
 * @param	queue		The BINLOG_DUMP packet
 * @return			The number of bytes written to the slave,
 *				or 0 if the request was rejected
 */
static int
blr_slave_binlog_dump(ROUTER_INSTANCE *router, ROUTER_SLAVE *slave, GWBUF *queue)
{
	GWBUF		*resp;
	uint8_t		*ptr;
	int		len, flags, serverid, rval, binlognamelen;
	REP_HEADER	hdr;
	uint32_t	chksum;

	ptr = GWBUF_DATA(queue);
	len = extract_field(ptr, 24);
	/* Payload = command(1) + position(4) + flags(2) + server id(4) + name */
	binlognamelen = len - 11;
	ptr += 4;		// Skip length and sequence number
	if (*ptr++ != COM_BINLOG_DUMP)
	{
		LOGIF(LE, (skygw_log_write(
			LOGFILE_ERROR,
			"blr_slave_binlog_dump expected a COM_BINLOG_DUMP but received %d",
			*(ptr-1))));
		return 0;
	}

	/*
	 * The length comes straight off the wire: refuse anything that is
	 * negative or that would not fit, with its terminator, in the
	 * slave's file name buffer.
	 * NOTE(review): assumes slave->binlogfile is an array member (not a
	 * pointer) so that sizeof yields its capacity -- confirm.
	 */
	if (binlognamelen < 0 ||
		(size_t)binlognamelen >= sizeof(slave->binlogfile))
	{
		LOGIF(LE, (skygw_log_write(
			LOGFILE_ERROR,
			"blr_slave_binlog_dump received an invalid binlog file name length %d",
			binlognamelen)));
		return 0;
	}

	slave->binlog_pos = extract_field(ptr, 32);
	ptr += 4;
	/* flags and serverid are decoded to step over them; not used below */
	flags = extract_field(ptr, 16);
	ptr += 2;
	serverid = extract_field(ptr, 32);
	ptr += 4;
	strncpy(slave->binlogfile, (char *)ptr, binlognamelen);
	slave->binlogfile[binlognamelen] = 0;

	slave->seqno = 1;

	/* Event = header(19) + position(8) + file name [+ CRC(4)] */
	if (slave->nocrc)
		len = 19 + 8 + binlognamelen;
	else
		len = 19 + 8 + 4 + binlognamelen;

	// Build a fake rotate event
	resp = gwbuf_alloc(len + 5);
	if (resp == NULL)
	{
		LOGIF(LE, (skygw_log_write(
			LOGFILE_ERROR,
			"blr_slave_binlog_dump failed to allocate the rotate event")));
		return 0;
	}
	hdr.payload_len = len + 1;
	hdr.seqno = slave->seqno++;
	hdr.ok = 0;
	hdr.timestamp = 0L;
	hdr.event_type = ROTATE_EVENT;
	hdr.serverid = router->masterid;
	hdr.event_size = len;
	hdr.next_pos = 0;
	hdr.flags = 0x20;
	ptr = blr_build_header(resp, &hdr);
	encode_value(ptr, slave->binlog_pos, 64);
	ptr += 8;
	memcpy(ptr, slave->binlogfile, binlognamelen);
	ptr += binlognamelen;

	if (!slave->nocrc)
	{
		/*
		 * Now add the CRC to the fake binlog rotate event.
		 *
		 * The algorithm is first to compute the checksum of an empty buffer
		 * and then the checksum of the event portion of the message, ie we do not
		 * include the length, sequence number and ok byte that makes up the first
		 * 5 bytes of the message. We also do not include the 4 byte checksum itself.
		 */
		chksum = crc32(0L, NULL, 0);
		chksum = crc32(chksum, GWBUF_DATA(resp) + 5, hdr.event_size - 4);
		encode_value(ptr, chksum, 32);
	}

	rval = slave->dcb->func.write(slave->dcb, resp);

	/* Send the FORMAT_DESCRIPTION_EVENT */
	if (slave->binlog_pos != 4)
		blr_slave_send_fde(router, slave);

	slave->dcb->low_water = router->low_water;
	slave->dcb->high_water = router->high_water;
	dcb_add_callback(slave->dcb, DCB_REASON_DRAINED, blr_slave_callback, slave);
	slave->state = BLRS_DUMPING;

	LOGIF(LM, (skygw_log_write(
		LOGFILE_MESSAGE,
		"%s: New slave %s, server id %d, requested binlog file %s from position %lu",
		router->service->name, slave->dcb->remote,
		slave->serverid,
		slave->binlogfile, slave->binlog_pos)));

	/* A slave that is behind the router is marked as catching up. */
	if (slave->binlog_pos != router->binlog_position ||
		strcmp(slave->binlogfile, router->binlog_name) != 0)
	{
		spinlock_acquire(&slave->catch_lock);
		slave->cstate &= ~CS_UPTODATE;
		slave->cstate |= CS_EXPECTCB;
		spinlock_release(&slave->catch_lock);
		poll_fake_write_event(slave->dcb);
	}
	return rval;
}
/* * Free a pointer previously returned from subpage_kmalloc. If the * pointer is not on any heap page we recognize, return -1. */ static int subpage_kfree(void *ptr) { int blktype; // index into sizes[] that we're using vaddr_t ptraddr; // same as ptr struct pageref *pr; // pageref for page we're freeing in vaddr_t prpage; // PR_PAGEADDR(pr) vaddr_t fla; // free list entry address struct freelist *fl; // free list entry vaddr_t offset; // offset into page #ifdef GUARDS size_t blocksize, smallerblocksize; #endif ptraddr = (vaddr_t)ptr; #ifdef GUARDS if (ptraddr % PAGE_SIZE == 0) { /* * With guard bands, all client-facing subpage * pointers are offset by GUARD_PTROFFSET (which is 4) * from the underlying blocks and are therefore not * page-aligned. So a page-aligned pointer is not one * of ours. Catch this up front, as otherwise * subtracting GUARD_PTROFFSET could give a pointer on * a page we *do* own, and then we'll panic because * it's not a valid one. */ return -1; } ptraddr -= GUARD_PTROFFSET; #endif #ifdef LABELS if (ptraddr % PAGE_SIZE == 0) { /* ditto */ return -1; } ptraddr -= LABEL_PTROFFSET; #endif spinlock_acquire(&kmalloc_spinlock); checksubpages(); for (pr = allbase; pr; pr = pr->next_all) { prpage = PR_PAGEADDR(pr); blktype = PR_BLOCKTYPE(pr); KASSERT(blktype >= 0 && blktype < NSIZES); /* check for corruption */ KASSERT(blktype>=0 && blktype<NSIZES); checksubpage(pr); if (ptraddr >= prpage && ptraddr < prpage + PAGE_SIZE) { break; } } if (pr==NULL) { /* Not on any of our pages - not a subpage allocation */ spinlock_release(&kmalloc_spinlock); return -1; } offset = ptraddr - prpage; /* Check for proper positioning and alignment */ if (offset >= PAGE_SIZE || offset % sizes[blktype] != 0) { panic("kfree: subpage free of invalid addr %p\n", ptr); } #ifdef GUARDS blocksize = sizes[blktype]; smallerblocksize = blktype > 0 ? 
sizes[blktype - 1] : 0; checkguardband(ptraddr, smallerblocksize, blocksize); #endif /* * Clear the block to 0xdeadbeef to make it easier to detect * uses of dangling pointers. */ fill_deadbeef((void *)ptraddr, sizes[blktype]); /* * We probably ought to check for free twice by seeing if the block * is already on the free list. But that's expensive, so we don't. */ fla = prpage + offset; fl = (struct freelist *)fla; if (pr->freelist_offset == INVALID_OFFSET) { fl->next = NULL; } else { fl->next = (struct freelist *)(prpage + pr->freelist_offset); /* this block should not already be on the free list! */ #ifdef SLOW { struct freelist *fl2; for (fl2 = fl->next; fl2 != NULL; fl2 = fl2->next) { KASSERT(fl2 != fl); } } #else /* check just the head */ KASSERT(fl != fl->next); #endif } pr->freelist_offset = offset; pr->nfree++; KASSERT(pr->nfree <= PAGE_SIZE / sizes[blktype]); if (pr->nfree == PAGE_SIZE / sizes[blktype]) { /* Whole page is free. */ remove_lists(pr, blktype); freepageref(pr); /* Call free_kpages without kmalloc_spinlock. */ spinlock_release(&kmalloc_spinlock); free_kpages(prpage); } else { spinlock_release(&kmalloc_spinlock); } #ifdef SLOWER /* Don't get the lock unless checksubpages does something. */ spinlock_acquire(&kmalloc_spinlock); checksubpages(); spinlock_release(&kmalloc_spinlock); #endif return 0; }
/*
 * xio_tcp_waitforChange - block until a packet arrives for nwinfo or the
 * absolute time 'timeout' (compared against the system tick clock in
 * usecs) is reached. A timeout of 0 means wait for a packet only.
 *
 * EXOPC build: a wakeup predicate is built from the packet-arrival
 * predicate, OR'ed with a sleep predicate when a timeout was given. If
 * the socket's lock is currently held (asserted to be by this process)
 * it is dropped for the duration of the wait and retaken afterwards.
 * If no packet-arrival predicate could be built the function returns
 * without waiting.
 *
 * Non-EXOPC build: a select() on nwinfo->ringid.
 * NOTE(review): that branch refers to 'tcb', which is not declared in
 * this function, and declares a local 'timeout' that clashes with the
 * parameter; it looks stale -- confirm whether it is still built.
 */
void xio_tcp_waitforChange (struct tcpsocket *sock, xio_nwinfo_t *nwinfo, u_quad_t timeout)
{
#ifdef EXOPC
#define XIO_PRED_LEN	64
   struct wk_term t[XIO_PRED_LEN];
   int predlen;
   u_int sleep_usecs;
   int dropped_lock = 0;
   u_quad_t now = __ticks2usecs (__sysinfo.si_system_ticks);

   predlen = xio_net_wrap_wkpred_packetarrival (nwinfo, t);
   if (predlen == -1) {
      return;
   }

   if (timeout) {
      /* don't sleep if we're already past our timeout. */
      sleep_usecs = (timeout < now) ? 0 : (u_int)(timeout - now);
      predlen = wk_mkop (predlen, t, WK_OR);
      predlen += wk_mkusleep_pred (&t[predlen], sleep_usecs);
   }
   assert ((predlen > 0) && (predlen <= XIO_PRED_LEN));

   /* Do not sleep while holding the socket lock. */
   if (sock->info->lock.lock == 1) {
      assert(sock->info->lock.owner == getpid());
      dropped_lock = 1;
      spinlock_release(&sock->info->lock);
   }

   wk_waitfor_pred (t, predlen);

   if (dropped_lock) {
      spinlock_acquire(&sock->info->lock);
   }

#else
	/* do a blocking select on the fd that is nwinfo->ringid, */
	/* with max time equal to the timeout time                */
   fd_set fds;
   struct timeval tv;
   int timeout = ((tcb) && (tcb->timer_retrans));
   int ret;

   if (nwinfo->ringid < 0) {
      return;
   }

   FD_ZERO (&fds);
   FD_SET (nwinfo->ringid, &fds);
   if (timeout) {
assert (0);	/* timer_retrans is now in usecs */
      tv.tv_sec = max (0, tcb->timer_retrans - time(NULL));
      tv.tv_usec = 0;
   }
#if 0
   else {
      tv.tv_sec = 10;
      tv.tv_usec = 0;
   }
   ret = select ((nwinfo->ringid + 1), &fds, NULL, NULL, &tv);
#else
   ret = select ((nwinfo->ringid + 1), &fds, NULL, NULL, ((timeout) ? &tv : NULL));
#endif
#if 0
   if (ret <= 0) {
      printf ("xio_tcp_waitfor timeout (ret %d, tcb %p, timeout %d)\n", ret, tcb, (int)((tcb) ? (tcb->timer_retrans - time(NULL)) : -1));
   }
#endif
#endif
}
/*
 * do_put - handle the PUT system call on behalf of the current process.
 *
 * The child is selected by the low 8 bits of the caller's edx and is
 * created if it does not exist yet. If the child is not stopped the
 * caller first synchronizes with it via proc_wait(). Then, depending on
 * the bits set in cmd:
 *
 *   SYS_REGS   copy register state from the user buffer at ebx into the
 *              child (integer registers only, or the whole procstate
 *              when SYS_FPU is also set), then force user-mode segments
 *              and sanitized eflags;
 *   SYS_MEMOP  SYS_ZERO removes the child's mappings in [edi, edi+ecx);
 *              SYS_COPY copies [esi, esi+ecx) of the caller to edi in
 *              the child; any other non-zero value raises T_GPFLT;
 *   SYS_PERM   set permissions (cmd & SYS_RW) on [edi, edi+ecx);
 *   SYS_SNAP   snapshot the child's user address space into its rpdir;
 *   SYS_START  make the child ready to run.
 *
 * Invalid regions are reported to the caller with systrap(T_GPFLT).
 * Control leaves through trap_return(tf).
 *
 * The register state is fetched from user space only through usercopy().
 * An earlier version repeated the copy with a plain memcpy() from the
 * raw user pointer; that second copy was redundant and touched user
 * memory directly, so it has been removed.
 */
static void
do_put(trapframe *tf, uint32_t cmd)
{
	proc *p = proc_cur();
	assert(p->state == PROC_RUN && p->runcpu == cpu_cur());
	cprintf("PUT proc %x eip %x esp %x cmd %x\n", p, tf->eip, tf->esp, cmd);

	spinlock_acquire(&p->lock);

	// Find the named child process; create if it doesn't exist
	uint32_t cn = tf->regs.edx & 0xff;
	proc *cp = p->child[cn];
	if (!cp) {
		cp = proc_alloc(p, cn);
		if (!cp)	// XX handle more gracefully
			panic("sys_put: no memory for child");
	}

	// Synchronize with child if necessary.
	if (cp->state != PROC_STOP)
		proc_wait(p, cp, tf);

	// Since the child is now stopped, it's ours to control;
	// we no longer need our process lock -
	// and we don't want to be holding it if usercopy() below aborts.
	spinlock_release(&p->lock);

	// Put child's general register state
	if (cmd & SYS_REGS) {
		int len = offsetof(procstate, fx);	// just integer regs
		if (cmd & SYS_FPU)
			len = sizeof(procstate);	// whole shebang

		// Copy the user's register state into the child. This is the
		// only access to the user buffer.
		usercopy(tf, 0, &cp->sv, tf->regs.ebx, len);

		// Make sure process uses user-mode segments and eflag settings
		cp->sv.tf.ds = CPU_GDT_UDATA | 3;
		cp->sv.tf.es = CPU_GDT_UDATA | 3;
		cp->sv.tf.cs = CPU_GDT_UCODE | 3;
		cp->sv.tf.ss = CPU_GDT_UDATA | 3;
		cp->sv.tf.eflags &= FL_USER;
		cp->sv.tf.eflags |= FL_IF;	// enable interrupts
	}

	uint32_t sva = tf->regs.esi;
	uint32_t dva = tf->regs.edi;
	uint32_t size = tf->regs.ecx;
	switch (cmd & SYS_MEMOP) {
	case 0:	// no memory operation
		break;
	case SYS_COPY:
		// validate source region
		if (PTOFF(sva) || PTOFF(size)
				|| sva < VM_USERLO || sva > VM_USERHI
				|| size > VM_USERHI-sva)
			systrap(tf, T_GPFLT, 0);
		// fall thru...
	case SYS_ZERO:
		// validate destination region
		if (PTOFF(dva) || PTOFF(size)
				|| dva < VM_USERLO || dva > VM_USERHI
				|| size > VM_USERHI-dva)
			systrap(tf, T_GPFLT, 0);

		switch (cmd & SYS_MEMOP) {
		case SYS_ZERO:	// zero memory and clear permissions
			pmap_remove(cp->pdir, dva, size);
			break;
		case SYS_COPY:	// copy from local src to dest in child
			pmap_copy(p->pdir, sva, cp->pdir, dva, size);
			break;
		}
		break;
	default:
		systrap(tf, T_GPFLT, 0);
	}

	if (cmd & SYS_PERM) {
		// validate destination region (page granularity here)
		if (PGOFF(dva) || PGOFF(size)
				|| dva < VM_USERLO || dva > VM_USERHI
				|| size > VM_USERHI-dva)
			systrap(tf, T_GPFLT, 0);
		if (!pmap_setperm(cp->pdir, dva, size, cmd & SYS_RW))
			panic("pmap_put: no memory to set permissions");
	}

	if (cmd & SYS_SNAP)	// Snapshot child's state
		pmap_copy(cp->pdir, VM_USERLO, cp->rpdir, VM_USERLO,
				VM_USERHI-VM_USERLO);

	// Start the child if requested
	if (cmd & SYS_START)
		proc_ready(cp);

	trap_return(tf);	// syscall completed
}
/**
 * The clientReply entry point. This is passed the response buffer
 * to which the filter should be applied. Once processed the
 * reply is passed to the upstream component
 * (filter or router) in the filter chain.
 *
 * If the connection to the broker is down, a reconnect is attempted at
 * most once per reconnect interval. When the connection is usable and
 * the session's last request was a query, the function builds a text
 * summary of the first reply packet (OK, ERR, LOCAL_INFILE request or
 * result set column count), prefixes it with a timestamp and publishes
 * it on the exchange, tagged with the same identifier as the query.
 *
 * Failures to build or publish the message are logged and never prevent
 * the reply from being passed upstream. In particular, if the message
 * buffer cannot be allocated the reply is forwarded unmodified instead
 * of writing through a NULL pointer.
 *
 * @param instance	The filter instance data
 * @param session	The filter session
 * @param reply		The response data
 * @return		The return value of the upstream clientReply
 */
static int clientReply(FILTER* instance, void *session, GWBUF *reply)
{
  MQ_SESSION		*my_session = (MQ_SESSION *)session;
  MQ_INSTANCE		*my_instance = (MQ_INSTANCE *)instance;
  char			t_buf[128],*combined;
  unsigned int		err_code = AMQP_STATUS_OK,
    pkt_len = pktlen(reply->sbuf->data), offset = 0;
  size_t		combined_size;
  amqp_basic_properties_t prop;

  /** Reconnect to the broker if needed, no more often than rconn_intv */
  spinlock_acquire(my_instance->rconn_lock);

  if(my_instance->conn_stat != AMQP_STATUS_OK){

    if(difftime(time(NULL),my_instance->last_rconn) > my_instance->rconn_intv){

      my_instance->last_rconn = time(NULL);

      if(init_conn(my_instance,my_session)){
	my_instance->rconn_intv = 1.0;
	my_instance->conn_stat = AMQP_STATUS_OK;
      }else{
	my_instance->rconn_intv += 5.0;
	skygw_log_write(LOGFILE_ERROR,
			"Error : Failed to reconnect to the MQRabbit server ");
      }
      err_code = my_instance->conn_stat;
    }
  }

  spinlock_release(my_instance->rconn_lock);

  if (err_code == AMQP_STATUS_OK && my_session->was_query){

    int packet_ok = 0, was_last = 0;

    my_session->was_query = 0;

    if(pkt_len > 0){
      prop._flags = AMQP_BASIC_CONTENT_TYPE_FLAG |
	AMQP_BASIC_DELIVERY_MODE_FLAG |
	AMQP_BASIC_MESSAGE_ID_FLAG |
	AMQP_BASIC_CORRELATION_ID_FLAG;
      prop.content_type = amqp_cstring_bytes("text/plain");
      prop.delivery_mode = AMQP_DELIVERY_PERSISTENT;
      prop.correlation_id = amqp_cstring_bytes(my_session->uid);
      prop.message_id = amqp_cstring_bytes("reply");

      /** Zero-filled, so every bounded write below stays terminated */
      combined_size = GWBUF_LENGTH(reply) + 256;
      if(!(combined = calloc(combined_size,sizeof(char)))){
	skygw_log_write_flush(LOGFILE_ERROR,
			      "Error : Out of memory");
	/** Nothing can be published; still deliver the reply upstream */
	return my_session->up.clientReply(my_session->up.instance,
					  my_session->up.session, reply);
      }

      /** Timestamp prefix: "<seconds since epoch>|" */
      memset(t_buf,0,128);
      snprintf(t_buf,sizeof(t_buf),"%lu|",(unsigned long)time(NULL));

      memcpy(combined + offset,t_buf,strnlen(t_buf,40));
      offset += strnlen(t_buf,40);

      if(*(reply->sbuf->data + 4) == 0x00){ /**OK packet*/
	unsigned int aff_rows = 0, l_id = 0, s_flg = 0, wrn = 0;
	unsigned char *ptr = (unsigned char*)(reply->sbuf->data + 5);
	pkt_len = pktlen(reply->sbuf->data);
	aff_rows = consume_leitoi(&ptr);
	l_id = consume_leitoi(&ptr);
	/** Two little-endian 16-bit fields: status flags and warnings */
	s_flg |= *ptr++;
	s_flg |= (*ptr++ << 8);
	wrn |= *ptr++;
	wrn |= (*ptr++ << 8);
	snprintf(combined + offset,combined_size - offset,
		"OK - affected_rows: %d "
		" last_insert_id: %d "
		" status_flags: %#0x "
		" warnings: %d ",
		aff_rows,l_id,s_flg,wrn);
	offset = strnlen(combined,combined_size);

	if(pkt_len > 7){
	  int plen = consume_leitoi(&ptr);
	  if(plen > 0){
	    snprintf(combined + offset,combined_size - offset,
		     " message: %.*s\n",plen,ptr);
	  }
	}

	packet_ok = 1;
	was_last = 1;

      }else if(*(reply->sbuf->data + 4) == 0xff){ /**ERR packet*/

	/** The message text starts at byte 13 and runs to the buffer end */
	int msg_len = (int)((char *)reply->end -
			    ((char *)reply->sbuf->data + 13));
	if(msg_len < 0){
	  /** A negative precision would mean "unbounded" to printf */
	  msg_len = 0;
	}
	snprintf(combined + offset,combined_size - offset,
		"ERROR - message: %.*s",
		msg_len,
		(char *)reply->sbuf->data + 13);
	packet_ok = 1;
	was_last = 1;

      }else if(*(reply->sbuf->data + 4) == 0xfb){ /**LOCAL_INFILE request packet*/

	unsigned char	*rset = (unsigned char*)reply->sbuf->data;
	strcpy(combined + offset,"LOCAL_INFILE: ");
	strncat(combined + offset,(const char*)rset+5,pktlen(rset));
	packet_ok = 1;
	was_last = 1;

      }else{ /**Result set*/

	unsigned char	*rset = (unsigned char*)(reply->sbuf->data + 4);
	char		tmp[256];
	unsigned int	col_cnt = consume_leitoi(&rset);

	/** A stack buffer avoids a second, unchecked heap allocation */
	snprintf(tmp,sizeof(tmp),"Columns: %d",col_cnt);
	memcpy(combined + offset,tmp,strnlen(tmp,256));
	offset += strnlen(tmp,256);
	memcpy(combined + offset,"\n",1);
	offset++;

	packet_ok = 1;
	was_last = 1;

      }
      if(packet_ok){
	if((err_code = amqp_basic_publish(my_session->conn,my_session->channel,
					  amqp_cstring_bytes(my_instance->exchange),
					  amqp_cstring_bytes(my_instance->key),
					  0,0,&prop,amqp_cstring_bytes(combined))
	    ) != AMQP_STATUS_OK){
	  /** Remember the failure so the next call attempts a reconnect */
	  spinlock_acquire(my_instance->rconn_lock);
	  my_instance->conn_stat = err_code;
	  spinlock_release(my_instance->rconn_lock);

	  skygw_log_write_flush(LOGFILE_ERROR,
				"Error : Failed to publish message to MQRabbit server: "
				"%s",amqp_error_string2(err_code));

	}else if(was_last){

	  /**Successful reply received and sent, releasing uid*/

	  free(my_session->uid);
	  my_session->uid = NULL;

	}
      }
      free(combined);
    }

  }

  return my_session->up.clientReply(my_session->up.instance,
				    my_session->up.session, reply);
}
//Load and run the executable as a new process in a new thread // Argument: executable file name; Returns: process ID of the new process process_id_t process_spawn(char const* executable, char const **argv){ // Initialise 'global' variables interrupt_status_t intr_status; TID_t my_thread; process_id_t pid; int ret; pid = 0; kprintf("vi er nu i spawn jaaaaa \n"); //stop disables intr_status = _interrupt_disable(); //spinlock spinlock_acquire(&process_table_slock); //Find empty spot for ( int i = 0; i < PROCESS_MAX_PROCESSES; i++) { if (process_table[i].state == STATE_FREE) { pid = i; break; } } kprintf("vi er nu længere i spawn wuuhuu\n"); //Spawn new thread in 'process run' my_thread = thread_create((void (*)(uint32_t))(&process_run), pid); process_table[pid].Thread_ID = my_thread; kprintf("fejler spawn ved checket? %d\n",my_thread); // Check if thread has been created if (!(my_thread >= 0)) { kprintf("Her er fejl spawn my threads er for lille"); return -1; } kprintf("lige før setup new process. pid: %d\n",pid); kprintf("lige før setup new process. my_thread: %d\n",my_thread); kprintf("lige før setup new process. entry_point: %d\n",process_table[pid].entry_point); // Attempt to start new process ret = setup_new_process( my_thread, executable, argv, &process_table[pid].entry_point, &process_table[pid].stack_top ); if (ret < 0){ kprintf("fejl i setup_new_process ret < 0 ret: %d\n",ret); return -1; } /* Unlock the process table */ spinlock_release(&process_table_slock); //enable interrupts _interrupt_set_state(intr_status); kprintf("retval ved s**t af spawn: %d\n",ret); thread_run(my_thread); return pid; }