/** * Initializes the high availability (ha) structure * * returns * 0 on success * -1 on error */ inline int init_pingtable(struct ha *table,int timeout,int maxpings) { if(maxpings<=0) maxpings=1; table->begin=0; table->end=0; table->timed_out_pings=0; table->size=maxpings; table->timeout=timeout; if (!(table->mutex=lock_alloc())){ LM_ERR("Unable to allocate a lock for the ping table\n"); goto error; }else lock_init(table->mutex); LM_ERR("alloc'ing %d bytes for %d pings\n",(int)(maxpings*sizeof(struct ping)),maxpings); if (0==(table->pings=shm_malloc(maxpings*sizeof(struct ping)))){ LM_ERR("Unable to shm_malloc %d bytes for %d pings\n",(int)(maxpings*sizeof(struct ping)),maxpings); goto error; }else{ memset(table->pings,0,(maxpings*sizeof(struct ping))); } return 0; error: destroy_pingtable(table); return -1; }
/** Main loop for the Event Dispatcher process. * */ int dispatcher_main_loop(void) { struct pollfd poll_fds[3+MAX_AS_NR],*poll_tmp; int clean_index,i,j,k,fd,poll_events=0,socks[2],chld_status; int as_nr,unc_as_nr; pid_t chld; struct timeval last_ping,now; struct as_entry *as; sig_flag=0; is_dispatcher=1; as_nr=0; timerclear(&last_ping); timerclear(&now); signal(SIGCHLD,seas_sighandler); signal(SIGTERM,seas_sighandler); signal(SIGUSR1,seas_sighandler); signal(SIGINT, seas_sighandler); signal(SIGKILL,seas_sighandler); strcpy(whoami,"Seas Event Dispatcher process"); /*I set process_no to -1 because otherwise, the logging process confuses this process with another from SER * (see LM_*() and dprint() and my_pid())*/ process_no = -1; if(open_server_sockets(seas_listen_ip,seas_listen_port,socks)==-1){ LM_ERR("unable to open server sockets on dispatcher\n"); return -1; } for(i=0;i<2;i++){ poll_fds[i].fd=socks[i]; poll_fds[i].revents=0; poll_fds[i].events=POLLIN; } poll_fds[2].fd=read_pipe; poll_fds[2].revents=0; poll_fds[2].events=POLLIN;/*pollhup ?*/ poll_events=0; unc_as_nr=0; if(use_ha) spawn_pinger(); while(1){ if(sig_flag==SIGCHLD){ while ((chld=waitpid( -1, &chld_status, WNOHANG ))>0) { if (WIFEXITED(chld_status)){ LM_INFO("child process %d exited normally, status=%d\n", chld,WEXITSTATUS(chld_status)); }else if (WIFSIGNALED(chld_status)) { LM_INFO("child process %d exited by a signal %d\n", chld,WTERMSIG(chld_status)); }else if (WIFSTOPPED(chld_status)) LM_INFO("child process %d stopped by a signal %d\n", chld,WSTOPSIG(chld_status)); for (as=as_list;as;as=as->next) { if(as->type!=AS_TYPE) continue; if(as->u.as.action_pid==chld){ for(i=0;i<as_nr && ((poll_fds[3+i].fd)!=(as->u.as.event_fd));i++) ; if(i==as_nr){ LM_ERR("Either the pinger has died or BUG found..\n"); continue; } /*overwrite the obsolete 'i' position with the next position*/ for(j=3+i;j<(as_nr+unc_as_nr+3-1);i++){ poll_fds[j].fd=poll_fds[j+1].fd; poll_fds[j].events=poll_fds[j+1].events; poll_fds[j].revents=poll_fds[j+1].revents; } close(as->u.as.event_fd);/*close the socket fd*/ if (as->u.as.ev_buffer.s) { pkg_free(as->u.as.ev_buffer.s); as->u.as.ev_buffer.s=(char *)0; as->u.as.ev_buffer.len=0; } as->u.as.event_fd=as->u.as.action_fd=-1; as->connected=0; destroy_pingtable(&as->u.as.jain_pings); destroy_pingtable(&as->u.as.servlet_pings); as_nr--; LM_WARN("client [%.*s] leaving (Action Dispatcher Process died !)\n", as->name.len,as->name.s); break; }/*if(action_pid==chld)*/ }/*for(as=as_list;as;as=as->next)*/ }/*while(waitpid(-1)>0)*/ }else if (sig_flag) { LM_WARN("received signal != sigchld(%d)\n",sig_flag); } sig_flag=0; clean_index=0; LM_INFO("polling [2 ServSock] [1 pipe] [%d App Servers]" " [%d Uncomplete AS]\n",as_nr,unc_as_nr); poll_events = poll(poll_fds,3+unc_as_nr+as_nr,-1); if (poll_events == -1) { if(errno==EINTR){ /*handle the case a child has died. * It will be done in the next iteration in if(seas_sigchld_received)*/ continue; } if(errno==EBADF){ LM_ERR("invalid file descriptor pased to poll (%s)\n", strerror(errno)); return -1;/*??*/ } /* errors */ LM_ERR("poll'ing:%s\n",strerror(errno)); poll_events=0; continue; } else if (poll_events == 0) {/*timeout*/ continue; } else {/*there are events !*/ /*handle connections from server sockets*/ for(i=0;i<2;i++){ if(poll_fds[i].revents) poll_events--; if(poll_fds[i].revents & POLLIN){ poll_fds[i].revents &= (~POLLIN); if((fd=new_as_connect(socks[i],i==0?'e':'a'))>=0){ poll_tmp=&poll_fds[3+as_nr+unc_as_nr]; poll_tmp->fd=fd; poll_tmp->events=POLLIN|POLLHUP; unc_as_nr++; LM_DBG("Have new %s client\n",i==0?"event":"action"); }else{ LM_ERR("accepting connection from AS\n"); } } } /*handle data from pipe*/ if(poll_fds[2].revents & POLLIN){ poll_fds[2].revents &= (~POLLIN); poll_events--; if(dispatch_relay()<0){ LM_ERR("dispatch_relay returned -1" "should clean-up table\n"); } } /*now handle receive data from completed AS*/ clean_index=0; LM_DBG("Scanning data from %d AS\n",as_nr); for(i=0;(i<as_nr) && poll_events;i++){ clean_index=0; poll_tmp=&poll_fds[3+i]; if(poll_tmp->revents) poll_events--; if(poll_tmp->revents & POLLIN){ LM_DBG("POLLIN found in AS #%i\n",i); poll_tmp->revents &= (~POLLIN); switch(handle_as_data(poll_tmp->fd)){ case -2:/*read returned 0 bytes, an AS client is leaving*/ clean_index=1; break; case -1:/*shouldnt happen*/ LM_ERR("reading from AS socket\n"); break; case 0:/* event_response received and processed*/ break; default: LM_WARN("unknown return type from handle_as_data\n"); } } if(clean_index || (poll_tmp->revents & POLLHUP)){ LM_DBG("POLHUP or read==0 found in %i AS \n",i); clean_index=0; poll_tmp->revents = 0; for(as=as_list;as;as=as->next){ if(as->type==CLUSTER_TYPE) continue; if(as->connected && (as->u.as.event_fd == poll_tmp->fd)){ close(poll_tmp->fd);/*close the socket fd*/ /*TODO we should send a signal to the Action Dispatcher !!!*/ as->connected=0; as_nr--; /*overwrite the obsolete 'i' position with the next position*/ for(k=i;k<(as_nr+unc_as_nr);k++){ j=3+k; poll_fds[j].fd=poll_fds[j+1].fd; poll_fds[j].events=poll_fds[j+1].events; poll_fds[j].revents=poll_fds[j+1].revents; } --i; LM_WARN("client %.*s leaving !!!\n",as->name.len,as->name.s); break; } } if (!as) { LM_ERR("the leaving client was not found in the as_list\n"); } } } /*now handle data sent from uncompleted AS*/ LM_DBG("Scanning data from %d uncomplete AS \n",unc_as_nr); clean_index=0; for(i=0;i<unc_as_nr && poll_events;i++){ poll_tmp=&poll_fds[3+as_nr+i]; if(poll_tmp->revents) poll_events--; if(poll_tmp->revents & POLLIN){ LM_DBG("POLLIN found in %d uncomplete AS \n",i); poll_tmp->revents &= (~POLLIN); fd=handle_unc_as_data(poll_tmp->fd); if(fd>0){ /* there's a new AS, push the uncomplete poll_fds up and set the AS */ for(k=i;k>0;k--){ j=3+as_nr+k; poll_fds[j].fd=poll_fds[j-1].fd; poll_fds[j].events=poll_fds[j-1].events; poll_fds[j].revents=poll_fds[j-1].revents; } poll_fds[3+as_nr].fd=fd; poll_fds[3+as_nr].events=POLLIN|POLLHUP; poll_fds[3+as_nr].revents=0; as_nr++;/*not very sure if this is thread-safe*/ unc_as_nr--; }else if(fd<=0){/* pull the upper set of uncomplete AS down and take this one out*/ poll_tmp->revents=0; for(k=i;k<(unc_as_nr-1);k++){ j=3+as_nr+k; poll_fds[j].fd=poll_fds[j+1].fd; poll_fds[j].events=poll_fds[j+1].events; poll_fds[j].revents=poll_fds[j+1].revents; } unc_as_nr--; /** we decrement i so that pulling down the upper part of the unc_as array so that * it doesn't affect our for loop */ i--; } } if(poll_tmp->revents & POLLHUP){ LM_DBG("POLLHUP found in %d uncomplete AS \n",i); close(poll_tmp->fd); for(k=i;k<(unc_as_nr-1);k++){ j=3+as_nr+k; poll_fds[j].fd=poll_fds[j+1].fd; poll_fds[j].events=poll_fds[j+1].events; poll_fds[j].revents=poll_fds[j+1].revents; } unc_as_nr--; i--; poll_tmp->revents = 0; } }/*for*/ }/*else ...(poll_events>0)*/ }/*while(1)*/ }
/** * receives 2 indexes in unc_as_t which correspond one to * the events socket and the other to the actions socket * * returns * 0 on success * -1 on error */ static inline int add_new_as(int event_idx,int action_idx,struct as_entry *as) { struct unc_as *ev,*ac; int j; as_p the_as=0; struct as_entry *tmp; ev=&unc_as_t[event_idx]; ac=&unc_as_t[action_idx]; the_as=&(as->u.as); the_as->action_fd=ac->fd; the_as->event_fd=ev->fd; the_as->name.len = strlen(ev->name); if(use_ha){ if(jain_ping_timeout){ if (0>init_pingtable(&the_as->jain_pings,jain_ping_timeout,(jain_ping_timeout/jain_ping_period+1)*PING_OVER_FACTOR)){ LM_ERR("Unable to init jain pinging table...\n"); goto error; } } if(servlet_ping_timeout){ if (0>init_pingtable(&the_as->servlet_pings,servlet_ping_timeout,(servlet_ping_timeout/servlet_ping_period+1)*PING_OVER_FACTOR)){ LM_ERR("Unable to init servlet pinging table...\n"); goto error; } } } /*TODO attention, this is pkg_malloc because only the Event_Dispatcher process * has to use it !!*/ if(!(the_as->ev_buffer.s = pkg_malloc(AS_BUF_SIZE))){ LM_ERR("unable to alloc pkg mem for the event buffer\n"); goto error; } the_as->ev_buffer.len=0; as->connected=1; the_as->action_pid=0; for(tmp=as_list;tmp;tmp=tmp->next){ if(tmp->type==AS_TYPE) continue; for (j=0;j<tmp->u.cs.num;j++) { if (tmp->u.cs.as_names[j].len == the_as->name.len && !memcmp(tmp->u.cs.as_names[j].s,the_as->name.s,the_as->name.len)) { if(tmp->u.cs.num==tmp->u.cs.registered){ LM_ERR("AS %.*s belongs to cluster %.*s which is already completed\n", the_as->name.len,the_as->name.s,tmp->name.len,tmp->name.s); break; } tmp->u.cs.registered++; break; } } } if(0>spawn_action_dispatcher(as)){ LM_ERR("Unable to spawn Action Dispatcher for as %s\n",ev->name); goto error; } if(send_sockinfo(the_as->event_fd)==-1){ LM_ERR("Unable to send socket info to as %s\n",ev->name); goto error; } return 0; error: if(the_as->ev_buffer.s){ pkg_free(the_as->ev_buffer.s); the_as->ev_buffer.s=(char*)0; } if(the_as->action_pid) kill(the_as->action_pid,SIGTERM); if(jain_ping_timeout) destroy_pingtable(&the_as->jain_pings); if(servlet_ping_timeout) destroy_pingtable(&the_as->servlet_pings); return -1; }