/*
 * Probe whether the kernel supports TCP repair mode: create a throwaway
 * TCP socket, switch it into repair mode and try reading TCP_TIMESTAMP.
 * Returns 0 when both operations succeed, non-zero otherwise.
 */
int check_tcp(void)
{
	int sock, err, tsval;
	socklen_t len = sizeof(tsval);

	sock = socket(PF_INET, SOCK_STREAM, IPPROTO_TCP);
	if (sock < 0) {
		pr_perror("Can't create TCP socket :(");
		return -1;
	}

	err = tcp_repair_on(sock);
	if (!err) {
		/* TCP_TIMESTAMP is only readable while in repair mode. */
		err = getsockopt(sock, SOL_TCP, TCP_TIMESTAMP, &tsval, &len);
		if (err)
			pr_perror("Can't get TCP_TIMESTAMP");
	}

	close(sock);
	return err;
}
/*
 * Refill the socket's receive and send queues from the image referenced
 * by @fd.  Returns 0 on success, -1 on any failure.
 */
static int restore_tcp_queues(int sk, TcpStreamEntry *tse, int fd)
{
	u32 nbytes;

	if (restore_prepare_socket(sk))
		return -1;

	/* Everything the peer sent that the task hadn't read yet. */
	nbytes = tse->inq_len;
	if (nbytes && send_tcp_queue(sk, TCP_RECV_QUEUE, nbytes, fd))
		return -1;

	/*
	 * The write buffer splits into two parts: data already pushed to
	 * the wire (sent but unacknowledged) and data never sent at all.
	 * The first part must be installed while in repair mode so the
	 * stack knows those bytes are in flight and can match incoming
	 * ACKs against them.
	 */
	nbytes = tse->outq_len - tse->unsq_len;
	if (nbytes && send_tcp_queue(sk, TCP_SEND_QUEUE, nbytes, fd))
		return -1;

	/*
	 * The never-sent tail needs no repair-mode trickery -- drop out
	 * of repair mode, write it as ordinary data, then re-enter.
	 */
	nbytes = tse->unsq_len;
	tcp_repair_off(sk);
	if (nbytes && __send_tcp_queue(sk, TCP_SEND_QUEUE, nbytes, fd))
		return -1;
	if (tcp_repair_on(sk))
		return -1;

	return 0;
}
/*
 * Restore a single established TCP connection: put the socket into
 * repair mode, then replay its saved state.  Returns 0 on success,
 * -1 on failure.
 */
int restore_one_tcp(int fd, struct inet_sk_info *ii)
{
	pr_info("Restoring TCP connection\n");

	/* Short-circuit keeps the original ordering: repair first. */
	if (tcp_repair_on(fd) || restore_tcp_conn_state(fd, ii))
		return -1;

	return 0;
}
/*
 * Switch an established TCP socket into kernel repair mode for dumping,
 * keeping a private dup of the fd alive for the whole dump.
 * Returns 0 on success, -1 on failure.
 */
static int tcp_repair_establised(int fd, struct inet_sk_desc *sk)
{
	int ret;

	pr_info("\tTurning repair on for socket %x\n", sk->sd.ino);
	/*
	 * Keep the socket open in criu till the very end. In
	 * case we close this fd after one task fd dumping and
	 * fail we'll have to turn repair mode off
	 */
	sk->rfd = dup(fd);
	if (sk->rfd < 0) {
		pr_perror("Can't save socket fd for repair");
		goto err1;
	}

	/*
	 * Without a private network namespace the peer can still talk to
	 * us while we dump, so block the connection via netfilter first.
	 */
	if (!(root_ns_mask & CLONE_NEWNET)) {
		ret = nf_lock_connection(sk);
		if (ret < 0)
			goto err2;
	}

	ret = tcp_repair_on(sk->rfd);
	if (ret < 0)
		goto err3;

	list_add_tail(&sk->rlist, &cpt_tcp_repair_sockets);

	ret = refresh_inet_sk(sk);
	if (ret < 0)
		/*
		 * NOTE(review): unlike the paths below, this one does not
		 * unlock or close -- presumably because the socket is
		 * already on cpt_tcp_repair_sockets and gets unwound when
		 * that list is walked; confirm before "fixing" this.
		 */
		goto err1;

	return 0;

	/* Error unwind, in reverse order of acquisition. */
err3:
	if (!(root_ns_mask & CLONE_NEWNET))
		nf_unlock_connection(sk);
err2:
	close(sk->rfd);
err1:
	return -1;
}
/*
 * Begin a checkpoint/restore session on @fd: allocate a handle and put
 * the socket into TCP repair mode.  Returns the handle, or NULL on
 * allocation or repair failure (the caller keeps ownership of @fd).
 */
struct libsoccr_sk *libsoccr_pause(int fd)
{
	struct libsoccr_sk *sk = malloc(sizeof(*sk));

	if (!sk) {
		loge("Unable to allocate memory\n");
		return NULL;
	}

	if (tcp_repair_on(fd) < 0) {
		free(sk);
		return NULL;
	}

	sk->fd = fd;
	sk->flags = 0;
	sk->recv_queue = NULL;
	sk->send_queue = NULL;
	sk->src_addr = NULL;
	sk->dst_addr = NULL;

	return sk;
}
/*
 * Restore everything about the connection except the queue contents:
 * bind/connect the socket back to its addresses, set the send/recv
 * sequence numbers, and replay the negotiated TCP options.
 * Returns 0 on success; -1/-2/-3 on the various failures below.
 */
static int libsoccr_set_sk_data_noq(struct libsoccr_sk *sk,
		struct libsoccr_sk_data *data, unsigned data_size)
{
	struct tcp_repair_opt opts[4];
	int addr_size, mstate;
	int onr = 0;
	__u32 seq;

	if (!data || data_size < SOCR_DATA_MIN_SIZE) {
		loge("Invalid input parameters\n");
		return -1;
	}

	if (!sk->dst_addr || !sk->src_addr) {
		loge("Destination or/and source addresses aren't set\n");
		return -1;
	}

	/*
	 * Turn the state into a bitmask so it can be tested against the
	 * *_FIN state-set masks below.
	 */
	mstate = 1 << data->state;

	if (data->state == TCP_LISTEN) {
		loge("Unable to handle listen sockets\n");
		return -1;
	}

	if (sk->src_addr->sa.sa_family == AF_INET)
		addr_size = sizeof(sk->src_addr->v4);
	else
		addr_size = sizeof(sk->src_addr->v6);

	/* Re-attach the socket to its original local address/port. */
	if (bind(sk->fd, &sk->src_addr->sa, addr_size)) {
		logerr("Can't bind inet socket back");
		return -1;
	}

	/*
	 * A FIN occupies one sequence number but carries no queue data;
	 * back the sequence counters off by one in the FIN-ish states so
	 * they line up with the saved queue lengths.
	 */
	if (mstate & (RCVQ_FIRST_FIN | RCVQ_SECOND_FIN))
		data->inq_seq--;

	/* outq_seq is adjusted because the FIN packet is not accounted for */
	if (mstate & (SNDQ_FIRST_FIN | SNDQ_SECOND_FIN))
		data->outq_seq--;

	/* Seed each queue's sequence number at the start of its data. */
	if (set_queue_seq(sk, TCP_RECV_QUEUE,
				data->inq_seq - data->inq_len))
		return -2;

	seq = data->outq_seq - data->outq_len;
	/* The SYN itself consumed one sequence number too. */
	if (data->state == TCP_SYN_SENT)
		seq--;

	if (set_queue_seq(sk, TCP_SEND_QUEUE, seq))
		return -3;

	if (sk->dst_addr->sa.sa_family == AF_INET)
		addr_size = sizeof(sk->dst_addr->v4);
	else
		addr_size = sizeof(sk->dst_addr->v6);

	/*
	 * A SYN_SENT socket must emit a real SYN, so leave repair mode
	 * for the connect() and re-enter it right after.
	 */
	if (data->state == TCP_SYN_SENT && tcp_repair_off(sk->fd))
		return -1;

	/*
	 * In repair mode connect() only installs the peer address; in
	 * the SYN_SENT case EINPROGRESS is the expected outcome.
	 */
	if (connect(sk->fd, &sk->dst_addr->sa, addr_size) == -1 &&
						errno != EINPROGRESS) {
		loge("Can't connect inet socket back\n");
		return -1;
	}

	if (data->state == TCP_SYN_SENT && tcp_repair_on(sk->fd))
		return -1;

	logd("\tRestoring TCP options\n");

	/* Collect the options to replay into opts[], counted by onr. */
	if (data->opt_mask & TCPI_OPT_SACK) {
		logd("\t\tWill turn SAK on\n");
		opts[onr].opt_code = TCPOPT_SACK_PERM;
		opts[onr].opt_val = 0;
		onr++;
	}

	if (data->opt_mask & TCPI_OPT_WSCALE) {
		logd("\t\tWill set snd_wscale to %u\n", data->snd_wscale);
		logd("\t\tWill set rcv_wscale to %u\n", data->rcv_wscale);
		opts[onr].opt_code = TCPOPT_WINDOW;
		/* Kernel packs both scales: send in the low half, recv in the high. */
		opts[onr].opt_val = data->snd_wscale + (data->rcv_wscale << 16);
		onr++;
	}

	if (data->opt_mask & TCPI_OPT_TIMESTAMPS) {
		logd("\t\tWill turn timestamps on\n");
		opts[onr].opt_code = TCPOPT_TIMESTAMP;
		opts[onr].opt_val = 0;
		onr++;
	}

	logd("Will set mss clamp to %u\n", data->mss_clamp);
	opts[onr].opt_code = TCPOPT_MAXSEG;
	opts[onr].opt_val = data->mss_clamp;
	onr++;

	/*
	 * NOTE(review): options are skipped for SYN_SENT -- presumably
	 * the real SYN triggered above negotiates them; confirm.
	 */
	if (data->state != TCP_SYN_SENT &&
	    setsockopt(sk->fd, SOL_TCP, TCP_REPAIR_OPTIONS,
				opts, onr * sizeof(struct tcp_repair_opt)) < 0) {
		logerr("Can't repair options");
		return -2;
	}

	if (data->opt_mask & TCPI_OPT_TIMESTAMPS) {
		if (setsockopt(sk->fd, SOL_TCP, TCP_TIMESTAMP,
				&data->timestamp, sizeof(data->timestamp)) < 0) {
			logerr("Can't set timestamp");
			return -3;
		}
	}

	return 0;
}