/***************************************************************************
 * Drives one producer/consumer test run: zeroes the shared test state,
 * spawns the producer thread(s), waits for them to report startup, then
 * starts a single consumer. Once all producers are done, the consumer is
 * told to wind down, and the total item count is returned.
 *
 * Synchronization is by polling the counters in *test with short sleeps.
 ***************************************************************************/
static uint64_t
run_test(struct Test *test)
{
    const unsigned PRODUCER_COUNT = 1;
    unsigned n;

    memset(test, 0, sizeof(*test));

    /* Spawn the producer thread(s) */
    for (n = 0; n < PRODUCER_COUNT; n++)
        pixie_begin_thread(test_producer_thread, 0, test);

    /* Block until every producer has signaled that it's running */
    while (test->producer_started < PRODUCER_COUNT)
        pixie_usleep(10);

    /* Producers are up; start the consumer */
    pixie_begin_thread(test_consumer_thread, 0, test);

    /* Block until all producers have finished */
    while (test->producer_done < PRODUCER_COUNT)
        pixie_usleep(10);

    /* Signal the consumer to stop, then wait for it to exit */
    test->not_active = 1;
    while (!test->consumer_done)
        pixie_usleep(10);

    return test->total_count;
}
/***************************************************************************
 * The receive thread doesn't transmit packets itself. Instead, it queues
 * them up on the transmit thread, and every so often the transmit thread
 * calls this to drain that queue and send everything.
 *
 * Batching like this is a deliberate design trade-off: it adds latency
 * but improves throughput, and latency doesn't matter here.
 *
 * Drains until the transmit queue is empty, sending in throttled batches
 * and returning each packet buffer to the free list afterwards.
 ***************************************************************************/
void
flush_packets(struct Adapter *adapter,
    PACKET_QUEUE *packet_buffers,
    PACKET_QUEUE *transmit_queue,
    struct Throttler *throttler,
    uint64_t *packets_sent)
{
    for (;;) {
        uint64_t remaining;
        
        /* Ask the throttler how many packets we may send right now,
         * honoring the user's --max-rate setting */
        remaining = throttler_next_batch(throttler, *packets_sent);

        while (remaining) {
            struct PacketBuffer *p;

            /* Pull the next queued packet (an ACK, HTTP request, etc.
             * placed here by a receive thread). An error return means
             * the queue is drained and we are done. */
            if (rte_ring_sc_dequeue(transmit_queue, (void**)&p) != 0)
                return;

            /* Put the packet on the wire */
            rawsock_send_packet(adapter, p->px, (unsigned)p->length, 1);

            /* Hand the buffer back to the free list so the transmit
             * thread can reuse it; retry until the enqueue succeeds */
            while (rte_ring_sp_enqueue(packet_buffers, p) != 0) {
                LOG(0, "transmit queue full (should be impossible)\n");
                pixie_usleep(10000);
            }

            /* Account for the send so the throttler sees it */
            (*packets_sent)++;
            remaining--;
        }
    }
}
/***************************************************************************
 * We return the number of packets that can be sent in a batch. Thus,
 * instead of trying to throttle each packet individually, which has a
 * high per-packet cost, we try to throttle a bunch at a time. Normally,
 * this function will return 1, only at high rates does it return larger
 * numbers.
 *
 * NOTE: The minimum value this returns is 1. When it's less than that,
 * it'll pause and wait until it's ready to send a packet.
 *
 * The algorithm is a feedback loop: it measures the "recent" send rate
 * over a 256-sample window, sleeps and shrinks the batch size when over
 * the limit, and gently grows the batch size when under it.
 ***************************************************************************/
uint64_t
throttler_next_batch(struct Throttler *throttler, uint64_t packet_count)
{
    uint64_t timestamp;
    uint64_t index;
    uint64_t old_timestamp;
    uint64_t old_packet_count;
    double current_rate;
    double max_rate = throttler->max_rate;

again:

    /* NOTE: this uses CLOCK_MONOTONIC_RAW on Linux, so the timestamp doesn't
     * move forward when the machine is suspended */
    timestamp = pixie_gettime();

    /*
     * We record the last 256 buckets, and average the rate over all of
     * them. Store the current sample in the current bucket...
     */
    index = (throttler->index) & 0xFF;
    throttler->buckets[index].timestamp = timestamp;
    throttler->buckets[index].packet_count = packet_count;

    /* ...then advance; the slot we land on is the oldest sample (256
     * calls ago), which anchors the rate measurement window */
    index = (++throttler->index) & 0xFF;
    old_timestamp = throttler->buckets[index].timestamp;
    old_packet_count = throttler->buckets[index].packet_count;

    /*
     * If the delay is more than 1-second, then we should reset the system
     * in order to avoid transmitting too fast. Each pass through `again`
     * rewrites one bucket, so after the window refills this test clears.
     */
    if (timestamp - old_timestamp > 1000000) {
        //throttler_start(throttler, throttler->max_rate);
        throttler->batch_size = 1;
        goto again;
    }

    /*
     * Calculate the recent rate (packets per second).
     * NOTE: this isn't the rate "since start", but only the "recent" rate.
     * That's so that if the system pauses for a while, we don't flood the
     * network trying to catch up.
     */
    current_rate = 1.0*(packet_count - old_packet_count)/((timestamp - old_timestamp)/1000000.0);

    /*
     * If we've been going too fast, then <pause> for a moment, then
     * try again.
     */
    if (current_rate > max_rate) {
        double waittime;

        /* calculate waittime, in seconds, proportional to the overshoot */
        waittime = (current_rate - max_rate) / throttler->max_rate;

        /* At higher rates of speed, we don't actually need to wait the full
         * interval. It's better to have a much smaller interval, so that
         * we converge back on the true rate faster */
        waittime *= 0.1;

        /* This is in case of gross failure of the system. This should never
         * actually happen, unless there is a bug. Really, I ought to make
         * this an 'assert()' instead to fail and fix the bug rather than
         * silently continuing, but I'm too lazy */
        if (waittime > 0.1)
            waittime = 0.1;

        /* Since we've exceeded the speed limit, we should reduce the
         * batch size slightly. We only do it by a little bit to
         * avoid over-correcting. We want to converge on the correct
         * speed gradually. Note that since this happens hundreds or
         * thousands of times a second, the convergence is very fast
         * even with 0.1% adjustment */
        throttler->batch_size *= 0.999;

        /* Now we wait for a bit */
        pixie_usleep((uint64_t)(waittime * 1000000.0));

        /* There are two choices here. We could either return immediately,
         * or we can loop around again. Right now, the code loops around
         * again in order to support very slow rates, such as 0.5 packets
         * per second. Nobody would want to run a scanner that slowly of
         * course, but it's great for testing */
        //return (uint64_t)throttler->batch_size;
        goto again;
    }

    /*
     * Calculate how many packets are needed to catch up again to the current
     * rate, and return that. Grow the batch by 0.5%, capped at 10000.
     *
     * NOTE: this is almost always going to have the value of 1 (one). Only at
     * very high speeds (above 100,000 packets/second) will this value get
     * larger.
     */
    throttler->batch_size *= 1.005;
    if (throttler->batch_size > 10000)
        throttler->batch_size = 10000;
    throttler->current_rate = current_rate;
    throttler->test_timestamp = timestamp;
    throttler->test_packet_count = packet_count;

    return (uint64_t)throttler->batch_size;
}
/***************************************************************************
 *
 * Asynchronous receive thread
 *
 * The transmit and receive threads run independently of each other. There
 * is no record what was transmitted. Instead, the transmit thread sets a
 * "SYN-cookie" in transmitted packets, which the receive thread will then
 * use to match up requests with responses.
 *
 * NOTE(review): this excerpt is truncated — the body below opens a
 * `while` loop and the function itself that are not closed within this
 * view; the remainder of the function lies outside this chunk.
 ***************************************************************************/
static void
receive_thread(void *v)
{
    struct ThreadPair *parms = (struct ThreadPair *)v;
    const struct Masscan *masscan = parms->masscan;
    struct Output *out;
    struct DedupTable *dedup;
    struct PcapFile *pcapfile = NULL;
    struct TCP_ConnectionTable *tcpcon = 0;

    LOG(1, "recv: start receive thread #%u\n", parms->nic_index);

    /* Lock this thread to a CPU. Transmit threads are on even CPUs,
     * receive threads on odd CPUs */
    if (pixie_cpu_get_count() > 1) {
        unsigned cpu_count = pixie_cpu_get_count();
        unsigned cpu = parms->nic_index * 2 + 1;
        while (cpu >= cpu_count) {
            cpu -= cpu_count;
            cpu++;
        }
        pixie_cpu_set_affinity(cpu);
    }

    /*
     * If configured, open a --pcap file for saving raw packets. This is
     * so that we can debug scans, but also so that we can look at the
     * strange things people send us. Note that we don't record transmitted
     * packets, just the packets we've received.
     * (Currently disabled; pcapfile stays NULL unless this is re-enabled.)
     */
    /*if (masscan->pcap_filename[0])
        pcapfile = pcapfile_openwrite(masscan->pcap_filename, 1);*/

    /*
     * Open output. This is where results are reported when saving
     * the --output-format to the --output-filename
     */
    out = output_create(masscan);

    /*
     * Create deduplication table. This is so when somebody sends us
     * multiple responses, we only record the first one.
     */
    dedup = dedup_create();

    /*
     * Create a TCP connection table for interacting with live
     * connections when doing --banners
     */
    if (masscan->is_banners) {
        tcpcon = tcpcon_create_table(
            (size_t)((masscan->max_rate/5) / masscan->nic_count),
            parms->transmit_queue,
            parms->packet_buffers,
            &parms->tmplset->pkts[Proto_TCP],
            output_report_banner,
            out,
            masscan->tcb.timeout
            );
    }

    /* Offline mode (--offline): no packets will arrive; just idle until
     * the user aborts, then mark this thread done */
    if (masscan->is_offline) {
        while (!control_c_pressed_again)
            pixie_usleep(10000);
        parms->done_receiving = 1;
        return;
    }

    /*
     * Receive packets. This is where we catch any responses and print
     * them to the terminal.
     */
    LOG(1, "begin receive thread\n");
    while (!control_c_pressed_again) {
        int status; /* NOTE(review): unused in the visible portion */
        unsigned length;
        unsigned secs;
        unsigned usecs;
        const unsigned char *px;
        int err;
        unsigned x;
        struct PreprocessedInfo parsed;
        unsigned ip_me;
        unsigned ip_them;
        unsigned seqno_them;
        unsigned seqno_me;

        /*
         * RECEIVE
         *
         * This is the boring part of actually receiving a packet
         */
        err = rawsock_recv_packet(
                    parms->adapter,
                    &length,
                    &secs,
                    &usecs,
                    &px);

        if (err != 0)
            continue;

        /*
         * Do any TCP event timeouts based on the current timestamp from
         * the packet. For example, if the connection has been open for
         * around 10 seconds, we'll close the connection. (--banners)
         */
        if (tcpcon) {
            tcpcon_timeouts(tcpcon, secs, usecs);
        }

        /* ignore oversized frames (larger than standard Ethernet MTU) */
        if (length > 1514)
            continue;

        /*
         * "Preprocess" the response packet. This means to go through and
         * figure out where the TCP/IP headers are and the locations of
         * some fields, like IP address and port numbers.
         */
        x = preprocess_frame(px, length, 1, &parsed);
        if (!x)
            continue; /* corrupt packet */
        /* assemble 32-bit addresses from the parsed big-endian bytes */
        ip_me = parsed.ip_dst[0]<<24 | parsed.ip_dst[1]<<16
            | parsed.ip_dst[2]<< 8 | parsed.ip_dst[3]<<0;
        ip_them = parsed.ip_src[0]<<24 | parsed.ip_src[1]<<16
            | parsed.ip_src[2]<< 8 | parsed.ip_src[3]<<0;
        seqno_them = TCP_SEQNO(px, parsed.transport_offset);
        seqno_me = TCP_ACKNO(px, parsed.transport_offset);

        /* verify: my IP address */
        if (parms->adapter_ip != ip_me)
            continue;

        /*
         * Handle non-TCP protocols
         */
        switch (parsed.found) {
            case FOUND_ARP:
                /* OOPS: handle arp instead. Since we may completely bypass the TCP/IP
                 * stack, we may have to handle ARPs ourself, or the router will
                 * lose track of us. */
                LOGip(2, ip_them, 0, "-> ARP [%u] \n", px[parsed.found_offset]);
                arp_response(
                    parms->adapter_ip, parms->adapter_mac,
                    px, length,
                    parms->packet_buffers,
                    parms->transmit_queue);
                continue;
            case FOUND_UDP:
            case FOUND_DNS:
                if (!is_my_port(masscan, parsed.port_dst))
                    continue;
                handle_udp(out, px, length, &parsed);
                continue;
            case FOUND_ICMP:
                handle_icmp(out, px, length, &parsed);
                continue;
            case FOUND_TCP:
                /* fall down to below */
                break;
            default:
                continue;
        }

        /* verify: my port number */
        if (parms->adapter_port != parsed.port_dst)
            continue;

        /* Save raw packet in --pcap file (only if capture was enabled above) */
        if (pcapfile) {
            pcapfile_writeframe(
                pcapfile,
                px,
                length,
                length,
                secs,
                usecs);
        }

        {
            char buf[64];
            LOGip(5, ip_them, parsed.port_src, "-> TCP ackno=0x%08x flags=0x%02x(%s)\n",
                seqno_me,
                TCP_FLAGS(px, parsed.transport_offset),
                reason_string(TCP_FLAGS(px, parsed.transport_offset), buf, sizeof(buf)));
        }

        /* If recording --banners, create a new "TCP Control Block (TCB)" */
        if (tcpcon) {
            struct TCP_Control_Block *tcb;

            /* does a TCB already exist for this connection? */
            tcb = tcpcon_lookup_tcb(tcpcon,
                            ip_me, ip_them,
                            parsed.port_dst, parsed.port_src);

            if (TCP_IS_SYNACK(px, parsed.transport_offset)) {
                /* Validate the SYN-cookie: the peer's ACK number must be
                 * our original sequence number (syn_hash) plus one */
                if (syn_hash(ip_them, parsed.port_src) != seqno_me - 1) {
                    LOG(2, "%u.%u.%u.%u - bad cookie: ackno=0x%08x expected=0x%08x\n",
                        (ip_them>>24)&0xff, (ip_them>>16)&0xff,
                        (ip_them>>8)&0xff, (ip_them>>0)&0xff,
                        seqno_me-1, syn_hash(ip_them, parsed.port_src));
                    continue;
                }

                if (tcb == NULL) {
                    tcb = tcpcon_create_tcb(tcpcon,
                                    ip_me, ip_them,
                                    parsed.port_dst, parsed.port_src,
                                    seqno_me, seqno_them+1);
                }

                tcpcon_handle(tcpcon, tcb, TCP_WHAT_SYNACK,
                    0, 0, secs, usecs, seqno_them+1);

            } else if (tcb) {
                /* If this is an ACK, then handle that first */
                if (TCP_IS_ACK(px, parsed.transport_offset)) {
                    tcpcon_handle(tcpcon, tcb, TCP_WHAT_ACK,
                        0, seqno_me, secs, usecs, seqno_them);
                }

                /* If this contains payload, handle that */
                if (parsed.app_length) {
                    tcpcon_handle(tcpcon, tcb, TCP_WHAT_DATA,
                        px + parsed.app_offset, parsed.app_length,
                        secs, usecs, seqno_them);
                }

                /* If this is a FIN, handle that. Note that ACK +
                 * payload + FIN can come together */
                if (TCP_IS_FIN(px, parsed.transport_offset)
                        && !TCP_IS_RST(px, parsed.transport_offset)) {
                    tcpcon_handle(tcpcon, tcb, TCP_WHAT_FIN,
                        0, 0, secs, usecs, seqno_them);
                }

                /* If this is a RST, then we'll be closing the connection */
                if (TCP_IS_RST(px, parsed.transport_offset)) {
                    tcpcon_handle(tcpcon, tcb, TCP_WHAT_RST,
                        0, 0, secs, usecs, seqno_them);
                }
            } else if (TCP_IS_FIN(px, parsed.transport_offset)) {
                /*
                 * NO TCB!
                 * This happens when we've sent a FIN, deleted our connection,
                 * but the other side didn't get the packet.
                 */
                if (!TCP_IS_RST(px, parsed.transport_offset))
                    tcpcon_send_FIN(
                        tcpcon,
                        ip_me, ip_them,
                        parsed.port_dst, parsed.port_src,
                        seqno_them, seqno_me);
            }
        }
        /* NOTE(review): function continues past this excerpt — the
         * enclosing while-loop and function close outside this view. */
/*************************************************************************** * This thread spews packets as fast as it can * * THIS IS WHERE ALL THE EXCITEMENT HAPPENS!!!! * 90% of CPU cycles are in the function. * ***************************************************************************/ static void transmit_thread(void *v) /*aka. scanning_thread() */ { struct ThreadPair *parms = (struct ThreadPair *)v; uint64_t i; uint64_t start; uint64_t end; const struct Masscan *masscan = parms->masscan; unsigned retries = masscan->retries; unsigned rate = (unsigned)masscan->max_rate; unsigned r = retries + 1; uint64_t range; struct BlackRock blackrock; uint64_t count_ips = rangelist_count(&masscan->targets); struct Throttler *throttler = parms->throttler; struct TemplateSet *pkt_template = parms->tmplset; unsigned *picker = parms->picker; struct Adapter *adapter = parms->adapter; uint64_t packets_sent = 0; unsigned increment = masscan->shard.of + masscan->nic_count; LOG(1, "xmit: starting transmit thread #%u\n", parms->nic_index); /* Lock this thread to a CPU. Transmit threads are on even CPUs, * receive threads on odd CPUs */ if (pixie_cpu_get_count() > 1) { unsigned cpu_count = pixie_cpu_get_count(); unsigned cpu = parms->nic_index * 2; while (cpu >= cpu_count) { cpu -= cpu_count; cpu++; } pixie_cpu_set_affinity(cpu); //pixie_cpu_raise_priority(); } /* Create the shuffler/randomizer. This creates the 'range' variable, * which is simply the number of IP addresses times the number of * ports */ range = rangelist_count(&masscan->targets) * rangelist_count(&masscan->ports); blackrock_init(&blackrock, range, masscan->seed); /* Calculate the 'start' and 'end' of a scan. One reason to do this is * to support --shard, so that multiple machines can co-operate on * the same scan. Another reason to do this is so that we can bleed * a little bit past the end when we have --retries. 
Yet another * thing to do here is deal with multiple network adapters, which * is essentially the same logic as shards. */ start = masscan->resume.index + (masscan->shard.one-1) + parms->nic_index; end = range; if (masscan->resume.count && end > start + masscan->resume.count) end = start + masscan->resume.count; end += retries * rate; /* "THROTTLER" rate-limits how fast we transmit, set with the * --max-rate parameter */ throttler_start(throttler, masscan->max_rate/masscan->nic_count); /* ----------------- * the main loop * -----------------*/ LOG(3, "xmit: starting main loop: [%llu..%llu]\n", start, end); for (i=start; i<end; ) { uint64_t batch_size; /* * Do a batch of many packets at a time. That because per-packet * throttling is expensive at 10-million pps, so we reduce the * per-packet cost by doing batches. At slower rates, the batch * size will always be one. (--max-rate) */ batch_size = throttler_next_batch(throttler, packets_sent); packets_sent += batch_size; while (batch_size && i < end) { uint64_t xXx; unsigned ip; unsigned port; /* * RANDOMIZE THE TARGET: * This is kinda a tricky bit that picks a random IP and port * number in order to scan. We monotonically increment the * index 'i' from [0..range]. We then shuffle (randomly transmog) * that index into some other, but unique/1-to-1, number in the * same range. That way we visit all targets, but in a random * order. Then, once we've shuffled the index, we "pick" the * the IP address and port that the index refers to. */ xXx = (i + (r--) * rate); while (xXx >= range) xXx -= range; xXx = blackrock_shuffle(&blackrock, xXx); ip = rangelist_pick2(&masscan->targets, xXx % count_ips, picker); port = rangelist_pick(&masscan->ports, xXx / count_ips); /* * SEND THE PROBE * This is sorta the entire point of the program, but little * exciting happens here. The thing to note that this may * be a "raw" transmit that bypasses the kernel, meaning * we can call this function millions of times a second. 
*/ rawsock_send_probe( adapter, ip, port, syn_hash(ip, port), !batch_size, /* flush queue on last packet in batch */ pkt_template ); batch_size--; foo_count++; /* * SEQUENTIALLY INCREMENT THROUGH THE RANGE * Yea, I know this is a puny 'i++' here, but it's a core feature * of the system that is linearly increments through the range, * but produces from that a shuffled sequence of targets (as * described above). Because we are linearly incrementing this * number, we can do lots of creative stuff, like doing clever * retransmits and sharding. */ if (r == 0) { i += increment; /* <------ increment by 1 normally, more with shards/nics */ r = retries + 1; } } /* end of batch */ /* Transmit packets from other thread, when doing --banners */ flush_packets(adapter, parms->packet_buffers, parms->transmit_queue, throttler, &packets_sent); /* If the user pressed <ctrl-c>, then we need to exit. but, in case * the user wants to --resume the scan later, we save the current * state in a file */ if (control_c_pressed) { break; } /* save our current location for resuming, if the user pressed * <ctrl-c> to exit early */ parms->my_index = i; } /* * We are done transmitting. However, response packets will take several * seconds to arrive. Therefore, sit in short loop waiting for those * packets to arrive. Pressing <ctrl-c> a second time will exit this * prematurely. */ while (!control_c_pressed_again) { unsigned k; for (k=0; k<1000; k++) { /* Transmit packets from the receive thread */ flush_packets( adapter, parms->packet_buffers, parms->transmit_queue, throttler, &packets_sent); pixie_usleep(1000); } } /* Thread is about to exit */ parms->done_transmitting = 1; LOG(1, "xmit: stopping transmit thread #%u\n", parms->nic_index); }
int perftest(int argc, char *argv[]) { struct PerfTest perftest[1]; struct ZoneFileParser *parser; struct Catalog *db; size_t i; perftest->loop_count = 10000000; /* * Create a pseudo-network subsystem for generating packets */ perftest->server.parent = perftest; perftest->server.adapter = adapter_create( perftest_alloc_packet, perftest_server_to_client_response, &perftest->server); adapter_add_ipv4(perftest->server.adapter, 0xC0A80101, 0xFFFFffff); /* create a catalog/database, this is where all the parsed zonefile * records will be put */ perftest->db = catalog_create(); perftest->thread->catalog = perftest->db; db = perftest->db; /* * Parse a sample zone */ parser = zonefile_begin( example_origin, /* origin */ 60, /* TTL */ 10000, /* filesize */ "<perftest>", /* filename */ zonefile_load, /* callback */ db, /* callback data */ 0 ); zonefile_set_singlestep(parser); for (i=0; perftest_zone[i]; i++) { zonefile_parse(parser, (const unsigned char*)perftest_zone[i], strlen(perftest_zone[i]) ); } zonefile_end(parser); /* * Send packets. This creates one thread per CPU processing requests. */ { unsigned threads_desired = pixie_cpu_get_count(); uint64_t start, stop; double requests_per_second; fprintf(stderr, "running %u threads\n", threads_desired); start = pixie_gettime(); for (i=0; i<threads_desired; i++) { __sync_fetch_and_add(&threads_running, 1); pixie_begin_thread((void(*)(void*))run_perf, 0, perftest); } while (threads_running) pixie_usleep(1000); stop = pixie_gettime(); requests_per_second = 1000000.0 * (1.0 * threads_desired * perftest->loop_count) / (stop - start); fprintf(stderr, "queries = %u\n", (unsigned)(threads_desired * perftest->loop_count)); fprintf(stderr, "seconds = %5.3f\n", (stop - start)/1000000.0); fprintf(stderr, "queries/second = %5.3f\n", requests_per_second); } exit(1); return 0; }