void change_resolver_threads(struct Core *core, struct Configuration *cfg_new) { /* If no threads were specified, then default to the number of threads * in the system */ if (cfg_new->worker_threads == 0) { cfg_new->worker_threads = pixie_cpu_get_count(); } if (cfg_new->worker_threads == 0) cfg_new->worker_threads = 1; if (cfg_new->worker_threads > 1024) cfg_new->worker_threads = 1024; /* See if we need to stop some threads */ while (core->workers_count > cfg_new->worker_threads) { thread_worker_stop(core); } /* See if we need to start some threads */ while (core->workers_count < cfg_new->worker_threads) { thread_worker_start(core); } }
int main(int argc, char *argv[]) { unsigned cpu_count; UNUSEDPARM(argc); UNUSEDPARM(argv); /* Grab the number of CPUs. We want to run benchmarks on as many CPUs as * possible in order to test scaling */ cpu_count = pixie_cpu_get_count(); /* Print OS, version, CPU, etc. */ print_version(); /* * Print header information */ printf(" ,CPUs, Mm/sec, Total, nsecs\n"); //alloc_all(); bench_mainmem(cpu_count, MemBench_MaxRateHuge); bench_mainmem(cpu_count, MemBench_PointerChaseHuge); //bench_mainmem(cpu_count, MemBench_CmovChase); bench_mainmem(cpu_count, MemBench_MaxRate); bench_mainmem(cpu_count, MemBench_PointerChase); bench_cache_bounce(cpu_count, CacheBench_Add); bench_cache_bounce(cpu_count, CacheBench_LockedAdd); bench_cache_bounce(cpu_count, CacheBench_MutexAdd); bench_syscall(cpu_count); bench_funcall(cpu_count, add_two_numbers); bench_msgrate_pipe(cpu_count); bench_msgrate_ring(cpu_count); return 1; }
/***************************************************************************
 *
 * Asynchronous receive thread
 *
 * The transmit and receive threads run independently of each other. There
 * is no record what was transmitted. Instead, the transmit thread sets a
 * "SYN-cookie" in transmitted packets, which the receive thread will then
 * use to match up requests with responses.
 ***************************************************************************/
static void receive_thread(void *v)
{
    /* One ThreadPair per network adapter: state shared between this
     * receive thread and its matching transmit thread. */
    struct ThreadPair *parms = (struct ThreadPair *)v;
    const struct Masscan *masscan = parms->masscan;
    struct Output *out;
    struct DedupTable *dedup;               /* created below; consumed by code past this excerpt */
    struct PcapFile *pcapfile = NULL;
    struct TCP_ConnectionTable *tcpcon = 0; /* only non-NULL when --banners is enabled */

    LOG(1, "recv: start receive thread #%u\n", parms->nic_index);

    /* Lock this thread to a CPU. Transmit threads are on even CPUs,
     * receive threads on odd CPUs */
    if (pixie_cpu_get_count() > 1) {
        unsigned cpu_count = pixie_cpu_get_count();
        unsigned cpu = parms->nic_index * 2 + 1;
        /* wrap around when there are more NICs than CPUs; the extra ++
         * keeps successive wraps from landing on the same CPU */
        while (cpu >= cpu_count) {
            cpu -= cpu_count;
            cpu++;
        }
        pixie_cpu_set_affinity(cpu);
    }

    /*
     * If configured, open a --pcap file for saving raw packets. This is
     * so that we can debug scans, but also so that we can look at the
     * strange things people send us. Note that we don't record transmitted
     * packets, just the packets we've received.
     */
    /*if (masscan->pcap_filename[0])
        pcapfile = pcapfile_openwrite(masscan->pcap_filename, 1);*/

    /*
     * Open output. This is where results are reported when saving
     * the --output-format to the --output-filename
     */
    out = output_create(masscan);

    /*
     * Create deduplication table. This is so when somebody sends us
     * multiple responses, we only record the first one.
     */
    dedup = dedup_create();

    /*
     * Create a TCP connection table for interacting with live
     * connections when doing --banners
     */
    if (masscan->is_banners) {
        tcpcon = tcpcon_create_table(
            (size_t)((masscan->max_rate/5) / masscan->nic_count),
            parms->transmit_queue,
            parms->packet_buffers,
            &parms->tmplset->pkts[Proto_TCP],
            output_report_banner,
            out,
            masscan->tcb.timeout
            );
    }

    /* In --offline mode nothing will ever arrive: just idle until the
     * user aborts, then tell the main thread we are done. */
    if (masscan->is_offline) {
        while (!control_c_pressed_again)
            pixie_usleep(10000);
        parms->done_receiving = 1;
        return;
    }

    /*
     * Receive packets. This is where we catch any responses and print
     * them to the terminal.
     */
    LOG(1, "begin receive thread\n");
    while (!control_c_pressed_again) {
        int status;     /* NOTE(review): declared but not used in this excerpt */
        unsigned length;
        unsigned secs;
        unsigned usecs;
        const unsigned char *px;
        int err;
        unsigned x;
        struct PreprocessedInfo parsed;
        unsigned ip_me;
        unsigned ip_them;
        unsigned seqno_them;
        unsigned seqno_me;

        /*
         * RECEIVE
         *
         * This is the boring part of actually receiving a packet
         */
        err = rawsock_recv_packet(
                    parms->adapter,
                    &length,
                    &secs,
                    &usecs,
                    &px);
        if (err != 0)
            continue;

        /*
         * Do any TCP event timeouts based on the current timestamp from
         * the packet. For example, if the connection has been open for
         * around 10 seconds, we'll close the connection. (--banners)
         */
        if (tcpcon) {
            tcpcon_timeouts(tcpcon, secs, usecs);
        }

        /* ignore frames larger than a standard Ethernet frame */
        if (length > 1514)
            continue;

        /*
         * "Preprocess" the response packet. This means to go through and
         * figure out where the TCP/IP headers are and the locations of
         * some fields, like IP address and port numbers.
         */
        x = preprocess_frame(px, length, 1, &parsed);
        if (!x)
            continue; /* corrupt packet */
        /* assemble IPv4 addresses (host byte order) from the parsed byte arrays */
        ip_me = parsed.ip_dst[0]<<24 | parsed.ip_dst[1]<<16
              | parsed.ip_dst[2]<< 8 | parsed.ip_dst[3]<<0;
        ip_them = parsed.ip_src[0]<<24 | parsed.ip_src[1]<<16
              | parsed.ip_src[2]<< 8 | parsed.ip_src[3]<<0;
        seqno_them = TCP_SEQNO(px, parsed.transport_offset);
        seqno_me = TCP_ACKNO(px, parsed.transport_offset);

        /* verify: my IP address */
        if (parms->adapter_ip != ip_me)
            continue;

        /*
         * Handle non-TCP protocols
         */
        switch (parsed.found) {
            case FOUND_ARP:
                /* OOPS: handle arp instead. Since we may completely bypass the TCP/IP
                 * stack, we may have to handle ARPs ourself, or the router will
                 * lose track of us. */
                LOGip(2, ip_them, 0, "-> ARP [%u] \n", px[parsed.found_offset]);
                arp_response(
                    parms->adapter_ip, parms->adapter_mac,
                    px, length,
                    parms->packet_buffers,
                    parms->transmit_queue);
                continue;
            case FOUND_UDP:
            case FOUND_DNS:
                if (!is_my_port(masscan, parsed.port_dst))
                    continue;
                handle_udp(out, px, length, &parsed);
                continue;
            case FOUND_ICMP:
                handle_icmp(out, px, length, &parsed);
                continue;
            case FOUND_TCP:
                /* fall down to below */
                break;
            default:
                continue;
        }

        /* verify: my port number */
        if (parms->adapter_port != parsed.port_dst)
            continue;

        /* Save raw packet in --pcap file */
        if (pcapfile) {
            pcapfile_writeframe(
                pcapfile,
                px,
                length,
                length,
                secs,
                usecs);
        }

        /* trace the incoming TCP segment at high verbosity */
        {
            char buf[64];
            LOGip(5, ip_them, parsed.port_src, "-> TCP ackno=0x%08x flags=0x%02x(%s)\n",
                seqno_me,
                TCP_FLAGS(px, parsed.transport_offset),
                reason_string(TCP_FLAGS(px, parsed.transport_offset), buf, sizeof(buf)));
        }

        /* If recording --banners, create a new "TCP Control Block (TCB)" */
        if (tcpcon) {
            struct TCP_Control_Block *tcb;

            /* does a TCB already exist for this connection? */
            tcb = tcpcon_lookup_tcb(tcpcon,
                            ip_me, ip_them,
                            parsed.port_dst, parsed.port_src);

            if (TCP_IS_SYNACK(px, parsed.transport_offset)) {
                /* verify the SYN-cookie: the remote's acknowledgment must
                 * equal our cookie + 1, otherwise this response isn't for
                 * a probe this scan sent */
                if (syn_hash(ip_them, parsed.port_src) != seqno_me - 1) {
                    LOG(2, "%u.%u.%u.%u - bad cookie: ackno=0x%08x expected=0x%08x\n",
                        (ip_them>>24)&0xff, (ip_them>>16)&0xff,
                        (ip_them>>8)&0xff, (ip_them>>0)&0xff,
                        seqno_me-1,
                        syn_hash(ip_them, parsed.port_src));
                    continue;
                }

                if (tcb == NULL) {
                    tcb = tcpcon_create_tcb(tcpcon,
                                    ip_me, ip_them,
                                    parsed.port_dst, parsed.port_src,
                                    seqno_me, seqno_them+1);
                }

                tcpcon_handle(tcpcon, tcb, TCP_WHAT_SYNACK,
                    0, 0, secs, usecs, seqno_them+1);

            } else if (tcb) {
                /* If this is an ACK, then handle that first */
                if (TCP_IS_ACK(px, parsed.transport_offset)) {
                    tcpcon_handle(tcpcon, tcb, TCP_WHAT_ACK,
                        0, seqno_me, secs, usecs, seqno_them);
                }

                /* If this contains payload, handle that */
                if (parsed.app_length) {
                    tcpcon_handle(tcpcon, tcb, TCP_WHAT_DATA,
                        px + parsed.app_offset, parsed.app_length,
                        secs, usecs, seqno_them);
                }

                /* If this is a FIN, handle that. Note that ACK +
                 * payload + FIN can come together */
                if (TCP_IS_FIN(px, parsed.transport_offset)
                        && !TCP_IS_RST(px, parsed.transport_offset)) {
                    tcpcon_handle(tcpcon, tcb, TCP_WHAT_FIN,
                        0, 0, secs, usecs, seqno_them);
                }

                /* If this is a RST, then we'll be closing the connection */
                if (TCP_IS_RST(px, parsed.transport_offset)) {
                    tcpcon_handle(tcpcon, tcb, TCP_WHAT_RST,
                        0, 0, secs, usecs, seqno_them);
                }
            } else if (TCP_IS_FIN(px, parsed.transport_offset)) {
                /*
                 * NO TCB!
                 * This happens when we've sent a FIN, deleted our connection,
                 * but the other side didn't get the packet.
                 */
                if (!TCP_IS_RST(px, parsed.transport_offset))
                    tcpcon_send_FIN(
                        tcpcon,
                        ip_me, ip_them,
                        parsed.port_dst, parsed.port_src,
                        seqno_them, seqno_me);
            }
        }
        /* NOTE(review): the remainder of this receive loop (non-banner
         * result reporting, dedup usage) and the thread's cleanup code
         * lie outside this excerpt -- the function text is truncated at
         * this point; the enclosing while-loop and function are closed
         * elsewhere. */
/***************************************************************************
 * This thread spews packets as fast as it can
 *
 * THIS IS WHERE ALL THE EXCITEMENT HAPPENS!!!!
 * 90% of CPU cycles are in the function.
 *
 ***************************************************************************/
static void transmit_thread(void *v) /*aka. scanning_thread() */
{
    struct ThreadPair *parms = (struct ThreadPair *)v;
    uint64_t i;
    uint64_t start;
    uint64_t end;
    const struct Masscan *masscan = parms->masscan;
    unsigned retries = masscan->retries;
    unsigned rate = (unsigned)masscan->max_rate;
    unsigned r = retries + 1;   /* countdown of transmissions left for the current index */
    uint64_t range;
    struct BlackRock blackrock; /* index -> target shuffler */
    uint64_t count_ips = rangelist_count(&masscan->targets);
    struct Throttler *throttler = parms->throttler;
    struct TemplateSet *pkt_template = parms->tmplset;
    unsigned *picker = parms->picker;
    struct Adapter *adapter = parms->adapter;
    uint64_t packets_sent = 0;
    /* stride through the index space, combining --shard and multiple
     * NICs. NOTE(review): 'shard.of + nic_count' as a stride looks odd
     * -- confirm this matches the intended sharding formula. */
    unsigned increment = masscan->shard.of + masscan->nic_count;

    LOG(1, "xmit: starting transmit thread #%u\n", parms->nic_index);

    /* Lock this thread to a CPU. Transmit threads are on even CPUs,
     * receive threads on odd CPUs */
    if (pixie_cpu_get_count() > 1) {
        unsigned cpu_count = pixie_cpu_get_count();
        unsigned cpu = parms->nic_index * 2;
        /* wrap around when there are more NICs than CPUs; the extra ++
         * keeps successive wraps from landing on the same CPU */
        while (cpu >= cpu_count) {
            cpu -= cpu_count;
            cpu++;
        }
        pixie_cpu_set_affinity(cpu);
        //pixie_cpu_raise_priority();
    }

    /* Create the shuffler/randomizer. This creates the 'range' variable,
     * which is simply the number of IP addresses times the number of
     * ports */
    range = rangelist_count(&masscan->targets)
            * rangelist_count(&masscan->ports);
    blackrock_init(&blackrock, range, masscan->seed);

    /* Calculate the 'start' and 'end' of a scan. One reason to do this is
     * to support --shard, so that multiple machines can co-operate on
     * the same scan. Another reason to do this is so that we can bleed
     * a little bit past the end when we have --retries. Yet another
     * thing to do here is deal with multiple network adapters, which
     * is essentially the same logic as shards. */
    start = masscan->resume.index + (masscan->shard.one-1) + parms->nic_index;
    end = range;
    if (masscan->resume.count && end > start + masscan->resume.count)
        end = start + masscan->resume.count;
    end += retries * rate;  /* bleed past the end so retransmits can complete */

    /* "THROTTLER" rate-limits how fast we transmit, set with the
     * --max-rate parameter */
    throttler_start(throttler, masscan->max_rate/masscan->nic_count);

    /* -----------------
     * the main loop
     * -----------------*/
    LOG(3, "xmit: starting main loop: [%llu..%llu]\n", start, end);
    for (i=start; i<end; ) {
        uint64_t batch_size;

        /*
         * Do a batch of many packets at a time. That because per-packet
         * throttling is expensive at 10-million pps, so we reduce the
         * per-packet cost by doing batches. At slower rates, the batch
         * size will always be one. (--max-rate)
         */
        batch_size = throttler_next_batch(throttler, packets_sent);
        packets_sent += batch_size;
        while (batch_size && i < end) {
            uint64_t xXx;
            unsigned ip;
            unsigned port;

            /*
             * RANDOMIZE THE TARGET:
             * This is kinda a tricky bit that picks a random IP and port
             * number in order to scan. We monotonically increment the
             * index 'i' from [0..range]. We then shuffle (randomly transmog)
             * that index into some other, but unique/1-to-1, number in the
             * same range. That way we visit all targets, but in a random
             * order. Then, once we've shuffled the index, we "pick" the
             * the IP address and port that the index refers to.
             */
            xXx = (i + (r--) * rate);   /* r interleaves retransmits 'rate' apart */
            /* cheap modulo: bring xXx back into [0, range) */
            while (xXx >= range)
                xXx -= range;
            xXx = blackrock_shuffle(&blackrock, xXx);
            ip = rangelist_pick2(&masscan->targets, xXx % count_ips, picker);
            port = rangelist_pick(&masscan->ports, xXx / count_ips);

            /*
             * SEND THE PROBE
             * This is sorta the entire point of the program, but little
             * exciting happens here. The thing to note that this may
             * be a "raw" transmit that bypasses the kernel, meaning
             * we can call this function millions of times a second.
             */
            rawsock_send_probe(
                    adapter,
                    ip,
                    port,
                    syn_hash(ip, port), /* SYN-cookie the receive thread verifies */
                    !batch_size,        /* flush queue on last packet in batch */
                    /* NOTE(review): batch_size is still >= 1 here (the
                     * decrement happens below), so this flag is always 0
                     * in this text -- confirm whether the decrement was
                     * meant to precede this call. */
                    pkt_template
                    );
            batch_size--;
            foo_count++;    /* NOTE(review): global counter; its purpose is not visible in this excerpt */

            /*
             * SEQUENTIALLY INCREMENT THROUGH THE RANGE
             * Yea, I know this is a puny 'i++' here, but it's a core feature
             * of the system that is linearly increments through the range,
             * but produces from that a shuffled sequence of targets (as
             * described above). Because we are linearly incrementing this
             * number, we can do lots of creative stuff, like doing clever
             * retransmits and sharding.
             */
            if (r == 0) {
                i += increment; /* <------ increment by 1 normally, more with shards/nics */
                r = retries + 1;
            }
        } /* end of batch */

        /* Transmit packets from other thread, when doing --banners */
        flush_packets(adapter, parms->packet_buffers,
                        parms->transmit_queue,
                        throttler, &packets_sent);

        /* If the user pressed <ctrl-c>, then we need to exit. but, in case
         * the user wants to --resume the scan later, we save the current
         * state in a file */
        if (control_c_pressed) {
            break;
        }

        /* save our current location for resuming, if the user pressed
         * <ctrl-c> to exit early */
        parms->my_index = i;
    }

    /*
     * We are done transmitting. However, response packets will take several
     * seconds to arrive. Therefore, sit in short loop waiting for those
     * packets to arrive. Pressing <ctrl-c> a second time will exit this
     * prematurely.
     */
    while (!control_c_pressed_again) {
        unsigned k;
        for (k=0; k<1000; k++) {
            /* Transmit packets from the receive thread */
            flush_packets(  adapter,
                            parms->packet_buffers,
                            parms->transmit_queue,
                            throttler, &packets_sent);
            pixie_usleep(1000);
        }
    }

    /* Thread is about to exit */
    parms->done_transmitting = 1;
    LOG(1, "xmit: stopping transmit thread #%u\n", parms->nic_index);
}
int perftest(int argc, char *argv[]) { struct PerfTest perftest[1]; struct ZoneFileParser *parser; struct Catalog *db; size_t i; perftest->loop_count = 10000000; /* * Create a pseudo-network subsystem for generating packets */ perftest->server.parent = perftest; perftest->server.adapter = adapter_create( perftest_alloc_packet, perftest_server_to_client_response, &perftest->server); adapter_add_ipv4(perftest->server.adapter, 0xC0A80101, 0xFFFFffff); /* create a catalog/database, this is where all the parsed zonefile * records will be put */ perftest->db = catalog_create(); perftest->thread->catalog = perftest->db; db = perftest->db; /* * Parse a sample zone */ parser = zonefile_begin( example_origin, /* origin */ 60, /* TTL */ 10000, /* filesize */ "<perftest>", /* filename */ zonefile_load, /* callback */ db, /* callback data */ 0 ); zonefile_set_singlestep(parser); for (i=0; perftest_zone[i]; i++) { zonefile_parse(parser, (const unsigned char*)perftest_zone[i], strlen(perftest_zone[i]) ); } zonefile_end(parser); /* * Send packets. This creates one thread per CPU processing requests. */ { unsigned threads_desired = pixie_cpu_get_count(); uint64_t start, stop; double requests_per_second; fprintf(stderr, "running %u threads\n", threads_desired); start = pixie_gettime(); for (i=0; i<threads_desired; i++) { __sync_fetch_and_add(&threads_running, 1); pixie_begin_thread((void(*)(void*))run_perf, 0, perftest); } while (threads_running) pixie_usleep(1000); stop = pixie_gettime(); requests_per_second = 1000000.0 * (1.0 * threads_desired * perftest->loop_count) / (stop - start); fprintf(stderr, "queries = %u\n", (unsigned)(threads_desired * perftest->loop_count)); fprintf(stderr, "seconds = %5.3f\n", (stop - start)/1000000.0); fprintf(stderr, "queries/second = %5.3f\n", requests_per_second); } exit(1); return 0; }