forked from BLepers/PinThreads
/
pin.c
174 lines (138 loc) · 4.92 KB
/
pin.c
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
#include "common.h"
#include "parse_args.h"
#include "shm.h"
#include <stdarg.h>
/* Library constructor, defined at the bottom of this file. */
static void m_init(void);

/*
 * Saved pointers to the next definitions of the functions this file
 * overrides. They are filled in by m_init() through dlsym(RTLD_NEXT, ...)
 * and are NULL until it has run.
 */
static int (*old_pthread_create) (pthread_t *thread, const pthread_attr_t *attr, void *(*start_routine) (void *), void *arg);
static int (*old_pthread_setaffinity_np) (pthread_t, size_t, const cpu_set_t *);
static int (*old_sched_setaffinity) (pid_t, size_t, const cpu_set_t*);
static pid_t (*old_fork)(void);
/* Trailing variadic part mirrors clone()'s optional ptid / tls / ctid arguments. */
static int (*old_clone)(int (*)(void *), void *, int, void *, ...);
static void set_affinity(pid_t tid, int cpu_id) {
if(!get_shm()->active)
return;
if(!get_shm()->per_node) {
cpu_set_t mask;
CPU_ZERO(&mask);
CPU_SET(cpu_id, &mask);
VERBOSE("--> Setting tid %d on core %d\n", tid, cpu_id);
int r = old_sched_setaffinity(tid, sizeof(mask), &mask);
if (r < 0) {
fprintf(stderr, "couldn't set affinity on %d\n", cpu_id);
exit(1);
}
} else {
int r = numa_run_on_node(numa_node_of_cpu(cpu_id));
if(r < 0) {
fprintf(stderr, "couldn't set affinity on node of cpu %d\n", cpu_id);
exit(1);
}
}
}
#define MAGIC_NUMBER 0xdeedbeaf
int pthread_create(pthread_t *thread, const pthread_attr_t *attr, void *(*start_routine) (void *), void *arg) {
int core;
int ret;
cpu_set_t mask;
CPU_ZERO(&mask);
ret = old_pthread_create(thread, attr, start_routine, arg);
// Hack: if arg == MAGIC_NUMBER, ignore the thread
if (arg == (void*)MAGIC_NUMBER) {
return ret;
}
if(!get_shm()->active)
return ret;
core = get_next_core();
if(!get_shm()->per_node) {
CPU_SET(core, &mask);
} else {
int i, node = numa_node_of_cpu(core);
struct bitmask * bmp = numa_allocate_cpumask();
numa_node_to_cpus(node, bmp);
for(i = 0; i < numa_num_configured_cpus(); i++) {
if(numa_bitmask_isbitset(bmp, i))
CPU_SET(i, &mask);
}
numa_free_cpumask(bmp);
}
old_pthread_setaffinity_np(*thread, sizeof(mask), &mask);
VERBOSE("-> Set affinity to %d\n", core);
return ret;
}
/*
 * Interposed pthread_setaffinity_np: the application's request is not
 * forwarded anywhere. The call is only logged (through VERBOSE) and success
 * is reported. Presumably this keeps the application from overriding the
 * placement chosen by this library.
 *
 * Returns 0 unconditionally; none of the arguments are inspected.
 */
int pthread_setaffinity_np(pthread_t thread, size_t cpusetsize, const cpu_set_t *cpuset) {
   (void)thread;
   (void)cpusetsize;
   (void)cpuset;

   VERBOSE("-> Ignoring call to pthread_setaffinity_np performed by the application\n");

   return 0;
}
/*
 * Interposed sched_setaffinity: the application's request is not forwarded
 * anywhere. The call is only logged (through VERBOSE) and success is
 * reported. Code in this file that really needs to change an affinity goes
 * through the saved old_sched_setaffinity pointer instead.
 *
 * Returns 0 unconditionally; none of the arguments are inspected.
 */
int sched_setaffinity(pid_t pid, size_t cpusetsize, const cpu_set_t *mask) {
   (void)pid;
   (void)cpusetsize;
   (void)mask;

   VERBOSE("-> Ignoring call to sched_setaffinity performed by the application\n");

   return 0;
}
/*
 * Interposed fork.
 *
 * The shared-memory reference count is raised before forking so that a
 * parent exiting before its child does not destroy the segment. The
 * increment is rolled back if the real fork fails.
 *
 * In the parent, the new child is pinned to the next core from
 * get_next_core(). Nothing extra is done on the child side.
 *
 * Returns the value of the real fork unchanged.
 */
pid_t fork(void) {
   __sync_fetch_and_add(&get_shm()->refcount, 1);

   pid_t child = old_fork();

   if (child < 0) {
      /* No child was created: undo the reference taken above. */
      __sync_fetch_and_sub(&get_shm()->refcount, 1);
      return child;
   }

   if (child > 0) {
      /* Parent side: place the new process. */
      set_affinity(child, get_next_core());
   }

   return child;
}
int clone(int (*fn)(void *), void *child_stack, int flags, void *arg, ... ) {
va_list arg_list;
int ret;
va_start(arg_list, arg);
if((flags & CLONE_CHILD_CLEARTID) || (flags & CLONE_CHILD_CLEARTID)) {
pid_t *ptid = va_arg(arg_list, pid_t*);
struct user_desc *tls = va_arg(arg_list, struct user_desc*);
pid_t *ctid = va_arg(arg_list, pid_t*);
ret = old_clone(fn, child_stack, flags, arg, ptid, tls, ctid);
}
else if (flags & CLONE_SETTLS) {
pid_t *ptid = va_arg(arg_list, pid_t*);
struct user_desc *tls = va_arg(arg_list, struct user_desc*);
ret = old_clone(fn, child_stack, flags, arg, ptid, tls);
}
else if(flags & CLONE_PARENT_SETTID) {
pid_t *ptid = va_arg(arg_list, pid_t*);
ret = old_clone(fn, child_stack, flags, arg, ptid);
}
else {
ret = old_clone(fn, child_stack, flags, arg);
}
va_end(arg_list);
if(ret > 0) {
set_affinity(gettid(), get_next_core());
}
return ret;
}
/*
 * Process-exit hook (registered with atexit() in m_init): hands the value of
 * the PINTHREADS_SHMID environment variable to cleanup_shm(). getenv() yields
 * NULL when the variable is unset, and that NULL is passed through as is.
 */
void m_exit(void) {
   char *shm_id = getenv("PINTHREADS_SHMID");

   cleanup_shm(shm_id);
}
/*
 * Signal handler installed by m_init for SIGTERM, SIGINT and SIGSEGV.
 * Terminates the process through exit(), using the signal number as the exit
 * status, so that the atexit() handlers (m_exit) still run.
 *
 * NOTE(review): exit() is not async-signal-safe; it is kept because it is
 * what triggers the atexit cleanup.
 */
void m_signal(int signal) {
   const int status = signal;

   exit(status);
}
void __attribute__((constructor)) m_init(void) {
if(old_pthread_create)
return;
restore_shm(getenv("PINTHREADS_SHMID"), getenv("PINTHREADS_SHMSIZE"));
VERBOSE("Init called for pid %d\n", gettid());
old_sched_setaffinity = (int (*) (pid_t, size_t, const cpu_set_t*)) dlsym(RTLD_NEXT, "sched_setaffinity");
old_pthread_setaffinity_np = (int (*) (pthread_t, size_t, const cpu_set_t *)) dlsym(RTLD_NEXT, "pthread_setaffinity_np");
old_pthread_create = (int (*)(pthread_t*, const pthread_attr_t*, void* (*)(void*), void*)) dlsym(RTLD_NEXT, "pthread_create");
old_fork = (pid_t (*)(void)) dlsym(RTLD_NEXT, "fork");
old_clone = (int (*)(int (*)(void *), void *, int flags, void *arg, ...)) dlsym(RTLD_NEXT, "clone");
atexit(m_exit);
signal(SIGTERM, m_signal);
signal(SIGINT, m_signal);
signal(SIGSEGV, m_signal);
if(get_shm()->server) {
pthread_t server_thread;
init_server();
old_pthread_create(&server_thread, NULL, server, NULL);
wait_for_server();
}
set_affinity(gettid(), get_next_core());
}