1 | /* |
2 | * Kernel iptables module to track stats for packets based on user tags. | |
3 | * | |
4 | * (C) 2011 Google, Inc | |
5 | * | |
6 | * This program is free software; you can redistribute it and/or modify | |
7 | * it under the terms of the GNU General Public License version 2 as | |
8 | * published by the Free Software Foundation. | |
9 | */ | |
10 | ||
11 | /* | |
12 | * There are run-time debug flags enabled via the debug_mask module param, or | |
13 | * via the DEFAULT_DEBUG_MASK. See xt_qtaguid_internal.h. | |
14 | */ | |
15 | #define DEBUG | |
16 | ||
17 | #include <linux/file.h> | |
18 | #include <linux/inetdevice.h> | |
19 | #include <linux/module.h> | |
20 | #include <linux/netfilter/x_tables.h> | |
21 | #include <linux/netfilter/xt_qtaguid.h> | |
22 | #include <linux/ratelimit.h> | |
23 | #include <linux/seq_file.h> | |
24 | #include <linux/skbuff.h> | |
25 | #include <linux/workqueue.h> | |
26 | #include <net/addrconf.h> | |
27 | #include <net/sock.h> | |
28 | #include <net/tcp.h> | |
29 | #include <net/udp.h> | |
30 | ||
31 | #if defined(CONFIG_IP6_NF_IPTABLES) || defined(CONFIG_IP6_NF_IPTABLES_MODULE) | |
32 | #include <linux/netfilter_ipv6/ip6_tables.h> | |
33 | #endif | |
34 | ||
35 | #include <linux/netfilter/xt_socket.h> | |
36 | #include "xt_qtaguid_internal.h" | |
37 | #include "xt_qtaguid_print.h" | |
38 | #include "../../fs/proc/internal.h" | |
39 | ||
40 | /* | |
41 | * We only use the xt_socket funcs within a similar context to avoid unexpected | |
42 | * return values. | |
43 | */ | |
44 | #define XT_SOCKET_SUPPORTED_HOOKS \ | |
45 | ((1 << NF_INET_PRE_ROUTING) | (1 << NF_INET_LOCAL_IN)) | |
46 | ||
47 | ||
48 | static const char *module_procdirname = "xt_qtaguid"; | |
49 | static struct proc_dir_entry *xt_qtaguid_procdir; | |
50 | ||
51 | static unsigned int proc_iface_perms = S_IRUGO; | |
52 | module_param_named(iface_perms, proc_iface_perms, uint, S_IRUGO | S_IWUSR); | |
53 | ||
54 | static struct proc_dir_entry *xt_qtaguid_stats_file; | |
55 | static unsigned int proc_stats_perms = S_IRUGO; | |
56 | module_param_named(stats_perms, proc_stats_perms, uint, S_IRUGO | S_IWUSR); | |
57 | ||
58 | static struct proc_dir_entry *xt_qtaguid_ctrl_file; | |
59 | ||
60 | /* Everybody can write. But proc_ctrl_write_limited is true by default which | |
61 | * limits what can be controlled. See the can_*() functions. | |
62 | */ | |
63 | static unsigned int proc_ctrl_perms = S_IRUGO | S_IWUGO; | |
64 | module_param_named(ctrl_perms, proc_ctrl_perms, uint, S_IRUGO | S_IWUSR); | |
65 | ||
66 | /* Limited by default, so the gid of the ctrl and stats proc entries | |
67 | * will limit what can be done. See the can_*() functions. | |
68 | */ | |
69 | static bool proc_stats_readall_limited = true; | |
70 | static bool proc_ctrl_write_limited = true; | |
71 | ||
72 | module_param_named(stats_readall_limited, proc_stats_readall_limited, bool, | |
73 | S_IRUGO | S_IWUSR); | |
74 | module_param_named(ctrl_write_limited, proc_ctrl_write_limited, bool, | |
75 | S_IRUGO | S_IWUSR); | |
76 | ||
77 | /* | |
78 | * Limit the number of active tags (via socket tags) for a given UID. | |
79 | * Multiple processes could share the UID. | |
80 | */ | |
81 | static int max_sock_tags = DEFAULT_MAX_SOCK_TAGS; | |
82 | module_param(max_sock_tags, int, S_IRUGO | S_IWUSR); | |
83 | ||
84 | /* | |
85 | * After the kernel has initialized this module, it is still possible | |
86 | * to make it passive. | |
87 | * Setting passive to Y: | |
88 | * - the iface stats handling will not act on notifications. | |
89 | * - iptables matches will never match. | |
90 | * - ctrl commands silently succeed. | |
91 | * - stats are always empty. | |
92 | * This is mostly useful when a bug is suspected. | |
93 | */ | |
94 | static bool module_passive; | |
95 | module_param_named(passive, module_passive, bool, S_IRUGO | S_IWUSR); | |
96 | ||
97 | /* | |
98 | * Control how qtaguid data is tracked per proc/uid. | |
99 | * Setting tag_tracking_passive to Y: | |
100 | * - don't create proc specific structs to track tags | |
101 | * - don't check that active tag stats exceed some limits. | |
102 | * - don't clean up socket tags on process exits. | |
103 | * This is mostly useful when a bug is suspected. | |
104 | */ | |
105 | static bool qtu_proc_handling_passive; | |
106 | module_param_named(tag_tracking_passive, qtu_proc_handling_passive, bool, | |
107 | S_IRUGO | S_IWUSR); | |
108 | ||
109 | #define QTU_DEV_NAME "xt_qtaguid" | |
110 | ||
111 | uint qtaguid_debug_mask = DEFAULT_DEBUG_MASK; | |
112 | module_param_named(debug_mask, qtaguid_debug_mask, uint, S_IRUGO | S_IWUSR); | |
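/*
 * Since debug_mask is exported as a writable module parameter, it can be
 * tweaked at runtime (assuming the standard sysfs path for module
 * parameters), e.g.:
 *   echo 0xff > /sys/module/xt_qtaguid/parameters/debug_mask
 * The individual mask bits are defined in xt_qtaguid_internal.h.
 */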
113 | ||
114 | /*---------------------------------------------------------------------------*/ | |
115 | static const char *iface_stat_procdirname = "iface_stat"; | |
116 | static struct proc_dir_entry *iface_stat_procdir; | |
117 | /* | |
118 | * The iface_stat_all* will go away once userspace gets used to the new fields | |
119 | * that have a format line. | |
120 | */ | |
121 | static const char *iface_stat_all_procfilename = "iface_stat_all"; | |
122 | static struct proc_dir_entry *iface_stat_all_procfile; | |
123 | static const char *iface_stat_fmt_procfilename = "iface_stat_fmt"; | |
124 | static struct proc_dir_entry *iface_stat_fmt_procfile; | |
125 | ||
126 | ||
127 | static LIST_HEAD(iface_stat_list); | |
128 | static DEFINE_SPINLOCK(iface_stat_list_lock); | |
129 | ||
130 | static struct rb_root sock_tag_tree = RB_ROOT; | |
131 | static DEFINE_SPINLOCK(sock_tag_list_lock); | |
132 | ||
133 | static struct rb_root tag_counter_set_tree = RB_ROOT; | |
134 | static DEFINE_SPINLOCK(tag_counter_set_list_lock); | |
135 | ||
136 | static struct rb_root uid_tag_data_tree = RB_ROOT; | |
137 | static DEFINE_SPINLOCK(uid_tag_data_tree_lock); | |
138 | ||
139 | static struct rb_root proc_qtu_data_tree = RB_ROOT; | |
140 | /* No proc_qtu_data_tree_lock; use uid_tag_data_tree_lock */ | |
141 | ||
142 | static struct qtaguid_event_counts qtu_events; | |
143 | /*----------------------------------------------*/ | |
144 | static bool can_manipulate_uids(void) | |
145 | { | |
146 | /* root pwnd */ | |
147 | return in_egroup_p(xt_qtaguid_ctrl_file->gid) | |
148 | || unlikely(!current_fsuid()) || unlikely(!proc_ctrl_write_limited) | |
149 | || unlikely(current_fsuid() == xt_qtaguid_ctrl_file->uid); | |
150 | } | |
151 | ||
152 | static bool can_impersonate_uid(uid_t uid) | |
153 | { | |
154 | return uid == current_fsuid() || can_manipulate_uids(); | |
155 | } | |
156 | ||
157 | static bool can_read_other_uid_stats(uid_t uid) | |
158 | { | |
159 | /* root pwnd */ | |
160 | return in_egroup_p(xt_qtaguid_stats_file->gid) | |
161 | || unlikely(!current_fsuid()) || uid == current_fsuid() | |
162 | || unlikely(!proc_stats_readall_limited) | |
163 | || unlikely(current_fsuid() == xt_qtaguid_ctrl_file->uid); | |
164 | } | |
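/*
 * Summary of the access checks above (a reading of the code, not a spec):
 * a caller may manipulate or read another UID's data when it is root
 * (fsuid 0), owns the corresponding proc file, belongs to that file's
 * group, or when the matching *_limited module parameter is cleared.
 */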
165 | ||
166 | static inline void dc_add_byte_packets(struct data_counters *counters, int set, | |
167 | enum ifs_tx_rx direction, | |
168 | enum ifs_proto ifs_proto, | |
169 | int bytes, | |
170 | int packets) | |
171 | { | |
172 | counters->bpc[set][direction][ifs_proto].bytes += bytes; | |
173 | counters->bpc[set][direction][ifs_proto].packets += packets; | |
174 | } | |
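/*
 * The byte/packet counters are indexed as
 *   counters->bpc[counter_set][IFS_TX|IFS_RX][IFS_TCP|IFS_UDP|IFS_PROTO_OTHER]
 * so each counter set keeps separate tx/rx totals per protocol class
 * (see data_counters_update() below for how a protocol is mapped).
 */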
175 | ||
176 | static struct tag_node *tag_node_tree_search(struct rb_root *root, tag_t tag) | |
177 | { | |
178 | struct rb_node *node = root->rb_node; | |
179 | ||
180 | while (node) { | |
181 | struct tag_node *data = rb_entry(node, struct tag_node, node); | |
182 | int result; | |
183 | RB_DEBUG("qtaguid: tag_node_tree_search(0x%llx): " | |
184 | " node=%p data=%p\n", tag, node, data); | |
185 | result = tag_compare(tag, data->tag); | |
186 | RB_DEBUG("qtaguid: tag_node_tree_search(0x%llx): " | |
187 | " data.tag=0x%llx (uid=%u) res=%d\n", | |
188 | tag, data->tag, get_uid_from_tag(data->tag), result); | |
189 | if (result < 0) | |
190 | node = node->rb_left; | |
191 | else if (result > 0) | |
192 | node = node->rb_right; | |
193 | else | |
194 | return data; | |
195 | } | |
196 | return NULL; | |
197 | } | |
198 | ||
199 | static void tag_node_tree_insert(struct tag_node *data, struct rb_root *root) | |
200 | { | |
201 | struct rb_node **new = &(root->rb_node), *parent = NULL; | |
202 | ||
203 | /* Figure out where to put new node */ | |
204 | while (*new) { | |
205 | struct tag_node *this = rb_entry(*new, struct tag_node, | |
206 | node); | |
207 | int result = tag_compare(data->tag, this->tag); | |
208 | RB_DEBUG("qtaguid: %s(): tag=0x%llx" | |
209 | " (uid=%u)\n", __func__, | |
210 | this->tag, | |
211 | get_uid_from_tag(this->tag)); | |
212 | parent = *new; | |
213 | if (result < 0) | |
214 | new = &((*new)->rb_left); | |
215 | else if (result > 0) | |
216 | new = &((*new)->rb_right); | |
217 | else | |
218 | BUG(); | |
219 | } | |
220 | ||
221 | /* Add new node and rebalance tree. */ | |
222 | rb_link_node(&data->node, parent, new); | |
223 | rb_insert_color(&data->node, root); | |
224 | } | |
225 | ||
226 | static void tag_stat_tree_insert(struct tag_stat *data, struct rb_root *root) | |
227 | { | |
228 | tag_node_tree_insert(&data->tn, root); | |
229 | } | |
230 | ||
231 | static struct tag_stat *tag_stat_tree_search(struct rb_root *root, tag_t tag) | |
232 | { | |
233 | struct tag_node *node = tag_node_tree_search(root, tag); | |
234 | if (!node) | |
235 | return NULL; | |
236 | return rb_entry(&node->node, struct tag_stat, tn.node); | |
237 | } | |
238 | ||
239 | static void tag_counter_set_tree_insert(struct tag_counter_set *data, | |
240 | struct rb_root *root) | |
241 | { | |
242 | tag_node_tree_insert(&data->tn, root); | |
243 | } | |
244 | ||
245 | static struct tag_counter_set *tag_counter_set_tree_search(struct rb_root *root, | |
246 | tag_t tag) | |
247 | { | |
248 | struct tag_node *node = tag_node_tree_search(root, tag); | |
249 | if (!node) | |
250 | return NULL; | |
251 | return rb_entry(&node->node, struct tag_counter_set, tn.node); | |
252 | ||
253 | } | |
254 | ||
255 | static void tag_ref_tree_insert(struct tag_ref *data, struct rb_root *root) | |
256 | { | |
257 | tag_node_tree_insert(&data->tn, root); | |
258 | } | |
259 | ||
260 | static struct tag_ref *tag_ref_tree_search(struct rb_root *root, tag_t tag) | |
261 | { | |
262 | struct tag_node *node = tag_node_tree_search(root, tag); | |
263 | if (!node) | |
264 | return NULL; | |
265 | return rb_entry(&node->node, struct tag_ref, tn.node); | |
266 | } | |
267 | ||
268 | static struct sock_tag *sock_tag_tree_search(struct rb_root *root, | |
269 | const struct sock *sk) | |
270 | { | |
271 | struct rb_node *node = root->rb_node; | |
272 | ||
273 | while (node) { | |
274 | struct sock_tag *data = rb_entry(node, struct sock_tag, | |
275 | sock_node); | |
276 | if (sk < data->sk) | |
277 | node = node->rb_left; | |
278 | else if (sk > data->sk) | |
279 | node = node->rb_right; | |
280 | else | |
281 | return data; | |
282 | } | |
283 | return NULL; | |
284 | } | |
285 | ||
286 | static void sock_tag_tree_insert(struct sock_tag *data, struct rb_root *root) | |
287 | { | |
288 | struct rb_node **new = &(root->rb_node), *parent = NULL; | |
289 | ||
290 | /* Figure out where to put new node */ | |
291 | while (*new) { | |
292 | struct sock_tag *this = rb_entry(*new, struct sock_tag, | |
293 | sock_node); | |
294 | parent = *new; | |
295 | if (data->sk < this->sk) | |
296 | new = &((*new)->rb_left); | |
297 | else if (data->sk > this->sk) | |
298 | new = &((*new)->rb_right); | |
299 | else | |
300 | BUG(); | |
301 | } | |
302 | ||
303 | /* Add new node and rebalance tree. */ | |
304 | rb_link_node(&data->sock_node, parent, new); | |
305 | rb_insert_color(&data->sock_node, root); | |
306 | } | |
307 | ||
308 | static void sock_tag_tree_erase(struct rb_root *st_to_free_tree) | |
309 | { | |
310 | struct rb_node *node; | |
311 | struct sock_tag *st_entry; | |
312 | ||
313 | node = rb_first(st_to_free_tree); | |
314 | while (node) { | |
315 | st_entry = rb_entry(node, struct sock_tag, sock_node); | |
316 | node = rb_next(node); | |
317 | CT_DEBUG("qtaguid: %s(): " | |
318 | "erase st: sk=%p tag=0x%llx (uid=%u)\n", __func__, | |
319 | st_entry->sk, | |
320 | st_entry->tag, | |
321 | get_uid_from_tag(st_entry->tag)); | |
322 | rb_erase(&st_entry->sock_node, st_to_free_tree); | |
323 | sockfd_put(st_entry->socket); | |
324 | kfree(st_entry); | |
325 | } | |
326 | } | |
327 | ||
328 | static struct proc_qtu_data *proc_qtu_data_tree_search(struct rb_root *root, | |
329 | const pid_t pid) | |
330 | { | |
331 | struct rb_node *node = root->rb_node; | |
332 | ||
333 | while (node) { | |
334 | struct proc_qtu_data *data = rb_entry(node, | |
335 | struct proc_qtu_data, | |
336 | node); | |
337 | if (pid < data->pid) | |
338 | node = node->rb_left; | |
339 | else if (pid > data->pid) | |
340 | node = node->rb_right; | |
341 | else | |
342 | return data; | |
343 | } | |
344 | return NULL; | |
345 | } | |
346 | ||
347 | static void proc_qtu_data_tree_insert(struct proc_qtu_data *data, | |
348 | struct rb_root *root) | |
349 | { | |
350 | struct rb_node **new = &(root->rb_node), *parent = NULL; | |
351 | ||
352 | /* Figure out where to put new node */ | |
353 | while (*new) { | |
354 | struct proc_qtu_data *this = rb_entry(*new, | |
355 | struct proc_qtu_data, | |
356 | node); | |
357 | parent = *new; | |
358 | if (data->pid < this->pid) | |
359 | new = &((*new)->rb_left); | |
360 | else if (data->pid > this->pid) | |
361 | new = &((*new)->rb_right); | |
362 | else | |
363 | BUG(); | |
364 | } | |
365 | ||
366 | /* Add new node and rebalance tree. */ | |
367 | rb_link_node(&data->node, parent, new); | |
368 | rb_insert_color(&data->node, root); | |
369 | } | |
370 | ||
371 | static void uid_tag_data_tree_insert(struct uid_tag_data *data, | |
372 | struct rb_root *root) | |
373 | { | |
374 | struct rb_node **new = &(root->rb_node), *parent = NULL; | |
375 | ||
376 | /* Figure out where to put new node */ | |
377 | while (*new) { | |
378 | struct uid_tag_data *this = rb_entry(*new, | |
379 | struct uid_tag_data, | |
380 | node); | |
381 | parent = *new; | |
382 | if (data->uid < this->uid) | |
383 | new = &((*new)->rb_left); | |
384 | else if (data->uid > this->uid) | |
385 | new = &((*new)->rb_right); | |
386 | else | |
387 | BUG(); | |
388 | } | |
389 | ||
390 | /* Add new node and rebalance tree. */ | |
391 | rb_link_node(&data->node, parent, new); | |
392 | rb_insert_color(&data->node, root); | |
393 | } | |
394 | ||
395 | static struct uid_tag_data *uid_tag_data_tree_search(struct rb_root *root, | |
396 | uid_t uid) | |
397 | { | |
398 | struct rb_node *node = root->rb_node; | |
399 | ||
400 | while (node) { | |
401 | struct uid_tag_data *data = rb_entry(node, | |
402 | struct uid_tag_data, | |
403 | node); | |
404 | if (uid < data->uid) | |
405 | node = node->rb_left; | |
406 | else if (uid > data->uid) | |
407 | node = node->rb_right; | |
408 | else | |
409 | return data; | |
410 | } | |
411 | return NULL; | |
412 | } | |
413 | ||
414 | /* | |
415 | * Allocates a new uid_tag_data struct if needed. | |
416 | * Returns a pointer to the found or allocated uid_tag_data. | |
417 | * Returns a PTR_ERR on failures, and lock is not held. | |
418 | * If found_res is not NULL: | |
419 | * sets *found_res to true if the entry already existed (not allocated). | |
420 | * sets *found_res to false if a new entry had to be allocated. | |
421 | */ | |
422 | struct uid_tag_data *get_uid_data(uid_t uid, bool *found_res) | |
423 | { | |
424 | struct uid_tag_data *utd_entry; | |
425 | ||
426 | /* Look for top level uid_tag_data for the UID */ | |
427 | utd_entry = uid_tag_data_tree_search(&uid_tag_data_tree, uid); | |
428 | DR_DEBUG("qtaguid: get_uid_data(%u) utd=%p\n", uid, utd_entry); | |
429 | ||
430 | if (found_res) | |
431 | *found_res = utd_entry; | |
432 | if (utd_entry) | |
433 | return utd_entry; | |
434 | ||
435 | utd_entry = kzalloc(sizeof(*utd_entry), GFP_ATOMIC); | |
436 | if (!utd_entry) { | |
437 | pr_err("qtaguid: get_uid_data(%u): " | |
438 | "tag data alloc failed\n", uid); | |
439 | return ERR_PTR(-ENOMEM); | |
440 | } | |
441 | ||
442 | utd_entry->uid = uid; | |
443 | utd_entry->tag_ref_tree = RB_ROOT; | |
444 | uid_tag_data_tree_insert(utd_entry, &uid_tag_data_tree); | |
445 | DR_DEBUG("qtaguid: get_uid_data(%u) new utd=%p\n", uid, utd_entry); | |
446 | return utd_entry; | |
447 | } | |
448 | ||
449 | /* Never returns NULL. Either PTR_ERR or a valid ptr. */ | |
450 | static struct tag_ref *new_tag_ref(tag_t new_tag, | |
451 | struct uid_tag_data *utd_entry) | |
452 | { | |
453 | struct tag_ref *tr_entry; | |
454 | int res; | |
455 | ||
456 | if (utd_entry->num_active_tags + 1 > max_sock_tags) { | |
457 | pr_info("qtaguid: new_tag_ref(0x%llx): " | |
458 | "tag ref alloc quota exceeded. max=%d\n", | |
459 | new_tag, max_sock_tags); | |
460 | res = -EMFILE; | |
461 | goto err_res; | |
462 | ||
463 | } | |
464 | ||
465 | tr_entry = kzalloc(sizeof(*tr_entry), GFP_ATOMIC); | |
466 | if (!tr_entry) { | |
467 | pr_err("qtaguid: new_tag_ref(0x%llx): " | |
468 | "tag ref alloc failed\n", | |
469 | new_tag); | |
470 | res = -ENOMEM; | |
471 | goto err_res; | |
472 | } | |
473 | tr_entry->tn.tag = new_tag; | |
474 | /* tr_entry->num_sock_tags handled by caller */ | |
475 | utd_entry->num_active_tags++; | |
476 | tag_ref_tree_insert(tr_entry, &utd_entry->tag_ref_tree); | |
477 | DR_DEBUG("qtaguid: new_tag_ref(0x%llx): " | |
478 | " inserted new tag ref %p\n", | |
479 | new_tag, tr_entry); | |
480 | return tr_entry; | |
481 | ||
482 | err_res: | |
483 | return ERR_PTR(res); | |
484 | } | |
485 | ||
486 | static struct tag_ref *lookup_tag_ref(tag_t full_tag, | |
487 | struct uid_tag_data **utd_res) | |
488 | { | |
489 | struct uid_tag_data *utd_entry; | |
490 | struct tag_ref *tr_entry; | |
491 | bool found_utd; | |
492 | uid_t uid = get_uid_from_tag(full_tag); | |
493 | ||
494 | DR_DEBUG("qtaguid: lookup_tag_ref(tag=0x%llx (uid=%u))\n", | |
495 | full_tag, uid); | |
496 | ||
497 | utd_entry = get_uid_data(uid, &found_utd); | |
498 | if (IS_ERR_OR_NULL(utd_entry)) { | |
499 | if (utd_res) | |
500 | *utd_res = utd_entry; | |
501 | return NULL; | |
502 | } | |
503 | ||
504 | tr_entry = tag_ref_tree_search(&utd_entry->tag_ref_tree, full_tag); | |
505 | if (utd_res) | |
506 | *utd_res = utd_entry; | |
507 | DR_DEBUG("qtaguid: lookup_tag_ref(0x%llx) utd_entry=%p tr_entry=%p\n", | |
508 | full_tag, utd_entry, tr_entry); | |
509 | return tr_entry; | |
510 | } | |
511 | ||
512 | /* Never returns NULL. Either PTR_ERR or a valid ptr. */ | |
513 | static struct tag_ref *get_tag_ref(tag_t full_tag, | |
514 | struct uid_tag_data **utd_res) | |
515 | { | |
516 | struct uid_tag_data *utd_entry; | |
517 | struct tag_ref *tr_entry; | |
518 | ||
519 | DR_DEBUG("qtaguid: get_tag_ref(0x%llx)\n", | |
520 | full_tag); | |
521 | spin_lock_bh(&uid_tag_data_tree_lock); | |
522 | tr_entry = lookup_tag_ref(full_tag, &utd_entry); | |
523 | BUG_ON(IS_ERR_OR_NULL(utd_entry)); | |
524 | if (!tr_entry) | |
525 | tr_entry = new_tag_ref(full_tag, utd_entry); | |
526 | ||
527 | spin_unlock_bh(&uid_tag_data_tree_lock); | |
528 | if (utd_res) | |
529 | *utd_res = utd_entry; | |
530 | DR_DEBUG("qtaguid: get_tag_ref(0x%llx) utd=%p tr=%p\n", | |
531 | full_tag, utd_entry, tr_entry); | |
532 | return tr_entry; | |
533 | } | |
534 | ||
535 | /* Checks and maybe frees the UID Tag Data entry */ | |
536 | static void put_utd_entry(struct uid_tag_data *utd_entry) | |
537 | { | |
538 | /* Are we done with the UID tag data entry? */ | |
539 | if (RB_EMPTY_ROOT(&utd_entry->tag_ref_tree) && | |
540 | !utd_entry->num_pqd) { | |
541 | DR_DEBUG("qtaguid: %s(): " | |
542 | "erase utd_entry=%p uid=%u " | |
543 | "by pid=%u tgid=%u uid=%u\n", __func__, | |
544 | utd_entry, utd_entry->uid, | |
545 | current->pid, current->tgid, current_fsuid()); | |
546 | BUG_ON(utd_entry->num_active_tags); | |
547 | rb_erase(&utd_entry->node, &uid_tag_data_tree); | |
548 | kfree(utd_entry); | |
549 | } else { | |
550 | DR_DEBUG("qtaguid: %s(): " | |
551 | "utd_entry=%p still has %d tags %d proc_qtu_data\n", | |
552 | __func__, utd_entry, utd_entry->num_active_tags, | |
553 | utd_entry->num_pqd); | |
554 | BUG_ON(!(utd_entry->num_active_tags || | |
555 | utd_entry->num_pqd)); | |
556 | } | |
557 | } | |
558 | ||
559 | /* | |
560 | * If no sock_tags are using this tag_ref, | |
561 | * decrements refcount of utd_entry, removes tr_entry | |
562 | * from utd_entry->tag_ref_tree and frees. | |
563 | */ | |
564 | static void free_tag_ref_from_utd_entry(struct tag_ref *tr_entry, | |
565 | struct uid_tag_data *utd_entry) | |
566 | { | |
567 | DR_DEBUG("qtaguid: %s(): %p tag=0x%llx (uid=%u)\n", __func__, | |
568 | tr_entry, tr_entry->tn.tag, | |
569 | get_uid_from_tag(tr_entry->tn.tag)); | |
570 | if (!tr_entry->num_sock_tags) { | |
571 | BUG_ON(!utd_entry->num_active_tags); | |
572 | utd_entry->num_active_tags--; | |
573 | rb_erase(&tr_entry->tn.node, &utd_entry->tag_ref_tree); | |
574 | DR_DEBUG("qtaguid: %s(): erased %p\n", __func__, tr_entry); | |
575 | kfree(tr_entry); | |
576 | } | |
577 | } | |
578 | ||
579 | static void put_tag_ref_tree(tag_t full_tag, struct uid_tag_data *utd_entry) | |
580 | { | |
581 | struct rb_node *node; | |
582 | struct tag_ref *tr_entry; | |
583 | tag_t acct_tag; | |
584 | ||
585 | DR_DEBUG("qtaguid: %s(tag=0x%llx (uid=%u))\n", __func__, | |
586 | full_tag, get_uid_from_tag(full_tag)); | |
587 | acct_tag = get_atag_from_tag(full_tag); | |
588 | node = rb_first(&utd_entry->tag_ref_tree); | |
589 | while (node) { | |
590 | tr_entry = rb_entry(node, struct tag_ref, tn.node); | |
591 | node = rb_next(node); | |
592 | if (!acct_tag || tr_entry->tn.tag == full_tag) | |
593 | free_tag_ref_from_utd_entry(tr_entry, utd_entry); | |
594 | } | |
595 | } | |
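/*
 * When the acct component of full_tag is 0, every tag_ref under this uid
 * is a candidate for freeing; otherwise only the exactly matching tag_ref
 * is (and free_tag_ref_from_utd_entry() still skips refs that are in use).
 */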
596 | ||
597 | static ssize_t read_proc_u64(struct file *file, char __user *buf, | |
598 | size_t size, loff_t *ppos) | |
599 | { | |
600 | uint64_t *valuep = PDE_DATA(file_inode(file)); | |
601 | char tmp[24]; | |
602 | size_t tmp_size; | |
603 | ||
604 | tmp_size = scnprintf(tmp, sizeof(tmp), "%llu\n", *valuep); | |
605 | return simple_read_from_buffer(buf, size, ppos, tmp, tmp_size); | |
606 | } | |
607 | ||
608 | static ssize_t read_proc_bool(struct file *file, char __user *buf, | |
609 | size_t size, loff_t *ppos) | |
610 | { | |
611 | bool *valuep = PDE_DATA(file_inode(file)); | |
612 | char tmp[24]; | |
613 | size_t tmp_size; | |
614 | ||
615 | tmp_size = scnprintf(tmp, sizeof(tmp), "%u\n", *valuep); | |
616 | return simple_read_from_buffer(buf, size, ppos, tmp, tmp_size); | |
617 | } | |
618 | ||
619 | static int get_active_counter_set(tag_t tag) | |
620 | { | |
621 | int active_set = 0; | |
622 | struct tag_counter_set *tcs; | |
623 | ||
624 | MT_DEBUG("qtaguid: get_active_counter_set(tag=0x%llx)" | |
625 | " (uid=%u)\n", | |
626 | tag, get_uid_from_tag(tag)); | |
627 | /* For now we only handle UID tags for active sets */ | |
628 | tag = get_utag_from_tag(tag); | |
629 | spin_lock_bh(&tag_counter_set_list_lock); | |
630 | tcs = tag_counter_set_tree_search(&tag_counter_set_tree, tag); | |
631 | if (tcs) | |
632 | active_set = tcs->active_set; | |
633 | spin_unlock_bh(&tag_counter_set_list_lock); | |
634 | return active_set; | |
635 | } | |
636 | ||
637 | /* | |
638 | * Find the entry for tracking the specified interface. | |
639 | * Caller must hold iface_stat_list_lock | |
640 | */ | |
641 | static struct iface_stat *get_iface_entry(const char *ifname) | |
642 | { | |
643 | struct iface_stat *iface_entry; | |
644 | ||
645 | /* Find the entry for tracking the specified interface */ | |
646 | if (ifname == NULL) { | |
647 | pr_info("qtaguid: iface_stat: get() NULL device name\n"); | |
648 | return NULL; | |
649 | } | |
650 | ||
651 | /* Iterate over interfaces */ | |
652 | list_for_each_entry(iface_entry, &iface_stat_list, list) { | |
653 | if (!strcmp(ifname, iface_entry->ifname)) | |
654 | goto done; | |
655 | } | |
656 | iface_entry = NULL; | |
657 | done: | |
658 | return iface_entry; | |
659 | } | |
660 | ||
661 | /* This is for fmt2 only */ | |
662 | static void pp_iface_stat_header(struct seq_file *m) | |
663 | { | |
664 | seq_puts(m, | |
665 | "ifname " | |
666 | "total_skb_rx_bytes total_skb_rx_packets " | |
667 | "total_skb_tx_bytes total_skb_tx_packets " | |
668 | "rx_tcp_bytes rx_tcp_packets " | |
669 | "rx_udp_bytes rx_udp_packets " | |
670 | "rx_other_bytes rx_other_packets " | |
671 | "tx_tcp_bytes tx_tcp_packets " | |
672 | "tx_udp_bytes tx_udp_packets " | |
673 | "tx_other_bytes tx_other_packets\n" | |
674 | ); | |
675 | } | |
676 | ||
677 | static void pp_iface_stat_line(struct seq_file *m, | |
678 | struct iface_stat *iface_entry) | |
679 | { | |
680 | struct data_counters *cnts; | |
681 | int cnt_set = 0; /* We only use one set for the device */ | |
682 | cnts = &iface_entry->totals_via_skb; | |
683 | seq_printf(m, "%s %llu %llu %llu %llu %llu %llu %llu %llu " | |
684 | "%llu %llu %llu %llu %llu %llu %llu %llu\n", | |
685 | iface_entry->ifname, | |
686 | dc_sum_bytes(cnts, cnt_set, IFS_RX), | |
687 | dc_sum_packets(cnts, cnt_set, IFS_RX), | |
688 | dc_sum_bytes(cnts, cnt_set, IFS_TX), | |
689 | dc_sum_packets(cnts, cnt_set, IFS_TX), | |
690 | cnts->bpc[cnt_set][IFS_RX][IFS_TCP].bytes, | |
691 | cnts->bpc[cnt_set][IFS_RX][IFS_TCP].packets, | |
692 | cnts->bpc[cnt_set][IFS_RX][IFS_UDP].bytes, | |
693 | cnts->bpc[cnt_set][IFS_RX][IFS_UDP].packets, | |
694 | cnts->bpc[cnt_set][IFS_RX][IFS_PROTO_OTHER].bytes, | |
695 | cnts->bpc[cnt_set][IFS_RX][IFS_PROTO_OTHER].packets, | |
696 | cnts->bpc[cnt_set][IFS_TX][IFS_TCP].bytes, | |
697 | cnts->bpc[cnt_set][IFS_TX][IFS_TCP].packets, | |
698 | cnts->bpc[cnt_set][IFS_TX][IFS_UDP].bytes, | |
699 | cnts->bpc[cnt_set][IFS_TX][IFS_UDP].packets, | |
700 | cnts->bpc[cnt_set][IFS_TX][IFS_PROTO_OTHER].bytes, | |
701 | cnts->bpc[cnt_set][IFS_TX][IFS_PROTO_OTHER].packets); | |
702 | } | |
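/*
 * Illustrative fmt2 line (values are made up; column order matches the
 * header printed by pp_iface_stat_header() above):
 *   wlan0 4000 10 2000 8 3000 6 800 3 200 1 1500 5 400 2 100 1
 */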
703 | ||
704 | struct proc_iface_stat_fmt_info { | |
705 | int fmt; | |
706 | }; | |
707 | ||
708 | static void *iface_stat_fmt_proc_start(struct seq_file *m, loff_t *pos) | |
709 | { | |
710 | struct proc_iface_stat_fmt_info *p = m->private; | |
711 | loff_t n = *pos; | |
712 | ||
713 | /* | |
714 | * This lock will prevent iface_stat_update() from changing active, | |
715 | * and in turn prevent an interface from unregistering itself. | |
716 | */ | |
717 | spin_lock_bh(&iface_stat_list_lock); | |
718 | ||
719 | if (unlikely(module_passive)) | |
720 | return NULL; | |
721 | ||
722 | if (!n && p->fmt == 2) | |
723 | pp_iface_stat_header(m); | |
724 | ||
725 | return seq_list_start(&iface_stat_list, n); | |
726 | } | |
727 | ||
728 | static void *iface_stat_fmt_proc_next(struct seq_file *m, void *p, loff_t *pos) | |
729 | { | |
730 | return seq_list_next(p, &iface_stat_list, pos); | |
731 | } | |
732 | ||
733 | static void iface_stat_fmt_proc_stop(struct seq_file *m, void *p) | |
734 | { | |
735 | spin_unlock_bh(&iface_stat_list_lock); | |
736 | } | |
737 | ||
738 | static int iface_stat_fmt_proc_show(struct seq_file *m, void *v) | |
739 | { | |
740 | struct proc_iface_stat_fmt_info *p = m->private; | |
741 | struct iface_stat *iface_entry; | |
742 | struct rtnl_link_stats64 dev_stats, *stats; | |
743 | struct rtnl_link_stats64 no_dev_stats = {0}; | |
744 | ||
745 | ||
746 | CT_DEBUG("qtaguid:proc iface_stat_fmt pid=%u tgid=%u uid=%u\n", | |
747 | current->pid, current->tgid, current_fsuid()); | |
748 | ||
749 | iface_entry = list_entry(v, struct iface_stat, list); | |
750 | ||
751 | if (iface_entry->active) { | |
752 | stats = dev_get_stats(iface_entry->net_dev, | |
753 | &dev_stats); | |
754 | } else { | |
755 | stats = &no_dev_stats; | |
756 | } | |
757 | /* | |
758 | * If the meaning of the data changes, then update the fmtX | |
759 | * string. | |
760 | */ | |
761 | if (p->fmt == 1) { | |
762 | seq_printf(m, "%s %d %llu %llu %llu %llu %llu %llu %llu %llu\n", | |
763 | iface_entry->ifname, | |
764 | iface_entry->active, | |
765 | iface_entry->totals_via_dev[IFS_RX].bytes, | |
766 | iface_entry->totals_via_dev[IFS_RX].packets, | |
767 | iface_entry->totals_via_dev[IFS_TX].bytes, | |
768 | iface_entry->totals_via_dev[IFS_TX].packets, | |
769 | stats->rx_bytes, stats->rx_packets, | |
770 | stats->tx_bytes, stats->tx_packets | |
771 | ); | |
772 | } else { | |
773 | pp_iface_stat_line(m, iface_entry); | |
774 | } | |
775 | return 0; | |
776 | } | |
777 | ||
778 | static const struct file_operations read_u64_fops = { | |
779 | .read = read_proc_u64, | |
780 | .llseek = default_llseek, | |
781 | }; | |
782 | ||
783 | static const struct file_operations read_bool_fops = { | |
784 | .read = read_proc_bool, | |
785 | .llseek = default_llseek, | |
786 | }; | |
787 | ||
788 | static void iface_create_proc_worker(struct work_struct *work) | |
789 | { | |
790 | struct proc_dir_entry *proc_entry; | |
791 | struct iface_stat_work *isw = container_of(work, struct iface_stat_work, | |
792 | iface_work); | |
793 | struct iface_stat *new_iface = isw->iface_entry; | |
794 | ||
795 | /* iface_entries are not deleted, so safe to manipulate. */ | |
796 | proc_entry = proc_mkdir(new_iface->ifname, iface_stat_procdir); | |
797 | if (IS_ERR_OR_NULL(proc_entry)) { | |
798 | pr_err("qtaguid: iface_stat: create_proc(): alloc failed.\n"); | |
799 | kfree(isw); | |
800 | return; | |
801 | } | |
802 | ||
803 | new_iface->proc_ptr = proc_entry; | |
804 | ||
805 | proc_create_data("tx_bytes", proc_iface_perms, proc_entry, | |
806 | &read_u64_fops, | |
807 | &new_iface->totals_via_dev[IFS_TX].bytes); | |
808 | proc_create_data("rx_bytes", proc_iface_perms, proc_entry, | |
809 | &read_u64_fops, | |
810 | &new_iface->totals_via_dev[IFS_RX].bytes); | |
811 | proc_create_data("tx_packets", proc_iface_perms, proc_entry, | |
812 | &read_u64_fops, | |
813 | &new_iface->totals_via_dev[IFS_TX].packets); | |
814 | proc_create_data("rx_packets", proc_iface_perms, proc_entry, | |
815 | &read_u64_fops, | |
816 | &new_iface->totals_via_dev[IFS_RX].packets); | |
817 | proc_create_data("active", proc_iface_perms, proc_entry, | |
818 | &read_bool_fops, &new_iface->active); | |
819 | ||
820 | IF_DEBUG("qtaguid: iface_stat: create_proc(): done " | |
821 | "entry=%p dev=%s\n", new_iface, new_iface->ifname); | |
822 | kfree(isw); | |
823 | } | |
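/*
 * The worker above ends up creating, per tracked interface, a directory
 * under the module's iface_stat proc dir (path assumed from the names
 * registered in this file):
 *   .../xt_qtaguid/iface_stat/<ifname>/{rx_bytes,rx_packets,tx_bytes,
 *                                       tx_packets,active}
 * Each file is a read-only dump of the corresponding totals_via_dev or
 * active field.
 */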
824 | ||
825 | /* | |
826 | * Set the entry's active state and update its net_dev pointer | |
827 | * accordingly. | |
828 | */ | |
829 | static void _iface_stat_set_active(struct iface_stat *entry, | |
830 | struct net_device *net_dev, | |
831 | bool activate) | |
832 | { | |
833 | if (activate) { | |
834 | entry->net_dev = net_dev; | |
835 | entry->active = true; | |
836 | IF_DEBUG("qtaguid: %s(%s): " | |
837 | "enable tracking. rfcnt=%d\n", __func__, | |
838 | entry->ifname, | |
839 | __this_cpu_read(*net_dev->pcpu_refcnt)); | |
840 | } else { | |
841 | entry->active = false; | |
842 | entry->net_dev = NULL; | |
843 | IF_DEBUG("qtaguid: %s(%s): " | |
844 | "disable tracking. rfcnt=%d\n", __func__, | |
845 | entry->ifname, | |
846 | __this_cpu_read(*net_dev->pcpu_refcnt)); | |
847 | ||
848 | } | |
849 | } | |
850 | ||
851 | /* Caller must hold iface_stat_list_lock */ | |
852 | static struct iface_stat *iface_alloc(struct net_device *net_dev) | |
853 | { | |
854 | struct iface_stat *new_iface; | |
855 | struct iface_stat_work *isw; | |
856 | ||
857 | new_iface = kzalloc(sizeof(*new_iface), GFP_ATOMIC); | |
858 | if (new_iface == NULL) { | |
859 | pr_err("qtaguid: iface_stat: create(%s): " | |
860 | "iface_stat alloc failed\n", net_dev->name); | |
861 | return NULL; | |
862 | } | |
863 | new_iface->ifname = kstrdup(net_dev->name, GFP_ATOMIC); | |
864 | if (new_iface->ifname == NULL) { | |
865 | pr_err("qtaguid: iface_stat: create(%s): " | |
866 | "ifname alloc failed\n", net_dev->name); | |
867 | kfree(new_iface); | |
868 | return NULL; | |
869 | } | |
870 | spin_lock_init(&new_iface->tag_stat_list_lock); | |
871 | new_iface->tag_stat_tree = RB_ROOT; | |
872 | _iface_stat_set_active(new_iface, net_dev, true); | |
873 | ||
874 | /* | |
875 | * The ipv6 notifier chains are called in atomic context, so we cannot | |
876 | * create the proc entries here; defer that to a workqueue instead. | |
877 | */ | |
878 | isw = kmalloc(sizeof(*isw), GFP_ATOMIC); | |
879 | if (!isw) { | |
880 | pr_err("qtaguid: iface_stat: create(%s): " | |
881 | "work alloc failed\n", new_iface->ifname); | |
882 | _iface_stat_set_active(new_iface, net_dev, false); | |
883 | kfree(new_iface->ifname); | |
884 | kfree(new_iface); | |
885 | return NULL; | |
886 | } | |
887 | isw->iface_entry = new_iface; | |
888 | INIT_WORK(&isw->iface_work, iface_create_proc_worker); | |
889 | schedule_work(&isw->iface_work); | |
890 | list_add(&new_iface->list, &iface_stat_list); | |
891 | return new_iface; | |
892 | } | |
893 | ||
894 | static void iface_check_stats_reset_and_adjust(struct net_device *net_dev, | |
895 | struct iface_stat *iface) | |
896 | { | |
897 | struct rtnl_link_stats64 dev_stats, *stats; | |
898 | bool stats_rewound; | |
899 | ||
900 | stats = dev_get_stats(net_dev, &dev_stats); | |
901 | /* No empty packets */ | |
902 | stats_rewound = | |
903 | (stats->rx_bytes < iface->last_known[IFS_RX].bytes) | |
904 | || (stats->tx_bytes < iface->last_known[IFS_TX].bytes); | |
905 | ||
906 | IF_DEBUG("qtaguid: %s(%s): iface=%p netdev=%p " | |
907 | "bytes rx/tx=%llu/%llu " | |
908 | "active=%d last_known=%d " | |
909 | "stats_rewound=%d\n", __func__, | |
910 | net_dev ? net_dev->name : "?", | |
911 | iface, net_dev, | |
912 | stats->rx_bytes, stats->tx_bytes, | |
913 | iface->active, iface->last_known_valid, stats_rewound); | |
914 | ||
915 | if (iface->active && iface->last_known_valid && stats_rewound) { | |
916 | pr_warn_once("qtaguid: iface_stat: %s(%s): " | |
917 | "iface reset its stats unexpectedly\n", __func__, | |
918 | net_dev->name); | |
919 | ||
920 | iface->totals_via_dev[IFS_TX].bytes += | |
921 | iface->last_known[IFS_TX].bytes; | |
922 | iface->totals_via_dev[IFS_TX].packets += | |
923 | iface->last_known[IFS_TX].packets; | |
924 | iface->totals_via_dev[IFS_RX].bytes += | |
925 | iface->last_known[IFS_RX].bytes; | |
926 | iface->totals_via_dev[IFS_RX].packets += | |
927 | iface->last_known[IFS_RX].packets; | |
928 | iface->last_known_valid = false; | |
929 | IF_DEBUG("qtaguid: %s(%s): iface=%p " | |
930 | "used last known bytes rx/tx=%llu/%llu\n", __func__, | |
931 | iface->ifname, iface, iface->last_known[IFS_RX].bytes, | |
932 | iface->last_known[IFS_TX].bytes); | |
933 | } | |
934 | } | |
935 | ||
936 | /* | |
937 | * Create a new entry for tracking the specified interface. | |
938 | * Do nothing if the entry already exists. | |
939 | * Called when an interface is configured with a valid IP address. | |
940 | */ | |
941 | static void iface_stat_create(struct net_device *net_dev, | |
942 | struct in_ifaddr *ifa) | |
943 | { | |
944 | struct in_device *in_dev = NULL; | |
945 | const char *ifname; | |
946 | struct iface_stat *entry; | |
947 | __be32 ipaddr = 0; | |
948 | struct iface_stat *new_iface; | |
949 | ||
950 | IF_DEBUG("qtaguid: iface_stat: create(%s): ifa=%p netdev=%p\n", | |
951 | net_dev ? net_dev->name : "?", | |
952 | ifa, net_dev); | |
953 | if (!net_dev) { | |
954 | pr_err("qtaguid: iface_stat: create(): no net dev\n"); | |
955 | return; | |
956 | } | |
957 | ||
958 | ifname = net_dev->name; | |
959 | if (!ifa) { | |
960 | in_dev = in_dev_get(net_dev); | |
961 | if (!in_dev) { | |
962 | pr_err("qtaguid: iface_stat: create(%s): no inet dev\n", | |
963 | ifname); | |
964 | return; | |
965 | } | |
966 | IF_DEBUG("qtaguid: iface_stat: create(%s): in_dev=%p\n", | |
967 | ifname, in_dev); | |
968 | for (ifa = in_dev->ifa_list; ifa; ifa = ifa->ifa_next) { | |
969 | IF_DEBUG("qtaguid: iface_stat: create(%s): " | |
970 | "ifa=%p ifa_label=%s\n", | |
971 | ifname, ifa, | |
972 | ifa->ifa_label ? ifa->ifa_label : "(null)"); | |
973 | if (ifa->ifa_label && !strcmp(ifname, ifa->ifa_label)) | |
974 | break; | |
975 | } | |
976 | } | |
977 | ||
978 | if (!ifa) { | |
979 | IF_DEBUG("qtaguid: iface_stat: create(%s): no matching IP\n", | |
980 | ifname); | |
981 | goto done_put; | |
982 | } | |
983 | ipaddr = ifa->ifa_local; | |
984 | ||
985 | spin_lock_bh(&iface_stat_list_lock); | |
986 | entry = get_iface_entry(ifname); | |
987 | if (entry != NULL) { | |
988 | IF_DEBUG("qtaguid: iface_stat: create(%s): entry=%p\n", | |
989 | ifname, entry); | |
990 | iface_check_stats_reset_and_adjust(net_dev, entry); | |
991 | _iface_stat_set_active(entry, net_dev, true); | |
992 | IF_DEBUG("qtaguid: %s(%s): " | |
993 | "tracking now %d on ip=%pI4\n", __func__, | |
994 | entry->ifname, true, &ipaddr); | |
995 | goto done_unlock_put; | |
996 | } | |
997 | ||
998 | new_iface = iface_alloc(net_dev); | |
999 | IF_DEBUG("qtaguid: iface_stat: create(%s): done " | |
1000 | "entry=%p ip=%pI4\n", ifname, new_iface, &ipaddr); | |
1001 | done_unlock_put: | |
1002 | spin_unlock_bh(&iface_stat_list_lock); | |
1003 | done_put: | |
1004 | if (in_dev) | |
1005 | in_dev_put(in_dev); | |
1006 | } | |
1007 | ||
1008 | static void iface_stat_create_ipv6(struct net_device *net_dev, | |
1009 | struct inet6_ifaddr *ifa) | |
1010 | { | |
1011 | struct in_device *in_dev; | |
1012 | const char *ifname; | |
1013 | struct iface_stat *entry; | |
1014 | struct iface_stat *new_iface; | |
1015 | int addr_type; | |
1016 | ||
1017 | IF_DEBUG("qtaguid: iface_stat: create6(): ifa=%p netdev=%p->name=%s\n", | |
1018 | ifa, net_dev, net_dev ? net_dev->name : ""); | |
1019 | if (!net_dev) { | |
1020 | pr_err("qtaguid: iface_stat: create6(): no net dev!\n"); | |
1021 | return; | |
1022 | } | |
1023 | ifname = net_dev->name; | |
1024 | ||
1025 | in_dev = in_dev_get(net_dev); | |
1026 | if (!in_dev) { | |
1027 | pr_err("qtaguid: iface_stat: create6(%s): no inet dev\n", | |
1028 | ifname); | |
1029 | return; | |
1030 | } | |
1031 | ||
1032 | IF_DEBUG("qtaguid: iface_stat: create6(%s): in_dev=%p\n", | |
1033 | ifname, in_dev); | |
1034 | ||
1035 | if (!ifa) { | |
1036 | IF_DEBUG("qtaguid: iface_stat: create6(%s): no matching IP\n", | |
1037 | ifname); | |
1038 | goto done_put; | |
1039 | } | |
1040 | addr_type = ipv6_addr_type(&ifa->addr); | |
1041 | ||
1042 | spin_lock_bh(&iface_stat_list_lock); | |
1043 | entry = get_iface_entry(ifname); | |
1044 | if (entry != NULL) { | |
1045 | IF_DEBUG("qtaguid: %s(%s): entry=%p\n", __func__, | |
1046 | ifname, entry); | |
1047 | iface_check_stats_reset_and_adjust(net_dev, entry); | |
1048 | _iface_stat_set_active(entry, net_dev, true); | |
1049 | IF_DEBUG("qtaguid: %s(%s): " | |
1050 | "tracking now %d on ip=%pI6c\n", __func__, | |
1051 | entry->ifname, true, &ifa->addr); | |
1052 | goto done_unlock_put; | |
1053 | } | |
1054 | ||
1055 | new_iface = iface_alloc(net_dev); | |
1056 | IF_DEBUG("qtaguid: iface_stat: create6(%s): done " | |
1057 | "entry=%p ip=%pI6c\n", ifname, new_iface, &ifa->addr); | |
1058 | ||
1059 | done_unlock_put: | |
1060 | spin_unlock_bh(&iface_stat_list_lock); | |
1061 | done_put: | |
1062 | in_dev_put(in_dev); | |
1063 | } | |
1064 | ||
1065 | static struct sock_tag *get_sock_stat_nl(const struct sock *sk) | |
1066 | { | |
1067 | MT_DEBUG("qtaguid: get_sock_stat_nl(sk=%p)\n", sk); | |
1068 | return sock_tag_tree_search(&sock_tag_tree, sk); | |
1069 | } | |
1070 | ||
1071 | static struct sock_tag *get_sock_stat(const struct sock *sk) | |
1072 | { | |
1073 | struct sock_tag *sock_tag_entry; | |
1074 | MT_DEBUG("qtaguid: get_sock_stat(sk=%p)\n", sk); | |
1075 | if (!sk) | |
1076 | return NULL; | |
1077 | spin_lock_bh(&sock_tag_list_lock); | |
1078 | sock_tag_entry = get_sock_stat_nl(sk); | |
1079 | spin_unlock_bh(&sock_tag_list_lock); | |
1080 | return sock_tag_entry; | |
1081 | } | |
1082 | ||
1083 | static int ipx_proto(const struct sk_buff *skb, | |
1084 | struct xt_action_param *par) | |
1085 | { | |
1086 | int thoff = 0, tproto; | |
1087 | ||
1088 | switch (par->family) { | |
1089 | case NFPROTO_IPV6: | |
1090 | tproto = ipv6_find_hdr(skb, &thoff, -1, NULL, NULL); | |
1091 | if (tproto < 0) | |
1092 | MT_DEBUG("%s(): transport header not found in ipv6" | |
1093 | " skb=%p\n", __func__, skb); | |
1094 | break; | |
1095 | case NFPROTO_IPV4: | |
1096 | tproto = ip_hdr(skb)->protocol; | |
1097 | break; | |
1098 | default: | |
1099 | tproto = IPPROTO_RAW; | |
1100 | } | |
1101 | return tproto; | |
1102 | } | |
1103 | ||
1104 | static void | |
1105 | data_counters_update(struct data_counters *dc, int set, | |
1106 | enum ifs_tx_rx direction, int proto, int bytes) | |
1107 | { | |
1108 | switch (proto) { | |
1109 | case IPPROTO_TCP: | |
1110 | dc_add_byte_packets(dc, set, direction, IFS_TCP, bytes, 1); | |
1111 | break; | |
1112 | case IPPROTO_UDP: | |
1113 | dc_add_byte_packets(dc, set, direction, IFS_UDP, bytes, 1); | |
1114 | break; | |
1115 | case IPPROTO_IP: | |
1116 | default: | |
1117 | dc_add_byte_packets(dc, set, direction, IFS_PROTO_OTHER, bytes, | |
1118 | 1); | |
1119 | break; | |
1120 | } | |
1121 | } | |
1122 | ||
1123 | /* | |
1124 | * Update stats for the specified interface. Do nothing if the entry | |
1125 | * does not exist (when a device was never configured with an IP address). | |
1126 | * Called when a device is going down or being unregistered. | |
1127 | */ | |
1128 | static void iface_stat_update(struct net_device *net_dev, bool stash_only) | |
1129 | { | |
1130 | struct rtnl_link_stats64 dev_stats, *stats; | |
1131 | struct iface_stat *entry; | |
1132 | ||
1133 | stats = dev_get_stats(net_dev, &dev_stats); | |
1134 | spin_lock_bh(&iface_stat_list_lock); | |
1135 | entry = get_iface_entry(net_dev->name); | |
1136 | if (entry == NULL) { | |
1137 | IF_DEBUG("qtaguid: iface_stat: update(%s): not tracked\n", | |
1138 | net_dev->name); | |
1139 | spin_unlock_bh(&iface_stat_list_lock); | |
1140 | return; | |
1141 | } | |
1142 | ||
1143 | IF_DEBUG("qtaguid: %s(%s): entry=%p\n", __func__, | |
1144 | net_dev->name, entry); | |
1145 | if (!entry->active) { | |
1146 | IF_DEBUG("qtaguid: %s(%s): already disabled\n", __func__, | |
1147 | net_dev->name); | |
1148 | spin_unlock_bh(&iface_stat_list_lock); | |
1149 | return; | |
1150 | } | |
1151 | ||
1152 | if (stash_only) { | |
1153 | entry->last_known[IFS_TX].bytes = stats->tx_bytes; | |
1154 | entry->last_known[IFS_TX].packets = stats->tx_packets; | |
1155 | entry->last_known[IFS_RX].bytes = stats->rx_bytes; | |
1156 | entry->last_known[IFS_RX].packets = stats->rx_packets; | |
1157 | entry->last_known_valid = true; | |
1158 | IF_DEBUG("qtaguid: %s(%s): " | |
1159 | "dev stats stashed rx/tx=%llu/%llu\n", __func__, | |
1160 | net_dev->name, stats->rx_bytes, stats->tx_bytes); | |
1161 | spin_unlock_bh(&iface_stat_list_lock); | |
1162 | return; | |
1163 | } | |
1164 | entry->totals_via_dev[IFS_TX].bytes += stats->tx_bytes; | |
1165 | entry->totals_via_dev[IFS_TX].packets += stats->tx_packets; | |
1166 | entry->totals_via_dev[IFS_RX].bytes += stats->rx_bytes; | |
1167 | entry->totals_via_dev[IFS_RX].packets += stats->rx_packets; | |
1168 | /* We don't need the last_known[] anymore */ | |
1169 | entry->last_known_valid = false; | |
1170 | _iface_stat_set_active(entry, net_dev, false); | |
1171 | IF_DEBUG("qtaguid: %s(%s): " | |
1172 | "disable tracking. rx/tx=%llu/%llu\n", __func__, | |
1173 | net_dev->name, stats->rx_bytes, stats->tx_bytes); | |
1174 | spin_unlock_bh(&iface_stat_list_lock); | |
1175 | } | |
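/*
 * Note on stash_only: the netdevice notifier calls
 * iface_stat_update(dev, true) on NETDEV_DOWN, which only snapshots the
 * current dev stats into last_known[], and iface_stat_update(dev, false)
 * on NETDEV_UNREGISTER, which folds the dev stats into totals_via_dev[]
 * and marks the entry inactive.
 */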
1176 | ||
1177 | /* | |
1178 | * Update stats for the specified interface from the skb. | |
1179 | * Do nothing if the entry does not exist (when a device was never | |
1180 | * configured with an IP address). | |
1181 | * Called on each sk. | |
1182 | */ | |
1183 | static void iface_stat_update_from_skb(const struct sk_buff *skb, | |
1184 | struct xt_action_param *par) | |
1185 | { | |
1186 | struct iface_stat *entry; | |
1187 | const struct net_device *el_dev; | |
1188 | enum ifs_tx_rx direction = par->in ? IFS_RX : IFS_TX; | |
1189 | int bytes = skb->len; | |
1190 | int proto; | |
1191 | ||
1192 | if (!skb->dev) { | |
1193 | MT_DEBUG("qtaguid[%d]: no skb->dev\n", par->hooknum); | |
1194 | el_dev = par->in ? : par->out; | |
1195 | } else { | |
1196 | const struct net_device *other_dev; | |
1197 | el_dev = skb->dev; | |
1198 | other_dev = par->in ? : par->out; | |
1199 | if (el_dev != other_dev) { | |
1200 | MT_DEBUG("qtaguid[%d]: skb->dev=%p %s vs " | |
1201 | "par->(in/out)=%p %s\n", | |
1202 | par->hooknum, el_dev, el_dev->name, other_dev, | |
1203 | other_dev->name); | |
1204 | } | |
1205 | } | |
1206 | ||
1207 | if (unlikely(!el_dev)) { | |
1208 | pr_err_ratelimited("qtaguid[%d]: %s(): no par->in/out?!!\n", | |
1209 | par->hooknum, __func__); | |
1210 | BUG(); | |
1211 | } else if (unlikely(!el_dev->name)) { | |
1212 | pr_err_ratelimited("qtaguid[%d]: %s(): no dev->name?!!\n", | |
1213 | par->hooknum, __func__); | |
1214 | BUG(); | |
1215 | } else { | |
1216 | proto = ipx_proto(skb, par); | |
1217 | MT_DEBUG("qtaguid[%d]: dev name=%s type=%d fam=%d proto=%d\n", | |
1218 | par->hooknum, el_dev->name, el_dev->type, | |
1219 | par->family, proto); | |
1220 | } | |
1221 | ||
1222 | spin_lock_bh(&iface_stat_list_lock); | |
1223 | entry = get_iface_entry(el_dev->name); | |
1224 | if (entry == NULL) { | |
1225 | IF_DEBUG("qtaguid: iface_stat: %s(%s): not tracked\n", | |
1226 | __func__, el_dev->name); | |
1227 | spin_unlock_bh(&iface_stat_list_lock); | |
1228 | return; | |
1229 | } | |
1230 | ||
1231 | IF_DEBUG("qtaguid: %s(%s): entry=%p\n", __func__, | |
1232 | el_dev->name, entry); | |
1233 | ||
1234 | data_counters_update(&entry->totals_via_skb, 0, direction, proto, | |
1235 | bytes); | |
1236 | spin_unlock_bh(&iface_stat_list_lock); | |
1237 | } | |
1238 | ||
1239 | static void tag_stat_update(struct tag_stat *tag_entry, | |
1240 | enum ifs_tx_rx direction, int proto, int bytes) | |
1241 | { | |
1242 | int active_set; | |
1243 | active_set = get_active_counter_set(tag_entry->tn.tag); | |
1244 | MT_DEBUG("qtaguid: tag_stat_update(tag=0x%llx (uid=%u) set=%d " | |
1245 | "dir=%d proto=%d bytes=%d)\n", | |
1246 | tag_entry->tn.tag, get_uid_from_tag(tag_entry->tn.tag), | |
1247 | active_set, direction, proto, bytes); | |
1248 | data_counters_update(&tag_entry->counters, active_set, direction, | |
1249 | proto, bytes); | |
1250 | if (tag_entry->parent_counters) | |
1251 | data_counters_update(tag_entry->parent_counters, active_set, | |
1252 | direction, proto, bytes); | |
1253 | } | |
1254 | ||
1255 | /* | |
1256 | * Create a new entry for tracking the specified {acct_tag,uid_tag} within | |
1257 | * the interface. | |
1258 | * iface_entry->tag_stat_list_lock should be held. | |
1259 | */ | |
1260 | static struct tag_stat *create_if_tag_stat(struct iface_stat *iface_entry, | |
1261 | tag_t tag) | |
1262 | { | |
1263 | struct tag_stat *new_tag_stat_entry = NULL; | |
1264 | IF_DEBUG("qtaguid: iface_stat: %s(): ife=%p tag=0x%llx" | |
1265 | " (uid=%u)\n", __func__, | |
1266 | iface_entry, tag, get_uid_from_tag(tag)); | |
1267 | new_tag_stat_entry = kzalloc(sizeof(*new_tag_stat_entry), GFP_ATOMIC); | |
1268 | if (!new_tag_stat_entry) { | |
1269 | pr_err("qtaguid: iface_stat: tag stat alloc failed\n"); | |
1270 | goto done; | |
1271 | } | |
1272 | new_tag_stat_entry->tn.tag = tag; | |
1273 | tag_stat_tree_insert(new_tag_stat_entry, &iface_entry->tag_stat_tree); | |
1274 | done: | |
1275 | return new_tag_stat_entry; | |
1276 | } | |
1277 | ||
1278 | static void if_tag_stat_update(const char *ifname, uid_t uid, | |
1279 | const struct sock *sk, enum ifs_tx_rx direction, | |
1280 | int proto, int bytes) | |
1281 | { | |
1282 | struct tag_stat *tag_stat_entry; | |
1283 | tag_t tag, acct_tag; | |
1284 | tag_t uid_tag; | |
1285 | struct data_counters *uid_tag_counters; | |
1286 | struct sock_tag *sock_tag_entry; | |
1287 | struct iface_stat *iface_entry; | |
1288 | struct tag_stat *new_tag_stat = NULL; | |
1289 | MT_DEBUG("qtaguid: if_tag_stat_update(ifname=%s " | |
1290 | "uid=%u sk=%p dir=%d proto=%d bytes=%d)\n", | |
1291 | ifname, uid, sk, direction, proto, bytes); | |
1292 | ||
1293 | ||
1294 | iface_entry = get_iface_entry(ifname); | |
1295 | if (!iface_entry) { | |
1296 | pr_err_ratelimited("qtaguid: iface_stat: stat_update() " | |
1297 | "%s not found\n", ifname); | |
1298 | return; | |
1299 | } | |
1300 | /* It is ok to process data when an iface_entry is inactive */ | |
1301 | ||
1302 | MT_DEBUG("qtaguid: iface_stat: stat_update() dev=%s entry=%p\n", | |
1303 | ifname, iface_entry); | |
1304 | ||
1305 | /* | |
1306 | * Look for a tagged sock. | |
1307 | * It will have an acct_uid. | |
1308 | */ | |
1309 | sock_tag_entry = get_sock_stat(sk); | |
1310 | if (sock_tag_entry) { | |
1311 | tag = sock_tag_entry->tag; | |
1312 | acct_tag = get_atag_from_tag(tag); | |
1313 | uid_tag = get_utag_from_tag(tag); | |
1314 | } else { | |
1315 | acct_tag = make_atag_from_value(0); | |
1316 | tag = combine_atag_with_uid(acct_tag, uid); | |
1317 | uid_tag = make_tag_from_uid(uid); | |
1318 | } | |
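	/*
	 * tag_t packs the accounting tag together with the uid (per the
	 * helpers in xt_qtaguid_internal.h; assumed layout: acct tag in the
	 * upper bits, uid in the lower 32 bits), so an untagged socket simply
	 * falls back to the pure-uid tag built above.
	 */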
1319 | MT_DEBUG("qtaguid: iface_stat: stat_update(): " | |
1320 | " looking for tag=0x%llx (uid=%u) in ife=%p\n", | |
1321 | tag, get_uid_from_tag(tag), iface_entry); | |
1322 | /* Loop over tag list under this interface for {acct_tag,uid_tag} */ | |
1323 | spin_lock_bh(&iface_entry->tag_stat_list_lock); | |
1324 | ||
1325 | tag_stat_entry = tag_stat_tree_search(&iface_entry->tag_stat_tree, | |
1326 | tag); | |
1327 | if (tag_stat_entry) { | |
1328 | /* | |
1329 | * Updating the {acct_tag, uid_tag} entry handles both stats: | |
1330 | * {0, uid_tag} will also get updated. | |
1331 | */ | |
1332 | tag_stat_update(tag_stat_entry, direction, proto, bytes); | |
1333 | spin_unlock_bh(&iface_entry->tag_stat_list_lock); | |
1334 | return; | |
1335 | } | |
1336 | ||
1337 | /* Loop over tag list under this interface for {0,uid_tag} */ | |
1338 | tag_stat_entry = tag_stat_tree_search(&iface_entry->tag_stat_tree, | |
1339 | uid_tag); | |
1340 | if (!tag_stat_entry) { | |
1341 | /* Here: the base uid_tag did not exist */ | |
1342 | /* | |
1343 | * No parent counters. So | |
1344 | * - No {0, uid_tag} stats and no {acct_tag, uid_tag} stats. | |
1345 | */ | |
1346 | new_tag_stat = create_if_tag_stat(iface_entry, uid_tag); | |
1347 | if (!new_tag_stat) | |
1348 | goto unlock; | |
1349 | uid_tag_counters = &new_tag_stat->counters; | |
1350 | } else { | |
1351 | uid_tag_counters = &tag_stat_entry->counters; | |
1352 | } | |
1353 | ||
1354 | if (acct_tag) { | |
1355 | /* Create the child {acct_tag, uid_tag} and hook up parent. */ | |
1356 | new_tag_stat = create_if_tag_stat(iface_entry, tag); | |
1357 | if (!new_tag_stat) | |
1358 | goto unlock; | |
1359 | new_tag_stat->parent_counters = uid_tag_counters; | |
1360 | } else { | |
1361 | /* | |
1362 | * For new_tag_stat to be still NULL here would require: | |
1363 | * {0, uid_tag} exists | |
1364 | * and {acct_tag, uid_tag} doesn't exist | |
1365 | * AND acct_tag == 0. | |
1366 | * Impossible. This reassures us that new_tag_stat | |
1367 | * below will always be assigned. | |
1368 | */ | |
1369 | BUG_ON(!new_tag_stat); | |
1370 | } | |
1371 | tag_stat_update(new_tag_stat, direction, proto, bytes); | |
1372 | unlock: | |
1373 | spin_unlock_bh(&iface_entry->tag_stat_list_lock); | |
1374 | } | |
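/*
 * To summarize the flow above: every packet is charged to the per-uid
 * {0, uid_tag} entry, and, when the socket carries an acct_tag, also to a
 * child {acct_tag, uid_tag} entry whose parent_counters points back at the
 * per-uid counters, so tag_stat_update() bumps both in one call.
 */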
1375 | ||
1376 | static int iface_netdev_event_handler(struct notifier_block *nb, | |
1377 | unsigned long event, void *ptr) { | |
1378 | struct net_device *dev = ptr; | |
1379 | ||
1380 | if (unlikely(module_passive)) | |
1381 | return NOTIFY_DONE; | |
1382 | ||
1383 | IF_DEBUG("qtaguid: iface_stat: netdev_event(): " | |
1384 | "ev=0x%lx/%s netdev=%p->name=%s\n", | |
1385 | event, netdev_evt_str(event), dev, dev ? dev->name : ""); | |
1386 | ||
1387 | switch (event) { | |
1388 | case NETDEV_UP: | |
1389 | iface_stat_create(dev, NULL); | |
1390 | atomic64_inc(&qtu_events.iface_events); | |
1391 | break; | |
1392 | case NETDEV_DOWN: | |
1393 | case NETDEV_UNREGISTER: | |
1394 | iface_stat_update(dev, event == NETDEV_DOWN); | |
1395 | atomic64_inc(&qtu_events.iface_events); | |
1396 | break; | |
1397 | } | |
1398 | return NOTIFY_DONE; | |
1399 | } | |
1400 | ||
1401 | static int iface_inet6addr_event_handler(struct notifier_block *nb, | |
1402 | unsigned long event, void *ptr) | |
1403 | { | |
1404 | struct inet6_ifaddr *ifa = ptr; | |
1405 | struct net_device *dev; | |
1406 | ||
1407 | if (unlikely(module_passive)) | |
1408 | return NOTIFY_DONE; | |
1409 | ||
1410 | IF_DEBUG("qtaguid: iface_stat: inet6addr_event(): " | |
1411 | "ev=0x%lx/%s ifa=%p\n", | |
1412 | event, netdev_evt_str(event), ifa); | |
1413 | ||
1414 | switch (event) { | |
1415 | case NETDEV_UP: | |
1416 | BUG_ON(!ifa || !ifa->idev); | |
1417 | dev = (struct net_device *)ifa->idev->dev; | |
1418 | iface_stat_create_ipv6(dev, ifa); | |
1419 | atomic64_inc(&qtu_events.iface_events); | |
1420 | break; | |
1421 | case NETDEV_DOWN: | |
1422 | case NETDEV_UNREGISTER: | |
1423 | BUG_ON(!ifa || !ifa->idev); | |
1424 | dev = (struct net_device *)ifa->idev->dev; | |
1425 | iface_stat_update(dev, event == NETDEV_DOWN); | |
1426 | atomic64_inc(&qtu_events.iface_events); | |
1427 | break; | |
1428 | } | |
1429 | return NOTIFY_DONE; | |
1430 | } | |
1431 | ||
1432 | static int iface_inetaddr_event_handler(struct notifier_block *nb, | |
1433 | unsigned long event, void *ptr) | |
1434 | { | |
1435 | struct in_ifaddr *ifa = ptr; | |
1436 | struct net_device *dev; | |
1437 | ||
1438 | if (unlikely(module_passive)) | |
1439 | return NOTIFY_DONE; | |
1440 | ||
1441 | IF_DEBUG("qtaguid: iface_stat: inetaddr_event(): " | |
1442 | "ev=0x%lx/%s ifa=%p\n", | |
1443 | event, netdev_evt_str(event), ifa); | |
1444 | ||
1445 | switch (event) { | |
1446 | case NETDEV_UP: | |
1447 | BUG_ON(!ifa || !ifa->ifa_dev); | |
1448 | dev = ifa->ifa_dev->dev; | |
1449 | iface_stat_create(dev, ifa); | |
1450 | atomic64_inc(&qtu_events.iface_events); | |
1451 | break; | |
1452 | case NETDEV_DOWN: | |
1453 | case NETDEV_UNREGISTER: | |
1454 | BUG_ON(!ifa || !ifa->ifa_dev); | |
1455 | dev = ifa->ifa_dev->dev; | |
1456 | iface_stat_update(dev, event == NETDEV_DOWN); | |
1457 | atomic64_inc(&qtu_events.iface_events); | |
1458 | break; | |
1459 | } | |
1460 | return NOTIFY_DONE; | |
1461 | } | |
1462 | ||
1463 | static struct notifier_block iface_netdev_notifier_blk = { | |
1464 | .notifier_call = iface_netdev_event_handler, | |
1465 | }; | |
1466 | ||
1467 | static struct notifier_block iface_inetaddr_notifier_blk = { | |
1468 | .notifier_call = iface_inetaddr_event_handler, | |
1469 | }; | |
1470 | ||
1471 | static struct notifier_block iface_inet6addr_notifier_blk = { | |
1472 | .notifier_call = iface_inet6addr_event_handler, | |
1473 | }; | |
1474 | ||
1475 | static const struct seq_operations iface_stat_fmt_proc_seq_ops = { | |
1476 | .start = iface_stat_fmt_proc_start, | |
1477 | .next = iface_stat_fmt_proc_next, | |
1478 | .stop = iface_stat_fmt_proc_stop, | |
1479 | .show = iface_stat_fmt_proc_show, | |
1480 | }; | |
1481 | ||
1482 | static int proc_iface_stat_fmt_open(struct inode *inode, struct file *file) | |
1483 | { | |
1484 | struct proc_iface_stat_fmt_info *s; | |
1485 | ||
1486 | s = __seq_open_private(file, &iface_stat_fmt_proc_seq_ops, | |
1487 | sizeof(struct proc_iface_stat_fmt_info)); | |
1488 | if (!s) | |
1489 | return -ENOMEM; | |
1490 | ||
1491 | s->fmt = (uintptr_t)PDE_DATA(inode); | |
1492 | return 0; | |
1493 | } | |
1494 | ||
1495 | static const struct file_operations proc_iface_stat_fmt_fops = { | |
1496 | .open = proc_iface_stat_fmt_open, | |
1497 | .read = seq_read, | |
1498 | .llseek = seq_lseek, | |
1499 | .release = seq_release_private, | |
1500 | }; | |
1501 | ||
1502 | static int __init iface_stat_init(struct proc_dir_entry *parent_procdir) | |
1503 | { | |
1504 | int err; | |
1505 | ||
1506 | iface_stat_procdir = proc_mkdir(iface_stat_procdirname, parent_procdir); | |
1507 | if (!iface_stat_procdir) { | |
1508 | pr_err("qtaguid: iface_stat: init failed to create proc entry\n"); | |
1509 | err = -1; | |
1510 | goto err; | |
1511 | } | |
1512 | ||
1513 | iface_stat_all_procfile = proc_create_data(iface_stat_all_procfilename, | |
1514 | proc_iface_perms, | |
1515 | parent_procdir, | |
1516 | &proc_iface_stat_fmt_fops, | |
1517 | (void *)1 /* fmt1 */); | |
1518 | if (!iface_stat_all_procfile) { | |
1519 | pr_err("qtaguid: iface_stat: init " | |
1520 | " failed to create stat_old proc entry\n"); | |
1521 | err = -1; | |
1522 | goto err_zap_entry; | |
1523 | } | |
1524 | ||
1525 | iface_stat_fmt_procfile = proc_create_data(iface_stat_fmt_procfilename, | |
1526 | proc_iface_perms, | |
1527 | parent_procdir, | |
1528 | &proc_iface_stat_fmt_fops, | |
1529 | (void *)2 /* fmt2 */); | |
1530 | if (!iface_stat_fmt_procfile) { | |
1531 | pr_err("qtaguid: iface_stat: init " | |
1532 | " failed to create stat_all proc entry\n"); | |
1533 | err = -1; | |
1534 | goto err_zap_all_stats_entry; | |
1535 | } | |
1536 | ||
1537 | ||
1538 | err = register_netdevice_notifier(&iface_netdev_notifier_blk); | |
1539 | if (err) { | |
1540 | pr_err("qtaguid: iface_stat: init " | |
1541 | "failed to register dev event handler\n"); | |
1542 | goto err_zap_all_stats_entries; | |
1543 | } | |
1544 | err = register_inetaddr_notifier(&iface_inetaddr_notifier_blk); | |
1545 | if (err) { | |
1546 | pr_err("qtaguid: iface_stat: init " | |
1547 | "failed to register ipv4 dev event handler\n"); | |
1548 | goto err_unreg_nd; | |
1549 | } | |
1550 | ||
1551 | err = register_inet6addr_notifier(&iface_inet6addr_notifier_blk); | |
1552 | if (err) { | |
1553 | pr_err("qtaguid: iface_stat: init " | |
1554 | "failed to register ipv6 dev event handler\n"); | |
1555 | goto err_unreg_ip4_addr; | |
1556 | } | |
1557 | return 0; | |
1558 | ||
1559 | err_unreg_ip4_addr: | |
1560 | unregister_inetaddr_notifier(&iface_inetaddr_notifier_blk); | |
1561 | err_unreg_nd: | |
1562 | unregister_netdevice_notifier(&iface_netdev_notifier_blk); | |
1563 | err_zap_all_stats_entries: | |
1564 | remove_proc_entry(iface_stat_fmt_procfilename, parent_procdir); | |
1565 | err_zap_all_stats_entry: | |
1566 | remove_proc_entry(iface_stat_all_procfilename, parent_procdir); | |
1567 | err_zap_entry: | |
1568 | remove_proc_entry(iface_stat_procdirname, parent_procdir); | |
1569 | err: | |
1570 | return err; | |
1571 | } | |
1572 | ||
1573 | static struct sock *qtaguid_find_sk(const struct sk_buff *skb, | |
1574 | struct xt_action_param *par) | |
1575 | { | |
1576 | struct sock *sk; | |
1577 | unsigned int hook_mask = (1 << par->hooknum); | |
1578 | ||
1579 | MT_DEBUG("qtaguid: find_sk(skb=%p) hooknum=%d family=%d\n", skb, | |
1580 | par->hooknum, par->family); | |
1581 | ||
1582 | /* | |
1583 | * Let's not abuse the xt_socket_get*_sk(), or else it will | 
1584 | * return garbage SKs. | |
1585 | */ | |
1586 | if (!(hook_mask & XT_SOCKET_SUPPORTED_HOOKS)) | |
1587 | return NULL; | |
1588 | ||
1589 | switch (par->family) { | |
1590 | case NFPROTO_IPV6: | |
1591 | sk = xt_socket_get6_sk(skb, par); | |
1592 | break; | |
1593 | case NFPROTO_IPV4: | |
1594 | sk = xt_socket_get4_sk(skb, par); | |
1595 | break; | |
1596 | default: | |
1597 | return NULL; | |
1598 | } | |
1599 | ||
1600 | if (sk) { | |
1601 | MT_DEBUG("qtaguid: %p->sk_proto=%u " | |
1602 | "->sk_state=%d\n", sk, sk->sk_protocol, sk->sk_state); | |
1603 | /* | |
1604 | * When in TCP_TIME_WAIT the sk is not a "struct sock" but | |
1605 | * "struct inet_timewait_sock" which is missing fields. | |
1606 | */ | |
1607 | if (sk->sk_state == TCP_TIME_WAIT) { | |
1608 | xt_socket_put_sk(sk); | |
1609 | sk = NULL; | |
1610 | } | |
1611 | } | |
1612 | return sk; | |
1613 | } | |
1614 | ||
1615 | static void account_for_uid(const struct sk_buff *skb, | |
1616 | const struct sock *alternate_sk, uid_t uid, | |
1617 | struct xt_action_param *par) | |
1618 | { | |
1619 | const struct net_device *el_dev; | |
1620 | ||
1621 | if (!skb->dev) { | |
1622 | MT_DEBUG("qtaguid[%d]: no skb->dev\n", par->hooknum); | |
1623 | el_dev = par->in ? : par->out; | |
1624 | } else { | |
1625 | const struct net_device *other_dev; | |
1626 | el_dev = skb->dev; | |
1627 | other_dev = par->in ? : par->out; | |
1628 | if (el_dev != other_dev) { | |
1629 | MT_DEBUG("qtaguid[%d]: skb->dev=%p %s vs " | |
1630 | "par->(in/out)=%p %s\n", | |
1631 | par->hooknum, el_dev, el_dev->name, other_dev, | |
1632 | other_dev->name); | |
1633 | } | |
1634 | } | |
1635 | ||
1636 | if (unlikely(!el_dev)) { | |
1637 | pr_info("qtaguid[%d]: no par->in/out?!!\n", par->hooknum); | |
1638 | } else if (unlikely(!el_dev->name)) { | |
1639 | pr_info("qtaguid[%d]: no dev->name?!!\n", par->hooknum); | |
1640 | } else { | |
1641 | int proto = ipx_proto(skb, par); | |
1642 | MT_DEBUG("qtaguid[%d]: dev name=%s type=%d fam=%d proto=%d\n", | |
1643 | par->hooknum, el_dev->name, el_dev->type, | |
1644 | par->family, proto); | |
1645 | ||
1646 | if_tag_stat_update(el_dev->name, uid, | |
1647 | skb->sk ? skb->sk : alternate_sk, | |
1648 | par->in ? IFS_RX : IFS_TX, | |
1649 | proto, skb->len); | |
1650 | } | |
1651 | } | |
1652 | ||
1653 | static bool qtaguid_mt(const struct sk_buff *skb, struct xt_action_param *par) | |
1654 | { | |
1655 | const struct xt_qtaguid_match_info *info = par->matchinfo; | |
1656 | const struct file *filp; | |
1657 | bool got_sock = false; | |
1658 | struct sock *sk; | |
1659 | uid_t sock_uid; | |
1660 | bool res; | |
1661 | ||
1662 | if (unlikely(module_passive)) | |
1663 | return (info->match ^ info->invert) == 0; | |
1664 | ||
1665 | MT_DEBUG("qtaguid[%d]: entered skb=%p par->in=%p/out=%p fam=%d\n", | |
1666 | par->hooknum, skb, par->in, par->out, par->family); | |
1667 | ||
1668 | atomic64_inc(&qtu_events.match_calls); | |
1669 | if (skb == NULL) { | |
1670 | res = (info->match ^ info->invert) == 0; | |
1671 | goto ret_res; | |
1672 | } | |
1673 | ||
1674 | switch (par->hooknum) { | |
1675 | case NF_INET_PRE_ROUTING: | |
1676 | case NF_INET_POST_ROUTING: | |
1677 | atomic64_inc(&qtu_events.match_calls_prepost); | |
1678 | iface_stat_update_from_skb(skb, par); | |
1679 | /* | |
1680 | * We are done in pre/post. The skb will get processed | |
1681 | * further later. | 
1682 | */ | |
1683 | res = (info->match ^ info->invert); | |
1684 | goto ret_res; | |
1685 | break; | |
1686 | /* default: Fall through and do UID related work */ | 
1687 | } | |
1688 | ||
1689 | sk = skb->sk; | |
1690 | /* | |
1691 | * When in TCP_TIME_WAIT the sk is not a "struct sock" but | |
1692 | * "struct inet_timewait_sock" which is missing fields. | |
1693 | * So we ignore it. | |
1694 | */ | |
1695 | if (sk && sk->sk_state == TCP_TIME_WAIT) | |
1696 | sk = NULL; | |
1697 | if (sk == NULL) { | |
1698 | /* | |
1699 | * A missing sk->sk_socket happens when packets are in-flight | |
1700 | * and the matching socket is already closed and gone. | |
1701 | */ | |
1702 | sk = qtaguid_find_sk(skb, par); | |
1703 | /* | |
1704 | * If we got the socket from the find_sk(), we will need to put | |
1705 | * it back, as nf_tproxy_get_sock_v4() got it. | |
1706 | */ | |
1707 | got_sock = sk; | |
1708 | if (sk) | |
1709 | atomic64_inc(&qtu_events.match_found_sk_in_ct); | |
1710 | else | |
1711 | atomic64_inc(&qtu_events.match_found_no_sk_in_ct); | |
1712 | } else { | |
1713 | atomic64_inc(&qtu_events.match_found_sk); | |
1714 | } | |
1715 | MT_DEBUG("qtaguid[%d]: sk=%p got_sock=%d fam=%d proto=%d\n", | |
1716 | par->hooknum, sk, got_sock, par->family, ipx_proto(skb, par)); | |
1717 | if (sk != NULL) { | |
1718 | MT_DEBUG("qtaguid[%d]: sk=%p->sk_socket=%p->file=%p\n", | |
1719 | par->hooknum, sk, sk->sk_socket, | |
1720 | sk->sk_socket ? sk->sk_socket->file : (void *)-1LL); | |
1721 | filp = sk->sk_socket ? sk->sk_socket->file : NULL; | |
1722 | MT_DEBUG("qtaguid[%d]: filp...uid=%u\n", | |
1723 | par->hooknum, filp ? filp->f_cred->fsuid : -1); | |
1724 | } | |
1725 | ||
1726 | if (sk == NULL || sk->sk_socket == NULL) { | |
1727 | /* | |
1728 | * Here, the qtaguid_find_sk() using connection tracking | |
1729 | * couldn't find the owner, so for now we just count them | |
1730 | * against the system. | |
1731 | */ | |
1732 | /* | |
1733 | * TODO: unhack how to force just accounting. | |
1734 | * For now we only do iface stats when the uid-owner is not | |
1735 | * requested. | |
1736 | */ | |
1737 | if (!(info->match & XT_QTAGUID_UID)) | |
1738 | account_for_uid(skb, sk, 0, par); | |
1739 | MT_DEBUG("qtaguid[%d]: leaving (sk?sk->sk_socket)=%p\n", | |
1740 | par->hooknum, | |
1741 | sk ? sk->sk_socket : NULL); | |
1742 | res = (info->match ^ info->invert) == 0; | |
1743 | atomic64_inc(&qtu_events.match_no_sk); | |
1744 | goto put_sock_ret_res; | |
1745 | } else if (info->match & info->invert & XT_QTAGUID_SOCKET) { | |
1746 | res = false; | |
1747 | goto put_sock_ret_res; | |
1748 | } | |
1749 | filp = sk->sk_socket->file; | |
1750 | if (filp == NULL) { | |
1751 | MT_DEBUG("qtaguid[%d]: leaving filp=NULL\n", par->hooknum); | |
1752 | account_for_uid(skb, sk, 0, par); | |
1753 | res = ((info->match ^ info->invert) & | |
1754 | (XT_QTAGUID_UID | XT_QTAGUID_GID)) == 0; | |
1755 | /* mtk_net: patch to avoid duplicated accounting for uid 0 */ | 
1756 | res = true; | |
1757 | atomic64_inc(&qtu_events.match_no_sk_file); | |
1758 | goto put_sock_ret_res; | |
1759 | } | |
1760 | sock_uid = filp->f_cred->fsuid; | |
1761 | /* | |
1762 | * TODO: unhack how to force just accounting. | |
1763 | * For now we only do iface stats when the uid-owner is not requested | |
1764 | */ | |
1765 | if (!(info->match & XT_QTAGUID_UID)) | |
1766 | account_for_uid(skb, sk, sock_uid, par); | |
1767 | ||
1768 | /* | |
1769 | * The following two tests fail the match when: | |
1770 | * id not in range AND no inverted condition requested | |
1771 | * or id in range AND inverted condition requested | |
1772 | * Thus (!a && b) || (a && !b) == a ^ b | |
1773 | */ | |
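| /* | 
|  * Illustrative case (hypothetical values): with uid_min = uid_max = 10005 | 
|  * and XT_QTAGUID_UID clear in info->invert, a socket owned by uid 10005 | 
|  * yields in_range ^ !inverted == true ^ true == false, so the test below | 
|  * is skipped and the match can still succeed; any other owner uid yields | 
|  * false ^ true == true and the match fails. | 
|  */ | 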
1774 | if (info->match & XT_QTAGUID_UID) | |
1775 | if ((filp->f_cred->fsuid >= info->uid_min && | |
1776 | filp->f_cred->fsuid <= info->uid_max) ^ | |
1777 | !(info->invert & XT_QTAGUID_UID)) { | |
1778 | MT_DEBUG("qtaguid[%d]: leaving uid not matching\n", | |
1779 | par->hooknum); | |
1780 | res = false; | |
1781 | goto put_sock_ret_res; | |
1782 | } | |
1783 | if (info->match & XT_QTAGUID_GID) | |
1784 | if ((filp->f_cred->fsgid >= info->gid_min && | |
1785 | filp->f_cred->fsgid <= info->gid_max) ^ | |
1786 | !(info->invert & XT_QTAGUID_GID)) { | |
1787 | MT_DEBUG("qtaguid[%d]: leaving gid not matching\n", | |
1788 | par->hooknum); | |
1789 | res = false; | |
1790 | goto put_sock_ret_res; | |
1791 | } | |
1792 | ||
1793 | MT_DEBUG("qtaguid[%d]: leaving matched\n", par->hooknum); | |
1794 | res = true; | |
1795 | ||
1796 | put_sock_ret_res: | |
1797 | if (got_sock) | |
1798 | xt_socket_put_sk(sk); | |
1799 | ret_res: | |
1800 | MT_DEBUG("qtaguid[%d]: left %d\n", par->hooknum, res); | |
1801 | return res; | |
1802 | } | |
1803 | ||
1804 | #ifdef DDEBUG | |
1805 | /* This function is not in xt_qtaguid_print.c because of lock visibility */ | 
1806 | static void prdebug_full_state(int indent_level, const char *fmt, ...) | |
1807 | { | |
1808 | va_list args; | |
1809 | char *fmt_buff; | |
1810 | char *buff; | |
1811 | ||
1812 | if (!unlikely(qtaguid_debug_mask & DDEBUG_MASK)) | |
1813 | return; | |
1814 | ||
1815 | fmt_buff = kasprintf(GFP_ATOMIC, | |
1816 | "qtaguid: %s(): %s {\n", __func__, fmt); | |
1817 | BUG_ON(!fmt_buff); | |
1818 | va_start(args, fmt); | |
1819 | buff = kvasprintf(GFP_ATOMIC, | |
1820 | fmt_buff, args); | |
1821 | BUG_ON(!buff); | |
1822 | pr_debug("%s", buff); | |
1823 | kfree(fmt_buff); | |
1824 | kfree(buff); | |
1825 | va_end(args); | |
1826 | ||
1827 | spin_lock_bh(&sock_tag_list_lock); | |
1828 | prdebug_sock_tag_tree(indent_level, &sock_tag_tree); | |
1829 | spin_unlock_bh(&sock_tag_list_lock); | |
1830 | ||
1831 | spin_lock_bh(&sock_tag_list_lock); | |
1832 | spin_lock_bh(&uid_tag_data_tree_lock); | |
1833 | prdebug_uid_tag_data_tree(indent_level, &uid_tag_data_tree); | |
1834 | prdebug_proc_qtu_data_tree(indent_level, &proc_qtu_data_tree); | |
1835 | spin_unlock_bh(&uid_tag_data_tree_lock); | |
1836 | spin_unlock_bh(&sock_tag_list_lock); | |
1837 | ||
1838 | spin_lock_bh(&iface_stat_list_lock); | |
1839 | prdebug_iface_stat_list(indent_level, &iface_stat_list); | |
1840 | spin_unlock_bh(&iface_stat_list_lock); | |
1841 | ||
1842 | pr_debug("qtaguid: %s(): }\n", __func__); | |
1843 | } | |
1844 | #else | |
1845 | static void prdebug_full_state(int indent_level, const char *fmt, ...) {} | |
1846 | #endif | |
1847 | ||
1848 | struct proc_ctrl_print_info { | |
1849 | struct sock *sk; /* socket found by reading to sk_pos */ | |
1850 | loff_t sk_pos; | |
1851 | }; | |
1852 | ||
1853 | static void *qtaguid_ctrl_proc_next(struct seq_file *m, void *v, loff_t *pos) | |
1854 | { | |
1855 | struct proc_ctrl_print_info *pcpi = m->private; | |
1856 | struct sock_tag *sock_tag_entry = v; | |
1857 | struct rb_node *node; | |
1858 | ||
1859 | (*pos)++; | |
1860 | ||
1861 | if (!v || v == SEQ_START_TOKEN) | |
1862 | return NULL; | |
1863 | ||
1864 | node = rb_next(&sock_tag_entry->sock_node); | |
1865 | if (!node) { | |
1866 | pcpi->sk = NULL; | |
1867 | sock_tag_entry = SEQ_START_TOKEN; | |
1868 | } else { | |
1869 | sock_tag_entry = rb_entry(node, struct sock_tag, sock_node); | |
1870 | pcpi->sk = sock_tag_entry->sk; | |
1871 | } | |
1872 | pcpi->sk_pos = *pos; | |
1873 | return sock_tag_entry; | |
1874 | } | |
1875 | ||
1876 | static void *qtaguid_ctrl_proc_start(struct seq_file *m, loff_t *pos) | |
1877 | { | |
1878 | struct proc_ctrl_print_info *pcpi = m->private; | |
1879 | struct sock_tag *sock_tag_entry; | |
1880 | struct rb_node *node; | |
1881 | ||
1882 | spin_lock_bh(&sock_tag_list_lock); | |
1883 | ||
1884 | if (unlikely(module_passive)) | |
1885 | return NULL; | |
1886 | ||
1887 | if (*pos == 0) { | |
1888 | pcpi->sk_pos = 0; | |
1889 | node = rb_first(&sock_tag_tree); | |
1890 | if (!node) { | |
1891 | pcpi->sk = NULL; | |
1892 | return SEQ_START_TOKEN; | |
1893 | } | |
1894 | sock_tag_entry = rb_entry(node, struct sock_tag, sock_node); | |
1895 | pcpi->sk = sock_tag_entry->sk; | |
1896 | } else { | |
1897 | sock_tag_entry = (pcpi->sk ? get_sock_stat_nl(pcpi->sk) : | |
1898 | NULL) ?: SEQ_START_TOKEN; | |
1899 | if (*pos != pcpi->sk_pos) { | |
1900 | /* seq_read skipped a next call */ | |
1901 | *pos = pcpi->sk_pos; | |
1902 | return qtaguid_ctrl_proc_next(m, sock_tag_entry, pos); | |
1903 | } | |
1904 | } | |
1905 | return sock_tag_entry; | |
1906 | } | |
1907 | ||
1908 | static void qtaguid_ctrl_proc_stop(struct seq_file *m, void *v) | |
1909 | { | |
1910 | spin_unlock_bh(&sock_tag_list_lock); | |
1911 | } | |
1912 | ||
1913 | /* | |
1914 | * Procfs reader to get all active socket tags using style "1)" as described in | |
1915 | * fs/proc/generic.c | |
1916 | */ | |
1917 | static int qtaguid_ctrl_proc_show(struct seq_file *m, void *v) | |
1918 | { | |
1919 | struct sock_tag *sock_tag_entry = v; | |
1920 | uid_t uid; | |
1921 | long f_count; | |
1922 | ||
1923 | CT_DEBUG("qtaguid: proc ctrl pid=%u tgid=%u uid=%u\n", | |
1924 | current->pid, current->tgid, current_fsuid()); | |
1925 | ||
1926 | if (sock_tag_entry != SEQ_START_TOKEN) { | |
1927 | uid = get_uid_from_tag(sock_tag_entry->tag); | |
1928 | CT_DEBUG("qtaguid: proc_read(): sk=%p tag=0x%llx (uid=%u) " | |
1929 | "pid=%u\n", | |
1930 | sock_tag_entry->sk, | |
1931 | sock_tag_entry->tag, | |
1932 | uid, | |
1933 | sock_tag_entry->pid | |
1934 | ); | |
1935 | f_count = atomic_long_read( | |
1936 | &sock_tag_entry->socket->file->f_count); | |
4b9e9796 | 1937 | seq_printf(m, "sock=%pK tag=0x%llx (uid=%u) pid=%u " |
6fa3eb70 S |
1938 | "f_count=%lu\n", |
1939 | sock_tag_entry->sk, | |
1940 | sock_tag_entry->tag, uid, | |
1941 | sock_tag_entry->pid, f_count); | |
1942 | } else { | |
1943 | seq_printf(m, "events: sockets_tagged=%llu " | |
1944 | "sockets_untagged=%llu " | |
1945 | "counter_set_changes=%llu " | |
1946 | "delete_cmds=%llu " | |
1947 | "iface_events=%llu " | |
1948 | "match_calls=%llu " | |
1949 | "match_calls_prepost=%llu " | |
1950 | "match_found_sk=%llu " | |
1951 | "match_found_sk_in_ct=%llu " | |
1952 | "match_found_no_sk_in_ct=%llu " | |
1953 | "match_no_sk=%llu " | |
1954 | "match_no_sk_file=%llu\n", | |
1955 | (u64)atomic64_read(&qtu_events.sockets_tagged), | |
1956 | (u64)atomic64_read(&qtu_events.sockets_untagged), | |
1957 | (u64)atomic64_read(&qtu_events.counter_set_changes), | |
1958 | (u64)atomic64_read(&qtu_events.delete_cmds), | |
1959 | (u64)atomic64_read(&qtu_events.iface_events), | |
1960 | (u64)atomic64_read(&qtu_events.match_calls), | |
1961 | (u64)atomic64_read(&qtu_events.match_calls_prepost), | |
1962 | (u64)atomic64_read(&qtu_events.match_found_sk), | |
1963 | (u64)atomic64_read(&qtu_events.match_found_sk_in_ct), | |
1964 | (u64)atomic64_read(&qtu_events.match_found_no_sk_in_ct), | |
1965 | (u64)atomic64_read(&qtu_events.match_no_sk), | |
1966 | (u64)atomic64_read(&qtu_events.match_no_sk_file)); | |
1967 | ||
1968 | /* Count the following as part of the last item_index */ | |
1969 | prdebug_full_state(0, "proc ctrl"); | |
1970 | } | |
1971 | ||
1972 | return 0; | |
1973 | } | |
1974 | ||
1975 | /* | |
1976 | * Delete socket tags, and stat tags associated with a given | |
1977 | * accounting tag and uid. | 
1978 | */ | |
1979 | static int ctrl_cmd_delete(const char *input) | |
1980 | { | |
1981 | char cmd; | |
1982 | uid_t uid; | |
1983 | uid_t entry_uid; | |
1984 | tag_t acct_tag; | |
1985 | tag_t tag; | |
1986 | int res, argc; | |
1987 | struct iface_stat *iface_entry; | |
1988 | struct rb_node *node; | |
1989 | struct sock_tag *st_entry; | |
1990 | struct rb_root st_to_free_tree = RB_ROOT; | |
1991 | struct tag_stat *ts_entry; | |
1992 | struct tag_counter_set *tcs_entry; | |
1993 | struct tag_ref *tr_entry; | |
1994 | struct uid_tag_data *utd_entry; | |
1995 | ||
1996 | argc = sscanf(input, "%c %llu %u", &cmd, &acct_tag, &uid); | |
1997 | CT_DEBUG("qtaguid: ctrl_delete(%s): argc=%d cmd=%c " | |
1998 | "user_tag=0x%llx uid=%u\n", input, argc, cmd, | |
1999 | acct_tag, uid); | |
2000 | if (argc < 2) { | |
2001 | res = -EINVAL; | |
2002 | goto err; | |
2003 | } | |
2004 | if (!valid_atag(acct_tag)) { | |
2005 | pr_info("qtaguid: ctrl_delete(%s): invalid tag\n", input); | |
2006 | res = -EINVAL; | |
2007 | goto err; | |
2008 | } | |
2009 | if (argc < 3) { | |
2010 | uid = current_fsuid(); | |
2011 | } else if (!can_impersonate_uid(uid)) { | |
2012 | pr_info("qtaguid: ctrl_delete(%s): " | |
2013 | "insufficient priv from pid=%u tgid=%u uid=%u\n", | |
2014 | input, current->pid, current->tgid, current_fsuid()); | |
2015 | res = -EPERM; | |
2016 | goto err; | |
2017 | } | |
2018 | ||
2019 | tag = combine_atag_with_uid(acct_tag, uid); | |
2020 | CT_DEBUG("qtaguid: ctrl_delete(%s): " | |
2021 | "looking for tag=0x%llx (uid=%u)\n", | |
2022 | input, tag, uid); | |
2023 | ||
2024 | /* Delete socket tags */ | |
2025 | spin_lock_bh(&sock_tag_list_lock); | |
2026 | node = rb_first(&sock_tag_tree); | |
2027 | while (node) { | |
2028 | st_entry = rb_entry(node, struct sock_tag, sock_node); | |
2029 | entry_uid = get_uid_from_tag(st_entry->tag); | |
2030 | node = rb_next(node); | |
2031 | if (entry_uid != uid) | |
2032 | continue; | |
2033 | ||
2034 | CT_DEBUG("qtaguid: ctrl_delete(%s): st tag=0x%llx (uid=%u)\n", | |
2035 | input, st_entry->tag, entry_uid); | |
2036 | ||
2037 | if (!acct_tag || st_entry->tag == tag) { | |
2038 | rb_erase(&st_entry->sock_node, &sock_tag_tree); | |
2039 | /* Can't sockfd_put() within spinlock, do it later. */ | |
2040 | sock_tag_tree_insert(st_entry, &st_to_free_tree); | |
2041 | tr_entry = lookup_tag_ref(st_entry->tag, NULL); | |
2042 | BUG_ON(tr_entry->num_sock_tags <= 0); | |
2043 | tr_entry->num_sock_tags--; | |
2044 | /* | |
2045 | * TODO: remove if, and start failing. | |
2046 | * This is a hack to work around the fact that in some | |
2047 | * places we have "if (IS_ERR_OR_NULL(pqd_entry))" | |
2048 | * and are trying to work around apps | |
2049 | * that didn't open the /dev/xt_qtaguid. | |
2050 | */ | |
2051 | if (st_entry->list.next && st_entry->list.prev) | |
2052 | list_del(&st_entry->list); | |
2053 | } | |
2054 | } | |
2055 | spin_unlock_bh(&sock_tag_list_lock); | |
2056 | ||
2057 | sock_tag_tree_erase(&st_to_free_tree); | |
2058 | ||
2059 | /* Delete tag counter-sets */ | |
2060 | spin_lock_bh(&tag_counter_set_list_lock); | |
2061 | /* Counter sets are only on the uid tag, not full tag */ | |
2062 | tcs_entry = tag_counter_set_tree_search(&tag_counter_set_tree, tag); | |
2063 | if (tcs_entry) { | |
2064 | CT_DEBUG("qtaguid: ctrl_delete(%s): " | |
2065 | "erase tcs: tag=0x%llx (uid=%u) set=%d\n", | |
2066 | input, | |
2067 | tcs_entry->tn.tag, | |
2068 | get_uid_from_tag(tcs_entry->tn.tag), | |
2069 | tcs_entry->active_set); | |
2070 | rb_erase(&tcs_entry->tn.node, &tag_counter_set_tree); | |
2071 | kfree(tcs_entry); | |
2072 | } | |
2073 | spin_unlock_bh(&tag_counter_set_list_lock); | |
2074 | ||
2075 | /* | |
2076 | * If acct_tag is 0, then all entries belonging to uid are | |
2077 | * erased. | |
2078 | */ | |
2079 | spin_lock_bh(&iface_stat_list_lock); | |
2080 | list_for_each_entry(iface_entry, &iface_stat_list, list) { | |
2081 | spin_lock_bh(&iface_entry->tag_stat_list_lock); | |
2082 | node = rb_first(&iface_entry->tag_stat_tree); | |
2083 | while (node) { | |
2084 | ts_entry = rb_entry(node, struct tag_stat, tn.node); | |
2085 | entry_uid = get_uid_from_tag(ts_entry->tn.tag); | |
2086 | node = rb_next(node); | |
2087 | ||
2088 | CT_DEBUG("qtaguid: ctrl_delete(%s): " | |
2089 | "ts tag=0x%llx (uid=%u)\n", | |
2090 | input, ts_entry->tn.tag, entry_uid); | |
2091 | ||
2092 | if (entry_uid != uid) | |
2093 | continue; | |
2094 | if (!acct_tag || ts_entry->tn.tag == tag) { | |
2095 | CT_DEBUG("qtaguid: ctrl_delete(%s): " | |
2096 | "erase ts: %s 0x%llx %u\n", | |
2097 | input, iface_entry->ifname, | |
2098 | get_atag_from_tag(ts_entry->tn.tag), | |
2099 | entry_uid); | |
2100 | rb_erase(&ts_entry->tn.node, | |
2101 | &iface_entry->tag_stat_tree); | |
2102 | kfree(ts_entry); | |
2103 | } | |
2104 | } | |
2105 | spin_unlock_bh(&iface_entry->tag_stat_list_lock); | |
2106 | } | |
2107 | spin_unlock_bh(&iface_stat_list_lock); | |
2108 | ||
2109 | /* Cleanup the uid_tag_data */ | |
2110 | spin_lock_bh(&uid_tag_data_tree_lock); | |
2111 | node = rb_first(&uid_tag_data_tree); | |
2112 | while (node) { | |
2113 | utd_entry = rb_entry(node, struct uid_tag_data, node); | |
2114 | entry_uid = utd_entry->uid; | |
2115 | node = rb_next(node); | |
2116 | ||
2117 | CT_DEBUG("qtaguid: ctrl_delete(%s): " | |
2118 | "utd uid=%u\n", | |
2119 | input, entry_uid); | |
2120 | ||
2121 | if (entry_uid != uid) | |
2122 | continue; | |
2123 | /* | |
2124 | * Go over the tag_refs and free those that no longer have | 
2125 | * any sock_tags using them. | 
2126 | */ | |
2127 | put_tag_ref_tree(tag, utd_entry); | |
2128 | put_utd_entry(utd_entry); | |
2129 | } | |
2130 | spin_unlock_bh(&uid_tag_data_tree_lock); | |
2131 | ||
2132 | atomic64_inc(&qtu_events.delete_cmds); | |
2133 | res = 0; | |
2134 | ||
2135 | err: | |
2136 | return res; | |
2137 | } | |
2138 | ||
2139 | static int ctrl_cmd_counter_set(const char *input) | |
2140 | { | |
2141 | char cmd; | |
2142 | uid_t uid = 0; | |
2143 | tag_t tag; | |
2144 | int res, argc; | |
2145 | struct tag_counter_set *tcs; | |
2146 | int counter_set; | |
2147 | ||
2148 | argc = sscanf(input, "%c %d %u", &cmd, &counter_set, &uid); | |
2149 | CT_DEBUG("qtaguid: ctrl_counterset(%s): argc=%d cmd=%c " | |
2150 | "set=%d uid=%u\n", input, argc, cmd, | |
2151 | counter_set, uid); | |
2152 | if (argc != 3) { | |
2153 | res = -EINVAL; | |
2154 | goto err; | |
2155 | } | |
2156 | if (counter_set < 0 || counter_set >= IFS_MAX_COUNTER_SETS) { | |
2157 | pr_info("qtaguid: ctrl_counterset(%s): invalid counter_set range\n", | |
2158 | input); | |
2159 | res = -EINVAL; | |
2160 | goto err; | |
2161 | } | |
2162 | if (!can_manipulate_uids()) { | |
2163 | pr_info("qtaguid: ctrl_counterset(%s): " | |
2164 | "insufficient priv from pid=%u tgid=%u uid=%u\n", | |
2165 | input, current->pid, current->tgid, current_fsuid()); | |
2166 | res = -EPERM; | |
2167 | goto err; | |
2168 | } | |
2169 | ||
2170 | tag = make_tag_from_uid(uid); | |
2171 | spin_lock_bh(&tag_counter_set_list_lock); | |
2172 | tcs = tag_counter_set_tree_search(&tag_counter_set_tree, tag); | |
2173 | if (!tcs) { | |
2174 | tcs = kzalloc(sizeof(*tcs), GFP_ATOMIC); | |
2175 | if (!tcs) { | |
2176 | spin_unlock_bh(&tag_counter_set_list_lock); | |
2177 | pr_err("qtaguid: ctrl_counterset(%s): " | |
2178 | "failed to alloc counter set\n", | |
2179 | input); | |
2180 | res = -ENOMEM; | |
2181 | goto err; | |
2182 | } | |
2183 | tcs->tn.tag = tag; | |
2184 | tag_counter_set_tree_insert(tcs, &tag_counter_set_tree); | |
2185 | CT_DEBUG("qtaguid: ctrl_counterset(%s): added tcs tag=0x%llx " | |
2186 | "(uid=%u) set=%d\n", | |
2187 | input, tag, get_uid_from_tag(tag), counter_set); | |
2188 | } | |
2189 | tcs->active_set = counter_set; | |
2190 | spin_unlock_bh(&tag_counter_set_list_lock); | |
2191 | atomic64_inc(&qtu_events.counter_set_changes); | |
2192 | res = 0; | |
2193 | ||
2194 | err: | |
2195 | return res; | |
2196 | } | |
2197 | ||
2198 | static int ctrl_cmd_tag(const char *input) | |
2199 | { | |
2200 | char cmd; | |
2201 | int sock_fd = 0; | |
2202 | uid_t uid = 0; | |
2203 | tag_t acct_tag = make_atag_from_value(0); | |
2204 | tag_t full_tag; | |
2205 | struct socket *el_socket; | |
2206 | int res, argc; | |
2207 | struct sock_tag *sock_tag_entry; | |
2208 | struct tag_ref *tag_ref_entry; | |
2209 | struct uid_tag_data *uid_tag_data_entry; | |
2210 | struct proc_qtu_data *pqd_entry; | |
2211 | ||
2212 | /* Unassigned args will get defaulted later. */ | |
2213 | argc = sscanf(input, "%c %d %llu %u", &cmd, &sock_fd, &acct_tag, &uid); | |
2214 | CT_DEBUG("qtaguid: ctrl_tag(%s): argc=%d cmd=%c sock_fd=%d " | |
2215 | "acct_tag=0x%llx uid=%u\n", input, argc, cmd, sock_fd, | |
2216 | acct_tag, uid); | |
2217 | if (argc < 2) { | |
2218 | res = -EINVAL; | |
2219 | goto err; | |
2220 | } | |
2221 | el_socket = sockfd_lookup(sock_fd, &res); /* This locks the file */ | |
2222 | if (!el_socket) { | |
2223 | pr_info("qtaguid: ctrl_tag(%s): failed to lookup" | |
2224 | " sock_fd=%d err=%d pid=%u tgid=%u uid=%u\n", | |
2225 | input, sock_fd, res, current->pid, current->tgid, | |
2226 | current_fsuid()); | |
2227 | goto err; | |
2228 | } | |
2229 | CT_DEBUG("qtaguid: ctrl_tag(%s): socket->...->f_count=%ld ->sk=%p\n", | |
2230 | input, atomic_long_read(&el_socket->file->f_count), | |
2231 | el_socket->sk); | |
2232 | if (argc < 3) { | |
2233 | acct_tag = make_atag_from_value(0); | |
2234 | } else if (!valid_atag(acct_tag)) { | |
2235 | pr_info("qtaguid: ctrl_tag(%s): invalid tag\n", input); | |
2236 | res = -EINVAL; | |
2237 | goto err_put; | |
2238 | } | |
2239 | CT_DEBUG("qtaguid: ctrl_tag(%s): " | |
2240 | "pid=%u tgid=%u uid=%u euid=%u fsuid=%u " | |
2241 | "ctrl.gid=%u in_group()=%d in_egroup()=%d\n", | |
2242 | input, current->pid, current->tgid, current_uid(), | |
2243 | current_euid(), current_fsuid(), | |
2244 | xt_qtaguid_ctrl_file->gid, | |
2245 | in_group_p(xt_qtaguid_ctrl_file->gid), | |
2246 | in_egroup_p(xt_qtaguid_ctrl_file->gid)); | |
2247 | if (argc < 4) { | |
2248 | uid = current_fsuid(); | |
2249 | } else if (!can_impersonate_uid(uid)) { | |
2250 | pr_info("qtaguid: ctrl_tag(%s): " | |
2251 | "insufficient priv from pid=%u tgid=%u uid=%u\n", | |
2252 | input, current->pid, current->tgid, current_fsuid()); | |
2253 | res = -EPERM; | |
2254 | goto err_put; | |
2255 | } | |
2256 | full_tag = combine_atag_with_uid(acct_tag, uid); | |
2257 | ||
2258 | spin_lock_bh(&sock_tag_list_lock); | |
2259 | sock_tag_entry = get_sock_stat_nl(el_socket->sk); | |
2260 | tag_ref_entry = get_tag_ref(full_tag, &uid_tag_data_entry); | |
2261 | if (IS_ERR(tag_ref_entry)) { | |
2262 | res = PTR_ERR(tag_ref_entry); | |
2263 | spin_unlock_bh(&sock_tag_list_lock); | |
2264 | goto err_put; | |
2265 | } | |
2266 | tag_ref_entry->num_sock_tags++; | |
2267 | if (sock_tag_entry) { | |
2268 | struct tag_ref *prev_tag_ref_entry; | |
2269 | ||
2270 | CT_DEBUG("qtaguid: ctrl_tag(%s): retag for sk=%p " | |
2271 | "st@%p ...->f_count=%ld\n", | |
2272 | input, el_socket->sk, sock_tag_entry, | |
2273 | atomic_long_read(&el_socket->file->f_count)); | |
2274 | /* | |
2275 | * This is a re-tagging, so release the sock_fd that was | |
2276 | * locked at the time of the 1st tagging. | |
2277 | * There is still the ref from this call's sockfd_lookup() so | |
2278 | * it can be done within the spinlock. | |
2279 | */ | |
2280 | sockfd_put(sock_tag_entry->socket); | |
2281 | prev_tag_ref_entry = lookup_tag_ref(sock_tag_entry->tag, | |
2282 | &uid_tag_data_entry); | |
2283 | BUG_ON(IS_ERR_OR_NULL(prev_tag_ref_entry)); | |
2284 | BUG_ON(prev_tag_ref_entry->num_sock_tags <= 0); | |
2285 | prev_tag_ref_entry->num_sock_tags--; | |
2286 | sock_tag_entry->tag = full_tag; | |
2287 | } else { | |
2288 | CT_DEBUG("qtaguid: ctrl_tag(%s): newtag for sk=%p\n", | |
2289 | input, el_socket->sk); | |
2290 | sock_tag_entry = kzalloc(sizeof(*sock_tag_entry), | |
2291 | GFP_ATOMIC); | |
2292 | if (!sock_tag_entry) { | |
2293 | pr_err("qtaguid: ctrl_tag(%s): " | |
2294 | "socket tag alloc failed\n", | |
2295 | input); | |
2296 | spin_unlock_bh(&sock_tag_list_lock); | |
2297 | res = -ENOMEM; | |
2298 | goto err_tag_unref_put; | |
2299 | } | |
2300 | sock_tag_entry->sk = el_socket->sk; | |
2301 | sock_tag_entry->socket = el_socket; | |
2302 | sock_tag_entry->pid = current->tgid; | |
2303 | sock_tag_entry->tag = combine_atag_with_uid(acct_tag, | |
2304 | uid); | |
2305 | spin_lock_bh(&uid_tag_data_tree_lock); | |
2306 | pqd_entry = proc_qtu_data_tree_search( | |
2307 | &proc_qtu_data_tree, current->tgid); | |
2308 | /* | |
2309 | * TODO: remove if, and start failing. | |
2310 | * At first, we want to catch user-space code that is not | |
2311 | * opening the /dev/xt_qtaguid. | |
2312 | */ | |
2313 | if (IS_ERR_OR_NULL(pqd_entry)) | |
2314 | pr_warn_once( | |
2315 | "qtaguid: %s(): " | |
2316 | "User space forgot to open /dev/xt_qtaguid? " | |
2317 | "pid=%u tgid=%u uid=%u\n", __func__, | |
2318 | current->pid, current->tgid, | |
2319 | current_fsuid()); | |
2320 | else | |
2321 | list_add(&sock_tag_entry->list, | |
2322 | &pqd_entry->sock_tag_list); | |
2323 | spin_unlock_bh(&uid_tag_data_tree_lock); | |
2324 | ||
2325 | sock_tag_tree_insert(sock_tag_entry, &sock_tag_tree); | |
2326 | atomic64_inc(&qtu_events.sockets_tagged); | |
2327 | } | |
2328 | spin_unlock_bh(&sock_tag_list_lock); | |
2329 | /* We keep the ref to the socket (file) until it is untagged */ | |
2330 | CT_DEBUG("qtaguid: ctrl_tag(%s): done st@%p ...->f_count=%ld\n", | |
2331 | input, sock_tag_entry, | |
2332 | atomic_long_read(&el_socket->file->f_count)); | |
2333 | return 0; | |
2334 | ||
2335 | err_tag_unref_put: | |
2336 | BUG_ON(tag_ref_entry->num_sock_tags <= 0); | |
2337 | tag_ref_entry->num_sock_tags--; | |
2338 | free_tag_ref_from_utd_entry(tag_ref_entry, uid_tag_data_entry); | |
2339 | err_put: | |
2340 | CT_DEBUG("qtaguid: ctrl_tag(%s): done. ...->f_count=%ld\n", | |
2341 | input, atomic_long_read(&el_socket->file->f_count) - 1); | |
2342 | /* Release the sock_fd that was grabbed by sockfd_lookup(). */ | |
2343 | sockfd_put(el_socket); | |
2344 | return res; | |
2345 | ||
2346 | err: | |
2347 | CT_DEBUG("qtaguid: ctrl_tag(%s): done.\n", input); | |
2348 | return res; | |
2349 | } | |
2350 | ||
2351 | static int ctrl_cmd_untag(const char *input) | |
2352 | { | |
2353 | char cmd; | |
2354 | int sock_fd = 0; | |
2355 | struct socket *el_socket; | |
2356 | int res, argc; | |
2357 | struct sock_tag *sock_tag_entry; | |
2358 | struct tag_ref *tag_ref_entry; | |
2359 | struct uid_tag_data *utd_entry; | |
2360 | struct proc_qtu_data *pqd_entry; | |
2361 | ||
2362 | argc = sscanf(input, "%c %d", &cmd, &sock_fd); | |
2363 | CT_DEBUG("qtaguid: ctrl_untag(%s): argc=%d cmd=%c sock_fd=%d\n", | |
2364 | input, argc, cmd, sock_fd); | |
2365 | if (argc < 2) { | |
2366 | res = -EINVAL; | |
2367 | goto err; | |
2368 | } | |
2369 | el_socket = sockfd_lookup(sock_fd, &res); /* This locks the file */ | |
2370 | if (!el_socket) { | |
2371 | pr_info("qtaguid: ctrl_untag(%s): failed to lookup" | |
2372 | " sock_fd=%d err=%d pid=%u tgid=%u uid=%u\n", | |
2373 | input, sock_fd, res, current->pid, current->tgid, | |
2374 | current_fsuid()); | |
2375 | goto err; | |
2376 | } | |
2377 | CT_DEBUG("qtaguid: ctrl_untag(%s): socket->...->f_count=%ld ->sk=%p\n", | |
2378 | input, atomic_long_read(&el_socket->file->f_count), | |
2379 | el_socket->sk); | |
2380 | spin_lock_bh(&sock_tag_list_lock); | |
2381 | sock_tag_entry = get_sock_stat_nl(el_socket->sk); | |
2382 | if (!sock_tag_entry) { | |
2383 | spin_unlock_bh(&sock_tag_list_lock); | |
2384 | res = -EINVAL; | |
2385 | goto err_put; | |
2386 | } | |
2387 | /* | |
2388 | * The socket already belongs to the current process | |
2389 | * so it can do whatever it wants to it. | |
2390 | */ | |
2391 | rb_erase(&sock_tag_entry->sock_node, &sock_tag_tree); | |
2392 | ||
2393 | tag_ref_entry = lookup_tag_ref(sock_tag_entry->tag, &utd_entry); | |
2394 | BUG_ON(!tag_ref_entry); | |
2395 | BUG_ON(tag_ref_entry->num_sock_tags <= 0); | |
2396 | spin_lock_bh(&uid_tag_data_tree_lock); | |
2397 | pqd_entry = proc_qtu_data_tree_search( | |
2398 | &proc_qtu_data_tree, current->tgid); | |
2399 | /* | |
2400 | * TODO: remove if, and start failing. | |
2401 | * At first, we want to catch user-space code that is not | |
2402 | * opening the /dev/xt_qtaguid. | |
2403 | */ | |
2404 | if (IS_ERR_OR_NULL(pqd_entry)) | |
2405 | pr_warn_once("qtaguid: %s(): " | |
2406 | "User space forgot to open /dev/xt_qtaguid? " | |
2407 | "pid=%u tgid=%u uid=%u\n", __func__, | |
2408 | current->pid, current->tgid, current_fsuid()); | |
2409 | else | |
2410 | list_del(&sock_tag_entry->list); | |
2411 | spin_unlock_bh(&uid_tag_data_tree_lock); | |
2412 | /* | |
2413 | * We don't free tag_ref from the utd_entry here, | |
2414 | * only during a cmd_delete(). | |
2415 | */ | |
2416 | tag_ref_entry->num_sock_tags--; | |
2417 | spin_unlock_bh(&sock_tag_list_lock); | |
2418 | /* | |
2419 | * Release the sock_fd that was grabbed at tag time, | |
2420 | * and once more for the sockfd_lookup() here. | |
2421 | */ | |
2422 | sockfd_put(sock_tag_entry->socket); | |
2423 | CT_DEBUG("qtaguid: ctrl_untag(%s): done. st@%p ...->f_count=%ld\n", | |
2424 | input, sock_tag_entry, | |
2425 | atomic_long_read(&el_socket->file->f_count) - 1); | |
2426 | sockfd_put(el_socket); | |
2427 | ||
2428 | kfree(sock_tag_entry); | |
2429 | atomic64_inc(&qtu_events.sockets_untagged); | |
2430 | ||
2431 | return 0; | |
2432 | ||
2433 | err_put: | |
2434 | CT_DEBUG("qtaguid: ctrl_untag(%s): done. socket->...->f_count=%ld\n", | |
2435 | input, atomic_long_read(&el_socket->file->f_count) - 1); | |
2436 | /* Release the sock_fd that was grabbed by sockfd_lookup(). */ | |
2437 | sockfd_put(el_socket); | |
2438 | return res; | |
2439 | ||
2440 | err: | |
2441 | CT_DEBUG("qtaguid: ctrl_untag(%s): done.\n", input); | |
2442 | return res; | |
2443 | } | |
2444 | ||
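| /* | 
|  * Command grammar accepted on the ctrl file, as parsed by the | 
|  * ctrl_cmd_*() handlers above (omitted arguments are defaulted as | 
|  * noted in each handler): | 
|  *   "t <sock_fd> [<acct_tag> [<uid>]]"  tag a socket | 
|  *   "u <sock_fd>"                       untag a socket | 
|  *   "d <acct_tag> [<uid>]"              delete tags and stats for a uid | 
|  *   "s <counter_set> <uid>"             select the active counter set | 
|  */ | 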
2445 | static ssize_t qtaguid_ctrl_parse(const char *input, size_t count) | |
2446 | { | |
2447 | char cmd; | |
2448 | ssize_t res; | |
2449 | ||
2450 | CT_DEBUG("qtaguid: ctrl(%s): pid=%u tgid=%u uid=%u\n", | |
2451 | input, current->pid, current->tgid, current_fsuid()); | |
2452 | ||
2453 | cmd = input[0]; | |
2454 | /* Collect params for commands */ | |
2455 | switch (cmd) { | |
2456 | case 'd': | |
2457 | res = ctrl_cmd_delete(input); | |
2458 | break; | |
2459 | ||
2460 | case 's': | |
2461 | res = ctrl_cmd_counter_set(input); | |
2462 | break; | |
2463 | ||
2464 | case 't': | |
2465 | res = ctrl_cmd_tag(input); | |
2466 | break; | |
2467 | ||
2468 | case 'u': | |
2469 | res = ctrl_cmd_untag(input); | |
2470 | break; | |
2471 | ||
2472 | default: | |
2473 | res = -EINVAL; | |
2474 | goto err; | |
2475 | } | |
2476 | if (!res) | |
2477 | res = count; | |
2478 | err: | |
2479 | CT_DEBUG("qtaguid: ctrl(%s): res=%zd\n", input, res); | |
2480 | return res; | |
2481 | } | |
2482 | ||
2483 | #define MAX_QTAGUID_CTRL_INPUT_LEN 255 | |
2484 | static ssize_t qtaguid_ctrl_proc_write(struct file *file, const char __user *buffer, | |
2485 | size_t count, loff_t *offp) | |
2486 | { | |
2487 | char input_buf[MAX_QTAGUID_CTRL_INPUT_LEN]; | |
2488 | ||
2489 | if (unlikely(module_passive)) | |
2490 | return count; | |
2491 | ||
2492 | if (count >= MAX_QTAGUID_CTRL_INPUT_LEN) | |
2493 | return -EINVAL; | |
2494 | ||
2495 | if (copy_from_user(input_buf, buffer, count)) | |
2496 | return -EFAULT; | |
2497 | ||
2498 | input_buf[count] = '\0'; | |
2499 | return qtaguid_ctrl_parse(input_buf, count); | |
2500 | } | |
2501 | ||
2502 | struct proc_print_info { | |
2503 | struct iface_stat *iface_entry; | |
2504 | int item_index; | |
2505 | tag_t tag; /* tag found by reading to tag_pos */ | |
2506 | off_t tag_pos; | |
2507 | int tag_item_index; | |
2508 | }; | |
2509 | ||
2510 | static void pp_stats_header(struct seq_file *m) | |
2511 | { | |
2512 | seq_puts(m, | |
2513 | "idx iface acct_tag_hex uid_tag_int cnt_set " | |
2514 | "rx_bytes rx_packets " | |
2515 | "tx_bytes tx_packets " | |
2516 | "rx_tcp_bytes rx_tcp_packets " | |
2517 | "rx_udp_bytes rx_udp_packets " | |
2518 | "rx_other_bytes rx_other_packets " | |
2519 | "tx_tcp_bytes tx_tcp_packets " | |
2520 | "tx_udp_bytes tx_udp_packets " | |
2521 | "tx_other_bytes tx_other_packets\n"); | |
2522 | } | |
2523 | ||
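| /* | 
|  * Each data row emitted by pp_stats_line() follows the header above, | 
|  * e.g. (hypothetical values, with per-protocol counters summing to the | 
|  * rx/tx totals): | 
|  *   2 wlan0 0x0 10005 0 1234 10 567 8 1000 9 200 1 34 0 500 7 60 1 7 0 | 
|  */ | 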
2524 | static int pp_stats_line(struct seq_file *m, struct tag_stat *ts_entry, | |
2525 | int cnt_set) | |
2526 | { | |
2527 | int ret; | |
2528 | struct data_counters *cnts; | |
2529 | tag_t tag = ts_entry->tn.tag; | |
2530 | uid_t stat_uid = get_uid_from_tag(tag); | |
2531 | struct proc_print_info *ppi = m->private; | |
2532 | /* Detailed tags are not available to everybody */ | |
2533 | if (get_atag_from_tag(tag) && !can_read_other_uid_stats(stat_uid)) { | |
2534 | CT_DEBUG("qtaguid: stats line: " | |
2535 | "%s 0x%llx %u: insufficient priv " | |
2536 | "from pid=%u tgid=%u uid=%u stats.gid=%u\n", | |
2537 | ppi->iface_entry->ifname, | |
2538 | get_atag_from_tag(tag), stat_uid, | |
2539 | current->pid, current->tgid, current_fsuid(), | |
2540 | xt_qtaguid_stats_file->gid); | |
2541 | return 0; | |
2542 | } | |
2543 | ppi->item_index++; | |
2544 | cnts = &ts_entry->counters; | |
2545 | ret = seq_printf(m, "%d %s 0x%llx %u %u " | |
2546 | "%llu %llu " | |
2547 | "%llu %llu " | |
2548 | "%llu %llu " | |
2549 | "%llu %llu " | |
2550 | "%llu %llu " | |
2551 | "%llu %llu " | |
2552 | "%llu %llu " | |
2553 | "%llu %llu\n", | |
2554 | ppi->item_index, | |
2555 | ppi->iface_entry->ifname, | |
2556 | get_atag_from_tag(tag), | |
2557 | stat_uid, | |
2558 | cnt_set, | |
2559 | dc_sum_bytes(cnts, cnt_set, IFS_RX), | |
2560 | dc_sum_packets(cnts, cnt_set, IFS_RX), | |
2561 | dc_sum_bytes(cnts, cnt_set, IFS_TX), | |
2562 | dc_sum_packets(cnts, cnt_set, IFS_TX), | |
2563 | cnts->bpc[cnt_set][IFS_RX][IFS_TCP].bytes, | |
2564 | cnts->bpc[cnt_set][IFS_RX][IFS_TCP].packets, | |
2565 | cnts->bpc[cnt_set][IFS_RX][IFS_UDP].bytes, | |
2566 | cnts->bpc[cnt_set][IFS_RX][IFS_UDP].packets, | |
2567 | cnts->bpc[cnt_set][IFS_RX][IFS_PROTO_OTHER].bytes, | |
2568 | cnts->bpc[cnt_set][IFS_RX][IFS_PROTO_OTHER].packets, | |
2569 | cnts->bpc[cnt_set][IFS_TX][IFS_TCP].bytes, | |
2570 | cnts->bpc[cnt_set][IFS_TX][IFS_TCP].packets, | |
2571 | cnts->bpc[cnt_set][IFS_TX][IFS_UDP].bytes, | |
2572 | cnts->bpc[cnt_set][IFS_TX][IFS_UDP].packets, | |
2573 | cnts->bpc[cnt_set][IFS_TX][IFS_PROTO_OTHER].bytes, | |
2574 | cnts->bpc[cnt_set][IFS_TX][IFS_PROTO_OTHER].packets); | |
2575 | return ret ?: 1; | |
2576 | } | |
2577 | ||
2578 | static bool pp_sets(struct seq_file *m, struct tag_stat *ts_entry) | |
2579 | { | |
2580 | int ret; | |
2581 | int counter_set; | |
2582 | for (counter_set = 0; counter_set < IFS_MAX_COUNTER_SETS; | |
2583 | counter_set++) { | |
2584 | ret = pp_stats_line(m, ts_entry, counter_set); | |
2585 | if (ret < 0) | |
2586 | return false; | |
2587 | } | |
2588 | return true; | |
2589 | } | |
2590 | ||
2591 | static int qtaguid_stats_proc_iface_stat_ptr_valid(struct iface_stat *ptr) | |
2592 | { | |
2593 | struct iface_stat *iface_entry; | |
2594 | ||
2595 | if (!ptr) | |
2596 | return false; | |
2597 | ||
2598 | list_for_each_entry(iface_entry, &iface_stat_list, list) | |
2599 | if (iface_entry == ptr) | |
2600 | return true; | |
2601 | return false; | |
2602 | } | |
2603 | ||
2604 | static void qtaguid_stats_proc_next_iface_entry(struct proc_print_info *ppi) | |
2605 | { | |
2606 | spin_unlock_bh(&ppi->iface_entry->tag_stat_list_lock); | |
2607 | list_for_each_entry_continue(ppi->iface_entry, &iface_stat_list, list) { | |
2608 | spin_lock_bh(&ppi->iface_entry->tag_stat_list_lock); | |
2609 | return; | |
2610 | } | |
2611 | ppi->iface_entry = NULL; | |
2612 | } | |
2613 | ||
2614 | static void *qtaguid_stats_proc_next(struct seq_file *m, void *v, loff_t *pos) | |
2615 | { | |
2616 | struct proc_print_info *ppi = m->private; | |
2617 | struct tag_stat *ts_entry; | |
2618 | struct rb_node *node; | |
2619 | ||
2620 | if (!v) { | |
2621 | pr_err("qtaguid: %s(): unexpected v: NULL\n", __func__); | |
2622 | return NULL; | |
2623 | } | |
2624 | ||
2625 | (*pos)++; | |
2626 | ||
2627 | if (!ppi->iface_entry || unlikely(module_passive)) | |
2628 | return NULL; | |
2629 | ||
2630 | if (v == SEQ_START_TOKEN) | |
2631 | node = rb_first(&ppi->iface_entry->tag_stat_tree); | |
2632 | else | |
2633 | node = rb_next(&((struct tag_stat *)v)->tn.node); | |
2634 | ||
2635 | while (!node) { | |
2636 | qtaguid_stats_proc_next_iface_entry(ppi); | |
2637 | if (!ppi->iface_entry) | |
2638 | return NULL; | |
2639 | node = rb_first(&ppi->iface_entry->tag_stat_tree); | |
2640 | } | |
2641 | ||
2642 | ts_entry = rb_entry(node, struct tag_stat, tn.node); | |
2643 | ppi->tag = ts_entry->tn.tag; | |
2644 | ppi->tag_pos = *pos; | |
2645 | ppi->tag_item_index = ppi->item_index; | |
2646 | return ts_entry; | |
2647 | } | |
2648 | ||
2649 | static void *qtaguid_stats_proc_start(struct seq_file *m, loff_t *pos) | |
2650 | { | |
2651 | struct proc_print_info *ppi = m->private; | |
2652 | struct tag_stat *ts_entry = NULL; | |
2653 | ||
2654 | spin_lock_bh(&iface_stat_list_lock); | |
2655 | ||
2656 | if (*pos == 0) { | |
2657 | ppi->item_index = 1; | |
2658 | ppi->tag_pos = 0; | |
2659 | if (list_empty(&iface_stat_list)) { | |
2660 | ppi->iface_entry = NULL; | |
2661 | } else { | |
2662 | ppi->iface_entry = list_first_entry(&iface_stat_list, | |
2663 | struct iface_stat, | |
2664 | list); | |
2665 | spin_lock_bh(&ppi->iface_entry->tag_stat_list_lock); | |
2666 | } | |
2667 | return SEQ_START_TOKEN; | |
2668 | } | |
2669 | if (!qtaguid_stats_proc_iface_stat_ptr_valid(ppi->iface_entry)) { | |
2670 | if (ppi->iface_entry) { | |
2671 | pr_err("qtaguid: %s(): iface_entry %p not found\n", | |
2672 | __func__, ppi->iface_entry); | |
2673 | ppi->iface_entry = NULL; | |
2674 | } | |
2675 | return NULL; | |
2676 | } | |
2677 | ||
2678 | spin_lock_bh(&ppi->iface_entry->tag_stat_list_lock); | |
2679 | ||
2680 | if (!ppi->tag_pos) { | |
2681 | /* seq_read skipped first next call */ | |
2682 | ts_entry = SEQ_START_TOKEN; | |
2683 | } else { | |
2684 | ts_entry = tag_stat_tree_search( | |
2685 | &ppi->iface_entry->tag_stat_tree, ppi->tag); | |
2686 | if (!ts_entry) { | |
2687 | pr_info("qtaguid: %s(): tag_stat.tag 0x%llx not found. Abort.\n", | |
2688 | __func__, ppi->tag); | |
2689 | return NULL; | |
2690 | } | |
2691 | } | |
2692 | ||
2693 | if (*pos == ppi->tag_pos) { /* normal resume */ | |
2694 | ppi->item_index = ppi->tag_item_index; | |
2695 | } else { | |
2696 | /* seq_read skipped a next call */ | |
2697 | *pos = ppi->tag_pos; | |
2698 | ts_entry = qtaguid_stats_proc_next(m, ts_entry, pos); | |
2699 | } | |
2700 | ||
2701 | return ts_entry; | |
2702 | } | |
2703 | ||
2704 | static void qtaguid_stats_proc_stop(struct seq_file *m, void *v) | |
2705 | { | |
2706 | struct proc_print_info *ppi = m->private; | |
2707 | if (ppi->iface_entry) | |
2708 | spin_unlock_bh(&ppi->iface_entry->tag_stat_list_lock); | |
2709 | spin_unlock_bh(&iface_stat_list_lock); | |
2710 | } | |
2711 | ||
2712 | /* | |
2713 | * Procfs reader to get all tag stats using style "1)" as described in | |
2714 | * fs/proc/generic.c | |
2715 | * Groups all protocols tx/rx bytes. | |
2716 | */ | |
2717 | static int qtaguid_stats_proc_show(struct seq_file *m, void *v) | |
2718 | { | |
2719 | struct tag_stat *ts_entry = v; | |
2720 | ||
2721 | if (v == SEQ_START_TOKEN) | |
2722 | pp_stats_header(m); | |
2723 | else | |
2724 | pp_sets(m, ts_entry); | |
2725 | ||
2726 | return 0; | |
2727 | } | |
2728 | ||
2729 | /*------------------------------------------*/ | |
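| /* | 
|  * Userspace is expected to open /dev/xt_qtaguid (QTU_DEV_NAME) once per | 
|  * process before tagging sockets: qtudev_open() records the process in | 
|  * proc_qtu_data_tree, and qtudev_release() untags any sockets still | 
|  * tagged when that process exits. | 
|  */ | 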
2730 | static int qtudev_open(struct inode *inode, struct file *file) | |
2731 | { | |
2732 | struct uid_tag_data *utd_entry; | |
2733 | struct proc_qtu_data *pqd_entry; | |
2734 | struct proc_qtu_data *new_pqd_entry; | |
2735 | int res; | |
2736 | bool utd_entry_found; | |
2737 | ||
2738 | if (unlikely(qtu_proc_handling_passive)) | |
2739 | return 0; | |
2740 | ||
2741 | DR_DEBUG("qtaguid: qtudev_open(): pid=%u tgid=%u uid=%u\n", | |
2742 | current->pid, current->tgid, current_fsuid()); | |
2743 | ||
2744 | spin_lock_bh(&uid_tag_data_tree_lock); | |
2745 | ||
2746 | /* Look for existing uid data, or alloc one. */ | |
2747 | utd_entry = get_uid_data(current_fsuid(), &utd_entry_found); | |
2748 | if (IS_ERR_OR_NULL(utd_entry)) { | |
2749 | res = PTR_ERR(utd_entry); | |
2750 | goto err_unlock; | |
2751 | } | |
2752 | ||
2753 | /* Look for existing PID based proc_data */ | |
2754 | pqd_entry = proc_qtu_data_tree_search(&proc_qtu_data_tree, | |
2755 | current->tgid); | |
2756 | if (pqd_entry) { | |
2757 | pr_err("qtaguid: qtudev_open(): %u/%u %u " | |
2758 | "%s already opened\n", | |
2759 | current->pid, current->tgid, current_fsuid(), | |
2760 | QTU_DEV_NAME); | |
2761 | res = -EBUSY; | |
2762 | goto err_unlock_free_utd; | |
2763 | } | |
2764 | ||
2765 | new_pqd_entry = kzalloc(sizeof(*new_pqd_entry), GFP_ATOMIC); | |
2766 | if (!new_pqd_entry) { | |
2767 | pr_err("qtaguid: qtudev_open(): %u/%u %u: " | |
2768 | "proc data alloc failed\n", | |
2769 | current->pid, current->tgid, current_fsuid()); | |
2770 | res = -ENOMEM; | |
2771 | goto err_unlock_free_utd; | |
2772 | } | |
2773 | new_pqd_entry->pid = current->tgid; | |
2774 | INIT_LIST_HEAD(&new_pqd_entry->sock_tag_list); | |
2775 | new_pqd_entry->parent_tag_data = utd_entry; | |
2776 | utd_entry->num_pqd++; | |
2777 | ||
2778 | proc_qtu_data_tree_insert(new_pqd_entry, | |
2779 | &proc_qtu_data_tree); | |
2780 | ||
2781 | spin_unlock_bh(&uid_tag_data_tree_lock); | |
2782 | DR_DEBUG("qtaguid: tracking data for uid=%u in pqd=%p\n", | |
2783 | current_fsuid(), new_pqd_entry); | |
2784 | file->private_data = new_pqd_entry; | |
2785 | return 0; | |
2786 | ||
2787 | err_unlock_free_utd: | |
2788 | if (!utd_entry_found) { | |
2789 | rb_erase(&utd_entry->node, &uid_tag_data_tree); | |
2790 | kfree(utd_entry); | |
2791 | } | |
2792 | err_unlock: | |
2793 | spin_unlock_bh(&uid_tag_data_tree_lock); | |
2794 | return res; | |
2795 | } | |
2796 | ||
2797 | static int qtudev_release(struct inode *inode, struct file *file) | |
2798 | { | |
2799 | struct proc_qtu_data *pqd_entry = file->private_data; | |
2800 | struct uid_tag_data *utd_entry = pqd_entry->parent_tag_data; | |
2801 | struct sock_tag *st_entry; | |
2802 | struct rb_root st_to_free_tree = RB_ROOT; | |
2803 | struct list_head *entry, *next; | |
2804 | struct tag_ref *tr; | |
2805 | ||
2806 | if (unlikely(qtu_proc_handling_passive)) | |
2807 | return 0; | |
2808 | ||
2809 | /* | |
2810 | * Do not trust the current->pid, it might just be a kworker cleaning | |
2811 | * up after a dead proc. | |
2812 | */ | |
2813 | DR_DEBUG("qtaguid: qtudev_release(): " | |
2814 | "pid=%u tgid=%u uid=%u " | |
2815 | "pqd_entry=%p->pid=%u utd_entry=%p->active_tags=%d\n", | |
2816 | current->pid, current->tgid, pqd_entry->parent_tag_data->uid, | |
2817 | pqd_entry, pqd_entry->pid, utd_entry, | |
2818 | utd_entry->num_active_tags); | |
2819 | ||
2820 | spin_lock_bh(&sock_tag_list_lock); | |
2821 | spin_lock_bh(&uid_tag_data_tree_lock); | |
2822 | ||
2823 | list_for_each_safe(entry, next, &pqd_entry->sock_tag_list) { | |
2824 | st_entry = list_entry(entry, struct sock_tag, list); | |
2825 | DR_DEBUG("qtaguid: %s(): " | |
2826 | "erase sock_tag=%p->sk=%p pid=%u tgid=%u uid=%u\n", | |
2827 | __func__, | |
2828 | st_entry, st_entry->sk, | |
2829 | current->pid, current->tgid, | |
2830 | pqd_entry->parent_tag_data->uid); | |
2831 | ||
2832 | utd_entry = uid_tag_data_tree_search( | |
2833 | &uid_tag_data_tree, | |
2834 | get_uid_from_tag(st_entry->tag)); | |
2835 | BUG_ON(IS_ERR_OR_NULL(utd_entry)); | |
2836 | DR_DEBUG("qtaguid: %s(): " | |
2837 | "looking for tag=0x%llx in utd_entry=%p\n", __func__, | |
2838 | st_entry->tag, utd_entry); | |
2839 | tr = tag_ref_tree_search(&utd_entry->tag_ref_tree, | |
2840 | st_entry->tag); | |
2841 | BUG_ON(!tr); | |
2842 | BUG_ON(tr->num_sock_tags <= 0); | |
2843 | tr->num_sock_tags--; | |
2844 | free_tag_ref_from_utd_entry(tr, utd_entry); | |
2845 | ||
2846 | rb_erase(&st_entry->sock_node, &sock_tag_tree); | |
2847 | list_del(&st_entry->list); | |
2848 | /* Can't sockfd_put() within spinlock, do it later. */ | |
2849 | sock_tag_tree_insert(st_entry, &st_to_free_tree); | |
2850 | ||
2851 | /* | |
2852 | * Try to free the utd_entry if no other proc_qtu_data is | |
2853 | * using it (num_pqd is 0) and it doesn't have active tags | |
2854 | * (num_active_tags is 0). | |
2855 | */ | |
2856 | put_utd_entry(utd_entry); | |
2857 | } | |
2858 | ||
2859 | rb_erase(&pqd_entry->node, &proc_qtu_data_tree); | |
2860 | BUG_ON(pqd_entry->parent_tag_data->num_pqd < 1); | |
2861 | pqd_entry->parent_tag_data->num_pqd--; | |
2862 | put_utd_entry(pqd_entry->parent_tag_data); | |
2863 | kfree(pqd_entry); | |
2864 | file->private_data = NULL; | |
2865 | ||
2866 | spin_unlock_bh(&uid_tag_data_tree_lock); | |
2867 | spin_unlock_bh(&sock_tag_list_lock); | |
2868 | ||
2869 | ||
2870 | sock_tag_tree_erase(&st_to_free_tree); | |
2871 | ||
2872 | prdebug_full_state(0, "%s(): pid=%u tgid=%u", __func__, | |
2873 | current->pid, current->tgid); | |
2874 | return 0; | |
2875 | } | |
2876 | ||
2877 | /*------------------------------------------*/ | |
2878 | static const struct file_operations qtudev_fops = { | |
2879 | .owner = THIS_MODULE, | |
2880 | .open = qtudev_open, | |
2881 | .release = qtudev_release, | |
2882 | }; | |
2883 | ||
2884 | static struct miscdevice qtu_device = { | |
2885 | .minor = MISC_DYNAMIC_MINOR, | |
2886 | .name = QTU_DEV_NAME, | |
2887 | .fops = &qtudev_fops, | |
2888 | /* How sad it doesn't allow for defaults: .mode = S_IRUGO | S_IWUSR */ | |
2889 | }; | |
2890 | ||
2891 | static const struct seq_operations proc_qtaguid_ctrl_seqops = { | |
2892 | .start = qtaguid_ctrl_proc_start, | |
2893 | .next = qtaguid_ctrl_proc_next, | |
2894 | .stop = qtaguid_ctrl_proc_stop, | |
2895 | .show = qtaguid_ctrl_proc_show, | |
2896 | }; | |
2897 | ||
2898 | static int proc_qtaguid_ctrl_open(struct inode *inode, struct file *file) | |
2899 | { | |
2900 | return seq_open_private(file, &proc_qtaguid_ctrl_seqops, | |
2901 | sizeof(struct proc_ctrl_print_info)); | |
2902 | } | |
2903 | ||
2904 | static const struct file_operations proc_qtaguid_ctrl_fops = { | |
2905 | .open = proc_qtaguid_ctrl_open, | |
2906 | .read = seq_read, | |
2907 | .write = qtaguid_ctrl_proc_write, | |
2908 | .llseek = seq_lseek, | |
2909 | .release = seq_release_private, | |
2910 | }; | |
2911 | ||
2912 | static const struct seq_operations proc_qtaguid_stats_seqops = { | |
2913 | .start = qtaguid_stats_proc_start, | |
2914 | .next = qtaguid_stats_proc_next, | |
2915 | .stop = qtaguid_stats_proc_stop, | |
2916 | .show = qtaguid_stats_proc_show, | |
2917 | }; | |
2918 | ||
2919 | static int proc_qtaguid_stats_open(struct inode *inode, struct file *file) | |
2920 | { | |
2921 | return seq_open_private(file, &proc_qtaguid_stats_seqops, | |
2922 | sizeof(struct proc_print_info)); | |
2923 | } | |
2924 | ||
2925 | static const struct file_operations proc_qtaguid_stats_fops = { | |
2926 | .open = proc_qtaguid_stats_open, | |
2927 | .read = seq_read, | |
2928 | .llseek = seq_lseek, | |
2929 | .release = seq_release_private, | |
2930 | }; | |
2931 | ||
2932 | /*------------------------------------------*/ | |
2933 | static int __init qtaguid_proc_register(struct proc_dir_entry **res_procdir) | |
2934 | { | |
2935 | int ret; | |
2936 | *res_procdir = proc_mkdir(module_procdirname, init_net.proc_net); | |
2937 | if (!*res_procdir) { | |
2938 | pr_err("qtaguid: failed to create proc/.../xt_qtaguid\n"); | |
2939 | ret = -ENOMEM; | |
2940 | goto no_dir; | |
2941 | } | |
2942 | ||
2943 | xt_qtaguid_ctrl_file = proc_create_data("ctrl", proc_ctrl_perms, | |
2944 | *res_procdir, | |
2945 | &proc_qtaguid_ctrl_fops, | |
2946 | NULL); | |
2947 | if (!xt_qtaguid_ctrl_file) { | |
2948 | pr_err("qtaguid: failed to create xt_qtaguid/ctrl " | |
2949 | "file\n"); | 
2950 | ret = -ENOMEM; | |
2951 | goto no_ctrl_entry; | |
2952 | } | |
2953 | ||
2954 | xt_qtaguid_stats_file = proc_create_data("stats", proc_stats_perms, | |
2955 | *res_procdir, | |
2956 | &proc_qtaguid_stats_fops, | |
2957 | NULL); | |
2958 | if (!xt_qtaguid_stats_file) { | |
2959 | pr_err("qtaguid: failed to create xt_qtaguid/stats " | |
2960 | "file\n"); | |
2961 | ret = -ENOMEM; | |
2962 | goto no_stats_entry; | |
2963 | } | |
2964 | /* | |
2965 | * TODO: add support for counter hacking | 
2966 | * xt_qtaguid_stats_file->write_proc = qtaguid_stats_proc_write; | |
2967 | */ | |
2968 | return 0; | |
2969 | ||
2970 | no_stats_entry: | |
2971 | remove_proc_entry("ctrl", *res_procdir); | |
2972 | no_ctrl_entry: | |
2973 | remove_proc_entry(module_procdirname, init_net.proc_net); | 
2974 | no_dir: | |
2975 | return ret; | |
2976 | } | |
2977 | ||
2978 | static struct xt_match qtaguid_mt_reg __read_mostly = { | |
2979 | /* | |
2980 | * This module masquerades as the "owner" module so that iptables | |
2981 | * tools can deal with it. | |
2982 | */ | |
2983 | .name = "owner", | |
2984 | .revision = 1, | |
2985 | .family = NFPROTO_UNSPEC, | |
2986 | .match = qtaguid_mt, | |
2987 | .matchsize = sizeof(struct xt_qtaguid_match_info), | |
2988 | .me = THIS_MODULE, | |
2989 | }; | |
2990 | ||
2991 | static int __init qtaguid_mt_init(void) | |
2992 | { | |
2993 | if (qtaguid_proc_register(&xt_qtaguid_procdir) | |
2994 | || iface_stat_init(xt_qtaguid_procdir) | |
2995 | || xt_register_match(&qtaguid_mt_reg) | |
2996 | || misc_register(&qtu_device)) | |
2997 | return -1; | |
2998 | return 0; | |
2999 | } | |
3000 | ||
3001 | /* | |
3002 | * TODO: allow unloading of the module. | |
3003 | * For now stats are permanent. | |
3004 | * Kconfig forces 'y/n' and never an 'm'. | 
3005 | */ | |
3006 | ||
3007 | module_init(qtaguid_mt_init); | |
3008 | MODULE_AUTHOR("jpa <jpa@google.com>"); | |
3009 | MODULE_DESCRIPTION("Xtables: socket owner+tag matching and associated stats"); | |
3010 | MODULE_LICENSE("GPL"); | |
3011 | MODULE_ALIAS("ipt_owner"); | |
3012 | MODULE_ALIAS("ip6t_owner"); | |
3013 | MODULE_ALIAS("ipt_qtaguid"); | |
3014 | MODULE_ALIAS("ip6t_qtaguid"); |