2 * Kernel iptables module to track stats for packets based on user tags.
6 * This program is free software; you can redistribute it and/or modify
7 * it under the terms of the GNU General Public License version 2 as
8 * published by the Free Software Foundation.
12 * There are run-time debug flags enabled via the debug_mask module param, or
13 * via the DEFAULT_DEBUG_MASK. See xt_qtaguid_internal.h.
17 #include <linux/file.h>
18 #include <linux/inetdevice.h>
19 #include <linux/module.h>
20 #include <linux/netfilter/x_tables.h>
21 #include <linux/netfilter/xt_qtaguid.h>
22 #include <linux/ratelimit.h>
23 #include <linux/seq_file.h>
24 #include <linux/skbuff.h>
25 #include <linux/workqueue.h>
26 #include <net/addrconf.h>
31 #if defined(CONFIG_IP6_NF_IPTABLES) || defined(CONFIG_IP6_NF_IPTABLES_MODULE)
32 #include <linux/netfilter_ipv6/ip6_tables.h>
35 #include <linux/netfilter/xt_socket.h>
36 #include "xt_qtaguid_internal.h"
37 #include "xt_qtaguid_print.h"
38 #include "../../fs/proc/internal.h"
41 * We only use the xt_socket funcs within a similar context to avoid unexpected
44 #define XT_SOCKET_SUPPORTED_HOOKS \
45 ((1 << NF_INET_PRE_ROUTING) | (1 << NF_INET_LOCAL_IN))
48 static const char *module_procdirname
= "xt_qtaguid";
49 static struct proc_dir_entry
*xt_qtaguid_procdir
;
51 static unsigned int proc_iface_perms
= S_IRUGO
;
52 module_param_named(iface_perms
, proc_iface_perms
, uint
, S_IRUGO
| S_IWUSR
);
54 static struct proc_dir_entry
*xt_qtaguid_stats_file
;
55 static unsigned int proc_stats_perms
= S_IRUGO
;
56 module_param_named(stats_perms
, proc_stats_perms
, uint
, S_IRUGO
| S_IWUSR
);
58 static struct proc_dir_entry
*xt_qtaguid_ctrl_file
;
60 /* Everybody can write. But proc_ctrl_write_limited is true by default which
61 * limits what can be controlled. See the can_*() functions.
63 static unsigned int proc_ctrl_perms
= S_IRUGO
| S_IWUGO
;
64 module_param_named(ctrl_perms
, proc_ctrl_perms
, uint
, S_IRUGO
| S_IWUSR
);
66 /* Limited by default, so the gid of the ctrl and stats proc entries
67 * will limit what can be done. See the can_*() functions.
69 static bool proc_stats_readall_limited
= true;
70 static bool proc_ctrl_write_limited
= true;
72 module_param_named(stats_readall_limited
, proc_stats_readall_limited
, bool,
74 module_param_named(ctrl_write_limited
, proc_ctrl_write_limited
, bool,
78 * Limit the number of active tags (via socket tags) for a given UID.
79 * Multiple processes could share the UID.
81 static int max_sock_tags
= DEFAULT_MAX_SOCK_TAGS
;
82 module_param(max_sock_tags
, int, S_IRUGO
| S_IWUSR
);
85 * After the kernel has initiallized this module, it is still possible
87 * Setting passive to Y:
88 * - the iface stats handling will not act on notifications.
89 * - iptables matches will never match.
90 * - ctrl commands silently succeed.
91 * - stats are always empty.
92 * This is mostly usefull when a bug is suspected.
94 static bool module_passive
;
95 module_param_named(passive
, module_passive
, bool, S_IRUGO
| S_IWUSR
);
98 * Control how qtaguid data is tracked per proc/uid.
99 * Setting tag_tracking_passive to Y:
100 * - don't create proc specific structs to track tags
101 * - don't check that active tag stats exceed some limits.
102 * - don't clean up socket tags on process exits.
103 * This is mostly usefull when a bug is suspected.
105 static bool qtu_proc_handling_passive
;
106 module_param_named(tag_tracking_passive
, qtu_proc_handling_passive
, bool,
109 #define QTU_DEV_NAME "xt_qtaguid"
111 uint qtaguid_debug_mask
= DEFAULT_DEBUG_MASK
;
112 module_param_named(debug_mask
, qtaguid_debug_mask
, uint
, S_IRUGO
| S_IWUSR
);
114 /*---------------------------------------------------------------------------*/
115 static const char *iface_stat_procdirname
= "iface_stat";
116 static struct proc_dir_entry
*iface_stat_procdir
;
118 * The iface_stat_all* will go away once userspace gets use to the new fields
119 * that have a format line.
121 static const char *iface_stat_all_procfilename
= "iface_stat_all";
122 static struct proc_dir_entry
*iface_stat_all_procfile
;
123 static const char *iface_stat_fmt_procfilename
= "iface_stat_fmt";
124 static struct proc_dir_entry
*iface_stat_fmt_procfile
;
127 static LIST_HEAD(iface_stat_list
);
128 static DEFINE_SPINLOCK(iface_stat_list_lock
);
130 static struct rb_root sock_tag_tree
= RB_ROOT
;
131 static DEFINE_SPINLOCK(sock_tag_list_lock
);
133 static struct rb_root tag_counter_set_tree
= RB_ROOT
;
134 static DEFINE_SPINLOCK(tag_counter_set_list_lock
);
136 static struct rb_root uid_tag_data_tree
= RB_ROOT
;
137 static DEFINE_SPINLOCK(uid_tag_data_tree_lock
);
139 static struct rb_root proc_qtu_data_tree
= RB_ROOT
;
140 /* No proc_qtu_data_tree_lock; use uid_tag_data_tree_lock */
142 static struct qtaguid_event_counts qtu_events
;
143 /*----------------------------------------------*/
144 static bool can_manipulate_uids(void)
147 return in_egroup_p(xt_qtaguid_ctrl_file
->gid
)
148 || unlikely(!current_fsuid()) || unlikely(!proc_ctrl_write_limited
)
149 || unlikely(current_fsuid() == xt_qtaguid_ctrl_file
->uid
);
152 static bool can_impersonate_uid(uid_t uid
)
154 return uid
== current_fsuid() || can_manipulate_uids();
157 static bool can_read_other_uid_stats(uid_t uid
)
160 return in_egroup_p(xt_qtaguid_stats_file
->gid
)
161 || unlikely(!current_fsuid()) || uid
== current_fsuid()
162 || unlikely(!proc_stats_readall_limited
)
163 || unlikely(current_fsuid() == xt_qtaguid_ctrl_file
->uid
);
166 static inline void dc_add_byte_packets(struct data_counters
*counters
, int set
,
167 enum ifs_tx_rx direction
,
168 enum ifs_proto ifs_proto
,
172 counters
->bpc
[set
][direction
][ifs_proto
].bytes
+= bytes
;
173 counters
->bpc
[set
][direction
][ifs_proto
].packets
+= packets
;
176 static struct tag_node
*tag_node_tree_search(struct rb_root
*root
, tag_t tag
)
178 struct rb_node
*node
= root
->rb_node
;
181 struct tag_node
*data
= rb_entry(node
, struct tag_node
, node
);
183 RB_DEBUG("qtaguid: tag_node_tree_search(0x%llx): "
184 " node=%p data=%p\n", tag
, node
, data
);
185 result
= tag_compare(tag
, data
->tag
);
186 RB_DEBUG("qtaguid: tag_node_tree_search(0x%llx): "
187 " data.tag=0x%llx (uid=%u) res=%d\n",
188 tag
, data
->tag
, get_uid_from_tag(data
->tag
), result
);
190 node
= node
->rb_left
;
192 node
= node
->rb_right
;
199 static void tag_node_tree_insert(struct tag_node
*data
, struct rb_root
*root
)
201 struct rb_node
**new = &(root
->rb_node
), *parent
= NULL
;
203 /* Figure out where to put new node */
205 struct tag_node
*this = rb_entry(*new, struct tag_node
,
207 int result
= tag_compare(data
->tag
, this->tag
);
208 RB_DEBUG("qtaguid: %s(): tag=0x%llx"
209 " (uid=%u)\n", __func__
,
211 get_uid_from_tag(this->tag
));
214 new = &((*new)->rb_left
);
216 new = &((*new)->rb_right
);
221 /* Add new node and rebalance tree. */
222 rb_link_node(&data
->node
, parent
, new);
223 rb_insert_color(&data
->node
, root
);
226 static void tag_stat_tree_insert(struct tag_stat
*data
, struct rb_root
*root
)
228 tag_node_tree_insert(&data
->tn
, root
);
231 static struct tag_stat
*tag_stat_tree_search(struct rb_root
*root
, tag_t tag
)
233 struct tag_node
*node
= tag_node_tree_search(root
, tag
);
236 return rb_entry(&node
->node
, struct tag_stat
, tn
.node
);
239 static void tag_counter_set_tree_insert(struct tag_counter_set
*data
,
240 struct rb_root
*root
)
242 tag_node_tree_insert(&data
->tn
, root
);
245 static struct tag_counter_set
*tag_counter_set_tree_search(struct rb_root
*root
,
248 struct tag_node
*node
= tag_node_tree_search(root
, tag
);
251 return rb_entry(&node
->node
, struct tag_counter_set
, tn
.node
);
255 static void tag_ref_tree_insert(struct tag_ref
*data
, struct rb_root
*root
)
257 tag_node_tree_insert(&data
->tn
, root
);
260 static struct tag_ref
*tag_ref_tree_search(struct rb_root
*root
, tag_t tag
)
262 struct tag_node
*node
= tag_node_tree_search(root
, tag
);
265 return rb_entry(&node
->node
, struct tag_ref
, tn
.node
);
268 static struct sock_tag
*sock_tag_tree_search(struct rb_root
*root
,
269 const struct sock
*sk
)
271 struct rb_node
*node
= root
->rb_node
;
274 struct sock_tag
*data
= rb_entry(node
, struct sock_tag
,
277 node
= node
->rb_left
;
278 else if (sk
> data
->sk
)
279 node
= node
->rb_right
;
286 static void sock_tag_tree_insert(struct sock_tag
*data
, struct rb_root
*root
)
288 struct rb_node
**new = &(root
->rb_node
), *parent
= NULL
;
290 /* Figure out where to put new node */
292 struct sock_tag
*this = rb_entry(*new, struct sock_tag
,
295 if (data
->sk
< this->sk
)
296 new = &((*new)->rb_left
);
297 else if (data
->sk
> this->sk
)
298 new = &((*new)->rb_right
);
303 /* Add new node and rebalance tree. */
304 rb_link_node(&data
->sock_node
, parent
, new);
305 rb_insert_color(&data
->sock_node
, root
);
308 static void sock_tag_tree_erase(struct rb_root
*st_to_free_tree
)
310 struct rb_node
*node
;
311 struct sock_tag
*st_entry
;
313 node
= rb_first(st_to_free_tree
);
315 st_entry
= rb_entry(node
, struct sock_tag
, sock_node
);
316 node
= rb_next(node
);
317 CT_DEBUG("qtaguid: %s(): "
318 "erase st: sk=%p tag=0x%llx (uid=%u)\n", __func__
,
321 get_uid_from_tag(st_entry
->tag
));
322 rb_erase(&st_entry
->sock_node
, st_to_free_tree
);
323 sockfd_put(st_entry
->socket
);
328 static struct proc_qtu_data
*proc_qtu_data_tree_search(struct rb_root
*root
,
331 struct rb_node
*node
= root
->rb_node
;
334 struct proc_qtu_data
*data
= rb_entry(node
,
335 struct proc_qtu_data
,
338 node
= node
->rb_left
;
339 else if (pid
> data
->pid
)
340 node
= node
->rb_right
;
347 static void proc_qtu_data_tree_insert(struct proc_qtu_data
*data
,
348 struct rb_root
*root
)
350 struct rb_node
**new = &(root
->rb_node
), *parent
= NULL
;
352 /* Figure out where to put new node */
354 struct proc_qtu_data
*this = rb_entry(*new,
355 struct proc_qtu_data
,
358 if (data
->pid
< this->pid
)
359 new = &((*new)->rb_left
);
360 else if (data
->pid
> this->pid
)
361 new = &((*new)->rb_right
);
366 /* Add new node and rebalance tree. */
367 rb_link_node(&data
->node
, parent
, new);
368 rb_insert_color(&data
->node
, root
);
371 static void uid_tag_data_tree_insert(struct uid_tag_data
*data
,
372 struct rb_root
*root
)
374 struct rb_node
**new = &(root
->rb_node
), *parent
= NULL
;
376 /* Figure out where to put new node */
378 struct uid_tag_data
*this = rb_entry(*new,
382 if (data
->uid
< this->uid
)
383 new = &((*new)->rb_left
);
384 else if (data
->uid
> this->uid
)
385 new = &((*new)->rb_right
);
390 /* Add new node and rebalance tree. */
391 rb_link_node(&data
->node
, parent
, new);
392 rb_insert_color(&data
->node
, root
);
395 static struct uid_tag_data
*uid_tag_data_tree_search(struct rb_root
*root
,
398 struct rb_node
*node
= root
->rb_node
;
401 struct uid_tag_data
*data
= rb_entry(node
,
405 node
= node
->rb_left
;
406 else if (uid
> data
->uid
)
407 node
= node
->rb_right
;
415 * Allocates a new uid_tag_data struct if needed.
416 * Returns a pointer to the found or allocated uid_tag_data.
417 * Returns a PTR_ERR on failures, and lock is not held.
418 * If found is not NULL:
419 * sets *found to true if not allocated.
420 * sets *found to false if allocated.
422 struct uid_tag_data
*get_uid_data(uid_t uid
, bool *found_res
)
424 struct uid_tag_data
*utd_entry
;
426 /* Look for top level uid_tag_data for the UID */
427 utd_entry
= uid_tag_data_tree_search(&uid_tag_data_tree
, uid
);
428 DR_DEBUG("qtaguid: get_uid_data(%u) utd=%p\n", uid
, utd_entry
);
431 *found_res
= utd_entry
;
435 utd_entry
= kzalloc(sizeof(*utd_entry
), GFP_ATOMIC
);
437 pr_err("qtaguid: get_uid_data(%u): "
438 "tag data alloc failed\n", uid
);
439 return ERR_PTR(-ENOMEM
);
442 utd_entry
->uid
= uid
;
443 utd_entry
->tag_ref_tree
= RB_ROOT
;
444 uid_tag_data_tree_insert(utd_entry
, &uid_tag_data_tree
);
445 DR_DEBUG("qtaguid: get_uid_data(%u) new utd=%p\n", uid
, utd_entry
);
449 /* Never returns NULL. Either PTR_ERR or a valid ptr. */
450 static struct tag_ref
*new_tag_ref(tag_t new_tag
,
451 struct uid_tag_data
*utd_entry
)
453 struct tag_ref
*tr_entry
;
456 if (utd_entry
->num_active_tags
+ 1 > max_sock_tags
) {
457 pr_info("qtaguid: new_tag_ref(0x%llx): "
458 "tag ref alloc quota exceeded. max=%d\n",
459 new_tag
, max_sock_tags
);
465 tr_entry
= kzalloc(sizeof(*tr_entry
), GFP_ATOMIC
);
467 pr_err("qtaguid: new_tag_ref(0x%llx): "
468 "tag ref alloc failed\n",
473 tr_entry
->tn
.tag
= new_tag
;
474 /* tr_entry->num_sock_tags handled by caller */
475 utd_entry
->num_active_tags
++;
476 tag_ref_tree_insert(tr_entry
, &utd_entry
->tag_ref_tree
);
477 DR_DEBUG("qtaguid: new_tag_ref(0x%llx): "
478 " inserted new tag ref %p\n",
486 static struct tag_ref
*lookup_tag_ref(tag_t full_tag
,
487 struct uid_tag_data
**utd_res
)
489 struct uid_tag_data
*utd_entry
;
490 struct tag_ref
*tr_entry
;
492 uid_t uid
= get_uid_from_tag(full_tag
);
494 DR_DEBUG("qtaguid: lookup_tag_ref(tag=0x%llx (uid=%u))\n",
497 utd_entry
= get_uid_data(uid
, &found_utd
);
498 if (IS_ERR_OR_NULL(utd_entry
)) {
500 *utd_res
= utd_entry
;
504 tr_entry
= tag_ref_tree_search(&utd_entry
->tag_ref_tree
, full_tag
);
506 *utd_res
= utd_entry
;
507 DR_DEBUG("qtaguid: lookup_tag_ref(0x%llx) utd_entry=%p tr_entry=%p\n",
508 full_tag
, utd_entry
, tr_entry
);
512 /* Never returns NULL. Either PTR_ERR or a valid ptr. */
513 static struct tag_ref
*get_tag_ref(tag_t full_tag
,
514 struct uid_tag_data
**utd_res
)
516 struct uid_tag_data
*utd_entry
;
517 struct tag_ref
*tr_entry
;
519 DR_DEBUG("qtaguid: get_tag_ref(0x%llx)\n",
521 spin_lock_bh(&uid_tag_data_tree_lock
);
522 tr_entry
= lookup_tag_ref(full_tag
, &utd_entry
);
523 BUG_ON(IS_ERR_OR_NULL(utd_entry
));
525 tr_entry
= new_tag_ref(full_tag
, utd_entry
);
527 spin_unlock_bh(&uid_tag_data_tree_lock
);
529 *utd_res
= utd_entry
;
530 DR_DEBUG("qtaguid: get_tag_ref(0x%llx) utd=%p tr=%p\n",
531 full_tag
, utd_entry
, tr_entry
);
535 /* Checks and maybe frees the UID Tag Data entry */
536 static void put_utd_entry(struct uid_tag_data
*utd_entry
)
538 /* Are we done with the UID tag data entry? */
539 if (RB_EMPTY_ROOT(&utd_entry
->tag_ref_tree
) &&
540 !utd_entry
->num_pqd
) {
541 DR_DEBUG("qtaguid: %s(): "
542 "erase utd_entry=%p uid=%u "
543 "by pid=%u tgid=%u uid=%u\n", __func__
,
544 utd_entry
, utd_entry
->uid
,
545 current
->pid
, current
->tgid
, current_fsuid());
546 BUG_ON(utd_entry
->num_active_tags
);
547 rb_erase(&utd_entry
->node
, &uid_tag_data_tree
);
550 DR_DEBUG("qtaguid: %s(): "
551 "utd_entry=%p still has %d tags %d proc_qtu_data\n",
552 __func__
, utd_entry
, utd_entry
->num_active_tags
,
554 BUG_ON(!(utd_entry
->num_active_tags
||
555 utd_entry
->num_pqd
));
560 * If no sock_tags are using this tag_ref,
561 * decrements refcount of utd_entry, removes tr_entry
562 * from utd_entry->tag_ref_tree and frees.
564 static void free_tag_ref_from_utd_entry(struct tag_ref
*tr_entry
,
565 struct uid_tag_data
*utd_entry
)
567 DR_DEBUG("qtaguid: %s(): %p tag=0x%llx (uid=%u)\n", __func__
,
568 tr_entry
, tr_entry
->tn
.tag
,
569 get_uid_from_tag(tr_entry
->tn
.tag
));
570 if (!tr_entry
->num_sock_tags
) {
571 BUG_ON(!utd_entry
->num_active_tags
);
572 utd_entry
->num_active_tags
--;
573 rb_erase(&tr_entry
->tn
.node
, &utd_entry
->tag_ref_tree
);
574 DR_DEBUG("qtaguid: %s(): erased %p\n", __func__
, tr_entry
);
579 static void put_tag_ref_tree(tag_t full_tag
, struct uid_tag_data
*utd_entry
)
581 struct rb_node
*node
;
582 struct tag_ref
*tr_entry
;
585 DR_DEBUG("qtaguid: %s(tag=0x%llx (uid=%u))\n", __func__
,
586 full_tag
, get_uid_from_tag(full_tag
));
587 acct_tag
= get_atag_from_tag(full_tag
);
588 node
= rb_first(&utd_entry
->tag_ref_tree
);
590 tr_entry
= rb_entry(node
, struct tag_ref
, tn
.node
);
591 node
= rb_next(node
);
592 if (!acct_tag
|| tr_entry
->tn
.tag
== full_tag
)
593 free_tag_ref_from_utd_entry(tr_entry
, utd_entry
);
597 static ssize_t
read_proc_u64(struct file
*file
, char __user
*buf
,
598 size_t size
, loff_t
*ppos
)
600 uint64_t *valuep
= PDE_DATA(file_inode(file
));
604 tmp_size
= scnprintf(tmp
, sizeof(tmp
), "%llu\n", *valuep
);
605 return simple_read_from_buffer(buf
, size
, ppos
, tmp
, tmp_size
);
608 static ssize_t
read_proc_bool(struct file
*file
, char __user
*buf
,
609 size_t size
, loff_t
*ppos
)
611 bool *valuep
= PDE_DATA(file_inode(file
));
615 tmp_size
= scnprintf(tmp
, sizeof(tmp
), "%u\n", *valuep
);
616 return simple_read_from_buffer(buf
, size
, ppos
, tmp
, tmp_size
);
619 static int get_active_counter_set(tag_t tag
)
622 struct tag_counter_set
*tcs
;
624 MT_DEBUG("qtaguid: get_active_counter_set(tag=0x%llx)"
626 tag
, get_uid_from_tag(tag
));
627 /* For now we only handle UID tags for active sets */
628 tag
= get_utag_from_tag(tag
);
629 spin_lock_bh(&tag_counter_set_list_lock
);
630 tcs
= tag_counter_set_tree_search(&tag_counter_set_tree
, tag
);
632 active_set
= tcs
->active_set
;
633 spin_unlock_bh(&tag_counter_set_list_lock
);
638 * Find the entry for tracking the specified interface.
639 * Caller must hold iface_stat_list_lock
641 static struct iface_stat
*get_iface_entry(const char *ifname
)
643 struct iface_stat
*iface_entry
;
645 /* Find the entry for tracking the specified tag within the interface */
646 if (ifname
== NULL
) {
647 pr_info("qtaguid: iface_stat: get() NULL device name\n");
651 /* Iterate over interfaces */
652 list_for_each_entry(iface_entry
, &iface_stat_list
, list
) {
653 if (!strcmp(ifname
, iface_entry
->ifname
))
/* This is for fmt2 only */
static void pp_iface_stat_header(struct seq_file *m)
{
	seq_puts(m,
		 "ifname "
		 "total_skb_rx_bytes total_skb_rx_packets "
		 "total_skb_tx_bytes total_skb_tx_packets "
		 "rx_tcp_bytes rx_tcp_packets "
		 "rx_udp_bytes rx_udp_packets "
		 "rx_other_bytes rx_other_packets "
		 "tx_tcp_bytes tx_tcp_packets "
		 "tx_udp_bytes tx_udp_packets "
		 "tx_other_bytes tx_other_packets\n"
	);
}
677 static void pp_iface_stat_line(struct seq_file
*m
,
678 struct iface_stat
*iface_entry
)
680 struct data_counters
*cnts
;
681 int cnt_set
= 0; /* We only use one set for the device */
682 cnts
= &iface_entry
->totals_via_skb
;
683 seq_printf(m
, "%s %llu %llu %llu %llu %llu %llu %llu %llu "
684 "%llu %llu %llu %llu %llu %llu %llu %llu\n",
686 dc_sum_bytes(cnts
, cnt_set
, IFS_RX
),
687 dc_sum_packets(cnts
, cnt_set
, IFS_RX
),
688 dc_sum_bytes(cnts
, cnt_set
, IFS_TX
),
689 dc_sum_packets(cnts
, cnt_set
, IFS_TX
),
690 cnts
->bpc
[cnt_set
][IFS_RX
][IFS_TCP
].bytes
,
691 cnts
->bpc
[cnt_set
][IFS_RX
][IFS_TCP
].packets
,
692 cnts
->bpc
[cnt_set
][IFS_RX
][IFS_UDP
].bytes
,
693 cnts
->bpc
[cnt_set
][IFS_RX
][IFS_UDP
].packets
,
694 cnts
->bpc
[cnt_set
][IFS_RX
][IFS_PROTO_OTHER
].bytes
,
695 cnts
->bpc
[cnt_set
][IFS_RX
][IFS_PROTO_OTHER
].packets
,
696 cnts
->bpc
[cnt_set
][IFS_TX
][IFS_TCP
].bytes
,
697 cnts
->bpc
[cnt_set
][IFS_TX
][IFS_TCP
].packets
,
698 cnts
->bpc
[cnt_set
][IFS_TX
][IFS_UDP
].bytes
,
699 cnts
->bpc
[cnt_set
][IFS_TX
][IFS_UDP
].packets
,
700 cnts
->bpc
[cnt_set
][IFS_TX
][IFS_PROTO_OTHER
].bytes
,
701 cnts
->bpc
[cnt_set
][IFS_TX
][IFS_PROTO_OTHER
].packets
);
/* Per-open seq_file state: which output format (1 or 2) was requested.
 * NOTE(review): member list reconstructed — only `fmt` is referenced in
 * this chunk (p->fmt); confirm against the full file. */
struct proc_iface_stat_fmt_info {
	int fmt;
};
708 static void *iface_stat_fmt_proc_start(struct seq_file
*m
, loff_t
*pos
)
710 struct proc_iface_stat_fmt_info
*p
= m
->private;
714 * This lock will prevent iface_stat_update() from changing active,
715 * and in turn prevent an interface from unregistering itself.
717 spin_lock_bh(&iface_stat_list_lock
);
719 if (unlikely(module_passive
))
722 if (!n
&& p
->fmt
== 2)
723 pp_iface_stat_header(m
);
725 return seq_list_start(&iface_stat_list
, n
);
728 static void *iface_stat_fmt_proc_next(struct seq_file
*m
, void *p
, loff_t
*pos
)
730 return seq_list_next(p
, &iface_stat_list
, pos
);
733 static void iface_stat_fmt_proc_stop(struct seq_file
*m
, void *p
)
735 spin_unlock_bh(&iface_stat_list_lock
);
738 static int iface_stat_fmt_proc_show(struct seq_file
*m
, void *v
)
740 struct proc_iface_stat_fmt_info
*p
= m
->private;
741 struct iface_stat
*iface_entry
;
742 struct rtnl_link_stats64 dev_stats
, *stats
;
743 struct rtnl_link_stats64 no_dev_stats
= {0};
746 CT_DEBUG("qtaguid:proc iface_stat_fmt pid=%u tgid=%u uid=%u\n",
747 current
->pid
, current
->tgid
, current_fsuid());
749 iface_entry
= list_entry(v
, struct iface_stat
, list
);
751 if (iface_entry
->active
) {
752 stats
= dev_get_stats(iface_entry
->net_dev
,
755 stats
= &no_dev_stats
;
758 * If the meaning of the data changes, then update the fmtX
762 seq_printf(m
, "%s %d %llu %llu %llu %llu %llu %llu %llu %llu\n",
765 iface_entry
->totals_via_dev
[IFS_RX
].bytes
,
766 iface_entry
->totals_via_dev
[IFS_RX
].packets
,
767 iface_entry
->totals_via_dev
[IFS_TX
].bytes
,
768 iface_entry
->totals_via_dev
[IFS_TX
].packets
,
769 stats
->rx_bytes
, stats
->rx_packets
,
770 stats
->tx_bytes
, stats
->tx_packets
773 pp_iface_stat_line(m
, iface_entry
);
778 static const struct file_operations read_u64_fops
= {
779 .read
= read_proc_u64
,
780 .llseek
= default_llseek
,
783 static const struct file_operations read_bool_fops
= {
784 .read
= read_proc_bool
,
785 .llseek
= default_llseek
,
788 static void iface_create_proc_worker(struct work_struct
*work
)
790 struct proc_dir_entry
*proc_entry
;
791 struct iface_stat_work
*isw
= container_of(work
, struct iface_stat_work
,
793 struct iface_stat
*new_iface
= isw
->iface_entry
;
795 /* iface_entries are not deleted, so safe to manipulate. */
796 proc_entry
= proc_mkdir(new_iface
->ifname
, iface_stat_procdir
);
797 if (IS_ERR_OR_NULL(proc_entry
)) {
798 pr_err("qtaguid: iface_stat: create_proc(): alloc failed.\n");
803 new_iface
->proc_ptr
= proc_entry
;
805 proc_create_data("tx_bytes", proc_iface_perms
, proc_entry
,
807 &new_iface
->totals_via_dev
[IFS_TX
].bytes
);
808 proc_create_data("rx_bytes", proc_iface_perms
, proc_entry
,
810 &new_iface
->totals_via_dev
[IFS_RX
].bytes
);
811 proc_create_data("tx_packets", proc_iface_perms
, proc_entry
,
813 &new_iface
->totals_via_dev
[IFS_TX
].packets
);
814 proc_create_data("rx_packets", proc_iface_perms
, proc_entry
,
816 &new_iface
->totals_via_dev
[IFS_RX
].packets
);
817 proc_create_data("active", proc_iface_perms
, proc_entry
,
818 &read_bool_fops
, &new_iface
->active
);
820 IF_DEBUG("qtaguid: iface_stat: create_proc(): done "
821 "entry=%p dev=%s\n", new_iface
, new_iface
->ifname
);
826 * Will set the entry's active state, and
827 * update the net_dev accordingly also.
829 static void _iface_stat_set_active(struct iface_stat
*entry
,
830 struct net_device
*net_dev
,
834 entry
->net_dev
= net_dev
;
835 entry
->active
= true;
836 IF_DEBUG("qtaguid: %s(%s): "
837 "enable tracking. rfcnt=%d\n", __func__
,
839 __this_cpu_read(*net_dev
->pcpu_refcnt
));
841 entry
->active
= false;
842 entry
->net_dev
= NULL
;
843 IF_DEBUG("qtaguid: %s(%s): "
844 "disable tracking. rfcnt=%d\n", __func__
,
846 __this_cpu_read(*net_dev
->pcpu_refcnt
));
851 /* Caller must hold iface_stat_list_lock */
852 static struct iface_stat
*iface_alloc(struct net_device
*net_dev
)
854 struct iface_stat
*new_iface
;
855 struct iface_stat_work
*isw
;
857 new_iface
= kzalloc(sizeof(*new_iface
), GFP_ATOMIC
);
858 if (new_iface
== NULL
) {
859 pr_err("qtaguid: iface_stat: create(%s): "
860 "iface_stat alloc failed\n", net_dev
->name
);
863 new_iface
->ifname
= kstrdup(net_dev
->name
, GFP_ATOMIC
);
864 if (new_iface
->ifname
== NULL
) {
865 pr_err("qtaguid: iface_stat: create(%s): "
866 "ifname alloc failed\n", net_dev
->name
);
870 spin_lock_init(&new_iface
->tag_stat_list_lock
);
871 new_iface
->tag_stat_tree
= RB_ROOT
;
872 _iface_stat_set_active(new_iface
, net_dev
, true);
875 * ipv6 notifier chains are atomic :(
876 * No create_proc_read_entry() for you!
878 isw
= kmalloc(sizeof(*isw
), GFP_ATOMIC
);
880 pr_err("qtaguid: iface_stat: create(%s): "
881 "work alloc failed\n", new_iface
->ifname
);
882 _iface_stat_set_active(new_iface
, net_dev
, false);
883 kfree(new_iface
->ifname
);
887 isw
->iface_entry
= new_iface
;
888 INIT_WORK(&isw
->iface_work
, iface_create_proc_worker
);
889 schedule_work(&isw
->iface_work
);
890 list_add(&new_iface
->list
, &iface_stat_list
);
894 static void iface_check_stats_reset_and_adjust(struct net_device
*net_dev
,
895 struct iface_stat
*iface
)
897 struct rtnl_link_stats64 dev_stats
, *stats
;
900 stats
= dev_get_stats(net_dev
, &dev_stats
);
901 /* No empty packets */
903 (stats
->rx_bytes
< iface
->last_known
[IFS_RX
].bytes
)
904 || (stats
->tx_bytes
< iface
->last_known
[IFS_TX
].bytes
);
906 IF_DEBUG("qtaguid: %s(%s): iface=%p netdev=%p "
907 "bytes rx/tx=%llu/%llu "
908 "active=%d last_known=%d "
909 "stats_rewound=%d\n", __func__
,
910 net_dev
? net_dev
->name
: "?",
912 stats
->rx_bytes
, stats
->tx_bytes
,
913 iface
->active
, iface
->last_known_valid
, stats_rewound
);
915 if (iface
->active
&& iface
->last_known_valid
&& stats_rewound
) {
916 pr_warn_once("qtaguid: iface_stat: %s(%s): "
917 "iface reset its stats unexpectedly\n", __func__
,
920 iface
->totals_via_dev
[IFS_TX
].bytes
+=
921 iface
->last_known
[IFS_TX
].bytes
;
922 iface
->totals_via_dev
[IFS_TX
].packets
+=
923 iface
->last_known
[IFS_TX
].packets
;
924 iface
->totals_via_dev
[IFS_RX
].bytes
+=
925 iface
->last_known
[IFS_RX
].bytes
;
926 iface
->totals_via_dev
[IFS_RX
].packets
+=
927 iface
->last_known
[IFS_RX
].packets
;
928 iface
->last_known_valid
= false;
929 IF_DEBUG("qtaguid: %s(%s): iface=%p "
930 "used last known bytes rx/tx=%llu/%llu\n", __func__
,
931 iface
->ifname
, iface
, iface
->last_known
[IFS_RX
].bytes
,
932 iface
->last_known
[IFS_TX
].bytes
);
937 * Create a new entry for tracking the specified interface.
938 * Do nothing if the entry already exists.
939 * Called when an interface is configured with a valid IP address.
941 static void iface_stat_create(struct net_device
*net_dev
,
942 struct in_ifaddr
*ifa
)
944 struct in_device
*in_dev
= NULL
;
946 struct iface_stat
*entry
;
948 struct iface_stat
*new_iface
;
950 IF_DEBUG("qtaguid: iface_stat: create(%s): ifa=%p netdev=%p\n",
951 net_dev
? net_dev
->name
: "?",
954 pr_err("qtaguid: iface_stat: create(): no net dev\n");
958 ifname
= net_dev
->name
;
960 in_dev
= in_dev_get(net_dev
);
962 pr_err("qtaguid: iface_stat: create(%s): no inet dev\n",
966 IF_DEBUG("qtaguid: iface_stat: create(%s): in_dev=%p\n",
968 for (ifa
= in_dev
->ifa_list
; ifa
; ifa
= ifa
->ifa_next
) {
969 IF_DEBUG("qtaguid: iface_stat: create(%s): "
970 "ifa=%p ifa_label=%s\n",
972 ifa
->ifa_label
? ifa
->ifa_label
: "(null)");
973 if (ifa
->ifa_label
&& !strcmp(ifname
, ifa
->ifa_label
))
979 IF_DEBUG("qtaguid: iface_stat: create(%s): no matching IP\n",
983 ipaddr
= ifa
->ifa_local
;
985 spin_lock_bh(&iface_stat_list_lock
);
986 entry
= get_iface_entry(ifname
);
988 IF_DEBUG("qtaguid: iface_stat: create(%s): entry=%p\n",
990 iface_check_stats_reset_and_adjust(net_dev
, entry
);
991 _iface_stat_set_active(entry
, net_dev
, true);
992 IF_DEBUG("qtaguid: %s(%s): "
993 "tracking now %d on ip=%pI4\n", __func__
,
994 entry
->ifname
, true, &ipaddr
);
995 goto done_unlock_put
;
998 new_iface
= iface_alloc(net_dev
);
999 IF_DEBUG("qtaguid: iface_stat: create(%s): done "
1000 "entry=%p ip=%pI4\n", ifname
, new_iface
, &ipaddr
);
1002 spin_unlock_bh(&iface_stat_list_lock
);
1008 static void iface_stat_create_ipv6(struct net_device
*net_dev
,
1009 struct inet6_ifaddr
*ifa
)
1011 struct in_device
*in_dev
;
1013 struct iface_stat
*entry
;
1014 struct iface_stat
*new_iface
;
1017 IF_DEBUG("qtaguid: iface_stat: create6(): ifa=%p netdev=%p->name=%s\n",
1018 ifa
, net_dev
, net_dev
? net_dev
->name
: "");
1020 pr_err("qtaguid: iface_stat: create6(): no net dev!\n");
1023 ifname
= net_dev
->name
;
1025 in_dev
= in_dev_get(net_dev
);
1027 pr_err("qtaguid: iface_stat: create6(%s): no inet dev\n",
1032 IF_DEBUG("qtaguid: iface_stat: create6(%s): in_dev=%p\n",
1036 IF_DEBUG("qtaguid: iface_stat: create6(%s): no matching IP\n",
1040 addr_type
= ipv6_addr_type(&ifa
->addr
);
1042 spin_lock_bh(&iface_stat_list_lock
);
1043 entry
= get_iface_entry(ifname
);
1044 if (entry
!= NULL
) {
1045 IF_DEBUG("qtaguid: %s(%s): entry=%p\n", __func__
,
1047 iface_check_stats_reset_and_adjust(net_dev
, entry
);
1048 _iface_stat_set_active(entry
, net_dev
, true);
1049 IF_DEBUG("qtaguid: %s(%s): "
1050 "tracking now %d on ip=%pI6c\n", __func__
,
1051 entry
->ifname
, true, &ifa
->addr
);
1052 goto done_unlock_put
;
1055 new_iface
= iface_alloc(net_dev
);
1056 IF_DEBUG("qtaguid: iface_stat: create6(%s): done "
1057 "entry=%p ip=%pI6c\n", ifname
, new_iface
, &ifa
->addr
);
1060 spin_unlock_bh(&iface_stat_list_lock
);
1065 static struct sock_tag
*get_sock_stat_nl(const struct sock
*sk
)
1067 MT_DEBUG("qtaguid: get_sock_stat_nl(sk=%p)\n", sk
);
1068 return sock_tag_tree_search(&sock_tag_tree
, sk
);
1071 static struct sock_tag
*get_sock_stat(const struct sock
*sk
)
1073 struct sock_tag
*sock_tag_entry
;
1074 MT_DEBUG("qtaguid: get_sock_stat(sk=%p)\n", sk
);
1077 spin_lock_bh(&sock_tag_list_lock
);
1078 sock_tag_entry
= get_sock_stat_nl(sk
);
1079 spin_unlock_bh(&sock_tag_list_lock
);
1080 return sock_tag_entry
;
1083 static int ipx_proto(const struct sk_buff
*skb
,
1084 struct xt_action_param
*par
)
1086 int thoff
= 0, tproto
;
1088 switch (par
->family
) {
1090 tproto
= ipv6_find_hdr(skb
, &thoff
, -1, NULL
, NULL
);
1092 MT_DEBUG("%s(): transport header not found in ipv6"
1093 " skb=%p\n", __func__
, skb
);
1096 tproto
= ip_hdr(skb
)->protocol
;
1099 tproto
= IPPROTO_RAW
;
1105 data_counters_update(struct data_counters
*dc
, int set
,
1106 enum ifs_tx_rx direction
, int proto
, int bytes
)
1110 dc_add_byte_packets(dc
, set
, direction
, IFS_TCP
, bytes
, 1);
1113 dc_add_byte_packets(dc
, set
, direction
, IFS_UDP
, bytes
, 1);
1117 dc_add_byte_packets(dc
, set
, direction
, IFS_PROTO_OTHER
, bytes
,
1124 * Update stats for the specified interface. Do nothing if the entry
1125 * does not exist (when a device was never configured with an IP address).
1126 * Called when an device is being unregistered.
1128 static void iface_stat_update(struct net_device
*net_dev
, bool stash_only
)
1130 struct rtnl_link_stats64 dev_stats
, *stats
;
1131 struct iface_stat
*entry
;
1133 stats
= dev_get_stats(net_dev
, &dev_stats
);
1134 spin_lock_bh(&iface_stat_list_lock
);
1135 entry
= get_iface_entry(net_dev
->name
);
1136 if (entry
== NULL
) {
1137 IF_DEBUG("qtaguid: iface_stat: update(%s): not tracked\n",
1139 spin_unlock_bh(&iface_stat_list_lock
);
1143 IF_DEBUG("qtaguid: %s(%s): entry=%p\n", __func__
,
1144 net_dev
->name
, entry
);
1145 if (!entry
->active
) {
1146 IF_DEBUG("qtaguid: %s(%s): already disabled\n", __func__
,
1148 spin_unlock_bh(&iface_stat_list_lock
);
1153 entry
->last_known
[IFS_TX
].bytes
= stats
->tx_bytes
;
1154 entry
->last_known
[IFS_TX
].packets
= stats
->tx_packets
;
1155 entry
->last_known
[IFS_RX
].bytes
= stats
->rx_bytes
;
1156 entry
->last_known
[IFS_RX
].packets
= stats
->rx_packets
;
1157 entry
->last_known_valid
= true;
1158 IF_DEBUG("qtaguid: %s(%s): "
1159 "dev stats stashed rx/tx=%llu/%llu\n", __func__
,
1160 net_dev
->name
, stats
->rx_bytes
, stats
->tx_bytes
);
1161 spin_unlock_bh(&iface_stat_list_lock
);
1164 entry
->totals_via_dev
[IFS_TX
].bytes
+= stats
->tx_bytes
;
1165 entry
->totals_via_dev
[IFS_TX
].packets
+= stats
->tx_packets
;
1166 entry
->totals_via_dev
[IFS_RX
].bytes
+= stats
->rx_bytes
;
1167 entry
->totals_via_dev
[IFS_RX
].packets
+= stats
->rx_packets
;
1168 /* We don't need the last_known[] anymore */
1169 entry
->last_known_valid
= false;
1170 _iface_stat_set_active(entry
, net_dev
, false);
1171 IF_DEBUG("qtaguid: %s(%s): "
1172 "disable tracking. rx/tx=%llu/%llu\n", __func__
,
1173 net_dev
->name
, stats
->rx_bytes
, stats
->tx_bytes
);
1174 spin_unlock_bh(&iface_stat_list_lock
);
1178 * Update stats for the specified interface from the skb.
1179 * Do nothing if the entry
1180 * does not exist (when a device was never configured with an IP address).
1181 * Called on each sk.
1183 static void iface_stat_update_from_skb(const struct sk_buff
*skb
,
1184 struct xt_action_param
*par
)
1186 struct iface_stat
*entry
;
1187 const struct net_device
*el_dev
;
1188 enum ifs_tx_rx direction
= par
->in
? IFS_RX
: IFS_TX
;
1189 int bytes
= skb
->len
;
1193 MT_DEBUG("qtaguid[%d]: no skb->dev\n", par
->hooknum
);
1194 el_dev
= par
->in
? : par
->out
;
1196 const struct net_device
*other_dev
;
1198 other_dev
= par
->in
? : par
->out
;
1199 if (el_dev
!= other_dev
) {
1200 MT_DEBUG("qtaguid[%d]: skb->dev=%p %s vs "
1201 "par->(in/out)=%p %s\n",
1202 par
->hooknum
, el_dev
, el_dev
->name
, other_dev
,
1207 if (unlikely(!el_dev
)) {
1208 pr_err_ratelimited("qtaguid[%d]: %s(): no par->in/out?!!\n",
1209 par
->hooknum
, __func__
);
1211 } else if (unlikely(!el_dev
->name
)) {
1212 pr_err_ratelimited("qtaguid[%d]: %s(): no dev->name?!!\n",
1213 par
->hooknum
, __func__
);
1216 proto
= ipx_proto(skb
, par
);
1217 MT_DEBUG("qtaguid[%d]: dev name=%s type=%d fam=%d proto=%d\n",
1218 par
->hooknum
, el_dev
->name
, el_dev
->type
,
1219 par
->family
, proto
);
1222 spin_lock_bh(&iface_stat_list_lock
);
1223 entry
= get_iface_entry(el_dev
->name
);
1224 if (entry
== NULL
) {
1225 IF_DEBUG("qtaguid: iface_stat: %s(%s): not tracked\n",
1226 __func__
, el_dev
->name
);
1227 spin_unlock_bh(&iface_stat_list_lock
);
1231 IF_DEBUG("qtaguid: %s(%s): entry=%p\n", __func__
,
1232 el_dev
->name
, entry
);
1234 data_counters_update(&entry
->totals_via_skb
, 0, direction
, proto
,
1236 spin_unlock_bh(&iface_stat_list_lock
);
1239 static void tag_stat_update(struct tag_stat
*tag_entry
,
1240 enum ifs_tx_rx direction
, int proto
, int bytes
)
1243 active_set
= get_active_counter_set(tag_entry
->tn
.tag
);
1244 MT_DEBUG("qtaguid: tag_stat_update(tag=0x%llx (uid=%u) set=%d "
1245 "dir=%d proto=%d bytes=%d)\n",
1246 tag_entry
->tn
.tag
, get_uid_from_tag(tag_entry
->tn
.tag
),
1247 active_set
, direction
, proto
, bytes
);
1248 data_counters_update(&tag_entry
->counters
, active_set
, direction
,
1250 if (tag_entry
->parent_counters
)
1251 data_counters_update(tag_entry
->parent_counters
, active_set
,
1252 direction
, proto
, bytes
);
1256 * Create a new entry for tracking the specified {acct_tag,uid_tag} within
1258 * iface_entry->tag_stat_list_lock should be held.
1260 static struct tag_stat
*create_if_tag_stat(struct iface_stat
*iface_entry
,
1263 struct tag_stat
*new_tag_stat_entry
= NULL
;
1264 IF_DEBUG("qtaguid: iface_stat: %s(): ife=%p tag=0x%llx"
1265 " (uid=%u)\n", __func__
,
1266 iface_entry
, tag
, get_uid_from_tag(tag
));
1267 new_tag_stat_entry
= kzalloc(sizeof(*new_tag_stat_entry
), GFP_ATOMIC
);
1268 if (!new_tag_stat_entry
) {
1269 pr_err("qtaguid: iface_stat: tag stat alloc failed\n");
1272 new_tag_stat_entry
->tn
.tag
= tag
;
1273 tag_stat_tree_insert(new_tag_stat_entry
, &iface_entry
->tag_stat_tree
);
1275 return new_tag_stat_entry
;
1278 static void if_tag_stat_update(const char *ifname
, uid_t uid
,
1279 const struct sock
*sk
, enum ifs_tx_rx direction
,
1280 int proto
, int bytes
)
1282 struct tag_stat
*tag_stat_entry
;
1283 tag_t tag
, acct_tag
;
1285 struct data_counters
*uid_tag_counters
;
1286 struct sock_tag
*sock_tag_entry
;
1287 struct iface_stat
*iface_entry
;
1288 struct tag_stat
*new_tag_stat
= NULL
;
1289 MT_DEBUG("qtaguid: if_tag_stat_update(ifname=%s "
1290 "uid=%u sk=%p dir=%d proto=%d bytes=%d)\n",
1291 ifname
, uid
, sk
, direction
, proto
, bytes
);
1294 iface_entry
= get_iface_entry(ifname
);
1296 pr_err_ratelimited("qtaguid: iface_stat: stat_update() "
1297 "%s not found\n", ifname
);
1300 /* It is ok to process data when an iface_entry is inactive */
1302 MT_DEBUG("qtaguid: iface_stat: stat_update() dev=%s entry=%p\n",
1303 ifname
, iface_entry
);
1306 * Look for a tagged sock.
1307 * It will have an acct_uid.
1309 sock_tag_entry
= get_sock_stat(sk
);
1310 if (sock_tag_entry
) {
1311 tag
= sock_tag_entry
->tag
;
1312 acct_tag
= get_atag_from_tag(tag
);
1313 uid_tag
= get_utag_from_tag(tag
);
1315 acct_tag
= make_atag_from_value(0);
1316 tag
= combine_atag_with_uid(acct_tag
, uid
);
1317 uid_tag
= make_tag_from_uid(uid
);
1319 MT_DEBUG("qtaguid: iface_stat: stat_update(): "
1320 " looking for tag=0x%llx (uid=%u) in ife=%p\n",
1321 tag
, get_uid_from_tag(tag
), iface_entry
);
1322 /* Loop over tag list under this interface for {acct_tag,uid_tag} */
1323 spin_lock_bh(&iface_entry
->tag_stat_list_lock
);
1325 tag_stat_entry
= tag_stat_tree_search(&iface_entry
->tag_stat_tree
,
1327 if (tag_stat_entry
) {
1329 * Updating the {acct_tag, uid_tag} entry handles both stats:
1330 * {0, uid_tag} will also get updated.
1332 tag_stat_update(tag_stat_entry
, direction
, proto
, bytes
);
1333 spin_unlock_bh(&iface_entry
->tag_stat_list_lock
);
1337 /* Loop over tag list under this interface for {0,uid_tag} */
1338 tag_stat_entry
= tag_stat_tree_search(&iface_entry
->tag_stat_tree
,
1340 if (!tag_stat_entry
) {
1341 /* Here: the base uid_tag did not exist */
1343 * No parent counters. So
1344 * - No {0, uid_tag} stats and no {acc_tag, uid_tag} stats.
1346 new_tag_stat
= create_if_tag_stat(iface_entry
, uid_tag
);
1349 uid_tag_counters
= &new_tag_stat
->counters
;
1351 uid_tag_counters
= &tag_stat_entry
->counters
;
1355 /* Create the child {acct_tag, uid_tag} and hook up parent. */
1356 new_tag_stat
= create_if_tag_stat(iface_entry
, tag
);
1359 new_tag_stat
->parent_counters
= uid_tag_counters
;
1362 * For new_tag_stat to be still NULL here would require:
1363 * {0, uid_tag} exists
1364 * and {acct_tag, uid_tag} doesn't exist
1365 * AND acct_tag == 0.
1366 * Impossible. This reassures us that new_tag_stat
1367 * below will always be assigned.
1369 BUG_ON(!new_tag_stat
);
1371 tag_stat_update(new_tag_stat
, direction
, proto
, bytes
);
1373 spin_unlock_bh(&iface_entry
->tag_stat_list_lock
);
1376 static int iface_netdev_event_handler(struct notifier_block
*nb
,
1377 unsigned long event
, void *ptr
) {
1378 struct net_device
*dev
= ptr
;
1380 if (unlikely(module_passive
))
1383 IF_DEBUG("qtaguid: iface_stat: netdev_event(): "
1384 "ev=0x%lx/%s netdev=%p->name=%s\n",
1385 event
, netdev_evt_str(event
), dev
, dev
? dev
->name
: "");
1389 iface_stat_create(dev
, NULL
);
1390 atomic64_inc(&qtu_events
.iface_events
);
1393 case NETDEV_UNREGISTER
:
1394 iface_stat_update(dev
, event
== NETDEV_DOWN
);
1395 atomic64_inc(&qtu_events
.iface_events
);
1401 static int iface_inet6addr_event_handler(struct notifier_block
*nb
,
1402 unsigned long event
, void *ptr
)
1404 struct inet6_ifaddr
*ifa
= ptr
;
1405 struct net_device
*dev
;
1407 if (unlikely(module_passive
))
1410 IF_DEBUG("qtaguid: iface_stat: inet6addr_event(): "
1411 "ev=0x%lx/%s ifa=%p\n",
1412 event
, netdev_evt_str(event
), ifa
);
1416 BUG_ON(!ifa
|| !ifa
->idev
);
1417 dev
= (struct net_device
*)ifa
->idev
->dev
;
1418 iface_stat_create_ipv6(dev
, ifa
);
1419 atomic64_inc(&qtu_events
.iface_events
);
1422 case NETDEV_UNREGISTER
:
1423 BUG_ON(!ifa
|| !ifa
->idev
);
1424 dev
= (struct net_device
*)ifa
->idev
->dev
;
1425 iface_stat_update(dev
, event
== NETDEV_DOWN
);
1426 atomic64_inc(&qtu_events
.iface_events
);
1432 static int iface_inetaddr_event_handler(struct notifier_block
*nb
,
1433 unsigned long event
, void *ptr
)
1435 struct in_ifaddr
*ifa
= ptr
;
1436 struct net_device
*dev
;
1438 if (unlikely(module_passive
))
1441 IF_DEBUG("qtaguid: iface_stat: inetaddr_event(): "
1442 "ev=0x%lx/%s ifa=%p\n",
1443 event
, netdev_evt_str(event
), ifa
);
1447 BUG_ON(!ifa
|| !ifa
->ifa_dev
);
1448 dev
= ifa
->ifa_dev
->dev
;
1449 iface_stat_create(dev
, ifa
);
1450 atomic64_inc(&qtu_events
.iface_events
);
1453 case NETDEV_UNREGISTER
:
1454 BUG_ON(!ifa
|| !ifa
->ifa_dev
);
1455 dev
= ifa
->ifa_dev
->dev
;
1456 iface_stat_update(dev
, event
== NETDEV_DOWN
);
1457 atomic64_inc(&qtu_events
.iface_events
);
1463 static struct notifier_block iface_netdev_notifier_blk
= {
1464 .notifier_call
= iface_netdev_event_handler
,
1467 static struct notifier_block iface_inetaddr_notifier_blk
= {
1468 .notifier_call
= iface_inetaddr_event_handler
,
1471 static struct notifier_block iface_inet6addr_notifier_blk
= {
1472 .notifier_call
= iface_inet6addr_event_handler
,
1475 static const struct seq_operations iface_stat_fmt_proc_seq_ops
= {
1476 .start
= iface_stat_fmt_proc_start
,
1477 .next
= iface_stat_fmt_proc_next
,
1478 .stop
= iface_stat_fmt_proc_stop
,
1479 .show
= iface_stat_fmt_proc_show
,
1482 static int proc_iface_stat_fmt_open(struct inode
*inode
, struct file
*file
)
1484 struct proc_iface_stat_fmt_info
*s
;
1486 s
= __seq_open_private(file
, &iface_stat_fmt_proc_seq_ops
,
1487 sizeof(struct proc_iface_stat_fmt_info
));
1491 s
->fmt
= (uintptr_t)PDE_DATA(inode
);
1495 static const struct file_operations proc_iface_stat_fmt_fops
= {
1496 .open
= proc_iface_stat_fmt_open
,
1498 .llseek
= seq_lseek
,
1499 .release
= seq_release_private
,
1502 static int __init
iface_stat_init(struct proc_dir_entry
*parent_procdir
)
1506 iface_stat_procdir
= proc_mkdir(iface_stat_procdirname
, parent_procdir
);
1507 if (!iface_stat_procdir
) {
1508 pr_err("qtaguid: iface_stat: init failed to create proc entry\n");
1513 iface_stat_all_procfile
= proc_create_data(iface_stat_all_procfilename
,
1516 &proc_iface_stat_fmt_fops
,
1517 (void *)1 /* fmt1 */);
1518 if (!iface_stat_all_procfile
) {
1519 pr_err("qtaguid: iface_stat: init "
1520 " failed to create stat_old proc entry\n");
1525 iface_stat_fmt_procfile
= proc_create_data(iface_stat_fmt_procfilename
,
1528 &proc_iface_stat_fmt_fops
,
1529 (void *)2 /* fmt2 */);
1530 if (!iface_stat_fmt_procfile
) {
1531 pr_err("qtaguid: iface_stat: init "
1532 " failed to create stat_all proc entry\n");
1534 goto err_zap_all_stats_entry
;
1538 err
= register_netdevice_notifier(&iface_netdev_notifier_blk
);
1540 pr_err("qtaguid: iface_stat: init "
1541 "failed to register dev event handler\n");
1542 goto err_zap_all_stats_entries
;
1544 err
= register_inetaddr_notifier(&iface_inetaddr_notifier_blk
);
1546 pr_err("qtaguid: iface_stat: init "
1547 "failed to register ipv4 dev event handler\n");
1551 err
= register_inet6addr_notifier(&iface_inet6addr_notifier_blk
);
1553 pr_err("qtaguid: iface_stat: init "
1554 "failed to register ipv6 dev event handler\n");
1555 goto err_unreg_ip4_addr
;
1560 unregister_inetaddr_notifier(&iface_inetaddr_notifier_blk
);
1562 unregister_netdevice_notifier(&iface_netdev_notifier_blk
);
1563 err_zap_all_stats_entries
:
1564 remove_proc_entry(iface_stat_fmt_procfilename
, parent_procdir
);
1565 err_zap_all_stats_entry
:
1566 remove_proc_entry(iface_stat_all_procfilename
, parent_procdir
);
1568 remove_proc_entry(iface_stat_procdirname
, parent_procdir
);
1573 static struct sock
*qtaguid_find_sk(const struct sk_buff
*skb
,
1574 struct xt_action_param
*par
)
1577 unsigned int hook_mask
= (1 << par
->hooknum
);
1579 MT_DEBUG("qtaguid: find_sk(skb=%p) hooknum=%d family=%d\n", skb
,
1580 par
->hooknum
, par
->family
);
1583 * Let's not abuse the the xt_socket_get*_sk(), or else it will
1584 * return garbage SKs.
1586 if (!(hook_mask
& XT_SOCKET_SUPPORTED_HOOKS
))
1589 switch (par
->family
) {
1591 sk
= xt_socket_get6_sk(skb
, par
);
1594 sk
= xt_socket_get4_sk(skb
, par
);
1601 MT_DEBUG("qtaguid: %p->sk_proto=%u "
1602 "->sk_state=%d\n", sk
, sk
->sk_protocol
, sk
->sk_state
);
1604 * When in TCP_TIME_WAIT the sk is not a "struct sock" but
1605 * "struct inet_timewait_sock" which is missing fields.
1607 if (sk
->sk_state
== TCP_TIME_WAIT
) {
1608 xt_socket_put_sk(sk
);
1615 static void account_for_uid(const struct sk_buff
*skb
,
1616 const struct sock
*alternate_sk
, uid_t uid
,
1617 struct xt_action_param
*par
)
1619 const struct net_device
*el_dev
;
1622 MT_DEBUG("qtaguid[%d]: no skb->dev\n", par
->hooknum
);
1623 el_dev
= par
->in
? : par
->out
;
1625 const struct net_device
*other_dev
;
1627 other_dev
= par
->in
? : par
->out
;
1628 if (el_dev
!= other_dev
) {
1629 MT_DEBUG("qtaguid[%d]: skb->dev=%p %s vs "
1630 "par->(in/out)=%p %s\n",
1631 par
->hooknum
, el_dev
, el_dev
->name
, other_dev
,
1636 if (unlikely(!el_dev
)) {
1637 pr_info("qtaguid[%d]: no par->in/out?!!\n", par
->hooknum
);
1638 } else if (unlikely(!el_dev
->name
)) {
1639 pr_info("qtaguid[%d]: no dev->name?!!\n", par
->hooknum
);
1641 int proto
= ipx_proto(skb
, par
);
1642 MT_DEBUG("qtaguid[%d]: dev name=%s type=%d fam=%d proto=%d\n",
1643 par
->hooknum
, el_dev
->name
, el_dev
->type
,
1644 par
->family
, proto
);
1646 if_tag_stat_update(el_dev
->name
, uid
,
1647 skb
->sk
? skb
->sk
: alternate_sk
,
1648 par
->in
? IFS_RX
: IFS_TX
,
1653 static bool qtaguid_mt(const struct sk_buff
*skb
, struct xt_action_param
*par
)
1655 const struct xt_qtaguid_match_info
*info
= par
->matchinfo
;
1656 const struct file
*filp
;
1657 bool got_sock
= false;
1662 if (unlikely(module_passive
))
1663 return (info
->match
^ info
->invert
) == 0;
1665 MT_DEBUG("qtaguid[%d]: entered skb=%p par->in=%p/out=%p fam=%d\n",
1666 par
->hooknum
, skb
, par
->in
, par
->out
, par
->family
);
1668 atomic64_inc(&qtu_events
.match_calls
);
1670 res
= (info
->match
^ info
->invert
) == 0;
1674 switch (par
->hooknum
) {
1675 case NF_INET_PRE_ROUTING
:
1676 case NF_INET_POST_ROUTING
:
1677 atomic64_inc(&qtu_events
.match_calls_prepost
);
1678 iface_stat_update_from_skb(skb
, par
);
1680 * We are done in pre/post. The skb will get processed
1683 res
= (info
->match
^ info
->invert
);
1686 /* default: Fall through and do UID releated work */
1691 * When in TCP_TIME_WAIT the sk is not a "struct sock" but
1692 * "struct inet_timewait_sock" which is missing fields.
1695 if (sk
&& sk
->sk_state
== TCP_TIME_WAIT
)
1699 * A missing sk->sk_socket happens when packets are in-flight
1700 * and the matching socket is already closed and gone.
1702 sk
= qtaguid_find_sk(skb
, par
);
1704 * If we got the socket from the find_sk(), we will need to put
1705 * it back, as nf_tproxy_get_sock_v4() got it.
1709 atomic64_inc(&qtu_events
.match_found_sk_in_ct
);
1711 atomic64_inc(&qtu_events
.match_found_no_sk_in_ct
);
1713 atomic64_inc(&qtu_events
.match_found_sk
);
1715 MT_DEBUG("qtaguid[%d]: sk=%p got_sock=%d fam=%d proto=%d\n",
1716 par
->hooknum
, sk
, got_sock
, par
->family
, ipx_proto(skb
, par
));
1718 MT_DEBUG("qtaguid[%d]: sk=%p->sk_socket=%p->file=%p\n",
1719 par
->hooknum
, sk
, sk
->sk_socket
,
1720 sk
->sk_socket
? sk
->sk_socket
->file
: (void *)-1LL);
1721 filp
= sk
->sk_socket
? sk
->sk_socket
->file
: NULL
;
1722 MT_DEBUG("qtaguid[%d]: filp...uid=%u\n",
1723 par
->hooknum
, filp
? filp
->f_cred
->fsuid
: -1);
1726 if (sk
== NULL
|| sk
->sk_socket
== NULL
) {
1728 * Here, the qtaguid_find_sk() using connection tracking
1729 * couldn't find the owner, so for now we just count them
1730 * against the system.
1733 * TODO: unhack how to force just accounting.
1734 * For now we only do iface stats when the uid-owner is not
1737 if (!(info
->match
& XT_QTAGUID_UID
))
1738 account_for_uid(skb
, sk
, 0, par
);
1739 MT_DEBUG("qtaguid[%d]: leaving (sk?sk->sk_socket)=%p\n",
1741 sk
? sk
->sk_socket
: NULL
);
1742 res
= (info
->match
^ info
->invert
) == 0;
1743 atomic64_inc(&qtu_events
.match_no_sk
);
1744 goto put_sock_ret_res
;
1745 } else if (info
->match
& info
->invert
& XT_QTAGUID_SOCKET
) {
1747 goto put_sock_ret_res
;
1749 filp
= sk
->sk_socket
->file
;
1751 MT_DEBUG("qtaguid[%d]: leaving filp=NULL\n", par
->hooknum
);
1752 account_for_uid(skb
, sk
, 0, par
);
1753 res
= ((info
->match
^ info
->invert
) &
1754 (XT_QTAGUID_UID
| XT_QTAGUID_GID
)) == 0;
1755 /*mtk_net: patch for duplicated account for uid 0*/
1757 atomic64_inc(&qtu_events
.match_no_sk_file
);
1758 goto put_sock_ret_res
;
1760 sock_uid
= filp
->f_cred
->fsuid
;
1762 * TODO: unhack how to force just accounting.
1763 * For now we only do iface stats when the uid-owner is not requested
1765 if (!(info
->match
& XT_QTAGUID_UID
))
1766 account_for_uid(skb
, sk
, sock_uid
, par
);
1769 * The following two tests fail the match when:
1770 * id not in range AND no inverted condition requested
1771 * or id in range AND inverted condition requested
1772 * Thus (!a && b) || (a && !b) == a ^ b
1774 if (info
->match
& XT_QTAGUID_UID
)
1775 if ((filp
->f_cred
->fsuid
>= info
->uid_min
&&
1776 filp
->f_cred
->fsuid
<= info
->uid_max
) ^
1777 !(info
->invert
& XT_QTAGUID_UID
)) {
1778 MT_DEBUG("qtaguid[%d]: leaving uid not matching\n",
1781 goto put_sock_ret_res
;
1783 if (info
->match
& XT_QTAGUID_GID
)
1784 if ((filp
->f_cred
->fsgid
>= info
->gid_min
&&
1785 filp
->f_cred
->fsgid
<= info
->gid_max
) ^
1786 !(info
->invert
& XT_QTAGUID_GID
)) {
1787 MT_DEBUG("qtaguid[%d]: leaving gid not matching\n",
1790 goto put_sock_ret_res
;
1793 MT_DEBUG("qtaguid[%d]: leaving matched\n", par
->hooknum
);
1798 xt_socket_put_sk(sk
);
1800 MT_DEBUG("qtaguid[%d]: left %d\n", par
->hooknum
, res
);
#ifdef DDEBUG
/* This function is not in xt_qtaguid_print.c because of locks visibility */
/*
 * Dump the full module state (sock tags, uid tag data, proc qtu data and
 * iface stats) to the kernel log, taking each list lock in turn.
 * NOTE(review): the #ifdef DDEBUG / #else / #endif guards were missing from
 * the mangled source and have been restored — verify.
 */
static void prdebug_full_state(int indent_level, const char *fmt, ...)
{
	va_list args;
	char *fmt_buff;
	char *buff;

	if (!unlikely(qtaguid_debug_mask & DDEBUG_MASK))
		return;

	fmt_buff = kasprintf(GFP_ATOMIC,
			     "qtaguid: %s(): %s {\n", __func__, fmt);
	BUG_ON(!fmt_buff);
	va_start(args, fmt);
	buff = kvasprintf(GFP_ATOMIC,
			  fmt_buff, args);
	BUG_ON(!buff);
	pr_debug("%s", buff);
	kfree(fmt_buff);
	kfree(buff);
	va_end(args);

	spin_lock_bh(&sock_tag_list_lock);
	prdebug_sock_tag_tree(indent_level, &sock_tag_tree);
	spin_unlock_bh(&sock_tag_list_lock);

	spin_lock_bh(&sock_tag_list_lock);
	spin_lock_bh(&uid_tag_data_tree_lock);
	prdebug_uid_tag_data_tree(indent_level, &uid_tag_data_tree);
	prdebug_proc_qtu_data_tree(indent_level, &proc_qtu_data_tree);
	spin_unlock_bh(&uid_tag_data_tree_lock);
	spin_unlock_bh(&sock_tag_list_lock);

	spin_lock_bh(&iface_stat_list_lock);
	prdebug_iface_stat_list(indent_level, &iface_stat_list);
	spin_unlock_bh(&iface_stat_list_lock);

	pr_debug("qtaguid: %s(): }\n", __func__);
}
#else
static void prdebug_full_state(int indent_level, const char *fmt, ...) {}
#endif
1848 struct proc_ctrl_print_info
{
1849 struct sock
*sk
; /* socket found by reading to sk_pos */
1853 static void *qtaguid_ctrl_proc_next(struct seq_file
*m
, void *v
, loff_t
*pos
)
1855 struct proc_ctrl_print_info
*pcpi
= m
->private;
1856 struct sock_tag
*sock_tag_entry
= v
;
1857 struct rb_node
*node
;
1861 if (!v
|| v
== SEQ_START_TOKEN
)
1864 node
= rb_next(&sock_tag_entry
->sock_node
);
1867 sock_tag_entry
= SEQ_START_TOKEN
;
1869 sock_tag_entry
= rb_entry(node
, struct sock_tag
, sock_node
);
1870 pcpi
->sk
= sock_tag_entry
->sk
;
1872 pcpi
->sk_pos
= *pos
;
1873 return sock_tag_entry
;
1876 static void *qtaguid_ctrl_proc_start(struct seq_file
*m
, loff_t
*pos
)
1878 struct proc_ctrl_print_info
*pcpi
= m
->private;
1879 struct sock_tag
*sock_tag_entry
;
1880 struct rb_node
*node
;
1882 spin_lock_bh(&sock_tag_list_lock
);
1884 if (unlikely(module_passive
))
1889 node
= rb_first(&sock_tag_tree
);
1892 return SEQ_START_TOKEN
;
1894 sock_tag_entry
= rb_entry(node
, struct sock_tag
, sock_node
);
1895 pcpi
->sk
= sock_tag_entry
->sk
;
1897 sock_tag_entry
= (pcpi
->sk
? get_sock_stat_nl(pcpi
->sk
) :
1898 NULL
) ?: SEQ_START_TOKEN
;
1899 if (*pos
!= pcpi
->sk_pos
) {
1900 /* seq_read skipped a next call */
1901 *pos
= pcpi
->sk_pos
;
1902 return qtaguid_ctrl_proc_next(m
, sock_tag_entry
, pos
);
1905 return sock_tag_entry
;
1908 static void qtaguid_ctrl_proc_stop(struct seq_file
*m
, void *v
)
1910 spin_unlock_bh(&sock_tag_list_lock
);
1914 * Procfs reader to get all active socket tags using style "1)" as described in
1917 static int qtaguid_ctrl_proc_show(struct seq_file
*m
, void *v
)
1919 struct sock_tag
*sock_tag_entry
= v
;
1923 CT_DEBUG("qtaguid: proc ctrl pid=%u tgid=%u uid=%u\n",
1924 current
->pid
, current
->tgid
, current_fsuid());
1926 if (sock_tag_entry
!= SEQ_START_TOKEN
) {
1927 uid
= get_uid_from_tag(sock_tag_entry
->tag
);
1928 CT_DEBUG("qtaguid: proc_read(): sk=%p tag=0x%llx (uid=%u) "
1931 sock_tag_entry
->tag
,
1935 f_count
= atomic_long_read(
1936 &sock_tag_entry
->socket
->file
->f_count
);
1937 seq_printf(m
, "sock=%pK tag=0x%llx (uid=%u) pid=%u "
1940 sock_tag_entry
->tag
, uid
,
1941 sock_tag_entry
->pid
, f_count
);
1943 seq_printf(m
, "events: sockets_tagged=%llu "
1944 "sockets_untagged=%llu "
1945 "counter_set_changes=%llu "
1947 "iface_events=%llu "
1949 "match_calls_prepost=%llu "
1950 "match_found_sk=%llu "
1951 "match_found_sk_in_ct=%llu "
1952 "match_found_no_sk_in_ct=%llu "
1954 "match_no_sk_file=%llu\n",
1955 (u64
)atomic64_read(&qtu_events
.sockets_tagged
),
1956 (u64
)atomic64_read(&qtu_events
.sockets_untagged
),
1957 (u64
)atomic64_read(&qtu_events
.counter_set_changes
),
1958 (u64
)atomic64_read(&qtu_events
.delete_cmds
),
1959 (u64
)atomic64_read(&qtu_events
.iface_events
),
1960 (u64
)atomic64_read(&qtu_events
.match_calls
),
1961 (u64
)atomic64_read(&qtu_events
.match_calls_prepost
),
1962 (u64
)atomic64_read(&qtu_events
.match_found_sk
),
1963 (u64
)atomic64_read(&qtu_events
.match_found_sk_in_ct
),
1964 (u64
)atomic64_read(&qtu_events
.match_found_no_sk_in_ct
),
1965 (u64
)atomic64_read(&qtu_events
.match_no_sk
),
1966 (u64
)atomic64_read(&qtu_events
.match_no_sk_file
));
1968 /* Count the following as part of the last item_index */
1969 prdebug_full_state(0, "proc ctrl");
1976 * Delete socket tags, and stat tags associated with a given
1977 * accouting tag and uid.
1979 static int ctrl_cmd_delete(const char *input
)
1987 struct iface_stat
*iface_entry
;
1988 struct rb_node
*node
;
1989 struct sock_tag
*st_entry
;
1990 struct rb_root st_to_free_tree
= RB_ROOT
;
1991 struct tag_stat
*ts_entry
;
1992 struct tag_counter_set
*tcs_entry
;
1993 struct tag_ref
*tr_entry
;
1994 struct uid_tag_data
*utd_entry
;
1996 argc
= sscanf(input
, "%c %llu %u", &cmd
, &acct_tag
, &uid
);
1997 CT_DEBUG("qtaguid: ctrl_delete(%s): argc=%d cmd=%c "
1998 "user_tag=0x%llx uid=%u\n", input
, argc
, cmd
,
2004 if (!valid_atag(acct_tag
)) {
2005 pr_info("qtaguid: ctrl_delete(%s): invalid tag\n", input
);
2010 uid
= current_fsuid();
2011 } else if (!can_impersonate_uid(uid
)) {
2012 pr_info("qtaguid: ctrl_delete(%s): "
2013 "insufficient priv from pid=%u tgid=%u uid=%u\n",
2014 input
, current
->pid
, current
->tgid
, current_fsuid());
2019 tag
= combine_atag_with_uid(acct_tag
, uid
);
2020 CT_DEBUG("qtaguid: ctrl_delete(%s): "
2021 "looking for tag=0x%llx (uid=%u)\n",
2024 /* Delete socket tags */
2025 spin_lock_bh(&sock_tag_list_lock
);
2026 node
= rb_first(&sock_tag_tree
);
2028 st_entry
= rb_entry(node
, struct sock_tag
, sock_node
);
2029 entry_uid
= get_uid_from_tag(st_entry
->tag
);
2030 node
= rb_next(node
);
2031 if (entry_uid
!= uid
)
2034 CT_DEBUG("qtaguid: ctrl_delete(%s): st tag=0x%llx (uid=%u)\n",
2035 input
, st_entry
->tag
, entry_uid
);
2037 if (!acct_tag
|| st_entry
->tag
== tag
) {
2038 rb_erase(&st_entry
->sock_node
, &sock_tag_tree
);
2039 /* Can't sockfd_put() within spinlock, do it later. */
2040 sock_tag_tree_insert(st_entry
, &st_to_free_tree
);
2041 tr_entry
= lookup_tag_ref(st_entry
->tag
, NULL
);
2042 BUG_ON(tr_entry
->num_sock_tags
<= 0);
2043 tr_entry
->num_sock_tags
--;
2045 * TODO: remove if, and start failing.
2046 * This is a hack to work around the fact that in some
2047 * places we have "if (IS_ERR_OR_NULL(pqd_entry))"
2048 * and are trying to work around apps
2049 * that didn't open the /dev/xt_qtaguid.
2051 if (st_entry
->list
.next
&& st_entry
->list
.prev
)
2052 list_del(&st_entry
->list
);
2055 spin_unlock_bh(&sock_tag_list_lock
);
2057 sock_tag_tree_erase(&st_to_free_tree
);
2059 /* Delete tag counter-sets */
2060 spin_lock_bh(&tag_counter_set_list_lock
);
2061 /* Counter sets are only on the uid tag, not full tag */
2062 tcs_entry
= tag_counter_set_tree_search(&tag_counter_set_tree
, tag
);
2064 CT_DEBUG("qtaguid: ctrl_delete(%s): "
2065 "erase tcs: tag=0x%llx (uid=%u) set=%d\n",
2068 get_uid_from_tag(tcs_entry
->tn
.tag
),
2069 tcs_entry
->active_set
);
2070 rb_erase(&tcs_entry
->tn
.node
, &tag_counter_set_tree
);
2073 spin_unlock_bh(&tag_counter_set_list_lock
);
2076 * If acct_tag is 0, then all entries belonging to uid are
2079 spin_lock_bh(&iface_stat_list_lock
);
2080 list_for_each_entry(iface_entry
, &iface_stat_list
, list
) {
2081 spin_lock_bh(&iface_entry
->tag_stat_list_lock
);
2082 node
= rb_first(&iface_entry
->tag_stat_tree
);
2084 ts_entry
= rb_entry(node
, struct tag_stat
, tn
.node
);
2085 entry_uid
= get_uid_from_tag(ts_entry
->tn
.tag
);
2086 node
= rb_next(node
);
2088 CT_DEBUG("qtaguid: ctrl_delete(%s): "
2089 "ts tag=0x%llx (uid=%u)\n",
2090 input
, ts_entry
->tn
.tag
, entry_uid
);
2092 if (entry_uid
!= uid
)
2094 if (!acct_tag
|| ts_entry
->tn
.tag
== tag
) {
2095 CT_DEBUG("qtaguid: ctrl_delete(%s): "
2096 "erase ts: %s 0x%llx %u\n",
2097 input
, iface_entry
->ifname
,
2098 get_atag_from_tag(ts_entry
->tn
.tag
),
2100 rb_erase(&ts_entry
->tn
.node
,
2101 &iface_entry
->tag_stat_tree
);
2105 spin_unlock_bh(&iface_entry
->tag_stat_list_lock
);
2107 spin_unlock_bh(&iface_stat_list_lock
);
2109 /* Cleanup the uid_tag_data */
2110 spin_lock_bh(&uid_tag_data_tree_lock
);
2111 node
= rb_first(&uid_tag_data_tree
);
2113 utd_entry
= rb_entry(node
, struct uid_tag_data
, node
);
2114 entry_uid
= utd_entry
->uid
;
2115 node
= rb_next(node
);
2117 CT_DEBUG("qtaguid: ctrl_delete(%s): "
2121 if (entry_uid
!= uid
)
2124 * Go over the tag_refs, and those that don't have
2125 * sock_tags using them are freed.
2127 put_tag_ref_tree(tag
, utd_entry
);
2128 put_utd_entry(utd_entry
);
2130 spin_unlock_bh(&uid_tag_data_tree_lock
);
2132 atomic64_inc(&qtu_events
.delete_cmds
);
2139 static int ctrl_cmd_counter_set(const char *input
)
2145 struct tag_counter_set
*tcs
;
2148 argc
= sscanf(input
, "%c %d %u", &cmd
, &counter_set
, &uid
);
2149 CT_DEBUG("qtaguid: ctrl_counterset(%s): argc=%d cmd=%c "
2150 "set=%d uid=%u\n", input
, argc
, cmd
,
2156 if (counter_set
< 0 || counter_set
>= IFS_MAX_COUNTER_SETS
) {
2157 pr_info("qtaguid: ctrl_counterset(%s): invalid counter_set range\n",
2162 if (!can_manipulate_uids()) {
2163 pr_info("qtaguid: ctrl_counterset(%s): "
2164 "insufficient priv from pid=%u tgid=%u uid=%u\n",
2165 input
, current
->pid
, current
->tgid
, current_fsuid());
2170 tag
= make_tag_from_uid(uid
);
2171 spin_lock_bh(&tag_counter_set_list_lock
);
2172 tcs
= tag_counter_set_tree_search(&tag_counter_set_tree
, tag
);
2174 tcs
= kzalloc(sizeof(*tcs
), GFP_ATOMIC
);
2176 spin_unlock_bh(&tag_counter_set_list_lock
);
2177 pr_err("qtaguid: ctrl_counterset(%s): "
2178 "failed to alloc counter set\n",
2184 tag_counter_set_tree_insert(tcs
, &tag_counter_set_tree
);
2185 CT_DEBUG("qtaguid: ctrl_counterset(%s): added tcs tag=0x%llx "
2186 "(uid=%u) set=%d\n",
2187 input
, tag
, get_uid_from_tag(tag
), counter_set
);
2189 tcs
->active_set
= counter_set
;
2190 spin_unlock_bh(&tag_counter_set_list_lock
);
2191 atomic64_inc(&qtu_events
.counter_set_changes
);
2198 static int ctrl_cmd_tag(const char *input
)
2203 tag_t acct_tag
= make_atag_from_value(0);
2205 struct socket
*el_socket
;
2207 struct sock_tag
*sock_tag_entry
;
2208 struct tag_ref
*tag_ref_entry
;
2209 struct uid_tag_data
*uid_tag_data_entry
;
2210 struct proc_qtu_data
*pqd_entry
;
2212 /* Unassigned args will get defaulted later. */
2213 argc
= sscanf(input
, "%c %d %llu %u", &cmd
, &sock_fd
, &acct_tag
, &uid
);
2214 CT_DEBUG("qtaguid: ctrl_tag(%s): argc=%d cmd=%c sock_fd=%d "
2215 "acct_tag=0x%llx uid=%u\n", input
, argc
, cmd
, sock_fd
,
2221 el_socket
= sockfd_lookup(sock_fd
, &res
); /* This locks the file */
2223 pr_info("qtaguid: ctrl_tag(%s): failed to lookup"
2224 " sock_fd=%d err=%d pid=%u tgid=%u uid=%u\n",
2225 input
, sock_fd
, res
, current
->pid
, current
->tgid
,
2229 CT_DEBUG("qtaguid: ctrl_tag(%s): socket->...->f_count=%ld ->sk=%p\n",
2230 input
, atomic_long_read(&el_socket
->file
->f_count
),
2233 acct_tag
= make_atag_from_value(0);
2234 } else if (!valid_atag(acct_tag
)) {
2235 pr_info("qtaguid: ctrl_tag(%s): invalid tag\n", input
);
2239 CT_DEBUG("qtaguid: ctrl_tag(%s): "
2240 "pid=%u tgid=%u uid=%u euid=%u fsuid=%u "
2241 "ctrl.gid=%u in_group()=%d in_egroup()=%d\n",
2242 input
, current
->pid
, current
->tgid
, current_uid(),
2243 current_euid(), current_fsuid(),
2244 xt_qtaguid_ctrl_file
->gid
,
2245 in_group_p(xt_qtaguid_ctrl_file
->gid
),
2246 in_egroup_p(xt_qtaguid_ctrl_file
->gid
));
2248 uid
= current_fsuid();
2249 } else if (!can_impersonate_uid(uid
)) {
2250 pr_info("qtaguid: ctrl_tag(%s): "
2251 "insufficient priv from pid=%u tgid=%u uid=%u\n",
2252 input
, current
->pid
, current
->tgid
, current_fsuid());
2256 full_tag
= combine_atag_with_uid(acct_tag
, uid
);
2258 spin_lock_bh(&sock_tag_list_lock
);
2259 sock_tag_entry
= get_sock_stat_nl(el_socket
->sk
);
2260 tag_ref_entry
= get_tag_ref(full_tag
, &uid_tag_data_entry
);
2261 if (IS_ERR(tag_ref_entry
)) {
2262 res
= PTR_ERR(tag_ref_entry
);
2263 spin_unlock_bh(&sock_tag_list_lock
);
2266 tag_ref_entry
->num_sock_tags
++;
2267 if (sock_tag_entry
) {
2268 struct tag_ref
*prev_tag_ref_entry
;
2270 CT_DEBUG("qtaguid: ctrl_tag(%s): retag for sk=%p "
2271 "st@%p ...->f_count=%ld\n",
2272 input
, el_socket
->sk
, sock_tag_entry
,
2273 atomic_long_read(&el_socket
->file
->f_count
));
2275 * This is a re-tagging, so release the sock_fd that was
2276 * locked at the time of the 1st tagging.
2277 * There is still the ref from this call's sockfd_lookup() so
2278 * it can be done within the spinlock.
2280 sockfd_put(sock_tag_entry
->socket
);
2281 prev_tag_ref_entry
= lookup_tag_ref(sock_tag_entry
->tag
,
2282 &uid_tag_data_entry
);
2283 BUG_ON(IS_ERR_OR_NULL(prev_tag_ref_entry
));
2284 BUG_ON(prev_tag_ref_entry
->num_sock_tags
<= 0);
2285 prev_tag_ref_entry
->num_sock_tags
--;
2286 sock_tag_entry
->tag
= full_tag
;
2288 CT_DEBUG("qtaguid: ctrl_tag(%s): newtag for sk=%p\n",
2289 input
, el_socket
->sk
);
2290 sock_tag_entry
= kzalloc(sizeof(*sock_tag_entry
),
2292 if (!sock_tag_entry
) {
2293 pr_err("qtaguid: ctrl_tag(%s): "
2294 "socket tag alloc failed\n",
2296 spin_unlock_bh(&sock_tag_list_lock
);
2298 goto err_tag_unref_put
;
2300 sock_tag_entry
->sk
= el_socket
->sk
;
2301 sock_tag_entry
->socket
= el_socket
;
2302 sock_tag_entry
->pid
= current
->tgid
;
2303 sock_tag_entry
->tag
= combine_atag_with_uid(acct_tag
,
2305 spin_lock_bh(&uid_tag_data_tree_lock
);
2306 pqd_entry
= proc_qtu_data_tree_search(
2307 &proc_qtu_data_tree
, current
->tgid
);
2309 * TODO: remove if, and start failing.
2310 * At first, we want to catch user-space code that is not
2311 * opening the /dev/xt_qtaguid.
2313 if (IS_ERR_OR_NULL(pqd_entry
))
2316 "User space forgot to open /dev/xt_qtaguid? "
2317 "pid=%u tgid=%u uid=%u\n", __func__
,
2318 current
->pid
, current
->tgid
,
2321 list_add(&sock_tag_entry
->list
,
2322 &pqd_entry
->sock_tag_list
);
2323 spin_unlock_bh(&uid_tag_data_tree_lock
);
2325 sock_tag_tree_insert(sock_tag_entry
, &sock_tag_tree
);
2326 atomic64_inc(&qtu_events
.sockets_tagged
);
2328 spin_unlock_bh(&sock_tag_list_lock
);
2329 /* We keep the ref to the socket (file) until it is untagged */
2330 CT_DEBUG("qtaguid: ctrl_tag(%s): done st@%p ...->f_count=%ld\n",
2331 input
, sock_tag_entry
,
2332 atomic_long_read(&el_socket
->file
->f_count
));
2336 BUG_ON(tag_ref_entry
->num_sock_tags
<= 0);
2337 tag_ref_entry
->num_sock_tags
--;
2338 free_tag_ref_from_utd_entry(tag_ref_entry
, uid_tag_data_entry
);
2340 CT_DEBUG("qtaguid: ctrl_tag(%s): done. ...->f_count=%ld\n",
2341 input
, atomic_long_read(&el_socket
->file
->f_count
) - 1);
2342 /* Release the sock_fd that was grabbed by sockfd_lookup(). */
2343 sockfd_put(el_socket
);
2347 CT_DEBUG("qtaguid: ctrl_tag(%s): done.\n", input
);
2351 static int ctrl_cmd_untag(const char *input
)
2355 struct socket
*el_socket
;
2357 struct sock_tag
*sock_tag_entry
;
2358 struct tag_ref
*tag_ref_entry
;
2359 struct uid_tag_data
*utd_entry
;
2360 struct proc_qtu_data
*pqd_entry
;
2362 argc
= sscanf(input
, "%c %d", &cmd
, &sock_fd
);
2363 CT_DEBUG("qtaguid: ctrl_untag(%s): argc=%d cmd=%c sock_fd=%d\n",
2364 input
, argc
, cmd
, sock_fd
);
2369 el_socket
= sockfd_lookup(sock_fd
, &res
); /* This locks the file */
2371 pr_info("qtaguid: ctrl_untag(%s): failed to lookup"
2372 " sock_fd=%d err=%d pid=%u tgid=%u uid=%u\n",
2373 input
, sock_fd
, res
, current
->pid
, current
->tgid
,
2377 CT_DEBUG("qtaguid: ctrl_untag(%s): socket->...->f_count=%ld ->sk=%p\n",
2378 input
, atomic_long_read(&el_socket
->file
->f_count
),
2380 spin_lock_bh(&sock_tag_list_lock
);
2381 sock_tag_entry
= get_sock_stat_nl(el_socket
->sk
);
2382 if (!sock_tag_entry
) {
2383 spin_unlock_bh(&sock_tag_list_lock
);
2388 * The socket already belongs to the current process
2389 * so it can do whatever it wants to it.
2391 rb_erase(&sock_tag_entry
->sock_node
, &sock_tag_tree
);
2393 tag_ref_entry
= lookup_tag_ref(sock_tag_entry
->tag
, &utd_entry
);
2394 BUG_ON(!tag_ref_entry
);
2395 BUG_ON(tag_ref_entry
->num_sock_tags
<= 0);
2396 spin_lock_bh(&uid_tag_data_tree_lock
);
2397 pqd_entry
= proc_qtu_data_tree_search(
2398 &proc_qtu_data_tree
, current
->tgid
);
2400 * TODO: remove if, and start failing.
2401 * At first, we want to catch user-space code that is not
2402 * opening the /dev/xt_qtaguid.
2404 if (IS_ERR_OR_NULL(pqd_entry
))
2405 pr_warn_once("qtaguid: %s(): "
2406 "User space forgot to open /dev/xt_qtaguid? "
2407 "pid=%u tgid=%u uid=%u\n", __func__
,
2408 current
->pid
, current
->tgid
, current_fsuid());
2410 list_del(&sock_tag_entry
->list
);
2411 spin_unlock_bh(&uid_tag_data_tree_lock
);
2413 * We don't free tag_ref from the utd_entry here,
2414 * only during a cmd_delete().
2416 tag_ref_entry
->num_sock_tags
--;
2417 spin_unlock_bh(&sock_tag_list_lock
);
2419 * Release the sock_fd that was grabbed at tag time,
2420 * and once more for the sockfd_lookup() here.
2422 sockfd_put(sock_tag_entry
->socket
);
2423 CT_DEBUG("qtaguid: ctrl_untag(%s): done. st@%p ...->f_count=%ld\n",
2424 input
, sock_tag_entry
,
2425 atomic_long_read(&el_socket
->file
->f_count
) - 1);
2426 sockfd_put(el_socket
);
2428 kfree(sock_tag_entry
);
2429 atomic64_inc(&qtu_events
.sockets_untagged
);
2434 CT_DEBUG("qtaguid: ctrl_untag(%s): done. socket->...->f_count=%ld\n",
2435 input
, atomic_long_read(&el_socket
->file
->f_count
) - 1);
2436 /* Release the sock_fd that was grabbed by sockfd_lookup(). */
2437 sockfd_put(el_socket
);
2441 CT_DEBUG("qtaguid: ctrl_untag(%s): done.\n", input
);
2445 static ssize_t
qtaguid_ctrl_parse(const char *input
, size_t count
)
2450 CT_DEBUG("qtaguid: ctrl(%s): pid=%u tgid=%u uid=%u\n",
2451 input
, current
->pid
, current
->tgid
, current_fsuid());
2454 /* Collect params for commands */
2457 res
= ctrl_cmd_delete(input
);
2461 res
= ctrl_cmd_counter_set(input
);
2465 res
= ctrl_cmd_tag(input
);
2469 res
= ctrl_cmd_untag(input
);
2479 CT_DEBUG("qtaguid: ctrl(%s): res=%zd\n", input
, res
);
2483 #define MAX_QTAGUID_CTRL_INPUT_LEN 255
2484 static ssize_t
qtaguid_ctrl_proc_write(struct file
*file
, const char __user
*buffer
,
2485 size_t count
, loff_t
*offp
)
2487 char input_buf
[MAX_QTAGUID_CTRL_INPUT_LEN
];
2489 if (unlikely(module_passive
))
2492 if (count
>= MAX_QTAGUID_CTRL_INPUT_LEN
)
2495 if (copy_from_user(input_buf
, buffer
, count
))
2498 input_buf
[count
] = '\0';
2499 return qtaguid_ctrl_parse(input_buf
, count
);
2502 struct proc_print_info
{
2503 struct iface_stat
*iface_entry
;
2505 tag_t tag
; /* tag found by reading to tag_pos */
/* Emit the column-name header line for the stats proc file. */
static void pp_stats_header(struct seq_file *m)
{
	seq_puts(m,
		 "idx iface acct_tag_hex uid_tag_int cnt_set "
		 "rx_bytes rx_packets "
		 "tx_bytes tx_packets "
		 "rx_tcp_bytes rx_tcp_packets "
		 "rx_udp_bytes rx_udp_packets "
		 "rx_other_bytes rx_other_packets "
		 "tx_tcp_bytes tx_tcp_packets "
		 "tx_udp_bytes tx_udp_packets "
		 "tx_other_bytes tx_other_packets\n");
}
2524 static int pp_stats_line(struct seq_file
*m
, struct tag_stat
*ts_entry
,
2528 struct data_counters
*cnts
;
2529 tag_t tag
= ts_entry
->tn
.tag
;
2530 uid_t stat_uid
= get_uid_from_tag(tag
);
2531 struct proc_print_info
*ppi
= m
->private;
2532 /* Detailed tags are not available to everybody */
2533 if (get_atag_from_tag(tag
) && !can_read_other_uid_stats(stat_uid
)) {
2534 CT_DEBUG("qtaguid: stats line: "
2535 "%s 0x%llx %u: insufficient priv "
2536 "from pid=%u tgid=%u uid=%u stats.gid=%u\n",
2537 ppi
->iface_entry
->ifname
,
2538 get_atag_from_tag(tag
), stat_uid
,
2539 current
->pid
, current
->tgid
, current_fsuid(),
2540 xt_qtaguid_stats_file
->gid
);
2544 cnts
= &ts_entry
->counters
;
2545 ret
= seq_printf(m
, "%d %s 0x%llx %u %u "
2555 ppi
->iface_entry
->ifname
,
2556 get_atag_from_tag(tag
),
2559 dc_sum_bytes(cnts
, cnt_set
, IFS_RX
),
2560 dc_sum_packets(cnts
, cnt_set
, IFS_RX
),
2561 dc_sum_bytes(cnts
, cnt_set
, IFS_TX
),
2562 dc_sum_packets(cnts
, cnt_set
, IFS_TX
),
2563 cnts
->bpc
[cnt_set
][IFS_RX
][IFS_TCP
].bytes
,
2564 cnts
->bpc
[cnt_set
][IFS_RX
][IFS_TCP
].packets
,
2565 cnts
->bpc
[cnt_set
][IFS_RX
][IFS_UDP
].bytes
,
2566 cnts
->bpc
[cnt_set
][IFS_RX
][IFS_UDP
].packets
,
2567 cnts
->bpc
[cnt_set
][IFS_RX
][IFS_PROTO_OTHER
].bytes
,
2568 cnts
->bpc
[cnt_set
][IFS_RX
][IFS_PROTO_OTHER
].packets
,
2569 cnts
->bpc
[cnt_set
][IFS_TX
][IFS_TCP
].bytes
,
2570 cnts
->bpc
[cnt_set
][IFS_TX
][IFS_TCP
].packets
,
2571 cnts
->bpc
[cnt_set
][IFS_TX
][IFS_UDP
].bytes
,
2572 cnts
->bpc
[cnt_set
][IFS_TX
][IFS_UDP
].packets
,
2573 cnts
->bpc
[cnt_set
][IFS_TX
][IFS_PROTO_OTHER
].bytes
,
2574 cnts
->bpc
[cnt_set
][IFS_TX
][IFS_PROTO_OTHER
].packets
);
2578 static bool pp_sets(struct seq_file
*m
, struct tag_stat
*ts_entry
)
2582 for (counter_set
= 0; counter_set
< IFS_MAX_COUNTER_SETS
;
2584 ret
= pp_stats_line(m
, ts_entry
, counter_set
);
2591 static int qtaguid_stats_proc_iface_stat_ptr_valid(struct iface_stat
*ptr
)
2593 struct iface_stat
*iface_entry
;
2598 list_for_each_entry(iface_entry
, &iface_stat_list
, list
)
2599 if (iface_entry
== ptr
)
2604 static void qtaguid_stats_proc_next_iface_entry(struct proc_print_info
*ppi
)
2606 spin_unlock_bh(&ppi
->iface_entry
->tag_stat_list_lock
);
2607 list_for_each_entry_continue(ppi
->iface_entry
, &iface_stat_list
, list
) {
2608 spin_lock_bh(&ppi
->iface_entry
->tag_stat_list_lock
);
2611 ppi
->iface_entry
= NULL
;
2614 static void *qtaguid_stats_proc_next(struct seq_file
*m
, void *v
, loff_t
*pos
)
2616 struct proc_print_info
*ppi
= m
->private;
2617 struct tag_stat
*ts_entry
;
2618 struct rb_node
*node
;
2621 pr_err("qtaguid: %s(): unexpected v: NULL\n", __func__
);
2627 if (!ppi
->iface_entry
|| unlikely(module_passive
))
2630 if (v
== SEQ_START_TOKEN
)
2631 node
= rb_first(&ppi
->iface_entry
->tag_stat_tree
);
2633 node
= rb_next(&((struct tag_stat
*)v
)->tn
.node
);
2636 qtaguid_stats_proc_next_iface_entry(ppi
);
2637 if (!ppi
->iface_entry
)
2639 node
= rb_first(&ppi
->iface_entry
->tag_stat_tree
);
2642 ts_entry
= rb_entry(node
, struct tag_stat
, tn
.node
);
2643 ppi
->tag
= ts_entry
->tn
.tag
;
2644 ppi
->tag_pos
= *pos
;
2645 ppi
->tag_item_index
= ppi
->item_index
;
2649 static void *qtaguid_stats_proc_start(struct seq_file
*m
, loff_t
*pos
)
2651 struct proc_print_info
*ppi
= m
->private;
2652 struct tag_stat
*ts_entry
= NULL
;
2654 spin_lock_bh(&iface_stat_list_lock
);
2657 ppi
->item_index
= 1;
2659 if (list_empty(&iface_stat_list
)) {
2660 ppi
->iface_entry
= NULL
;
2662 ppi
->iface_entry
= list_first_entry(&iface_stat_list
,
2665 spin_lock_bh(&ppi
->iface_entry
->tag_stat_list_lock
);
2667 return SEQ_START_TOKEN
;
2669 if (!qtaguid_stats_proc_iface_stat_ptr_valid(ppi
->iface_entry
)) {
2670 if (ppi
->iface_entry
) {
2671 pr_err("qtaguid: %s(): iface_entry %p not found\n",
2672 __func__
, ppi
->iface_entry
);
2673 ppi
->iface_entry
= NULL
;
2678 spin_lock_bh(&ppi
->iface_entry
->tag_stat_list_lock
);
2680 if (!ppi
->tag_pos
) {
2681 /* seq_read skipped first next call */
2682 ts_entry
= SEQ_START_TOKEN
;
2684 ts_entry
= tag_stat_tree_search(
2685 &ppi
->iface_entry
->tag_stat_tree
, ppi
->tag
);
2687 pr_info("qtaguid: %s(): tag_stat.tag 0x%llx not found. Abort.\n",
2688 __func__
, ppi
->tag
);
2693 if (*pos
== ppi
->tag_pos
) { /* normal resume */
2694 ppi
->item_index
= ppi
->tag_item_index
;
2696 /* seq_read skipped a next call */
2697 *pos
= ppi
->tag_pos
;
2698 ts_entry
= qtaguid_stats_proc_next(m
, ts_entry
, pos
);
2704 static void qtaguid_stats_proc_stop(struct seq_file
*m
, void *v
)
2706 struct proc_print_info
*ppi
= m
->private;
2707 if (ppi
->iface_entry
)
2708 spin_unlock_bh(&ppi
->iface_entry
->tag_stat_list_lock
);
2709 spin_unlock_bh(&iface_stat_list_lock
);
2713 * Procfs reader to get all tag stats using style "1)" as described in
2715 * Groups all protocols tx/rx bytes.
2717 static int qtaguid_stats_proc_show(struct seq_file
*m
, void *v
)
2719 struct tag_stat
*ts_entry
= v
;
2721 if (v
== SEQ_START_TOKEN
)
2724 pp_sets(m
, ts_entry
);
2729 /*------------------------------------------*/
2730 static int qtudev_open(struct inode
*inode
, struct file
*file
)
2732 struct uid_tag_data
*utd_entry
;
2733 struct proc_qtu_data
*pqd_entry
;
2734 struct proc_qtu_data
*new_pqd_entry
;
2736 bool utd_entry_found
;
2738 if (unlikely(qtu_proc_handling_passive
))
2741 DR_DEBUG("qtaguid: qtudev_open(): pid=%u tgid=%u uid=%u\n",
2742 current
->pid
, current
->tgid
, current_fsuid());
2744 spin_lock_bh(&uid_tag_data_tree_lock
);
2746 /* Look for existing uid data, or alloc one. */
2747 utd_entry
= get_uid_data(current_fsuid(), &utd_entry_found
);
2748 if (IS_ERR_OR_NULL(utd_entry
)) {
2749 res
= PTR_ERR(utd_entry
);
2753 /* Look for existing PID based proc_data */
2754 pqd_entry
= proc_qtu_data_tree_search(&proc_qtu_data_tree
,
2757 pr_err("qtaguid: qtudev_open(): %u/%u %u "
2758 "%s already opened\n",
2759 current
->pid
, current
->tgid
, current_fsuid(),
2762 goto err_unlock_free_utd
;
2765 new_pqd_entry
= kzalloc(sizeof(*new_pqd_entry
), GFP_ATOMIC
);
2766 if (!new_pqd_entry
) {
2767 pr_err("qtaguid: qtudev_open(): %u/%u %u: "
2768 "proc data alloc failed\n",
2769 current
->pid
, current
->tgid
, current_fsuid());
2771 goto err_unlock_free_utd
;
2773 new_pqd_entry
->pid
= current
->tgid
;
2774 INIT_LIST_HEAD(&new_pqd_entry
->sock_tag_list
);
2775 new_pqd_entry
->parent_tag_data
= utd_entry
;
2776 utd_entry
->num_pqd
++;
2778 proc_qtu_data_tree_insert(new_pqd_entry
,
2779 &proc_qtu_data_tree
);
2781 spin_unlock_bh(&uid_tag_data_tree_lock
);
2782 DR_DEBUG("qtaguid: tracking data for uid=%u in pqd=%p\n",
2783 current_fsuid(), new_pqd_entry
);
2784 file
->private_data
= new_pqd_entry
;
2787 err_unlock_free_utd
:
2788 if (!utd_entry_found
) {
2789 rb_erase(&utd_entry
->node
, &uid_tag_data_tree
);
2793 spin_unlock_bh(&uid_tag_data_tree_lock
);
2797 static int qtudev_release(struct inode
*inode
, struct file
*file
)
2799 struct proc_qtu_data
*pqd_entry
= file
->private_data
;
2800 struct uid_tag_data
*utd_entry
= pqd_entry
->parent_tag_data
;
2801 struct sock_tag
*st_entry
;
2802 struct rb_root st_to_free_tree
= RB_ROOT
;
2803 struct list_head
*entry
, *next
;
2806 if (unlikely(qtu_proc_handling_passive
))
2810 * Do not trust the current->pid, it might just be a kworker cleaning
2811 * up after a dead proc.
2813 DR_DEBUG("qtaguid: qtudev_release(): "
2814 "pid=%u tgid=%u uid=%u "
2815 "pqd_entry=%p->pid=%u utd_entry=%p->active_tags=%d\n",
2816 current
->pid
, current
->tgid
, pqd_entry
->parent_tag_data
->uid
,
2817 pqd_entry
, pqd_entry
->pid
, utd_entry
,
2818 utd_entry
->num_active_tags
);
2820 spin_lock_bh(&sock_tag_list_lock
);
2821 spin_lock_bh(&uid_tag_data_tree_lock
);
2823 list_for_each_safe(entry
, next
, &pqd_entry
->sock_tag_list
) {
2824 st_entry
= list_entry(entry
, struct sock_tag
, list
);
2825 DR_DEBUG("qtaguid: %s(): "
2826 "erase sock_tag=%p->sk=%p pid=%u tgid=%u uid=%u\n",
2828 st_entry
, st_entry
->sk
,
2829 current
->pid
, current
->tgid
,
2830 pqd_entry
->parent_tag_data
->uid
);
2832 utd_entry
= uid_tag_data_tree_search(
2834 get_uid_from_tag(st_entry
->tag
));
2835 BUG_ON(IS_ERR_OR_NULL(utd_entry
));
2836 DR_DEBUG("qtaguid: %s(): "
2837 "looking for tag=0x%llx in utd_entry=%p\n", __func__
,
2838 st_entry
->tag
, utd_entry
);
2839 tr
= tag_ref_tree_search(&utd_entry
->tag_ref_tree
,
2842 BUG_ON(tr
->num_sock_tags
<= 0);
2843 tr
->num_sock_tags
--;
2844 free_tag_ref_from_utd_entry(tr
, utd_entry
);
2846 rb_erase(&st_entry
->sock_node
, &sock_tag_tree
);
2847 list_del(&st_entry
->list
);
2848 /* Can't sockfd_put() within spinlock, do it later. */
2849 sock_tag_tree_insert(st_entry
, &st_to_free_tree
);
2852 * Try to free the utd_entry if no other proc_qtu_data is
2853 * using it (num_pqd is 0) and it doesn't have active tags
2854 * (num_active_tags is 0).
2856 put_utd_entry(utd_entry
);
2859 rb_erase(&pqd_entry
->node
, &proc_qtu_data_tree
);
2860 BUG_ON(pqd_entry
->parent_tag_data
->num_pqd
< 1);
2861 pqd_entry
->parent_tag_data
->num_pqd
--;
2862 put_utd_entry(pqd_entry
->parent_tag_data
);
2864 file
->private_data
= NULL
;
2866 spin_unlock_bh(&uid_tag_data_tree_lock
);
2867 spin_unlock_bh(&sock_tag_list_lock
);
2870 sock_tag_tree_erase(&st_to_free_tree
);
2872 prdebug_full_state(0, "%s(): pid=%u tgid=%u", __func__
,
2873 current
->pid
, current
->tgid
);
2877 /*------------------------------------------*/
2878 static const struct file_operations qtudev_fops
= {
2879 .owner
= THIS_MODULE
,
2880 .open
= qtudev_open
,
2881 .release
= qtudev_release
,
2884 static struct miscdevice qtu_device
= {
2885 .minor
= MISC_DYNAMIC_MINOR
,
2886 .name
= QTU_DEV_NAME
,
2887 .fops
= &qtudev_fops
,
2888 /* How sad it doesn't allow for defaults: .mode = S_IRUGO | S_IWUSR */
2891 static const struct seq_operations proc_qtaguid_ctrl_seqops
= {
2892 .start
= qtaguid_ctrl_proc_start
,
2893 .next
= qtaguid_ctrl_proc_next
,
2894 .stop
= qtaguid_ctrl_proc_stop
,
2895 .show
= qtaguid_ctrl_proc_show
,
2898 static int proc_qtaguid_ctrl_open(struct inode
*inode
, struct file
*file
)
2900 return seq_open_private(file
, &proc_qtaguid_ctrl_seqops
,
2901 sizeof(struct proc_ctrl_print_info
));
2904 static const struct file_operations proc_qtaguid_ctrl_fops
= {
2905 .open
= proc_qtaguid_ctrl_open
,
2907 .write
= qtaguid_ctrl_proc_write
,
2908 .llseek
= seq_lseek
,
2909 .release
= seq_release_private
,
2912 static const struct seq_operations proc_qtaguid_stats_seqops
= {
2913 .start
= qtaguid_stats_proc_start
,
2914 .next
= qtaguid_stats_proc_next
,
2915 .stop
= qtaguid_stats_proc_stop
,
2916 .show
= qtaguid_stats_proc_show
,
2919 static int proc_qtaguid_stats_open(struct inode
*inode
, struct file
*file
)
2921 return seq_open_private(file
, &proc_qtaguid_stats_seqops
,
2922 sizeof(struct proc_print_info
));
2925 static const struct file_operations proc_qtaguid_stats_fops
= {
2926 .open
= proc_qtaguid_stats_open
,
2928 .llseek
= seq_lseek
,
2929 .release
= seq_release_private
,
2932 /*------------------------------------------*/
2933 static int __init
qtaguid_proc_register(struct proc_dir_entry
**res_procdir
)
2936 *res_procdir
= proc_mkdir(module_procdirname
, init_net
.proc_net
);
2937 if (!*res_procdir
) {
2938 pr_err("qtaguid: failed to create proc/.../xt_qtaguid\n");
2943 xt_qtaguid_ctrl_file
= proc_create_data("ctrl", proc_ctrl_perms
,
2945 &proc_qtaguid_ctrl_fops
,
2947 if (!xt_qtaguid_ctrl_file
) {
2948 pr_err("qtaguid: failed to create xt_qtaguid/ctrl "
2954 xt_qtaguid_stats_file
= proc_create_data("stats", proc_stats_perms
,
2956 &proc_qtaguid_stats_fops
,
2958 if (!xt_qtaguid_stats_file
) {
2959 pr_err("qtaguid: failed to create xt_qtaguid/stats "
2962 goto no_stats_entry
;
2965 * TODO: add support counter hacking
2966 * xt_qtaguid_stats_file->write_proc = qtaguid_stats_proc_write;
2971 remove_proc_entry("ctrl", *res_procdir
);
2973 remove_proc_entry("xt_qtaguid", NULL
);
2978 static struct xt_match qtaguid_mt_reg __read_mostly
= {
2980 * This module masquerades as the "owner" module so that iptables
2981 * tools can deal with it.
2985 .family
= NFPROTO_UNSPEC
,
2986 .match
= qtaguid_mt
,
2987 .matchsize
= sizeof(struct xt_qtaguid_match_info
),
2991 static int __init
qtaguid_mt_init(void)
2993 if (qtaguid_proc_register(&xt_qtaguid_procdir
)
2994 || iface_stat_init(xt_qtaguid_procdir
)
2995 || xt_register_match(&qtaguid_mt_reg
)
2996 || misc_register(&qtu_device
))
3002 * TODO: allow unloading of the module.
3003 * For now stats are permanent.
3004 * Kconfig forces'y/n' and never an 'm'.
3007 module_init(qtaguid_mt_init
);
3008 MODULE_AUTHOR("jpa <jpa@google.com>");
3009 MODULE_DESCRIPTION("Xtables: socket owner+tag matching and associated stats");
3010 MODULE_LICENSE("GPL");
3011 MODULE_ALIAS("ipt_owner");
3012 MODULE_ALIAS("ip6t_owner");
3013 MODULE_ALIAS("ipt_qtaguid");
3014 MODULE_ALIAS("ip6t_qtaguid");