
--- /dev/null
+++ b/include/linux/netfilter_ipv4/ipt_NETFLOW.h
#ifndef _IP_NETFLOW_H
#define _IP_NETFLOW_H

/*
 * Some tech info:
 * http://www.cisco.com/en/US/products/ps6601/prod_white_papers_list.html
 * http://www.cisco.com/en/US/products/sw/netmgtsw/ps1964/products_implementation_design_guide09186a00800d6a11.html
 */

#define NETFLOW5_RECORDS_MAX 30

struct netflow5_record {
	__be32		s_addr;
	__be32		d_addr;
	__be32		nexthop;
	__be16		i_ifc;
	__be16		o_ifc;
	__be32		nr_packets;
	__be32		nr_octets;
	__be32		ts_first;
	__be32		ts_last;
	__be16		s_port;
	__be16		d_port;
	__u8		reserved;
	__u8		tcp_flags;
	__u8		protocol;
	__u8		tos;
	__be16		s_as;
	__be16		d_as;
	__u8		s_mask;
	__u8		d_mask;
	__u16		padding;
} __attribute__ ((packed));

/* NetFlow v5 packet */
struct netflow5_pdu {
	__be16			version;
	__be16			nr_records;
	__be32			ts_uptime;
	__be32			ts_usecs;
	__be32			ts_unsecs;
	__be32			seq;
	__u8			eng_type;
	__u8			eng_id;
	__u16			padding;
	struct netflow5_record	flow[NETFLOW5_RECORDS_MAX];
} __attribute__ ((packed));
#define NETFLOW5_HEADER_SIZE (sizeof(struct netflow5_pdu) - NETFLOW5_RECORDS_MAX * sizeof(struct netflow5_record))

/* hashed data which identify unique flow */
struct ipt_netflow_tuple {
	__be32		s_addr;	// Network byte order
	__be32		d_addr; // -"-
	__be16		s_port; // -"-
	__be16		d_port; // -"-
	__be16		i_ifc;	// Local byte order
	__u8		protocol;
	__u8		tos;
};
/* tuple size is rounded to u32s */
#define NETFLOW_TUPLE_SIZE (sizeof(struct ipt_netflow_tuple) / 4)

/* maximum bytes flow can have, after it reached flow become not searchable and will be exported soon */
#define FLOW_FULL_WATERMARK 0xffefffff

/* flow entry */
struct ipt_netflow {
	struct hlist_node hlist; // hashtable search chain
	struct list_head list; // all flows chain

	/* unique per flow data (hashed, NETFLOW_TUPLE_SIZE) */
	struct ipt_netflow_tuple tuple;

	/* volatile data */
	__be16		o_ifc;
	__u8		s_mask;
	__u8		d_mask;

	/* flow statistics */
	u_int32_t	nr_packets;
	u_int32_t	nr_bytes;
	unsigned long	ts_first;
	unsigned long	ts_last;
	__u8		tcp_flags; /* `OR' of all tcp flags */
};

static inline int ipt_netflow_tuple_equal(const struct ipt_netflow_tuple *t1,
				    const struct ipt_netflow_tuple *t2)
{
	return (!memcmp(t1, t2, sizeof(struct ipt_netflow_tuple)));
}

struct ipt_netflow_sock {
	struct list_head list;
	struct socket *sock;
	__be32 ipaddr;
	unsigned short port;
	atomic_t wmem_peak;	// sk_wmem_alloc peak value
	atomic_t err_full;	// socket filled error
	atomic_t err_other;	// other socket errors
};

struct netflow_aggr_n {
	struct list_head list;
	__u32 mask;
	__u32 addr;
	__u32 aggr_mask;
	__u8 prefix;
};

struct netflow_aggr_p {
	struct list_head list;
	__u16 port1;
	__u16 port2;
	__u16 aggr_port;
};

#define NETFLOW_STAT_INC(count) (__get_cpu_var(ipt_netflow_stat).count++)
#define NETFLOW_STAT_ADD(count, val) (__get_cpu_var(ipt_netflow_stat).count += (unsigned long long)val)

#define NETFLOW_STAT_INC_ATOMIC(count)				\
	do {							\
		preempt_disable();				\
		(__get_cpu_var(ipt_netflow_stat).count++);	\
		preempt_enable();				\
	} while(0);

#define NETFLOW_STAT_ADD_ATOMIC(count, val)			\
	do {							\
		preempt_disable();				\
		(__get_cpu_var(ipt_netflow_stat).count += (unsigned long long)val); \
		preempt_enable();				\
	} while(0);


/* statistics */
struct ipt_netflow_stat {
	u64 searched;			// hash stat
	u64 found;			// hash stat
	u64 notfound;			// hash stat
	unsigned int truncated;		// packets stat
	unsigned int frags;		// packets stat
	unsigned int alloc_err;		// failed to allocate flow mem
	unsigned int maxflows_err;	// maxflows reached
	unsigned int send_success;	// sendmsg() ok
	unsigned int send_failed;	// sendmsg() failed
	unsigned int sock_errors;	// socket error callback called (got icmp refused)
	u64 exported_size;		// netflow traffic itself
	u64 pkt_total;			// packets accounted total
	u64 traf_total;			// traffic accounted total
	u64 pkt_drop;			// packets not accounted total
	u64 traf_drop;			// traffic not accounted total
	u64 pkt_out;			// packets out of the memory
	u64 traf_out;			// traffic out of the memory
};

#ifndef list_first_entry
#define list_first_entry(ptr, type, member) \
	list_entry((ptr)->next, type, member)
#endif

#endif
/* vim: set sw=8: */

--- a/net/ipv4/netfilter/Kconfig
+++ b/net/ipv4/netfilter/Kconfig
@@ -181,6 +181,15 @@ config IP_NF_TARGET_MASQUERADE
 
 	  To compile it as a module, choose M here.  If unsure, say N.
 
+config IP_NF_TARGET_NETFLOW
+	tristate "NETFLOW target support"
+	depends on NF_NAT
+	depends on NETFILTER_ADVANCED
+	help
+	  NETFLOW is an implementation of in-kernel netflow probe
+
+	  To compile it as a module, choose M here.  If unsure, say N.
+
 config IP_NF_TARGET_NETMAP
 	tristate "NETMAP target support"
 	depends on NF_NAT

--- a/net/ipv4/netfilter/Makefile
+++ b/net/ipv4/netfilter/Makefile
@@ -57,6 +57,7 @@ obj-$(CONFIG_IP_NF_TARGET_CLUSTERIP) += ipt_CLUSTERIP.o
 obj-$(CONFIG_IP_NF_TARGET_ECN) += ipt_ECN.o
 obj-$(CONFIG_IP_NF_TARGET_LOG) += ipt_LOG.o
 obj-$(CONFIG_IP_NF_TARGET_MASQUERADE) += ipt_MASQUERADE.o
+obj-$(CONFIG_IP_NF_TARGET_NETFLOW) += ipt_NETFLOW.o
 obj-$(CONFIG_IP_NF_TARGET_NETMAP) += ipt_NETMAP.o
 obj-$(CONFIG_IP_NF_TARGET_REDIRECT) += ipt_REDIRECT.o
 obj-$(CONFIG_IP_NF_TARGET_REJECT) += ipt_REJECT.o

--- /dev/null
+++ b/net/ipv4/netfilter/ipt_NETFLOW.c
/*
 * This is NetFlow exporting module (NETFLOW target)
 * (c) 2008 <abc@telekom.ru>
 *
 */

//#define RAW_PROMISC_HACK

#include <linux/module.h>
#include <linux/skbuff.h>
#include <linux/proc_fs.h>
#include <linux/vmalloc.h>
#include <linux/seq_file.h>
#include <linux/random.h>
#include <linux/ip.h>
#include <linux/udp.h>
#include <linux/icmp.h>
#include <linux/igmp.h>
#include <linux/inetdevice.h>
#include <linux/jhash.h>
#include <net/icmp.h>
#include <net/ip.h>
#include <net/tcp.h>
#include <net/route.h>
#include <net/dst.h>
#include <linux/netfilter/x_tables.h>
#include <asm/unaligned.h>
#include <linux/netfilter_ipv4/ipt_NETFLOW.h>
#ifdef CONFIG_BRIDGE_NETFILTER
#include <linux/netfilter_bridge.h>
#endif
#ifdef CONFIG_SYSCTL
#include <linux/sysctl.h>
#endif

#ifndef HIPQUAD
#if defined(__LITTLE_ENDIAN)
#define HIPQUAD(addr) \
	((unsigned char *)&addr)[3], \
	((unsigned char *)&addr)[2], \
	((unsigned char *)&addr)[1], \
	((unsigned char *)&addr)[0]
#elif defined(__BIG_ENDIAN)
#define HIPQUAD NIPQUAD
#else
#error "Please fix asm/byteorder.h"
#endif /* __LITTLE_ENDIAN */
#endif

#define IPT_NETFLOW_VERSION "1.6"
51
52
MODULE_LICENSE("GPL");
53
MODULE_AUTHOR("<abc@telekom.ru>");
54
MODULE_DESCRIPTION("iptables NETFLOW target module");
55
MODULE_VERSION(IPT_NETFLOW_VERSION);
56
57
#define DST_SIZE 256
58
static char destination_buf[DST_SIZE] = "127.0.0.1:2055";
59
static char *destination = destination_buf;
60
module_param(destination, charp, 0400);
61
MODULE_PARM_DESC(destination, "export destination ipaddress:port");
62
63
static int inactive_timeout = 15;
64
module_param(inactive_timeout, int, 0600);
65
MODULE_PARM_DESC(inactive_timeout, "inactive flows timeout in seconds");
66
67
static int active_timeout = 30 * 60;
68
module_param(active_timeout, int, 0600);
69
MODULE_PARM_DESC(active_timeout, "active flows timeout in seconds");
70
71
static int debug = 0;
72
module_param(debug, int, 0600);
73
MODULE_PARM_DESC(debug, "debug verbosity level");
74
75
static int sndbuf;
76
module_param(sndbuf, int, 0400);
77
MODULE_PARM_DESC(sndbuf, "udp socket SNDBUF size");
78
79
static int hashsize;
80
module_param(hashsize, int, 0400);
81
MODULE_PARM_DESC(hashsize, "hash table size");
82
83
static int maxflows = 2000000;
84
module_param(maxflows, int, 0600);
85
MODULE_PARM_DESC(maxflows, "maximum number of flows");
86
static int peakflows = 0;
87
static unsigned long peakflows_at;
88
89
#define AGGR_SIZE 1024
90
static char aggregation_buf[AGGR_SIZE] = "";
91
static char *aggregation = aggregation_buf;
92
module_param(aggregation, charp, 0400);
93
MODULE_PARM_DESC(aggregation, "aggregation ruleset");
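/* Illustrative parameter syntax, inferred from the sscanf() formats used in
 * add_destinations() and add_aggregation() below (not from original docs):
 *   destination=192.0.2.1:2055,192.0.2.2:2055
 *   aggregation=10.0.0.0/8=16,1024-65535=65535
 * i.e. networks are given as addr/prefix=aggregated_prefix and ports as
 * port1-port2=replacement_port, separated by spaces, commas or semicolons. */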
94
95
static DEFINE_PER_CPU(struct ipt_netflow_stat, ipt_netflow_stat);
96
static LIST_HEAD(usock_list);
97
static DEFINE_RWLOCK(sock_lock);
98
99
static unsigned int ipt_netflow_hash_rnd;
100
struct hlist_head *ipt_netflow_hash __read_mostly; /* hash table memory */
101
static unsigned int ipt_netflow_hash_size __read_mostly = 0; /* buckets */
102
static LIST_HEAD(ipt_netflow_list); /* all flows */
103
static LIST_HEAD(aggr_n_list);
104
static LIST_HEAD(aggr_p_list);
105
static DEFINE_RWLOCK(aggr_lock);
106
static struct kmem_cache *ipt_netflow_cachep __read_mostly; /* ipt_netflow memory */
107
static atomic_t ipt_netflow_count = ATOMIC_INIT(0);
108
static DEFINE_SPINLOCK(ipt_netflow_lock);
109
110
static DEFINE_SPINLOCK(pdu_lock);
111
static long long pdu_packets = 0, pdu_traf = 0;
112
static struct netflow5_pdu pdu;
113
static unsigned long pdu_ts_mod;
114
static void netflow_work_fn(struct work_struct *work);
115
static DECLARE_DELAYED_WORK(netflow_work, netflow_work_fn);
116
static struct timer_list rate_timer;
117
118
#define TCP_FIN_RST 0x05
119
120
static long long sec_prate = 0, sec_brate = 0;
121
static long long min_prate = 0, min_brate = 0;
122
static long long min5_prate = 0, min5_brate = 0;
123
static unsigned int metric = 10, min15_metric = 10, min5_metric = 10, min_metric = 10; /* hash metrics */
124
125
static int set_hashsize(int new_size);
126
static void destination_fini(void);
127
static int add_destinations(char *ptr);
128
static void aggregation_fini(struct list_head *list);
129
static int add_aggregation(char *ptr);
130
131
static inline __be32 bits2mask(int bits) {
132
	return (bits? 0xffffffff << (32 - bits) : 0);
133
}
134
135
static inline int mask2bits(__be32 mask) {
136
	int n;
137
138
	for (n = 0; mask; n++)
139
		mask = (mask << 1) & 0xffffffff;
140
	return n;
141
}
142
143
#ifdef CONFIG_PROC_FS
144
/* procfs statistics /proc/net/stat/ipt_netflow */
145
static int nf_seq_show(struct seq_file *seq, void *v)
146
{
147
	unsigned int nr_flows = atomic_read(&ipt_netflow_count);
148
	int cpu;
149
	unsigned long long searched = 0, found = 0, notfound = 0;
150
	unsigned int truncated = 0, frags = 0, alloc_err = 0, maxflows_err = 0;
151
	unsigned int sock_errors = 0, send_failed = 0, send_success = 0;
152
	unsigned long long pkt_total = 0, traf_total = 0, exported_size = 0;
153
	unsigned long long pkt_drop = 0, traf_drop = 0;
154
	unsigned long long pkt_out = 0, traf_out = 0;
155
	struct ipt_netflow_sock *usock;
156
	struct netflow_aggr_n *aggr_n;
157
	struct netflow_aggr_p *aggr_p;
158
	int snum = 0;
159
	int peak = (jiffies - peakflows_at) / HZ;
160
161
	seq_printf(seq, "Flows: active %u (peak %u reached %ud%uh%um ago), mem %uK\n",
162
		   nr_flows,
163
		   peakflows,
164
		   peak / (60 * 60 * 24), (peak / (60 * 60)) % 24, (peak / 60) % 60,
165
		   (unsigned int)((nr_flows * sizeof(struct ipt_netflow)) >> 10));
166
167
	for_each_present_cpu(cpu) {
168
		struct ipt_netflow_stat *st = &per_cpu(ipt_netflow_stat, cpu);
169
170
		searched += st->searched;
171
		found += st->found;
172
		notfound += st->notfound;
173
		truncated += st->truncated;
174
		frags += st->frags;
175
		alloc_err += st->alloc_err;
176
		maxflows_err += st->maxflows_err;
177
		send_success += st->send_success;
178
		send_failed += st->send_failed;
179
		sock_errors += st->sock_errors;
180
		exported_size += st->exported_size;
181
		pkt_total += st->pkt_total;
182
		traf_total += st->traf_total;
183
		pkt_drop += st->pkt_drop;
184
		traf_drop += st->traf_drop;
185
		pkt_out += st->pkt_out;
186
		traf_out += st->traf_out;
187
	}
188
189
#define FFLOAT(x, prec) (int)(x) / prec, (int)(x) % prec
190
	seq_printf(seq, "Hash: size %u (mem %uK), metric %d.%d, %d.%d, %d.%d, %d.%d. MemTraf: %llu pkt, %llu K (pdu %llu, %llu).\n",
191
		   ipt_netflow_hash_size,
192
		   (unsigned int)((ipt_netflow_hash_size * sizeof(struct hlist_head)) >> 10),
193
		   FFLOAT(metric, 10),
194
		   FFLOAT(min_metric, 10),
195
		   FFLOAT(min5_metric, 10),
196
		   FFLOAT(min15_metric, 10),
197
		   pkt_total - pkt_out + pdu_packets,
198
		   (traf_total - traf_out + pdu_traf) >> 10,
199
		   pdu_packets,
200
		   pdu_traf);
201
202
	seq_printf(seq, "Timeout: active %d, inactive %d. Maxflows %u\n",
203
		   active_timeout,
204
		   inactive_timeout,
205
		   maxflows);
206
207
	seq_printf(seq, "Rate: %llu bits/sec, %llu packets/sec; Avg 1 min: %llu bps, %llu pps; 5 min: %llu bps, %llu pps\n",
208
		   sec_brate, sec_prate, min_brate, min_prate, min5_brate, min5_prate);
209
210
	seq_printf(seq, "cpu#  stat: <search found new, trunc frag alloc maxflows>, sock: <ok fail cberr, bytes>, traffic: <pkt, bytes>, drop: <pkt, bytes>\n");
211
212
	seq_printf(seq, "Total stat: %6llu %6llu %6llu, %4u %4u %4u %4u, sock: %6u %u %u, %llu K, traffic: %llu, %llu MB, drop: %llu, %llu K\n",
213
		   (unsigned long long)searched,
214
		   (unsigned long long)found,
215
		   (unsigned long long)notfound,
216
		   truncated, frags, alloc_err, maxflows_err,
217
		   send_success, send_failed, sock_errors,
218
		   (unsigned long long)exported_size >> 10,
219
		   (unsigned long long)pkt_total, (unsigned long long)traf_total >> 20,
220
		   (unsigned long long)pkt_drop, (unsigned long long)traf_drop >> 10);
221
222
	if (num_present_cpus() > 1) {
223
		for_each_present_cpu(cpu) {
224
			struct ipt_netflow_stat *st;
225
226
			st = &per_cpu(ipt_netflow_stat, cpu);
227
			seq_printf(seq, "cpu%u  stat: %6llu %6llu %6llu, %4u %4u %4u %4u, sock: %6u %u %u, %llu K, traffic: %llu, %llu MB, drop: %llu, %llu K\n",
228
				   cpu,
229
				   (unsigned long long)st->searched,
230
				   (unsigned long long)st->found,
231
				   (unsigned long long)st->notfound,
232
				   st->truncated, st->frags, st->alloc_err, st->maxflows_err,
233
				   st->send_success, st->send_failed, st->sock_errors,
234
				   (unsigned long long)st->exported_size >> 10,
235
				   (unsigned long long)st->pkt_total, (unsigned long long)st->traf_total >> 20,
236
				   (unsigned long long)st->pkt_drop, (unsigned long long)st->traf_drop >> 10);
237
		}
238
	}
239
240
	read_lock(&sock_lock);
241
	list_for_each_entry(usock, &usock_list, list) {
242
		struct sock *sk = usock->sock->sk;
243
244
		seq_printf(seq, "sock%d: %u.%u.%u.%u:%u, sndbuf %u, filled %u, peak %u; err: sndbuf reached %u, other %u\n",
245
			   snum,
246
			   usock->ipaddr >> 24,
247
			   (usock->ipaddr >> 16) & 255,
248
			   (usock->ipaddr >> 8) & 255,
249
			   usock->ipaddr & 255,
250
			   usock->port,
251
			   sk->sk_sndbuf,
252
			   atomic_read(&sk->sk_wmem_alloc),
253
			   atomic_read(&usock->wmem_peak),
254
			   atomic_read(&usock->err_full),
255
			   atomic_read(&usock->err_other));
256
		snum++;
257
	}
258
	read_unlock(&sock_lock);
259
260
	read_lock_bh(&aggr_lock);
261
	snum = 0;
262
	list_for_each_entry(aggr_n, &aggr_n_list, list) {
263
		seq_printf(seq, "aggr#%d net: match %u.%u.%u.%u/%d strip %d\n",
264
			   snum,
265
			   HIPQUAD(aggr_n->addr),
266
			   mask2bits(aggr_n->mask),
267
			   mask2bits(aggr_n->aggr_mask));
268
		snum++;
269
	}
270
	snum = 0;
271
	list_for_each_entry(aggr_p, &aggr_p_list, list) {
272
		seq_printf(seq, "aggr#%d port: ports %u-%u replace %u\n",
273
			   snum,
274
			   aggr_p->port1,
275
			   aggr_p->port2,
276
			   aggr_p->aggr_port);
277
		snum++;
278
	}
279
	read_unlock_bh(&aggr_lock);
280
	return 0;
281
}
282
283
static int nf_seq_open(struct inode *inode, struct file *file)
284
{
285
	return single_open(file, nf_seq_show, NULL);
286
}
287
288
static struct file_operations nf_seq_fops = {
289
	.owner	 = THIS_MODULE,
290
	.open	 = nf_seq_open,
291
	.read	 = seq_read,
292
	.llseek	 = seq_lseek,
293
	.release = single_release,
294
};
295
#endif /* CONFIG_PROC_FS */
296
297
#ifdef CONFIG_SYSCTL
298
/* sysctl /proc/sys/net/netflow */
299
static int hsize_procctl(ctl_table *ctl, int write, void __user *buffer, size_t *lenp, loff_t *fpos)
300
{
301
	void *orig = ctl->data;
302
	int ret, hsize;
303
304
	if (write)
305
		ctl->data = &hsize;
306
	ret = proc_dointvec(ctl, write, buffer, lenp, fpos);
307
	if (write) {
308
		ctl->data = orig;
309
		if (hsize < 1)
310
			return -EPERM;
311
		return set_hashsize(hsize)?:ret;
312
	} else
313
		return ret;
314
}
315
316
static int sndbuf_procctl(ctl_table *ctl, int write, void __user *buffer, size_t *lenp, loff_t *fpos)
317
{
318
	int ret;
319
	struct ipt_netflow_sock *usock;
320
321
	read_lock(&sock_lock);
322
	if (list_empty(&usock_list)) {
323
		read_unlock(&sock_lock);
324
		return -ENOENT;
325
	}
326
	usock = list_first_entry(&usock_list, struct ipt_netflow_sock, list);
327
	sndbuf = usock->sock->sk->sk_sndbuf;
328
	read_unlock(&sock_lock);
329
330
	ctl->data = &sndbuf;
331
	ret = proc_dointvec(ctl, write, buffer, lenp, fpos);
332
	if (!write)
333
		return ret;
334
	if (sndbuf < SOCK_MIN_SNDBUF)
335
		sndbuf = SOCK_MIN_SNDBUF;
336
	write_lock(&sock_lock);
337
	list_for_each_entry(usock, &usock_list, list) {
338
		usock->sock->sk->sk_sndbuf = sndbuf;
339
	}
340
	write_unlock(&sock_lock);
341
	return ret;
342
}
343
344
static int destination_procctl(ctl_table *ctl, int write, void __user *buffer, size_t *lenp, loff_t *fpos)
345
{
346
	int ret;
347
348
	ret = proc_dostring(ctl, write, buffer, lenp, fpos);
349
	if (ret >= 0 && write) {
350
		destination_fini();
351
		add_destinations(destination_buf);
352
	}
353
	return ret;
354
}
355
356
static int aggregation_procctl(ctl_table *ctl, int write, void __user *buffer, size_t *lenp, loff_t *fpos)
357
{
358
	int ret;
359
360
	if (debug > 1)
361
		printk(KERN_INFO "aggregation_procctl (%d) %u %llu\n", write, (unsigned int)(*lenp), *fpos);
362
	ret = proc_dostring(ctl, write, buffer, lenp, fpos);
363
	if (ret >= 0 && write) {
364
		add_aggregation(aggregation_buf);
365
	}
366
	return ret;
367
}
368
369
static struct ctl_table_header *netflow_sysctl_header;
370
371
static struct ctl_table netflow_sysctl_table[] = {
372
	{
373
		.procname	= "active_timeout",
374
		.mode		= 0644,
375
		.data		= &active_timeout,
376
		.maxlen		= sizeof(int),
377
		.proc_handler	= &proc_dointvec,
378
	},
379
	{
380
		.procname	= "inactive_timeout",
381
		.mode		= 0644,
382
		.data		= &inactive_timeout,
383
		.maxlen		= sizeof(int),
384
		.proc_handler	= &proc_dointvec,
385
	},
386
	{
387
		.procname	= "debug",
388
		.mode		= 0644,
389
		.data		= &debug,
390
		.maxlen		= sizeof(int),
391
		.proc_handler	= &proc_dointvec,
392
	},
393
	{
394
		.procname	= "hashsize",
395
		.mode		= 0644,
396
		.data		= &ipt_netflow_hash_size,
397
		.maxlen		= sizeof(int),
398
		.proc_handler	= &hsize_procctl,
399
	},
400
	{
401
		.procname	= "sndbuf",
402
		.mode		= 0644,
403
		.maxlen		= sizeof(int),
404
		.proc_handler	= &sndbuf_procctl,
405
	},
406
	{
407
		.procname	= "destination",
408
		.mode		= 0644,
409
		.data		= &destination_buf,
410
		.maxlen		= sizeof(destination_buf),
411
		.proc_handler	= &destination_procctl,
412
	},
413
	{
414
		.procname	= "aggregation",
415
		.mode		= 0644,
416
		.data		= &aggregation_buf,
417
		.maxlen		= sizeof(aggregation_buf),
418
		.proc_handler	= &aggregation_procctl,
419
	},
420
	{
421
		.procname	= "maxflows",
422
		.mode		= 0644,
423
		.data		= &maxflows,
424
		.maxlen		= sizeof(int),
425
		.proc_handler	= &proc_dointvec,
426
	},
427
	{ }
428
};
429
430
static struct ctl_path netflow_sysctl_path[] = {
431
	{ .procname = "net" },
432
	{ .procname = "netflow" },
433
	{ }
434
};
435
#endif /* CONFIG_SYSCTL */
436
437
/* socket code */
438
static void sk_error_report(struct sock *sk)
439
{
440
	/* clear connection refused errors if any */
441
	write_lock_bh(&sk->sk_callback_lock);
442
	if (debug > 1)
443
		printk(KERN_INFO "NETFLOW: socket error <%d>\n", sk->sk_err);
444
	sk->sk_err = 0;
445
	NETFLOW_STAT_INC(sock_errors);
446
	write_unlock_bh(&sk->sk_callback_lock);
447
	return;
448
}
449
450
// returns the number of sends that succeeded, 0 if none
451
static int netflow_send_pdu(void *buffer, int len)
452
{
453
	struct msghdr msg = { .msg_flags = MSG_DONTWAIT|MSG_NOSIGNAL };
454
	struct kvec iov = { buffer, len };
455
	int retok = 0, ret;
456
	int snum = 0;
457
	struct ipt_netflow_sock *usock;
458
459
	read_lock(&sock_lock);
460
	list_for_each_entry(usock, &usock_list, list) {
461
		if (debug)
462
			printk(KERN_INFO "netflow_send_pdu: sendmsg(%d, %d) [%u %u]\n",
463
			       snum,
464
			       len,
465
			       atomic_read(&usock->sock->sk->sk_wmem_alloc),
466
			       usock->sock->sk->sk_sndbuf);
467
		ret = kernel_sendmsg(usock->sock, &msg, &iov, 1, (size_t)len);
468
		if (ret < 0) {
469
			char *suggestion = "";
470
471
			NETFLOW_STAT_INC_ATOMIC(send_failed);
472
			if (ret == -EAGAIN) {
473
				atomic_inc(&usock->err_full);
474
				suggestion = ": increase sndbuf!";
475
			} else
476
				atomic_inc(&usock->err_other);
477
			printk(KERN_ERR "netflow_send_pdu[%d]: sendmsg error %d: data loss %llu pkt, %llu bytes%s\n",
478
			       snum, ret, pdu_packets, pdu_traf, suggestion);
479
		} else {
480
			unsigned int wmem = atomic_read(&usock->sock->sk->sk_wmem_alloc);
481
			if (wmem > atomic_read(&usock->wmem_peak))
482
				atomic_set(&usock->wmem_peak, wmem);
483
			NETFLOW_STAT_INC_ATOMIC(send_success);
484
			NETFLOW_STAT_ADD_ATOMIC(exported_size, ret);
485
			retok++;
486
		}
487
		snum++;
488
	}
489
	read_unlock(&sock_lock);
490
	return retok;
491
}
492
493
static void usock_free(struct ipt_netflow_sock *usock)
494
{
495
	printk(KERN_INFO "netflow: remove destination %u.%u.%u.%u:%u (%p)\n",
496
	       HIPQUAD(usock->ipaddr),
497
	       usock->port,
498
	       usock->sock);
499
	if (usock->sock)
500
		sock_release(usock->sock);
501
	usock->sock = NULL;
502
	vfree(usock);
503
}
504
505
static void destination_fini(void)
506
{
507
	write_lock(&sock_lock);
508
	while (!list_empty(&usock_list)) {
509
		struct ipt_netflow_sock *usock;
510
511
		usock = list_entry(usock_list.next, struct ipt_netflow_sock, list);
512
		list_del(&usock->list);
513
		write_unlock(&sock_lock);
514
		usock_free(usock);
515
		write_lock(&sock_lock);
516
	}
517
	write_unlock(&sock_lock);
518
}
519
520
static void add_usock(struct ipt_netflow_sock *usock)
521
{
522
	struct ipt_netflow_sock *sk;
523
524
	/* don't need empty sockets */
525
	if (!usock->sock) {
526
		usock_free(usock);
527
		return;
528
	}
529
530
	write_lock(&sock_lock);
531
	/* don't need duplicated sockets */
532
	list_for_each_entry(sk, &usock_list, list) {
533
		if (sk->ipaddr == usock->ipaddr &&
534
		    sk->port == usock->port) {
535
			write_unlock(&sock_lock);
536
			usock_free(usock);
537
			return;
538
		}
539
	}
540
	list_add_tail(&usock->list, &usock_list);
541
	printk(KERN_INFO "netflow: added destination %u.%u.%u.%u:%u\n",
542
	       HIPQUAD(usock->ipaddr),
543
	       usock->port);
544
	write_unlock(&sock_lock);
545
}
546
547
static struct socket *usock_alloc(__be32 ipaddr, unsigned short port)
548
{
549
	struct sockaddr_in sin;
550
	struct socket *sock;
551
	int error;
552
553
	if ((error = sock_create_kern(PF_INET, SOCK_DGRAM, IPPROTO_UDP, &sock)) < 0) {
554
		printk(KERN_ERR "netflow: sock_create_kern error %d\n", error);
555
		return NULL;
556
	}
557
	sock->sk->sk_allocation = GFP_ATOMIC;
558
	sock->sk->sk_prot->unhash(sock->sk); /* hidden from input */
559
	sock->sk->sk_error_report = &sk_error_report; /* clear ECONNREFUSED */
560
	if (sndbuf)
561
		sock->sk->sk_sndbuf = sndbuf;
562
	else
563
		sndbuf = sock->sk->sk_sndbuf;
564
	memset(&sin, 0, sizeof(sin));
565
	sin.sin_family      = AF_INET;
566
	sin.sin_addr.s_addr = htonl(ipaddr);
567
	sin.sin_port        = htons(port);
568
	if ((error = sock->ops->connect(sock, (struct sockaddr *)&sin,
569
				  sizeof(sin), 0)) < 0) {
570
		printk(KERN_ERR "netflow: error connecting UDP socket %d\n", error);
571
		sock_release(sock);
572
		return NULL;
573
	}
574
	return sock;
575
}
576
577
#define SEPARATORS " ,;\t\n"
578
static int add_destinations(char *ptr)
579
{
580
	while (ptr) {
581
		unsigned char ip[4];
582
		unsigned short port;
583
584
		ptr += strspn(ptr, SEPARATORS);
585
586
		if (sscanf(ptr, "%hhu.%hhu.%hhu.%hhu:%hu",
587
			   ip, ip + 1, ip + 2, ip + 3, &port) == 5) {
588
			struct ipt_netflow_sock *usock;
589
590
			if (!(usock = vmalloc(sizeof(*usock)))) {
591
				printk(KERN_ERR "netflow: can't vmalloc socket\n");
592
				return -ENOMEM;
593
			}
594
595
			memset(usock, 0, sizeof(*usock));
596
			usock->ipaddr = ntohl(*(__be32 *)ip);
597
			usock->port = port;
598
			usock->sock = usock_alloc(usock->ipaddr, port);
599
			atomic_set(&usock->wmem_peak, 0);
600
			atomic_set(&usock->err_full, 0);
601
			atomic_set(&usock->err_other, 0);
602
			add_usock(usock);
603
		} else
604
			break;
605
606
		ptr = strpbrk(ptr, SEPARATORS);
607
	}
608
	return 0;
609
}
610
611
static void aggregation_fini(struct list_head *list)
612
{
613
	write_lock_bh(&aggr_lock);
614
	while (!list_empty(list)) {
615
		struct netflow_aggr_n *aggr; /* match netflow_aggr_p too */
616
617
		aggr = list_entry(list->next, struct netflow_aggr_n, list);
618
		list_del(&aggr->list);
619
		write_unlock_bh(&aggr_lock);
620
		vfree(aggr);
621
		write_lock_bh(&aggr_lock);
622
	}
623
	write_unlock_bh(&aggr_lock);
624
}
625
626
static int add_aggregation(char *ptr)
627
{
628
	struct netflow_aggr_n *aggr_n, *aggr, *tmp;
629
	struct netflow_aggr_p *aggr_p;
630
	LIST_HEAD(new_aggr_n_list);
631
	LIST_HEAD(new_aggr_p_list);
632
	LIST_HEAD(old_aggr_list);
633
634
	while (ptr && *ptr) {
635
		unsigned char ip[4];
636
		unsigned int mask;
637
		unsigned int port1, port2;
638
		unsigned int aggr_to;
639
640
		ptr += strspn(ptr, SEPARATORS);
641
642
		if (sscanf(ptr, "%hhu.%hhu.%hhu.%hhu/%u=%u",
643
			   ip, ip + 1, ip + 2, ip + 3, &mask, &aggr_to) == 6) {
644
645
			if (!(aggr_n = vmalloc(sizeof(*aggr_n)))) {
646
				printk(KERN_ERR "netflow: can't vmalloc aggr\n");
647
				return -ENOMEM;
648
			}
649
			memset(aggr_n, 0, sizeof(*aggr_n));
650
651
			aggr_n->addr = ntohl(*(__be32 *)ip);
652
			aggr_n->mask = bits2mask(mask);
653
			aggr_n->aggr_mask = bits2mask(aggr_to);
654
			aggr_n->prefix = mask;
655
			printk(KERN_INFO "netflow: add aggregation [%u.%u.%u.%u/%u=%u]\n",
656
			       HIPQUAD(aggr_n->addr), mask, aggr_to);
657
			list_add_tail(&aggr_n->list, &new_aggr_n_list);
658
659
		} else if (sscanf(ptr, "%u-%u=%u", &port1, &port2, &aggr_to) == 3 ||
660
			   sscanf(ptr, "%u=%u", &port2, &aggr_to) == 2) {
661
662
			if (!(aggr_p = vmalloc(sizeof(*aggr_p)))) {
663
				printk(KERN_ERR "netflow: can't vmalloc aggr\n");
664
				return -ENOMEM;
665
			}
666
			memset(aggr_p, 0, sizeof(*aggr_p));
667
668
			aggr_p->port1 = port1;
669
			aggr_p->port2 = port2;
670
			aggr_p->aggr_port = aggr_to;
671
			printk(KERN_INFO "netflow: add aggregation [%u-%u=%u]\n",
672
			       port1, port2, aggr_to);
673
			list_add_tail(&aggr_p->list, &new_aggr_p_list);
674
		} else {
675
			printk(KERN_ERR "netflow: bad aggregation rule: %s (ignoring)\n", ptr);
676
			break;
677
		}
678
679
		ptr = strpbrk(ptr, SEPARATORS);
680
	}
681
682
	/* swap lists */
683
	write_lock_bh(&aggr_lock);
684
	list_for_each_entry_safe(aggr, tmp, &aggr_n_list, list)
685
		list_move(&aggr->list, &old_aggr_list);
686
	list_for_each_entry_safe(aggr, tmp, &aggr_p_list, list)
687
		list_move(&aggr->list, &old_aggr_list);
688
689
	list_for_each_entry_safe(aggr, tmp, &new_aggr_n_list, list)
690
		list_move_tail(&aggr->list, &aggr_n_list);
691
	list_for_each_entry_safe(aggr, tmp, &new_aggr_p_list, list)
692
		list_move_tail(&aggr->list, &aggr_p_list);
693
	write_unlock_bh(&aggr_lock);
694
	aggregation_fini(&old_aggr_list);
695
	return 0;
696
}
697
698
static inline u_int32_t hash_netflow(const struct ipt_netflow_tuple *tuple)
699
{
700
	/* tuple is rounded to u32s */
701
	return jhash2((u32 *)tuple, NETFLOW_TUPLE_SIZE, ipt_netflow_hash_rnd) % ipt_netflow_hash_size;
702
}
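/* Note: jhash2() above walks the tuple as NETFLOW_TUPLE_SIZE 32-bit words
 * (sizeof(struct ipt_netflow_tuple) / 4, i.e. 4 words for the 16-byte tuple),
 * so the tuple layout must stay a multiple of 4 bytes with no stray padding,
 * otherwise equal flows could hash differently. */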
703
704
static struct ipt_netflow *
705
ipt_netflow_find(const struct ipt_netflow_tuple *tuple)
706
{
707
	struct ipt_netflow *nf;
708
	unsigned int hash = hash_netflow(tuple);
709
	struct hlist_node *pos;
710
711
	hlist_for_each_entry(nf, pos, &ipt_netflow_hash[hash], hlist) {
712
		if (ipt_netflow_tuple_equal(tuple, &nf->tuple) &&
713
		    nf->nr_bytes < FLOW_FULL_WATERMARK) {
714
			NETFLOW_STAT_INC(found);
715
			return nf;
716
		}
717
		NETFLOW_STAT_INC(searched);
718
	}
719
	NETFLOW_STAT_INC(notfound);
720
	return NULL;
721
}
722
723
static struct hlist_head *alloc_hashtable(int size)
724
{
725
	struct hlist_head *hash;
726
727
	hash = vmalloc(sizeof(struct hlist_head) * size);
728
	if (hash) {
729
		int i;
730
731
		for (i = 0; i < size; i++)
732
			INIT_HLIST_HEAD(&hash[i]);
733
	} else
734
		printk(KERN_ERR "netflow: unable to vmalloc hash table.\n");
735
736
	return hash;
737
}
738
739
static int set_hashsize(int new_size)
740
{
741
	struct hlist_head *new_hash, *old_hash;
742
	unsigned int hash;
743
	struct ipt_netflow *nf;
744
	int rnd;
745
746
	printk(KERN_INFO "netflow: allocating new hash table %u -> %u buckets\n",
747
	       ipt_netflow_hash_size, new_size);
748
	new_hash = alloc_hashtable(new_size);
749
	if (!new_hash)
750
		return -ENOMEM;
751
752
	get_random_bytes(&rnd, 4);
753
754
	/* rehash */
755
	spin_lock_bh(&ipt_netflow_lock);
756
	old_hash = ipt_netflow_hash;
757
	ipt_netflow_hash = new_hash;
758
	ipt_netflow_hash_size = new_size;
759
	ipt_netflow_hash_rnd = rnd;
760
	/* hash_netflow() is dependent on ipt_netflow_hash_* values */
761
	list_for_each_entry(nf, &ipt_netflow_list, list) {
762
		hash = hash_netflow(&nf->tuple);
763
		/* hlist_add_head overwrites hlist pointers for this node
764
		 * so it's good */
765
		hlist_add_head(&nf->hlist, &new_hash[hash]);
766
	}
767
	spin_unlock_bh(&ipt_netflow_lock);
768
769
	vfree(old_hash);
770
771
	return 0;
772
}
773
774
static struct ipt_netflow *
775
ipt_netflow_alloc(struct ipt_netflow_tuple *tuple)
776
{
777
	struct ipt_netflow *nf;
778
	long count;
779
780
	nf = kmem_cache_alloc(ipt_netflow_cachep, GFP_ATOMIC);
781
	if (!nf) {
782
		printk(KERN_ERR "Can't allocate netflow.\n");
783
		return NULL;
784
	}
785
786
	memset(nf, 0, sizeof(*nf));
787
	nf->tuple = *tuple;
788
789
	count = atomic_inc_return(&ipt_netflow_count);
790
	if (count > peakflows) {
791
		peakflows = count;
792
		peakflows_at = jiffies;
793
	}
794
795
	return nf;
796
}
797
798
static void ipt_netflow_free(struct ipt_netflow *nf)
799
{
800
	atomic_dec(&ipt_netflow_count);
801
	kmem_cache_free(ipt_netflow_cachep, nf);
802
}
803
804
static struct ipt_netflow *
805
init_netflow(struct ipt_netflow_tuple *tuple,
806
	     struct sk_buff *skb)
807
{
808
	struct ipt_netflow *nf;
809
	unsigned int hash;
810
811
	nf = ipt_netflow_alloc(tuple);
812
	if (!nf)
813
		return NULL;
814
815
	hash = hash_netflow(&nf->tuple);
816
	hlist_add_head(&nf->hlist, &ipt_netflow_hash[hash]);
817
	list_add(&nf->list, &ipt_netflow_list);
818
819
	return nf;
820
}
821
822
/* cook pdu, send, and clean */
823
static void __netflow_export_pdu(void)
824
{
825
	struct timeval tv;
826
	int pdusize;
827
828
	if (!pdu.nr_records)
829
		return;
830
831
	if (debug > 1)
832
		printk(KERN_INFO "netflow_export_pdu with %d records\n", pdu.nr_records);
833
	do_gettimeofday(&tv);
834
835
	pdu.version	= htons(5);
836
	pdu.ts_uptime	= htonl(jiffies_to_msecs(jiffies));
837
	pdu.ts_usecs	= htonl(tv.tv_sec);
838
	pdu.ts_unsecs	= htonl(tv.tv_usec);
839
	//pdu.eng_type	= 0;
840
	//pdu.eng_id	= 0;
841
	//pdu.padding	= 0;
842
843
	pdusize = NETFLOW5_HEADER_SIZE + sizeof(struct netflow5_record) * pdu.nr_records;
844
845
	/* convert nr_records to network byte order just before export */
846
	pdu.nr_records	= htons(pdu.nr_records);
847
848
	if (netflow_send_pdu(&pdu, pdusize) == 0) {
849
		/* not a single send succeeded; account the dropped packets in stats */
850
		NETFLOW_STAT_ADD_ATOMIC(pkt_drop, pdu_packets);
851
		NETFLOW_STAT_ADD_ATOMIC(traf_drop, pdu_traf);
852
	}
853
854
	pdu.seq = htonl(ntohl(pdu.seq) + ntohs(pdu.nr_records));
855
856
	pdu.nr_records	= 0;
857
	pdu_packets = 0;
858
	pdu_traf = 0;
859
}
860
861
static void netflow_export_flow(struct ipt_netflow *nf)
862
{
863
	struct netflow5_record *rec;
864
865
	spin_lock(&pdu_lock);
866
	if (debug > 2)
867
		printk(KERN_INFO "adding flow to export (%d)\n", pdu.nr_records);
868
869
	pdu_packets += nf->nr_packets;
870
	pdu_traf += nf->nr_bytes;
871
	pdu_ts_mod = jiffies;
872
	rec = &pdu.flow[pdu.nr_records++];
873
874
	/* make V5 flow record */
875
	rec->s_addr	= nf->tuple.s_addr;
876
	rec->d_addr	= nf->tuple.d_addr;
877
	//rec->nexthop	= 0;
878
	rec->i_ifc	= htons(nf->tuple.i_ifc);
879
	rec->o_ifc	= htons(nf->o_ifc);
880
	rec->nr_packets = htonl(nf->nr_packets);
881
	rec->nr_octets	= htonl(nf->nr_bytes);
882
	rec->ts_first	= htonl(jiffies_to_msecs(nf->ts_first));
883
	rec->ts_last	= htonl(jiffies_to_msecs(nf->ts_last));
884
	rec->s_port	= nf->tuple.s_port;
885
	rec->d_port	= nf->tuple.d_port;
886
	//rec->reserved	= 0;
887
	rec->tcp_flags	= nf->tcp_flags;
888
	rec->protocol	= nf->tuple.protocol;
889
	rec->tos	= nf->tuple.tos;
890
	//rec->s_as	= 0;
891
	//rec->d_as	= 0;
892
	rec->s_mask	= nf->s_mask;
893
	rec->d_mask	= nf->d_mask;
894
	//rec->padding	= 0;
895
	ipt_netflow_free(nf);
896
897
	if (pdu.nr_records == NETFLOW5_RECORDS_MAX)
898
		__netflow_export_pdu();
899
	spin_unlock(&pdu_lock);
900
}
901
902
static inline int active_needs_export(struct ipt_netflow *nf, long a_timeout)
903
{
904
	/* active too long, finishing, or has accumulated too many bytes */
905
	return ((jiffies - nf->ts_first) > a_timeout) ||
906
		(nf->tuple.protocol == IPPROTO_TCP &&
907
		 (nf->tcp_flags & TCP_FIN_RST) &&
908
		 (jiffies - nf->ts_last) > (1 * HZ)) ||
909
		nf->nr_bytes >= FLOW_FULL_WATERMARK;
910
}
911
912
/* could be called with zero to flush cache and pdu */
913
static void netflow_scan_inactive_timeout(long timeout)
914
{
915
	long i_timeout = timeout * HZ;
916
	long a_timeout = active_timeout * HZ;
917
918
	spin_lock_bh(&ipt_netflow_lock);
919
	while (!list_empty(&ipt_netflow_list)) {
920
		struct ipt_netflow *nf;
921
922
		nf = list_entry(ipt_netflow_list.prev, struct ipt_netflow, list);
923
		/* Note: i_timeout checked with >= to allow specifying zero timeout
924
		 * to purge all flows on module unload */
925
		if (((jiffies - nf->ts_last) >= i_timeout) ||
926
		    active_needs_export(nf, a_timeout)) {
927
			hlist_del(&nf->hlist);
928
			list_del(&nf->list);
929
			NETFLOW_STAT_ADD(pkt_out, nf->nr_packets);
930
			NETFLOW_STAT_ADD(traf_out, nf->nr_bytes);
931
			spin_unlock_bh(&ipt_netflow_lock);
932
			netflow_export_flow(nf);
933
			spin_lock_bh(&ipt_netflow_lock);
934
		} else {
935
			/* all flows which need to be exported are always at the tail,
			 * so once there are no more exportable flows we can break */
937
			break;
938
		}
939
	}
940
	spin_unlock_bh(&ipt_netflow_lock);
941
942
	/* flush flows stored in the pdu if there have been no new flows for too long */
943
	/* Note: using >= to allow flow purge on zero timeout */
944
	if ((jiffies - pdu_ts_mod) >= i_timeout) {
945
		spin_lock(&pdu_lock);
946
		__netflow_export_pdu();
947
		spin_unlock(&pdu_lock);
948
	}
949
}
950
951
static void netflow_work_fn(struct work_struct *dummy)
952
{
953
	netflow_scan_inactive_timeout(inactive_timeout);
954
	schedule_delayed_work(&netflow_work, HZ / 10);
955
}
956
957
#define RATESHIFT 2
958
#define SAMPLERATE (RATESHIFT*RATESHIFT)
959
#define NUMSAMPLES(minutes) (minutes * 60 / SAMPLERATE)
960
#define _A(v, m) (v) * (1024 * 2 / (NUMSAMPLES(m) + 1)) >> 10
961
// x * (1024 / y) >> 10 is used because a long long can not simply be divided here
#define CALC_RATE(ewma, cur, minutes) ewma += _A(cur - ewma, minutes)
// calculate the EWMA throughput rate for the whole module
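// Worked example (illustrative): with SAMPLERATE == 4 the timer fires every
// 4 seconds, so NUMSAMPLES(1) == 15 and _A(v, 1) == v * (2048 / 16) >> 10 ==
// v * 128 >> 10 == v / 8; CALC_RATE(ewma, cur, 1) is then the integer EWMA
// ewma += (cur - ewma) * 2 / (N + 1) with N == 15 samples per minute, done
// with a multiply and a shift so no 64-bit division is needed.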
964
static void rate_timer_calc(unsigned long dummy)
965
{
966
	static u64 old_pkt_total = 0;
967
	static u64 old_traf_total = 0;
968
	static u64 old_searched = 0;
969
	static u64 old_found = 0;
970
	static u64 old_notfound = 0;
971
	u64 searched = 0;
972
	u64 found = 0;
973
	u64 notfound = 0;
974
	unsigned int dsrch, dfnd, dnfnd;
975
	u64 pkt_total = 0;
976
	u64 traf_total = 0;
977
	int cpu;
978
979
	for_each_present_cpu(cpu) {
980
		struct ipt_netflow_stat *st = &per_cpu(ipt_netflow_stat, cpu);
981
982
		pkt_total += st->pkt_total;
983
		traf_total += st->traf_total;
984
		searched += st->searched;
985
		found += st->found;
986
		notfound += st->notfound;
987
	}
988
989
	sec_prate = (pkt_total - old_pkt_total) >> RATESHIFT;
990
	CALC_RATE(min5_prate, sec_prate, 5);
991
	CALC_RATE(min_prate, sec_prate, 1);
992
	old_pkt_total = pkt_total;
993
994
	sec_brate = ((traf_total - old_traf_total) * 8) >> RATESHIFT;
995
	CALC_RATE(min5_brate, sec_brate, 5);
996
	CALC_RATE(min_brate, sec_brate, 1);
997
	old_traf_total = traf_total;
998
999
	dsrch = searched - old_searched;
1000
	dfnd = found - old_found;
1001
	dnfnd = notfound - old_notfound;
1002
	old_searched = searched;
1003
	old_found = found;
1004
	old_notfound = notfound;
1005
	/* if there is no access to hash keep rate steady */
1006
	metric = (dfnd + dnfnd)? 10 * (dsrch + dfnd + dnfnd) / (dfnd + dnfnd) : metric;
1007
	CALC_RATE(min15_metric, (unsigned long long)metric, 15);
1008
	CALC_RATE(min5_metric, (unsigned long long)metric, 5);
1009
	CALC_RATE(min_metric, (unsigned long long)metric, 1);
1010
1011
	mod_timer(&rate_timer, jiffies + (HZ * SAMPLERATE));
1012
}
1013
1014
/* packet receiver */
1015
static unsigned int netflow_target(
1016
			   struct sk_buff *skb,
1017
			   const struct xt_action_param *par
1018
		)
1019
{
1020
	struct iphdr _iph, *iph;
1021
	struct ipt_netflow_tuple tuple;
1022
	struct ipt_netflow *nf;
1023
	__u8 tcp_flags;
1024
	struct netflow_aggr_n *aggr_n;
1025
	struct netflow_aggr_p *aggr_p;
1026
	__u8 s_mask, d_mask;
1027
1028
	iph = skb_header_pointer(skb, 0, sizeof(_iph), &_iph); //iph = ip_hdr(skb);
1029
1030
	if (iph == NULL) {
1031
		NETFLOW_STAT_INC(truncated);
1032
		NETFLOW_STAT_INC(pkt_drop);
1033
		return XT_CONTINUE;
1034
	}
1035
1036
	tuple.s_addr	= iph->saddr;
1037
	tuple.d_addr	= iph->daddr;
1038
	tuple.s_port	= 0;
1039
	tuple.d_port	= 0;
1040
	tuple.i_ifc	= par->in? par->in->ifindex : -1;
1041
	tuple.protocol	= iph->protocol;
1042
	tuple.tos	= iph->tos;
1043
	tcp_flags	= 0; /* Cisco sometimes reports TCP ACK for non-TCP packets; we don't pick that up */
1044
	s_mask		= 0;
1045
	d_mask		= 0;
1046
1047
	if (iph->frag_off & htons(IP_OFFSET))
1048
		NETFLOW_STAT_INC(frags);
1049
	else {
1050
		switch (tuple.protocol) {
1051
		    case IPPROTO_TCP: {
1052
			struct tcphdr _hdr, *hp;
1053
1054
			if ((hp = skb_header_pointer(skb, iph->ihl * 4, 14, &_hdr))) {
1055
				tuple.s_port = hp->source;
1056
				tuple.d_port = hp->dest;
1057
				tcp_flags = (u_int8_t)(ntohl(tcp_flag_word(hp)) >> 16);
1058
			}
1059
			break;
1060
		    }
1061
		    case IPPROTO_UDP: {
1062
			struct udphdr _hdr, *hp;
1063
1064
			if ((hp = skb_header_pointer(skb, iph->ihl * 4, 4, &_hdr))) {
1065
				tuple.s_port = hp->source;
1066
				tuple.d_port = hp->dest;
1067
			}
1068
			break;
1069
		    }
1070
		    case IPPROTO_ICMP: {
1071
			struct icmphdr _hdr, *hp;
1072
1073
			if ((hp = skb_header_pointer(skb, iph->ihl * 4, 2, &_hdr)))
1074
				tuple.d_port = (hp->type << 8) | hp->code;
1075
			break;
1076
		    }
1077
		    case IPPROTO_IGMP: {
1078
			struct igmphdr _hdr, *hp;
1079
1080
			if ((hp = skb_header_pointer(skb, iph->ihl * 4, 1, &_hdr)))
1081
				tuple.d_port = hp->type;
1082
			}
1083
			break;
1084
		}
1085
	} /* not fragmented */
1086
1087
	/* aggregate networks */
1088
	read_lock_bh(&aggr_lock);
1089
	list_for_each_entry(aggr_n, &aggr_n_list, list)
1090
		if ((ntohl(tuple.s_addr) & aggr_n->mask) == aggr_n->addr) {
1091
			tuple.s_addr &= htonl(aggr_n->aggr_mask);
1092
			s_mask = aggr_n->prefix;
1093
			break;
1094
		}
1095
	list_for_each_entry(aggr_n, &aggr_n_list, list)
1096
		if ((ntohl(tuple.d_addr) & aggr_n->mask) == aggr_n->addr) {
1097
			tuple.d_addr &= htonl(aggr_n->aggr_mask);
1098
			d_mask = aggr_n->prefix;
1099
			break;
1100
		}
1101
1102
	/* aggregate ports */
1103
	list_for_each_entry(aggr_p, &aggr_p_list, list)
1104
		if (ntohs(tuple.s_port) >= aggr_p->port1 &&
1105
		    ntohs(tuple.s_port) <= aggr_p->port2) {
1106
			tuple.s_port = htons(aggr_p->aggr_port);
1107
			break;
1108
		}
1109
1110
	list_for_each_entry(aggr_p, &aggr_p_list, list)
1111
		if (ntohs(tuple.d_port) >= aggr_p->port1 &&
1112
		    ntohs(tuple.d_port) <= aggr_p->port2) {
1113
			tuple.d_port = htons(aggr_p->aggr_port);
1114
			break;
1115
		}
1116
	read_unlock_bh(&aggr_lock);
1117
1118
	spin_lock_bh(&ipt_netflow_lock);
1119
	/* record */
1120
	nf = ipt_netflow_find(&tuple);
1121
	if (!nf) {
1122
		if (maxflows > 0 && atomic_read(&ipt_netflow_count) >= maxflows) {
1123
			/* This is DOS attack prevention */
1124
			NETFLOW_STAT_INC(maxflows_err);
1125
			NETFLOW_STAT_INC(pkt_drop);
1126
			NETFLOW_STAT_ADD(traf_drop, ntohs(iph->tot_len));
1127
			spin_unlock_bh(&ipt_netflow_lock);
1128
			return XT_CONTINUE;
1129
		}
1130
1131
		nf = init_netflow(&tuple, skb);
1132
		if (!nf || IS_ERR(nf)) {
1133
			NETFLOW_STAT_INC(alloc_err);
1134
			NETFLOW_STAT_INC(pkt_drop);
1135
			NETFLOW_STAT_ADD(traf_drop, ntohs(iph->tot_len));
1136
			spin_unlock_bh(&ipt_netflow_lock);
1137
			return XT_CONTINUE;
1138
		}
1139
1140
		nf->ts_first = jiffies;
1141
		nf->tcp_flags = tcp_flags;
1142
		nf->o_ifc = par->out? par->out->ifindex : -1;
1143
		nf->s_mask = s_mask;
1144
		nf->d_mask = d_mask;
1145
1146
		if (debug > 2)
1147
			printk(KERN_INFO "ipt_netflow: new (%u) %hd:%hd SRC=%u.%u.%u.%u:%u DST=%u.%u.%u.%u:%u\n",
1148
			       atomic_read(&ipt_netflow_count),
1149
			       tuple.i_ifc, nf->o_ifc,
1150
			       NIPQUAD(tuple.s_addr), ntohs(tuple.s_port),
1151
			       NIPQUAD(tuple.d_addr), ntohs(tuple.d_port));
1152
	} else {
1153
		/* ipt_netflow_list is sorted by access time:
1154
		 * most recently accessed flows are at head, old flows remain at tail
1155
		 * this list_move() bubbles the flow up to the head */
1156
		list_move(&nf->list, &ipt_netflow_list);
1157
	}
1158
1159
	nf->nr_packets++;
1160
	nf->nr_bytes += ntohs(iph->tot_len);
1161
	nf->ts_last = jiffies;
1162
	nf->tcp_flags |= tcp_flags;
1163
1164
	NETFLOW_STAT_INC(pkt_total);
1165
	NETFLOW_STAT_ADD(traf_total, ntohs(iph->tot_len));
1166
1167
	if (active_needs_export(nf, active_timeout * HZ)) {
1168
		/* ok, if this active flow to be exported
1169
		 * bubble it to the tail */
1170
		list_move_tail(&nf->list, &ipt_netflow_list);
1171
1172
		/* Blog: I thought about forcing timer to wake up sooner if we have
1173
		 * enough exportable flows, but in fact this doesn't make much sense,
		 * because this would only move flow data from one memory to another
1175
		 * (from our buffers to socket buffers, and socket buffers even have
1176
		 * limited size). But yes, this is disputable. */
1177
	}
1178
1179
	spin_unlock_bh(&ipt_netflow_lock);
1180
1181
	return XT_CONTINUE;
1182
}
1183
1184
static struct xt_target ipt_netflow_reg __read_mostly = {
1185
	.name		= "NETFLOW",
1186
	.family		= NFPROTO_IPV4,
1187
	.target		= netflow_target,
1188
	.table		= "filter",
1189
	.hooks		= (1 << NF_INET_LOCAL_IN) | (1 << NF_INET_FORWARD) |
1190
				(1 << NF_INET_LOCAL_OUT),
1191
	.me		= THIS_MODULE
1192
};
1193
1194
static int __init ipt_netflow_init(void)
1195
{
1196
#ifdef CONFIG_PROC_FS
1197
	struct proc_dir_entry *proc_stat;
1198
#endif
1199
1200
	get_random_bytes(&ipt_netflow_hash_rnd, 4);
1201
1202
	/* determine hash size (idea from nf_conntrack_core.c) */
1203
	if (!hashsize) {
1204
		hashsize = (((num_physpages << PAGE_SHIFT) / 16384)
1205
					 / sizeof(struct hlist_head));
1206
		if (num_physpages > (1024 * 1024 * 1024 / PAGE_SIZE))
1207
			hashsize = 8192;
1208
	}
1209
	if (hashsize < 16)
1210
		hashsize = 16;
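	/* Example of the sizing heuristic (illustrative, assuming 4 KB pages
	 * and an 8-byte struct hlist_head): with 256 MB of RAM the default is
	 * (65536 << 12) / 16384 / 8 == 2048 buckets; boxes with more than 1 GB
	 * of RAM are clamped to 8192 buckets, and the floor above is 16. */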
1211
	printk(KERN_INFO "ipt_netflow version %s (%u buckets)\n",
1212
		IPT_NETFLOW_VERSION, hashsize);
1213
1214
	ipt_netflow_hash_size = hashsize;
1215
	ipt_netflow_hash = alloc_hashtable(ipt_netflow_hash_size);
1216
	if (!ipt_netflow_hash) {
1217
		printk(KERN_ERR "Unable to create ipt_netflow_hash\n");
1218
		goto err;
1219
	}
1220
1221
	ipt_netflow_cachep = kmem_cache_create("ipt_netflow",
1222
						sizeof(struct ipt_netflow), 0,
1223
						0, NULL
1224
					      );
1225
	if (!ipt_netflow_cachep) {
1226
		printk(KERN_ERR "Unable to create ipt_netflow slab cache\n");
1227
		goto err_free_hash;
1228
	}
1229
1230
#ifdef CONFIG_PROC_FS
1231
	proc_stat = create_proc_entry("ipt_netflow", S_IRUGO, init_net.proc_net_stat);
1232
	if (!proc_stat) {
1233
		printk(KERN_ERR "Unable to create /proc/net/stat/ipt_netflow entry\n");
1234
		goto err_free_netflow_slab;
1235
	}
1236
	proc_stat->proc_fops = &nf_seq_fops;
1237
	printk(KERN_INFO "netflow: registered: /proc/net/stat/ipt_netflow\n");
1238
#endif
1239
1240
#ifdef CONFIG_SYSCTL
1241
	netflow_sysctl_header = register_sysctl_paths(netflow_sysctl_path, netflow_sysctl_table);
1242
	if (!netflow_sysctl_header) {
1243
		printk(KERN_ERR "netflow: can't register to sysctl\n");
1244
		goto err_free_proc_stat;
1245
	} else
1246
		printk(KERN_INFO "netflow: registered: sysctl net.netflow\n");
1247
#endif
1248
1249
	if (!destination)
1250
		destination = destination_buf;
1251
	if (destination != destination_buf) {
1252
		strlcpy(destination_buf, destination, sizeof(destination_buf));
1253
		destination = destination_buf;
1254
	}
1255
	if (add_destinations(destination) < 0)
1256
		goto err_free_sysctl;
1257
1258
	if (!aggregation)
1259
		aggregation = aggregation_buf;
1260
	if (aggregation != aggregation_buf) {
1261
		strlcpy(aggregation_buf, aggregation, sizeof(aggregation_buf));
1262
		aggregation = aggregation_buf;
1263
	}
1264
	add_aggregation(aggregation);
1265
1266
	schedule_delayed_work(&netflow_work, HZ / 10);
1267
	setup_timer(&rate_timer, rate_timer_calc, 0);
1268
	mod_timer(&rate_timer, jiffies + (HZ * SAMPLERATE));
1269
1270
	if (xt_register_target(&ipt_netflow_reg))
1271
		goto err_stop_timer;
1272
1273
	peakflows_at = jiffies;
1274
1275
	printk(KERN_INFO "ipt_netflow loaded.\n");
1276
	return 0;
1277
1278
err_stop_timer:
1279
	cancel_delayed_work(&netflow_work);
1280
	flush_scheduled_work();
1281
	del_timer_sync(&rate_timer);
1282
	destination_fini();
1283
1284
	aggregation_fini(&aggr_n_list);
1285
	aggregation_fini(&aggr_p_list);
1286
1287
err_free_sysctl:
1288
#ifdef CONFIG_SYSCTL
1289
	unregister_sysctl_table(netflow_sysctl_header);
1290
err_free_proc_stat:
1291
#endif
1292
#ifdef CONFIG_PROC_FS
1293
	remove_proc_entry("ipt_netflow", init_net.proc_net_stat);
1294
err_free_netflow_slab:
1295
#endif
1296
	kmem_cache_destroy(ipt_netflow_cachep);
1297
err_free_hash:
1298
	vfree(ipt_netflow_hash);
1299
err:
1300
	return -ENOMEM;
1301
}
1302
1303
static void __exit ipt_netflow_fini(void)
1304
{
1305
	printk(KERN_INFO "ipt_netflow unloading..\n");
1306
1307
	xt_unregister_target(&ipt_netflow_reg);
1308
	cancel_delayed_work(&netflow_work);
1309
	flush_scheduled_work();
1310
	del_timer_sync(&rate_timer);
1311
1312
	synchronize_sched();
1313
1314
#ifdef CONFIG_SYSCTL
1315
	unregister_sysctl_table(netflow_sysctl_header);
1316
#endif
1317
#ifdef CONFIG_PROC_FS
1318
	remove_proc_entry("ipt_netflow", init_net.proc_net_stat);
1319
#endif
1320
1321
	netflow_scan_inactive_timeout(0); /* flush cache and pdu */
1322
	destination_fini();
1323
	aggregation_fini(&aggr_n_list);
1324
	aggregation_fini(&aggr_p_list);
1325
1326
	kmem_cache_destroy(ipt_netflow_cachep);
1327
	vfree(ipt_netflow_hash);
1328
1329
	printk(KERN_INFO "ipt_netflow unloaded.\n");
1330
}
1331
1332
module_init(ipt_netflow_init);
1333
module_exit(ipt_netflow_fini);
1334
1335
/* vim: set sw=8: */
