--- /dev/null
+++ b/include/linux/netfilter_ipv4/ipt_NETFLOW.h
@@ -0,0 +1,164 @@
+
+#ifndef _IP_NETFLOW_H
+#define _IP_NETFLOW_H
+
+/*
+ * Some tech info:
+ * http://www.cisco.com/en/US/products/ps6601/prod_white_papers_list.html
+ * http://www.cisco.com/en/US/products/sw/netmgtsw/ps1964/products_implementation_design_guide09186a00800d6a11.html
+ */
+
+#define NETFLOW5_RECORDS_MAX 30
+
+struct netflow5_record {
+	__be32 s_addr;
+	__be32 d_addr;
+	__be32 nexthop;
+	__be16 i_ifc;
+	__be16 o_ifc;
+	__be32 nr_packets;
+	__be32 nr_octets;
+	__be32 ts_first;
+	__be32 ts_last;
+	__be16 s_port;
+	__be16 d_port;
+	__u8 reserved;
+	__u8 tcp_flags;
+	__u8 protocol;
+	__u8 tos;
+	__be16 s_as;
+	__be16 d_as;
+	__u8 s_mask;
+	__u8 d_mask;
+	__u16 padding;
+} __attribute__ ((packed));
+
+/* NetFlow v5 packet */
+struct netflow5_pdu {
+	__be16 version;
+	__be16 nr_records;
+	__be32 ts_uptime; /* ms since boot */
+	__be32 ts_usecs; /* unix seconds */
+	__be32 ts_unsecs; /* residual, sub-second part */
+	__be32 seq;
+	__u8 eng_type;
+	__u8 eng_id;
+	__u16 padding;
+	struct netflow5_record flow[NETFLOW5_RECORDS_MAX];
+} __attribute__ ((packed));
+#define NETFLOW5_HEADER_SIZE (sizeof(struct netflow5_pdu) - NETFLOW5_RECORDS_MAX * sizeof(struct netflow5_record))
+
+/* hashed data which identifies a unique flow */
+struct ipt_netflow_tuple {
+	__be32 s_addr; // Network byte order
+	__be32 d_addr; // -"-
+	__be16 s_port; // -"-
+	__be16 d_port; // -"-
+	__u16 i_ifc; // Local byte order
+	__u8 protocol;
+	__u8 tos;
+};
+/* tuple size in u32 words (for jhash2) */
+#define NETFLOW_TUPLE_SIZE (sizeof(struct ipt_netflow_tuple) / 4)
+
+/* maximum bytes a flow can have; once reached, the flow becomes unsearchable and will be exported soon */
+#define FLOW_FULL_WATERMARK 0xffefffff
+
+/* flow entry */
+struct ipt_netflow {
+	struct hlist_node hlist; // hashtable search chain
+	struct list_head list; // all flows chain
+
+	/* unique per flow data (hashed, NETFLOW_TUPLE_SIZE) */
+	struct ipt_netflow_tuple tuple;
+
+	/* volatile data */
+	__u16 o_ifc;
+	__u8 s_mask;
+	__u8 d_mask;
+
+	/* flow statistics */
+	u_int32_t nr_packets;
+	u_int32_t nr_bytes;
+	unsigned long ts_first;
+	unsigned long ts_last;
+	__u8 tcp_flags; /* `OR' of all tcp flags */
+};
+
+static inline int ipt_netflow_tuple_equal(const struct ipt_netflow_tuple *t1,
+					  const struct ipt_netflow_tuple *t2)
+{
+	return (!memcmp(t1, t2, sizeof(struct ipt_netflow_tuple)));
+}
+
+struct ipt_netflow_sock {
+	struct list_head list;
+	struct socket *sock;
+	__be32 ipaddr;
+	unsigned short port;
+	atomic_t wmem_peak; // sk_wmem_alloc peak value
+	atomic_t err_full; // socket full errors (sndbuf reached)
+	atomic_t err_other; // other socket errors
+};
+
+struct netflow_aggr_n {
+	struct list_head list;
+	__u32 mask;
+	__u32 addr;
+	__u32 aggr_mask;
+	__u8 prefix;
+};
+
+struct netflow_aggr_p {
+	struct list_head list;
+	__u16 port1;
+	__u16 port2;
+	__u16 aggr_port;
+};
+
+#define NETFLOW_STAT_INC(count) (__get_cpu_var(ipt_netflow_stat).count++)
+#define NETFLOW_STAT_ADD(count, val) (__get_cpu_var(ipt_netflow_stat).count += (unsigned long long)(val))
+
+#define NETFLOW_STAT_INC_ATOMIC(count) \
+	do { \
+		preempt_disable(); \
+		(__get_cpu_var(ipt_netflow_stat).count++); \
+		preempt_enable(); \
+	} while (0)
+
+#define NETFLOW_STAT_ADD_ATOMIC(count, val) \
+	do { \
+		preempt_disable(); \
+		(__get_cpu_var(ipt_netflow_stat).count += (unsigned long long)(val)); \
+		preempt_enable(); \
+	} while (0)
+
+/* statistics */
+struct ipt_netflow_stat {
+	u64 searched; // hash stat
+	u64 found; // hash stat
+	u64 notfound; // hash stat
+	unsigned int truncated; // packets stat
+	unsigned int frags; // packets stat
+	unsigned int alloc_err; // failed to allocate flow mem
+	unsigned int maxflows_err; // maxflows reached
+	unsigned int send_success; // sendmsg() ok
+	unsigned int send_failed; // sendmsg() failed
+	unsigned int sock_errors; // socket error callback called (got icmp refused)
+	u64 exported_size; // netflow traffic itself
+	u64 pkt_total; // packets accounted total
+	u64 traf_total; // traffic accounted total
+	u64 pkt_drop; // packets not accounted total
+	u64 traf_drop; // traffic not accounted total
+	u64 pkt_out; // packets of exported flows (gone from memory)
+	u64 traf_out; // traffic of exported flows (gone from memory)
+};
+
+#ifndef list_first_entry
+#define list_first_entry(ptr, type, member) \
+	list_entry((ptr)->next, type, member)
+#endif
+
+#endif
+/* vim: set sw=8: */
--- a/net/ipv4/netfilter/Kconfig
+++ b/net/ipv4/netfilter/Kconfig
@@ -181,6 +181,15 @@ config IP_NF_TARGET_MASQUERADE
 
 	  To compile it as a module, choose M here. If unsure, say N.
 
+config IP_NF_TARGET_NETFLOW
+	tristate "NETFLOW target support"
+	depends on NF_NAT
+	depends on NETFILTER_ADVANCED
+	help
+	  NETFLOW is an in-kernel NetFlow traffic probe and NetFlow v5 exporter.
+
+	  To compile it as a module, choose M here. If unsure, say N.
+
 config IP_NF_TARGET_NETMAP
 	tristate "NETMAP target support"
 	depends on NF_NAT
--- a/net/ipv4/netfilter/Makefile
+++ b/net/ipv4/netfilter/Makefile
@@ -57,6 +57,7 @@
 obj-$(CONFIG_IP_NF_TARGET_CLUSTERIP) += ipt_CLUSTERIP.o
 obj-$(CONFIG_IP_NF_TARGET_ECN) += ipt_ECN.o
 obj-$(CONFIG_IP_NF_TARGET_LOG) += ipt_LOG.o
 obj-$(CONFIG_IP_NF_TARGET_MASQUERADE) += ipt_MASQUERADE.o
+obj-$(CONFIG_IP_NF_TARGET_NETFLOW) += ipt_NETFLOW.o
 obj-$(CONFIG_IP_NF_TARGET_NETMAP) += ipt_NETMAP.o
 obj-$(CONFIG_IP_NF_TARGET_REDIRECT) += ipt_REDIRECT.o
 obj-$(CONFIG_IP_NF_TARGET_REJECT) += ipt_REJECT.o
--- /dev/null
+++ b/net/ipv4/netfilter/ipt_NETFLOW.c
@@ -0,0 +1,1335 @@
+/*
+ * This is the NetFlow exporting module (NETFLOW target)
+ * (c) 2008
+ *
+ */
+
+//#define RAW_PROMISC_HACK
+
+#include <linux/module.h>
+#include <linux/skbuff.h>
+#include <linux/proc_fs.h>
+#include <linux/vmalloc.h>
+#include <linux/seq_file.h>
+#include <linux/random.h>
+#include <linux/jhash.h>
+#include <linux/mm.h>
+#include <linux/ip.h>
+#include <linux/tcp.h>
+#include <linux/udp.h>
+#include <linux/icmp.h>
+#include <linux/igmp.h>
+#include <linux/inetdevice.h>
+#include <linux/timer.h>
+#include <linux/workqueue.h>
+#include <net/ip.h>
+#include <net/tcp.h>
+#include <linux/netfilter/x_tables.h>
+#include <linux/netfilter_ipv4/ipt_NETFLOW.h>
+#ifdef CONFIG_BRIDGE_NETFILTER
+#include <linux/netfilter_bridge.h>
+#endif
+#ifdef CONFIG_SYSCTL
+#include <linux/sysctl.h>
+#endif
+
+#ifndef HIPQUAD
+#if defined(__LITTLE_ENDIAN)
+#define HIPQUAD(addr) \
+	((unsigned char *)&addr)[3], \
+	((unsigned char *)&addr)[2], \
+	((unsigned char *)&addr)[1], \
+	((unsigned char *)&addr)[0]
+#elif defined(__BIG_ENDIAN)
+#define HIPQUAD NIPQUAD
+#else
+#error "Please fix asm/byteorder.h"
+#endif /* __LITTLE_ENDIAN */
+#endif
+
+#define IPT_NETFLOW_VERSION "1.6"
+
+MODULE_LICENSE("GPL");
+MODULE_AUTHOR("");
+MODULE_DESCRIPTION("iptables NETFLOW target module");
+MODULE_VERSION(IPT_NETFLOW_VERSION);
+
+#define DST_SIZE 256
+static char destination_buf[DST_SIZE] = "127.0.0.1:2055";
+static char *destination = destination_buf;
+module_param(destination, charp, 0400);
+MODULE_PARM_DESC(destination, "export destination ipaddress:port");
+
+static int inactive_timeout = 15;
+module_param(inactive_timeout, int, 0600);
+MODULE_PARM_DESC(inactive_timeout, "inactive flows timeout in seconds");
+
+static int active_timeout = 30 * 60;
+module_param(active_timeout, int, 0600);
+MODULE_PARM_DESC(active_timeout, "active flows timeout in seconds");
+
+static int debug = 0;
+module_param(debug, int, 0600);
+MODULE_PARM_DESC(debug, "debug verbosity level");
+
+static int sndbuf;
+module_param(sndbuf, int, 0400);
+MODULE_PARM_DESC(sndbuf, "udp socket SNDBUF size");
+
+static int
hashsize; +module_param(hashsize, int, 0400); +MODULE_PARM_DESC(hashsize, "hash table size"); + +static int maxflows = 2000000; +module_param(maxflows, int, 0600); +MODULE_PARM_DESC(maxflows, "maximum number of flows"); +static int peakflows = 0; +static unsigned long peakflows_at; + +#define AGGR_SIZE 1024 +static char aggregation_buf[AGGR_SIZE] = ""; +static char *aggregation = aggregation_buf; +module_param(aggregation, charp, 0400); +MODULE_PARM_DESC(aggregation, "aggregation ruleset"); + +static DEFINE_PER_CPU(struct ipt_netflow_stat, ipt_netflow_stat); +static LIST_HEAD(usock_list); +static DEFINE_RWLOCK(sock_lock); + +static unsigned int ipt_netflow_hash_rnd; +struct hlist_head *ipt_netflow_hash __read_mostly; /* hash table memory */ +static unsigned int ipt_netflow_hash_size __read_mostly = 0; /* buckets */ +static LIST_HEAD(ipt_netflow_list); /* all flows */ +static LIST_HEAD(aggr_n_list); +static LIST_HEAD(aggr_p_list); +static DEFINE_RWLOCK(aggr_lock); +static struct kmem_cache *ipt_netflow_cachep __read_mostly; /* ipt_netflow memory */ +static atomic_t ipt_netflow_count = ATOMIC_INIT(0); +static DEFINE_SPINLOCK(ipt_netflow_lock); + +static DEFINE_SPINLOCK(pdu_lock); +static long long pdu_packets = 0, pdu_traf = 0; +static struct netflow5_pdu pdu; +static unsigned long pdu_ts_mod; +static void netflow_work_fn(struct work_struct *work); +static DECLARE_DELAYED_WORK(netflow_work, netflow_work_fn); +static struct timer_list rate_timer; + +#define TCP_FIN_RST 0x05 + +static long long sec_prate = 0, sec_brate = 0; +static long long min_prate = 0, min_brate = 0; +static long long min5_prate = 0, min5_brate = 0; +static unsigned int metric = 10, min15_metric = 10, min5_metric = 10, min_metric = 10; /* hash metrics */ + +static int set_hashsize(int new_size); +static void destination_fini(void); +static int add_destinations(char *ptr); +static void aggregation_fini(struct list_head *list); +static int add_aggregation(char *ptr); + +static inline __be32 bits2mask(int bits) { + return (bits? 
0xffffffff << (32 - bits) : 0); +} + +static inline int mask2bits(__be32 mask) { + int n; + + for (n = 0; mask; n++) + mask = (mask << 1) & 0xffffffff; + return n; +} + +#ifdef CONFIG_PROC_FS +/* procfs statistics /proc/net/stat/ipt_netflow */ +static int nf_seq_show(struct seq_file *seq, void *v) +{ + unsigned int nr_flows = atomic_read(&ipt_netflow_count); + int cpu; + unsigned long long searched = 0, found = 0, notfound = 0; + unsigned int truncated = 0, frags = 0, alloc_err = 0, maxflows_err = 0; + unsigned int sock_errors = 0, send_failed = 0, send_success = 0; + unsigned long long pkt_total = 0, traf_total = 0, exported_size = 0; + unsigned long long pkt_drop = 0, traf_drop = 0; + unsigned long long pkt_out = 0, traf_out = 0; + struct ipt_netflow_sock *usock; + struct netflow_aggr_n *aggr_n; + struct netflow_aggr_p *aggr_p; + int snum = 0; + int peak = (jiffies - peakflows_at) / HZ; + + seq_printf(seq, "Flows: active %u (peak %u reached %ud%uh%um ago), mem %uK\n", + nr_flows, + peakflows, + peak / (60 * 60 * 24), (peak / (60 * 60)) % 24, (peak / 60) % 60, + (unsigned int)((nr_flows * sizeof(struct ipt_netflow)) >> 10)); + + for_each_present_cpu(cpu) { + struct ipt_netflow_stat *st = &per_cpu(ipt_netflow_stat, cpu); + + searched += st->searched; + found += st->found; + notfound += st->notfound; + truncated += st->truncated; + frags += st->frags; + alloc_err += st->alloc_err; + maxflows_err += st->maxflows_err; + send_success += st->send_success; + send_failed += st->send_failed; + sock_errors += st->sock_errors; + exported_size += st->exported_size; + pkt_total += st->pkt_total; + traf_total += st->traf_total; + pkt_drop += st->pkt_drop; + traf_drop += st->traf_drop; + pkt_out += st->pkt_out; + traf_out += st->traf_out; + } + +#define FFLOAT(x, prec) (int)(x) / prec, (int)(x) % prec + seq_printf(seq, "Hash: size %u (mem %uK), metric %d.%d, %d.%d, %d.%d, %d.%d. MemTraf: %llu pkt, %llu K (pdu %llu, %llu).\n", + ipt_netflow_hash_size, + (unsigned int)((ipt_netflow_hash_size * sizeof(struct hlist_head)) >> 10), + FFLOAT(metric, 10), + FFLOAT(min_metric, 10), + FFLOAT(min5_metric, 10), + FFLOAT(min15_metric, 10), + pkt_total - pkt_out + pdu_packets, + (traf_total - traf_out + pdu_traf) >> 10, + pdu_packets, + pdu_traf); + + seq_printf(seq, "Timeout: active %d, inactive %d. 
Maxflows %u\n",
+		   active_timeout,
+		   inactive_timeout,
+		   maxflows);
+
+	seq_printf(seq, "Rate: %llu bits/sec, %llu packets/sec; Avg 1 min: %llu bps, %llu pps; 5 min: %llu bps, %llu pps\n",
+		   sec_brate, sec_prate, min_brate, min_prate, min5_brate, min5_prate);
+
+	seq_printf(seq, "cpu#  stat: <searched found notfound, trunc frag alloc maxflows>, sock: <ok fail cberr, bytes>, traffic: <pkt, bytes>, drop: <pkt, bytes>\n");
+
+	seq_printf(seq, "Total stat: %6llu %6llu %6llu, %4u %4u %4u %4u, sock: %6u %u %u, %llu K, traffic: %llu, %llu MB, drop: %llu, %llu K\n",
+		   (unsigned long long)searched,
+		   (unsigned long long)found,
+		   (unsigned long long)notfound,
+		   truncated, frags, alloc_err, maxflows_err,
+		   send_success, send_failed, sock_errors,
+		   (unsigned long long)exported_size >> 10,
+		   (unsigned long long)pkt_total, (unsigned long long)traf_total >> 20,
+		   (unsigned long long)pkt_drop, (unsigned long long)traf_drop >> 10);
+
+	if (num_present_cpus() > 1) {
+		for_each_present_cpu(cpu) {
+			struct ipt_netflow_stat *st;
+
+			st = &per_cpu(ipt_netflow_stat, cpu);
+			seq_printf(seq, "cpu%u stat: %6llu %6llu %6llu, %4u %4u %4u %4u, sock: %6u %u %u, %llu K, traffic: %llu, %llu MB, drop: %llu, %llu K\n",
+				   cpu,
+				   (unsigned long long)st->searched,
+				   (unsigned long long)st->found,
+				   (unsigned long long)st->notfound,
+				   st->truncated, st->frags, st->alloc_err, st->maxflows_err,
+				   st->send_success, st->send_failed, st->sock_errors,
+				   (unsigned long long)st->exported_size >> 10,
+				   (unsigned long long)st->pkt_total, (unsigned long long)st->traf_total >> 20,
+				   (unsigned long long)st->pkt_drop, (unsigned long long)st->traf_drop >> 10);
+		}
+	}
+
+	read_lock(&sock_lock);
+	list_for_each_entry(usock, &usock_list, list) {
+		struct sock *sk = usock->sock->sk;
+
+		seq_printf(seq, "sock%d: %u.%u.%u.%u:%u, sndbuf %u, filled %u, peak %u; err: sndbuf reached %u, other %u\n",
+			   snum,
+			   usock->ipaddr >> 24,
+			   (usock->ipaddr >> 16) & 255,
+			   (usock->ipaddr >> 8) & 255,
+			   usock->ipaddr & 255,
+			   usock->port,
+			   sk->sk_sndbuf,
+			   atomic_read(&sk->sk_wmem_alloc),
+			   atomic_read(&usock->wmem_peak),
+			   atomic_read(&usock->err_full),
+			   atomic_read(&usock->err_other));
+		snum++;
+	}
+	read_unlock(&sock_lock);
+
+	read_lock_bh(&aggr_lock);
+	snum = 0;
+	list_for_each_entry(aggr_n, &aggr_n_list, list) {
+		seq_printf(seq, "aggr#%d net: match %u.%u.%u.%u/%d strip %d\n",
+			   snum,
+			   HIPQUAD(aggr_n->addr),
+			   mask2bits(aggr_n->mask),
+			   mask2bits(aggr_n->aggr_mask));
+		snum++;
+	}
+	snum = 0;
+	list_for_each_entry(aggr_p, &aggr_p_list, list) {
+		seq_printf(seq, "aggr#%d port: ports %u-%u replace %u\n",
+			   snum,
+			   aggr_p->port1,
+			   aggr_p->port2,
+			   aggr_p->aggr_port);
+		snum++;
+	}
+	read_unlock_bh(&aggr_lock);
+	return 0;
+}
+
+static int nf_seq_open(struct inode *inode, struct file *file)
+{
+	return single_open(file, nf_seq_show, NULL);
+}
+
+static struct file_operations nf_seq_fops = {
+	.owner = THIS_MODULE,
+	.open = nf_seq_open,
+	.read = seq_read,
+	.llseek = seq_lseek,
+	.release = single_release,
+};
+#endif /* CONFIG_PROC_FS */
+
+#ifdef CONFIG_SYSCTL
+/* sysctl /proc/sys/net/netflow */
+static int hsize_procctl(ctl_table *ctl, int write, void __user *buffer, size_t *lenp, loff_t *fpos)
+{
+	void *orig = ctl->data;
+	int ret, hsize;
+
+	if (write)
+		ctl->data = &hsize;
+	ret = proc_dointvec(ctl, write, buffer, lenp, fpos);
+	if (write) {
+		ctl->data = orig;
+		if (hsize < 1)
+			return -EPERM;
+		return set_hashsize(hsize)?:ret;
+	} else
+		return ret;
+}
+
+static int sndbuf_procctl(ctl_table *ctl, int write, void __user *buffer, size_t *lenp, loff_t *fpos)
+{
+	int ret;
+	struct ipt_netflow_sock *usock;
+
+	read_lock(&sock_lock);
+	if (list_empty(&usock_list)) {
+		read_unlock(&sock_lock);
+		return -ENOENT;
+	}
+	usock = list_first_entry(&usock_list, struct ipt_netflow_sock, list);
+	sndbuf = usock->sock->sk->sk_sndbuf;
+	read_unlock(&sock_lock);
+
+	ctl->data = &sndbuf;
+	ret = proc_dointvec(ctl, write, buffer, lenp, fpos);
+	if (!write)
+		return ret;
+	if (sndbuf < SOCK_MIN_SNDBUF)
+		sndbuf = SOCK_MIN_SNDBUF;
+	write_lock(&sock_lock);
+	list_for_each_entry(usock, &usock_list, list) {
+		usock->sock->sk->sk_sndbuf = sndbuf;
+	}
+	write_unlock(&sock_lock);
+	return ret;
+}
+
+static int destination_procctl(ctl_table *ctl, int write, void __user *buffer, size_t *lenp, loff_t *fpos)
+{
+	int ret;
+
+	ret = proc_dostring(ctl, write, buffer, lenp, fpos);
+	if (ret >= 0 && write) {
+		destination_fini();
+		add_destinations(destination_buf);
+	}
+	return ret;
+}
+
+static int aggregation_procctl(ctl_table *ctl, int write, void __user *buffer, size_t *lenp, loff_t *fpos)
+{
+	int ret;
+
+	if (debug > 1)
+		printk(KERN_INFO "aggregation_procctl (%d) %u %llu\n", write,
+		       (unsigned int)(*lenp), (unsigned long long)*fpos);
+	ret = proc_dostring(ctl, write, buffer, lenp, fpos);
+	if (ret >= 0 && write) {
+		add_aggregation(aggregation_buf);
+	}
+	return ret;
+}
+
+static struct ctl_table_header *netflow_sysctl_header;
+
+static struct ctl_table netflow_sysctl_table[] = {
+	{
+		.procname = "active_timeout",
+		.mode = 0644,
+		.data = &active_timeout,
+		.maxlen = sizeof(int),
+		.proc_handler = &proc_dointvec,
+	},
+	{
+		.procname = "inactive_timeout",
+		.mode = 0644,
+		.data = &inactive_timeout,
+		.maxlen = sizeof(int),
+		.proc_handler = &proc_dointvec,
+	},
+	{
+		.procname = "debug",
+		.mode = 0644,
+		.data = &debug,
+		.maxlen = sizeof(int),
+		.proc_handler = &proc_dointvec,
+	},
+	{
+		.procname = "hashsize",
+		.mode = 0644,
+		.data = &ipt_netflow_hash_size,
+		.maxlen = sizeof(int),
+		.proc_handler = &hsize_procctl,
+	},
+	{
+		.procname = "sndbuf",
+		.mode = 0644,
+		.maxlen = sizeof(int),
+		.proc_handler = &sndbuf_procctl,
+	},
+	{
+		.procname = "destination",
+		.mode = 0644,
+		.data = &destination_buf,
+		.maxlen = sizeof(destination_buf),
+		.proc_handler = &destination_procctl,
+	},
+	{
+		.procname = "aggregation",
+		.mode = 0644,
+		.data = &aggregation_buf,
+		.maxlen = sizeof(aggregation_buf),
+		.proc_handler = &aggregation_procctl,
+	},
+	{
+		.procname = "maxflows",
+		.mode = 0644,
+		.data = &maxflows,
+		.maxlen = sizeof(int),
+		.proc_handler = &proc_dointvec,
+	},
+	{ }
+};
+
+static struct ctl_path netflow_sysctl_path[] = {
+	{ .procname = "net" },
+	{ .procname = "netflow" },
+	{ }
+};
+#endif /* CONFIG_SYSCTL */
+
+/* socket code */
+static void sk_error_report(struct sock *sk)
+{
+	/* clear connection refused errors if any */
+	write_lock_bh(&sk->sk_callback_lock);
+	if (debug > 1)
+		printk(KERN_INFO "NETFLOW: socket error <%d>\n", sk->sk_err);
+	sk->sk_err = 0;
+	NETFLOW_STAT_INC(sock_errors);
+	write_unlock_bh(&sk->sk_callback_lock);
+	return;
+}
+
+// return the number of sends that succeeded, 0 if none
+static int netflow_send_pdu(void *buffer, int len)
+{
+	struct msghdr msg = { .msg_flags = MSG_DONTWAIT|MSG_NOSIGNAL };
+	struct kvec iov = { buffer, len };
+	int retok = 0, ret;
+	int snum = 0;
+	struct ipt_netflow_sock *usock;
+
+	read_lock(&sock_lock);
+	list_for_each_entry(usock, &usock_list, list) {
+		if (debug)
+			printk(KERN_INFO "netflow_send_pdu: sendmsg(%d, %d) [%u %u]\n",
+			       snum,
+			       len,
+			       atomic_read(&usock->sock->sk->sk_wmem_alloc),
+			       usock->sock->sk->sk_sndbuf);
+		ret = kernel_sendmsg(usock->sock, &msg, &iov, 1, (size_t)len);
+		if (ret < 0) {
char *suggestion = ""; + + NETFLOW_STAT_INC_ATOMIC(send_failed); + if (ret == -EAGAIN) { + atomic_inc(&usock->err_full); + suggestion = ": increase sndbuf!"; + } else + atomic_inc(&usock->err_other); + printk(KERN_ERR "netflow_send_pdu[%d]: sendmsg error %d: data loss %llu pkt, %llu bytes%s\n", + snum, ret, pdu_packets, pdu_traf, suggestion); + } else { + unsigned int wmem = atomic_read(&usock->sock->sk->sk_wmem_alloc); + if (wmem > atomic_read(&usock->wmem_peak)) + atomic_set(&usock->wmem_peak, wmem); + NETFLOW_STAT_INC_ATOMIC(send_success); + NETFLOW_STAT_ADD_ATOMIC(exported_size, ret); + retok++; + } + snum++; + } + read_unlock(&sock_lock); + return retok; +} + +static void usock_free(struct ipt_netflow_sock *usock) +{ + printk(KERN_INFO "netflow: remove destination %u.%u.%u.%u:%u (%p)\n", + HIPQUAD(usock->ipaddr), + usock->port, + usock->sock); + if (usock->sock) + sock_release(usock->sock); + usock->sock = NULL; + vfree(usock); +} + +static void destination_fini(void) +{ + write_lock(&sock_lock); + while (!list_empty(&usock_list)) { + struct ipt_netflow_sock *usock; + + usock = list_entry(usock_list.next, struct ipt_netflow_sock, list); + list_del(&usock->list); + write_unlock(&sock_lock); + usock_free(usock); + write_lock(&sock_lock); + } + write_unlock(&sock_lock); +} + +static void add_usock(struct ipt_netflow_sock *usock) +{ + struct ipt_netflow_sock *sk; + + /* don't need empty sockets */ + if (!usock->sock) { + usock_free(usock); + return; + } + + write_lock(&sock_lock); + /* don't need duplicated sockets */ + list_for_each_entry(sk, &usock_list, list) { + if (sk->ipaddr == usock->ipaddr && + sk->port == usock->port) { + write_unlock(&sock_lock); + usock_free(usock); + return; + } + } + list_add_tail(&usock->list, &usock_list); + printk(KERN_INFO "netflow: added destination %u.%u.%u.%u:%u\n", + HIPQUAD(usock->ipaddr), + usock->port); + write_unlock(&sock_lock); +} + +static struct socket *usock_alloc(__be32 ipaddr, unsigned short port) +{ + struct sockaddr_in sin; + struct socket *sock; + int error; + + if ((error = sock_create_kern(PF_INET, SOCK_DGRAM, IPPROTO_UDP, &sock)) < 0) { + printk(KERN_ERR "netflow: sock_create_kern error %d\n", error); + return NULL; + } + sock->sk->sk_allocation = GFP_ATOMIC; + sock->sk->sk_prot->unhash(sock->sk); /* hidden from input */ + sock->sk->sk_error_report = &sk_error_report; /* clear ECONNREFUSED */ + if (sndbuf) + sock->sk->sk_sndbuf = sndbuf; + else + sndbuf = sock->sk->sk_sndbuf; + memset(&sin, 0, sizeof(sin)); + sin.sin_family = AF_INET; + sin.sin_addr.s_addr = htonl(ipaddr); + sin.sin_port = htons(port); + if ((error = sock->ops->connect(sock, (struct sockaddr *)&sin, + sizeof(sin), 0)) < 0) { + printk(KERN_ERR "netflow: error connecting UDP socket %d\n", error); + sock_release(sock); + return NULL; + } + return sock; +} + +#define SEPARATORS " ,;\t\n" +static int add_destinations(char *ptr) +{ + while (ptr) { + unsigned char ip[4]; + unsigned short port; + + ptr += strspn(ptr, SEPARATORS); + + if (sscanf(ptr, "%hhu.%hhu.%hhu.%hhu:%hu", + ip, ip + 1, ip + 2, ip + 3, &port) == 5) { + struct ipt_netflow_sock *usock; + + if (!(usock = vmalloc(sizeof(*usock)))) { + printk(KERN_ERR "netflow: can't vmalloc socket\n"); + return -ENOMEM; + } + + memset(usock, 0, sizeof(*usock)); + usock->ipaddr = ntohl(*(__be32 *)ip); + usock->port = port; + usock->sock = usock_alloc(usock->ipaddr, port); + atomic_set(&usock->wmem_peak, 0); + atomic_set(&usock->err_full, 0); + atomic_set(&usock->err_other, 0); + add_usock(usock); + } else + break; + + ptr = 
strpbrk(ptr, SEPARATORS); + } + return 0; +} + +static void aggregation_fini(struct list_head *list) +{ + write_lock_bh(&aggr_lock); + while (!list_empty(list)) { + struct netflow_aggr_n *aggr; /* match netflow_aggr_p too */ + + aggr = list_entry(list->next, struct netflow_aggr_n, list); + list_del(&aggr->list); + write_unlock_bh(&aggr_lock); + vfree(aggr); + write_lock_bh(&aggr_lock); + } + write_unlock_bh(&aggr_lock); +} + +static int add_aggregation(char *ptr) +{ + struct netflow_aggr_n *aggr_n, *aggr, *tmp; + struct netflow_aggr_p *aggr_p; + LIST_HEAD(new_aggr_n_list); + LIST_HEAD(new_aggr_p_list); + LIST_HEAD(old_aggr_list); + + while (ptr && *ptr) { + unsigned char ip[4]; + unsigned int mask; + unsigned int port1, port2; + unsigned int aggr_to; + + ptr += strspn(ptr, SEPARATORS); + + if (sscanf(ptr, "%hhu.%hhu.%hhu.%hhu/%u=%u", + ip, ip + 1, ip + 2, ip + 3, &mask, &aggr_to) == 6) { + + if (!(aggr_n = vmalloc(sizeof(*aggr_n)))) { + printk(KERN_ERR "netflow: can't vmalloc aggr\n"); + return -ENOMEM; + } + memset(aggr_n, 0, sizeof(*aggr_n)); + + aggr_n->addr = ntohl(*(__be32 *)ip); + aggr_n->mask = bits2mask(mask); + aggr_n->aggr_mask = bits2mask(aggr_to); + aggr_n->prefix = mask; + printk(KERN_INFO "netflow: add aggregation [%u.%u.%u.%u/%u=%u]\n", + HIPQUAD(aggr_n->addr), mask, aggr_to); + list_add_tail(&aggr_n->list, &new_aggr_n_list); + + } else if (sscanf(ptr, "%u-%u=%u", &port1, &port2, &aggr_to) == 3 || + sscanf(ptr, "%u=%u", &port2, &aggr_to) == 2) { + + if (!(aggr_p = vmalloc(sizeof(*aggr_p)))) { + printk(KERN_ERR "netflow: can't vmalloc aggr\n"); + return -ENOMEM; + } + memset(aggr_p, 0, sizeof(*aggr_p)); + + aggr_p->port1 = port1; + aggr_p->port2 = port2; + aggr_p->aggr_port = aggr_to; + printk(KERN_INFO "netflow: add aggregation [%u-%u=%u]\n", + port1, port2, aggr_to); + list_add_tail(&aggr_p->list, &new_aggr_p_list); + } else { + printk(KERN_ERR "netflow: bad aggregation rule: %s (ignoring)\n", ptr); + break; + } + + ptr = strpbrk(ptr, SEPARATORS); + } + + /* swap lists */ + write_lock_bh(&aggr_lock); + list_for_each_entry_safe(aggr, tmp, &aggr_n_list, list) + list_move(&aggr->list, &old_aggr_list); + list_for_each_entry_safe(aggr, tmp, &aggr_p_list, list) + list_move(&aggr->list, &old_aggr_list); + + list_for_each_entry_safe(aggr, tmp, &new_aggr_n_list, list) + list_move_tail(&aggr->list, &aggr_n_list); + list_for_each_entry_safe(aggr, tmp, &new_aggr_p_list, list) + list_move_tail(&aggr->list, &aggr_p_list); + write_unlock_bh(&aggr_lock); + aggregation_fini(&old_aggr_list); + return 0; +} + +static inline u_int32_t hash_netflow(const struct ipt_netflow_tuple *tuple) +{ + /* tuple is rounded to u32s */ + return jhash2((u32 *)tuple, NETFLOW_TUPLE_SIZE, ipt_netflow_hash_rnd) % ipt_netflow_hash_size; +} + +static struct ipt_netflow * +ipt_netflow_find(const struct ipt_netflow_tuple *tuple) +{ + struct ipt_netflow *nf; + unsigned int hash = hash_netflow(tuple); + struct hlist_node *pos; + + hlist_for_each_entry(nf, pos, &ipt_netflow_hash[hash], hlist) { + if (ipt_netflow_tuple_equal(tuple, &nf->tuple) && + nf->nr_bytes < FLOW_FULL_WATERMARK) { + NETFLOW_STAT_INC(found); + return nf; + } + NETFLOW_STAT_INC(searched); + } + NETFLOW_STAT_INC(notfound); + return NULL; +} + +static struct hlist_head *alloc_hashtable(int size) +{ + struct hlist_head *hash; + + hash = vmalloc(sizeof(struct hlist_head) * size); + if (hash) { + int i; + + for (i = 0; i < size; i++) + INIT_HLIST_HEAD(&hash[i]); + } else + printk(KERN_ERR "netflow: unable to vmalloc hash table.\n"); + + return hash; +} + 
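+/*
+ * A note on the aggregation ruleset syntax accepted by add_aggregation()
+ * above (the addresses and values in this example are illustrative only):
+ *
+ *	aggregation="192.0.2.0/24=16;10.0.0.0/8=8 1024-65535=65535"
+ *
+ * A network rule addr/prefix=aggr_prefix strips matching source and
+ * destination addresses down to aggr_prefix bits; a port rule
+ * port1-port2=aggr_port (or port=aggr_port for a single port) replaces
+ * any port within the range by aggr_port.  Rules are separated by any
+ * of the SEPARATORS characters.
+ */
+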
+static int set_hashsize(int new_size)
+{
+	struct hlist_head *new_hash, *old_hash;
+	unsigned int hash;
+	struct ipt_netflow *nf;
+	int rnd;
+
+	printk(KERN_INFO "netflow: allocating new hash table %u -> %u buckets\n",
+	       ipt_netflow_hash_size, new_size);
+	new_hash = alloc_hashtable(new_size);
+	if (!new_hash)
+		return -ENOMEM;
+
+	get_random_bytes(&rnd, 4);
+
+	/* rehash */
+	spin_lock_bh(&ipt_netflow_lock);
+	old_hash = ipt_netflow_hash;
+	ipt_netflow_hash = new_hash;
+	ipt_netflow_hash_size = new_size;
+	ipt_netflow_hash_rnd = rnd;
+	/* hash_netflow() is dependent on ipt_netflow_hash_* values */
+	list_for_each_entry(nf, &ipt_netflow_list, list) {
+		hash = hash_netflow(&nf->tuple);
+		/* hlist_add_head overwrites the node's hlist pointers,
+		 * so there is no need to unlink it from the old table first */
+		hlist_add_head(&nf->hlist, &new_hash[hash]);
+	}
+	spin_unlock_bh(&ipt_netflow_lock);
+
+	vfree(old_hash);
+
+	return 0;
+}
+
+static struct ipt_netflow *
+ipt_netflow_alloc(struct ipt_netflow_tuple *tuple)
+{
+	struct ipt_netflow *nf;
+	long count;
+
+	nf = kmem_cache_alloc(ipt_netflow_cachep, GFP_ATOMIC);
+	if (!nf) {
+		printk(KERN_ERR "Can't allocate netflow.\n");
+		return NULL;
+	}
+
+	memset(nf, 0, sizeof(*nf));
+	nf->tuple = *tuple;
+
+	count = atomic_inc_return(&ipt_netflow_count);
+	if (count > peakflows) {
+		peakflows = count;
+		peakflows_at = jiffies;
+	}
+
+	return nf;
+}
+
+static void ipt_netflow_free(struct ipt_netflow *nf)
+{
+	atomic_dec(&ipt_netflow_count);
+	kmem_cache_free(ipt_netflow_cachep, nf);
+}
+
+static struct ipt_netflow *
+init_netflow(struct ipt_netflow_tuple *tuple,
+	     struct sk_buff *skb)
+{
+	struct ipt_netflow *nf;
+	unsigned int hash;
+
+	nf = ipt_netflow_alloc(tuple);
+	if (!nf)
+		return NULL;
+
+	hash = hash_netflow(&nf->tuple);
+	hlist_add_head(&nf->hlist, &ipt_netflow_hash[hash]);
+	list_add(&nf->list, &ipt_netflow_list);
+
+	return nf;
+}
+
+/* cook pdu, send, and clean */
+static void __netflow_export_pdu(void)
+{
+	struct timeval tv;
+	int pdusize;
+
+	if (!pdu.nr_records)
+		return;
+
+	if (debug > 1)
+		printk(KERN_INFO "netflow_export_pdu with %d records\n", pdu.nr_records);
+	do_gettimeofday(&tv);
+
+	pdu.version = htons(5);
+	pdu.ts_uptime = htonl(jiffies_to_msecs(jiffies));
+	pdu.ts_usecs = htonl(tv.tv_sec);
+	pdu.ts_unsecs = htonl(tv.tv_usec);
+	//pdu.eng_type = 0;
+	//pdu.eng_id = 0;
+	//pdu.padding = 0;
+
+	pdusize = NETFLOW5_HEADER_SIZE + sizeof(struct netflow5_record) * pdu.nr_records;
+
+	/* fix the byte order of nr_records only now, right before export */
+	pdu.nr_records = htons(pdu.nr_records);
+
+	if (netflow_send_pdu(&pdu, pdusize) == 0) {
+		/* not a single send succeeded: account these packets as dropped */
+		NETFLOW_STAT_ADD_ATOMIC(pkt_drop, pdu_packets);
+		NETFLOW_STAT_ADD_ATOMIC(traf_drop, pdu_traf);
+	}
+
+	pdu.seq = htonl(ntohl(pdu.seq) + ntohs(pdu.nr_records));
+
+	pdu.nr_records = 0;
+	pdu_packets = 0;
+	pdu_traf = 0;
+}
+
+static void netflow_export_flow(struct ipt_netflow *nf)
+{
+	struct netflow5_record *rec;
+
+	spin_lock(&pdu_lock);
+	if (debug > 2)
+		printk(KERN_INFO "adding flow to export (%d)\n", pdu.nr_records);
+
+	pdu_packets += nf->nr_packets;
+	pdu_traf += nf->nr_bytes;
+	pdu_ts_mod = jiffies;
+	rec = &pdu.flow[pdu.nr_records++];
+
+	/* make V5 flow record */
+	rec->s_addr = nf->tuple.s_addr;
+	rec->d_addr = nf->tuple.d_addr;
+	//rec->nexthop = 0;
+	rec->i_ifc = htons(nf->tuple.i_ifc);
+	rec->o_ifc = htons(nf->o_ifc);
+	rec->nr_packets = htonl(nf->nr_packets);
+	rec->nr_octets = htonl(nf->nr_bytes);
+	rec->ts_first = htonl(jiffies_to_msecs(nf->ts_first));
+	rec->ts_last = htonl(jiffies_to_msecs(nf->ts_last));
+	rec->s_port = nf->tuple.s_port;
+	rec->d_port = nf->tuple.d_port;
+	//rec->reserved = 0;
+	rec->tcp_flags = nf->tcp_flags;
+	rec->protocol = nf->tuple.protocol;
+	rec->tos = nf->tuple.tos;
+	//rec->s_as = 0;
+	//rec->d_as = 0;
+	rec->s_mask = nf->s_mask;
+	rec->d_mask = nf->d_mask;
+	//rec->padding = 0;
+	ipt_netflow_free(nf);
+
+	if (pdu.nr_records == NETFLOW5_RECORDS_MAX)
+		__netflow_export_pdu();
+	spin_unlock(&pdu_lock);
+}
+
+static inline int active_needs_export(struct ipt_netflow *nf, long a_timeout)
+{
+	/* active too long, finishing, or having too many bytes */
+	return ((jiffies - nf->ts_first) > a_timeout) ||
+	       (nf->tuple.protocol == IPPROTO_TCP &&
+		(nf->tcp_flags & TCP_FIN_RST) &&
+		(jiffies - nf->ts_last) > (1 * HZ)) ||
+	       nf->nr_bytes >= FLOW_FULL_WATERMARK;
+}
+
+/* can be called with a zero timeout to flush the cache and the pdu */
+static void netflow_scan_inactive_timeout(long timeout)
+{
+	long i_timeout = timeout * HZ;
+	long a_timeout = active_timeout * HZ;
+
+	spin_lock_bh(&ipt_netflow_lock);
+	while (!list_empty(&ipt_netflow_list)) {
+		struct ipt_netflow *nf;
+
+		nf = list_entry(ipt_netflow_list.prev, struct ipt_netflow, list);
+		/* Note: i_timeout is checked with >= to allow specifying a zero
+		 * timeout to purge all flows on module unload */
+		if (((jiffies - nf->ts_last) >= i_timeout) ||
+		    active_needs_export(nf, a_timeout)) {
+			hlist_del(&nf->hlist);
+			list_del(&nf->list);
+			NETFLOW_STAT_ADD(pkt_out, nf->nr_packets);
+			NETFLOW_STAT_ADD(traf_out, nf->nr_bytes);
+			spin_unlock_bh(&ipt_netflow_lock);
+			netflow_export_flow(nf);
+			spin_lock_bh(&ipt_netflow_lock);
+		} else {
+			/* flows which need to be exported are always at the tail,
+			 * so if there are no more exportable flows we can break */
+			break;
+		}
+	}
+	spin_unlock_bh(&ipt_netflow_lock);
+
+	/* flush flows stored in the pdu if there are no new flows for too long */
+	/* Note: using >= to allow flow purge on zero timeout */
+	if ((jiffies - pdu_ts_mod) >= i_timeout) {
+		spin_lock(&pdu_lock);
+		__netflow_export_pdu();
+		spin_unlock(&pdu_lock);
+	}
+}
+
+static void netflow_work_fn(struct work_struct *dummy)
+{
+	netflow_scan_inactive_timeout(inactive_timeout);
+	schedule_delayed_work(&netflow_work, HZ / 10);
+}
+
+#define RATESHIFT 2
+#define SAMPLERATE (RATESHIFT*RATESHIFT)
+#define NUMSAMPLES(minutes) (minutes * 60 / SAMPLERATE)
+#define _A(v, m) (v) * (1024 * 2 / (NUMSAMPLES(m) + 1)) >> 10
+// written as x * (1024 / y) >> 10 because we cannot simply divide a long long integer
+#define CALC_RATE(ewma, cur, minutes) ewma += _A(cur - ewma, minutes)
+// calculate EWMA throughput rate for the whole module
+static void rate_timer_calc(unsigned long dummy)
+{
+	static u64 old_pkt_total = 0;
+	static u64 old_traf_total = 0;
+	static u64 old_searched = 0;
+	static u64 old_found = 0;
+	static u64 old_notfound = 0;
+	u64 searched = 0;
+	u64 found = 0;
+	u64 notfound = 0;
+	unsigned int dsrch, dfnd, dnfnd;
+	u64 pkt_total = 0;
+	u64 traf_total = 0;
+	int cpu;
+
+	for_each_present_cpu(cpu) {
+		struct ipt_netflow_stat *st = &per_cpu(ipt_netflow_stat, cpu);
+
+		pkt_total += st->pkt_total;
+		traf_total += st->traf_total;
+		searched += st->searched;
+		found += st->found;
+		notfound += st->notfound;
+	}
+
+	sec_prate = (pkt_total - old_pkt_total) >> RATESHIFT;
+	CALC_RATE(min5_prate, sec_prate, 5);
+	CALC_RATE(min_prate, sec_prate, 1);
+	old_pkt_total = pkt_total;
+
+	sec_brate = ((traf_total - old_traf_total) * 8) >> RATESHIFT;
+	CALC_RATE(min5_brate, sec_brate, 5);
+	CALC_RATE(min_brate, sec_brate, 1);
+	old_traf_total = traf_total;
+
+	dsrch = searched - old_searched;
+	dfnd = found - old_found;
+	dnfnd = notfound - old_notfound;
+	old_searched = searched;
+	old_found = found;
+	old_notfound = notfound;
+	/* if there was no access to the hash, keep the metric steady */
+	metric = (dfnd + dnfnd)? 10 * (dsrch + dfnd + dnfnd) / (dfnd + dnfnd) : metric;
+	CALC_RATE(min15_metric, (unsigned long long)metric, 15);
+	CALC_RATE(min5_metric, (unsigned long long)metric, 5);
+	CALC_RATE(min_metric, (unsigned long long)metric, 1);
+
+	mod_timer(&rate_timer, jiffies + (HZ * SAMPLERATE));
+}
+
+/* packet receiver */
+static unsigned int
+netflow_target(struct sk_buff *skb, const struct xt_action_param *par)
+{
+	struct iphdr _iph, *iph;
+	struct ipt_netflow_tuple tuple;
+	struct ipt_netflow *nf;
+	__u8 tcp_flags;
+	struct netflow_aggr_n *aggr_n;
+	struct netflow_aggr_p *aggr_p;
+	__u8 s_mask, d_mask;
+
+	iph = skb_header_pointer(skb, 0, sizeof(_iph), &_iph); //iph = ip_hdr(skb);
+
+	if (iph == NULL) {
+		NETFLOW_STAT_INC(truncated);
+		NETFLOW_STAT_INC(pkt_drop);
+		return XT_CONTINUE;
+	}
+
+	tuple.s_addr = iph->saddr;
+	tuple.d_addr = iph->daddr;
+	tuple.s_port = 0;
+	tuple.d_port = 0;
+	tuple.i_ifc = par->in? par->in->ifindex : -1;
+	tuple.protocol = iph->protocol;
+	tuple.tos = iph->tos;
+	tcp_flags = 0; /* Cisco sometimes reports TCP flags even for non-TCP packets; we don't copy that */
+	s_mask = 0;
+	d_mask = 0;
+
+	if (iph->frag_off & htons(IP_OFFSET))
+		NETFLOW_STAT_INC(frags);
+	else {
+		switch (tuple.protocol) {
+		case IPPROTO_TCP: {
+			struct tcphdr _hdr, *hp;
+
+			if ((hp = skb_header_pointer(skb, iph->ihl * 4, 14, &_hdr))) {
+				tuple.s_port = hp->source;
+				tuple.d_port = hp->dest;
+				tcp_flags = (u_int8_t)(ntohl(tcp_flag_word(hp)) >> 16);
+			}
+			break;
+		}
+		case IPPROTO_UDP: {
+			struct udphdr _hdr, *hp;
+
+			if ((hp = skb_header_pointer(skb, iph->ihl * 4, 4, &_hdr))) {
+				tuple.s_port = hp->source;
+				tuple.d_port = hp->dest;
+			}
+			break;
+		}
+		case IPPROTO_ICMP: {
+			struct icmphdr _hdr, *hp;
+
+			if ((hp = skb_header_pointer(skb, iph->ihl * 4, 2, &_hdr)))
+				tuple.d_port = (hp->type << 8) | hp->code;
+			break;
+		}
+		case IPPROTO_IGMP: {
+			struct igmphdr _hdr, *hp;
+
+			if ((hp = skb_header_pointer(skb, iph->ihl * 4, 1, &_hdr)))
+				tuple.d_port = hp->type;
+			break;
+		}
+		}
+	} /* not fragmented */
+
+	/* aggregate networks */
+	read_lock_bh(&aggr_lock);
+	list_for_each_entry(aggr_n, &aggr_n_list, list)
+		if ((ntohl(tuple.s_addr) & aggr_n->mask) == aggr_n->addr) {
+			tuple.s_addr &= htonl(aggr_n->aggr_mask);
+			s_mask = aggr_n->prefix;
+			break;
+		}
+	list_for_each_entry(aggr_n, &aggr_n_list, list)
+		if ((ntohl(tuple.d_addr) & aggr_n->mask) == aggr_n->addr) {
+			tuple.d_addr &= htonl(aggr_n->aggr_mask);
+			d_mask = aggr_n->prefix;
+			break;
+		}
+
+	/* aggregate ports */
+	list_for_each_entry(aggr_p, &aggr_p_list, list)
+		if (ntohs(tuple.s_port) >= aggr_p->port1 &&
+		    ntohs(tuple.s_port) <= aggr_p->port2) {
+			tuple.s_port = htons(aggr_p->aggr_port);
+			break;
+		}
+
+	list_for_each_entry(aggr_p, &aggr_p_list, list)
+		if (ntohs(tuple.d_port) >= aggr_p->port1 &&
+		    ntohs(tuple.d_port) <= aggr_p->port2) {
+			tuple.d_port = htons(aggr_p->aggr_port);
+			break;
+		}
+	read_unlock_bh(&aggr_lock);
+
+	spin_lock_bh(&ipt_netflow_lock);
+	/* record */
+	nf = ipt_netflow_find(&tuple);
+	if (!nf) {
+		if (maxflows > 0 && atomic_read(&ipt_netflow_count) >= maxflows) {
+			/* DoS attack prevention */
+			NETFLOW_STAT_INC(maxflows_err);
+			NETFLOW_STAT_INC(pkt_drop);
+			NETFLOW_STAT_ADD(traf_drop, ntohs(iph->tot_len));
+			spin_unlock_bh(&ipt_netflow_lock);
+			return XT_CONTINUE;
+		}
+
+		nf = init_netflow(&tuple, skb);
+		if (!nf || IS_ERR(nf)) {
+			NETFLOW_STAT_INC(alloc_err);
+			NETFLOW_STAT_INC(pkt_drop);
+			NETFLOW_STAT_ADD(traf_drop, ntohs(iph->tot_len));
+			spin_unlock_bh(&ipt_netflow_lock);
+			return XT_CONTINUE;
+		}
+
+		nf->ts_first = jiffies;
+		nf->tcp_flags = tcp_flags;
+		nf->o_ifc = par->out? par->out->ifindex : -1;
+		nf->s_mask = s_mask;
+		nf->d_mask = d_mask;
+
+		if (debug > 2)
+			printk(KERN_INFO "ipt_netflow: new (%u) %hd:%hd SRC=%u.%u.%u.%u:%u DST=%u.%u.%u.%u:%u\n",
+			       atomic_read(&ipt_netflow_count),
+			       tuple.i_ifc, nf->o_ifc,
+			       NIPQUAD(tuple.s_addr), ntohs(tuple.s_port),
+			       NIPQUAD(tuple.d_addr), ntohs(tuple.d_port));
+	} else {
+		/* ipt_netflow_list is sorted by access time:
+		 * most recently accessed flows are at the head, old flows
+		 * remain at the tail; this bubbles the flow up to the head */
+		list_move(&nf->list, &ipt_netflow_list);
+	}
+
+	nf->nr_packets++;
+	nf->nr_bytes += ntohs(iph->tot_len);
+	nf->ts_last = jiffies;
+	nf->tcp_flags |= tcp_flags;
+
+	NETFLOW_STAT_INC(pkt_total);
+	NETFLOW_STAT_ADD(traf_total, ntohs(iph->tot_len));
+
+	if (active_needs_export(nf, active_timeout * HZ)) {
+		/* if this active flow is to be exported,
+		 * bubble it to the tail */
+		list_move_tail(&nf->list, &ipt_netflow_list);
+
+		/* Blog: I thought about forcing the timer to wake up sooner if we have
+		 * enough exportable flows, but in fact this doesn't make much sense,
+		 * because it would only move flow data from one memory to another
+		 * (from our buffers to socket buffers, and socket buffers even have
+		 * limited size). But yes, this is disputable. */
+	}
+
+	spin_unlock_bh(&ipt_netflow_lock);
+
+	return XT_CONTINUE;
+}
+
+static struct xt_target ipt_netflow_reg __read_mostly = {
+	.name = "NETFLOW",
+	.family = NFPROTO_IPV4,
+	.target = netflow_target,
+	.table = "filter",
+	.hooks = (1 << NF_INET_LOCAL_IN) | (1 << NF_INET_FORWARD) |
+		 (1 << NF_INET_LOCAL_OUT),
+	.me = THIS_MODULE
+};
+
+static int __init ipt_netflow_init(void)
+{
+#ifdef CONFIG_PROC_FS
+	struct proc_dir_entry *proc_stat;
+#endif
+
+	get_random_bytes(&ipt_netflow_hash_rnd, 4);
+
+	/* determine hash size (idea from nf_conntrack_core.c) */
+	if (!hashsize) {
+		hashsize = (((num_physpages << PAGE_SHIFT) / 16384)
+			    / sizeof(struct hlist_head));
+		if (num_physpages > (1024 * 1024 * 1024 / PAGE_SIZE))
+			hashsize = 8192;
+	}
+	if (hashsize < 16)
+		hashsize = 16;
+	printk(KERN_INFO "ipt_netflow version %s (%u buckets)\n",
+	       IPT_NETFLOW_VERSION, hashsize);
+
+	ipt_netflow_hash_size = hashsize;
+	ipt_netflow_hash = alloc_hashtable(ipt_netflow_hash_size);
+	if (!ipt_netflow_hash) {
+		printk(KERN_ERR "Unable to create ipt_netflow_hash\n");
+		goto err;
+	}
+
+	ipt_netflow_cachep = kmem_cache_create("ipt_netflow",
+					       sizeof(struct ipt_netflow), 0,
+					       0, NULL);
+	if (!ipt_netflow_cachep) {
+		printk(KERN_ERR "Unable to create ipt_netflow slab cache\n");
+		goto err_free_hash;
+	}
+
+#ifdef CONFIG_PROC_FS
+	proc_stat = create_proc_entry("ipt_netflow", S_IRUGO, init_net.proc_net_stat);
+	if (!proc_stat) {
+		printk(KERN_ERR "Unable to create /proc/net/stat/ipt_netflow entry\n");
+		goto err_free_netflow_slab;
+	}
+	proc_stat->proc_fops = &nf_seq_fops;
+	printk(KERN_INFO "netflow: registered: /proc/net/stat/ipt_netflow\n");
+#endif
+
+#ifdef CONFIG_SYSCTL
+	netflow_sysctl_header = register_sysctl_paths(netflow_sysctl_path, netflow_sysctl_table);
+	if (!netflow_sysctl_header) {
+		printk(KERN_ERR "netflow: can't register to sysctl\n");
+		goto err_free_proc_stat;
+	} else
+		printk(KERN_INFO "netflow: registered: sysctl net.netflow\n");
+#endif
+
+	if (!destination)
+		destination = destination_buf;
+	if (destination != destination_buf) {
+		strlcpy(destination_buf, destination, sizeof(destination_buf));
+		destination = destination_buf;
+	}
+	if (add_destinations(destination) < 0)
+		goto err_free_sysctl;
+
+	if (!aggregation)
+		aggregation = aggregation_buf;
+	if (aggregation != aggregation_buf) {
+		strlcpy(aggregation_buf, aggregation, sizeof(aggregation_buf));
+		aggregation = aggregation_buf;
+	}
+	add_aggregation(aggregation);
+
+	schedule_delayed_work(&netflow_work, HZ / 10);
+	setup_timer(&rate_timer, rate_timer_calc, 0);
+	mod_timer(&rate_timer, jiffies + (HZ * SAMPLERATE));
+
+	if (xt_register_target(&ipt_netflow_reg))
+		goto err_stop_timer;
+
+	peakflows_at = jiffies;
+
+	printk(KERN_INFO "ipt_netflow loaded.\n");
+	return 0;
+
+err_stop_timer:
+	cancel_delayed_work(&netflow_work);
+	flush_scheduled_work();
+	del_timer_sync(&rate_timer);
+	destination_fini();
+
+	aggregation_fini(&aggr_n_list);
+	aggregation_fini(&aggr_p_list);
+err_free_sysctl:
+#ifdef CONFIG_SYSCTL
+	unregister_sysctl_table(netflow_sysctl_header);
+err_free_proc_stat:
+#endif
+#ifdef CONFIG_PROC_FS
+	remove_proc_entry("ipt_netflow", init_net.proc_net_stat);
+err_free_netflow_slab:
+#endif
+	kmem_cache_destroy(ipt_netflow_cachep);
+err_free_hash:
+	vfree(ipt_netflow_hash);
+err:
+	return -ENOMEM;
+}
+
+static void __exit ipt_netflow_fini(void)
+{
+	printk(KERN_INFO "ipt_netflow unloading...\n");
+
+	xt_unregister_target(&ipt_netflow_reg);
+	cancel_delayed_work(&netflow_work);
+	flush_scheduled_work();
+	del_timer_sync(&rate_timer);
+
+	synchronize_sched();
+
+#ifdef CONFIG_SYSCTL
+	unregister_sysctl_table(netflow_sysctl_header);
+#endif
+#ifdef CONFIG_PROC_FS
+	remove_proc_entry("ipt_netflow", init_net.proc_net_stat);
+#endif
+
+	netflow_scan_inactive_timeout(0); /* flush cache and pdu */
+	destination_fini();
+	aggregation_fini(&aggr_n_list);
+	aggregation_fini(&aggr_p_list);
+
+	kmem_cache_destroy(ipt_netflow_cachep);
+	vfree(ipt_netflow_hash);
+
+	printk(KERN_INFO "ipt_netflow unloaded.\n");
+}
+
+module_init(ipt_netflow_init);
+module_exit(ipt_netflow_fini);
+
+/* vim: set sw=8: */
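
For reference, a quick usage sketch (not part of the patch; the collector address and the values below are illustrative only). With CONFIG_IP_NF_TARGET_NETFLOW=m built, the probe would be loaded, attached to a chain, and inspected roughly like this:

	modprobe ipt_NETFLOW destination=192.0.2.1:2055 hashsize=65536
	iptables -I FORWARD -j NETFLOW
	cat /proc/net/stat/ipt_netflow
	sysctl net.netflow.active_timeout=1800

Since the target is registered for the filter table in the LOCAL_IN, FORWARD and LOCAL_OUT hooks and always returns XT_CONTINUE, it only accounts traffic and never terminates rule traversal.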