连接跟踪的流量统计由扩展acct_extend实现,模块初始化函数nf_conntrack_acct_init注册连接跟踪扩展。
/*
 * Conntrack extension descriptor for accounting: identifies the extension
 * as NF_CT_EXT_ACCT and gives the size/alignment of struct nf_conn_acct.
 */
static const struct nf_ct_ext_type acct_extend = {
	.id	= NF_CT_EXT_ACCT,
	.len	= sizeof(struct nf_conn_acct),
	.align	= __alignof__(struct nf_conn_acct),
};
/*
 * Register the accounting conntrack extension.
 *
 * Returns the value of nf_ct_extend_register(); a negative value is
 * additionally reported through pr_err().
 */
int nf_conntrack_acct_init(void)
{
	int err;

	err = nf_ct_extend_register(&acct_extend);
	if (err >= 0)
		return err;

	pr_err("Unable to register extension\n");
	return err;
}
对于连接跟踪的每个命名空间,由变量nf_ct_acct控制初始情况下是否开启流量统计,nf_ct_acct可在模块加载时指定值。
/*
 * Per-namespace setup: seed this namespace's accounting switch
 * (net->ct.sysctl_acct) with the current value of nf_ct_acct.
 */
void nf_conntrack_acct_pernet_init(struct net *net)
{
net->ct.sysctl_acct = nf_ct_acct;
}
/* Excerpt of nf_conntrack_init_net(); the rest of the body is elided. */
int nf_conntrack_init_net(struct net *net)
{
...
/* Set the initial accounting on/off state for this namespace. */
nf_conntrack_acct_pernet_init(net);
另外,可通过PROC文件修改命名空间sysctl_acct的值。
# sysctl -a | grep nf_conntrack_acct
net.netfilter.nf_conntrack_acct = 1
连接跟踪acct扩展
在初始化一个新的连接时,由函数nf_ct_acct_ext_add添加流量统计扩展。
/*
 * Excerpt of init_conntrack(): allocate a conntrack for a new connection
 * and attach the accounting extension to it.
 */
static noinline struct nf_conntrack_tuple_hash *
init_conntrack(struct net *net, struct nf_conn *tmpl,
const struct nf_conntrack_tuple *tuple,
struct sk_buff *skb,
unsigned int dataoff, u32 hash)
{
struct nf_conn *ct;
zone = nf_ct_zone_tmpl(tmpl, skb, &tmp);
ct = __nf_conntrack_alloc(net, zone, tuple, &repl_tuple, GFP_ATOMIC,
hash);
/* On allocation failure the error pointer is returned to the caller as-is. */
if (IS_ERR(ct))
return (struct nf_conntrack_tuple_hash *)ct;
/* The return value is ignored here; the call may legitimately add nothing. */
nf_ct_acct_ext_add(ct, GFP_ATOMIC);
如下,如果命名空间连接跟踪中sysctl_acct为真,才能真正添加统计扩展。
/*
 * Add the accounting extension to @ct, allocating with @gfp.
 *
 * Returns NULL without adding anything when the conntrack's namespace has
 * sysctl_acct cleared, or when CONFIG_NF_CONNTRACK is not enabled.
 * Otherwise returns the result of nf_ct_ext_add(); a NULL result is only
 * reported via pr_debug().
 */
static inline
struct nf_conn_acct *nf_ct_acct_ext_add(struct nf_conn *ct, gfp_t gfp)
{
#if IS_ENABLED(CONFIG_NF_CONNTRACK)
struct net *net = nf_ct_net(ct);
struct nf_conn_acct *acct;
/* Accounting disabled for this namespace: do not add the extension. */
if (!net->ct.sysctl_acct)
return NULL;
acct = nf_ct_ext_add(ct, NF_CT_EXT_ACCT, gfp);
/* NOTE(review): the debug message has no trailing newline - confirm intent. */
if (!acct)
pr_debug("failed to add accounting extension area");
return acct;
#else
return NULL;
#endif
统计操作
基础函数nf_ct_acct_add负责增加统计扩展中的报文和字节计数。
/*
 * Add @packets and @bytes to the accounting counters of @ct for
 * direction @dir. The counters are only touched when
 * nf_conn_acct_find() returns an accounting extension for @ct.
 * (Excerpt: closing braces are elided.)
 */
void nf_ct_acct_add(struct nf_conn *ct, u32 dir, unsigned int packets,
unsigned int bytes)
{
struct nf_conn_acct *acct;
acct = nf_conn_acct_find(ct);
if (acct) {
/* counter[] is indexed by direction. */
struct nf_conn_counter *counter = acct->counter;
atomic64_add(packets, &counter[dir].packets);
atomic64_add(bytes, &counter[dir].bytes);
函数nf_ct_acct_update用于增加一个报文统计,以及报文的字节统计。
/*
 * Account a single packet of @bytes bytes on @ct in direction @dir.
 * Compiles to an empty function when CONFIG_NF_CONNTRACK is not enabled.
 */
static inline void nf_ct_acct_update(struct nf_conn *ct, u32 dir,
				     unsigned int bytes)
{
#if IS_ENABLED(CONFIG_NF_CONNTRACK)
	const unsigned int one_packet = 1;

	nf_ct_acct_add(ct, dir, one_packet, bytes);
#endif
}
内核在执行连接跟踪刷新时,增加统计信息,调用更新函数nf_ct_acct_update。
/*
 * Excerpt of __nf_ct_refresh_acct(): refresh handling for @ct plus
 * optional accounting of @skb. The timeout update itself is elided.
 */
void __nf_ct_refresh_acct(struct nf_conn *ct,
enum ip_conntrack_info ctinfo,
const struct sk_buff *skb,
u32 extra_jiffies,
bool do_acct)
{
/* Only update if this is not a fixed timeout */
if (test_bit(IPS_FIXED_TIMEOUT_BIT, &ct->status))
goto acct;
...
acct:
/* Account one packet of skb->len bytes in the packet's direction, if requested. */
if (do_acct)
nf_ct_acct_update(ct, CTINFO2DIR(ctinfo), skb->len);
统计路径
在报文进入netfilter系统时,增加统计计数,即在函数nf_conntrack_in中。进入netfilter系统的hook点有NF_INET_PRE_ROUTING,NF_INET_LOCAL_OUT,以及网桥的hook点NF_BR_PRE_ROUTING。
/*
 * Excerpt of nf_conntrack_in(): once a conntrack is associated with the
 * skb, the packet is passed to the per-protocol handler.
 */
unsigned int
nf_conntrack_in(struct sk_buff *skb, const struct nf_hook_state *state)
{
...
ct = nf_ct_get(skb, &ctinfo);
if (!ct) {
/* Not valid part of a connection */
NF_CT_STAT_INC_ATOMIC(state->net, invalid);
ret = NF_ACCEPT;
goto out;
}
/* Protocol-specific processing of the packet. */
ret = nf_conntrack_handle_packet(ct, skb, dataoff, ctinfo, state);
统计信息最终在各个协议处理函数中处理。
/*
 * Dispatch the packet to the handler for the conntrack's protocol number.
 * Further cases are elided from this excerpt; when no case returns,
 * generic_packet() handles the packet.
 */
static int nf_conntrack_handle_packet(struct nf_conn *ct,
struct sk_buff *skb,
unsigned int dataoff,
enum ip_conntrack_info ctinfo,
const struct nf_hook_state *state)
{
switch (nf_ct_protonum(ct)) {
case IPPROTO_TCP:
return nf_conntrack_tcp_packet(ct, skb, dataoff, ctinfo, state);
case IPPROTO_UDP:
return nf_conntrack_udp_packet(ct, skb, dataoff, ctinfo, state);
case IPPROTO_ICMP:
/* Unlike the TCP and UDP handlers, no dataoff is passed here. */
return nf_conntrack_icmp_packet(ct, skb, ctinfo, state);
...
}
return generic_packet(ct, skb, ctinfo);
对于TCP协议,在处理的最后,调用连接跟踪的刷新函数nf_ct_refresh_acct,内部将更新统计数据。
/* Excerpt of nf_conntrack_tcp_packet(): tail of the normal processing path. */
int nf_conntrack_tcp_packet(struct nf_conn *ct,
struct sk_buff *skb,
unsigned int dataoff,
enum ip_conntrack_info ctinfo,
const struct nf_hook_state *state)
{
...
/* Refresh the conntrack with the chosen timeout; accounting is updated as part of the refresh. */
nf_ct_refresh_acct(ct, ctinfo, skb, timeout);
return NF_ACCEPT;
对于UDP协议,由于连接跟踪在不同状态下使用不同的超时时长,刷新函数分别在两个分支中调用。
/*
 * Excerpt of nf_conntrack_udp_packet(): the refresh (and with it the
 * accounting update) is called from one of two branches, depending on
 * whether a reply has been seen on this conntrack.
 */
int nf_conntrack_udp_packet(struct nf_conn *ct,
struct sk_buff *skb,
unsigned int dataoff,
enum ip_conntrack_info ctinfo,
const struct nf_hook_state *state)
{
...
if (test_bit(IPS_SEEN_REPLY_BIT, &ct->status)) {
/* Start from the unreplied timeout; upgraded below once past stream_ts. */
unsigned long extra = timeouts[UDP_CT_UNREPLIED];
/* Still active after two seconds? Extend timeout. */
if (time_after(jiffies, ct->proto.udp.stream_ts))
extra = timeouts[UDP_CT_REPLIED];
nf_ct_refresh_acct(ct, ctinfo, skb, extra);
...
} else {
/* No reply seen yet: refresh with the unreplied timeout. */
nf_ct_refresh_acct(ct, ctinfo, skb, timeouts[UDP_CT_UNREPLIED]);
}
对于ICMP协议,函数最后调用连接跟踪刷新函数nf_ct_refresh_acct,同时,更新统计数据。
/*
 * Excerpt of nf_conntrack_icmp_packet(): refresh the conntrack (and its
 * accounting) and accept the packet. The declaration and initial
 * assignment of `timeout` are elided from this excerpt.
 */
int nf_conntrack_icmp_packet(struct nf_conn *ct,
struct sk_buff *skb,
enum ip_conntrack_info ctinfo,
const struct nf_hook_state *state)
{
/* No timeout selected so far: fall back to the per-namespace ICMP timeout. */
if (!timeout)
timeout = &nf_icmp_pernet(nf_ct_net(ct))->timeout;
nf_ct_refresh_acct(ct, ctinfo, skb, *timeout);
return NF_ACCEPT;
对于协议GRE、DCCP、SCTP、ICMPv6等,统计数据的更新类似。
异常统计数据
对于TCP连接,如果连接还没有建立,接收到复位RST报文,删除连接跟踪。
/*
 * Excerpt of nf_conntrack_tcp_packet(): handling of an RST received
 * before any reply has been seen on the conntrack.
 */
int nf_conntrack_tcp_packet(struct nf_conn *ct,
struct sk_buff *skb,
unsigned int dataoff,
enum ip_conntrack_info ctinfo,
const struct nf_hook_state *state)
{
if (!test_bit(IPS_SEEN_REPLY_BIT, &ct->status)) {
/* If only reply is a RST, we can consider ourselves not to
have an established connection: this is a fairly common
problem case, so we can delete the conntrack
immediately. --RR */
if (th->rst) {
/* Account this packet, then delete the conntrack; the packet is still accepted. */
nf_ct_kill_acct(ct, ctinfo, skb);
return NF_ACCEPT;
}
首先更新连接跟踪的统计数据,之后将其删除。
/*
 * Account the packet in @skb (one packet, skb->len bytes, in the direction
 * derived from @ctinfo) on @ct, then delete @ct.
 * Returns the result of nf_ct_delete().
 */
bool nf_ct_kill_acct(struct nf_conn *ct,
enum ip_conntrack_info ctinfo,
const struct sk_buff *skb)
{
nf_ct_acct_update(ct, CTINFO2DIR(ctinfo), skb->len);
return nf_ct_delete(ct, 0, 0);
连接跟踪冲突
如果两个连接跟踪冲突,已经存在的表项没有经过NAT转换(SNAT/DNAT),或者两者完全相同(nf_ct_match),将新的连接跟踪删除,将当前报文的统计信息合并到已经存在的表项中。
/*
 * Excerpt of __nf_ct_resolve_clash(): @h is the entry already in the hash
 * table; the conntrack attached to @skb is the one that lost the race.
 * The declaration of `ctinfo` and the rest of the body are elided.
 */
static int __nf_ct_resolve_clash(struct sk_buff *skb,
struct nf_conntrack_tuple_hash *h)
{
/* This is the conntrack entry already in hashes that won race. */
struct nf_conn *ct = nf_ct_tuplehash_to_ctrack(h);
struct nf_conn *loser_ct;
loser_ct = nf_ct_get(skb, &ctinfo);
/* The existing entry is already dying: drop the packet. */
if (nf_ct_is_dying(ct))
return NF_DROP;
/*
 * Existing entry has none of the NAT-done bits set, or both entries
 * match: take a reference on the existing entry, merge the loser's
 * accounting into it and put the loser on the dying list.
 */
if (((ct->status & IPS_NAT_DONE_MASK) == 0) ||
nf_ct_match(ct, loser_ct)) {
struct net *net = nf_ct_net(ct);
nf_conntrack_get(&ct->ct_general);
nf_ct_acct_merge(ct, ctinfo, loser_ct);
nf_ct_add_to_dying_list(loser_ct);
需要合并的仅一个报文,只需要读出其字节数,添加到已有连接跟踪即可。
/*
 * Merge accounting from @loser_ct into @ct: read the byte counter of
 * @loser_ct for the direction derived from @ctinfo and account it on @ct
 * as a single packet. Nothing is merged when @loser_ct has no accounting
 * extension. (Excerpt: closing braces are elided.)
 */
static void nf_ct_acct_merge(struct nf_conn *ct, enum ip_conntrack_info ctinfo,
const struct nf_conn *loser_ct)
{
struct nf_conn_acct *acct;
acct = nf_conn_acct_find(loser_ct);
if (acct) {
struct nf_conn_counter *counter = acct->counter;
/* The 64-bit counter is narrowed to unsigned int here; see the comment below. */
unsigned int bytes;
/* u32 should be fine since we must have seen one packet. */
bytes = atomic64_read(&counter[CTINFO2DIR(ctinfo)].bytes);
nf_ct_acct_update(ct, CTINFO2DIR(ctinfo), bytes);
快速路径统计数据
由于flowtable的处理跳过了netfilter的hook点,在flowtable的处理中需要另外进行数据统计。
/*
 * Excerpt of nf_flow_offload_ip_hook(): accounting done on the flowtable
 * path. The lookup of flow_table, flow and tuplehash is elided.
 */
unsigned int
nf_flow_offload_ip_hook(void *priv, struct sk_buff *skb,
const struct nf_hook_state *state)
{
/* Account the packet only when the flowtable has NF_FLOWTABLE_COUNTER set. */
if (flow_table->flags & NF_FLOWTABLE_COUNTER)
nf_ct_acct_update(flow->ct, tuplehash->tuple.dir, skb->len);
/*
 * Excerpt of nf_flow_offload_ipv6_hook(): accounting done on the flowtable
 * path for IPv6. The lookup of flow_table, flow and tuplehash is elided.
 */
unsigned int
nf_flow_offload_ipv6_hook(void *priv, struct sk_buff *skb,
const struct nf_hook_state *state)
{
/* Account the packet only when the flowtable has NF_FLOWTABLE_COUNTER set. */
if (flow_table->flags & NF_FLOWTABLE_COUNTER)
nf_ct_acct_update(flow->ct, tuplehash->tuple.dir, skb->len);
内核版本 5.10