Linux协议栈优化之Netfilter分类conntrack推荐

首先,如果你不同意我以下的观点,本文就不必看了:如今内存不值钱,空间换时间很划算,要知道,一万年前的1秒和一万年后的1秒是一样的,你要是觉得人们做的事情可能不同,但是请记住,永远都会发生“安迪给你的被比尔拿走”之类的事情,对于你而言,什么都没有改变!虽然空间可以被拓展,但是要讲技巧。以下是一张截图,测试的是http服务器的常规性能,我依然使用相对值比较,因为我的电脑压不出真实性能,另外,为了将网络影响降低,我使用了本地环回地址,我的目的是测试conntrack对新建连接数的影响,截图如下:如果你稍微明白一些conntrack的原理,就会明白它的开销主要集中在两个方面,一个是查找开销,在内存不值钱的年代,可以通过设置很大的hash bucket来缓解,另外一个开销就是分配conn结构体内存的开销。如果在一台压力很大的设备上,短连接特别多,所有的conntrack将会在一张hash表内,我们所能指望的就是针对一个五元组计算的hash值足够散列了。但是对于一些攻击流量,特别是当攻击者研究了Linux计算conntrack hash值的算法后,他会构造很多hash一致的不同五元组的数据包,这会使得hash表的冲突链表过长,遍历开销过大,在当前的内核conntrack模块实现中,这是无法避免的,因为所有鸡蛋都在一个篮子里面。现在换一个思路。 将不同类的数据流分到不同的hash表中如何?也就是说将一张大表拆分成几张小表,或者说如果你真的不在乎内存的话,设置多张大表也行!那么剩下的问题就是将数据包进行分类了。可以有两种实现:1.在RAW表中做一个模块根据skb的特征设置其conntrack表索引,值得注意的是,为了避免同一个流分配到不同的hash表,match项必须是计算hash的元素,比如源/目标IP,源/目标端口,协议等。对于IP分片直接通过,不予考虑。2.在MANGLE表中做一个优先级大于conntrack小于defrag的模块。这个就不多说了,和1一样,就是解决了IP分片的问题。iptables规则如下:
iptables -t mangle -A PREROUTING -s 1.1.1.0/24 -j CONNMAP 2
以上规则将源IP为1.1.1.0/24段的数据包的conntrack设置在表2中。你也可以区分TCP协议和UDP协议,每一个协议一张表,这样就可以避免恶意UDP流量攻击问题。在加载模块的时候,你需要设置一个参数,即hash表的数量。 为了快速测试效果,我依然是先将代码写死,设置了3张hash表,基于内核3.2.5,两个patch如下:C文件patch:

diff-uNrlinux-source-3.2/net/ipv4/netfilter/ipt_MASQUERADE.clinux-source-3.2.new/net/ipv4/netfilter/ipt_MASQUERADE.c---linux-source-3.2/net/ipv4/netfilter/ipt_MASQUERADE.c2014-01-0312:33:36.000000000+0800+++linux-source-3.2.new/net/ipv4/netfilter/ipt_MASQUERADE.c2014-07-1110:32:57.736666273+0800@@-85,7+85,7@@mr- range[0].min,mr- range[0].max});/*Handmodifiedrangetogenericsetup.*/-returnnf_nat_setup_info(ct, newrange,IP_NAT_MANIP_SRC);+returnnf_nat_setup_info(ct, newrange,IP_NAT_MANIP_SRC,0);}staticintdiff-uNrlinux-source-3.2/net/ipv4/netfilter/ipt_NETMAP.clinux-source-3.2.new/net/ipv4/netfilter/ipt_NETMAP.c---linux-source-3.2/net/ipv4/netfilter/ipt_NETMAP.c2014-01-0312:33:36.000000000+0800+++linux-source-3.2.new/net/ipv4/netfilter/ipt_NETMAP.c2014-07-1110:35:14.976667434+0800@@-67,7+67,7@@mr- range[0].min,mr- range[0].max});/*Handmodifiedrangetogenericsetup.*/-returnnf_nat_setup_info(ct, newrange,HOOK2MANIP(par- hooknum));+returnnf_nat_setup_info(ct, newrange,HOOK2MANIP(par- hooknum),0);}staticstructxt_targetnetmap_tg_reg__read_mostly={diff-uNrlinux-source-3.2/net/ipv4/netfilter/ipt_REDIRECT.clinux-source-3.2.new/net/ipv4/netfilter/ipt_REDIRECT.c---linux-source-3.2/net/ipv4/netfilter/ipt_REDIRECT.c2014-01-0312:33:36.000000000+0800+++linux-source-3.2.new/net/ipv4/netfilter/ipt_REDIRECT.c2014-07-1110:36:45.760668202+0800@@-82,7+82,7@@mr- range[0].min,mr- range[0].max});/*Handmodifiedrangetogenericsetup.*/-returnnf_nat_setup_info(ct, newrange,IP_NAT_MANIP_DST);+returnnf_nat_setup_info(ct, 
newrange,IP_NAT_MANIP_DST,0);}staticstructxt_targetredirect_tg_reg__read_mostly={diff-uNrlinux-source-3.2/net/ipv4/netfilter/nf_conntrack_l3proto_ipv4.clinux-source-3.2.new/net/ipv4/netfilter/nf_conntrack_l3proto_ipv4.c---linux-source-3.2/net/ipv4/netfilter/nf_conntrack_l3proto_ipv4.c2014-01-0312:33:36.000000000+0800+++linux-source-3.2.new/net/ipv4/netfilter/nf_conntrack_l3proto_ipv4.c2014-07-1115:03:50.596093049+0800@@-151,6+151,12@@conststructnet_device*out,int(*okfn)(structsk_buff*)){+structiphdr*hdr=ip_hdr(skb);+if(ipv4_is_loopback(hdr- saddr)||ipv4_is_loopback(hdr- daddr)){+skb- ij=1;+}else{+skb- ij=0;+}returnnf_conntrack_in(dev_net(in),PF_INET,hooknum,skb);}@@-160,6+166,12@@conststructnet_device*out,int(*okfn)(structsk_buff*)){+structiphdr*hdr=ip_hdr(skb);+if(ipv4_is_loopback(hdr- saddr)||ipv4_is_loopback(hdr- daddr)){+skb- ij=1;+}else{+skb- ij=0;+}/*rootisplayingwithrawsockets.*/if(skb- len sizeof(structiphdr)||ip_hdrlen(skb) sizeof(structiphdr))@@-254,7+266,7@@getorigdst(structsock*sk,intoptval,void__user*user,int*len){conststructinet_sock*inet=inet_sk(sk);-conststructnf_conntrack_tuple_hash*h;+/*conststructnf_conntrack_tuple_hash*h;*/structnf_conntrack_tupletuple;memset( tuple,0,sizeof(tuple));@@-276,7+288,7@@*len,sizeof(structsockaddr_in));return-EINVAL;}h=nf_conntrack_find_get(sock_net(sk),NF_CT_DEFAULT_ZONE, tuple);if(h){structsockaddr_insin;@@-297,6+309,7@@elsereturn0;}pr_debug("SO_ORIGINAL_DST:Can'tfind%pI4/%u-%pI4/%u.\n", tuple.src.u3.ip,ntohs(tuple.src.u.tcp.port), tuple.dst.u3.ip,ntohs(tuple.dst.u.tcp.port));diff-uNrlinux-source-3.2/net/ipv4/netfilter/nf_conntrack_l3proto_ipv4_compat.clinux-source-3.2.new/net/ipv4/netfilter/nf_conntrack_l3proto_ipv4_compat.c---linux-source-3.2/net/ipv4/netfilter/nf_conntrack_l3proto_ipv4_compat.c2014-01-0312:33:36.000000000+0800+++linux-source-3.2.new/net/ipv4/netfilter/nf_conntrack_l3proto_ipv4_compat.c2014-07-1109:11:02.144624681+0800@@-38,7+38,7@@st- bucket net- ct.htable_size;st- 
bucket++){n=rcu_dereference(-hlist_nulls_first_rcu( net- ct.hash[st- bucket]));+hlist_nulls_first_rcu( net- ct.hash[0][st- bucket]));if(!is_a_nulls(n))returnn;}@@-58,7+58,7@@returnNULL;}head=rcu_dereference(-hlist_nulls_first_rcu( net- ct.hash[st- bucket]));+hlist_nulls_first_rcu( net- ct.hash[0][st- bucket]));}returnhead;}diff-uNrlinux-source-3.2/net/ipv4/netfilter/nf_conntrack_proto_icmp.clinux-source-3.2.new/net/ipv4/netfilter/nf_conntrack_proto_icmp.c---linux-source-3.2/net/ipv4/netfilter/nf_conntrack_proto_icmp.c2014-01-0312:33:36.000000000+0800+++linux-source-3.2.new/net/ipv4/netfilter/nf_conntrack_proto_icmp.c2014-07-1109:55:31.704647269+0800@@-148,7+148,7@@*ctinfo=IP_CT_RELATED;-h=nf_conntrack_find_get(net,zone, innertuple);+h=nf_conntrack_find_get(net,zone, innertuple,skb- if(!h){pr_debug("icmp_error_message:nomatch\n");return-NF_ACCEPT;diff-uNrlinux-source-3.2/net/ipv4/netfilter/nf_nat_core.clinux-source-3.2.new/net/ipv4/netfilter/nf_nat_core.c---linux-source-3.2/net/ipv4/netfilter/nf_nat_core.c2014-01-0312:33:36.000000000+0800+++linux-source-3.2.new/net/ipv4/netfilter/nf_nat_core.c2014-07-1110:24:16.544661863+0800@@-74,7+74,7@@structnf_conntrack_tuplereply;nf_ct_invert_tuplepr( reply,tuple);-returnnf_conntrack_tuple_taken( reply,ignored_conntrack);+returnnf_conntrack_tuple_taken( reply,ignored_conntrack,0);}EXPORT_SYMBOL(nf_nat_used_tuple);@@-206,7+206,7@@conststructnf_conntrack_tuple*orig_tuple,conststructnf_nat_range*range,structnf_conn*ct,-enumnf_nat_manip_typemaniptype)+enumnf_nat_manip_typemaniptype,intij){structnet*net=nf_ct_net(ct);conststructnf_nat_protocol*proto;@@-268,7+268,7@@unsignedintnf_nat_setup_info(structnf_conn*ct,conststructnf_nat_range*range,-enumnf_nat_manip_typemaniptype)+enumnf_nat_manip_typemaniptype,intij){structnet*net=nf_ct_net(ct);structnf_conntrack_tuplecurr_tuple,new_tuple;@@-296,7+296,7@@nf_ct_invert_tuplepr( curr_tuple, ct- tuplehash[IP_CT_DIR_REPLY].tuple);-get_unique_tuple( new_tuple, 
curr_tuple,range,ct,maniptype);+get_unique_tuple( new_tuple, curr_tuple,range,ct,maniptype,ij);if(!nf_ct_tuple_equal( new_tuple, curr_tuple)){structnf_conntrack_tuplereply;@@-670,7+670,7@@if(nf_nat_initialized(ct,manip))return-EEXIST;-returnnf_nat_setup_info(ct, range,manip);+returnnf_nat_setup_info(ct, range,manip,0);}#elsestaticintdiff-uNrlinux-source-3.2/net/ipv4/netfilter/nf_nat_h323.clinux-source-3.2.new/net/ipv4/netfilter/nf_nat_h323.c---linux-source-3.2/net/ipv4/netfilter/nf_nat_h323.c2014-01-0312:33:36.000000000+0800+++linux-source-3.2.new/net/ipv4/netfilter/nf_nat_h323.c2014-07-1110:08:16.712653742+0800@@-411,14+411,14@@/*Changesrctowheremastersendsto*/range.flags=IP_NAT_RANGE_MAP_IPS;range.min_ip=range.max_ip=new- tuplehash[!this- dir].tuple.src.u3.ip;-nf_nat_setup_info(new, range,IP_NAT_MANIP_SRC);+nf_nat_setup_info(new, range,IP_NAT_MANIP_SRC,0);/*ForDSTmanip,mapportheretowhereit'sexpected.*/range.flags=(IP_NAT_RANGE_MAP_IPS|IP_NAT_RANGE_PROTO_SPECIFIED);range.min=range.max=this- saved_proto;range.min_ip=range.max_ip=new- master- tuplehash[!this- dir].tuple.src.u3.ip;-nf_nat_setup_info(new, range,IP_NAT_MANIP_DST);+nf_nat_setup_info(new, range,IP_NAT_MANIP_DST,0);}/****************************************************************************/@@-504,13+504,13@@/*Changesrctowheremastersendsto*/range.flags=IP_NAT_RANGE_MAP_IPS;range.min_ip=range.max_ip=new- tuplehash[!this- dir].tuple.src.u3.ip;-nf_nat_setup_info(new, range,IP_NAT_MANIP_SRC);+nf_nat_setup_info(new, range,IP_NAT_MANIP_SRC,0);/*ForDSTmanip,mapportheretowhereit'sexpected.*/range.flags=(IP_NAT_RANGE_MAP_IPS|IP_NAT_RANGE_PROTO_SPECIFIED);range.min=range.max=this- saved_proto;range.min_ip=range.max_ip=this- saved_ip;-nf_nat_setup_info(new, range,IP_NAT_MANIP_DST);+nf_nat_setup_info(new, 
range,IP_NAT_MANIP_DST,0);}/****************************************************************************/diff-uNrlinux-source-3.2/net/ipv4/netfilter/nf_nat_helper.clinux-source-3.2.new/net/ipv4/netfilter/nf_nat_helper.c---linux-source-3.2/net/ipv4/netfilter/nf_nat_helper.c2014-01-0312:33:36.000000000+0800+++linux-source-3.2.new/net/ipv4/netfilter/nf_nat_helper.c2014-07-1110:08:55.008654066+0800@@-439,13+439,13@@range.flags=IP_NAT_RANGE_MAP_IPS;range.min_ip=range.max_ip=ct- master- tuplehash[!exp- dir].tuple.dst.u3.ip;-nf_nat_setup_info(ct, range,IP_NAT_MANIP_SRC);+nf_nat_setup_info(ct, range,IP_NAT_MANIP_SRC,0);/*ForDSTmanip,mapportheretowhereit'sexpected.*/range.flags=(IP_NAT_RANGE_MAP_IPS|IP_NAT_RANGE_PROTO_SPECIFIED);range.min=range.max=exp- saved_proto;range.min_ip=range.max_ip=ct- master- tuplehash[!exp- dir].tuple.src.u3.ip;-nf_nat_setup_info(ct, range,IP_NAT_MANIP_DST);+nf_nat_setup_info(ct, range,IP_NAT_MANIP_DST,0);}EXPORT_SYMBOL(nf_nat_follow_master);diff-uNrlinux-source-3.2/net/ipv4/netfilter/nf_nat_pptp.clinux-source-3.2.new/net/ipv4/netfilter/nf_nat_pptp.c---linux-source-3.2/net/ipv4/netfilter/nf_nat_pptp.c2014-01-0312:33:36.000000000+0800+++linux-source-3.2.new/net/ipv4/netfilter/nf_nat_pptp.c2014-07-1110:08:35.424653900+0800@@-95,7+95,7@@range.flags|=IP_NAT_RANGE_PROTO_SPECIFIED;range.min=range.max=exp- saved_proto;}-nf_nat_setup_info(ct, range,IP_NAT_MANIP_SRC);+nf_nat_setup_info(ct, range,IP_NAT_MANIP_SRC,0);/*ForDSTmanip,mapportheretowhereit'sexpected.*/range.flags=IP_NAT_RANGE_MAP_IPS;@@-105,7+105,7@@range.flags|=IP_NAT_RANGE_PROTO_SPECIFIED;range.min=range.max=exp- saved_proto;}-nf_nat_setup_info(ct, range,IP_NAT_MANIP_DST);+nf_nat_setup_info(ct, 
range,IP_NAT_MANIP_DST,0);}/*outboundpackets==fromPNStoPAC*/diff-uNrlinux-source-3.2/net/ipv4/netfilter/nf_nat_rule.clinux-source-3.2.new/net/ipv4/netfilter/nf_nat_rule.c---linux-source-3.2/net/ipv4/netfilter/nf_nat_rule.c2014-01-0312:33:36.000000000+0800+++linux-source-3.2.new/net/ipv4/netfilter/nf_nat_rule.c2014-07-1110:09:31.024654370+0800@@-56,7+56,7@@ctinfo==IP_CT_RELATED_REPLY));NF_CT_ASSERT(par- out!=NULL);-returnnf_nat_setup_info(ct, mr- range[0],IP_NAT_MANIP_SRC);+returnnf_nat_setup_info(ct, mr- range[0],IP_NAT_MANIP_SRC,0);}staticunsignedint@@-74,7+74,7@@/*Connectionmustbevalidandnew.*/NF_CT_ASSERT(ct (ctinfo==IP_CT_NEW||ctinfo==IP_CT_RELATED));-returnnf_nat_setup_info(ct, mr- range[0],IP_NAT_MANIP_DST);+returnnf_nat_setup_info(ct, mr- range[0],IP_NAT_MANIP_DST,0);}staticintipt_snat_checkentry(conststructxt_tgchk_param*par)@@-115,7+115,7@@ ct- tuplehash[IP_CT_DIR_REPLY].tuple.dst.u3.ip: ct- tuplehash[IP_CT_DIR_REPLY].tuple.src.u3.ip);-returnnf_nat_setup_info(ct, range,HOOK2MANIP(hooknum));+returnnf_nat_setup_info(ct, range,HOOK2MANIP(hooknum),0);}intnf_nat_rule_find(structsk_buff*skb,diff-uNrlinux-source-3.2/net/ipv4/netfilter/nf_nat_sip.clinux-source-3.2.new/net/ipv4/netfilter/nf_nat_sip.c---linux-source-3.2/net/ipv4/netfilter/nf_nat_sip.c2014-01-0312:33:36.000000000+0800+++linux-source-3.2.new/net/ipv4/netfilter/nf_nat_sip.c2014-07-1110:07:40.744653437+0800@@-259,7+259,7@@range.flags=(IP_NAT_RANGE_MAP_IPS|IP_NAT_RANGE_PROTO_SPECIFIED);range.min=range.max=exp- saved_proto;range.min_ip=range.max_ip=exp- saved_ip;-nf_nat_setup_info(ct, range,IP_NAT_MANIP_DST);+nf_nat_setup_info(ct, range,IP_NAT_MANIP_DST,0);/*Changesrctowheremastersendsto,butonlyiftheconnection*actuallycamefromthesamesource.*/@@-268,7+268,7@@range.flags=IP_NAT_RANGE_MAP_IPS;range.min_ip=range.max_ip=ct- master- tuplehash[!exp- dir].tuple.dst.u3.ip;-nf_nat_setup_info(ct, range,IP_NAT_MANIP_SRC);+nf_nat_setup_info(ct, 
range,IP_NAT_MANIP_SRC,0);}}diff-uNrlinux-source-3.2/net/ipv6/netfilter/nf_conntrack_proto_icmpv6.clinux-source-3.2.new/net/ipv6/netfilter/nf_conntrack_proto_icmpv6.c---linux-source-3.2/net/ipv6/netfilter/nf_conntrack_proto_icmpv6.c2014-01-0312:33:36.000000000+0800+++linux-source-3.2.new/net/ipv6/netfilter/nf_conntrack_proto_icmpv6.c2014-07-1109:12:04.320625207+0800@@-165,7+165,7@@*ctinfo=IP_CT_RELATED;-h=nf_conntrack_find_get(net,zone, intuple);+h=nf_conntrack_find_get(net,zone, intuple,skb- if(!h){pr_debug("icmpv6_error:nomatch\n");return-NF_ACCEPT;diff-uNrlinux-source-3.2/net/netfilter/ipvs/ip_vs_nfct.clinux-source-3.2.new/net/netfilter/ipvs/ip_vs_nfct.c---linux-source-3.2/net/netfilter/ipvs/ip_vs_nfct.c2014-01-0312:33:36.000000000+0800+++linux-source-3.2.new/net/netfilter/ipvs/ip_vs_nfct.c2014-07-1111:10:42.224685433+0800@@-270,7+270,7@@__func__,ARG_TUPLE( tuple),ARG_CONN(cp));h=nf_conntrack_find_get(ip_vs_conn_net(cp),NF_CT_DEFAULT_ZONE,- tuple);+ tuple,0);if(h){ct=nf_ct_tuplehash_to_ctrack(h);/*Showwhathappensinsteadofcallingnf_ct_kill()*/diff-uNrlinux-source-3.2/net/netfilter/nf_conntrack_core.clinux-source-3.2.new/net/netfilter/nf_conntrack_core.c---linux-source-3.2/net/netfilter/nf_conntrack_core.c2014-01-0312:33:36.000000000+0800+++linux-source-3.2.new/net/netfilter/nf_conntrack_core.c2014-07-1110:48:19.948674076+0800@@-316,7+316,7@@*/staticstructnf_conntrack_tuple_hash*____nf_conntrack_find(structnet*net,u16zone,-conststructnf_conntrack_tuple*tuple,u32hash)+conststructnf_conntrack_tuple*tuple,u32hash,intij){structnf_conntrack_tuple_hash*h;structhlist_nulls_node*n;@@-327,7+327,7@@*/local_bh_disable();begin:-hlist_nulls_for_each_entry_rcu(h,n, net- ct.hash[bucket],hnnode){+hlist_nulls_for_each_entry_rcu(h,n, net- ct.hash[ij][bucket],hnnode){if(nf_ct_tuple_equal(tuple, h- tuple) 
nf_ct_zone(nf_ct_tuplehash_to_ctrack(h))==zone){NF_CT_STAT_INC(net,found);@@-352,24+352,24@@structnf_conntrack_tuple_hash*__nf_conntrack_find(structnet*net,u16zone,-conststructnf_conntrack_tuple*tuple)+conststructnf_conntrack_tuple*tuple,intij){return____nf_conntrack_find(net,zone,tuple,-hash_conntrack_raw(tuple,zone));+hash_conntrack_raw(tuple,zone),ij);}EXPORT_SYMBOL_GPL(__nf_conntrack_find);/*Findaconnectioncorrespondingtoatuple.*/staticstructnf_conntrack_tuple_hash*__nf_conntrack_find_get(structnet*net,u16zone,-conststructnf_conntrack_tuple*tuple,u32hash)+conststructnf_conntrack_tuple*tuple,u32hash,intij){structnf_conntrack_tuple_hash*h;structnf_conn*ct;rcu_read_lock();begin:-h=____nf_conntrack_find(net,zone,tuple,hash);+h=____nf_conntrack_find(net,zone,tuple,hash,ij);if(h){ct=nf_ct_tuplehash_to_ctrack(h);if(unlikely(nf_ct_is_dying(ct)||@@-390,26+390,27@@structnf_conntrack_tuple_hash*nf_conntrack_find_get(structnet*net,u16zone,-conststructnf_conntrack_tuple*tuple)+conststructnf_conntrack_tuple*tuple,intij){return__nf_conntrack_find_get(net,zone,tuple,-hash_conntrack_raw(tuple,zone));+hash_conntrack_raw(tuple,zone),ij);}EXPORT_SYMBOL_GPL(nf_conntrack_find_get);staticvoid__nf_conntrack_hash_insert(structnf_conn*ct,unsignedinthash,-unsignedintrepl_hash)+unsignedintrepl_hash,+intij){structnet*net=nf_ct_net(ct);hlist_nulls_add_head_rcu( ct- tuplehash[IP_CT_DIR_ORIGINAL].hnnode,- net- ct.hash[hash]);+ net- ct.hash[ij][hash]);hlist_nulls_add_head_rcu( ct- tuplehash[IP_CT_DIR_REPLY].hnnode,- net- ct.hash[repl_hash]);+ net- ct.hash[ij][repl_hash]);}-voidnf_conntrack_hash_insert(structnf_conn*ct)+voidnf_conntrack_hash_insert(structnf_conn*ct,intij){structnet*net=nf_ct_net(ct);unsignedinthash,repl_hash;@@-419,7+420,7@@hash=hash_conntrack(net,zone, ct- tuplehash[IP_CT_DIR_ORIGINAL].tuple);repl_hash=hash_conntrack(net,zone, ct- 
tuplehash[IP_CT_DIR_REPLY].tuple);-__nf_conntrack_hash_insert(ct,hash,repl_hash);+__nf_conntrack_hash_insert(ct,hash,repl_hash,ij);}EXPORT_SYMBOL_GPL(nf_conntrack_hash_insert);@@-479,12+480,12@@/*Seeifthere'soneinthelistalready,includingreverse:NATcouldhavegrabbeditwithoutrealizing,sincewe'renotinthehash.Ifthereis,welostrace.*/-hlist_nulls_for_each_entry(h,n, net- ct.hash[hash],hnnode)+hlist_nulls_for_each_entry(h,n, net- ct.hash[skb- ij][hash],hnnode)if(nf_ct_tuple_equal( ct- tuplehash[IP_CT_DIR_ORIGINAL].tuple, h- tuple) zone==nf_ct_zone(nf_ct_tuplehash_to_ctrack(h)))gotoout;-hlist_nulls_for_each_entry(h,n, net- ct.hash[repl_hash],hnnode)+hlist_nulls_for_each_entry(h,n, net- ct.hash[skb- ij][repl_hash],hnnode)if(nf_ct_tuple_equal( ct- tuplehash[IP_CT_DIR_REPLY].tuple, h- tuple) zone==nf_ct_zone(nf_ct_tuplehash_to_ctrack(h)))@@-514,7+515,7@@*guaranteethatnootherCPUcanfindtheconntrackbeforetheabove*storesarevisible.*/-__nf_conntrack_hash_insert(ct,hash,repl_hash);+__nf_conntrack_hash_insert(ct,hash,repl_hash,skb- NF_CT_STAT_INC(net,insert);spin_unlock_bh( nf_conntrack_lock);@@-537,7+538,7@@forNAT).*/intnf_conntrack_tuple_taken(conststructnf_conntrack_tuple*tuple,-conststructnf_conn*ignored_conntrack)+conststructnf_conn*ignored_conntrack,intij){structnet*net=nf_ct_net(ignored_conntrack);structnf_conntrack_tuple_hash*h;@@-550,7+551,7@@*leastonceforthestatsanyway.*/rcu_read_lock_bh();-hlist_nulls_for_each_entry_rcu(h,n, net- ct.hash[hash],hnnode){+hlist_nulls_for_each_entry_rcu(h,n, net- ct.hash[ij][hash],hnnode){ct=nf_ct_tuplehash_to_ctrack(h);if(ct!=ignored_conntrack nf_ct_tuple_equal(tuple, h- tuple) @@-571,7+572,7@@/*There'sasmallraceherewherewemayfreeajust-assuredconnection.Toobad:we'reintroubleanyway.*/-staticnoinlineintearly_drop(structnet*net,unsignedinthash)+staticnoinlineintearly_drop(structnet*net,unsignedinthash,intij){/*Useoldestentry,whichisroughlyLRU*/structnf_conntrack_tuple_hash*h;@@-582,7+583,7@@rcu_read_lock();for(i=0;i net- 
ct.htable_size;i++){-hlist_nulls_for_each_entry_rcu(h,n, net- ct.hash[hash],+hlist_nulls_for_each_entry_rcu(h,n, net- ct.hash[ij][hash],hnnode){tmp=nf_ct_tuplehash_to_ctrack(h);if(!test_bit(IPS_ASSURED_BIT, tmp- status))@@-636,7+637,7@@__nf_conntrack_alloc(structnet*net,u16zone,conststructnf_conntrack_tuple*orig,conststructnf_conntrack_tuple*repl,-gfp_tgfp,u32hash)+gfp_tgfp,u32hash,intij){structnf_conn*ct;@@-651,7+652,7@@if(nf_conntrack_max unlikely(atomic_read( net- ct.count) nf_conntrack_max)){-if(!early_drop(net,hash_bucket(hash,net))){+if(!early_drop(net,hash_bucket(hash,net),ij)){atomic_dec( net- ct.count);if(net_ratelimit())printk(KERN_WARNING@@-713,9+714,9@@structnf_conn*nf_conntrack_alloc(structnet*net,u16zone,conststructnf_conntrack_tuple*orig,conststructnf_conntrack_tuple*repl,-gfp_tgfp)+gfp_tgfp,intij){-return__nf_conntrack_alloc(net,zone,orig,repl,gfp,0);+return__nf_conntrack_alloc(net,zone,orig,repl,gfp,0,ij);}EXPORT_SYMBOL_GPL(nf_conntrack_alloc);@@-753,7+754,7@@}ct=__nf_conntrack_alloc(net,zone,tuple, repl_tuple,GFP_ATOMIC,-hash);+hash,skb- if(IS_ERR(ct))return(structnf_conntrack_tuple_hash*)ct;@@-840,7+841,7@@/*lookfortuplematch*/hash=hash_conntrack_raw( tuple,zone);-h=__nf_conntrack_find_get(net,zone, tuple,hash);+h=__nf_conntrack_find_get(net,zone, tuple,hash,skb- if(!h){h=init_conntrack(net,tmpl, tuple,l3proto,l4proto,skb,dataoff,hash);@@-1170,7+1171,7@@spin_lock_bh( nf_conntrack_lock);for(;*bucket net- ct.htable_size;(*bucket)++){-hlist_nulls_for_each_entry(h,n, net- ct.hash[*bucket],hnnode){+hlist_nulls_for_each_entry(h,n, net- ct.hash[0][*bucket],hnnode){ct=nf_ct_tuplehash_to_ctrack(h);if(iter(ct,data))gotofound;@@-1297,6+1298,7@@staticvoidnf_conntrack_cleanup_net(structnet*net){+inti=0;i_see_dead_people:nf_ct_iterate_cleanup(net,kill_all,NULL);nf_ct_release_dying_list(net);@@-1305,7+1307,9@@gotoi_see_dead_people;}-nf_ct_free_hashtable(net- ct.hash,net- ct.htable_size);+for(i=0;i 3;i++){+nf_ct_free_hashtable(net- ct.hash[i],net- 
ct.htable_size);+}nf_conntrack_ecache_fini(net);nf_conntrack_tstamp_fini(net);nf_conntrack_acct_fini(net);@@-1364,7+1368,7@@{inti,bucket;unsignedinthashsize,old_size;-structhlist_nulls_head*hash,*old_hash;+structhlist_nulls_head*hash[3],*old_hash[3];structnf_conntrack_tuple_hash*h;structnf_conn*ct;@@-1378,10+1382,19@@hashsize=simple_strtoul(val,NULL,0);if(!hashsize)return-EINVAL;-hash=nf_ct_alloc_hashtable( hashsize,1);-if(!hash)-return-ENOMEM;+{+intk=0;+for(k=0;k 3;k++){+hash[k]=nf_ct_alloc_hashtable( hashsize,1);+if(!hash[k]){+intj=0;+for(j=0;j k;j++){+//freehash[j];+}+return-ENOMEM;+}+}+}/*Lookupsintheoldhashmighthappeninparallel,whichmeanswe*mightgetfalsenegativesduringconnectionlookup.Newconnections@@-1390,24+1403,38@@*/spin_lock_bh( nf_conntrack_lock);for(i=0;i init_net.ct.htable_size;i++){-while(!hlist_nulls_empty( init_net.ct.hash[i])){-h=hlist_nulls_entry(init_net.ct.hash[i].first,+intk=0;+for(k=0;k 3;k++){+while(!hlist_nulls_empty( init_net.ct.hash[k][i])){+h=hlist_nulls_entry(init_net.ct.hash[k][i].first,structnf_conntrack_tuple_hash,hnnode);ct=nf_ct_tuplehash_to_ctrack(h);hlist_nulls_del_rcu( h- hnnode);bucket=__hash_conntrack( h- tuple,nf_ct_zone(ct),hashsize);-hlist_nulls_add_head_rcu( h- hnnode, hash[bucket]);+hlist_nulls_add_head_rcu( h- hnnode, hash[k][bucket]);+}}}old_size=init_net.ct.htable_size;-old_hash=init_net.ct.hash;+old_hash[0]=init_net.ct.hash[0];+old_hash[1]=init_net.ct.hash[1];+old_hash[2]=init_net.ct.hash[2];init_net.ct.htable_size=nf_conntrack_htable_size=hashsize;-init_net.ct.hash=hash;+{+intk=0;+for(k=0;k 3;k++){+init_net.ct.hash[k]=hash[k];+}+}spin_unlock_bh( nf_conntrack_lock);-nf_ct_free_hashtable(old_hash,old_size);+{+intk=0;+for(k=0;k 3;k++){+nf_ct_free_hashtable(old_hash[k],old_size);+}+}return0;}EXPORT_SYMBOL_GPL(nf_conntrack_set_hashsize);@@-1494,6+1521,7@@staticintnf_conntrack_init_net(structnet*net){intret;+inti=0;atomic_set( net- ct.count,0);INIT_HLIST_NULLS_HEAD( net- 
ct.unconfirmed,UNCONFIRMED_NULLS_VAL);@@-1520,11+1548,13@@}net- ct.htable_size=nf_conntrack_htable_size;-net- ct.hash=nf_ct_alloc_hashtable( net- ct.htable_size,1);-if(!net- ct.hash){-ret=-ENOMEM;-printk(KERN_ERR"Unabletocreatenf_conntrack_hash\n");-gotoerr_hash;+for(i=0;i 3;i++){+net- ct.hash[i]=nf_ct_alloc_hashtable( net- ct.htable_size,1);+if(!net- ct.hash[i]){+ret=-ENOMEM;+printk(KERN_ERR"Unabletocreatenf_conntrack_hash\n");+gotoerr_hash;+}}ret=nf_conntrack_expect_init(net);if(ret 0)@@-1548,7+1578,9@@err_acct:nf_conntrack_expect_fini(net);err_expect:-nf_ct_free_hashtable(net- ct.hash,net- ct.htable_size);+for(i=0;i 3;i++){+nf_ct_free_hashtable(net- ct.hash[i],net- ct.htable_size);+}err_hash:kmem_cache_destroy(net- ct.nf_conntrack_cachep);err_cache:diff-uNrlinux-source-3.2/net/netfilter/nf_conntrack_helper.clinux-source-3.2.new/net/netfilter/nf_conntrack_helper.c---linux-source-3.2/net/netfilter/nf_conntrack_helper.c2014-01-0312:33:36.000000000+0800+++linux-source-3.2.new/net/netfilter/nf_conntrack_helper.c2014-07-1110:57:25.424678691+0800@@-227,7+227,7@@hlist_nulls_for_each_entry(h,nn, net- ct.unconfirmed,hnnode)unhelp(h,me);for(i=0;i net- ct.htable_size;i++){-hlist_nulls_for_each_entry(h,nn, net- ct.hash[i],hnnode)+hlist_nulls_for_each_entry(h,nn, net- ct.hash[0][i],hnnode)unhelp(h,me);}}diff-uNrlinux-source-3.2/net/netfilter/nf_conntrack_netlink.clinux-source-3.2.new/net/netfilter/nf_conntrack_netlink.c---linux-source-3.2/net/netfilter/nf_conntrack_netlink.c2014-01-0312:33:36.000000000+0800+++linux-source-3.2.new/net/netfilter/nf_conntrack_netlink.c2014-07-1111:02:54.168681473+0800@@-692,7+692,7@@last=(structnf_conn*)cb- args[1];for(;cb- args[0] net- ct.htable_size;cb- args[0]++){restart:-hlist_nulls_for_each_entry(h,n, net- ct.hash[cb- args[0]],+hlist_nulls_for_each_entry(h,n, net- ct.hash[0][cb- args[0]],hnnode){if(NF_CT_DIRECTION(h)!=IP_CT_DIR_ORIGINAL)continue;@@-920,7+920,7@@if(err 0)returnerr;-h=nf_conntrack_find_get(net,zone, 
tuple);+h=nf_conntrack_find_get(net,zone, tuple,0);if(!h)return-ENOENT;@@-986,7+986,7@@if(err 0)returnerr;-h=nf_conntrack_find_get(net,zone, tuple);+h=nf_conntrack_find_get(net,zone, tuple,skb- if(!h)return-ENOENT;@@-1336,7+1336,7@@structnf_conntrack_helper*helper;structnf_conn_tstamp*tstamp;-ct=nf_conntrack_alloc(net,zone,otuple,rtuple,GFP_ATOMIC);+ct=nf_conntrack_alloc(net,zone,otuple,rtuple,GFP_ATOMIC,0);if(IS_ERR(ct))returnERR_PTR(-ENOMEM);@@-1446,7+1446,7@@if(err 0)gotoerr2;-master_h=nf_conntrack_find_get(net,zone, master);+master_h=nf_conntrack_find_get(net,zone, master,0);if(master_h==NULL){err=-ENOENT;gotoerr2;@@-1460,7+1460,7@@tstamp- start=ktime_to_ns(ktime_get_real());add_timer( ct- timeout);-nf_conntrack_hash_insert(ct);+nf_conntrack_hash_insert(ct,0);rcu_read_unlock();returnct;@@-1503,9+1503,9@@spin_lock_bh( nf_conntrack_lock);if(cda[CTA_TUPLE_ORIG])-h=__nf_conntrack_find(net,zone, otuple);+h=__nf_conntrack_find(net,zone, otuple,0);elseif(cda[CTA_TUPLE_REPLY])-h=__nf_conntrack_find(net,zone, rtuple);+h=__nf_conntrack_find(net,zone, rtuple,0);if(h==NULL){err=-ENOENT;@@-2020,7+2020,7@@returnerr;/*Lookformasterconntrackofthisexpectation*/-h=nf_conntrack_find_get(net,zone, master_tuple);+h=nf_conntrack_find_get(net,zone, 
master_tuple,0);if(!h)return-ENOENT;ct=nf_ct_tuplehash_to_ctrack(h);diff-uNrlinux-source-3.2/net/netfilter/nf_conntrack_pptp.clinux-source-3.2.new/net/netfilter/nf_conntrack_pptp.c---linux-source-3.2/net/netfilter/nf_conntrack_pptp.c2014-01-0312:33:36.000000000+0800+++linux-source-3.2.new/net/netfilter/nf_conntrack_pptp.c2014-07-1111:04:11.824682130+0800@@-148,7+148,7@@pr_debug("tryingtotimeoutctorexpfortuple");nf_ct_dump_tuple(t);-h=nf_conntrack_find_get(net,zone,t);+h=nf_conntrack_find_get(net,zone,t,0);if(h){sibling=nf_ct_tuplehash_to_ctrack(h);pr_debug("settingtimeoutofconntrack%pto0\n",sibling);diff-uNrlinux-source-3.2/net/netfilter/nf_conntrack_standalone.clinux-source-3.2.new/net/netfilter/nf_conntrack_standalone.c---linux-source-3.2/net/netfilter/nf_conntrack_standalone.c2014-01-0312:33:36.000000000+0800+++linux-source-3.2.new/net/netfilter/nf_conntrack_standalone.c2014-07-1110:56:08.276678039+0800@@-59,7+59,7@@for(st- bucket=0;st- bucket net- ct.htable_size;st- bucket++){-n=rcu_dereference(hlist_nulls_first_rcu( net- ct.hash[st- bucket]));+n=rcu_dereference(hlist_nulls_first_rcu( net- ct.hash[0][st- bucket]));if(!is_a_nulls(n))returnn;}@@-80,7+80,7@@}head=rcu_dereference(hlist_nulls_first_rcu(- net- ct.hash[st- bucket]));+ net- ct.hash[0][st- bucket]));}returnhead;}diff-uNrlinux-source-3.2/net/netfilter/xt_connlimit.clinux-source-3.2.new/net/netfilter/xt_connlimit.c---linux-source-3.2/net/netfilter/xt_connlimit.c2014-01-0312:33:36.000000000+0800+++linux-source-3.2.new/net/netfilter/xt_connlimit.c2014-07-1111:07:55.712684024+0800@@-117,7+117,7@@/*checkthesavedconnections*/hlist_for_each_entry_safe(conn,pos,n,hash,node){found=nf_conntrack_find_get(net,NF_CT_DEFAULT_ZONE,- conn- tuple);+ conn- 
tuple,0);found_ct=NULL;if(found!=NULL)diff-uNrlinux-source-3.2/net/netfilter/xt_CT.clinux-source-3.2.new/net/netfilter/xt_CT.c---linux-source-3.2/net/netfilter/xt_CT.c2014-01-0312:33:36.000000000+0800+++linux-source-3.2.new/net/netfilter/xt_CT.c2014-07-1111:06:38.648683372+0800@@-81,7+81,7@@gotoerr1;memset( t,0,sizeof(t));-ct=nf_conntrack_alloc(par- net,info- zone, t, t,GFP_KERNEL);+ct=nf_conntrack_alloc(par- net,info- zone, t, t,GFP_KERNEL,0);ret=PTR_ERR(ct);if(IS_ERR(ct))gotoerr2;diff-uNrlinux-source-3.2/include/linux/skbuff.hlinux-source-3.2.new/include/linux/skbuff.h---linux-source-3.2/include/linux/skbuff.h2014-02-0121:18:39.000000000+0800+++linux-source-3.2.new/include/linux/skbuff.h2014-07-1107:00:38.696558485+0800@@-474,6+474,7@@unsignedchar*head,*data;unsignedinttruesize;+intij;atomic_tusers;};diff-uNrlinux-source-3.2/include/linux/version.hlinux-source-3.2.new/include/linux/version.h---linux-source-3.2/include/linux/version.h1970-01-0108:00:00.000000000+0800+++linux-source-3.2.new/include/linux/version.h2014-07-1107:01:03.972558699+0800@@-0,0+1,2@@+#defineLINUX_VERSION_CODE197174+#defineKERNEL_VERSION(a,b,c)(((a) 16)+((b) 
8)+(c))diff-uNrlinux-source-3.2/include/net/netfilter/nf_conntrack_core.hlinux-source-3.2.new/include/net/netfilter/nf_conntrack_core.h---linux-source-3.2/include/net/netfilter/nf_conntrack_core.h2014-01-0312:33:36.000000000+0800+++linux-source-3.2.new/include/net/netfilter/nf_conntrack_core.h2014-07-1108:59:04.872618612+0800@@-50,7+50,7@@/*Findaconnectioncorrespondingtoatuple.*/externstructnf_conntrack_tuple_hash*nf_conntrack_find_get(structnet*net,u16zone,-conststructnf_conntrack_tuple*tuple);+conststructnf_conntrack_tuple*tuple,intij);externint__nf_conntrack_confirm(structsk_buff*skb);diff-uNrlinux-source-3.2/include/net/netfilter/nf_conntrack.hlinux-source-3.2.new/include/net/netfilter/nf_conntrack.h---linux-source-3.2/include/net/netfilter/nf_conntrack.h2014-01-0312:33:36.000000000+0800+++linux-source-3.2.new/include/net/netfilter/nf_conntrack.h2014-07-1109:05:35.412621916+0800@@-176,7+176,7@@conntrack).*/externintnf_conntrack_tuple_taken(conststructnf_conntrack_tuple*tuple,-conststructnf_conn*ignored_conntrack);+conststructnf_conn*ignored_conntrack,intij);/*Returnconntrack_infoandtuplehashforgivenskb.*/staticinlinestructnf_conn*@@-207,9+207,9@@externstructnf_conntrack_tuple_hash*__nf_conntrack_find(structnet*net,u16zone,-conststructnf_conntrack_tuple*tuple);+conststructnf_conntrack_tuple*tuple,intij);-externvoidnf_conntrack_hash_insert(structnf_conn*ct);+externvoidnf_conntrack_hash_insert(structnf_conn*ct,intij);externvoidnf_ct_delete_from_lists(structnf_conn*ct);externvoidnf_ct_insert_dying_list(structnf_conn*ct);@@-284,7+284,7@@nf_conntrack_alloc(structnet*net,u16zone,conststructnf_conntrack_tuple*orig,conststructnf_conntrack_tuple*repl,-gfp_tgfp);+gfp_tgfp,intij);staticinlineintnf_ct_is_template(conststructnf_conn*ct){diff-uNrlinux-source-3.2/include/net/netfilter/nf_nat.hlinux-source-3.2.new/include/net/netfilter/nf_nat.h---linux-source-3.2/include/net/netfilter/nf_nat.h2014-01-0312:33:36.000000000+0800+++linux-source-3.2.new/include/net/netfilter/nf_nat.h
2014-07-1110:05:22.568652268+0800@@-53,7+53,7@@/*Setuptheinfostructuretomapintothisrange.*/externunsignedintnf_nat_setup_info(structnf_conn*ct,conststructnf_nat_range*range,-enumnf_nat_manip_typemaniptype);+enumnf_nat_manip_typemaniptype,intij);/*Isthistuplealreadytaken?(notbyus)*/externintnf_nat_used_tuple(conststructnf_conntrack_tuple*tuple,diff-uNrlinux-source-3.2/include/net/netns/conntrack.hlinux-source-3.2.new/include/net/netns/conntrack.h---linux-source-3.2/include/net/netns/conntrack.h2014-01-0312:33:36.000000000+0800+++linux-source-3.2.new/include/net/netns/conntrack.h2014-07-1108:56:06.816617105+0800@@-13,7+13,7@@unsignedintexpect_count;unsignedinthtable_size;structkmem_cache*nf_conntrack_cachep;-structhlist_nulls_head*hash;+structhlist_nulls_head*hash[3];structhlist_head*expect_hash;structhlist_nulls_headunconfirmed;structhlist_nulls_headdying;

重新编译内核之后,效果十分可观,按照下面的脚本加入巨量的conntrack也丝毫不会影响最大连接数:

for ((i=1; i<=N; i++)); do for ((j=1; j<=N; j++)); do \
conntrack -I conntrack -s 172.129.$j.$i -d $j.$i.192.$j -p udp -t 4000 --sport 245 --dport 2001; \
conntrack -I conntrack -s 192.13.$i.$j -d $i.$j.24.19 -p tcp --state ESTABLISHED -t 4000 --sport 67 --dport 505; \
conntrack -I conntrack -s 172.129.$j.$i -d $j.$i.192.$j -p icmp -t 4000; \
done; done >/dev/null 2>&1
(注:原文两个循环的上限及比较符在转载时丢失,这里以N代替;由于$i、$j被用作IP地址的字节,N不应超过255。)

除了分了多张hash表之外,另外的好处在于锁的粒度变细了,每次在操作conntrack的时候只需要锁住和自己相关的hash表的锁即可,当然,在本次的优化中没有涉及这一点。和ipset结合起来就更猛了,本身ipset就可以定义一个集合,而该集合则可以用来计算conntrack的hash值,多表版本的conntrack和ipset结合,这在效果上不就是多级hash表吗?而且表面上还是iptables配置的。

流转的时光,都成为命途中美丽的点缀,

Linux协议栈优化之Netfilter分类conntrack推荐

相关文章:

你感兴趣的文章:

标签云: