iptables rp filter

 sysctl -w net.ipv4.conf.all.rp_filter=0

# Turn reverse-path filtering off for the global, default and per-NIC scopes.
for scope in all default eth0 eth1; do
  sysctl -w "net.ipv4.conf.${scope}.rp_filter=0"
done

# Port-forward TCP 192.168.60.15:60001 (arriving on eth1) to 192.168.117.165:22.
# The LOG rule is appended before the DNAT rule so that matching packets are
# logged (prefix "nat60001") before they are rewritten.
iptables -t nat -A PREROUTING -d 192.168.60.15 -i eth1 -p tcp -m tcp --dport 60001 -j LOG --log-level 4 --log-prefix nat60001
iptables -t nat -A PREROUTING -d 192.168.60.15 -i eth1 -p tcp -m tcp --dport 60001 -j DNAT --to-destination 192.168.117.165:22

# List PREROUTING with rule numbers so a rule can be deleted by index.
iptables -nv -t nat -L PREROUTING --line-numbers

# Delete rule #1 from each chain.
# (The POSTROUTING line previously carried a stray "-" argument, which iptables rejects.)
iptables -t nat -D POSTROUTING 1
iptables -t nat -D PREROUTING 1

# Log traffic to 192.168.117.165 that does not originate from 192.168.117.240.
iptables -t nat -A POSTROUTING ! -s 192.168.117.240 -d 192.168.117.165 -j LOG --log-level 4 --log-prefix nat165
iptables -nv -t nat -L POSTROUTING --line-numbers


iptables -t nat -vnL PREROUTING

# Disable rp_filter on every scope used in these notes (all, default, eth0, eth1).
for key in net.ipv4.conf.{all,default,eth0,eth1}.rp_filter; do
  sysctl -w "${key}=0"
done
[root@localhost ~]#  cat /proc/net/nf_conntrack  | grep 151
[root@localhost ~]#  cat /proc/net/nf_conntrack  | grep 151
[root@localhost ~]#  cat /proc/net/nf_conntrack  
ipv4     2 tcp      6 300 ESTABLISHED src=192.168.117.59 dst=192.168.117.240 sport=54141 dport=60000 src=192.168.117.240 dst=192.168.117.59 sport=60000 dport=54141 [ASSURED] mark=0 secctx=system_u:object_r:unlabeled_t:s0 zone=0 use=2
ipv4     2 udp      17 25 src=192.168.117.240 dst=193.182.111.14 sport=41984 dport=123 src=193.182.111.14 dst=192.168.117.240 sport=123 dport=41984 mark=0 secctx=system_u:object_r:unlabeled_t:s0 zone=0 use=2
ipv4     2 udp      17 2 src=192.168.117.30 dst=192.168.117.255 sport=138 dport=138 [UNREPLIED] src=192.168.117.255 dst=192.168.117.30 sport=138 dport=138 mark=0 secctx=system_u:object_r:unlabeled_t:s0 zone=0 use=2
ipv4     2 tcp      6 283 ESTABLISHED src=192.168.117.240 dst=192.168.117.59 sport=60000 dport=53755 src=192.168.117.59 dst=192.168.117.240 sport=53755 dport=60000 [ASSURED] mark=0 secctx=system_u:object_r:unlabeled_t:s0 zone=0 use=2
ipv4     2 tcp      6 431986 ESTABLISHED src=192.168.117.59 dst=192.168.117.240 sport=53872 dport=60000 src=192.168.117.240 dst=192.168.117.59 sport=60000 dport=53872 [ASSURED] mark=0 secctx=system_u:object_r:unlabeled_t:s0 zone=0 use=2
ipv4     2 udp      17 18 src=192.168.117.34 dst=192.168.117.255 sport=138 dport=138 [UNREPLIED] src=192.168.117.255 dst=192.168.117.34 sport=138 dport=138 mark=0 secctx=system_u:object_r:unlabeled_t:s0 zone=0 use=2
ipv4     2 udp      17 17 src=192.168.117.240 dst=5.79.108.34 sport=51544 dport=123 src=5.79.108.34 dst=192.168.117.240 sport=123 dport=51544 mark=0 secctx=system_u:object_r:unlabeled_t:s0 zone=0 use=2
ipv4     2 udp      17 17 src=192.168.117.23 dst=255.255.255.255 sport=64756 dport=22313 [UNREPLIED] src=255.255.255.255 dst=192.168.117.23 sport=22313 dport=64756 mark=0 secctx=system_u:object_r:unlabeled_t:s0 zone=0 use=2
[root@localhost ~]#  cat /proc/net/nf_conntrack  -----执行telnet
ipv4     2 tcp      6 300 ESTABLISHED src=192.168.117.59 dst=192.168.117.240 sport=54141 dport=60000 src=192.168.117.240 dst=192.168.117.59 sport=60000 dport=54141 [ASSURED] mark=0 secctx=system_u:object_r:unlabeled_t:s0 zone=0 use=2
ipv4     2 udp      17 16 src=192.168.117.240 dst=193.182.111.14 sport=41984 dport=123 src=193.182.111.14 dst=192.168.117.240 sport=123 dport=41984 mark=0 secctx=system_u:object_r:unlabeled_t:s0 zone=0 use=2
ipv4     2 udp      17 27 src=0.0.0.0 dst=255.255.255.255 sport=68 dport=67 [UNREPLIED] src=255.255.255.255 dst=0.0.0.0 sport=67 dport=68 mark=0 secctx=system_u:object_r:unlabeled_t:s0 zone=0 use=2
ipv4     2 udp      17 27 src=192.168.8.18 dst=255.255.255.255 sport=67 dport=68 [UNREPLIED] src=255.255.255.255 dst=192.168.8.18 sport=68 dport=67 mark=0 secctx=system_u:object_r:unlabeled_t:s0 zone=0 use=2
ipv4     2 tcp      6 116 TIME_WAIT src=210.22.22.151 dst=192.168.60.15 sport=59848 dport=60001 src=192.168.117.165 dst=192.168.117.240 sport=22 dport=59848 [ASSURED] mark=0 secctx=system_u:object_r:unlabeled_t:s0 zone=0 use=2
ipv4     2 tcp      6 274 ESTABLISHED src=192.168.117.240 dst=192.168.117.59 sport=60000 dport=53755 src=192.168.117.59 dst=192.168.117.240 sport=53755 dport=60000 [ASSURED] mark=0 secctx=system_u:object_r:unlabeled_t:s0 zone=0 use=2
ipv4     2 tcp      6 431977 ESTABLISHED src=192.168.117.59 dst=192.168.117.240 sport=53872 dport=60000 src=192.168.117.240 dst=192.168.117.59 sport=60000 dport=53872 [ASSURED] mark=0 secctx=system_u:object_r:unlabeled_t:s0 zone=0 use=2
ipv4     2 udp      17 9 src=192.168.117.34 dst=192.168.117.255 sport=138 dport=138 [UNREPLIED] src=192.168.117.255 dst=192.168.117.34 sport=138 dport=138 mark=0 secctx=system_u:object_r:unlabeled_t:s0 zone=0 use=2
ipv4     2 udp      17 8 src=192.168.117.240 dst=5.79.108.34 sport=51544 dport=123 src=5.79.108.34 dst=192.168.117.240 sport=123 dport=51544 mark=0 secctx=system_u:object_r:unlabeled_t:s0 zone=0 use=2
ipv4     2 udp      17 9 src=192.168.117.23 dst=255.255.255.255 sport=64756 dport=22313 [UNREPLIED] src=255.255.255.255 dst=192.168.117.23 sport=22313 dport=64756 mark=0 secctx=system_u:object_r:unlabeled_t:s0 zone=0 use=2
[root@localhost ~]# 
Title: Linux Operating Systems - How to identify the network traffic that is triggering the Reverse Path filter discards.
Object Name: emr_na-kc0109178en_us
Document Type: Support Information
Original owner: KCS - Linux
Disclosure level: Public
Version state: final
Environment
FACT:Linux kernel 2.4.18 or later.
/proc/sys/net/ipv4/conf/*/rp_filter set to a 1 or a 2
Questions/Symptoms
SYMPTOM:Some hosts or networks may be unreachable. The netstat -s counter IPReversePathFilter is incrementing.
Cause
CAUSE:The linux kernel has the rp_filters set to a zero by default, which means it is disabled. However most linux distributions set the rp_filter to a value of 1, in the file /etc/sysctl.conf. A one is the most restricted mode. So if a packet is received over an interface and the reverse path is over any other interface the packet will be discarded, and the IPReversePathFilter counter will be incremented.

# netstat -s | grep Filter
ArpFilter: 0
IPReversePathFilter: 80


# netstat -s | grep Filter
ArpFilter: 0
IPReversePathFilter: 84

Between these two commands four packets were discarded by the reverse path filter.

But how can you identify the interface and network traffic that is triggering the Reverse Path filter discard?

Answer/Solution
FIX:You can identify what network traffic is triggering the reverse path discard if you enable the log_martians and monitor syslog.

sysctl -w net.ipv4.conf.all.log_martians=1
or
echo 1 >/proc/sys/net/ipv4/conf/all/log_martians

Monitor syslog with the following command.

tail -f /var/log/messages | grep -B1 martian


These four packets were logged via syslog.

Sep 23 11:41:10 dl160d kernel: [775351.944834] martian source 16.112.240.255 from 16.112.240.30, on dev eth2
Sep 23 11:41:10 dl160d kernel: [775351.944837] ll header: ff:ff:ff:ff:ff:ff:aa:00:04:00:1e:30:08:00

The first is a subnet broadcast received on eth2 from 16.112.240.30 with source MAC address aa:00:04:00:1e:30 (the trailing 08:00 in the ll header is the EtherType, IPv4).

Sep 23 11:41:10 dl160d kernel: [775351.944885] martian source 16.112.240.255 from 16.112.240.30, on dev eth3
Sep 23 11:41:10 dl160d kernel: [775351.944889] ll header: ff:ff:ff:ff:ff:ff:aa:00:04:00:1e:30:08:00

Next is the same packet received on eth3

Sep 23 11:41:11 dl160d kernel: [775352.944336] martian source 16.112.240.197 from 16.112.240.121, on dev eth2
Sep 23 11:41:11 dl160d kernel: [775352.944339] ll header: ff:ff:ff:ff:ff:ff:d8:d3:85:59:c4:88:08:06

The third is a packet from 16.112.240.121, source MAC address d8:d3:85:59:c4:88 (the trailing 08:06 in the ll header is the EtherType, ARP).

Sep 23 11:41:11 dl160d kernel: [775352.944357] martian source 16.112.240.197 from 16.112.240.121, on dev eth3
Sep 23 11:41:11 dl160d kernel: [775352.944361] ll header: ff:ff:ff:ff:ff:ff:d8:d3:85:59:c4:88:08:06

The fourth is the same packet received on eth3.

In the example above the discards are due to a poorly designed network in which multiple subnets exist in the same broadcast domain. The syslog message has all the information needed to track back to the node that is the source of the packet.

Once you understand what the packet is and why it is triggering the reverse path discard, you might find it is OK to ignore the IPReversePathFilter counter. But if nodes or networks are unreachable due to reverse path filter discards and the network topology or routing cannot be corrected, then you can change the rp_filter to loose mode by setting it to 2, or turn the rp_filter off by setting it to 0.

You can change the rp_filter on the fly with the sysctl utility.

sysctl -w net.ipv4.conf.all.rp_filter=2

When finished disable the log_martians by setting it back to a 0.

sysctl -w net.ipv4.conf.all.log_martians=0
or
echo 0 >/proc/sys/net/ipv4/conf/all/log_martians

Make the change in the file /etc/sysctl.conf so that the rp_filter is set as needed on boot. Use the following syntax.

# rp_filter 0 = off, 1 = strict, 2 = loose
net.ipv4.conf.all.rp_filter = 1

It is not recommended to set the reverse path filter to anything less than the restricted mode (1) on any internet facing interface. Read RFC3704 for more details on the reverse path filter and why it is used.

rp_filter 本身会过滤反向路由不通的数据包。用通俗的话解释一下,就是NIC1 有 incoming 数据包,Reverse Path Filtering 模块会将数据包的源地址和目的地址(srcIP->dstIP)调转过来成为(dstIP->srcIP),然后在路由表中查找这个(dstIP->srcIP) 的路由,如果出口恰好是 NIC1 那么 rp_filter 测试通过,否则不通过/丢弃。

 

有的时候我们使用一些策略路由可能会让rpfilter表现出我们不希望的结果。例如:打上 fwmark 的数据包通过 NIC2 出去,但是 NIC2 回来的数据包肯定是没有 fwmark 的,rp filter 就会反向路由查找失败,导致包被丢弃。

如何避免这种问题呢,StackOverflow 给出了一种用 conntrack 记录出包,然后在包回来的时候恢复 fwmark 的方法,原理上可行,不过我没有测试。

Linux policy routing - packets not coming back (serverfault.com)

 

看了 StackOverflow 上的回答,大部分都不推荐直接关闭 rp_filter。不过我的情况,关闭特定端口的 rp_filter 没什么问题。

我尝试sysctl net.ipv4.conf.nic2.rp_filter=0 来关闭 NIC2 的 rp filter,发现没有效果。调整 net.ipv4.conf.all.rp_filter=0也不行。只有二者都调整为0才可以。但是我又担心all.rp_filter=0会关闭所有NIC的rp filter,一直没有这样运行。

读内核文档才知道:只要 net.ipv4.conf.nic2.rp_filter 和 net.ipv4.conf.all.rp_filter 有一个是1,那么 NIC2 的 rp filter 就会被启用。原文摘录如下:

rp_filter - INTEGER
0 - No source validation.
1 - Strict mode as defined in RFC3704 Strict Reverse Path
Each incoming packet is tested against the FIB and if the interface
is not the best reverse path the packet check will fail.
By default failed packets are discarded.
2 - Loose mode as defined in RFC3704 Loose Reverse Path
Each incoming packet's source address is also tested against the FIB
and if the source address is not reachable via any interface
the packet check will fail.

Current recommended practice in RFC3704 is to enable strict mode
to prevent IP spoofing from DDos attacks. If using asymmetric routing
or other complicated routing, then loose mode is recommended.

The max value from conf/{all,interface}/rp_filter is used
when doing source validation on the {interface}.

Default value is 0. Note that some distributions enable it
in startup scripts.

所以如果只想关闭一个接口的 rp filter,应该把 net.ipv4.conf.all.rp_filter 调整为0,并开启其他接口的 rp filter,再调整目标接口的 rp filter 为0.

rp_filter相关代码分析

  内核函数通过fib_validate_source做反向路径检查,在三个地方调用,调用关系如下所示:

  调用fib_validate_source函数的总入口是ip_rcv函数,也即是入向接收IP协议报文的总入口函数

 ip_rcv

 --> ip_rcv_finish

  --> ip_route_input_noref  ##如果skb还没有目的条目(路由相关),初始化虚拟路径cache

    ##目的地址是组播地址 ,这就是我们要分析的一支路径

     -->ip_route_input_mc -->fib_validate_source --> __fib_validate_source -->fib_lookup

    ##else 目的地址 非组播地址

    -->ip_route_input_slow

      -->fib_validate_source ## 通过fib_lookup查找到RTN_LOCAL类型路由,做反向检查,最终走local_input流程

                ##查找到RTN_BROADCAST类型路由且源地址非全0,也做反向检查

      ##不满足RTN_LOCAL和RTN_BROADCAST类型路由,则调用ip_mkroute_input, 创建route cache entry

      -->ip_mkroute_input-->__mkroute_input-->fib_validate_source

经常在/var/log/messages里发现这种消息,它是对流入的包进行路由检查失败后,发出的警告。

martian source 192.168.1.1 from 10.0.0.1, on dev eth1
ll header: 52:54:00:98:99:d0:52:54:00:de:d8:10:08:00 

代码出处在此

kernel_source/net/ipv4/route.c 
/*
 * Account for a packet that was rejected as having a martian source and,
 * when logging is enabled, report it to the kernel log.
 *
 * @dev:    device the packet was received on (its name is printed)
 * @in_dev: IPv4 config of that device; consulted for the log_martians setting
 * @skb:    the offending packet; its link-layer header is dumped if present
 * @daddr:  destination address of the packet
 * @saddr:  source address of the packet
 *
 * The in_martian_src statistic is always incremented. The log output is only
 * compiled in with CONFIG_IP_ROUTE_VERBOSE, and is rate limited.
 */
static void ip_handle_martian_source(struct net_device *dev,
				     struct in_device *in_dev,
				     struct sk_buff *skb,
				     __be32 daddr,
				     __be32 saddr)
{
	RT_CACHE_STAT_INC(in_martian_src);
#ifdef CONFIG_IP_ROUTE_VERBOSE
	if (IN_DEV_LOG_MARTIANS(in_dev) && net_ratelimit()) {
		/*
		 *	RFC1812 recommendation, if source is martian,
		 *	the only hint is MAC header.
		 */
		/* Note the argument order: destination first, then source. */
		printk(KERN_WARNING "martian source %pI4 from %pI4, on dev %s\n",
			&daddr, &saddr, dev->name);
		if (dev->hard_header_len && skb_mac_header_was_set(skb)) {
			int i;
			const unsigned char *p = skb_mac_header(skb);

			/* Hex dump of the link-layer header, bytes separated by ':'. */
			printk(KERN_WARNING "ll header: ");
			for (i = 0; i < dev->hard_header_len; i++, p++) {
				printk("%02x", *p);
				if (i < (dev->hard_header_len - 1))
					printk(":");
			}
			printk("\n");
		}
	}
#endif
}

其中,10.0.0.1表示src ip,192.168.1.1表示dst ip,eth1表示实际收包的设备,来看看为什么会有错误。

kernel_source/net/ipv4/fib_frontend.c
/*
 * Abridged excerpt of the reverse-path check: declarations, parameters and
 * the goto targets (last_resort, e_inval, e_inval_res) are not shown here.
 * It builds a flow with source and destination swapped, looks it up in the
 * FIB, and compares the resulting device with the device the packet
 * arrived on.
 */
int fib_validate_source(){
    // Code has been selectively omitted
    // Swap src and dst
	struct flowi fl = { .nl_u = { .ip4_u =
				      { .daddr = src,
					.saddr = dst,
					.tos = tos } },
			    .mark = mark,
			    .iif = oif };


	in_dev = __in_dev_get_rcu(dev);
    // Get the receiving device and read that device's rp_filter flag
	if (in_dev) {
		no_addr = in_dev->ifa_list == NULL;
		rpf = IN_DEV_RPFILTER(in_dev);
		if (mark && !IN_DEV_SRC_VMARK(in_dev))
			fl.mark = 0;
	}
	rcu_read_unlock();

	if (in_dev == NULL)
		goto e_inval;
	net = dev_net(dev);
    // Look up the FIB using src as the destination
	if (fib_lookup(net, &fl, &res))
		goto last_resort;
	if (res.type != RTN_UNICAST)
		goto e_inval_res;
	*spec_dst = FIB_RES_PREFSRC(res);
	fib_combine_itag(itag, &res);

#ifdef CONFIG_IP_ROUTE_MULTIPATH
    // With src used as dst, the egress dev found equals the receiving dev: all OK
	if (FIB_RES_DEV(res) == dev || res.fi->fib_nhs > 1)
#else
	if (FIB_RES_DEV(res) == dev)
#endif
	{
		ret = FIB_RES_NH(res).nh_scope >= RT_SCOPE_HOST;
		fib_res_put(&res);
		return ret;
	}
	fib_res_put(&res);
	if (no_addr)
		goto last_resort;
    // If the devs differ and rp_filter is on, the check fails
	if (rpf == 1)
		goto e_inval;

}

给一些的配置

sysctl.conf
# 0 means off, 1 means on
net.ipv4.conf.all.rp_filter = 0
net.ipv4.conf.default.rp_filter = 0
net.ipv4.conf.eth0.rp_filter = 0
net.ipv4.conf.lo.rp_filter = 0
net.ipv4.conf.vboxnet0.rp_filter = 0
net.ipv4.conf.wlan0.rp_filter = 0

net.ipv4.conf.all.rp_filter是总开关,一开全开。

# 0 means off
net.ipv4.conf.all.log_martians = 0
net.ipv4.conf.default.log_martians = 0
net.ipv4.conf.eth0.log_martians = 0
net.ipv4.conf.lo.log_martians = 0
net.ipv4.conf.vboxnet0.log_martians = 0
net.ipv4.conf.wlan0.log_martians = 0

是否记录martian的开关,对应于代码里的IN_DEV_LOG_MARTIANS(in_dev)

小结一下,这个错误提示是很常见的,以至于常常被忽略,大多数情况它是做了正确的事情,不过当发现有意外的丢包,可以想想是否是遭遇了火星包,排查方法是:先做tcpdump,发现有traffic(tcpdump不受火星包的影响),但kernel hook或者app收不到包,应怀疑是martian source导致丢包,用dmesg看下是否有相关提示。

martian source 192.168.1.1 from 10.0.0.1, on dev eth1
ll header: 52:54:00:98:99:d0:52:54:00:de:d8:10:08:00 

做一下翻译:eth1上收到了src=10.0.0.1,dst=192.168.1.1的包,但是按照本机的路由设置对10.0.0.1进行路由计算,得出的out dev不是eth1。 一般遇到这种还是保持rp_filter=1吧,毕竟这个开关能让系统免受很多火星来客的干扰,研究下路由配置应该能解决问题;如果确实很复杂的使用场景,比如这台server有好多个网口,需要在不同网口之间转发,放开rp_filter的限制也无妨。

原文地址:https://www.cnblogs.com/dream397/p/13705957.html