最新消息:

linux诡异的半连接(SYN_RECV)队列长度(二)

Linux admin 5601浏览 0评论

继续上回:linux诡异的半连接(SYN_RECV)队列长度(一) 我们已经确认了全连接队列的长度计算,接下来继续寻找半连接队列长度。
试着慢慢减小tcp_max_syn_backlog的值,但还是看不到半连接状态数量的变化。
实在没什么思路,只能Google之,搜出来的基本都是关于SYN Flood的文章,难道没同学关注过半连接队列的长度吗?
困扰数日终于在某个夜晚被我找一篇题为《关于半连接队列的释疑》的文章,激动呐。根据作者提供的思路我开始翻代码,注意我用的内核版本2.6.32,不同版本代码也有差异。
首先定位到tcp_v4_conn_request函数,在文件netipv4tcp_ipv4.c中。

int tcp_v4_conn_request(struct sock *sk, struct sk_buff *skb)
{
	struct inet_request_sock *ireq;
	struct tcp_options_received tmp_opt;
	struct request_sock *req;
	__be32 saddr = ip_hdr(skb)->saddr;
	__be32 daddr = ip_hdr(skb)->daddr;
	__u32 isn = TCP_SKB_CB(skb)->when;
	struct dst_entry *dst = NULL;
#ifdef CONFIG_SYN_COOKIES
	int want_cookie = 0;
#else
#define want_cookie 0 /* Argh, why doesn't gcc optimize this :( */
#endif

	/* Never answer to SYNs send to broadcast or multicast */
	if (skb->rtable->rt_flags & (RTCF_BROADCAST | RTCF_MULTICAST))
		goto drop;

	/* TW buckets are converted to open requests without
	 * limitations, they conserve resources and peer is
	 * evidently real one.
	 */

	//关键函数inet_csk_reqsk_queue_is_full
	if (inet_csk_reqsk_queue_is_full(sk) && !isn) {
#ifdef CONFIG_SYN_COOKIES
		if (sysctl_tcp_syncookies) {
			want_cookie = 1;
		} else
#endif
		goto drop;
	}

	/* Accept backlog is full. If we have already queued enough
	 * of warm entries in syn queue, drop request. It is better than
	 * clogging syn queue with openreqs with exponentially increasing
	 * timeout.
	 */
	if (sk_acceptq_is_full(sk) && inet_csk_reqsk_queue_young(sk) > 1)
		goto drop;

	req = inet_reqsk_alloc(&tcp_request_sock_ops);
	if (!req)
		goto drop;
省略N多代码

跟进关键函数inet_csk_reqsk_queue_is_full,在文件includenetinet_connection_sock.h中。

static inline int inet_csk_reqsk_queue_is_full(const struct sock *sk)
{
	return reqsk_queue_is_full(&inet_csk(sk)->icsk_accept_queue);
}

跟进关键函数reqsk_queue_is_full,在文件includenetrequest_sock.h中。

static inline int reqsk_queue_is_full(const struct request_sock_queue *queue)
{
        //注意这里是用>>(右移)来判断的,不是大于号
	return queue->listen_opt->qlen >> queue->listen_opt->max_qlen_log;
}

查找qlen和max_qlen_log的定义,在文件includenetrequest_sock.h中。

/** struct listen_sock - listen state
 *
 * @max_qlen_log - log_2 of maximal queued SYNs/REQUESTs
 */
struct listen_sock {
	u8			max_qlen_log;// 2^max_qlen_log = 半连接队列最大长度
	/* 3 bytes hole, try to use */
	int			qlen;//全连接队列的当前长度
	int			qlen_young;
	int			clock_hand;
	u32			hash_rnd;
	u32			nr_table_entries;
	struct request_sock	*syn_table[0];
};

可见关键是如何计算max_qlen_log,前一篇博客 提到了listen的系统调用:

//file:net/socket.c

SYSCALL_DEFINE2(listen, int, fd, int, backlog)
{
	struct socket *sock;
	int err, fput_needed;
	int somaxconn;

	sock = sockfd_lookup_light(fd, &err, &fput_needed);
	if (sock) {
		somaxconn = sock_net(sock->sk)->core.sysctl_somaxconn;
		//上限不超过somaxconn
		if ((unsigned)backlog > somaxconn)
			backlog = somaxconn;

		err = security_socket_listen(sock, backlog);
		if (!err)
		        //这里是关键。
			err = sock->ops->listen(sock, backlog);

		fput_light(sock->file, fput_needed);
	}
	return err;
}

sock->ops->listen其实是inet_listen,在文件netipv4af_inet.c中。

int inet_listen(struct socket *sock, int backlog)
{
	struct sock *sk = sock->sk;
	unsigned char old_state;
	int err;

	lock_sock(sk);

	err = -EINVAL;
	if (sock->state != SS_UNCONNECTED || sock->type != SOCK_STREAM)
		goto out;

	old_state = sk->sk_state;
	if (!((1 << old_state) & (TCPF_CLOSE | TCPF_LISTEN)))
		goto out;

	/* Really, if the socket is already in listen state
	 * we can only allow the backlog to be adjusted.
	 */
	if (old_state != TCP_LISTEN) {
	        //关键函数inet_csk_listen_start
		err = inet_csk_listen_start(sk, backlog);
		if (err)
			goto out;
	}
	sk->sk_max_ack_backlog = backlog;
	err = 0;

out:
	release_sock(sk);
	return err;
}

跟进inet_csk_listen_start,在文件netipv4inet_connection_sock.c中。

int inet_csk_listen_start(struct sock *sk, const int nr_table_entries)
{
	struct inet_sock *inet = inet_sk(sk);
	struct inet_connection_sock *icsk = inet_csk(sk);

	//关键函数reqsk_queue_alloc
	int rc = reqsk_queue_alloc(&icsk->icsk_accept_queue, nr_table_entries);
  //后面省略
}

跟进reqsk_queue_alloc,在文件netcorerequest_sock.c中。

int reqsk_queue_alloc(struct request_sock_queue *queue,
		      unsigned int nr_table_entries)
{
	size_t lopt_size = sizeof(struct listen_sock);
	struct listen_sock *lopt;

  //这里开始影响到nr_table_entries的取值,内核版本小于2.6.20的话nr_table_entries是不会修改的
        nr_table_entries = min_t(u32, nr_table_entries, sysctl_max_syn_backlog);
	nr_table_entries = max_t(u32, nr_table_entries, 8);
	nr_table_entries = roundup_pow_of_two(nr_table_entries + 1);

	//nr_table_entries到这里已经确定
	lopt_size += nr_table_entries * sizeof(struct request_sock *);
	if (lopt_size > PAGE_SIZE)
		lopt = __vmalloc(lopt_size,
			GFP_KERNEL | __GFP_HIGHMEM | __GFP_ZERO,
			PAGE_KERNEL);
	else
		lopt = kzalloc(lopt_size, GFP_KERNEL);
	if (lopt == NULL)
		return -ENOMEM;

  //这里确定了lopt->max_qlen_log的值
	for (lopt->max_qlen_log = 3;
	     (1 << lopt->max_qlen_log) < nr_table_entries;//内核版本小于2.6.20的话这里是sysctl_max_syn_backlog
	     lopt->max_qlen_log++);

	get_random_bytes(&lopt->hash_rnd, sizeof(lopt->hash_rnd));
	rwlock_init(&queue->syn_wait_lock);
	queue->rskq_accept_head = NULL;
	lopt->nr_table_entries = nr_table_entries;

	write_lock_bh(&queue->syn_wait_lock);
	queue->listen_opt = lopt;
	write_unlock_bh(&queue->syn_wait_lock);

	return 0;
}

代码到此为止,然后我们计算一下为何在虚拟机S上的SYN_RECV状态数量会是256

nr_table_entries = listen的第二个参数int backlog ,上限是系统的somaxconn
若 somaxconn = 128 sysctl_max_syn_backlog = 4096 backlog = 511 则 nr_table_entries = 128

nr_table_entries = min_t(u32, nr_table_entries, sysctl_max_syn_backlog);
取两者较小的一个 nr_table_entries = 128

nr_table_entries = max_t(u32, nr_table_entries, 8);
取两者较大的一个 nr_table_entries = 128

nr_table_entries = roundup_pow_of_two(nr_table_entries + 1); //roundup_pow_of_two – round the given value up to nearest power of two
roundup_pow_of_two(128 + 1) = 256

for (lopt->max_qlen_log = 3; (1 << lopt->max_qlen_log) < nr_table_entries; lopt->max_qlen_log++);
max_qlen_log = 8

判断半连接队列是否满 queue->listen_opt->qlen >> queue->listen_opt->max_qlen_log;
queue->listen_opt->qlen = 256 时reqsk_queue_is_full返回1 , 进入drop
所以queue->listen_opt->qlen 取值 0~255, 因此SYN_RECV状态数量会是 256

另外同事的测试结果为何与我的不同?
因为内核版本小于2.6.20的话max_qlen_log是直接由sysctl_max_syn_backlog决定的,所以半连接队列的长度就是等于sysctl_max_syn_backlog
文章有点长,不过总算是把问题给解决了。

转载请注明:爱开源 » linux诡异的半连接(SYN_RECV)队列长度(二)

您必须 登录 才能发表评论!