1、到底哪个是半连接队列
/** struct listen_sock - listen state * * @max_qlen_log - log_2 of maximal queued SYNs/REQUESTs */ struct listen_sock { u8 max_qlen_log; /* 2^max_qlen_log is the maximum length of the SYN (half-open) queue; with sysctl_max_syn_backlog at 1024, max_qlen_log is 10 (2^10=1024) */ /* 3 bytes hole, try to use */ int qlen; /* qlen is the current number of request_socks in the SYN (half-open) queue */ int qlen_young; int clock_hand; u32 hash_rnd; u32 nr_table_entries; /* nr_table_entries is the number of hash buckets in syn_table; listen passes TCP_SYNQ_HSIZE (512) */ struct request_sock *syn_table[0]; };
跟踪listen系统调用:
inet_listen
inet_csk_listen_start
reqsk_queue_alloc
在reqsk_queue_alloc中:
const int lopt_size = sizeof(struct listen_sock) + nr_table_entries * sizeof(struct request_sock *); struct listen_sock *lopt = kzalloc(lopt_size, GFP_KERNEL);
我们发现这里分配了内存:除了listen_sock结构本身之外,还分配了nr_table_entries个struct request_sock *指针(即syn_table哈希表)。
对于nr_table_entries,我们可以往回追踪:
err = inet_csk_listen_start(sk, TCP_SYNQ_HSIZE); #define TCP_SYNQ_HSIZE 512 /* Size of SYNACK hash table */
跟踪SYN数据包的处理,在tcp_v4_conn_request中,最后调用了inet_csk_reqsk_queue_hash_add函数:
void inet_csk_reqsk_queue_hash_add(struct sock *sk, struct request_sock *req, unsigned long timeout) { struct inet_connection_sock *icsk = inet_csk(sk); struct listen_sock *lopt = icsk->icsk_accept_queue.listen_opt; const u32 h = inet_synq_hash(inet_rsk(req)->rmt_addr, inet_rsk(req)->rmt_port, lopt->hash_rnd, lopt->nr_table_entries); reqsk_queue_hash_req(&icsk->icsk_accept_queue, h, req, timeout); inet_csk_reqsk_queue_added(sk, timeout); } reqsk_queue_hash_req将新建的request_sock添加到reqsk_queue中: static inline void reqsk_queue_hash_req(struct request_sock_queue *queue, u32 hash, struct request_sock *req, unsigned long timeout) { struct listen_sock *lopt = queue->listen_opt; req->expires = jiffies + timeout; req->retrans = 0; req->sk = NULL; req->dl_next = lopt->syn_table[hash]; write_lock(&queue->syn_wait_lock); lopt->syn_table[hash] = req; write_unlock(&queue->syn_wait_lock); }
inet_csk_reqsk_queue_added增加连接请求队列的计数,必要时设置定时器:
static inline void inet_csk_reqsk_queue_added(struct sock *sk, const unsigned long timeout) { if (reqsk_queue_added(&inet_csk(sk)->icsk_accept_queue) == 0) inet_csk_reset_keepalive_timer(sk, timeout); } static inline int reqsk_queue_added(struct request_sock_queue *queue) { struct listen_sock *lopt = queue->listen_opt; const int prev_qlen = lopt->qlen; lopt->qlen_young++; lopt->qlen++; return prev_qlen; }
其他的几个数据结构:
struct inet_connection_sock { /* inet_sock has to be the first member! */ struct inet_sock icsk_inet; struct request_sock_queue icsk_accept_queue; struct inet_bind_bucket *icsk_bind_hash; unsigned long icsk_timeout; struct timer_list icsk_retransmit_timer; struct timer_list icsk_delack_timer; __u32 icsk_rto; __u32 icsk_pmtu_cookie; const struct tcp_congestion_ops *icsk_ca_ops; const struct inet_connection_sock_af_ops *icsk_af_ops; unsigned int (*icsk_sync_mss)(struct sock *sk, u32 pmtu); __u8 icsk_ca_state; __u8 icsk_retransmits; __u8 icsk_pending; __u8 icsk_backoff; __u8 icsk_syn_retries; __u8 icsk_probes_out; __u16 icsk_ext_hdr_len; struct { __u8 pending; /* ACK is pending */ __u8 quick; /* Scheduled number of quick acks */ __u8 pingpong; /* The session is interactive */ __u8 blocked; /* Delayed ACK was blocked by socket lock */ __u32 ato; /* Predicted tick of soft clock */ unsigned long timeout; /* Currently scheduled timeout */ __u32 lrcvtime; /* timestamp of last received data packet */ __u16 last_seg_size; /* Size of last incoming segment */ __u16 rcv_mss; /* MSS used for delayed ACK decisions */ } icsk_ack; struct { int enabled; /* Range of MTUs to search */ int search_high; int search_low; /* Information on the current probe. */ int probe_size; } icsk_mtup; u32 icsk_ca_priv[16]; #define ICSK_CA_PRIV_SIZE (16 * sizeof(u32)) }; struct request_sock_queue { /*Points to the request_sock accept queue, when after 3 handshake will add the request_sock from syn_table to here*/ struct request_sock *rskq_accept_head; struct request_sock *rskq_accept_tail; rwlock_t syn_wait_lock; u8 rskq_defer_accept; /* 3 bytes hole, try to pack */ struct listen_sock *listen_opt; };
因此,半连接队列在这里可以认为是icsk_accept_queue.listen_opt->syn_table,叫做连接请求队列。
其实这里只需要注意一点就是,在接收到SYN包的时候,就已经创建了request_sock结构,存储在半连接队列中;当接收到ACK数据包后,将其从半连接队列转移到accept_queue中。如果我们为了抵御SYN Flood攻击而修改内核的话,我们完全可以在接收到ACK后,再创建request_sock结构,并直接链接到accept_queue里面。
这里半连接队列为icsk_accept_queue.listen_opt->syn_table;accept_queue为icsk_accept_queue.rskq_accept_head。
其实,可见半连接队列与accept_queue存储的都是request_sock,但是不同的是,半连接队列存储的是未完成三次握手时候的request_sock;而accept_queue则是完成三次握手的request_sock,此时的request_sock中包含着已经建立的用于跟新的连接请求进行通信的sock结构(通常称为child sock)。
2、半连接队列的长度
跟踪inet_csk_reqsk_queue_is_full,发现它返回queue->listen_opt->qlen >> queue->listen_opt->max_qlen_log的结果,也就是说当qlen达到2^max_qlen_log时结果非零,队列即被视为已满,看来关键在于max_qlen_log。
发现reqsk_queue_alloc中:
for (lopt->max_qlen_log = 6; /*64*/ (1 << lopt->max_qlen_log) < sysctl_max_syn_backlog; lopt->max_qlen_log++);
sysctl_max_syn_backlog默认是1024,那么max_qlen_log就是10了;假如我们设置成64,那么max_qlen_log就是6了,我们设置成128,就是7了;其他的依次类推。
3、连接请求的数据流向
在前面的分析中,SYN数据包的处理中,接收到SYN数据包,将会建立一个request_sock结构,添加到syn_table哈希表相应的链表中。
接收到ACK数据包后,跟踪tcp_v4_do_rcv,发现会调用tcp_v4_hnd_req。
在tcp_v4_hnd_req中:
/* Check the request_sock is in the syn_table or not. If the request_sock have been in the syn_table, then call tcp_check_req*/ /*If ACK in 3 handsharks, will find a request_sock in syn_table, then call tcp_check_req().*/ struct request_sock *req = inet_csk_search_req(sk, &prev, th->source, iph->saddr, iph->daddr); /*Normal: Call syn_recv_sock function(tcp_v4_syn_recv_sock)*/ if (req) return tcp_check_req(sk, skb, req, prev); 在tcp_check_req中: /*ipv4_specific.syn_recv_sock = tcp_v4_syn_recv_sock*/ child = inet_csk(sk)->icsk_af_ops->syn_recv_sock(sk, skb, req, NULL); if (child == NULL) goto listen_overflow; /*Move the request_sock from the syn_table to accept_queue Notes: syn_table isn't A hlist_header structure.*/ inet_csk_reqsk_queue_unlink(sk, req, prev); inet_csk_reqsk_queue_removed(sk, req); inet_csk_reqsk_queue_add(sk, req, child); return child;
tcp_v4_syn_recv_sock会根据request_sock新建一个sock结构,并且进行一定的初始化,返回新建的sock结构。
将request_sock从syn_table中移到accept_queue中。
static inline void inet_csk_reqsk_queue_add(struct sock *sk, struct request_sock *req, struct sock *child) { reqsk_queue_add(&inet_csk(sk)->icsk_accept_queue, req, sk, child); } static inline void reqsk_queue_add(struct request_sock_queue *queue, struct request_sock *req, struct sock *parent, struct sock *child) { req->sk = child; /* Increment the parent's accept backlog (sk_ack_backlog): connections that have completed the three-way handshake but have not yet been accept()ed. */ sk_acceptq_added(parent); if (queue->rskq_accept_head == NULL) queue->rskq_accept_head = req; else queue->rskq_accept_tail->dl_next = req; queue->rskq_accept_tail = req; req->dl_next = NULL; }
4、accept系统调用的处理
三次握手之后,request_sock已经在rskq_accept队列中了,等待accept系统调用取走。
static inline void sk_acceptq_removed(struct sock *sk) { sk->sk_ack_backlog--; } static inline void sk_acceptq_added(struct sock *sk) { sk->sk_ack_backlog++; }
这个时候,我们关注一下struct sock中的两个变量:
unsigned short sk_ack_backlog; /*sk_ack_backlog is the number of connections that have completed the three-way handshake and are waiting in the accept queue to be accept()ed.*/ unsigned short sk_max_ack_backlog; /*sk_max_ack_backlog is the maximum value of sk_ack_backlog; it is assigned in listen()*/
跟踪accept系统调用:
inet_csk_accept: newsk = reqsk_queue_get_child(&icsk->icsk_accept_queue, sk); static inline struct sock *reqsk_queue_get_child(struct request_sock_queue *queue, struct sock *parent) { struct request_sock *req = reqsk_queue_remove(queue); struct sock *child = req->sk; BUG_TRAP(child != NULL); sk_acceptq_removed(parent); __reqsk_free(req); return child; }
注意这里free掉了在三次握手中建立的request_sock结构。
5、防止溢出的两个链表检查
在tcp_v4_conn_request中,对SYN包的处理过程中:
if (inet_csk_reqsk_queue_is_full(sk) && !isn) { #ifdef CONFIG_SYN_COOKIES if (sysctl_tcp_syncookies) { want_cookie = 1; } else #endif goto drop; } /* Accept backlog is full. If we have already queued enough * of warm entries in syn queue, drop request. It is better than * clogging syn queue with openreqs with exponentially increasing * timeout. */ /*If Accept Queue is full, Drop the packet*/ if (sk_acceptq_is_full(sk) && inet_csk_reqsk_queue_young(sk) > 1) goto drop;
这里面有两个队列的检查:request_sock队列和accept队列。
request_sock队列:
static inline int inet_csk_reqsk_queue_is_full(const struct sock *sk) { return reqsk_queue_is_full(&inet_csk(sk)->icsk_accept_queue); } static inline int reqsk_queue_is_full(const struct request_sock_queue *queue) { return queue->listen_opt->qlen >> queue->listen_opt->max_qlen_log; }
accept队列:
static inline int sk_acceptq_is_full(struct sock *sk) { return sk->sk_ack_backlog > sk->sk_max_ack_backlog; }
这里涉及4个变量:其中两个是sock的成员变量(sk_ack_backlog和sk_max_ack_backlog),另外两个是request_sock_queue中listen_opt的成员变量(qlen和max_qlen_log)。
max_qlen_log的初始化:
在reqsk_queue_alloc中:
for (lopt->max_qlen_log = 6; /*64*/ (1 << lopt->max_qlen_log) < sysctl_max_syn_backlog; lopt->max_qlen_log++);
sk_max_ack_backlog的初始化:
在inet_listen中:
sk->sk_max_ack_backlog = backlog;
注:sk_max_ack_backlog就是我们熟悉的listen系统调用的backlog参数。
qlen的增加:
tcp_v4_conn_request
inet_csk_reqsk_queue_hash_add
inet_csk_reqsk_queue_added
reqsk_queue_added
注:跟踪SYN数据包的处理,在tcp_v4_conn_request中,最后调用了inet_csk_reqsk_queue_hash_add函数:
inet_csk_reqsk_queue_added(sk, timeout); inet_csk_reqsk_queue_added增加连接请求队列的计数,必要时设置定时器。 reqsk_queue_added: lopt->qlen++; qlen的减少: tcp_v4_hnd_req tcp_check_req inet_csk_reqsk_queue_removed reqsk_queue_removed
注:
在inet_csk_listen_stop中:
/* Following specs, it would be better either to send FIN * (and enter FIN-WAIT-1, it is normal close) * or to send active reset (abort). * Certainly, it is pretty dangerous while synflood, but it is * bad justification for our negligence 8) * To be honest, we are not able to make either * of the variants now. --ANK */ reqsk_queue_destroy(&icsk->icsk_accept_queue);
sk_ack_backlog的增加:
tcp_check_req
inet_csk_reqsk_queue_add
reqsk_queue_add
sk_acceptq_added
sk_ack_backlog的减少:
inet_csk_accept
reqsk_queue_get_child
sk_acceptq_removed
转载请注明:爱开源 » 关于半连接队列的释疑