Network programming on Linux is inseparable from the socket (rendered in Chinese as 套接字). Any network communication must first create a socket, and only then can data be sent to or received from the peer through it. A demo of the receiving (server) side looks like this (error handling omitted for brevity):
#include <string.h>
#include <strings.h>      /* bzero() */
#include <sys/types.h>
#include <sys/socket.h>
#include <netinet/in.h>   /* struct sockaddr_in, htons(), INADDR_ANY */

#define SET_PORT 3490

int main(void)
{
    int sockfd, new_fd;
    struct sockaddr_in my_addr;      /* local (server) address */
    struct sockaddr_in their_addr;   /* peer address, filled in by accept() */
    socklen_t sin_size;

    sockfd = socket(PF_INET, SOCK_STREAM, 0);             /* create the socket */

    my_addr.sin_family = AF_INET;
    my_addr.sin_port = htons(SET_PORT);                    /* port in network byte order */
    my_addr.sin_addr.s_addr = INADDR_ANY;                  /* accept on any local address */
    bzero(&(my_addr.sin_zero), sizeof(my_addr.sin_zero));

    bind(sockfd, (struct sockaddr *)&my_addr, sizeof(struct sockaddr)); /* bind the socket */
    listen(sockfd, 10);                                     /* listen on the socket */

    sin_size = sizeof(struct sockaddr_in);
    new_fd = accept(sockfd, (struct sockaddr *)&their_addr, &sin_size); /* accept a connection */

    return 0;
}
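For completeness, the sending (client) side is symmetric: create a socket, connect() to the server, then send(). Below is a minimal sketch under the same assumptions as the demo above (error handling omitted; the 127.0.0.1 address is purely illustrative):

#include <string.h>
#include <sys/socket.h>
#include <netinet/in.h>
#include <arpa/inet.h>
#include <unistd.h>

#define SET_PORT 3490

int main(void)
{
    int sockfd = socket(PF_INET, SOCK_STREAM, 0);          /* create the socket */
    struct sockaddr_in srv_addr;

    memset(&srv_addr, 0, sizeof(srv_addr));
    srv_addr.sin_family = AF_INET;
    srv_addr.sin_port = htons(SET_PORT);
    inet_pton(AF_INET, "127.0.0.1", &srv_addr.sin_addr);   /* server address (illustrative) */

    connect(sockfd, (struct sockaddr *)&srv_addr, sizeof(srv_addr)); /* connect to the server */
    send(sockfd, "hello", 5, 0);    /* push data to the peer through the socket */
    close(sockfd);
    return 0;
}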
As the demo shows, you first call socket() to create a socket, then bind it, and finally listen for and accept connections before reading data. So what exactly is this socket, and how does the Linux kernel create and use it internally?
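Before digging into the data structures, it helps to see how the socket() call in the demo reaches the kernel. Here is a condensed sketch of the entry path, based on a 4.x-era net/socket.c (not verbatim; details vary across versions):

/* net/socket.c (abridged): the socket(2) system call */
SYSCALL_DEFINE3(socket, int, family, int, type, int, protocol)
{
	struct socket *sock;
	int flags = type & ~SOCK_TYPE_MASK;   /* SOCK_NONBLOCK / SOCK_CLOEXEC bits */
	int retval;

	type &= SOCK_TYPE_MASK;

	/* create the struct socket through the protocol family */
	retval = sock_create(family, type, protocol, &sock);
	if (retval < 0)
		return retval;

	/* wrap it in a struct file and install a file descriptor */
	return sock_map_fd(sock, flags & (O_CLOEXEC | O_NONBLOCK));
}

int sock_create(int family, int type, int protocol, struct socket **res)
{
	/* current process's network namespace; kern = 0 for userspace callers */
	return __sock_create(current->nsproxy->net_ns, family, type, protocol, res, 0);
}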
1. (1) struct socket is a structure with only a handful of fields, but it nests several other structures. The key nested pieces are:
proto_ops: the table where the interfaces that userspace calls (bind, listen, accept, ...) are registered;
wq: the queue of processes waiting on this socket plus its asynchronous notification queue; in other words, several processes may be waiting to use the same socket at once;
sock: arguably the most important structure nested inside struct socket (covered in detail below).
An abridged definition of struct socket is sketched right after this list.
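Here is that sketch, abridged from include/linux/net.h of a 4.x-era kernel (field order and a few members differ between versions):

struct socket {
	socket_state            state;  /* SS_UNCONNECTED, SS_CONNECTED, ...      */
	short                   type;   /* SOCK_STREAM, SOCK_DGRAM, ...           */
	unsigned long           flags;
	struct socket_wq __rcu  *wq;    /* waiting processes + async notify list  */
	struct file             *file;  /* the VFS file this socket is exposed as */
	struct sock             *sk;    /* the protocol-side half of the socket   */
	const struct proto_ops  *ops;   /* bind/listen/accept/... registered here */
};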
(2) With struct socket defined, the next step is creating and initializing it. The kernel function that creates a socket is __sock_create; its core code is as follows:
int __sock_create(struct net *net, int family, int type, int protocol,
struct socket **res, int kern)
{
int err;
struct socket *sock;
const struct net_proto_family *pf;
.........
/*
* Allocate the socket and allow the family to set things up. if
* the protocol is 0, the family is instructed to select an appropriate
* default.
 * In essence: create the struct socket and keep it inside an inode, so it can be indexed and managed uniformly through the superblock
*/
sock = sock_alloc();
.........
/* The protocol-specific part of the socket is created here; for PF_INET this actually calls inet_create,
registered in af_inet.c:
static const struct net_proto_family inet_family_ops = {
.family = PF_INET,
.create = inet_create,
.owner = THIS_MODULE,
};*/
err = pf->create(net, sock, protocol, kern);
..................
}
So socket creation boils down to two core calls: sock_alloc() and pf->create(). Let's look at sock_alloc() first:
/**
* sock_alloc - allocate a socket
*
* Allocate a new inode and socket object. The two are bound together
* and initialised. The socket is then returned. If we are out of inodes
* NULL is returned.
 * We are asked to allocate a socket, yet the code allocates an inode. Why?
 * 1. Sockets need management too; once attached to an inode they can be indexed and managed uniformly through the super_block.
 * 2. The socket's attribute fields naturally live in the inode as well.
 * 3. It fits the "everything is a file" philosophy.
*/
struct socket *sock_alloc(void)
{
struct inode *inode;
struct socket *sock;
//allocate an inode from the sockfs superblock
inode = new_inode_pseudo(sock_mnt->mnt_sb);
if (!inode)
return NULL;
/* the inode and the socket are bound together; the socket is reached through the inode, which makes management easy */
sock = SOCKET_I(inode);
kmemcheck_annotate_bitfield(sock, type);//annotate the bitfield's shadow memory so kmemcheck treats it as in use
inode->i_ino = get_next_ino();
inode->i_mode = S_IFSOCK | S_IRWXUGO;
inode->i_uid = current_fsuid();
inode->i_gid = current_fsgid();
inode->i_op = &sockfs_inode_ops;
this_cpu_add(sockets_in_use, 1);
return sock;
}
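The inode/socket binding works because the two are allocated as a single object: sockfs' inode allocation hands out a struct socket_alloc, and SOCKET_I() simply recovers the socket half from the inode half. Roughly, from include/net/sock.h of a 4.x-era kernel (abridged sketch):

struct socket_alloc {
	struct socket socket;
	struct inode  vfs_inode;
};

static inline struct socket *SOCKET_I(struct inode *inode)
{
	/* walk back from the embedded inode to the enclosing socket_alloc */
	return &container_of(inode, struct socket_alloc, vfs_inode)->socket;
}

static inline struct inode *SOCK_INODE(struct socket *socket)
{
	return &container_of(socket, struct socket_alloc, socket)->vfs_inode;
}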
Essentially sock_alloc() allocates an inode and binds it to a struct socket, so the socket can be addressed through the inode. With struct socket in place, the next piece is the struct sock nested inside it; its allocation and initialization are both done inside inet_create():
static int inet_create(struct net *net, struct socket *sock, int protocol,
int kern)
{
struct sock *sk;
struct inet_protosw *answer;
struct inet_sock *inet;
struct proto *answer_prot;
unsigned char answer_flags;
int try_loading_module = 0;
int err;
if (protocol < 0 || protocol >= IPPROTO_MAX)
return -EINVAL;
sock->state = SS_UNCONNECTED;//a freshly created socket naturally starts out unconnected
/* Look for the requested type/protocol pair. */
lookup_protocol:
err = -ESOCKTNOSUPPORT;
rcu_read_lock();
list_for_each_entry_rcu(answer, &inetsw[sock->type], list) {
err = 0;
/* Check the non-wild match. */
if (protocol == answer->protocol) {
if (protocol != IPPROTO_IP)
break;
} else {
/* Check for the two wild cases. */
if (IPPROTO_IP == protocol) {
protocol = answer->protocol;
break;
}
if (IPPROTO_IP == answer->protocol)
break;
}
err = -EPROTONOSUPPORT;
}
if (unlikely(err)) {
if (try_loading_module < 2) {
rcu_read_unlock();
/*
* Be more specific, e.g. net-pf-2-proto-132-type-1
* (net-pf-PF_INET-proto-IPPROTO_SCTP-type-SOCK_STREAM)
*/
if (++try_loading_module == 1)
request_module("net-pf-%d-proto-%d-type-%d",
PF_INET, protocol, sock->type);
/*
* Fall back to generic, e.g. net-pf-2-proto-132
* (net-pf-PF_INET-proto-IPPROTO_SCTP)
*/
else
request_module("net-pf-%d-proto-%d",
PF_INET, protocol);
goto lookup_protocol;
} else
goto out_rcu_unlock;
}
err = -EPERM;
if (sock->type == SOCK_RAW && !kern &&
!ns_capable(net->user_ns, CAP_NET_RAW))
goto out_rcu_unlock;
sock->ops = answer->ops;
answer_prot = answer->prot;
answer_flags = answer->flags;
rcu_read_unlock();
WARN_ON(!answer_prot->slab);
err = -ENOBUFS;
/*allocate memory (from the protocol's slab cache or the heap) for the sock instance and initialize it*/
sk = sk_alloc(net, PF_INET, GFP_KERNEL, answer_prot, kern);
if (!sk)
goto out;
err = 0;
if (INET_PROTOSW_REUSE & answer_flags)
sk->sk_reuse = SK_CAN_REUSE;
/*
1. Cast to inet_sock so the inet-specific fields can be initialized;
2. The inet and sk pointers still refer to the same memory, so the two pointers can be used interchangeably
*/
inet = inet_sk(sk);
inet->is_icsk = (INET_PROTOSW_ICSK & answer_flags) != 0;
inet->nodefrag = 0;
if (SOCK_RAW == sock->type) {
inet->inet_num = protocol;
if (IPPROTO_RAW == protocol)
inet->hdrincl = 1;
}
if (net->ipv4.sysctl_ip_no_pmtu_disc)
inet->pmtudisc = IP_PMTUDISC_DONT;
else
inet->pmtudisc = IP_PMTUDISC_WANT;
inet->inet_id = 0;
/*
1. Initialize the sk_buff receive, write and error queues
2. Link the socket and sock instances to each other
3. Install the sock's default callback functions
4. Initialize the remaining sock fields
*/
sock_init_data(sock, sk);
sk->sk_destruct = inet_sock_destruct;//callback invoked when the sock is destroyed
sk->sk_protocol = protocol;//protocol number
sk->sk_backlog_rcv = sk->sk_prot->backlog_rcv;
//sk and inet are used interchangeably for the rest of the initialization
inet->uc_ttl = -1;
inet->mc_loop = 1;
inet->mc_ttl = 1;
inet->mc_all = 1;
inet->mc_index = 0;
inet->mc_list = NULL;
inet->rcv_tos = 0;
sk_refcnt_debug_inc(sk);//bump the (debug) reference count
if (inet->inet_num) {
/* It assumes that any protocol which allows
* the user to assign a number at socket
* creation time automatically
* shares.
*/
inet->inet_sport = htons(inet->inet_num);
/* Add to protocol hash chains. */
err = sk->sk_prot->hash(sk);
if (err) {
sk_common_release(sk);
goto out;
}
}
if (sk->sk_prot->init) {
err = sk->sk_prot->init(sk);
if (err)
sk_common_release(sk);
}
out:
return err;
out_rcu_unlock:
rcu_read_unlock();
goto out;
}
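For the common TCP case (a SOCK_STREAM socket with protocol 0 or IPPROTO_TCP), the entry that the lookup loop above finds in inetsw comes from inetsw_array in af_inet.c, so sock->ops ends up as inet_stream_ops and sk->sk_prot as tcp_prot. An abridged excerpt (values may differ slightly between kernel versions):

static struct inet_protosw inetsw_array[] =
{
	{
		.type =       SOCK_STREAM,
		.protocol =   IPPROTO_TCP,
		.prot =       &tcp_prot,         /* becomes sk->sk_prot */
		.ops =        &inet_stream_ops,  /* becomes sock->ops   */
		.flags =      INET_PROTOSW_PERMANENT | INET_PROTOSW_ICSK,
	},
	{
		.type =       SOCK_DGRAM,
		.protocol =   IPPROTO_UDP,
		.prot =       &udp_prot,
		.ops =        &inet_dgram_ops,
		.flags =      INET_PROTOSW_PERMANENT,
	},
	/* ... raw and other entries omitted ... */
};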
The overall logic is not complicated: sk_alloc() is called first to create the sock instance, then sock_init_data() initializes that instance and links it with the socket instance. In my view sock_init_data() is the most important function here:
/*
1. Initialize the sk_buff receive, write and error queues
2. Link the socket and sock instances to each other
3. Install the sock's default callback functions
4. Initialize the remaining sock fields
*/
void sock_init_data(struct socket *sock, struct sock *sk)
{
/*initialize the sk_buff receive, write and error queues*/
skb_queue_head_init(&sk->sk_receive_queue);
skb_queue_head_init(&sk->sk_write_queue);
skb_queue_head_init(&sk->sk_error_queue);
sk->sk_send_head = NULL;
//initialize the sock's timer
init_timer(&sk->sk_timer);
sk->sk_allocation = GFP_KERNEL;
sk->sk_rcvbuf = sysctl_rmem_default;
sk->sk_sndbuf = sysctl_wmem_default;
sk->sk_state = TCP_CLOSE;
//here the socket and sock instances finally get linked together
sk_set_socket(sk, sock);
sock_set_flag(sk, SOCK_ZAPPED);
if (sock) {
sk->sk_type = sock->type;
sk->sk_wq = sock->wq;
sock->sk = sk;
} else
sk->sk_wq = NULL;
rwlock_init(&sk->sk_callback_lock);
lockdep_set_class_and_name(&sk->sk_callback_lock,
af_callback_keys + sk->sk_family,
af_family_clock_key_strings[sk->sk_family]);
sk->sk_state_change = sock_def_wakeup;//callback invoked when the sock's state changes
sk->sk_data_ready = sock_def_readable;//callback invoked when there is data to read
sk->sk_write_space = sock_def_write_space;//callback invoked when send-buffer space becomes available
sk->sk_error_report = sock_def_error_report;//callback invoked when an I/O error occurs
sk->sk_destruct = sock_def_destruct;
sk->sk_frag.page = NULL;
sk->sk_frag.offset = 0;
sk->sk_peek_off = -1;
sk->sk_peer_pid = NULL;
sk->sk_peer_cred = NULL;
sk->sk_write_pending = 0;
sk->sk_rcvlowat = 1;
sk->sk_rcvtimeo = MAX_SCHEDULE_TIMEOUT;
sk->sk_sndtimeo = MAX_SCHEDULE_TIMEOUT;
sk->sk_stamp = ktime_set(-1L, 0);
#ifdef CONFIG_NET_RX_BUSY_POLL
sk->sk_napi_id = 0;
sk->sk_ll_usec = sysctl_net_busy_read;
#endif
sk->sk_max_pacing_rate = ~0U;
sk->sk_pacing_rate = ~0U;
sk->sk_incoming_cpu = -1;
/*
* Before updating sk_refcnt, we must commit prior changes to memory
* (Documentation/RCU/rculist_nulls.txt for details)
*/
smp_wmb();
atomic_set(&sk->sk_refcnt, 1);
atomic_set(&sk->sk_drops, 0);
}
Several default callbacks are installed above, and their implementations all share essentially the same code structure:
/*
* Default Socket Callbacks
 * Called when the sock's state changes
*/
static void sock_def_wakeup(struct sock *sk)
{
struct socket_wq *wq;
rcu_read_lock();
wq = rcu_dereference(sk->sk_wq);
if (skwq_has_sleeper(wq))//some process is sleeping on this socket
//wake up every process waiting on this socket; the core work is running their wakeup callbacks
wake_up_interruptible_all(&wq->wait);
rcu_read_unlock();
}
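The skwq_has_sleeper() check used above boils down to a memory barrier followed by waitqueue_active(); roughly (abridged from include/net/sock.h and include/linux/wait.h of a 4.x kernel):

static inline bool skwq_has_sleeper(struct socket_wq *wq)
{
	return wq && wq_has_sleeper(&wq->wait);
}

static inline bool wq_has_sleeper(wait_queue_head_t *wq_head)
{
	/* The barrier pairs with one on the sleeping side, so the waker is
	 * guaranteed to see a waiter that was queued just before it slept. */
	smp_mb();
	return waitqueue_active(wq_head);
}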
/*called when the sock has incoming data ready to be read*/
static void sock_def_readable(struct sock *sk)
{
struct socket_wq *wq;
rcu_read_lock();
wq = rcu_dereference(sk->sk_wq);
if (skwq_has_sleeper(wq))
/* wake up the processes waiting for data; again this boils down to running their wakeup callbacks */
wake_up_interruptible_sync_poll(&wq->wait, POLLIN | POLLPRI |
POLLRDNORM | POLLRDBAND);
/* Handle the asynchronous notification queue.
 * Check whether the application is already waiting for data in a recv()-style
 * call; if it is not, send it SIGIO to tell it that data is readable.
 * 'how' selects how the wakeup is handled, 'band' is the I/O type being reported.
 */
sk_wake_async(sk, SOCK_WAKE_WAITD, POLL_IN);
rcu_read_unlock();
}
When data becomes readable, the owning processes should of course be told right away, and that is the job of sk_wake_async(). sk_wake_async() eventually calls kill_fasync_rcu(), which sends SIGIO to the processes queued on the socket's async notification list, telling them to come and fetch the data. This is where asynchronous notification pays off: instead of spinning while waiting for data, a process can give up the CPU so other work can run; once the socket has data, it is notified (much like an interrupt) and comes back to read it.
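The intermediate hop looks roughly like this; this is a condensed sketch rather than verbatim kernel code, since the exact signatures of sk_wake_async()/sock_wake_async() have changed across versions:

/* only sockets that enabled FASYNC (fcntl O_ASYNC) get signalled */
static inline void sk_wake_async(const struct sock *sk, int how, int band)
{
	if (sock_flag(sk, SOCK_FASYNC))
		sock_wake_async(sk->sk_socket, how, band);
}

int sock_wake_async(struct socket *sock, int how, int band)
{
	struct socket_wq *wq;

	if (!sock)
		return -1;
	rcu_read_lock();
	wq = rcu_dereference(sock->wq);
	if (wq && wq->fasync_list) {
		if (how == SOCK_WAKE_URG) {
			kill_fasync(&wq->fasync_list, SIGURG, band);
		} else if (how != SOCK_WAKE_WAITD ||
			   !test_bit(SOCKWQ_ASYNC_WAITDATA, &wq->flags)) {
			/* no reader is already blocked in recv(), so signal the owner */
			kill_fasync(&wq->fasync_list, SIGIO, band);
		}
	}
	rcu_read_unlock();
	return 0;
}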
/*
* rcu_read_lock() is held
 * Despite the 'kill' in its name, this function just sends SIGIO (or SIGURG) to the processes on the fasync queue
*/
static void kill_fasync_rcu(struct fasync_struct *fa, int sig, int band)
{
while (fa) {
struct fown_struct *fown;
unsigned long flags;
if (fa->magic != FASYNC_MAGIC) {
printk(KERN_ERR "kill_fasync: bad magic number in "
"fasync_struct!\n");
return;
}
spin_lock_irqsave(&fa->fa_lock, flags);
if (fa->fa_file) {
fown = &fa->fa_file->f_owner;
/* Don't send SIGURG to processes which have not set a
queued signum: SIGURG has its own default signalling
mechanism. */
if (!(sig == SIGURG && fown->signum == 0))
send_sigio(fown, fa->fa_fd, band);
}
spin_unlock_irqrestore(&fa->fa_lock, flags);
fa = rcu_dereference(fa->fa_next);
}
}
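From the application side, a process opts into this SIGIO delivery with fcntl(): it makes itself the owner of the descriptor and turns on O_ASYNC. A minimal sketch (the handler and helper names are illustrative):

#define _GNU_SOURCE        /* for O_ASYNC on some libc setups */
#include <fcntl.h>
#include <signal.h>
#include <string.h>
#include <unistd.h>

static void sigio_handler(int signo)
{
	(void)signo;   /* data (or another I/O event) is ready on the watched socket */
}

static int enable_sigio(int sockfd)
{
	struct sigaction sa;

	memset(&sa, 0, sizeof(sa));
	sa.sa_handler = sigio_handler;
	sigaction(SIGIO, &sa, NULL);                  /* install the SIGIO handler     */

	if (fcntl(sockfd, F_SETOWN, getpid()) < 0)    /* deliver SIGIO to this process */
		return -1;
	return fcntl(sockfd, F_SETFL,                 /* enable async notification     */
		     fcntl(sockfd, F_GETFL) | O_ASYNC);
}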