关于select/poll与epoll
select/poll虽然避免了无限循环轮询, 但是有一个问题,那就是特定事件发生时,我们还是需要再次遍历一遍所有被监听的fd,查看到底是哪个或哪几个fd产生了可读或可写事件
epoll的等待事件返回后, 虽然返回值也是就绪事件总数, 但就绪事件会返回到一个指定的事件集合中,这个集合只包含已就绪事件,这样就节省了很多不必要的遍历时间
相关函数与结构
int epoll_create(int size); //size只是对要监听的fd数量的一个提示(并不等同于select里的maxfd+1),linux v2.6.8内核后的版本已经忽略size参数,但其值仍必须大于0
int epoll_create1(int flags); //新的create函数, flags通常指定为EPOLL_CLOEXEC,表示进程调用exec函数后,关闭该描述符,而不像fork那样继承下来
int epoll_ctl(int epfd,int op,int fd,struct epoll_event *event);
//epfd为epoll_create的返回值
//op选项:EPOLL_CTL_ADD/EPOLL_CTL_MOD/EPOLL_CTL_DEL,分别用于添加、修改或删除要监听的事件
//fd为要添加/修改/删除的目标文件描述符
/* Event record: passed to epoll_ctl() to register interest and filled in
 * by epoll_wait() for each ready event. */
struct epoll_event{
__uint32_t events; /* bit mask of event flags (EPOLLIN, EPOLLOUT, EPOLLET, ...) */
epoll_data_t data; /* caller-chosen payload associated with this registration */
};
/*
 * Caller-defined payload stored with an epoll registration. Only one member
 * is meaningful at a time; which one is used is up to the application.
 */
typedef union epoll_data{
    void *ptr;       /* arbitrary user pointer */
    int fd;          /* file descriptor */
    __uint32_t u32;  /* 32-bit user value */
    __uint64_t u64;  /* 64-bit user value (must be a 64-bit type, not __uint32_t) */
}epoll_data_t;
//events选项:EPOLLIN/EPOLLOUT/EPOLLET等(水平触发LT是默认模式,并没有对应的EPOLLLT标志)
int epoll_wait(int epfd,struct epoll_event *events,int maxevents,int timeout);
//epfd为epoll_create的返回值
//events作为输出,为可读/可写事件的返回值,通常是一个结构数组
//maxevents为上一个参数的对应数组的size
//timeout 超时,毫秒
//返回值是已经准备就绪的fd总数
//man手册例子
/*
 * Example adapted from the epoll(7) manual page: accept connections on a
 * listening socket and hand every other ready descriptor to do_use_fd().
 * setnonblocking() and do_use_fd() are user-supplied helpers.
 */
#define MAX_EVENTS 10
struct epoll_event ev, events[MAX_EVENTS];
int listen_sock, conn_sock, nfds, epollfd;
int n;                      /* index into the ready-event array */
struct sockaddr_in local;   /* filled in by accept() with the peer address */
socklen_t addrlen;

/* Set up listening socket, 'listen_sock' (socket(),
   bind(), listen()) */

epollfd = epoll_create(10);
if (epollfd == -1) {
    perror("epoll_create");
    exit(EXIT_FAILURE);
}

/* The listening socket is registered level-triggered (the default). */
ev.events = EPOLLIN;
ev.data.fd = listen_sock;
if (epoll_ctl(epollfd, EPOLL_CTL_ADD, listen_sock, &ev) == -1) {
    perror("epoll_ctl: listen_sock");
    exit(EXIT_FAILURE);
}

for (;;) {
    nfds = epoll_wait(epollfd, events, MAX_EVENTS, -1);
    if (nfds == -1) {
        perror("epoll_wait");
        exit(EXIT_FAILURE);
    }

    for (n = 0; n < nfds; ++n) {
        if (events[n].data.fd == listen_sock) {
            /* addrlen is a value-result argument: reset it before each call. */
            addrlen = sizeof(local);
            conn_sock = accept(listen_sock,
                               (struct sockaddr *) &local, &addrlen);
            if (conn_sock == -1) {
                perror("accept");
                exit(EXIT_FAILURE);
            }
            /* Edge-triggered descriptors must be non-blocking. */
            setnonblocking(conn_sock);
            ev.events = EPOLLIN | EPOLLET;
            ev.data.fd = conn_sock;
            if (epoll_ctl(epollfd, EPOLL_CTL_ADD, conn_sock,
                          &ev) == -1) {
                perror("epoll_ctl: conn_sock");
                exit(EXIT_FAILURE);
            }
        } else {
            do_use_fd(events[n].data.fd);
        }
    }
}
关于ET(边缘触发,EPOLLET)和LT(水平触发,默认模式,没有单独的EPOLLLT标志):
ET模式仅当状态发生变化的时候才获得通知,这里所谓的状态的变化并不包括缓冲区中还有未处理的数据,也就是说,如果要采用ET模式,需要把fd设为非阻塞,并一直read/write直到返回EAGAIN(EWOULDBLOCK)为止,很多人反映为什么采用ET模式只接收了一部分数据就再也得不到通知了,大多是因为没有这样做;而LT模式是只要有数据没有处理就会一直通知下去的.
epoll版server
#include <unistd.h>
#include <fcntl.h>
#include <netdb.h>
#include <errno.h>
#include <stdlib.h>
#include <stdio.h>
#include <string.h>
#include <signal.h>
#include <sys/wait.h>
#include <sys/socket.h>
#include <arpa/inet.h>
#include <netinet/in.h>
#include <sys/epoll.h>
/* Larger of a and b. Each argument may be evaluated twice, so do not pass
 * expressions with side effects. */
#define MAX(a,b) ((a)>(b)?(a):(b))
/*
 * Report the current errno on stderr, prefixed by s, then terminate the
 * process with exit status 1. Never returns.
 */
void err_quit(const char *s)
{
    perror(s);
    exit(1);
}
/*
 * Read up to count bytes from fd into buff, retrying short reads and reads
 * interrupted by a signal (EINTR).
 *
 * Returns the number of bytes stored, which is less than count only if end
 * of file was reached first, or -1 on any other read error.
 */
ssize_t readn(int fd,void *buff,size_t count){
    char *cursor = (char *)buff;
    size_t remaining = count;

    while (remaining != 0) {
        ssize_t got = read(fd, cursor, remaining);

        if (got == 0)
            break;              /* end of file */
        if (got < 0) {
            if (errno != EINTR)
                return -1;
            continue;           /* interrupted: try again */
        }
        remaining -= got;
        cursor += got;
    }
    return count - remaining;
}
/*
 * Write n bytes from buff to fd, continuing after short writes and after
 * writes interrupted by a signal (EINTR).
 *
 * Returns the number of bytes written (less than n only if write() returned
 * 0), or -1 on any other write error.
 */
ssize_t writen(int fd,const void *buff,size_t n){
    const char *cursor = buff;
    size_t remaining = n;

    while (remaining != 0) {
        ssize_t put = write(fd, cursor, remaining);

        if (put == 0)
            break;
        if (put < 0) {
            if (errno != EINTR)
                return -1;
            continue;           /* interrupted: try again */
        }
        remaining -= put;
        cursor += put;
    }
    return n - remaining;
}
/*
 * Copy up to len bytes of pending socket data into buf without removing
 * them from the receive queue (MSG_PEEK). Calls interrupted by a signal
 * are retried.
 *
 * Returns whatever recv() returns: the byte count, 0, or -1 with errno set.
 */
ssize_t recv_peek(int fd,void *buf,size_t len){
    ssize_t peeked;

    do {
        peeked = recv(fd, buf, len, MSG_PEEK);
    } while (peeked == -1 && errno == EINTR);

    return peeked;
}
/*
 * Read one '\n'-terminated line from socket fd into buf (at most maxline
 * bytes; the newline is stored, no NUL terminator is added).
 *
 * The socket is peeked first so that nothing beyond the newline is consumed.
 *
 * Returns:
 *   > 0  total number of bytes stored in buf, including the newline;
 *   0    the peer closed the connection before a newline arrived;
 *   -1   recv() failed (errno from recv, e.g. EAGAIN on a non-blocking
 *        socket), or maxline bytes were read without finding a newline
 *        (errno = EMSGSIZE).
 * Calls err_quit() if data that was just peeked cannot be read back.
 */
ssize_t readline(int fd,void *buf,size_t maxline){
    char *bufp = buf;
    size_t nleft = maxline;

    while (nleft > 0) {
        /* Peek at the unused tail of the buffer, not its start, so bytes
         * consumed on earlier iterations are preserved. */
        ssize_t ret = recv_peek(fd, bufp, nleft);
        if (ret <= 0)
            return ret;

        size_t nread = (size_t)ret;
        for (size_t i = 0; i < nread; i++) {
            if (bufp[i] == '\n') {
                /* Consume exactly up to and including the newline. */
                ret = readn(fd, bufp, i + 1);
                if (ret != (ssize_t)(i + 1))
                    err_quit("readn");
                /* Report the whole line, including earlier chunks. */
                return (ssize_t)(bufp - (char *)buf) + ret;
            }
        }

        if (nread > nleft)
            err_quit("readn");

        /* No newline yet: consume what was peeked and keep looking. */
        nleft -= nread;
        ret = readn(fd, bufp, nread);
        if (ret != (ssize_t)nread)
            err_quit("readn");
        bufp += nread;
    }

    /* Buffer exhausted without a newline; do not masquerade as EOF. */
    errno = EMSGSIZE;
    return -1;
}
/*
 * accept() a connection on listening socket fd, optionally waiting at most
 * wait_seconds for one to arrive.
 *
 *   fd            listening socket
 *   addr          if non-NULL, receives the peer address
 *   wait_seconds  timeout in seconds; 0 means call accept() directly with
 *                 no timeout
 *
 * Returns the connected descriptor, or -1 if the wait timed out
 * (errno = ETIMEDOUT) or select() failed. If accept() itself fails the
 * process is terminated through err_quit().
 */
int accept_timeout(int fd,struct sockaddr_in *addr,unsigned int wait_seconds){
    int ret;
    socklen_t addrlen = sizeof(struct sockaddr_in);

    if (wait_seconds > 0) {
        do {
            /* Rebuild both arguments on every attempt: after a failed
             * select() the timeout value is undefined on Linux. Note that
             * an interrupted wait therefore restarts the full timeout. */
            fd_set accept_fdset;
            struct timeval timeout;

            FD_ZERO(&accept_fdset);
            FD_SET(fd, &accept_fdset);
            timeout.tv_sec = wait_seconds;
            timeout.tv_usec = 0;

            ret = select(fd + 1, &accept_fdset, NULL, NULL, &timeout);
        } while (ret < 0 && errno == EINTR);

        if (ret == 0) {
            errno = ETIMEDOUT;
            return -1;
        }
        if (ret == -1)
            return -1;
    }

    if (addr != NULL)
        ret = accept(fd, (struct sockaddr *)addr, &addrlen);
    else
        ret = accept(fd, NULL, NULL);
    if (ret == -1)
        err_quit("accept");
    return ret;
}
/*
 * Switch fd to non-blocking mode by adding O_NONBLOCK to its file status
 * flags. Terminates the process through err_quit() if fcntl() fails.
 */
void activate_nonblock(int fd){
    int current = fcntl(fd, F_GETFL);

    if (current == -1)
        err_quit("fcntl");
    if (fcntl(fd, F_SETFL, current | O_NONBLOCK) == -1)
        err_quit("fcntl");
}
/*
 * Switch fd back to blocking mode by clearing O_NONBLOCK from its file
 * status flags. Terminates the process through err_quit() if fcntl() fails.
 */
void deactivate_nonblock(int fd){
    int current = fcntl(fd, F_GETFL);

    if (current == -1)
        err_quit("fcntl");
    if (fcntl(fd, F_SETFL, current & ~O_NONBLOCK) == -1)
        err_quit("fcntl");
}
/*
 * Line-oriented TCP echo server on port 5566 built on epoll.
 *
 * The listening socket is registered level-triggered; every accepted
 * connection is made non-blocking and registered edge-triggered. Each line
 * received is copied to standard output and echoed back to the client.
 * Fatal setup or epoll errors terminate the process through err_quit().
 *
 * Limitation: readline() keeps no state between calls, so a partial line
 * that arrives without its newline is discarded when the socket runs dry.
 */
int main(int argc,char *argv[]){
    enum { SERVER_PORT = 5566, SERVER_BACKLOG = 10, SERVER_MAX_EVENTS = 10 };
    int sockfd, epollfd;
    struct sockaddr_in addr;
    struct epoll_event ev, events[SERVER_MAX_EVENTS];
    char buf[1024];
    int on = 1;

    (void)argc;
    (void)argv;

    /* Writing to a connection the peer already closed must yield EPIPE,
     * not kill the whole server with SIGPIPE. */
    if (signal(SIGPIPE, SIG_IGN) == SIG_ERR)
        err_quit("signal");

    if ((sockfd = socket(PF_INET, SOCK_STREAM, 0)) < 0)
        err_quit("sockfd");

    memset(&addr, 0, sizeof(addr));
    addr.sin_family = AF_INET;
    addr.sin_addr.s_addr = htonl(INADDR_ANY);
    addr.sin_port = htons(SERVER_PORT);

    if (setsockopt(sockfd, SOL_SOCKET, SO_REUSEADDR, &on, sizeof(on)) < 0)
        err_quit("setsockopt");
    if (bind(sockfd, (struct sockaddr *)&addr, sizeof(addr)) < 0)
        err_quit("bind");
    if (listen(sockfd, SERVER_BACKLOG) < 0)
        err_quit("listen");

    epollfd = epoll_create(10);
    if (epollfd == -1)
        err_quit("epoll_create");

    ev.events = EPOLLIN;
    ev.data.fd = sockfd;
    if (epoll_ctl(epollfd, EPOLL_CTL_ADD, sockfd, &ev) < 0)
        err_quit("epoll_ctl_add");

    while (1) {
        int nready = epoll_wait(epollfd, events, SERVER_MAX_EVENTS, -1);
        if (nready == -1) {
            if (errno == EINTR)
                continue;
            err_quit("epoll_wait");
        }

        for (int i = 0; i < nready; i++) {
            int fd = events[i].data.fd;

            if (fd == sockfd) {
                struct sockaddr_in client;
                socklen_t len = sizeof(client);
                int connfd = accept(sockfd, (struct sockaddr *)&client, &len);

                if (connfd < 0)
                    err_quit("accept");
                activate_nonblock(connfd);
                ev.events = EPOLLIN | EPOLLET;
                ev.data.fd = connfd;
                if (epoll_ctl(epollfd, EPOLL_CTL_ADD, connfd, &ev) < 0)
                    err_quit("epoll_ctl_add");
            } else if (events[i].events & (EPOLLIN | EPOLLERR | EPOLLHUP)) {
                /* The readiness mask lives in .events, not in .data.fd.
                 * Errors and hang-ups take the read path too, so the
                 * failing read closes the connection.
                 *
                 * Edge-triggered: keep reading until the socket is empty,
                 * otherwise the remaining lines are never reported. */
                for (;;) {
                    ssize_t n;

                    memset(buf, 0, sizeof(buf));
                    errno = 0;
                    n = readline(fd, buf, sizeof(buf));

                    if (n > 0) {
                        (void)writen(STDOUT_FILENO, buf, (size_t)n);
                        if (writen(fd, buf, (size_t)n) == n)
                            continue;
                        /* Echo failed (peer gone or send buffer full; this
                         * server has no output queue): drop the client. */
                    } else if (n < 0 &&
                               (errno == EAGAIN || errno == EWOULDBLOCK)) {
                        break;  /* drained; wait for the next edge */
                    }

                    /* EOF, read error or failed echo: forget the client. */
                    if (epoll_ctl(epollfd, EPOLL_CTL_DEL, fd, &ev) < 0)
                        err_quit("epoll_ctl_del");
                    close(fd);
                    break;
                }
            } else if (events[i].events & EPOLLOUT) {
                /* Placeholder: nothing registers for EPOLLOUT yet. */
            }
        }
    }
}