看我的测试代码,似乎应该是在创建子进程之后创建epoll的fd,否则程序将会有问题,试将代码中两个CreateWorker函数的调用位置分别调用,一个在创建epoll fd之前,一个在之后,在调用在创建之前的代码会出问题,在我的机器上(linux内核2.6.26)表现的症状就是所有进程的epoll_wait函数返回0, 而客户端似乎被阻塞了:
服务器端: #include < iostream > #include < sys / socket.h > #include < sys / epoll.h > #include < netinet / in .h > #include < arpa / inet.h > #include < fcntl.h > #include < unistd.h > #include < stdio.h > #include < errno.h > #include < sys / types.h > #include < sys / wait.h > using namespace std; #define MAXLINE 5 #define OPEN_MAX 100 #define LISTENQ 20 #define SERV_PORT 5000 #define INFTIM 1000 typedef struct task_t{ int fd; char buffer[ 100 ]; int n;}task_t; int CreateWorker( int nWorker){ if ( 0 < nWorker) { bool bIsChild; pid_t nPid; while ( ! bIsChild) { if ( 0 < nWorker) { nPid = ::fork(); if (nPid > 0 ) { bIsChild = false ; -- nWorker; } else if ( 0 == nPid) { bIsChild = true ; printf( " create worker %d success!\n " , ::getpid()); } else { printf( " fork error: %s\n " , ::strerror(errno)); return - 1 ; } } else { int nStatus; if ( - 1 == ::wait( & nStatus)) { ++ nWorker; } } } } return 0 ;} void setnonblocking( int sock){ int opts; opts = fcntl(sock,F_GETFL); if (opts < 0 ) { perror( " fcntl(sock,GETFL) " ); exit( 1 ); } opts = opts | O_NONBLOCK; if (fcntl(sock,F_SETFL,opts) < 0 ) { perror( " fcntl(sock,SETFL,opts) " ); exit( 1 ); } } int main(){ int i, maxi, listenfd, connfd, sockfd,epfd,nfds; ssize_t n; char line[MAXLINE]; socklen_t clilen; struct epoll_event ev,events[ 20 ]; struct sockaddr_in clientaddr; struct sockaddr_in serveraddr; listenfd = socket(AF_INET, SOCK_STREAM, 0 ); bzero( & serveraddr, sizeof (serveraddr)); serveraddr.sin_family = AF_INET; char * local_addr = " 127.0.0.1 " ; inet_aton(local_addr, & (serveraddr.sin_addr)); // htons(SERV_PORT); serveraddr.sin_port = htons(SERV_PORT); // 地址重用 int nOptVal = 1 ; socklen_t nOptLen = sizeof ( int ); if ( - 1 == ::setsockopt(listenfd, SOL_SOCKET, SO_REUSEADDR, & nOptVal, nOptLen)) { return - 1 ; } setnonblocking(listenfd); bind(listenfd,(sockaddr * ) & serveraddr, sizeof (serveraddr)); listen(listenfd, LISTENQ); CreateWorker( 5 ); // 把socket设置为非阻塞方式 // 生成用于处理accept的epoll专用的文件描述符 epfd = epoll_create( 256 ); // 设置与要处理的事件相关的文件描述符 ev.data.fd = listenfd; // 设置要处理的事件类型 ev.events = EPOLLIN | EPOLLET; // ev.events=EPOLLIN; // 注册epoll事件 epoll_ctl(epfd,EPOLL_CTL_ADD,listenfd, & ev); // CreateWorker(5); maxi = 0 ; task_t task; task_t * ptask; while ( true ) { // 等待epoll事件的发生 nfds = epoll_wait(epfd,events, 20 , 500 ); // 处理所发生的所有事件 for (i = 0 ;i < nfds; ++ i) { if (events[i].data.fd == listenfd) { connfd = accept(listenfd,NULL, NULL); if (connfd < 0 ){ printf( " connfd<0, listenfd = %d\n " , listenfd); printf( " error = %s\n " , strerror(errno)); exit( 1 ); } setnonblocking(connfd); // 设置用于读操作的文件描述符 memset( & task, 0 , sizeof (task)); task.fd = connfd; ev.data.ptr = & task; // 设置用于注册的读操作事件 ev.events = EPOLLIN | EPOLLET; // ev.events=EPOLLIN; // 注册ev epoll_ctl(epfd,EPOLL_CTL_ADD,connfd, & ev); } else if (events[i].events & EPOLLIN) { cout << " EPOLLIN " << endl; ptask = (task_t * )events[i].data.ptr; sockfd = ptask -> fd; if ( (ptask -> n = read(sockfd, ptask -> buffer, 100 )) < 0 ) { if (errno == ECONNRESET) { close(sockfd); events[i].data.ptr = NULL; } else std::cout << " readline error " << std::endl; } else if (ptask -> n == 0 ) { close(sockfd); events[i].data.ptr = NULL; } ptask -> buffer[ptask -> n] = ' \0 ' ; cout << " read " << ptask -> buffer << endl; // 设置用于写操作的文件描述符 ev.data.ptr = ptask; // 设置用于注测的写操作事件 ev.events = EPOLLOUT | EPOLLET; // 修改sockfd上要处理的事件为EPOLLOUT epoll_ctl(epfd,EPOLL_CTL_MOD,sockfd, & ev); } else if (events[i].events & EPOLLOUT) { cout << " EPOLLOUT " << endl; ptask = (task_t * )events[i].data.ptr; sockfd = ptask -> fd; write(sockfd, ptask -> buffer, ptask -> n); // 设置用于读操作的文件描述符 ev.data.ptr = ptask; // 修改sockfd上要处理的事件为EPOLIN epoll_ctl(epfd,EPOLL_CTL_DEL,sockfd, & ev); cout << " write " << ptask -> buffer; memset(ptask, 0 , sizeof ( * ptask)); close(sockfd); } } } return 0 ;}
测试客户端: # !/ usr / bin / perluse strict;use Socket;use IO::Handle;sub echoclient{ my $host = " 127.0.0.1 " ; my $port = 5000 ; my $protocol = getprotobyname( " TCP " ); $host = inet_aton($host); socket(SOCK, AF_INET, SOCK_STREAM, $protocol) or die " socket() failed: $! " ; my $dest_addr = sockaddr_in($port, $host); connect(SOCK, $dest_addr) or die " connect() failed: $! " ; SOCK -> autoflush( 1 ); my $msg_out = " hello world\n " ; print " out = " , $msg_out; print SOCK $msg_out; my $msg_in = < SOCK > ; print " in = " , $msg_in; close SOCK;}# & echoclient;#exit( 0 ); for (my $i = 0 ; $i < 9999 ; $i ++ ){ echoclient;}
我查看了lighttpd的实现,也是在创建完子进程之后才创建的epoll的fd. 请问谁知道哪里有讲解这个的文档? 这是美丽的分割线: ----------------------------------------------------------------------- 感谢luke, 他帮我解释了这个问题的原因: 假如fd1是由A进程加入epfd的,而且用的是ET模式,那么加入通知的是进程B,显然B进程不会对fd1进行处理,所以以后fd1的事件再不会通知,所以经过几次循环之后,所有的fd都没有事件通知了,所以epoll_wait在timeout之后就返回0了。而在客户端的结果可想而知,只能是被阻塞。 也就是说, 这是一种发生在epoll fd上面的类似于"惊群"的现象.