MaxDataRetries"="5"
对于实用的程序来说,2小时的空闲时间太长。因此,我们需要手工开启Keepalive功能并设置合理的Keepalive参数。
// 开启KeepAlive
BOOL bKeepAlive = TRUE;
int nRet = ::setsockopt(socket_handle, SOL_SOCKET, SO_KEEPALIVE, (char*)&bKeepAlive, sizeof(bKeepAlive));
if (nRet == SOCKET_ERROR)
{
return FALSE;
}
// 设置KeepAlive参数
tcp_keepalive alive_in = {0};
tcp_keepalive alive_out = {0};
alive_in.keepalivetime = 5000; // 开始首次KeepAlive探测前的TCP空闭时间
alive_in.keepaliveinterval = 1000; // 两次KeepAlive探测间的时间间隔
alive_in.onoff = TRUE;
unsigned long ulBytesReturn = 0;
nRet = WSAIoctl(socket_handle, SIO_KEEPALIVE_VALS, &alive_in, sizeof(alive_in),
&alive_out, sizeof(alive_out), &ulBytesReturn, NULL, NULL);
if (nRet == SOCKET_ERROR)
{
return FALSE;
}
开启Keepalive选项之后,对于使用IOCP模型的服务器端程序来说,一旦检测到连接断开,GetQueuedCompletionStatus函数将立即返回FALSE,使得服务器端能及时清除该连接、释放该连接相关的资源。对于使用select模型的客户端来说,连接断开被探测到时,以recv目的阻塞在socket上的select方法将立即返回SOCKET_ERROR,从而得知连接已失效,客户端程序便有机会及时执行清除工作、提醒用户或重新连接。
另一种技术,由应用程序自己发送心跳包来检测连接的健康性。客户端可以在一个Timer中或低级别的线程中定时向发服务器发送一个短小精悍的包,并等待服务器的回应。客户端程序在一定时间内没有收到服务器回应即认为连接不可用,同样,服务器在一定时间内没有收到客户端的心跳包则认为客户端已经掉线。
++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
windows下此处的”非正常断开”指TCP连接不是以优雅的方式断开,如网线故障等物理链路的原因,还有突然主机断电等原因.
有两种方法可以检测:
1.TCP连接双方定时发握手消息
2.利用TCP协议栈中的KeepAlive探测
第二种方法简单可靠,只需对TCP连接两个Socket设定KeepAlive探测,
所以本文只讲第二种方法在Linux,Window2000下的实现(在其它的平台上没有作进一步的测试)
Windows 2000平台下 头文件
#include <mstcpip.h>
//定义结构及宏
/*
struct TCP_KEEPALIVE {
u_longonoff;
u_longkeepalivetime;
u_longkeepaliveinterval;
} ;
*/
tcp_keepalive live,liveout;
live.keepaliveinterval=5000; //每5秒发一次探测报文,发5次没有回应,就断开
live.keepalivetime=30000;//超过30s没有数据,就发送控测包
live.onoff=TRUE;
int Opt = 1;
int iRet = setsockopt(Accept,SOL_SOCKET,SO_KEEPALIVE,(char *)&Opt,sizeof(int));
if(iRet == 0){
DWORD dw;
if(::WSAIoctl(Accept,SIO_KEEPALIVE_VALS,
&live,sizeof(live),&liveout,sizeof(liveout),
&dw,NULL,NULL)== SOCKET_ERROR){
}
}
ACE下代码 //by rainfish blog.csdn.net/bat603
int Opt = 1;
//在测试过程中,发现检测的次数是5次,即下面的设置中,从最近一次消息开始计算的10秒后,每次间隔5秒,连续发送5次,即35秒发现网络断了
tcp_keepalive live,liveout;
live.keepaliveinterval=5000; //每次检测的间隔 (单位毫秒)
live.keepalivetime=10000; //第一次开始发送的时间(单位毫秒)
live.onoff=TRUE;
int iRet = stream.set_option(SOL_SOCKET,SO_KEEPALIVE,&Opt,sizeof(int));
if(iRet == 0){
DWORD dw;
//此处显示了在ACE下获取套接字的方法,即句柄的(SOCKET)化就是句柄
if(WSAIoctl((SOCKET)h,SIO_KEEPALIVE_VALS,&live,sizeof(live),
&liveout,sizeof(liveout),&dw,NULL,NULL)== SOCKET_ERROR){
//Delete Client
return;
}
}
Linux平台下
#include "/usr/include/linux/tcp.h"
#include "/usr/include/linux/socket.h"
////KeepAlive实现,单位秒
//下面代码要求有ACE,如果没有包含ACE,则请把用到的ACE函数改成linux相应的接口
int keepAlive = 1;//设定KeepAlive
int keepIdle = 5;//开始首次KeepAlive探测前的TCP空闭时间
int keepInterval = 5;//两次KeepAlive探测间的时间间隔
int keepCount = 3;//判定断开前的KeepAlive探测次数
if(setsockopt(s,SOL_SOCKET,SO_KEEPALIVE,(void*)&keepAlive,sizeof(keepAlive)) == -1)
{
ACE_DEBUG ((LM_INFO,
ACE_TEXT ("(%P|%t) setsockopt SO_KEEPALIVE error!/n")));
}
if(setsockopt(s,SOL_TCP,TCP_KEEPIDLE,(void *)&keepIdle,sizeof(keepIdle)) == -1)
{
ACE_DEBUG ((LM_INFO,
ACE_TEXT ("(%P|%t) setsockopt TCP_KEEPIDLE error!/n")));
}
if(setsockopt(s,SOL_TCP,TCP_KEEPINTVL,(void *)&keepInterval,sizeof(keepInterval)) == -1)
{
ACE_DEBUG ((LM_INFO,
ACE_TEXT ("(%P|%t) setsockopt TCP_KEEPINTVL error!/n")));
}
if(setsockopt(s,SOL_TCP,TCP_KEEPCNT,(void *)&keepCount,sizeof(keepCount)) == -1)
{
ACE_DEBUG ((LM_INFO,
ACE_TEXT ("(%P|%t)setsockopt TCP_KEEPCNT error!/n")));
}
++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++