用socket来代替HttpWebRequest和HttpWebResponse

通常爬虫并不知道目标站点的 IP，只知道它的 URL，因此需要先通过 DNS 把主机名解析成 IP 地址。先把客户端代码修改如下：

    /// <summary>
    /// Performs a plain-HTTP GET over a raw TCP socket and returns the raw response as text.
    /// </summary>
    /// <param name="url">
    /// Address to fetch. "http://" is assumed when no scheme is present; a non-default port
    /// and a path/query are honoured. Only the http scheme is supported.
    /// </param>
    /// <param name="encoding">
    /// Optional. The first element is used to decode the response; UTF-8 is used when omitted.
    /// </param>
    /// <returns>
    /// Everything the server sent until it closed the connection (status line, headers and
    /// body), decoded as text. On failure, whatever was received so far (possibly empty).
    /// </returns>
    public string Get(string url, params Encoding[] encoding)
        {
            StringBuilder responseText = new StringBuilder();
            // NOTE(review): the status is set to Busy here and never reset in this method;
            // kept as in the original because the other status values are not visible here.
            _statu = HttpRequestStatus.Busy;
            try
            {
                // Accept bare "host/path" input by assuming plain HTTP.
                if (url.IndexOf("://", StringComparison.Ordinal) < 0)
                {
                    url = "http://" + url;
                }

                Uri target = new Uri(url);
                if (target.Scheme != Uri.UriSchemeHttp)
                {
                    throw new NotSupportedException("Only http:// URLs can be fetched over a raw socket: " + url);
                }

                // GetHostAddresses also accepts IP literals. Prefer IPv4, fall back to
                // whatever the resolver returned.
                IPAddress[] addresses = Dns.GetHostAddresses(target.DnsSafeHost);
                IPAddress address = Array.Find(addresses, a => a.AddressFamily == AddressFamily.InterNetwork);
                if (address == null && addresses.Length > 0)
                {
                    address = addresses[0];
                }
                if (address == null)
                {
                    throw new SocketException((int)SocketError.HostNotFound);
                }

                Encoding coder = Encoding.UTF8;
                if (encoding != null && encoding.Length > 0)
                {
                    coder = encoding[0];
                }

                // The using block closes the socket even when connect/send/receive throws.
                using (Socket socket = new Socket(address.AddressFamily, SocketType.Stream, ProtocolType.Tcp))
                {
                    socket.Connect(new IPEndPoint(address, target.Port));

                    // Origin-form request line plus a Host header, so name-based virtual
                    // hosts can route the request. PathAndQuery is already percent-escaped,
                    // therefore ASCII is sufficient for the request bytes.
                    StringBuilder bufRequest = new StringBuilder();
                    bufRequest.Append("GET ").Append(target.PathAndQuery).Append(" HTTP/1.0\r\n");
                    bufRequest.Append("Host: ").Append(target.Authority).Append("\r\n");
                    bufRequest.Append("Content-Type: application/x-www-form-urlencoded\r\n");
                    bufRequest.Append("\r\n");
                    socket.Send(Encoding.ASCII.GetBytes(bufRequest.ToString()));

                    // A stateful decoder keeps multi-byte characters intact when they are
                    // split across two reads; decoding each chunk on its own corrupts them.
                    Decoder decoder = coder.GetDecoder();
                    byte[] recvBytes = new byte[1024];
                    char[] recvChars = new char[coder.GetMaxCharCount(recvBytes.Length)];
                    int bytes;
                    while ((bytes = socket.Receive(recvBytes, recvBytes.Length, 0)) > 0)
                    {
                        int chars = decoder.GetChars(recvBytes, 0, bytes, recvChars, 0);
                        responseText.Append(recvChars, 0, chars);
                    }

                    // Flush any trailing incomplete byte sequence left in the decoder.
                    int tail = decoder.GetChars(recvBytes, 0, 0, recvChars, 0, true);
                    responseText.Append(recvChars, 0, tail);
                }
            }
            catch (Exception ex)
            {
                // Best effort, as before: callers get what was received so far, but the
                // failure is no longer silent.
                System.Diagnostics.Trace.WriteLine(string.Format("Get({0}) failed: {1}", url, ex));
            }
            return responseText.ToString();
        }

以上是用 Socket 同步方式实现的，以下是异步方式的实现：

/// <summary>
/// Per-request state handed through the asynchronous socket callbacks: the socket,
/// a fixed-size receive buffer, the accumulated response bytes, the request text and
/// the completion handler.
/// </summary>
public class StateObject:IDisposable
    {
        /// <summary>Socket used for the request; null until the caller assigns it.</summary>
        public Socket workSocket = null;
        /// <summary>Size, in bytes, of <see cref="buffer"/>.</summary>
        public const int BufferSize = 256;
        /// <summary>Scratch buffer that each receive operation fills.</summary>
        public byte[] buffer = new byte[BufferSize];
        /// <summary>Accumulates the raw bytes received so far.</summary>
        public MemoryStream Stream = new MemoryStream();
        /// <summary>Request text to send once the socket is connected.</summary>
        public string header = null;
        /// <summary>Encoding used to turn <see cref="header"/> into bytes (UTF-8 by default).</summary>
        public static Encoding Encoding = Encoding.UTF8;
        /// <summary>Handler that receives <see cref="Stream"/> when the response is complete.</summary>
        public Action<Stream> HandAction;


        /// <summary>
        /// Closes the socket, if any. Safe to call repeatedly and on a socket that was never
        /// connected or has already been closed elsewhere.
        /// </summary>
        public void Dispose()
        {
            Socket socket = workSocket;
            if (socket == null)
            {
                return;
            }

            try
            {
                // Shutdown throws on a socket that is not connected or already disposed,
                // so only attempt it while the connection is still up.
                if (socket.Connected)
                {
                    socket.Shutdown(SocketShutdown.Both);
                }
            }
            catch (SocketException)
            {
                // The peer dropped the connection first; nothing left to shut down.
            }
            catch (ObjectDisposedException)
            {
                // Closed concurrently by a receive callback; Dispose must not throw.
            }
            finally
            {
                socket.Close();
            }
            // Stream is left open on purpose: the completion handler is given this very
            // stream and may still be reading from it.
        }
    }
    /// <summary>
    /// Minimal asynchronous TCP client built on the Begin/End socket pattern. It connects,
    /// sends one request string, buffers everything the server returns until the server
    /// closes the connection, and then passes the buffered bytes to a handler.
    /// </summary>
    /// <remarks>
    /// If connecting, sending or receiving fails, the error is written to Trace, the socket
    /// is closed and the handler is NOT invoked.
    /// </remarks>
    public class AsynchronousClient:IDisposable
    {
        /// <summary>State of the most recent StartClient call; null before the first call.</summary>
        public StateObject State { set; get; }

        /// <summary>
        /// Starts connecting to <paramref name="ipPoint"/> and returns immediately.
        /// </summary>
        /// <param name="ipPoint">Remote endpoint; its address family selects the socket type.</param>
        /// <param name="data">Request text sent as soon as the connection is established.</param>
        /// <param name="actionHandle">
        /// Called on a callback thread once the server closes the connection. The stream holds
        /// all received bytes and is positioned at its end, so seek to the start before reading.
        /// </param>
        public void StartClient(IPEndPoint ipPoint, string data, Action<Stream> actionHandle)
        {
            Socket client = null;
            try
            {
                client = new Socket(ipPoint.AddressFamily, SocketType.Stream, ProtocolType.Tcp);
                StateObject state = new StateObject();
                state.workSocket = client;
                state.header = data;
                state.HandAction = actionHandle;
                State = state;
                client.BeginConnect(ipPoint, new AsyncCallback(ConnectCallback), state);
            }
            catch (Exception ex)
            {
                // Keep the original "never throws" contract, but report the failure and
                // do not leak the half-initialised socket.
                Trace.WriteLine(string.Format("StartClient failed: {0}", ex));
                if (client != null)
                {
                    client.Close();
                }
            }
        }

        // Completes the connect and starts sending the request text.
        void ConnectCallback(IAsyncResult ar)
        {
            StateObject state = (StateObject)ar.AsyncState;
            Socket client = state.workSocket;
            try
            {
                client.EndConnect(ar);
                Trace.WriteLine(string.Format("Socket connected to {0}", client.RemoteEndPoint.ToString()));
                byte[] byteData = StateObject.Encoding.GetBytes(state.header);

                client.BeginSend(byteData, 0, byteData.Length, 0, new AsyncCallback(SendCallback), state);
            }
            catch (Exception ex)
            {
                // An exception escaping a thread-pool callback would terminate the process.
                Abort(client, "connect", ex);
            }
        }

        // Completes the send and starts the first receive.
        void SendCallback(IAsyncResult ar)
        {
            StateObject state = (StateObject)ar.AsyncState;
            Socket client = state.workSocket;
            try
            {
                int bytesSent = client.EndSend(ar);
                Trace.WriteLine(string.Format("Sent {0} bytes to server.", bytesSent.ToString()));
                client.BeginReceive(state.buffer, 0, StateObject.BufferSize, 0, new AsyncCallback(ReceiveCallback), state);
            }
            catch (Exception ex)
            {
                Abort(client, "send", ex);
            }
        }

        // Stores each received chunk and keeps reading; a zero-byte read means the server
        // closed the connection, at which point the handler is given the buffered bytes.
        void ReceiveCallback(IAsyncResult ar)
        {
            StateObject state = (StateObject)ar.AsyncState;
            Socket client = state.workSocket;

            try
            {
                int bytesRead = client.EndReceive(ar);
                if (bytesRead > 0)
                {
                    state.Stream.Write(state.buffer, 0, bytesRead);
                    client.BeginReceive(state.buffer, 0, StateObject.BufferSize, 0, new AsyncCallback(ReceiveCallback), state);
                    return;
                }
            }
            catch (Exception ex)
            {
                Abort(client, "receive", ex);
                return;
            }

            CloseQuietly(client);

            // Invoked outside the try block so a bug in the handler is not misreported
            // as a socket failure.
            if (state.HandAction != null)
            {
                state.HandAction(state.Stream);
            }
        }

        // Reports a failed step and closes the socket so the connection is not leaked.
        private static void Abort(Socket client, string step, Exception ex)
        {
            Trace.WriteLine(string.Format("Socket {0} failed: {1}", step, ex));
            CloseQuietly(client);
        }

        // Shuts down and closes a socket without throwing, whatever state it is in.
        private static void CloseQuietly(Socket client)
        {
            try
            {
                if (client.Connected)
                {
                    client.Shutdown(SocketShutdown.Both);
                }
            }
            catch (SocketException)
            {
                // The peer already dropped the connection.
            }
            catch (ObjectDisposedException)
            {
                // Already closed by a concurrent Dispose.
            }
            finally
            {
                client.Close();
            }
        }

        /// <summary>
        /// Releases the current request state, if any. Safe to call before StartClient and
        /// after the socket has already been closed by the receive callback.
        /// </summary>
        public void Dispose()
        {
            StateObject state = State;
            if (state == null)
            {
                return;
            }

            try
            {
                state.Dispose();
            }
            catch (ObjectDisposedException)
            {
                // The receive callback closed the socket first; Dispose must not throw.
            }
            catch (SocketException)
            {
                // The socket was never connected or the peer already closed it.
            }
        }
    }

调用方式:

     // Usage: build the GET request, start the asynchronous client and decode the reply.
     IPEndPoint ipPoint = new IPEndPoint(hosts.AddressList[0], 80);
            AsynchronousClient client = new AsynchronousClient();
            // Filled in by the handler on a callback thread, after StartClient has returned;
            // it stays empty until the server has closed the connection.
            string responseText = string.Empty;
            StringBuilder bufRequest = new StringBuilder();
            bufRequest.Append("GET ").Append(url).Append(" HTTP/1.0\r\n");
            bufRequest.Append("Content-Type: application/x-www-form-urlencoded\r\n");
            // Extra header lines must come BEFORE the blank line that ends the header
            // section; anything after it is not parsed as headers by the server.
            // NOTE(review): assumes `header` holds complete "Name: value" lines - confirm.
            if (!string.IsNullOrEmpty(header))
            {
                bufRequest.Append(header.TrimEnd('\r', '\n')).Append("\r\n");
            }
            bufRequest.Append("\r\n");
            string requestText = bufRequest.ToString();

            client.StartClient(ipPoint, requestText, new Action<Stream>(x =>
            {
                Encoding coder = Encoding.UTF8;
                // A stateful decoder keeps multi-byte characters intact when they straddle
                // two 1024-byte reads.
                Decoder decoder = coder.GetDecoder();
                byte[] recvBytes = new byte[1024];
                char[] recvChars = new char[coder.GetMaxCharCount(recvBytes.Length)];
                StringBuilder text = new StringBuilder();

                // The stream arrives positioned at its end, so rewind before reading.
                x.Seek(0, SeekOrigin.Begin);
                int bytes;
                while ((bytes = x.Read(recvBytes, 0, recvBytes.Length)) > 0)
                {
                    int chars = decoder.GetChars(recvBytes, 0, bytes, recvChars, 0);
                    text.Append(recvChars, 0, chars);
                }

                // Flush any trailing incomplete byte sequence left in the decoder.
                int tail = decoder.GetChars(recvBytes, 0, 0, recvChars, 0, true);
                text.Append(recvChars, 0, tail);

                responseText = text.ToString();
                client.Dispose();
            }));
    
原文地址:https://www.cnblogs.com/majiang/p/2580786.html