通过HTTP GET下载网页(socket编程)

通过HTTP GET下载网页(socket编程)
2009-04-29 11:16
基于 Linux socket,通过 HTTP GET 方法获取网页信息。
遇到的问题与解决方法:利用超时来判断 HTTP 服务器是否已经发送完数据;
超时通过 select 函数实现。
GET 方法的请求行格式如下(HTTP/1.1 请求还必须带 Host 头,且每一行以 \r\n 结尾):
GET / HTTP/1.1

包括两个文件HttpConnection.h和HttpConnection.cpp
HttpConnection.h
#ifndef HTTP_CONNECTION_H
#define HTTP_CONNECTION_H
#include <string>
#include <QtCore/QObject>
using std::string;
#define MAX_BUFF_LEN 4096
#define SLEEP_TIME 2
namespace HTTP
{
class HttpConnection
{
public:
    HttpConnection(string hostname,int port):hostname(hostname),port(port){}
    HttpConnection()
    {
        hostname="www.163.com";
        port = 80;
    }
    int http_connect();
    string get_html(const string) const;
    void http_close();
private:
    int conn_sock;
    string hostname;
    int port;    
    int status_code;
    int total_bytes;
    int current_bytes;
};
}
#endif

HttpConnection.cpp

#include <unistd.h>
#include <stdlib.h>
#include <iostream>
#include <string>
#include <strings.h>
#include <cstring>
#include <netdb.h>
#include <sys/socket.h>
#include <netinet/in.h>
#include <arpa/inet.h>
#include <sstream>
#include <errno.h>
#include <sys/select.h>
#include <sys/time.h>
#include "HttpConnection.h"
using namespace std;
int HTTP::HttpConnection::http_connect()
{
    struct hostent *hptr;
    char ip_dot_num[INET_ADDRSTRLEN];
    char **ptr;
    struct sockaddr_in servaddr;
    
    if((hptr = gethostbyname(hostname.c_str()))==NULL)
    {
    #ifdef DEBUG
        cout<<"gethostbyname error for host "<<hostname<<endl;
    #endif
        return -1;
    }
    if(hptr->h_addrtype!=AF_INET)
    {
    #ifdef DEBUG
        cout<<"not support the address type"<<endl;
    #endif
        return -1;
    }
    ptr = hptr->h_addr_list;
    inet_ntop(AF_INET,*ptr,ip_dot_num,sizeof(ip_dot_num));    
#ifdef DEBUG
    cout<<"connect to "<<ip_dot_num<<" ..."<<endl;    
#endif
    conn_sock = socket(AF_INET,SOCK_STREAM,0);
    bzero(&servaddr,sizeof(servaddr));
    servaddr.sin_family = AF_INET;
    servaddr.sin_port = htons(port);
    int ret;
    if((ret =inet_pton(AF_INET, ip_dot_num, &servaddr.sin_addr))!=1)
    {
    #ifdef DEBUG
        cout<<"inet_pton: invalid input or error "<<ret<<endl;
    #endif
        return -1;
    }
    if(::connect(conn_sock,(struct sockaddr *)&servaddr, sizeof(servaddr))!=0)
    {
    #ifdef DEBUG
        cout<<"failed to connect"<<endl;
    #endif    
        return -1;
    }
#ifdef DEBUG
    cout<<"succeed to connect"<<endl;
#endif    
    return 0;
}

void HTTP::HttpConnection::http_close()
{
    if(close(conn_sock)==0)
    {
    #ifdef DEBUG
        cout<<"succeed to close socket"<<endl;
    #endif
    }
    else
    {
    #ifdef DEBUG
        cout<<"failed to close socket"<<endl;
    #endif    
    }
}
string HTTP::HttpConnection::get_html(string path) const
{
    stringstream strm;
    strm<<"GET "<<path<<" HTTP/1.1\n";
    strm<<"Connection:Keep-Alive\r\n";
    strm<<"Cache-Control:no-cache\r\n";
    strm<<"\r\n";
    char *send_content = (char *)strm.str().c_str();
    char receive_buff[MAX_BUFF_LEN];
    ssize_t nwritten;
    nwritten = write(conn_sock,send_content,strlen(send_content));
    string receive_content;
#ifdef DEBUG
    cout<<"send request to server"<<endl;
    cout<<strm.str()<<endl;
#endif
    ssize_t nread;
    while(true)
    {
        fd_set rset;
        struct timeval tv;
        tv.tv_sec = SLEEP_TIME;
        tv.tv_usec = 0;
        FD_ZERO(&rset);
        FD_SET(conn_sock,&rset);
        int ret = select(conn_sock+1,&rset,NULL,NULL,&tv);
        if (ret == 0)
            break;
        nread = read(conn_sock,receive_buff,MAX_BUFF_LEN);
        if(nread<0 && errno==EINTR)
        {
            continue;            
        }
        else if(nread>0)
        {
            receive_buff[nread]='\0';
            receive_content.append(receive_buff);
            continue;
        }
        break;
    }    
    return receive_content;    
}
原文地址:https://www.cnblogs.com/lexus/p/2248935.html