C: Linux Socket Programming, TCP, a simple HTTP client 安東尼隨手記 Yahoo!奇摩部落格

C: Linux Socket Programming, TCP, a simple HTTP client - 安東尼隨手記 - Yahoo!奇摩部落格








C: Linux Socket Programming, TCP, a simple HTTP client

2009/04/16 10:46

Copied from http://coding.debuntu.org/c-linux-socket-programming-tcp-simple-http-client


  #include <stdio.h>
  #include <sys/socket.h>
  #include <arpa/inet.h>
  #include <stdlib.h>
  #include <netdb.h>
  #include <string.h>
  #include <unistd.h>
  7. int create_tcp_socket();
  8. char *get_ip(char *host);
  9. char *build_get_query(char *host, char *page);
  10. void usage();
  11.  
  12. #define HOST "coding.debuntu.org"
  13. #define PAGE "/"
  14. #define PORT 80
  15. #define USERAGENT "HTMLGET 1.0"
  16.  
  17. int main(int argc, char **argv)
  18. {
  19.   struct sockaddr_in *remote;
  20.   int sock;
  21.   int tmpres;
  22.   char *ip;
  23.   char *get;
  24.   char buf[BUFSIZ+1];
  25.   char *host;
  26.   char *page;
  27.  
  28.   if(argc == 1){
  29.     usage();
  30.     exit(2);
  31.   }  
  32.   host = argv[1];
  33.   if(argc > 2){
  34.     page = argv[2];
  35.   }else{
  36.     page = PAGE;
  37.   }
  38.   sock = create_tcp_socket();
  39.   ip = get_ip(host);
  40.   fprintf(stderr, "IP is %s\n", ip);
  41.   remote = (struct sockaddr_in *)malloc(sizeof(struct sockaddr_in *));
  42.   remote->sin_family = AF_INET;
  43.   tmpres = inet_pton(AF_INET, ip, (void *)(&(remote->sin_addr.s_addr)));
  44.   if( tmpres < 0)  
  45.   {
  46.     perror("Can't set remote->sin_addr.s_addr");
  47.     exit(1);
  48.   }else if(tmpres == 0)
  49.   {
  50.     fprintf(stderr, "%s is not a valid IP address\n", ip);
  51.     exit(1);
  52.   }
  53.   remote->sin_port = htons(PORT);
  54.  
  55.   if(connect(sock, (struct sockaddr *)remote, sizeof(struct sockaddr)) < 0){
  56.     perror("Could not connect");
  57.     exit(1);
  58.   }
  59.   get = build_get_query(host, page);
  60.   fprintf(stderr, "Query is:\n<<START>>\n%s<<END>>\n", get);
  61.  
  62.   //Send the query to the server
  63.   int sent = 0;
  64.   while(sent < strlen(get))
  65.   {
  66.     tmpres = send(sock, get+sent, strlen(get)-sent, 0);
  67.     if(tmpres == -1){
  68.       perror("Can't send query");
  69.       exit(1);
  70.     }
  71.     sent += tmpres;
  72.   }
  73.   //now it is time to receive the page
  74.   memset(buf, 0, sizeof(buf));
  75.   int htmlstart = 0;
  76.   char * htmlcontent;
  77.   while((tmpres = recv(sock, buf, BUFSIZ, 0)) > 0){
  78.     if(htmlstart == 0)
  79.     {
  80.       /* Under certain conditions this will not work.
  81.       * If the \r\n\r\n part is splitted into two messages
  82.       * it will fail to detect the beginning of HTML content
  83.       */
  84.       htmlcontent = strstr(buf, "\r\n\r\n");
  85.       if(htmlcontent != NULL){
  86.         htmlstart = 1;
  87.         htmlcontent += 4;
  88.       }
  89.     }else{
  90.       htmlcontent = buf;
  91.     }
  92.     if(htmlstart){
  93.       fprintf(stdout, htmlcontent);
  94.     }
  95.  
  96.     memset(buf, 0, tmpres);
  97.   }
  98.   if(tmpres < 0)
  99.   {
  100.     perror("Error receiving data");
  101.   }
  102.   free(get);
  103.   free(remote);
  104.   free(ip);
  105.   close(sock);
  106.   return 0;
  107. }
  108.  
  /* Print the command-line synopsis for htmlget to stderr. */
  void usage()
  {
    static const char synopsis[] =
      "USAGE: htmlget host [page]\n"
      "\thost: the website hostname. ex: coding.debuntu.org\n"
      "\tpage: the page to retrieve. ex: index.html, default: /\n";

    fputs(synopsis, stderr);
  }
  115.  
  116.  
  /*
   * Open an IPv4 TCP socket and return its descriptor.
   * On failure the reason is reported with perror() and the process exits
   * with status 1, so the function only returns a usable descriptor.
   */
  int create_tcp_socket()
  {
    int fd = socket(AF_INET, SOCK_STREAM, IPPROTO_TCP);

    if (fd >= 0) {
      return fd;
    }
    perror("Can't create TCP socket");
    exit(1);
  }
  126.  
  127.  
  /*
   * Resolve host to its first IPv4 address and return it as a dotted-quad
   * string.
   *
   * The returned string is allocated with calloc(); the caller owns it and
   * must free() it. Any failure (lookup, no IPv4 address, allocation,
   * conversion) is reported on stderr and terminates the process with
   * exit(1), so the function never returns NULL.
   */
  char *get_ip(char *host)
  {
    struct hostent *hent;
    char *ip;

    hent = gethostbyname(host);
    if (hent == NULL) {
      herror("Can't get IP");
      exit(1);
    }
    if (hent->h_addrtype != AF_INET || hent->h_addr_list[0] == NULL) {
      fprintf(stderr, "Can't get IP: no IPv4 address for %s\n", host);
      exit(1);
    }

    /*
     * INET_ADDRSTRLEN (16) covers "XXX.XXX.XXX.XXX" plus the terminating NUL.
     * inet_ntop() must be told the full buffer size; a smaller size makes it
     * reject full-width addresses with ENOSPC.
     */
    ip = calloc(1, INET_ADDRSTRLEN);
    if (ip == NULL) {
      perror("Can't allocate memory for IP");
      exit(1);
    }
    if (inet_ntop(AF_INET, hent->h_addr_list[0], ip, INET_ADDRSTRLEN) == NULL) {
      perror("Can't resolve host");
      exit(1);
    }
    return ip;
  }
  146.  
  147. char *build_get_query(char *host, char *page)
  148. {
  149.   char *query;
  150.   char *getpage = page;
  151.   char *tpl = "GET /%s HTTP/1.0\r\nHost: %s\r\nUser-Agent: %s\r\n\r\n";
  152.   if(getpage[0] == '/'){
  153.     getpage = getpage + 1;
  154.     fprintf(stderr,"Removing leading \"/\", converting %s to %s\n", page, getpage);
  155.   }
  156.   // -5 is to consider the %s %s %s in tpl and the ending \0
  157.   query = (char *)malloc(strlen(host)+strlen(getpage)+strlen(USERAGENT)+strlen(tpl)-5);
  158.   sprintf(query, tpl, getpage, host, USERAGENT);
  159.   return query;
  160. }

To compile it and print the usage message, run:

$ gcc -o htmlget htmlget.c
$ ./htmlget 
USAGE: htmlget host [page]
	host: the website hostname. ex: coding.debuntu.org
	page: the page to retrieve. ex: index.html, default: /

Informative messages and errors are printed to stderr. The content of the page is printed to stdout. Thus, to save the HTML content of a page to a file, you will need to run:

$ ./htmlget coding.debuntu.org category > /tmp/page.html
原文地址:https://www.cnblogs.com/lexus/p/2594618.html