Tinyhttpd源码剖析

简介

Tinyhttpd是一个不到500行的简单http服务器。

Makefile解析

# Default target: build the server (httpd) and the test client.
all: httpd client

# Libraries the server links against. -lsocket is left commented out;
# enable it on platforms whose socket API lives in a separate library.
LIBS = -lpthread #-lsocket

# Libraries are listed after the source file so that linkers which resolve
# symbols strictly left-to-right still pull in the pthread symbols.
httpd: httpd.c
	gcc -g -W -Wall -o $@ $< $(LIBS)

client: simpleclient.c
	gcc -W -Wall -o $@ $<

# Remove every build product; -f keeps `make clean` from failing when a
# binary has not been built yet.
clean:
	rm -f httpd client

# These targets do not name real files.
.PHONY: all clean

Makefile非常简单，定义了两个编译目标：httpd（服务器程序）和client（客户端程序）。

startup函数

这个函数的意图比较明显：根据提供的端口号显式创建listen fd，该listen fd是阻塞的。SO_REUSEADDR选项的作用是允许服务器重启后立即重新绑定仍处于TIME_WAIT状态的地址和端口，避免bind时出现“Address already in use”错误。

/*
 * Create a TCP socket, bind it to the requested port on all local
 * addresses (INADDR_ANY) and put it into the listening state.
 *
 * port: in/out parameter. On entry it holds the port to bind. If that
 *       value is 0, the port actually assigned by the OS is read back with
 *       getsockname() and stored through the pointer.
 *
 * Returns the listening socket descriptor. Every failing system call is
 * reported through error_die() (defined elsewhere; presumably it does not
 * return -- confirm against its definition).
 */
int startup(u_short *port) {
    int httpd = 0;
    int on = 1;
    struct sockaddr_in name;

    httpd = socket(PF_INET, SOCK_STREAM, 0);
    if (httpd == -1)
        error_die("socket");

    memset(&name, 0, sizeof(name));
    name.sin_family = AF_INET;
    name.sin_port = htons(*port);
    name.sin_addr.s_addr = htonl(INADDR_ANY);

    /* Enable SO_REUSEADDR on the socket before binding it. */
    if (setsockopt(httpd, SOL_SOCKET, SO_REUSEADDR, &on, sizeof(on)) < 0) {
        error_die("setsockopt failed");
    }
    if (bind(httpd, (struct sockaddr *)&name, sizeof(name)) < 0) {
        error_die("bind");
    }

    /* A requested port of 0 means "pick one for me": report the choice back. */
    if (*port == 0) {
        socklen_t namelen = sizeof(name);
        if (getsockname(httpd, (struct sockaddr *)&name, &namelen) == -1)
            error_die("getsockname");
        *port = ntohs(name.sin_port);
    }

    if (listen(httpd, 5) < 0)
        error_die("listen");
    return httpd;
}

getsockname

这个函数用来查看OS动态给socket分配的端口信息等。

accept_request

创建listen fd之后，然后程序直接在本进程中accept，创建accept fd，然后来处理HTTP 请求。

/*
 * Handle a single HTTP request on a connected client socket.
 *
 * arg: pointer to an int holding the client socket descriptor.
 *      NOTE(review): the caller is not visible here; this assumes it passes
 *      the address of a descriptor that is still valid when it is read
 *      below -- confirm against the accept loop.
 *
 * The request line is split into a method and a URL. Only GET and POST are
 * accepted. The URL is mapped to a path under "htdocs"; the result is either
 * sent with serve_file() or run through execute_cgi(). The client socket is
 * closed before returning on every path.
 */
void accept_request(void *arg) {
    int client = *(int *)arg;
    char buf[1024];
    size_t numchars;
    char method[255];
    char url[255];
    char path[512];
    size_t i, j;
    struct stat st;
    int cgi = 0;
    int found;
    char *query_string = NULL;

    /* Read the request line sent by the client. */
    numchars = get_line(client, buf, sizeof(buf));

    /* Extract the method token; never scan past the bytes actually read. */
    i = 0;
    while ((i < numchars) && !ISspace(buf[i]) && (i < sizeof(method) - 1)) {
        method[i] = buf[i];
        i++;
    }
    j = i;
    method[i] = '\0';

    /* Anything other than GET or POST is answered as "not implemented". */
    if (strcasecmp(method, "GET") && strcasecmp(method, "POST")) {
        unimplemented(client);
        close(client);  /* do not leak the socket on the early return */
        return;
    }

    /* POST requests are always handled by a CGI program. */
    if (strcasecmp(method, "POST") == 0)
        cgi = 1;

    /* Skip the whitespace after the method, then copy the URL token. */
    i = 0;
    while ((j < numchars) && ISspace(buf[j]))
        j++;
    while ((j < numchars) && !ISspace(buf[j]) && (i < sizeof(url) - 1)) {
        url[i] = buf[j];
        i++;
        j++;
    }
    url[i] = '\0';

    /* For GET, split "path?query" in place; a query string implies CGI. */
    if (strcasecmp(method, "GET") == 0) {
        query_string = url;
        while ((*query_string != '?') && (*query_string != '\0'))
            query_string++;
        if (*query_string == '?') {
            cgi = 1;
            *query_string = '\0';
            query_string++;
        }
    }

    /*
     * NOTE(review): url is used as received; "../" segments are not
     * filtered, so a request may resolve outside htdocs.
     */
    snprintf(path, sizeof(path), "htdocs%s", url);
    if (path[strlen(path) - 1] == '/')
        strcat(path, "index.html");

    found = (stat(path, &st) != -1);
    if (found && (st.st_mode & S_IFMT) == S_IFDIR) {
        /*
         * A directory was requested: serve its index.html. stat() again so
         * the executable-bit test below looks at the file, not at the
         * directory (whose search bits would otherwise force the CGI path).
         */
        strcat(path, "/index.html");
        found = (stat(path, &st) != -1);
    }

    if (!found) {
        /* Read and discard the remaining headers before replying. */
        while ((numchars > 0) && strcmp("\n", buf))
            numchars = get_line(client, buf, sizeof(buf));
        not_found(client);
    } else {
        /* Any executable bit marks the file as a CGI program. */
        if ((st.st_mode & S_IXUSR) ||
                (st.st_mode & S_IXGRP) ||
                (st.st_mode & S_IXOTH))
            cgi = 1;
        if (!cgi)
            serve_file(client, path);
        else
            execute_cgi(client, path, method, query_string);
    }
    close(client);
}

我们通过nc来调试 nc 127.0.0.1 4000

因为fafafafa是乱输入的，所以不支持该方法，上述是httpd返回的值。其执行流程如下：

下面是一个完整的GET请求的HTTP报文头部字段，返回的是htdocs下的index文件信息。

execute_cgi 解析

在POST请求下，或者是GET请求，但有查询参数或请求资源为可执行程序下，execute_cgi将会被调用。

void execute_cgi(int client, const char *path,
        const char *method, const char *query_string) {
    char buf[1024];
    int cgi_output[2];
    int cgi_input[2];
    pid_t pid;
    int status;
    int i;
    char c;
    int numchars = 1;
    int content_length = -1;
    buf[0] = 'A'; buf[1] = '';
    if (strcasecmp(method, "GET") == 0) {
        // 丢保其它报文头部字段
        while ((numchars > 0) && strcmp("
", buf))  /* read & discard headers */
            numchars = get_line(client, buf, sizeof(buf));
    } else if (strcasecmp(method, "POST") == 0)  {
        numchars = get_line(client, buf, sizeof(buf));
        while ((numchars > 0) && strcmp("
", buf)){
            buf[15] = '';
            if (strcasecmp(buf, "Content-Length:") == 0)
                content_length = atoi(&(buf[16]));
            numchars = get_line(client, buf, sizeof(buf));
        }
        if (content_length == -1) {
            bad_request(client);
            return;
        }
    }
    else/*HEAD or other*/ {
    }
    if (pipe(cgi_output) < 0) {
        cannot_execute(client);
        return;
    }
    if (pipe(cgi_input) < 0) {
        cannot_execute(client);
        return;
    }
    if ( (pid = fork()) < 0 ) {
        cannot_execute(client);
        return;
    }
    sprintf(buf, "HTTP/1.0 200 OK
");
    send(client, buf, strlen(buf), 0);
    if (pid == 0)  /* child: CGI script */ {
        char meth_env[255];
        char query_env[255];
        char length_env[255];
        //子进程STDOUT重定向到管道1的写端中。
        dup2(cgi_output[1], STDOUT);
        // 子进程STDIN重定向管道0的读端到中。
        dup2(cgi_input[0], STDIN);
        // 关掉其它不用的一端
        close(cgi_output[0]);
        close(cgi_input[1]);
        sprintf(meth_env, "REQUEST_METHOD=%s", method);
        putenv(meth_env);
        if (strcasecmp(method, "GET") == 0) {
            sprintf(query_env, "QUERY_STRING=%s", query_string);
            putenv(query_env);
        }
        else {   /* POST */
            sprintf(length_env, "CONTENT_LENGTH=%d", content_length);
            putenv(length_env);
        }
        // 执行可执行程序。
        execl(path, NULL);
        exit(0);
    }  else {    /* parent */
        // 父进程关掉不用的一端
        close(cgi_output[1]);
        close(cgi_input[0]);
        if (strcasecmp(method, "POST") == 0)
            for (i = 0; i < content_length; i++) {
                recv(client, &c, 1, 0);
                // 向子进程一个字节一个字节的写。
                write(cgi_input[1], &c, 1);
          }
        // 从输出管道中读入执行结果后发送给客户端。
        while (read(cgi_output[0], &c, 1) > 0)
            send(client, &c, 1, 0);

        close(cgi_output[0]);
        close(cgi_input[1]);
        waitpid(pid, &status, 0);
    }
}

管道的初始状态：

管道最终状态

在子进程中，把 STDOUT 重定向到 cgi_output 的写入端，把 STDIN 重定向到 cgi_input 的读取端，关闭 cgi_input 的写入端和 cgi_output 的读取端，设置 request_method 的环境变量，GET 的话设置 query_string 的环境变量，POST 的话设置 content_length 的环境变量，这些环境变量都是为了给 cgi 脚本调用，接着用 execl 运行 cgi 程序。
在父进程中，关闭 cgi_input 的读取端和 cgi_output 的写入端，如果 POST 的话，把 POST 数据写入 cgi_input，已被重定向到 STDIN，读取 cgi_output 的管道输出到客户端，该管道输入是 STDOUT。接着关闭所有管道，等待子进程结束。这一部分比较乱，见下图说明：

这里利用的是exec默认的输入和输出为STDIN和STDOUT，如果将STDIN重定向后，那么CGI脚本将从cgi_input管道中读，执行完后的结果写到cgi_output管道中，然后父进程读取返回给客户端。

接口学习

getsockname用来获取OS给自己绑定的端口信息等。
stat用来查看文件的属性，是普通文件还是可执行文件
recv用来获取socket消息
send 用来将消息发送给协议栈
dup2 把一个已有的文件描述符复制到指定的新文件描述符上，这样两个文件描述符共享同样的文件状态。这里的代码用dup2将管道和标准输入、标准输出联系在一起。
pipe用来创建管道