Goahead源码解析(转)

title: Goahead源码解析(转)
date: 2019/12/21 15:24:47
toc: true

源码解析

title: Goahead源码解析(转)date: 2019/12/21 15:24:47toc: true

Goahead源码解析(转)

https://blog.csdn.net/chenlonglong2014

1. 从主函数到I/O事件循环

一、主函数

主函数主要流程是载入配置文件、申请必要数据结构、对服务器进行监听。内容不长，大家可结合注释看。

/*
    Program entry point. Parses the command line, initializes the server (websOpen), optionally loads
    the auth file, opens the listening endpoints and then services events until `finished` is set.
    Returns 0 on a normal exit (and on Windows init failure), -1 when initialization or listening fails.

    Fixes relative to the article's listing:
      - Options that take a value now check that a value really follows. The previous test
        "argind >= argc" could never be true inside the loop, so an option given as the last argument
        read argv[argc] (NULL). "--route" had no check at all.
      - The "%s\n" format string of --version is restored (the escape was lost in the listing).
 */
MAIN(goahead, int argc, char **argv, char **envp)
{
    char    *argp, *home, *documents, *endpoints, *endpoint, *route, *auth, *tok, *lspec;
    int     argind;

#if WINDOWS
    if (windowsInit() < 0) {
        return 0;
    }
#endif
    route = "route.txt";    /* Route file (author's note: similar to a permissions file) */
    auth = "auth.txt";      /* Auth file */

    for (argind = 1; argind < argc; argind++) {
        argp = argv[argind];
        if (*argp != '-') {
            break;

        } else if (smatch(argp, "--auth") || smatch(argp, "-a")) {
            /* NOTE(review): assumes usage() does not return -- confirm */
            if (argind + 1 >= argc) usage();
            auth = argv[++argind];

#if ME_UNIX_LIKE && !MACOSX
        } else if (smatch(argp, "--background") || smatch(argp, "-b")) {
            websSetBackground(1);
#endif

        } else if (smatch(argp, "--debugger") || smatch(argp, "-d") || smatch(argp, "-D")) {
            websSetDebug(1);

        } else if (smatch(argp, "--home")) {
            if (argind + 1 >= argc) usage();
            home = argv[++argind];
            if (chdir(home) < 0) {
                error("Cannot change directory to %s", home);
                exit(-1);
            }
        } else if (smatch(argp, "--log") || smatch(argp, "-l")) {
            if (argind + 1 >= argc) usage();
            logSetPath(argv[++argind]);

        } else if (smatch(argp, "--verbose") || smatch(argp, "-v")) {
            logSetPath("stdout:2");

        } else if (smatch(argp, "--route") || smatch(argp, "-r")) {
            if (argind + 1 >= argc) usage();
            route = argv[++argind];

        } else if (smatch(argp, "--version") || smatch(argp, "-V")) {
            printf("%s\n", ME_VERSION);
            exit(0);

        } else if (*argp == '-' && isdigit((uchar) argp[1])) {
            /* "-N" selects logging to stdout at level N */
            lspec = sfmt("stdout:%s", &argp[1]);
            logSetPath(lspec);
            wfree(lspec);

        } else {
            usage();
        }
    }
    /*
        Author's note: everything up to here configures features from the command line. In a real
        adaptation these values are normally configured in advance rather than passed as arguments.
     */
    documents = ME_GOAHEAD_DOCUMENTS;   /* Location of the web pages */
    if (argc > argind) {
        documents = argv[argind++];
    }
    /*
        Author's note: initPlatform defines the signal handling; on SIGTERM sigHandler sets `finished`
        to 1, which ends the event loop below.
     */
    initPlatform();
    /* Author's note: initializes variables and the handlers for the related functions */
    if (websOpen(documents, route) < 0) {
        error("Cannot initialize server. Exiting.");
        return -1;
    }
#if ME_GOAHEAD_AUTH
    /* Load the auth file */
    if (websLoad(auth) < 0) {
        error("Cannot load %s", auth);
        return -1;
    }
#endif
    logHeader();
    if (argind < argc) {
        /* Remaining command line arguments are endpoints to listen on */
        while (argind < argc) {
            endpoint = argv[argind++];
            if (websListen(endpoint) < 0) {
                return -1;
            }
        }
    } else {
        /* No endpoints supplied: use the compiled-in list, separated by commas, spaces or tabs */
        endpoints = sclone(ME_GOAHEAD_LISTEN);
        for (endpoint = stok(endpoints, ", \t", &tok); endpoint; endpoint = stok(NULL, ", \t,", &tok)) {
#if !ME_COM_SSL
            if (strstr(endpoint, "https")) continue;
#endif
            /* Author's note: turns IP:PORT into a listening socket and opens the listening port */
            if (websListen(endpoint) < 0) {
                wfree(endpoints);
                return -1;
            }
        }
        wfree(endpoints);
    }
#if ME_ROM && KEEP
    /*
        If not using a route/auth config files, then manually create the routes like this:
        If custom matching is required, use websSetRouteMatch. If authentication is required, use websSetRouteAuth.
     */
    websAddRoute("/", "file", 0);
#endif
#ifdef GOAHEAD_INIT
    /*
        Define your init function in main.me goahead.init, or
        configure with DFLAGS=GOAHEAD_INIT=myInitFunction
     */
    {
        extern int GOAHEAD_INIT();

        if (GOAHEAD_INIT() < 0) {
            exit(1);
        }
    }
#endif
#if ME_UNIX_LIKE && !MACOSX
    /*
        Service events till terminated
     */
    if (websGetBackground()) {
        /* Run in the background */
        if (daemon(0, 0) < 0) {
            error("Cannot run as daemon");
            return -1;
        }
    }
#endif
    /*
        Author's note: this calls select on the sockets and runs the I/O event loop. It normally does
        not return until SIGTERM sets finished = 1, after which the server exits and releases resources.
     */
    websServiceEvents(&finished);
    logmsg(1, "Instructed to exit");
    websClose();
#if WINDOWS
    windowsClose();
#endif
    return 0;
}

二、I/O事件循环

作为一个HTTP服务器，该代码最重要的就是socket事件循环，也就是websServiceEvents(&finished);函数，下来对这个函数展开。

/*
    Main I/O event loop. Clears *finished on entry (when a flag is supplied) and then keeps waiting for
    socket events and running scheduled events until *finished becomes non-zero. With a NULL flag the
    loop has no exit condition.
 */
PUBLIC void websServiceEvents(int *finished)
{
    int     timeout, eventDelay;

    if (finished != NULL) {
        *finished = 0;
    }
    /*
        The first wait uses a zero timeout. After each pass the timeout is the smaller of the CGI poll
        delay (or MAXINT without CGI) and the delay returned by websRunEvents.
     */
    for (timeout = 0; finished == NULL || *finished == 0; timeout = min(timeout, eventDelay)) {
        /* A non-zero result from socketSelect means there are sockets to service */
        if (socketSelect(-1, timeout) != 0) {
            socketProcess();
        }
#if ME_GOAHEAD_CGI
        /* Author's note: sets the select timeout; why it varies dynamically is still being studied */
        timeout = websCgiPoll();
#else
        timeout = MAXINT;
#endif
        eventDelay = websRunEvents();
    }
}

下面我们来看看select函数是怎么写的：

/*
    Wait for I/O events on one socket (sid >= 0) or on every socket in socketList (sid < 0).
    timeout is in milliseconds. Ready events are recorded in each socket's currentEvents mask.
    Returns the select() result plus one for every socket that had SOCKET_RESERVICE set, or 0 when
    no socket had any event of interest (after sleeping for the timeout).

    NOTE(review): socketHighestFd is initialized to -1 and never updated in this listing, so select()
    is always called with nfds == 0. This looks like the Windows variant of the function (Sleep, DWORD,
    and the "#else !ME_WIN_LIKE" that follows it), where Winsock is documented to ignore nfds -- confirm.
 */
PUBLIC int socketSelect(int sid, int timeout)
{
    struct timeval  tv;
    WebsSocket      *sp;
    fd_set          readFds, writeFds, exceptFds;
    int             nEvents;
    int             all, socketHighestFd;   /* Highest socket fd opened */

    FD_ZERO(&readFds);
    FD_ZERO(&writeFds);
    FD_ZERO(&exceptFds);
    socketHighestFd = -1;

    /* Convert the millisecond timeout into seconds + microseconds */
    tv.tv_sec = (long) (timeout / 1000);
    tv.tv_usec = (DWORD) (timeout % 1000) * 1000;

    /*
        Set the select event masks for events to watch
     */
    all = nEvents = 0;

    /* A negative sid means "scan every socket", starting from index 0 */
    if (sid < 0) {
        all++;
        sid = 0;
    }

    for (; sid < socketMax; sid++) {
        if ((sp = socketList[sid]) == NULL) {
            continue;
        }
        assert(sp);
        /*
            Set the appropriate bit in the ready masks for the sp->sock.
         */
        if (sp->handlerMask & SOCKET_READABLE) { /* Add the events this socket wants watched to the fd sets */
            FD_SET(sp->sock, &readFds);
            nEvents++;
        }
        if (sp->handlerMask & SOCKET_WRITABLE) {
            FD_SET(sp->sock, &writeFds);
            nEvents++;
        }
        if (sp->handlerMask & SOCKET_EXCEPTION) {
            FD_SET(sp->sock, &exceptFds);
            nEvents++;
        }
        /* A socket flagged for reservice must not be delayed: poll without waiting */
        if (sp->flags & SOCKET_RESERVICE) {
            tv.tv_sec = 0;
            tv.tv_usec = 0;
        }
        /* Single-socket mode: only the requested sid is examined */
        if (! all) {
            break;
        }
    }
    /*
        Windows select() fails if no descriptors are set, instead of just sleeping like other, nice select() calls.
        So, if WINDOWS, sleep.
     */
    if (nEvents == 0) {
        Sleep((DWORD) timeout);
        return 0;
    }
    /*
        Wait for the event or a timeout
     */
    nEvents = select(socketHighestFd + 1, &readFds, &writeFds, &exceptFds, &tv);

    /* Second pass: in "all" mode restart from index 0; otherwise sid still names the single socket */
    if (all) {
        sid = 0;
    }
    for (; sid < socketMax; sid++) {
        if ((sp = socketList[sid]) == NULL) {
            continue;
        }
        /* Reservice: report the watched read/write events as ready and clear the flag */
        if (sp->flags & SOCKET_RESERVICE) {
            if (sp->handlerMask & SOCKET_READABLE) {
                sp->currentEvents |= SOCKET_READABLE;
            }
            if (sp->handlerMask & SOCKET_WRITABLE) {
                sp->currentEvents |= SOCKET_WRITABLE;
            }
            sp->flags &= ~SOCKET_RESERVICE;
            nEvents++;
        } /* If the socket is in a returned fd set, record that event in sp->currentEvents for socketProcess to handle later */
        if (FD_ISSET(sp->sock, &readFds)) {
            sp->currentEvents |= SOCKET_READABLE;
        }
        if (FD_ISSET(sp->sock, &writeFds)) {
            sp->currentEvents |= SOCKET_WRITABLE;
        }
        if (FD_ISSET(sp->sock, &exceptFds)) {
            sp->currentEvents |= SOCKET_EXCEPTION;
        }
        if (! all) {
            break;
        }
    }
    return nEvents;
}

#else /* !ME_WIN_LIKE */

三、服务器与客户端建立连接

假设浏览器有HTTP请求发往服务器，我们相应的流程是怎样的呢？
通过socketSelect监听，发现监听套接字有读事件，先调用socketAccept创建新的套接字，同时调用websAccept函数，为这个连接创建必要的数据结构，保存传输过程中需要的WEB数据结构Webs *wp。
创建好套接字之后，再为这个套接字注册一个读事件，从而进行请求的读取。函数实现如下：

/*
    Set up the per-connection state for a newly accepted socket.

    sid         Socket id of the accepted connection.
    ipaddr      Textual address of the peer (must be non-empty).
    port        Peer port (used for tracing only).
    listenSid   Socket id of the listening socket the connection arrived on.

    Allocates a Webs structure, records the peer and local interface addresses, upgrades to TLS when
    the listening socket is secure, starts the parse timeout and registers for readable events.
    Returns 0 on success, -1 on failure (the Webs structure is freed once it has been allocated).

    Fix: the peer address copy is now explicitly NUL terminated. The original strncpy call copied
    exactly min(sizeof - 1, strlen) bytes, which never writes a terminator, so the result was only a
    valid string if the destination happened to be zero-filled beforehand.
 */
PUBLIC int websAccept(int sid, cchar *ipaddr, int port, int listenSid)
{
    Webs        *wp;
    WebsSocket  *lp;
    struct sockaddr_storage ifAddr;
    size_t      iplen;
    int         wid, len;

    assert(sid >= 0);
    assert(ipaddr && *ipaddr);
    assert(listenSid >= 0);
    assert(port >= 0);

    /*
        Allocate a new handle for this accepted connection. This will allocate a Webs structure in the webs[] list
     */
    if ((wid = websAlloc(sid)) < 0) {
        return -1;
    }
    wp = webs[wid];
    assert(wp);
    wp->listenSid = listenSid;

    /* Copy the peer address, truncating if necessary, and always terminate it */
    iplen = min(sizeof(wp->ipaddr) - 1, strlen(ipaddr));
    strncpy(wp->ipaddr, ipaddr, iplen);
    wp->ipaddr[iplen] = '\0';

    /*
        Get the ip address of the interface that accept the connection.
     */
    len = sizeof(ifAddr);
    if (getsockname(socketPtr(sid)->sock, (struct sockaddr*) &ifAddr, (Socklen*) &len) < 0) {
        error("Cannot get sockname");
        websFree(wp);
        return -1;
    }
    socketAddress((struct sockaddr*) &ifAddr, (int) len, wp->ifaddr, sizeof(wp->ifaddr), NULL);

#if ME_GOAHEAD_LEGACY
    /*
        Check if this is a request from a browser on this system. This is useful to know for permitting administrative
        operations only for local access
     */
    if (strcmp(wp->ipaddr, "127.0.0.1") == 0 || strcmp(wp->ipaddr, websIpAddr) == 0 ||
            strcmp(wp->ipaddr, websHost) == 0) {
        wp->flags |= WEBS_LOCAL;
    }
#endif

    /*
        Arrange for socketEvent to be called when read data is available
     */
    lp = socketPtr(listenSid);
    trace(4, "New connection from %s:%d to %s:%d", ipaddr, port, wp->ifaddr, lp->port);

#if ME_COM_SSL
    if (lp->secure) {
        wp->flags |= WEBS_SECURE;
        trace(4, "Upgrade connection to TLS");
        if (sslUpgrade(wp) < 0) {
            error("Cannot upgrade to TLS");
            websFree(wp);
            return -1;
        }
    }
#endif
    assert(wp->timeout == -1);
    wp->timeout = websStartEvent(PARSE_TIMEOUT, checkTimeout, (void*) wp);
    /*
        Author's note: registers a read event for the connected socket so the event handler runs and
        the HTTP request gets read.
     */
    socketEvent(sid, SOCKET_READABLE, wp);
    return 0;
}

socketEvent 此函数我认为是HTTP连接中最关键的函数，里面进行I/O处理。作为HTTP服务器，其中的读写都遵循HTTP协议，根据请求的不同类型，做出不同的响应。
理解了这个事件中的readEvent, writeEvent两个函数，就可以理解HTTP协议的大概脉络。这两个函数对应的HTTP处理流程，后续专题讲述。

/*
    I/O callback for a connection socket. wptr carries the Webs structure of the connection and mask
    the ready events. Dispatches to readEvent and/or writeEvent, then releases the connection if it
    has been marked WEBS_CLOSED. The sid argument is not used here.
 */
static void socketEvent(int sid, int mask, void *wptr)
{
    Webs    *conn = (Webs*) wptr;

    assert(conn);
    assert(websValid(conn));

    if (websValid(conn)) {
        if (mask & SOCKET_READABLE) {
            readEvent(conn);
        }
        if (mask & SOCKET_WRITABLE) {
            writeEvent(conn);
        }
        if (conn->flags & WEBS_CLOSED) {
            /* The connection structure must not be touched after this call */
            websFree(conn);
        }
    }
}

2. 读取HTTP请求

一、读取HTTP请求

浏览器与服务器建立好连接之后，会调用readEvent接口来读取从浏览器来的请求数据。HTTP请求头的结束符是"\r\n\r\n"，服务器调用readEvent，通过websRead读取缓冲区（内容长度不超过2048字节）。

    The webs read handler. This is the primary read event loop. It uses a state machine to track progress while parsing
    the HTTP request.  Note: we never block as the socket is always in non-blocking mode.
 */
static void readEvent(Webs *wp)
{
    WebsBuf     *rxbuf;
    WebsSocket  *sp;
    ssize       nbytes;

    assert(wp);
    assert(websValid(wp));

    if (!websValid(wp)) {
        return;
    }
    websNoteRequestActivity(wp);
    rxbuf = &wp->rxbuf;//缓冲区的数据结构看定义，写得很清楚

    if (bufRoom(rxbuf) < (ME_GOAHEAD_LIMIT_BUFFER + 1)) {//缓冲区不够了增加缓冲区的大小
        if (!bufGrow(rxbuf, ME_GOAHEAD_LIMIT_BUFFER + 1)) {
            websError(wp, HTTP_CODE_INTERNAL_SERVER_ERROR, "Cannot grow rxbuf");
            websPump(wp);
            return;
        }
    }
    if ((nbytes = websRead(wp, (char*) rxbuf->endp, ME_GOAHEAD_LIMIT_BUFFER)) > 0) {//调用socketRead,读HTTP请求.rxbuf->endp是上一次的数据尾，每次读之后接上
        wp->lastRead = nbytes;//一次读了多少字节
        bufAdjustEnd(rxbuf, nbytes);//读了多少字节，数据的尾指针就加多少字节
        bufAddNull(rxbuf);//写字符串结束符
    }
    if (nbytes > 0 || wp->state > WEBS_BEGIN) {//读到数据了，进来处理
        websPump(wp);
    }
    if (wp->flags & WEBS_CLOSED) {
        return;//通过websPump处理完请求，需要关闭连接，return返回readEvent.数据结构依然保留。如果是非keep alive 什么时候清除本链接的数据结构？
    } else if (nbytes < 0 && socketEof(wp->sid)) {
        /* EOF or error. Allow running requests to continue. */
        if (wp->state < WEBS_READY) {
            if (wp->state > WEBS_BEGIN) {
                websError(wp, HTTP_CODE_COMMS_ERROR, "Read error: connection lost");
                websPump(wp);
            } else {
                complete(wp, 0);
            }
        } else {
            socketDeleteHandler(wp->sid);
        }
    } else if (wp->state < WEBS_READY) {//如果是keep alive的请求，继续监听。
        sp = socketPtr(wp->sid);
        socketCreateHandler(wp->sid, sp->handlerMask | SOCKET_READABLE, socketEvent, wp);
}

二、解析HTTP请求

websPump是处理WEB请求的主要函数，里面根据不同状态机来处理HTTP请求。HTTP请求的解析，响应，完成对应状态机中的几个状态。

/*
    Drive the request state machine for a connection. Each pass handles the current wp->state and
    the loop continues for as long as the handling step reports that further progress is possible.
    Returns immediately when the request is in the WEBS_RUNNING state.
 */
PUBLIC void websPump(Webs *wp)
{
    bool    more;

    more = 1;
    while (more) {
        switch (wp->state) {
        case WEBS_BEGIN:
            /* Initial state of every request: parse the request header */
            more = parseIncoming(wp);
            break;

        case WEBS_CONTENT:
            /* The request carries body data in addition to the header */
            more = processContent(wp);
            break;

        case WEBS_READY:
            /*
                Author's note: reception is complete, so start responding by calling the registered
                handler (jstHandler, fileHandler, actionHandler, ...). fileHandler, plain document
                transfer, is the default. The handler sets the state to COMPLETE while it runs.
             */
            if (websRunRequest(wp)) {
                more = (wp->state != WEBS_RUNNING);
            } else {
                /* Reroute if the handler re-wrote the request */
                websRouteRequest(wp);
                wp->state = WEBS_READY;
                more = 1;
            }
            break;

        case WEBS_RUNNING:
            /* Nothing to do until websDone is called */
            return;

        case WEBS_COMPLETE:
            /*
                Author's note: this is where websPump, and eventually readEvent, is left to wait for
                the next select return.
             */
            more = complete(wp, 1);
            break;
        }
    }
}

parseIncoming() 解析HTTP头的内容,确定是何种请求，从而才能知道怎么去响应:

mark

/*
    Parse the request header once it has been received in full.
    Returns 0 when the header terminator has not arrived yet (the caller must read more data) and 1
    when websPump may proceed, either because parsing finished or because an error response was set up.

    Fixes relative to the article's listing:
      - The character and string escapes ('\r', '\n', "\r\n\r\n", '\0' and the trace strings) lost
        in the listing are restored.
      - The duplicated O_BINARY flag in the PUT open() call is removed.
      - wp->putname is cleared after being freed on the PUT error path so that no stale pointer is
        left behind.
 */
static bool parseIncoming(Webs *wp)
{
    WebsBuf     *rxbuf;
    char        *end, c;

    rxbuf = &wp->rxbuf;
    /* Skip any CR/LF bytes preceding the request line */
    while (*rxbuf->servp == '\r' || *rxbuf->servp == '\n') {
        if (bufGetc(rxbuf) < 0) {
            break;
        }
    }
    /* "\r\n\r\n" (an empty line) is the protocol-defined end of the request header */
    if ((end = strstr((char*) wp->rxbuf.servp, "\r\n\r\n")) == 0) {
        /* Header incomplete: keep reading, but refuse headers that grow beyond the limit */
        if (bufLen(&wp->rxbuf) >= ME_GOAHEAD_LIMIT_HEADER) {
            websError(wp, HTTP_CODE_REQUEST_TOO_LARGE | WEBS_CLOSE, "Header too large");
            return 1;
        }
        return 0;
    }
    trace(3 | WEBS_RAW_MSG, "\n<<< Request\n");
    /* Temporarily terminate the header so it can be traced as a string, then restore the byte */
    c = *end;
    *end = '\0';
    trace(3 | WEBS_RAW_MSG, "%s\n", wp->rxbuf.servp);
    *end = c;

    /*
        Parse the first line of the Http header
     */
    parseFirstLine(wp);
    if (wp->state == WEBS_COMPLETE) {
        return 1;
    }
    /* Author's note: parses the whole header and records every attribute in wp */
    parseHeaders(wp);
    if (wp->state == WEBS_COMPLETE) {
        return 1;
    }
    /* Body data expected (chunked or positive length) -> WEBS_CONTENT, otherwise ready to run */
    wp->state = (wp->rxChunkState || wp->rxLen > 0) ? WEBS_CONTENT : WEBS_READY;

    /* Author's note: matches wp against each line of route.txt; on a match wp->route is set */
    websRouteRequest(wp);

    if (wp->state == WEBS_COMPLETE) {
        return 1;
    }
#if ME_GOAHEAD_CGI
    if (wp->route && wp->route->handler && wp->route->handler->service == cgiHandler) {
        if (smatch(wp->method, "POST")) {
            /* POST data for a CGI request is written to a file opened here */
            wp->cgiStdin = websGetCgiCommName();
            if ((wp->cgifd = open(wp->cgiStdin, O_CREAT | O_WRONLY | O_BINARY | O_TRUNC, 0666)) < 0) {
                websError(wp, HTTP_CODE_NOT_FOUND | WEBS_CLOSE, "Cannot open CGI file");
                return 1;
            }
        }
    }
#endif
#if !ME_ROM
    if (smatch(wp->method, "PUT")) {
        WebsStat    sbuf;
        wp->code = (stat(wp->filename, &sbuf) == 0 && sbuf.st_mode & S_IFDIR) ? HTTP_CODE_NO_CONTENT : HTTP_CODE_CREATED;
        wfree(wp->putname);
        wp->putname = websTempFile(ME_GOAHEAD_PUT_DIR, "put");
        if ((wp->putfd = open(wp->putname, O_BINARY | O_WRONLY | O_CREAT, 0644)) < 0) {
            error("Cannot create PUT filename %s", wp->putname);
            websError(wp, HTTP_CODE_INTERNAL_SERVER_ERROR, "Cannot create the put URI");
            wfree(wp->putname);
            wp->putname = NULL;
            return 1;
        }
    }
#endif
    return 1;
}

3. 响应HTTP请求

一、如何响应HTTP请求

websPump中若前面两步解析请求行请求头成功，wp->state置为READY时，调用websRunRequest响应请求。
此时，websRunRequest中将wp->state置为RUNNING，之后调用route对应的service回调函数，也就是websDefineHandler中定义的各种handler。

/*
    Drive the request state machine for a connection until a step reports that no further progress
    is possible, or the request is found in the WEBS_RUNNING state.
    Author's question: how are the different request types told apart so that the matching handler
    gets called?
 */
PUBLIC void websPump(Webs *wp)
{
    bool    advance;

    do {
        advance = 1;
        switch (wp->state) {
        case WEBS_BEGIN:
            /* Initial state of every request */
            advance = parseIncoming(wp);
            break;

        case WEBS_CONTENT:
            advance = processContent(wp);
            break;

        case WEBS_READY:
            if (websRunRequest(wp)) {
                advance = (wp->state != WEBS_RUNNING);
            } else {
                /* Reroute if the handler re-wrote the request */
                websRouteRequest(wp);
                wp->state = WEBS_READY;
            }
            break;

        case WEBS_RUNNING:
            /* Nothing to do until websDone is called */
            return;

        case WEBS_COMPLETE:
            advance = complete(wp, 1);
            break;
        }
    } while (advance);
}

二、响应HTTP请求handler的类型

根据route中的定义，响应类型具体有actionHandler（post请求）,jstHandler（动态页面）,fileHandler（默认静态页面）,cgiHandler（调用外部程序）等。在这些handler中将数据返回给客户端。extensions就是后缀名，如果请求的文件后缀是.jst就会调用jstHandler。

2.1 actionHandler

actionHandler比较简单，就是通过hash表，将actionName与对应websDefineAction定义的函数回调匹配上，去回调自己定义的回调函数即可，入参wp。用户定义action的行为中，要自己返回客户端action的结果。

/*
    Process an action request. Returns 1 always to indicate it handled the URL
    Return true to indicate the request was handled, even for errors.

    The action name is the second path segment of wp->path (the text after the second '/', up to the
    next '/'). It is looked up in actionTable and the registered C function is invoked.

    Fix relative to the article's listing: the NUL character constant used to cut the action name at
    the following '/' is restored ('' is not a valid character constant).
 */
static bool actionHandler(Webs *wp)
{
    WebsKey     *sp;
    char        actionBuf[ME_GOAHEAD_LIMIT_URI + 1];
    char        *cp, *actionName;
    WebsAction  fn;

    assert(websValid(wp));
    assert(actionTable >= 0);

    /*
        Extract the action name
     */
    scopy(actionBuf, sizeof(actionBuf), wp->path);
    if ((actionName = strchr(&actionBuf[1], '/')) == NULL) {
        websError(wp, HTTP_CODE_NOT_FOUND, "Missing action name");
        return 1;
    }
    actionName++;
    /* Drop anything after the action name */
    if ((cp = strchr(actionName, '/')) != NULL) {
        *cp = '\0';
    }
    /*
        Lookup the C action function first and then try tcl (no javascript support yet).
     */
    sp = hashLookup(actionTable, actionName);
    if (sp == NULL) {
        websError(wp, HTTP_CODE_NOT_FOUND, "Action %s is not defined", actionName);
    } else {
        fn = (WebsAction) sp->content.value.symbol;
        assert(fn);
        if (fn) {
#if ME_GOAHEAD_LEGACY
            (*((WebsProc) fn))((void*) wp, actionName, wp->query);
#else
            (*fn)((void*) wp);
#endif
        }
    }
    return 1;
}

2.2 jstHandler

jstHandler处理流程是先将page读取到内存中，从第一个字节开始，依次发送给客户端，遇到<% %>之后，回调绑定的C函数，将函数返回结果替换<% %>返回客户端，直到页面的所有内容都发完。
这种技术可以使得页面可以动态根据服务器执行C函数的结果来响应内容。也就是动态页面。

/*
    Process requests and expand all scripting commands. We read the entire web page into memory and then process. If
    you have really big documents, it is better to make them plain HTML files rather than Javascript web pages.
    Return true to indicate the request was handled, even for errors.

    The page text is written to the client block by block. Each "<% ... %>" section is evaluated with
    jsEval instead of being sent verbatim.
    Author's note: a dynamic page is therefore necessarily slower to serve than a static one.

    Fix relative to the article's listing: the escapes lost in the listing are restored ('\0', '\\',
    '\r', '\n' and the "\n" sequences inside the output strings).
 */
static bool jstHandler(Webs *wp)
{
    WebsFileInfo    sbuf;
    char            *lang, *token, *result, *ep, *cp, *buf, *nextp, *last;
    ssize           len;
    int             rc, jid;

    assert(websValid(wp));
    assert(wp->filename && *wp->filename);
    assert(wp->ext && *wp->ext);

    buf = 0;
    if ((jid = jsOpenEngine(wp->vars, websJstFunctions)) < 0) {
        websError(wp, HTTP_CODE_INTERNAL_SERVER_ERROR, "Cannot create JavaScript engine");
        goto done;
    }
    jsSetUserHandle(jid, wp);

    if (websPageStat(wp, &sbuf) < 0) {
        websError(wp, HTTP_CODE_NOT_FOUND, "Cannot stat %s", wp->filename);
        goto done;
    }
    if (websPageOpen(wp, O_RDONLY | O_BINARY, 0666) < 0) {
        websError(wp, HTTP_CODE_NOT_FOUND, "Cannot open URL: %s", wp->filename);
        goto done;
    }
    /*
        Create a buffer to hold the web page in-memory
     */
    len = sbuf.size;
    if ((buf = walloc(len + 1)) == NULL) {
        websError(wp, HTTP_CODE_INTERNAL_SERVER_ERROR, "Cannot get memory");
        goto done;
    }
    /* Terminate the buffer so the page can be scanned with the string functions below */
    buf[len] = '\0';

    if (websPageReadData(wp, buf, len) != len) {
        websError(wp, HTTP_CODE_NOT_FOUND, "Cannot read %s", wp->filename);
        goto done;
    }
    websPageClose(wp);
    websWriteHeaders(wp, (ssize) -1, 0);
    websWriteHeader(wp, "Pragma", "no-cache");
    websWriteHeader(wp, "Cache-Control", "no-cache");
    websWriteEndHeaders(wp);

    /*
        Scan for the next "<%"
     */
    last = buf;
    for (rc = 0; rc == 0 && *last && ((nextp = strstr(last, "<%")) != NULL); ) {
        /* Send the page text preceding the "<%" to the client first */
        websWriteBlock(wp, last, (nextp - last));
        nextp = skipWhite(nextp + 2);
        /*
            Decode the language
         */
        token = "language";
        if ((lang = strtokcmp(nextp, token)) != NULL) {
            if ((cp = strtokcmp(lang, "=javascript")) != NULL) {
                /* Ignore */;
            } else {
                cp = nextp;
            }
            nextp = cp;
        }

        /*
            Find tailing bracket and then evaluate the script
         */
        if ((ep = strstr(nextp, "%>")) != NULL) {

            /* Terminate the script text at "%>" and resume scanning just after it */
            *ep = '\0';
            last = ep + 2;
            nextp = skipWhite(nextp);
            /*
                Handle backquoted newlines
             */
            for (cp = nextp; *cp; ) {
                if (*cp == '\\' && (cp[1] == '\r' || cp[1] == '\n')) {
                    /* Replace the backslash and the line break that follows it with spaces */
                    *cp++ = ' ';
                    while (*cp == '\r' || *cp == '\n') {
                        *cp++ = ' ';
                    }
                } else {
                    cp++;
                }
            }
            if (*nextp) {
                result = NULL;

                if (jsEval(jid, nextp, &result) == 0) {
                    /*
                         On an error, discard all output accumulated so far and store the error in the result buffer.
                         Be careful if the user has called websError() already.
                     */
                    rc = -1;
                    if (websValid(wp)) {
                        if (result) {
                            websWrite(wp, "<h2><b>Javascript Error: %s</b></h2>\n", result);
                            websWrite(wp, "<pre>%s</pre>", nextp);
                            wfree(result);
                        } else {
                            websWrite(wp, "<h2><b>Javascript Error</b></h2>\n%s\n", nextp);
                        }
                        websWrite(wp, "</body></html>\n");
                        rc = 0;
                    }
                    goto done;
                }
            }

        } else {
            websError(wp, HTTP_CODE_INTERNAL_SERVER_ERROR, "Unterminated script in %s: \n", wp->filename);
            goto done;
        }
    }
    /*
        Output any trailing HTML page text
     */
    if (last && *last && rc == 0) {
        websWriteBlock(wp, last, strlen(last));
    }

/*
    Common exit and cleanup
    NOTE(review): on the success path the page was already closed above, so websPageClose is called a
    second time here -- presumably that is harmless; confirm.
 */
done:
    if (websValid(wp)) {
        websPageClose(wp);
        if (jid >= 0) {
            jsCloseEngine(jid);
        }
    }
    websDone(wp);
    wfree(buf);
    return 1;
}

2.3 fileHandler

fileHandler就是普通静态文件传输

/*
    Serve static files
    Return true to indicate the request was handled, even for errors.

    DELETE removes the file and PUT answers with the status code already stored in wp->code (unless
    built with ME_ROM). Any other method serves the document: directories are redirected to the index
    page, a document not modified since wp->since is answered with 304 and no body, HEAD requests get
    headers only, and everything else is sent by the background writer.

    Fixes relative to the article's listing:
      - The '\\' and '\0' character constants lost in the listing are restored.
      - The trailing-separator test no longer reads wp->path[-1] when the path is empty.
 */
static bool fileHandler(Webs *wp)
{
    WebsFileInfo    info;
    char            *tmp, *date;
    ssize           nchars;
    int             code;

    assert(websValid(wp));
    assert(wp->method);
    assert(wp->filename && wp->filename[0]);

#if !ME_ROM
    if (smatch(wp->method, "DELETE")) {
        if (unlink(wp->filename) < 0) {
            websError(wp, HTTP_CODE_NOT_FOUND, "Cannot delete the URI");
        } else {
            /* No content */
            websResponse(wp, 204, 0);
        }
    } else if (smatch(wp->method, "PUT")) {
        /* Code is already set for us by processContent() */
        websResponse(wp, wp->code, 0);

    } else
#endif /* !ME_ROM */
    {
        /*
            If the file is a directory, redirect using the nominated default page
         */
        if (websPageIsDirectory(wp)) {
            nchars = strlen(wp->path);
            /* Strip one trailing separator so the redirect target does not contain "//" */
            if (nchars > 0 && (wp->path[nchars - 1] == '/' || wp->path[nchars - 1] == '\\')) {
                wp->path[--nchars] = '\0';
            }
            tmp = sfmt("%s/%s", wp->path, websIndex);
            websRedirect(wp, tmp);
            wfree(tmp);
            return 1;
        }
        if (websPageOpen(wp, O_RDONLY | O_BINARY, 0666) < 0) {
#if ME_DEBUG
            if (wp->referrer) {
                trace(1, "From %s", wp->referrer);
            }
#endif
            websError(wp, HTTP_CODE_NOT_FOUND, "Cannot open document for: %s", wp->path);
            return 1;
        }
        if (websPageStat(wp, &info) < 0) {
            websError(wp, HTTP_CODE_NOT_FOUND, "Cannot stat page for URL");
            return 1;
        }
        code = 200;
        /* Not modified since the time supplied by the client: answer 304 without a body */
        if (wp->since && info.mtime <= wp->since) {
            code = 304;
            info.size = 0;
        }
        websSetStatus(wp, code);
        websWriteHeaders(wp, info.size, 0);
        if ((date = websGetDateString(&info)) != NULL) {
            websWriteHeader(wp, "Last-Modified", "%s", date);
            wfree(date);
        }
        websWriteEndHeaders(wp);

        /*
            All done if the browser did a HEAD request
         */
        if (smatch(wp->method, "HEAD")) {
            websDone(wp);
            return 1;
        }
        /* The body is sent by fileWriteEvent; an empty body completes the request right away */
        if (info.size > 0) {
            websSetBackgroundWriter(wp, fileWriteEvent);
        } else {
            websDone(wp);
        }
    }
    return 1;
}

2.4 cgiHandler

调用外部的程序执行，从字面上理解如果是调用外部程序，还需要考虑到进程间通信。在我接触的项目中没有用到这个功能，不去研究。实际上嵌入式的WEB服务器不一定要用到这个。


/*
    Process a form request.
    Return true to indicate the request was handled, even for errors.
 */
PUBLIC bool cgiHandler(Webs *wp)//那么复杂的话不需要这样用到这个模块。
{
    Cgi         *cgip;
    WebsKey     *s;
    char        cgiPrefix[ME_GOAHEAD_LIMIT_FILENAME], *stdIn, *stdOut, cwd[ME_GOAHEAD_LIMIT_FILENAME];
    char        *cp, *cgiName, *cgiPath, **argp, **envp, **ep, *tok, *query, *dir, *extraPath, *exe, *vp;
    CgiPid      pHandle;
    int         n, envpsize, argpsize, cid;

    assert(websValid(wp));

    websSetEnv(wp);

    /*
        Extract the form name and then build the full path name. The form name will follow the first '/' in path.
     */
    scopy(cgiPrefix, sizeof(cgiPrefix), wp->path);
    if ((cgiName = strchr(&cgiPrefix[1], '/')) == NULL) {
        websError(wp, HTTP_CODE_NOT_FOUND, "Missing CGI name");
        return 1;
    }
    *cgiName++ = '';

    getcwd(cwd, ME_GOAHEAD_LIMIT_FILENAME);
    dir = wp->route->dir ? wp->route->dir : cwd;
    chdir(dir);

    extraPath = 0;
    if ((cp = strchr(cgiName, '/')) != NULL) {
        extraPath = sclone(cp);
        *cp = '';
        websSetVar(wp, "PATH_INFO", extraPath);
        websSetVarFmt(wp, "PATH_TRANSLATED", "%s%s%s", dir, cgiPrefix, extraPath);
        wfree(extraPath);
    } else {
        websSetVar(wp, "PATH_INFO", "");
        websSetVar(wp, "PATH_TRANSLATED", "");
    }
    cgiPath = sfmt("%s%s/%s", dir, cgiPrefix, cgiName);
    websSetVarFmt(wp, "SCRIPT_NAME", "%s/%s", cgiPrefix, cgiName);
    websSetVar(wp, "SCRIPT_FILENAME", cgiPath);

/*
    See if the file exists and is executable.  If not error out.  Don't do this step for VxWorks, since the module
    may already be part of the OS image, rather than in the file system.
*/
#if !VXWORKS
    {
        WebsStat sbuf;
        if (stat(cgiPath, &sbuf) != 0 || (sbuf.st_mode & S_IFREG) == 0) {
            exe = sfmt("%s.exe", cgiPath);
            if (stat(exe, &sbuf) == 0 && (sbuf.st_mode & S_IFREG)) {
                wfree(cgiPath);
                cgiPath = exe;
            } else {
                error("Cannot find CGI program: ", cgiPath);
                websError(wp, HTTP_CODE_NOT_FOUND | WEBS_NOLOG, "CGI program file does not exist");
                wfree(cgiPath);
                return 1;
            }
        }
#if ME_WIN_LIKE
        if (strstr(cgiPath, ".exe") == NULL && strstr(cgiPath, ".bat") == NULL)//执行一个外部可执行程序。实际是否需要用到这种CGI？
#else
        if (access(cgiPath, X_OK) != 0)
#endif
        {
            websError(wp, HTTP_CODE_NOT_FOUND, "CGI process file is not executable");
            wfree(cgiPath);
            return 1;
        }
    }
#endif /* ! VXWORKS */
    /*
        Build command line arguments.  Only used if there is no non-encoded = character.  This is indicative of a ISINDEX
        query.  POST separators are & and others are +.  argp will point to a walloc'd array of pointers.  Each pointer
        will point to substring within the query string.  This array of string pointers is how the spawn or exec routines
        expect command line arguments to be passed.  Since we don't know ahead of time how many individual items there are
        in the query string, the for loop includes logic to grow the array size via wrealloc.
     */
    argpsize = 10;
    if ((argp = walloc(argpsize * sizeof(char *))) == 0) {
        websError(wp, HTTP_CODE_NOT_FOUND, "Cannot allocate CGI args");
        wfree(cgiPath);
        return 1;
    }
    assert(argp);
    *argp = cgiPath;
    n = 1;
    query = 0;

    if (strchr(wp->query, '=') == NULL) {
        query = sclone(wp->query);
        websDecodeUrl(query, query, strlen(query));
        for (cp = stok(query, " ", &tok); cp != NULL && argp != NULL; ) {
            *(argp+n) = cp;
            trace(5, "ARG[%d] %s", n, argp[n-1]);
            n++;
            if (n >= argpsize) {
                argpsize *= 2;
                if (argpsize > ME_GOAHEAD_LIMIT_CGI_ARGS) {
                    websError(wp, HTTP_CODE_REQUEST_TOO_LARGE, "Too many arguments");
                    wfree(cgiPath);
                    return 1;
                }
                argp = wrealloc(argp, argpsize * sizeof(char *));
            }
            cp = stok(NULL, " ", &tok);
        }
    }
    *(argp+n) = NULL;

    /*
        Add all CGI variables to the environment strings to be passed to the spawned CGI process.
        This includes a few we don't already have in the symbol table, plus all those that are in
        the vars symbol table. envp will point to a walloc'd array of pointers. Each pointer will
        point to a walloc'd string containing the keyword value pair in the form keyword=value.
        Since we don't know ahead of time how many environment strings there will be the for
        loop includes logic to grow the array size via wrealloc.
     */
    envpsize = 64;
    envp = walloc(envpsize * sizeof(char*));
    if (wp->vars) {
        for (n = 0, s = hashFirst(wp->vars); s != NULL; s = hashNext(wp->vars, s)) {
            if (s->content.valid && s->content.type == string) {
                vp = strim(s->name.value.string, 0, WEBS_TRIM_START);
                if (smatch(vp, "REMOTE_HOST") || smatch(vp, "HTTP_AUTHORIZATION") ||
                    smatch(vp, "IFS") || smatch(vp, "CDPATH") ||
                    smatch(vp, "PATH") || sstarts(vp, "LD_")) {
                    continue;
                }
                if (s->arg != 0 && *ME_GOAHEAD_CGI_VAR_PREFIX != '') {
                    envp[n++] = sfmt("%s%s=%s", ME_GOAHEAD_CGI_VAR_PREFIX, s->name.value.string,
                        s->content.value.string);
                } else {
                    envp[n++] = sfmt("%s=%s", s->name.value.string, s->content.value.string);
                }
                trace(0, "Env[%d] %s", n, envp[n-1]);
                if (n >= envpsize) {
                    envpsize *= 2;
                    envp = wrealloc(envp, envpsize * sizeof(char *));
                }
            }
        }
    }
    *(envp+n) = NULL;

    /*
        Create temporary file name(s) for the child's stdin and stdout. For POST data the stdin temp file (and name)
        should already exist.
     */
    if (wp->cgiStdin == NULL) {
        wp->cgiStdin = websGetCgiCommName();
    }
    stdIn = wp->cgiStdin;
    stdOut = websGetCgiCommName();
    if (wp->cgifd >= 0) {
        close(wp->cgifd);
        wp->cgifd = -1;
    }

    /*
        Now launch the process.  If not successful, do the cleanup of resources.  If successful, the cleanup will be
        done after the process completes.
     */
    if ((pHandle = launchCgi(cgiPath, argp, envp, stdIn, stdOut)) == (CgiPid) -1) {
        websError(wp, HTTP_CODE_INTERNAL_SERVER_ERROR, "failed to spawn CGI task");
        for (ep = envp; *ep != NULL; ep++) {
            wfree(*ep);
        }
        wfree(cgiPath);
        wfree(argp);
        wfree(envp);
        wfree(stdOut);
        wfree(query);

    } else {
        /*
            If the spawn was successful, put this wp on a queue to be checked for completion.
         */
        cid = wallocObject(&cgiList, &cgiMax, sizeof(Cgi));
        cgip = cgiList[cid];
        cgip->handle = pHandle;
        cgip->stdIn = stdIn;
        cgip->stdOut = stdOut;
        cgip->cgiPath = cgiPath;
        cgip->argp = argp;
        cgip->envp = envp;
        cgip->wp = wp;
        cgip->fplacemark = 0;
        wfree(query);
    }
    /*
        Restore the current working directory after spawning child CGI
     */
    chdir(cwd);
    return 1;
}

4.用户登陆与权限认证

一、用户登陆

1.1 用户信息存储

goahead源码已经实现了登陆功能，用户信息以文件的形式保存在auth.txt中。其中的密码字段不是明文，而是由用户名、密码和一个密钥（realm）通过MD5算法生成的一串摘要字符。web初始化载入这个文件时，就会把用户信息一并载入。

mark

1.2 登陆页面前台实现

前台页面已经实现好了login.html

<!-- Login page: submits the username and password fields to /action/login via POST. -->
<html>
<head>
    <title>login.html</title>
</head>
<body>
    <p>Please log in</p>
    <form name="details" method="post" action="/action/login">
        Username <input type="text" name="username" value=""><br/>
        Password <input type="password" name="password" value=""><br/>
        <input type="submit" name="submit" value="OK">
    </form>
</body>
</html>

1.3 后台实现

websOpenAuth中绑定了login的action函数：websDefineAction("login", loginServiceProc);

/*
    Login action: check the posted "username" and "password" form variables with websLoginUser and
    redirect according to the outcome. The outcome is also stored in the "loginStatus" session
    variable as either "ok" or "failed".
 */
static void loginServiceProc(Webs *wp)
{
    WebsRoute   *route;
    cchar       *username, *password, *referrer;

    assert(wp);
    route = wp->route;
    assert(route);

    username = websGetVar(wp, "username", "");
    password = websGetVar(wp, "password", "");

    if (!websLoginUser(wp, username, password)) {
        /* Credentials rejected: let the route prompt again (if it can), then redirect by status */
        if (route->askLogin) {
            route->askLogin(wp);
        }
        websSetSessionVar(wp, "loginStatus", "failed");
        websRedirectByStatus(wp, HTTP_CODE_UNAUTHORIZED);
        return;
    }

    /* Credentials accepted: prefer an application-defined "referrer" session variable as the target */
    referrer = websGetSessionVar(wp, "referrer", 0);
    if (referrer != 0) {
        websRedirect(wp, referrer);
    } else {
        /* No referrer recorded, so use the route's redirect for status 200 */
        websRedirectByStatus(wp, HTTP_CODE_OK);
    }
    websSetSessionVar(wp, "loginStatus", "ok");
}

1.4 redirect

websRedirectByStatus是按状态码进行重定向的函数。在route.txt中可以定义redirect选项，例如下面这条针对login的配置，意思就是：如果状态码是200，就跳转到home.asp；如果是401，就跳转到login.html。

route uri=/action/login methods=POST handler=action redirect=200@/home.asp redirect=401@login.html

1.5 cookie与session

websLoginUser密码校验函数中，当密码校验成功，会为这个用户创建一个session,并且将session id放在cookie中发给客户端，以后客户端的请求头中就会带有cookie,根据此cookie来校验之后请求的身份。

/*
    Return the session attached to this request, looking it up by session ID when the request has
    none cached yet.

    wp      Request object. wp->session is filled in when a session is found or created.
    create  When non-zero, allocate a new session if the lookup fails; when zero, a failed lookup
            returns 0.

    Returns the session, or 0 if none exists (and create is zero), the session limit
    ME_GOAHEAD_LIMIT_SESSION_COUNT is reached, or allocation fails.

    Whenever a session is returned its expiry is pushed forward to now + lifespan.
 */
WebsSession *websGetSession(Webs *wp, int create)
{
    WebsKey     *sym;
    char        *id;

    assert(wp);

    if (!wp->session) {
        id = websGetSessionID(wp);
        if ((sym = hashLookup(sessions, id)) == 0) {
            if (!create) {
                wfree(id);
                return 0;
            }
            if (sessionCount >= ME_GOAHEAD_LIMIT_SESSION_COUNT) {
                error("Too many sessions %d/%d", sessionCount, ME_GOAHEAD_LIMIT_SESSION_COUNT);
                wfree(id);
                return 0;
            }
            sessionCount++;
            if ((wp->session = websAllocSession(wp, id, ME_GOAHEAD_LIMIT_SESSION_LIFE)) == 0) {
                /*
                    No session was created, so give back the slot counted above. Otherwise every
                    failed allocation would permanently consume one of the limited session slots.
                 */
                sessionCount--;
                wfree(id);
                return 0;
            }
            /* Hand the new session ID to the client as a cookie scoped to "/" */
            websSetCookie(wp, WEBS_SESSION, wp->session->id, "/", NULL, 0, 0);
        } else {
            wp->session = (WebsSession*) sym->content.value.symbol;
        }
        /* The looked-up ID string is only needed for the lookup/allocation above */
        wfree(id);
    }
    if (wp->session) {
        wp->session->expires = time(0) + wp->session->lifespan;
    }
    return wp->session;
}

1.6 基本认证与摘要认证

当route.txt中配置了auth属性之后，websRouteRequest中会针对这条route进行认证。auth有basic和digest两种方式,即基本认证和摘要认证。这样的话每一条请求都会进行认证。

#if ME_GOAHEAD_AUTH
        /* A route with an authType must pass websAuthenticate; on failure stop handling the request here */
        if (route->authType && !websAuthenticate(wp)) {
            return;
        }
        /* When the route has abilities set (>= 0), websCan must also succeed before continuing */
        if (route->abilities >= 0 && !websCan(wp, route->abilities)) {
            return;
        }
#endif

后记：假如利用了https协议，就没必要用到basic或者digest这两种认证模式了。基于https如何进行用户和权限管理，后续需自己实现

5.实现文件导入和导出

对于一个完整的WEB服务器来说，应该支持WEB文件导入功能，例如导入业务的配置文件，导入软件升级包进行升级等等。导出功能一般是导出用户配置文件，导出log日志等。导入导出对于HTTP请求来说依然是POST和GET。文件导入和导出在goahead中已经原生实现了。

一、文件导入

1、在主函数中定义action函数；

websDefineAction("upload", uploadTest);

2、实现uploadTest；
upfile 路径我修改了一下。这里只是做了回显和修改文件名。实际的文件传输是在接收请求的过程中，由websPump中的processContent->websProcessUploadData完成的：如果请求是上传文件，会先把文件放在/tmp下。因此回调uploadTest之前，文件已经传输完毕了。

/*
    Upload action: reply with a text/plain report of the request and move each uploaded file.

    For POST requests, every entry in wp->files is echoed (form field name, stored file name,
    client-side file name, content type and size) and the stored file is renamed to
    /tmp/<clientFilename>. All request variables in wp->vars are then echoed as name=value lines.
    Requests using other methods receive the headers only.
 */
static void uploadTest(Webs *wp)
{
    WebsKey         *s;
    WebsUpload      *up;
    char            *upfile;

    websSetStatus(wp, 200);
    websWriteHeaders(wp, -1, 0);
    websWriteHeader(wp, "Content-Type", "text/plain");
    websWriteEndHeaders(wp);
    if (scaselessmatch(wp->method, "POST")) {
        for (s = hashFirst(wp->files); s; s = hashNext(wp->files, s)) {
            up = s->content.value.symbol;
            websWrite(wp, "FILE: %s\r\n", s->name.value.string);
            websWrite(wp, "FILENAME=%s\r\n", up->filename);
            websWrite(wp, "CLIENT=%s\r\n", up->clientFilename);
            websWrite(wp, "TYPE=%s\r\n", up->contentType);
            websWrite(wp, "SIZE=%d\r\n", up->size);
            /*
                NOTE(review): clientFilename comes from the client. Confirm the upload layer rejects
                path separators and ".." before it is used to build a destination path.
             */
            upfile = sfmt("/tmp/%s", up->clientFilename);
            if (rename(up->filename, upfile) < 0) {
                /* A failed rename is logged only; the response continues */
                error("Cannot rename uploaded file: %s to %s, errno %d", up->filename, upfile, errno);
            }
            wfree(upfile);
        }
        websWrite(wp, "\r\nVARS:\r\n");
        for (s = hashFirst(wp->vars); s; s = hashNext(wp->vars, s)) {
            websWrite(wp, "%s=%s\r\n", s->name.value.string, s->content.value.string);
        }
    }
    websDone(wp);
}

3、写前端页面 uploadFile.asp

<!DOCTYPE html>
<!-- Upload page: sends the chosen file to /action/upload as multipart/form-data. -->
<html>
<head>
    <meta charset="utf-8">
    <title>上传文件</title>
</head>
<body>
    <div>
        <form action="/action/upload" method="post" enctype="multipart/form-data">
            <table>
                <tr>
                    <td>请上传文件</td>
                    <td><input name="file" type="file"></td>
                    <td><input type="submit" value="上传"></td>
                </tr>
            </table>
        </form>
    </div>
</body>
</html>

4、http.c中修改大小限制,为了简便，我先注释了

            /*   if (wp->rxLen > ME_GOAHEAD_LIMIT_POST) {
                    websError(wp, HTTP_CODE_REQUEST_TOO_LARGE | WEBS_CLOSE, "Too big");
                    return;
                }*/

5、实际效果

mark

这样就将windows本地文件传到服务器/tmp目录下了。

二、文件导出

文件导出用的是fileHandler：将目标文件放在存放web页面的目录下，直接请求这个文件名，就可以把文件下载下来。在真实项目中，需要把FLASH中的数据先拷贝到/tmp下，再去请求tmp下对应的文件，可以结合JS和AJAX的交互方式来请求这个文件。后续再来补充案例。

6.结合openssl实现https协议

http协议是不安全的，因此还需要结合openssl实现安全的https协议。

一、SSL读函数

/*
    Read up to "len" bytes from the request's connection into "buf".
    Requests flagged WEBS_SECURE are read through sslRead (only when built with ME_COM_SSL);
    all others are read from the socket wp->sid via socketRead.
    Returns the number of bytes read, which may be fewer than "len" and may be zero, or -1 on
    error or EOF. Use socketEof() to tell an error from EOF.
 */
static ssize websRead(Webs *wp, char *buf, ssize len)
{
    ssize   nbytes;

    assert(wp);
    assert(buf);
    assert(len > 0);

#if ME_COM_SSL
    if ((wp->flags & WEBS_SECURE) != 0) {
        /* https connection: go through the SSL layer */
        nbytes = sslRead(wp, buf, len);
    } else
#endif
    {
        nbytes = socketRead(wp->sid, buf, len);
    }
    return nbytes;
}

websListen监听的服务器地址只要是以https://开头的，就会自动转换为启用openssl的模式。

二、SSL写函数

/*
    Write "size" bytes from "buf" to the request's connection without blocking.
    Requests flagged WEBS_SECURE are written through sslWrite (only when built with ME_COM_SSL);
    all others go to the socket wp->sid via socketWrite.
    Returns the number of bytes written, which may be short, or -1 on errors. A request already
    flagged WEBS_CLOSED fails immediately with -1.
    On success the byte count is added to wp->written and request activity is noted.
 */
PUBLIC ssize websWriteSocket(Webs *wp, cchar *buf, ssize size)
{
    ssize   nbytes;

    assert(wp);
    assert(buf);
    assert(size >= 0);

    if ((wp->flags & WEBS_CLOSED) != 0) {
        return -1;
    }

#if ME_COM_SSL
    if ((wp->flags & WEBS_SECURE) != 0) {
        nbytes = sslWrite(wp, (void*) buf, size);
    } else
#endif
    {
        nbytes = socketWrite(wp->sid, (void*) buf, size);
    }

    /* Pass a transport failure straight back without touching the request accounting */
    if (nbytes < 0) {
        return nbytes;
    }
    wp->written += nbytes;
    websNoteRequestActivity(wp);
    return nbytes;
}

Goahead源码解析(转)

title: Goahead源码解析(转) date: 2019/12/21 15:24:47 toc: true

源码解析

Goahead源码解析(转)

1. 从主函数到I/O事件循环

一、主函数

二、I/O事件循环

三、服务器与客户端建立连接

2. 读取HTTP请求

一、读取HTTP请求

二、解析HTTP请求

3. 响应HTTP请求

一、如何响应HTTP请求

二、响应HTTP请求handler的类型

2.1 actionHandler

2.2 jstHandler

2.3 fileHandler

2.4 cgiHandler

4.用户登陆与权限认证

一、用户登陆

1.1 用户信息存储

1.2 登陆页面前台实现

1.3 后台实现

1.4 redirect

1.5 cookie与session

1.6 基本认证与摘要认证

5.实现文件导入和导出

一、文件导入

二、文件导出

6.结合openssl实现https协议

一、SSL读函数

二、SSL写函数

title: Goahead源码解析(转)
date: 2019/12/21 15:24:47
toc: true