上一篇nginx的文章中,我们理解了整个http正向代理的运行流程原理,主要就是事件机制接入,header解析,body解析,然后遍历各种checker,直到处理成功为止。
我们以访问一个普通文件为例,nginx到底是如何找到这个文件并返回信息的呢?它会不会有些什么限制呢?
按我们自己的理解,它应该是从uri中截取出相应的路径,然后对应到root目录下,查找到相应文件,返回即可。那么,它又是如何处理html和其他各种不同格式的文件的呢?
就让我们一起来探秘nginx文件的查找实现吧!
0. nginx 静态文件配置
要配置静态文件处理,只需在http server中配置root路径即可。(当然了,你可以根据前缀配置许多不同的root)
http {
    include mime.types;
    default_type application/octet-stream;

    #log_format main '$remote_addr - $remote_user [$time_local] "$request" '
    # '$status $body_bytes_sent "$http_referer" '
    # '"$http_user_agent" "$http_x_forwarded_for"';

    #access_log logs/access.log main;

    sendfile on;
    #tcp_nopush on;

    #keepalive_timeout 0;
    keepalive_timeout 65;

    #gzip on;

    server {
        listen 8085;
        server_name localhost;

        #charset koi8-r;

        #access_log logs/host.access.log main;

        location /hello {
            root /www/hello;
            index index.html index.htm;
        }

        location / {
            root html;
            index index.html index.htm;
        }
    }

    # Any number of additional server blocks can be added below.
}
配置简单吧,实际核心就两三行代码搞定:监听端口号 listen、访问域名 server_name、服务器根路径 root。明显这是nginx成功的原因之一。
本文要讨论的场景是,如果我访问 http://localhost:8085/1.txt?d=xxx, nginx将如何干成这件事?
1. checker的遍历回顾
我们先来回顾下,nginx是如何遍历各个checker的吧!
// http/ngx_http_core_module.c // 响应客户端操作, 多阶段式操作 void ngx_http_core_run_phases(ngx_http_request_t *r) { ngx_int_t rc; ngx_http_phase_handler_t *ph; ngx_http_core_main_conf_t *cmcf; cmcf = ngx_http_get_module_main_conf(r, ngx_http_core_module); ph = cmcf->phase_engine.handlers; // 依次调用各 checker, 直到有响应 OK 的checker为止 while (ph[r->phase_handler].checker) { // 每次调用 checker 之后, 内部都会将 r->phase_handler++, 即迭代下一个 // 此处的 checker 非常之多, 是在各模块启动时, 自动向 ngx_http_core_module.main_conf 中进行注册的 /** * 定义如下: typedef enum { NGX_HTTP_POST_READ_PHASE = 0, NGX_HTTP_SERVER_REWRITE_PHASE, NGX_HTTP_FIND_CONFIG_PHASE, NGX_HTTP_REWRITE_PHASE, NGX_HTTP_POST_REWRITE_PHASE, NGX_HTTP_PREACCESS_PHASE, NGX_HTTP_ACCESS_PHASE, NGX_HTTP_POST_ACCESS_PHASE, NGX_HTTP_PRECONTENT_PHASE, NGX_HTTP_CONTENT_PHASE, NGX_HTTP_LOG_PHASE } ngx_http_phases; // 注册方式 cmcf = ngx_http_conf_get_module_main_conf(cf, ngx_http_core_module); h = ngx_array_push(&cmcf->phases[NGX_HTTP_ACCESS_PHASE].handlers); *h = ngx_http_access_handler; */ // 将请求信息和 handler 本身传入调用(不是面向, 只能这么做了) rc = ph[r->phase_handler].checker(r, &ph[r->phase_handler]); // 只要有一个处理成功, 则后续不再调用 if (rc == NGX_OK) { return; } } }
可以说,它的各checker是非常复杂的,各模块都可以向其中注册处理器。这也是nginx灵活性的体现。不过我们不想关注太多。
文件的查找是在 static_module 中完成的,我们只需关注这个即可。
2. 静态文件模块的注册
静态文件模块主要就是负责静态文件的查找处理。几乎所有的http处理模块,都是先进行注册,然后再被调用的过程。static_module 自然不例外。
// http/modules/ngx_http_static_handler.c static ngx_int_t ngx_http_static_init(ngx_conf_t *cf) { ngx_http_handler_pt *h; ngx_http_core_main_conf_t *cmcf; cmcf = ngx_http_conf_get_module_main_conf(cf, ngx_http_core_module); // 获取http_core_module的配置 // 将ngx_http_static_handler 添加到 NGX_HTTP_CONTENT_PHASE 的handlers中 h = ngx_array_push(&cmcf->phases[NGX_HTTP_CONTENT_PHASE].handlers); if (h == NULL) { return NGX_ERROR; } *h = ngx_http_static_handler; return NGX_OK; } // 下面是一些nginx的模块暴露规范,只有这样,这个模块才会被接入到整个系统中 static ngx_http_module_t ngx_http_static_module_ctx = { NULL, /* preconfiguration */ ngx_http_static_init, /* postconfiguration */ NULL, /* create main configuration */ NULL, /* init main configuration */ NULL, /* create server configuration */ NULL, /* merge server configuration */ NULL, /* create location configuration */ NULL /* merge location configuration */ }; ngx_module_t ngx_http_static_module = { NGX_MODULE_V1, &ngx_http_static_module_ctx, /* module context */ NULL, /* module directives */ NGX_HTTP_MODULE, /* module type */ NULL, /* init master */ NULL, /* init module */ NULL, /* init process */ NULL, /* init thread */ NULL, /* exit thread */ NULL, /* exit process */ NULL, /* exit master */ NGX_MODULE_V1_PADDING };
3. 文件查找实现
经过前面的模块注册到 NGX_HTTP_CONTENT_PHASE 中,就会被nginx调用。前提是没有其他更合适的处理器的时候。而因为 static_module 是在 NGX_HTTP_CONTENT_PHASE 中,所以都会走content的处理器:
// http/ngx_http_core_module.c ngx_int_t ngx_http_core_content_phase(ngx_http_request_t *r, ngx_http_phase_handler_t *ph) { size_t root; ngx_int_t rc; ngx_str_t path; if (r->content_handler) { r->write_event_handler = ngx_http_request_empty_handler; ngx_http_finalize_request(r, r->content_handler(r)); return NGX_OK; } ngx_log_debug1(NGX_LOG_DEBUG_HTTP, r->connection->log, 0, "content phase: %ui", r->phase_handler); rc = ph->handler(r); // 处理成功,则返回ok if (rc != NGX_DECLINED) { ngx_http_finalize_request(r, rc); return NGX_OK; } /* rc == NGX_DECLINED */ ph++; if (ph->checker) { r->phase_handler++; return NGX_AGAIN; } /* no content handler was found */ if (r->uri.data[r->uri.len - 1] == '/') { if (ngx_http_map_uri_to_path(r, &path, &root, 0) != NULL) { ngx_log_error(NGX_LOG_ERR, r->connection->log, 0, "directory index of "%s" is forbidden", path.data); } ngx_http_finalize_request(r, NGX_HTTP_FORBIDDEN); return NGX_OK; } ngx_log_error(NGX_LOG_ERR, r->connection->log, 0, "no handler found"); ngx_http_finalize_request(r, NGX_HTTP_NOT_FOUND); return NGX_OK; } // http/modules/ngx_http_static_handler.c static ngx_int_t ngx_http_static_handler(ngx_http_request_t *r) { u_char *last, *location; size_t root, len; ngx_str_t path; ngx_int_t rc; ngx_uint_t level; ngx_log_t *log; ngx_buf_t *b; ngx_chain_t out; ngx_open_file_info_t of; ngx_http_core_loc_conf_t *clcf; // 仅支持 get/head/post 方法进行静态文件处理 if (!(r->method & (NGX_HTTP_GET|NGX_HTTP_HEAD|NGX_HTTP_POST))) { return NGX_HTTP_NOT_ALLOWED; } // 要求uri不能以'/'结尾,否则走其他协议 if (r->uri.data[r->uri.len - 1] == '/') { return NGX_DECLINED; } log = r->connection->log; /* * ngx_http_map_uri_to_path() allocates memory for terminating ' ' * so we do not need to reserve memory for '/' for possible redirect */ // 解析文件路径到path中 last = ngx_http_map_uri_to_path(r, &path, &root, 0); if (last == NULL) { return NGX_HTTP_INTERNAL_SERVER_ERROR; } path.len = last - path.data; ngx_log_debug1(NGX_LOG_DEBUG_HTTP, log, 0, "http filename: "%s"", path.data); clcf = 
ngx_http_get_module_loc_conf(r, ngx_http_core_module); // 初始化文件实例 ngx_memzero(&of, sizeof(ngx_open_file_info_t)); of.read_ahead = clcf->read_ahead; of.directio = clcf->directio; of.valid = clcf->open_file_cache_valid; of.min_uses = clcf->open_file_cache_min_uses; of.errors = clcf->open_file_cache_errors; of.events = clcf->open_file_cache_events; if (ngx_http_set_disable_symlinks(r, clcf, &path, &of) != NGX_OK) { return NGX_HTTP_INTERNAL_SERVER_ERROR; } if (ngx_open_cached_file(clcf->open_file_cache, &path, &of, r->pool) != NGX_OK) { switch (of.err) { case 0: return NGX_HTTP_INTERNAL_SERVER_ERROR; case NGX_ENOENT: case NGX_ENOTDIR: case NGX_ENAMETOOLONG: level = NGX_LOG_ERR; rc = NGX_HTTP_NOT_FOUND; break; case NGX_EACCES: #if (NGX_HAVE_OPENAT) case NGX_EMLINK: case NGX_ELOOP: #endif level = NGX_LOG_ERR; rc = NGX_HTTP_FORBIDDEN; break; default: level = NGX_LOG_CRIT; rc = NGX_HTTP_INTERNAL_SERVER_ERROR; break; } if (rc != NGX_HTTP_NOT_FOUND || clcf->log_not_found) { ngx_log_error(level, log, of.err, "%s "%s" failed", of.failed, path.data); } return rc; } r->root_tested = !r->error_page; ngx_log_debug1(NGX_LOG_DEBUG_HTTP, log, 0, "http static fd: %d", of.fd); if (of.is_dir) { ngx_log_debug0(NGX_LOG_DEBUG_HTTP, log, 0, "http dir"); ngx_http_clear_location(r); r->headers_out.location = ngx_list_push(&r->headers_out.headers); if (r->headers_out.location == NULL) { return NGX_HTTP_INTERNAL_SERVER_ERROR; } len = r->uri.len + 1; if (!clcf->alias && r->args.len == 0) { location = path.data + root; *last = '/'; } else { if (r->args.len) { len += r->args.len + 1; } location = ngx_pnalloc(r->pool, len); if (location == NULL) { ngx_http_clear_location(r); return NGX_HTTP_INTERNAL_SERVER_ERROR; } last = ngx_copy(location, r->uri.data, r->uri.len); *last = '/'; if (r->args.len) { *++last = '?'; ngx_memcpy(++last, r->args.data, r->args.len); } } r->headers_out.location->hash = 1; ngx_str_set(&r->headers_out.location->key, "Location"); r->headers_out.location->value.len = len; 
r->headers_out.location->value.data = location; return NGX_HTTP_MOVED_PERMANENTLY; } #if !(NGX_WIN32) /* the not regular files are probably Unix specific */ if (!of.is_file) { ngx_log_error(NGX_LOG_CRIT, log, 0, ""%s" is not a regular file", path.data); return NGX_HTTP_NOT_FOUND; } #endif // 真正到内容输出的时候,post又是不被允许的方法 if (r->method == NGX_HTTP_POST) { return NGX_HTTP_NOT_ALLOWED; } // 静态文件处理,将会忽略所有请求body参数 rc = ngx_http_discard_request_body(r); if (rc != NGX_OK) { return rc; } // 输出文件内容到客户端 log->action = "sending response to client"; r->headers_out.status = NGX_HTTP_OK; r->headers_out.content_length_n = of.size; r->headers_out.last_modified_time = of.mtime; // 设置 ETag header if (ngx_http_set_etag(r) != NGX_OK) { return NGX_HTTP_INTERNAL_SERVER_ERROR; } // content-type 设置,大概就是根据文件后缀找到相应的content-type 输出即可 if (ngx_http_set_content_type(r) != NGX_OK) { return NGX_HTTP_INTERNAL_SERVER_ERROR; } if (r != r->main && of.size == 0) { return ngx_http_send_header(r); } r->allow_ranges = 1; /* we need to allocate all before the header would be sent */ b = ngx_calloc_buf(r->pool); if (b == NULL) { return NGX_HTTP_INTERNAL_SERVER_ERROR; } b->file = ngx_pcalloc(r->pool, sizeof(ngx_file_t)); if (b->file == NULL) { return NGX_HTTP_INTERNAL_SERVER_ERROR; } // 发送头信息,如果是 head 请求,则到此为止 // 该header响应会添加一些公共的请求头一并返回
// 其中content-type是根据文件类型做的映射返回,如txt文件映射为 text/plain
// 具体可以由用户指定,在 mime.types 中添加映射即可 // 使用一系列的filter过滤器链进行处理 // 当文件未发生变化时,会返回304, 即不再返回更多信息 /** Accept-Ranges: bytes Connection: keep-alive Content-Length: 18 Content-Type: text/plain Date: Wed, 07 Oct 2020 09:01:12 GMT ETag: "5f66fc46-12" Last-Modified: Sun, 20 Sep 2020 06:52:54 GMT Server: nginx/1.19.2 */ rc = ngx_http_send_header(r); if (rc == NGX_ERROR || rc > NGX_OK || r->header_only) { return rc; } // 封装要响应的文件描述符,输出响应 b->file_pos = 0; b->file_last = of.size; b->in_file = b->file_last ? 1: 0; b->last_buf = (r == r->main) ? 1: 0; b->last_in_chain = 1; b->file->fd = of.fd; b->file->name = path; b->file->log = log; b->file->directio = of.is_directio; out.buf = b; out.next = NULL; // 一系列的响应过滤器处理,响应body /** * range_filter * copy_filter * output_chain -> output_chain_copy_buf */ return ngx_http_output_filter(r, &out); }
整体可以简单用一句话说明:根据uri路径,结合root路径配置,得到文件信息,响应客户端。实际只处理 get/head 请求(post 虽然能通过入口处的方法检查,但在真正输出文件内容之前会被拒绝,返回 405),文件内容无变化时将会返回304。上面代码中的其余部分,更多是受限于c语言的表达能力而做的各种细节处理,无须多看。
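对于静态文件的处理,uri后面给定的所有参数(如 ?d=xxx)在查找文件时都将被忽略掉。当然了也不是完全无用,不同参数也是有意义的:例如目录重定向时参数会被原样带到 Location 中,对浏览器缓存而言不同参数也会被当作不同的url;而文件是否发生变化,则是依靠响应头中的 ETag 和 Last-Modified 来检测的。至于是html文件还是pdf或者txt,nginx并没有做特别的处理,它只是负责将内容返回给浏览器,浏览器做进一步解析从而达到html展现的效果。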
另外,再次需要说明的一点是,nginx的性能优势是在于其巧妙地利用系统的非阻塞io,从而提升了处理能力,而且其扩展能力非常强,配置也非常友好。但其整体流程,与其他http服务器并无二致。
对于想要了解更多细节的同学,可以点开下面的代码,查看C语言的实现细节。
// http/ngx_http_core_module.c // 解析文件路径到path中 u_char * ngx_http_map_uri_to_path(ngx_http_request_t *r, ngx_str_t *path, size_t *root_length, size_t reserved) { u_char *last; size_t alias; ngx_http_core_loc_conf_t *clcf; clcf = ngx_http_get_module_loc_conf(r, ngx_http_core_module); alias = clcf->alias; if (alias && !r->valid_location) { ngx_log_error(NGX_LOG_ALERT, r->connection->log, 0, ""alias" cannot be used in location "%V" " "where URI was rewritten", &clcf->name); return NULL; } if (clcf->root_lengths == NULL) { *root_length = clcf->root.len; path->len = clcf->root.len + reserved + r->uri.len - alias + 1; path->data = ngx_pnalloc(r->pool, path->len); if (path->data == NULL) { return NULL; } // 将root根目录赋值给 path last = ngx_copy(path->data, clcf->root.data, clcf->root.len); } else { if (alias == NGX_MAX_SIZE_T_VALUE) { reserved += r->add_uri_to_alias ? r->uri.len + 1 : 1; } else { reserved += r->uri.len - alias + 1; } if (ngx_http_script_run(r, path, clcf->root_lengths->elts, reserved, clcf->root_values->elts) == NULL) { return NULL; } if (ngx_get_full_name(r->pool, (ngx_str_t *) &ngx_cycle->prefix, path) != NGX_OK) { return NULL; } *root_length = path->len - reserved; last = path->data + *root_length; if (alias == NGX_MAX_SIZE_T_VALUE) { if (!r->add_uri_to_alias) { *last = '