Python复习笔记(十)Http协议--Web服务器-并发服务器

1. HTTP协议(超文本传输协议)

浏览器===>服务器发送的请求格式如下:(浏览器告诉服务器,浏览器的信息)

GET / HTTP/1.1
Host: www.baidu.com
Connection: keep-alive
Cache-Control: max-age=0
Upgrade-Insecure-Requests: 1
User-Agent: Mozilla/5.0 (Windows NT 6.1; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/71.0.3578.98 Safari/537.36
Accept: text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,image/apng,*/*;q=0.8
Accept-Encoding: gzip, deflate, br
Accept-Language: zh-CN,zh;q=0.9

服务器==>浏览器回送的数据格式如下:(告诉浏览器,服务器的版本,按照什么格式解析)

header:告诉浏览器特殊信息(必须有HTTP/1.1 200 OK)

HTTP/1.1 200 OK
Bdpagetype: 2
Bdqid: 0xb49ba00a00010431
Cache-Control: private
Connection: Keep-Alive
Content-Encoding: gzip
Content-Type: text/html;charset=utf-8
Date: Sat, 09 Mar 2019 14:40:59 GMT
Expires: Sat, 09 Mar 2019 14:40:59 GMT
Server: BWS/1.1
Set-Cookie: BDSVRTM=70; path=/
Set-Cookie: BD_HOME=1; path=/
Set-Cookie: H_PS_PSSID=28648_1455_28395_21114_28608_28584_28557_28604_28625_28605; path=/; domain=.baidu.com
Strict-Transport-Security: max-age=172800
X-Ua-Compatible: IE=Edge,chrome=1
Transfer-Encoding: chunked
Cookie: BAIDUID=969EF83E73AFFBF96897E645871A1957:FG=1; BIDUPSID=969EF83E73AFFBF96897E645871A1957; PSTM=1549905544; BD_UPN=12314353; delPer=0; BD_CK_SAM=1; PSINO=1; ___rl__test__cookies=1552141627935; OUTFOX_SEARCH_USER_ID_NCOO=1556761245.5429947; BDRCVFR[QxxZVyx49rf]=I67x6TjHwwYf0; H_WISE_SIDS=125704_114553_129323_106370_128146_128229_120193_123018_129449_118893_118871_118854_118832_118787_107312_129945_129387_129088_129558_117336_129751_117432_128791_128402_129655_128246_124639_129620_129008_128967_129641_129293_128805_129692_129838_129981_129808_127764_129482_129643_129508_124030_130091_110085_129844_123289_128842_127417_128808_129049; FEED_SIDS=231735_0309_22; plus_lsv=393c3756be30db54; BDORZ=AE84CDB3A529C0F8A2B9DCDD1D18B695; plus_cv=1::m:49a3f4a6; Hm_lvt_12423ecbc0e2ca965d84259063d35238=1552141644; SE_LAUNCH=5%3A25869027_0%3A25869027; rsv_i=caa1rmCs0PpQpYzAbKe5ZOe7IPqcdsJjz9yFp5uzkt9iporuXUkXb39N0K1sIreyWXdiYvSq2TEnLzJMu1rSJdPaAoRZgSo; Hm_lpvt_12423ecbc0e2ca965d84259063d35238=1552141679; BDRCVFR[Usf3Hj-5366]=mk3SLVN4HKm; BDUSS=RJTzhiLTA2fkFSRmxiOGZYRVZEbVVMRU1FQmNsbHJDT2xRSHlPT1ZaV2NYS3RjQUFBQUFBJCQAAAAAAAAAAAEAAABEIlRw0LC2uTkwAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAJzPg1ycz4NcY2; BD_HOME=1; BDRCVFR[4r8LXJfwh-6]=8QV4RScte5tfjRLnjbdnHRsg17xUvNV; H_PS_645EC=1e9fAcA0iG5RIf%2Bi4FlV0onp3XbZL2oEMPgGRb1L2abD%2BGzuKThL6MgB%2Be%2FwIJ6wYVm0C2fUHAUL; H_PS_PSSID=28648_1455_28395_21114_28608_28584_28557_28604_28625_28605; sug=3; sugstore=1; ORIGIN=2; bdime=0

body

<!DOCTYPE html>
<!--STATUS OK-->

 

2. Python模拟返回固定页面的http服务器

import socket

def service_client(new_socket):
    """Answer one browser connection with a fixed page, then close it.

    Args:
        new_socket: Connected TCP socket returned by ``accept()``.
    """
    try:
        # 1. Receive the HTTP request sent by the browser,
        #    e.g. "GET / HTTP/1.1 ...".
        request = new_socket.recv(1024)
        print(request)

        # 2. Build the HTTP response for the browser.
        # 2.1 Header: the status line, then an empty line that ends the header.
        response = "HTTP/1.1 200 OK\r\n"
        response += "\r\n"
        # 2.2 Body
        response += "haaaaaaaaaaa"

        # sendall() keeps writing until every byte is out; send() may stop early.
        new_socket.sendall(response.encode("utf-8"))
    finally:
        # Always release the connection, even if recv/send raised.
        new_socket.close()


def main():
    """Run the server loop: accept browsers on port 7890 and serve them in turn.

    The loop never ends on its own; the ``with`` block makes sure the
    listening socket is closed when the loop is left through an exception
    (for example Ctrl+C).
    """
    # 1. Create the TCP socket.
    with socket.socket(socket.AF_INET, socket.SOCK_STREAM) as tcp_server_socket:
        # 2. Bind to port 7890 on every local interface.
        tcp_server_socket.bind(("", 7890))

        # 3. Turn it into a listening socket.
        tcp_server_socket.listen(128)

        while True:
            # 4. Wait for a new client to connect.
            new_socket, _ = tcp_server_socket.accept()

            # 5. Serve this client (service_client closes new_socket).
            service_client(new_socket)


if __name__ == '__main__':
    main()

 返回指定html页面

import socket
import re

def service_client(new_socket):
    """Send the requested file from ``./templates`` to a browser, then close.

    The path is taken from the request line (``GET /page.html HTTP/1.1``).
    A request that cannot be parsed, a path containing ``..`` or a file that
    cannot be read is answered with ``404`` instead of crashing the server.

    Args:
        new_socket: Connected TCP socket returned by ``accept()``.
    """
    try:
        # 1. Receive the HTTP request; undecodable bytes must not raise.
        request = new_socket.recv(1024).decode("utf-8", errors="replace")

        request_lines = request.splitlines()
        print(request_lines)
        if not request_lines:
            # The peer closed the connection without sending a request.
            return

        # GET /page.html HTTP/1.1  ->  /page.html
        file_name = None
        ret = re.match(r"[^/]+(/[^ ]*)", request_lines[0])
        if ret:
            file_name = ret.group(1)
            print("*"*50, file_name)

        # 2. Load the body. ".." segments are refused so that a request
        #    cannot read files outside ./templates.
        html_content = None
        if file_name is not None and ".." not in file_name.split("/"):
            try:
                with open("./templates" + file_name, "rb") as f:
                    html_content = f.read()
            except (OSError, ValueError):
                # Missing file, directory, permission problem, NUL in path...
                html_content = None

        if html_content is None:
            response = "HTTP/1.1 404 NOT FOUND\r\n"
            response += "\r\n"
            response += "-----------File not found----------"
            new_socket.sendall(response.encode("utf-8"))
            return

        # 2.1 Header: status line plus the empty line that ends the header.
        response = "HTTP/1.1 200 OK\r\n"
        response += "\r\n"

        # 2.2 Header and body go out together; sendall() never stops early.
        new_socket.sendall(response.encode("utf-8") + html_content)
    finally:
        # Close the connection on every path, including errors.
        new_socket.close()


def main():
    """Run the server loop: accept browsers on port 7890 and serve them in turn.

    The loop never ends on its own; the ``with`` block makes sure the
    listening socket is closed when the loop is left through an exception
    (for example Ctrl+C).
    """
    # 1. Create the TCP socket.
    with socket.socket(socket.AF_INET, socket.SOCK_STREAM) as tcp_server_socket:
        # 2. Bind.
        # SO_REUSEADDR lets the address be reused even when the server was the
        # side that closed first, so the program can be restarted right away.
        tcp_server_socket.setsockopt(socket.SOL_SOCKET, socket.SO_REUSEADDR, 1)
        tcp_server_socket.bind(("", 7890))

        # 3. Turn it into a listening socket.
        tcp_server_socket.listen(128)

        while True:
            # 4. Wait for a new client to connect.
            new_socket, _ = tcp_server_socket.accept()

            # 5. Serve this client (service_client closes new_socket).
            service_client(new_socket)


if __name__ == '__main__':
    main()

3. 多进程/线程实现http服务器

3.1 多进程: 需要在主进程调用new_socket.close()

import socket
import re
import multiprocessing

def service_client(new_socket):
    """Send the requested file from ``./templates`` to a browser, then close.

    Runs in a child process. The path is taken from the request line
    (``GET /page.html HTTP/1.1``). A request that cannot be parsed, a path
    containing ``..`` or a file that cannot be read is answered with ``404``.

    Args:
        new_socket: Connected TCP socket returned by ``accept()``.
    """
    try:
        # 1. Receive the HTTP request; undecodable bytes must not raise.
        request = new_socket.recv(1024).decode("utf-8", errors="replace")
        request_lines = request.splitlines()
        print(request_lines)
        if not request_lines:
            # The peer closed the connection without sending a request.
            return

        # GET /page.html HTTP/1.1  ->  /page.html
        file_name = None
        ret = re.match(r"[^/]+(/[^ ]*)", request_lines[0])
        if ret:
            file_name = ret.group(1)
            print("*"*50, file_name)

        # 2. Load the body. ".." segments are refused so that a request
        #    cannot read files outside ./templates.
        html_content = None
        if file_name is not None and ".." not in file_name.split("/"):
            try:
                with open("./templates" + file_name, "rb") as f:
                    html_content = f.read()
            except (OSError, ValueError):
                # Missing file, directory, permission problem, NUL in path...
                html_content = None

        if html_content is None:
            response = "HTTP/1.1 404 NOT FOUND\r\n"
            response += "\r\n"
            response += "-----------File not found----------"
            new_socket.sendall(response.encode("utf-8"))
            return

        # 2.1 Header: status line plus the empty line that ends the header.
        response = "HTTP/1.1 200 OK\r\n"
        response += "\r\n"

        # 2.2 Header and body go out together; sendall() never stops early.
        new_socket.sendall(response.encode("utf-8") + html_content)
    finally:
        # Close this process's copy of the connection on every path.
        new_socket.close()

def main():
    """Accept browsers on port 7890 and serve each one in its own process.

    The loop never ends on its own; the ``with`` block makes sure the
    listening socket is closed when the loop is left through an exception
    (for example Ctrl+C).
    """
    # 1. Create the TCP socket.
    with socket.socket(socket.AF_INET, socket.SOCK_STREAM) as tcp_server_socket:
        # 2. Bind.
        # SO_REUSEADDR lets the address be reused even when the server was the
        # side that closed first, so the program can be restarted right away.
        tcp_server_socket.setsockopt(socket.SOL_SOCKET, socket.SO_REUSEADDR, 1)
        tcp_server_socket.bind(("", 7890))

        # 3. Turn it into a listening socket.
        tcp_server_socket.listen(128)

        while True:
            # 4. Wait for a new client to connect.
            new_socket, _ = tcp_server_socket.accept()

            # 5. Serve this client in a child process.
            p = multiprocessing.Process(target=service_client, args=(new_socket, ))
            p.start()

            # The child works on its own copy of the socket. The parent must
            # close its copy too, otherwise the connection is never fully
            # closed and the browser keeps waiting.
            new_socket.close()
if __name__ == '__main__':
    main()
主进程里的: new_socket.close()作用
# fd: 文件描述符, 就是一个数字, 对应一个特殊的文件, 例如网络接口
# 到子进程时候, new_socket会被复制一份, 所以要在主进程里 调用 new_socket.close()
# 主进程不调用close时, 浏览器会一直的等待, 四次挥手就不会开始!

3.2 多线程: 无需在主线程调用new_socket.close()--否则报错

import socket
import re
import threading

def service_client(new_socket):
    """Send the requested file from ``./templates`` to a browser, then close.

    Runs in a worker thread. The path is taken from the request line
    (``GET /page.html HTTP/1.1``). A request that cannot be parsed, a path
    containing ``..`` or a file that cannot be read is answered with ``404``.

    Args:
        new_socket: Connected TCP socket returned by ``accept()``.
    """
    try:
        # 1. Receive the HTTP request; undecodable bytes must not raise.
        request = new_socket.recv(1024).decode("utf-8", errors="replace")
        request_lines = request.splitlines()
        print("request_lines:", "*"*50, request_lines)
        if not request_lines:
            # The peer closed the connection without sending a request.
            return

        # GET /page.html HTTP/1.1  ->  /page.html
        file_name = None
        ret = re.match(r"[^/]+(/[^ ]*)", request_lines[0])
        if ret:
            file_name = ret.group(1)
            print("*"*50, file_name)

        # 2. Load the body. ".." segments are refused so that a request
        #    cannot read files outside ./templates.
        html_content = None
        if file_name is not None and ".." not in file_name.split("/"):
            try:
                with open("./templates" + file_name, "rb") as f:
                    html_content = f.read()
            except (OSError, ValueError):
                # Missing file, directory, permission problem, NUL in path...
                html_content = None

        if html_content is None:
            response = "HTTP/1.1 404 NOT FOUND\r\n"
            response += "\r\n"
            response += "-----------File not found----------"
            new_socket.sendall(response.encode("utf-8"))
            return

        # 2.1 Header: status line plus the empty line that ends the header.
        response = "HTTP/1.1 200 OK\r\n"
        response += "\r\n"

        # 2.2 Header and body go out together; sendall() never stops early.
        new_socket.sendall(response.encode("utf-8") + html_content)
    finally:
        # Close the connection on every path, including errors.
        new_socket.close()

def main():
    """Accept browsers on port 7890 and serve each one in its own thread.

    The loop never ends on its own; the ``with`` block makes sure the
    listening socket is closed when the loop is left through an exception
    (for example Ctrl+C).
    """
    # 1. Create the TCP socket.
    with socket.socket(socket.AF_INET, socket.SOCK_STREAM) as tcp_server_socket:
        # 2. Bind.
        # SO_REUSEADDR lets the address be reused even when the server was the
        # side that closed first, so the program can be restarted right away.
        tcp_server_socket.setsockopt(socket.SOL_SOCKET, socket.SO_REUSEADDR, 1)
        tcp_server_socket.bind(("", 7890))

        # 3. Turn it into a listening socket.
        tcp_server_socket.listen(128)

        while True:
            # 4. Wait for a new client to connect.
            new_socket, _ = tcp_server_socket.accept()

            # 5. Serve this client in a worker thread.
            p = threading.Thread(target=service_client, args=(new_socket, ))
            p.start()

            # Do NOT close new_socket here: the worker thread uses this very
            # socket object (nothing is copied as with a child process), and
            # service_client closes it when it is done.
    
if __name__ == '__main__':
    main()

 区别: 线程比进程耗费的资源小; 以下用协程实现, 会更方便

3.3 协程: 使用gevent实现http服务器

效率最高

import socket
import re
import gevent
from gevent import monkey

monkey.patch_all()

def service_client(new_socket):
    """Send the requested file from ``./templates`` to a browser, then close.

    Runs inside a gevent greenlet. The path is taken from the request line
    (``GET /page.html HTTP/1.1``). A request that cannot be parsed, a path
    containing ``..`` or a file that cannot be read is answered with ``404``.

    Args:
        new_socket: Connected TCP socket returned by ``accept()``.
    """
    try:
        # 1. Receive the HTTP request; undecodable bytes must not raise.
        request = new_socket.recv(1024).decode("utf-8", errors="replace")
        request_lines = request.splitlines()
        print("request_lines:", "*"*50, request_lines)
        if not request_lines:
            # The peer closed the connection without sending a request.
            return

        # GET /page.html HTTP/1.1  ->  /page.html
        file_name = None
        ret = re.match(r"[^/]+(/[^ ]*)", request_lines[0])
        if ret:
            file_name = ret.group(1)
            print("*"*50, file_name)

        # 2. Load the body. ".." segments are refused so that a request
        #    cannot read files outside ./templates.
        html_content = None
        if file_name is not None and ".." not in file_name.split("/"):
            try:
                with open("./templates" + file_name, "rb") as f:
                    html_content = f.read()
            except (OSError, ValueError):
                # Missing file, directory, permission problem, NUL in path...
                html_content = None

        if html_content is None:
            response = "HTTP/1.1 404 NOT FOUND\r\n"
            response += "\r\n"
            response += "-----------File not found----------"
            new_socket.sendall(response.encode("utf-8"))
            return

        # 2.1 Header: status line plus the empty line that ends the header.
        response = "HTTP/1.1 200 OK\r\n"
        response += "\r\n"

        # 2.2 Header and body go out together; sendall() never stops early.
        new_socket.sendall(response.encode("utf-8") + html_content)
    finally:
        # Close the connection on every path, including errors.
        new_socket.close()

def main():
    """Accept browsers on port 7890 and serve each one in a gevent greenlet.

    The loop never ends on its own; the ``with`` block makes sure the
    listening socket is closed when the loop is left through an exception
    (for example Ctrl+C).
    """
    # 1. Create the TCP socket.
    with socket.socket(socket.AF_INET, socket.SOCK_STREAM) as tcp_server_socket:
        # 2. Bind.
        # SO_REUSEADDR lets the address be reused even when the server was the
        # side that closed first, so the program can be restarted right away.
        tcp_server_socket.setsockopt(socket.SOL_SOCKET, socket.SO_REUSEADDR, 1)
        tcp_server_socket.bind(("", 7890))

        # 3. Turn it into a listening socket.
        tcp_server_socket.listen(128)

        while True:
            # 4. Wait for a new client to connect.
            new_socket, _ = tcp_server_socket.accept()

            # 5. Serve this client in a greenlet.
            gevent.spawn(service_client, new_socket)

            # Do NOT close new_socket here: the greenlet uses this very
            # socket object and service_client closes it when it is done.
    
if __name__ == '__main__':
    main()

4. Web静态服务器--单进程/线程/非堵塞模式

4.1 长连接和短连接

  • HTTP/1.1 长连接:  三次握手一次, 不断开的情况下, 通过一个Socket, 可以连续获取数据
  • HTTP/1.0 短连接

短连接

import socket
import re
import gevent
from gevent import monkey
import time

client_socker_list = list()

def main():
    """Watch many clients from a single thread by polling non-blocking sockets.

    Once per second the loop tries to accept a new client and then tries to
    read from every known client. Clients that closed the connection (or
    whose connection failed) are closed and dropped from
    ``client_socker_list``.
    """
    # 1. Create the TCP socket; the with-block closes it when the loop is
    #    left through an exception (for example Ctrl+C).
    with socket.socket(socket.AF_INET, socket.SOCK_STREAM) as tcp_server_socket:
        # 2. Bind.
        # SO_REUSEADDR lets the address be reused even when the server was the
        # side that closed first, so the program can be restarted right away.
        tcp_server_socket.setsockopt(socket.SOL_SOCKET, socket.SO_REUSEADDR, 1)
        tcp_server_socket.bind(("", 7890))

        # 3. Turn it into a listening socket.
        tcp_server_socket.listen(128)

        tcp_server_socket.setblocking(False)    # make accept() non-blocking

        while True:
            try:
                new_socket, new_addr = tcp_server_socket.accept()
            except BlockingIOError:
                # Nothing is waiting in the accept queue right now.
                print("没有新的客户端到来")
            except OSError as ret:
                # Any other accept failure: report it and keep serving.
                print(ret)
            else:
                print("只要没产生一次, 那么也就意味着 来了一个新的客户端")
                new_socket.setblocking(False)
                client_socker_list.append(new_socket)

            # Walk over a snapshot: entries are removed from the real list
            # inside the loop.
            for client_socket in list(client_socker_list):
                try:
                    # Read from the client being visited, not from the most
                    # recently accepted one.
                    recv_data = client_socket.recv(1024)
                except BlockingIOError as ret:
                    print(ret)
                    print("这个客户端没有发送过来数据")
                except OSError as ret:
                    # The connection failed (e.g. reset by peer): drop it.
                    print(ret)
                    client_socker_list.remove(client_socket)
                    client_socket.close()
                else:
                    if recv_data:
                        # The peer sent data.
                        print("客户端发送过来了数据")
                    else:
                        # recv() returned b"": the peer called close().
                        client_socker_list.remove(client_socket)
                        client_socket.close()
                        print("客户端已经关闭")

            time.sleep(1)
        
if __name__ == '__main__':
    main()
  • 核心: 之所以要用多线程/多进程, 是因为套接字默认是堵塞的, 会一直堵塞到收到数据为止
  • 只要设置为非堵塞, 则可以实现 单进程单线程单任务, 还能做到多个客户端一起服务.

 4.2 长连接来实现单进程/单线程--非堵塞模式(Content-Length)

上述实现都是基于短连接, 请求一次之后就断开连接了

import socket
import re
import gevent
from gevent import monkey
import time

client_socker_list = list()

def service_client(new_socket, request):
    """Answer one HTTP request on a keep-alive connection.

    The file named in the request line is read from ``./templates`` and sent
    exactly once, preceded by a ``Content-Length`` header so the browser can
    tell where the response ends without the server closing the connection.
    A request that cannot be parsed, a path containing ``..`` or a file that
    cannot be read is answered with ``404`` (also with ``Content-Length``).
    The socket is left open for further requests.

    Args:
        new_socket: Connected TCP socket; may be in non-blocking mode.
        request: The already received and decoded request text.
    """
    # 1. The request was received by the caller, e.g. "GET / HTTP/1.1 ...".
    request_lines = request.splitlines()
    print("request_lines:", "*"*50, request_lines)
    if not request_lines:
        # Nothing to parse.
        return

    # GET /page.html HTTP/1.1  ->  /page.html
    file_name = None
    ret = re.match(r"[^/]+(/[^ ]*)", request_lines[0])
    if ret:
        file_name = ret.group(1)
        print("*"*50, file_name)

    # 2. Load the body. ".." segments are refused so that a request cannot
    #    read files outside ./templates.
    response_body = None
    if file_name is not None and ".." not in file_name.split("/"):
        try:
            with open("./templates" + file_name, "rb") as f:
                response_body = f.read()
        except (OSError, ValueError):
            # Missing file, directory, permission problem, NUL in path...
            response_body = None

    if response_body is None:
        response_header = "HTTP/1.1 404 NOT FOUND\r\n"
        response_body = "-----------File note found----------".encode("utf-8")
    else:
        response_header = "HTTP/1.1 200 OK\r\n"

    # 2.1 Content-Length tells the browser how long the body is, so it can
    #     issue its next request on the same connection without a close().
    response_header += "Content-Length:%d\r\n" % len(response_body)
    response_header += "\r\n"

    # Header and body are both bytes at this point.
    response = response_header.encode("utf-8") + response_body

    # Send the response exactly once. The socket is switched to blocking mode
    # for the duration of the send so that a large body is neither truncated
    # nor interrupted by BlockingIOError; the previous mode is then restored.
    previous_timeout = new_socket.gettimeout()
    new_socket.settimeout(None)
    try:
        new_socket.sendall(response)
    finally:
        new_socket.settimeout(previous_timeout)
    


def main():
    """Serve keep-alive clients from a single thread by polling sockets.

    The loop repeatedly tries to accept a new client and then tries to read a
    request from every known client; each request is answered by
    ``service_client``. Clients that closed the connection (or whose
    connection failed) are closed and dropped from ``client_socker_list``.
    """
    # 1. Create the TCP socket; the with-block closes it when the loop is
    #    left through an exception (for example Ctrl+C).
    with socket.socket(socket.AF_INET, socket.SOCK_STREAM) as tcp_server_socket:
        # 2. Bind.
        # SO_REUSEADDR lets the address be reused even when the server was the
        # side that closed first, so the program can be restarted right away.
        tcp_server_socket.setsockopt(socket.SOL_SOCKET, socket.SO_REUSEADDR, 1)
        tcp_server_socket.bind(("", 7890))

        # 3. Turn it into a listening socket.
        tcp_server_socket.listen(128)

        tcp_server_socket.setblocking(False)    # make accept() non-blocking

        while True:
            try:
                new_socket, new_addr = tcp_server_socket.accept()
            except BlockingIOError:
                # Nothing is waiting in the accept queue right now.
                print("没有新的客户端到来")
            except OSError as ret:
                # Any other accept failure: report it and keep serving.
                print(ret)
            else:
                print("只要没产生一次, 那么也就意味着 来了一个新的客户端")
                new_socket.setblocking(False)
                client_socker_list.append(new_socket)

            # Walk over a snapshot: entries are removed from the real list
            # inside the loop.
            for client_socket in list(client_socker_list):
                try:
                    # Read from the client being visited, not from the most
                    # recently accepted one.
                    raw_data = client_socket.recv(1024)
                except BlockingIOError as ret:
                    print(ret)
                    print("这个客户端没有发送过来数据")
                    continue
                except OSError as ret:
                    # The connection failed (e.g. reset by peer).
                    print(ret)
                    raw_data = b""

                if raw_data:
                    # The peer sent a request.
                    print("客户端发送过来了数据")
                    recv_data = raw_data.decode("utf-8", errors="replace")
                    try:
                        service_client(client_socket, recv_data)
                    except OSError as ret:
                        # The peer vanished while the response was being sent.
                        print(ret)
                        client_socker_list.remove(client_socket)
                        client_socket.close()
                else:
                    # recv() returned b"" (peer closed) or the read failed.
                    client_socket.close()
                    client_socker_list.remove(client_socket)
                    print("客户端已经关闭")
    
if __name__ == '__main__':
    main()

 5. Web静态服务器--epoll

5.1 IO多路复用

select/epoll的好处: 在于单个process就可以同时处理多个网络连接的IO

它的基本原理就是 select, poll, epoll 这类function会监视所负责的所有Socket(select/poll 通过不断轮询, epoll 通过事件通知), 当某个Socket有数据到达了, 就通知用户进程

5.2 epoll简单模型

1. 减少了复制的过程

2. 以事件通知的方式(高效率, 注: 轮询的方式效率很低)

3. 有一块特殊的内存, 是应用程序和Kernel共享的; 需要添加、监听、判断数据是否到来的套接字所对应的文件描述符都放在这块内存里, 检测时不是轮询而是事件通知。

4. 使用了内存映射技术(mmap)技术

5. 采用基于事件的就绪通知方式

5.3 epoll版的http服务器

#!/bin/python3
# -*- encoding=utf-8 -*-

import socket
import re
import time
import select



def service_client(new_socket, request):
    """Answer one HTTP request on a keep-alive connection.

    The file named in the request line is read from ``./templates`` and sent
    exactly once, preceded by a ``Content-Length`` header so the browser can
    tell where the response ends without the server closing the connection.
    A request that cannot be parsed, a path containing ``..`` or a file that
    cannot be read is answered with ``404`` (also with ``Content-Length``).
    The socket is left open for further requests.

    Args:
        new_socket: Connected TCP socket to answer on.
        request: The already received and decoded request text.
    """
    # 1. The request was received by the caller, e.g. "GET / HTTP/1.1 ...".
    request_lines = request.splitlines()
    print("request_lines:", "*"*50, request_lines)
    if not request_lines:
        # Nothing to parse.
        return

    # GET /page.html HTTP/1.1  ->  /page.html
    file_name = None
    ret = re.match(r"[^/]+(/[^ ]*)", request_lines[0])
    if ret:
        file_name = ret.group(1)
        print("*"*50, file_name)

    # 2. Load the body. ".." segments are refused so that a request cannot
    #    read files outside ./templates.
    response_body = None
    if file_name is not None and ".." not in file_name.split("/"):
        try:
            with open("./templates" + file_name, "rb") as f:
                response_body = f.read()
        except (OSError, ValueError):
            # Missing file, directory, permission problem, NUL in path...
            response_body = None

    if response_body is None:
        response_header = "HTTP/1.1 404 NOT FOUND\r\n"
        response_body = "-----------File note found----------".encode("utf-8")
    else:
        response_header = "HTTP/1.1 200 OK\r\n"

    # 2.1 Content-Length tells the browser how long the body is, so it can
    #     issue its next request on the same connection without a close().
    response_header += "Content-Length:%d\r\n" % len(response_body)
    response_header += "\r\n"

    # Header and body are both bytes at this point.
    response = response_header.encode("utf-8") + response_body

    # Send the response exactly once; a second copy of the body would be
    # read by the browser as the start of the next response.
    new_socket.sendall(response)
    


def main():
    """Serve keep-alive clients from a single thread using epoll.

    The listening socket and every accepted client are registered with one
    epoll object. ``poll()`` blocks until at least one of them is ready; new
    clients are accepted, requests are handed to ``service_client`` and
    clients that closed (or failed) are unregistered and closed.
    """
    # 1. Create the TCP socket; the with-blocks close the socket and the epoll
    #    object when the loop is left through an exception (e.g. Ctrl+C).
    with socket.socket(socket.AF_INET, socket.SOCK_STREAM) as tcp_server_socket, \
            select.epoll() as epl:
        # 2. Bind.
        # SO_REUSEADDR lets the address be reused even when the server was the
        # side that closed first, so the program can be restarted right away.
        tcp_server_socket.setsockopt(socket.SOL_SOCKET, socket.SO_REUSEADDR, 1)
        tcp_server_socket.bind(("", 7890))

        # 3. Turn it into a listening socket.
        tcp_server_socket.listen(128)

        tcp_server_socket.setblocking(False)    # make accept() non-blocking

        # Register the listening socket's fd; EPOLLIN = "readable".
        listen_fd = tcp_server_socket.fileno()
        epl.register(listen_fd, select.EPOLLIN)

        # {fd: socket, fd2: socket2}
        fd_event_dict = dict()
        # HUP/ERR are reported by epoll even though only EPOLLIN is requested.
        readable_or_gone = select.EPOLLIN | select.EPOLLHUP | select.EPOLLERR

        while True:
            # Blocks until the OS reports activity.
            # Returns [(fd, event), ...].
            fd_event_list = epl.poll()

            for fd, event in fd_event_list:
                # 4. Activity on the listening socket: a new client arrived.
                if fd == listen_fd:
                    try:
                        new_socket, _ = tcp_server_socket.accept()
                    except BlockingIOError:
                        # The pending connection went away before accept().
                        continue
                    epl.register(new_socket.fileno(), select.EPOLLIN)
                    fd_event_dict[new_socket.fileno()] = new_socket   # fd: socket
                    continue

                # The event is a bit mask, so test bits instead of comparing
                # for equality (EPOLLIN may come together with EPOLLHUP).
                if not event & readable_or_gone:
                    continue

                client_socket = fd_event_dict[fd]
                try:
                    raw_data = client_socket.recv(1024)
                    if raw_data:
                        recv_data = raw_data.decode("utf-8", errors="replace")
                        service_client(client_socket, recv_data)
                        continue
                except OSError as ret:
                    # Reset by peer, broken pipe while answering, ...
                    print(ret)

                # The peer closed the connection or it failed. Unregister
                # BEFORE closing: unregistering a closed fd raises EBADF on
                # Python 3.9+.
                epl.unregister(fd)
                client_socket.close()
                del fd_event_dict[fd]
    
if __name__ == '__main__':
    main()

原文地址:https://www.cnblogs.com/douzujun/p/10503602.html