自定义异步非阻塞web框架

Python的Web框架中Tornado以异步非阻塞而闻名。本文基于非阻塞的Socket以及IO多路复用从而实现异步非阻塞的Web框架

一、异步非阻塞和io多路复用

出现的原因:

由于进程的执行过程是线性的(也就是顺序执行),当我们调用低速系统I/O(read、write、accept等)时,进程可能会阻塞在这个调用上,不能执行其他操作。阻塞本身很正常。接下来考虑这么一个问题:一个服务器进程和一个客户端进程通信,服务器端调用read(sockfd1, buf, bufsize),此时客户端进程没有发送数据,那么read(阻塞调用)将一直阻塞,直到客户端调用write(sockfd, buf, size)发来数据。只有一个客户与服务器通信时这没什么问题;但当多个客户与服务器通信时,若服务器阻塞于其中一个客户的sockfd1,当另一个客户的数据到达套接字sockfd2时,服务器无法处理,仍然阻塞在read(sockfd1, ...)上。此时问题就出现了:不能及时处理另一个客户的请求,怎么办?用I/O多路复用来解决!

io多路复用:

继续上面的问题:有多个客户连接sockfd1、sockfd2、sockfd3 … sockfdn,用select同时监听这n个连接;当其中任意一个发来消息时,就从select的阻塞中返回,然后调用read读取收到消息的sockfd,读完后再循环回到select继续阻塞。这样就不会因为阻塞在其中一个连接上而不能处理另一个客户的消息。原理:对socket对象调用setblocking(False)将其设为非阻塞,再配合select进行监听。

Q: 那这样子,在读取socket1的数据时,如果其它socket有数据来,那么也要等到socket1读取完了才能继续读取其它socket的数据吧。那不是也阻塞住了吗?而且读取到的数据也要开启线程处理吧,那这和多线程IO有什么区别呢?

A:

  1. CPU本来就是线性的,不论什么都需要顺序处理;真正的并行只能依靠多核CPU。
  2. io多路复用本来就是用来解决对多个I/O监听时,一个I/O阻塞影响其他I/O的问题,跟多线程没关系.
  3. 跟多线程相比较,线程切换需要进入内核完成,需要消耗时间和资源;而I/O多路复用不需要切换线程/进程,效率相对较高,特别适合高并发的应用,例如nginx就是用I/O多路复用,故而性能极佳。不过多线程在编程逻辑和处理上比I/O多路复用简单,I/O多路复用处理起来较为复杂。

二、实现流程

1、sleep异步非阻塞

import tornado.ioloop
import tornado.web
class MainHandler(tornado.web.RequestHandler):
    """Handler for ``/main`` that sleeps 10 seconds before answering.

    This is the blocking variant of the example: the delay is implemented
    with ``time.sleep``.
    """

    def get(self):
        """Sleep for 10 seconds, then write ``Hello, world``."""
        # time.sleep() blocks the calling thread for the whole delay.
        from time import sleep

        delay_seconds = 10
        sleep(delay_seconds)
        self.write("Hello, world")

class IndexHandler(tornado.web.RequestHandler):
    """Handler for ``/index`` that answers with a fixed body."""

    def get(self):
        """Write ``Index`` as the response body."""
        body = "Index"
        self.write(body)
# Route table: each entry pairs a URL pattern with its handler class.
application = tornado.web.Application(
    handlers=[
        (r"/main", MainHandler),
        (r"/index", IndexHandler),
    ],
)

if __name__ == "__main__":
    # Listen on port 8888, then start the IOLoop.
    application.listen(8888)
    io_loop = tornado.ioloop.IOLoop.instance()
    io_loop.start()
import tornado.ioloop
import tornado.web
from tornado import gen
from tornado.concurrent import Future
import time

class MainHandler(tornado.web.RequestHandler):
    """Handler for ``/main`` that answers after a 5 second timer.

    The delay is implemented with a ``Future`` that a timer resolves, and
    the coroutine yields that future instead of calling ``time.sleep``.
    """

    @gen.coroutine
    def get(self):
        """Wait for a 5 second timer, then send the response via ``done``."""
        # The Future is what the coroutine suspends on.
        future = Future()
        # After 5 seconds the timer resolves the future.  The original code
        # scheduled ``self.done`` directly and never resolved the future, so
        # this coroutine stayed suspended forever after the reply was sent.
        tornado.ioloop.IOLoop.current().add_timeout(
            time.time() + 5, future.set_result, None)
        yield future
        self.done()

    def done(self, *args, **kwargs):
        """Write ``Main`` and finish the request.

        Extra positional/keyword arguments are accepted and ignored so the
        method can still be used directly as a callback.
        """
        self.write('Main')
        self.finish()

class IndexHandler(tornado.web.RequestHandler):
    """Handler for ``/index`` that answers with a fixed body."""

    def get(self):
        """Write ``Index`` as the response body."""
        body = "Index"
        self.write(body)
# Route table: each entry pairs a URL pattern with its handler class.
application = tornado.web.Application(
    handlers=[
        (r"/main", MainHandler),
        (r"/index", IndexHandler),
    ],
)

if __name__ == "__main__":
    # Listen on port 8888, then start the IOLoop.
    application.listen(8888)
    io_loop = tornado.ioloop.IOLoop.instance()
    io_loop.start()

2、requests请求异步非阻塞

import tornado.ioloop
import tornado.web

class MainHandler(tornado.web.RequestHandler):
    """Handler for ``/main`` that performs an outbound HTTP GET first.

    This is the blocking variant of the example: the request is made with
    the ``requests`` library, and its result is discarded.
    """

    def get(self):
        """Fetch the remote page with ``requests``, then write ``xxxxx``."""
        # Imported inside the method, exactly as the example does.
        import requests

        url = 'http://www.google.com'
        requests.get(url)
        self.write('xxxxx')

class IndexHandler(tornado.web.RequestHandler):
    """Handler for ``/index`` that answers with a fixed body."""

    def get(self):
        """Write ``Index`` as the response body."""
        body = "Index"
        self.write(body)
# Route table: each entry pairs a URL pattern with its handler class.
application = tornado.web.Application(
    handlers=[
        (r"/main", MainHandler),
        (r"/index", IndexHandler),
    ],
)

if __name__ == "__main__":
    # Listen on port 8888, then start the IOLoop.
    application.listen(8888)
    io_loop = tornado.ioloop.IOLoop.instance()
    io_loop.start()

  

import tornado.ioloop
import tornado.web
from tornado import gen

class MainHandler(tornado.web.RequestHandler):
    """Handler for ``/main`` that performs an outbound HTTP GET first.

    The request is made with Tornado's ``AsyncHTTPClient`` and the
    coroutine yields on the fetch.
    """

    @gen.coroutine
    def get(self):
        """Fetch the remote page asynchronously, then reply via ``done``."""
        from tornado import httpclient
        http = httpclient.AsyncHTTPClient()
        # Yield the future returned by fetch() instead of passing a callback:
        # the ``callback`` argument was removed in Tornado 6.0, and combining
        # it with ``yield`` on older versions ends the request twice.
        # raise_error=False keeps the old callback behaviour of still
        # replying when the remote server answers with a non-200 status.
        response = yield http.fetch("http://www.google.com", raise_error=False)
        self.done(response)

    def done(self, *args, **kwargs):
        """Write ``Main`` and finish the request.

        Any arguments (such as the fetched response) are accepted and
        ignored.
        """
        self.write('Main')
        self.finish()

class IndexHandler(tornado.web.RequestHandler):
    """Handler for ``/index`` that answers with a fixed body."""

    def get(self):
        """Write ``Index`` as the response body."""
        body = "Index"
        self.write(body)
# Route table: each entry pairs a URL pattern with its handler class.
application = tornado.web.Application(
    handlers=[
        (r"/main", MainHandler),
        (r"/index", IndexHandler),
    ],
)

if __name__ == "__main__":
    # Listen on port 8888, then start the IOLoop.
    application.listen(8888)
    io_loop = tornado.ioloop.IOLoop.instance()
    io_loop.start()

3、future异步非阻塞

import tornado.ioloop
import tornado.web
from tornado import gen
from tornado.concurrent import Future

# Module-level slot holding the Future of the most recent /main request.
# It stays None until MainHandler.get has run at least once.
future = None


class MainHandler(tornado.web.RequestHandler):
    """Handler for ``/main`` that waits on a Future stored in a global.

    The request stays open until some other code resolves the module-level
    ``future``.
    """

    @gen.coroutine
    def get(self):
        """Publish a new Future in the global ``future`` and wait on it."""
        global future
        pending = Future()
        future = pending
        # ``done`` is invoked once the future receives a result.
        pending.add_done_callback(self.done)

        yield pending

    def done(self, *args, **kwargs):
        """Write ``Main`` and finish the request; arguments are ignored."""
        self.write('Main')
        self.finish()

class IndexHandler(tornado.web.RequestHandler):
    """Handler for ``/index`` that releases the pending ``/main`` request.

    It resolves the module-level ``future``, if one is waiting, and then
    answers with ``Index``.
    """

    def get(self):
        """Resolve the pending future (if any) and write ``Index``."""
        # ``future`` is only read here, so no ``global`` statement is needed.
        # Guard both failure modes of the unconditional call: no /main
        # request has been made yet (future is None), or the future was
        # already resolved by an earlier /index request.
        if future is not None and not future.done():
            # Until a result is set, the /main request keeps waiting.
            future.set_result(None)
        self.write("Index")

# Route table: each entry pairs a URL pattern with its handler class.
application = tornado.web.Application(
    handlers=[
        (r"/main", MainHandler),
        (r"/index", IndexHandler),
    ],
)

if __name__ == "__main__":
    # Listen on port 8888, then start the IOLoop.
    application.listen(8888)
    io_loop = tornado.ioloop.IOLoop.instance()
    io_loop.start()

future = Future()
原理:处理函数返回一个future,框架检查future的result里有没有值;一旦有值,就把结果返回给客户端并断开连接。

4、自定义服务端web框架(不支持异步)

import socket
import select

class HttpRequest(object):
    """Wrap the raw bytes of one HTTP request and expose its parsed parts.

    Attributes set by the constructor:
        content: the raw request bytes as received.
        header_bytes: everything before the first blank line.
        body_bytes: everything after the first blank line (may be empty).
        header_dict: header name -> header value, both stripped of
            surrounding whitespace.
        method, url, protocol: the three fields of the request line; they
            stay empty strings when the request line is malformed.
    """

    def __init__(self, content):
        """Parse *content* immediately.

        :param content: bytes sent by the client (request head and body)
        """
        self.content = content

        self.header_bytes = bytes()
        self.body_bytes = bytes()

        self.header_dict = {}

        self.method = ""
        self.url = ""
        self.protocol = ""

        self.initialize()
        self.initialize_headers()

    def initialize(self):
        """Split the raw content into head and body at the first blank line."""
        temp = self.content.split(b'\r\n\r\n', 1)
        if len(temp) == 1:
            # No blank line: the whole content is the head.  ``temp`` is a
            # list, so its single element must be appended, not the list.
            self.header_bytes += temp[0]
        else:
            h, b = temp
            self.header_bytes += h
            self.body_bytes += b

    @property
    def header_str(self):
        """Return the request head decoded as UTF-8 text."""
        return str(self.header_bytes, encoding='utf-8')

    def initialize_headers(self):
        """Parse the request line and the header lines.

        Nothing is parsed unless the request line has exactly three
        space-separated fields.
        """
        headers = self.header_str.split('\r\n')
        first_line = headers[0].split(' ')
        if len(first_line) == 3:
            self.method, self.url, self.protocol = first_line
            # Skip the request line so that a URL containing ':' is not
            # mistaken for a header.
            for line in headers[1:]:
                # Split on the first colon only, so values that contain a
                # colon themselves (e.g. "Host: 127.0.0.1:9999") are kept.
                k, sep, v = line.partition(':')
                if sep:
                    self.header_dict[k.strip()] = v.strip()

def main(request):
    """Handle ``/main/``: ignore *request* and return the text ``main``."""
    body = "main"
    return body

def index(request):
    """Handle ``/index/``: ignore *request* and return the text ``index``."""
    body = "index"
    return body

# Routing table: (pattern, handler) pairs.  ``run`` tries the patterns in
# order with ``re.match`` against the request URL and uses the first hit.
routers = [
    ('/main/', main),
    ('/index/', index),
]

def _read_available(conn):
    """Return every byte currently readable from the non-blocking *conn*."""
    chunks = []
    while True:
        try:
            chunk = conn.recv(1024)
        except OSError:
            # BlockingIOError means the receive buffer is drained; any other
            # socket error also ends the read.
            break
        if not chunk:
            break  # peer closed the connection
        chunks.append(chunk)
    return b"".join(chunks)


def _match_route(url):
    """Return the handler of the first route matching *url*, or None."""
    import re  # the original block imported ``re`` locally as well

    for pattern, func in routers:
        if re.match(pattern, url):
            return func
    return None


def _send_and_close(conn, payload):
    """Send *payload* (str or bytes) on *conn*, then close it."""
    if isinstance(payload, str):
        payload = bytes(payload, encoding='utf-8')
    try:
        conn.sendall(payload)
    except OSError:
        # The client went away; one broken connection must not stop the
        # server loop.
        pass
    finally:
        conn.close()


def run(host="127.0.0.1", port=9999):
    """Serve requests forever from a single-threaded ``select`` loop.

    Each readable client socket is read once, parsed with ``HttpRequest``,
    dispatched through ``routers`` and answered with the handler's return
    value (or ``404`` when no route matches); the connection is then closed.

    :param host: address to bind to (default ``127.0.0.1``)
    :param port: TCP port to bind to (default ``9999``)
    """
    sock = socket.socket(socket.AF_INET, socket.SOCK_STREAM)
    sock.setsockopt(socket.SOL_SOCKET, socket.SO_REUSEADDR, 1)
    sock.bind((host, port))
    sock.setblocking(False)
    sock.listen(128)

    inputs = [sock]
    try:
        while True:
            rlist, _, _ = select.select(inputs, [], [], 0.05)
            for r in rlist:
                if r is sock:
                    # A new connection arrived.
                    conn, addr = sock.accept()
                    conn.setblocking(False)
                    inputs.append(conn)
                    continue

                # An existing client sent data (or disconnected).
                inputs.remove(r)
                data = _read_available(r)
                if not data:
                    # Nothing to parse: the client closed the connection.
                    r.close()
                    continue

                # 1. parse the request head to obtain the URL
                # 2. look the URL up in the routing table
                # 3. call the handler and send back its return value
                request = HttpRequest(data)
                func = _match_route(request.url)
                if func is not None:
                    _send_and_close(r, func(request))
                else:
                    _send_and_close(r, b"404")
    finally:
        # Release the listening socket and any still-open client sockets
        # when the loop is left through an exception.
        for s in inputs:
            s.close()

# Start the server only when this file is executed as a script.
if '__main__' == __name__:
    run()

5、支持异步非阻塞的web框架:

import socket
import select
import time

class HttpRequest(object):
    """Wrap the raw bytes of one HTTP request and expose its parsed parts.

    Attributes set by the constructor:
        content: the raw request bytes as received.
        header_bytes: everything before the first blank line.
        body_bytes: everything after the first blank line (may be empty).
        header_dict: header name -> header value, both stripped of
            surrounding whitespace.
        method, url, protocol: the three fields of the request line; they
            stay empty strings when the request line is malformed.
    """

    def __init__(self, content):
        """Parse *content* immediately.

        :param content: bytes sent by the client (request head and body)
        """
        self.content = content

        self.header_bytes = bytes()
        self.body_bytes = bytes()

        self.header_dict = {}

        self.method = ""
        self.url = ""
        self.protocol = ""

        self.initialize()
        self.initialize_headers()

    def initialize(self):
        """Split the raw content into head and body at the first blank line."""
        temp = self.content.split(b'\r\n\r\n', 1)
        if len(temp) == 1:
            # No blank line: the whole content is the head.  ``temp`` is a
            # list, so its single element must be appended, not the list.
            self.header_bytes += temp[0]
        else:
            h, b = temp
            self.header_bytes += h
            self.body_bytes += b

    @property
    def header_str(self):
        """Return the request head decoded as UTF-8 text."""
        return str(self.header_bytes, encoding='utf-8')

    def initialize_headers(self):
        """Parse the request line and the header lines.

        Nothing is parsed unless the request line has exactly three
        space-separated fields.
        """
        headers = self.header_str.split('\r\n')
        first_line = headers[0].split(' ')
        if len(first_line) == 3:
            self.method, self.url, self.protocol = first_line
            # Skip the request line so that a URL containing ':' is not
            # mistaken for a header.
            for line in headers[1:]:
                # Split on the first colon only, so values that contain a
                # colon themselves (e.g. "Host: 127.0.0.1:8080") are kept.
                k, sep, v = line.partition(':')
                if sep:
                    self.header_dict[k.strip()] = v.strip()

class Future(object):
    """Placeholder for a handler result that is not available yet.

    Attributes:
        result: the eventual response payload; ``None`` until it is set.
        timeout: the value passed to the constructor (default 0).
        start: ``time.time()`` at construction.
    """

    def __init__(self, timeout=0):
        """Record the creation time and *timeout*; ``result`` starts empty."""
        self.start = time.time()
        self.timeout = timeout
        self.result = None
def main(request):
    """Handle ``/main/``: ignore *request* and return a ``Future(5)``."""
    return Future(5)

def index(request):
    """Handle ``/index/``: ignore *request* and return a fixed string."""
    body = "indexasdfasdfasdf"
    return body


# Routing table: (pattern, handler) pairs.  ``run`` tries the patterns in
# order with ``re.match`` against the request URL and uses the first hit.
routers = [
    ('/main/', main),
    ('/index/', index),
]

def _read_available(conn):
    """Return every byte currently readable from the non-blocking *conn*."""
    chunks = []
    while True:
        try:
            chunk = conn.recv(1024)
        except OSError:
            # BlockingIOError means the receive buffer is drained; any other
            # socket error also ends the read.
            break
        if not chunk:
            break  # peer closed the connection
        chunks.append(chunk)
    return b"".join(chunks)


def _match_route(url):
    """Return the handler of the first route matching *url*, or None."""
    import re  # the original block imported ``re`` locally as well

    for pattern, func in routers:
        if re.match(pattern, url):
            return func
    return None


def _send_and_close(conn, payload):
    """Send *payload* (str or bytes) on *conn*, then close it."""
    if isinstance(payload, str):
        payload = bytes(payload, encoding='utf-8')
    try:
        conn.sendall(payload)
    except OSError:
        # The client went away; one broken connection must not stop the
        # server loop.
        pass
    finally:
        conn.close()


def run(host="127.0.0.1", port=8080):
    """Serve requests forever from a single-threaded ``select`` loop.

    Handlers may return either a string, which is sent back immediately, or
    a ``Future``.  A connection whose handler returned a ``Future`` is
    parked; on every loop pass each parked future is checked and answered
    as soon as it has a result.  A future older than its ``timeout`` gets
    the result ``b"timeout"``.

    :param host: address to bind to (default ``127.0.0.1``)
    :param port: TCP port to bind to (default ``8080``)
    """
    sock = socket.socket(socket.AF_INET, socket.SOCK_STREAM)
    sock.setsockopt(socket.SOL_SOCKET, socket.SO_REUSEADDR, 1)
    sock.bind((host, port))
    sock.setblocking(False)
    sock.listen(128)

    inputs = [sock]

    # Parked connections: client socket -> Future awaiting a result.
    async_request_dict = {}

    try:
        while True:
            rlist, _, _ = select.select(inputs, [], [], 0.05)
            for r in rlist:
                if r is sock:
                    # A new connection arrived.
                    conn, addr = sock.accept()
                    conn.setblocking(False)
                    inputs.append(conn)
                    continue

                # An existing client sent data (or disconnected).  The
                # socket leaves ``inputs`` here in every case: it is either
                # answered and closed right away, or parked.  A parked
                # socket must not be selected again, otherwise it could be
                # re-read and closed while its future is still pending.
                inputs.remove(r)
                data = _read_available(r)
                if not data:
                    # Nothing to parse: the client closed the connection.
                    r.close()
                    continue

                # 1. parse the request head to obtain the URL
                # 2. look the URL up in the routing table
                # 3. call the handler and send back (or park) its result
                request = HttpRequest(data)
                func = _match_route(request.url)
                if func is None:
                    _send_and_close(r, b"404")
                    continue
                result = func(request)
                if isinstance(result, Future):
                    async_request_dict[r] = result
                else:
                    _send_and_close(r, result)

            # Poll the parked futures.  Iterate over a snapshot because
            # entries are deleted inside the loop; deleting while iterating
            # the dict itself raises RuntimeError.
            for conn, future in list(async_request_dict.items()):
                if (future.start + future.timeout) <= time.time():
                    future.result = b"timeout"
                if future.result:
                    del async_request_dict[conn]
                    _send_and_close(conn, future.result)
    finally:
        # Release every socket still open when the loop is left through an
        # exception.
        for s in inputs:
            s.close()
        for s in async_request_dict:
            s.close()

# Start the server only when this file is executed as a script.
if '__main__' == __name__:
    run()

想看完整且详细用法请参考:200行自定义异步非阻塞Web框架

原文地址:https://www.cnblogs.com/sunkai1993/p/6904600.html