requests库 文件上传源码解读:多字段多文件上传,单字段多文件上传

先说下多字段多文件的上传

        # 多个字段文件上传,每个字段的完整格式为: ("filename", fileobject, "content-type", headers)
        {
          "field1" : ("filepath中的文件名称", open("filePath1", "rb")),
          "field2" : ("filename2", open("filePath2", "rb"), "image/jpeg"),
          "field3" : ("filename3", open("filePath3", "rb"), "image/jpeg", {"refer" : "localhost"})
        }
        ### but we can simply use the following:
        >> files={
          "field1" : open("filePath1", "rb"),
          "field2" : open("filePath2", "rb"),
          "field3" : open("filePath3", "rb")
        }
        也可以使用元组列表:
        files=  [
            (
            "field1",open("test1.png","rb")
            ),
            (
            'filed2',open('a2.xlsx','rb').read()
            )
         ]
        >> r=requests.post(url='http://httpbin.org/post',data={"user":"tester","signature":"md5"},files=files)
        >> print(r.json())

 

再说下单个字段,多个文件上传实现

        # Upload several files under ONE form field ("filed1"): repeat the
        # same field name in a list of (field, file object) tuples.
        # The context manager closes both files once the request has been sent.
        with open("test1.png", "rb") as png_file, open("a2.xlsx", "rb") as xlsx_file:
            files = [
                ("filed1", png_file),
                ("filed1", xlsx_file),
            ]
            a = requests.post(url="http://httpbin.org/post", data=None, files=files)
        print(a.text)

 

源码实现解读:

requests库> sessions.py > Session.request方法:

    def request(self, method, url,
            params=None, data=None, headers=None, cookies=None, files=None,
            auth=None, timeout=None, allow_redirects=True, proxies=None,
            hooks=None, stream=None, verify=None, cert=None, json=None):
        """Build a ``Request`` from the arguments and prepare it (excerpt).

        In the lines shown, ``files`` and ``json`` are passed to ``Request``
        unchanged, ``method`` is upper-cased, and ``data`` / ``params``
        fall back to ``{}`` when falsy. The resulting object is then handed
        to ``self.prepare_request``. The remaining parameters (``timeout``,
        ``proxies``, ...) are not used in the part of the method quoted here.
        """

        # Create the Request.
        req = Request(
            method=method.upper(),
            url=url,
            headers=headers,
            files=files,
            data=data or {},
            json=json,
            params=params or {},
            auth=auth,
            cookies=cookies,
            hooks=hooks,
        )
        prep = self.prepare_request(req)

这里 prep = self.prepare_request(req) 对请求做预处理;进入 prepare_request 接着往下走,可以看到 p.prepare() 方法:

        p.prepare(
            method=request.method.upper(),
            url=request.url,
            files=request.files,
            data=request.data,
            json=request.json,
            headers=merge_setting(request.headers, self.headers, dict_class=CaseInsensitiveDict),
            params=merge_setting(request.params, self.params),
            auth=merge_setting(auth, self.auth),
            cookies=merged_cookies,
            hooks=merge_hooks(request.hooks, self.hooks),
        )
        return p

  接着再进去看到self.prepare_body(data, files, json)

    def prepare(self,
            method=None, url=None, headers=None, files=None, data=None,
            params=None, auth=None, cookies=None, hooks=None, json=None):
        """Prepares the entire request with the given parameters.

        Calls the individual ``prepare_*`` helpers in a fixed order:
        method, URL (with ``params``), headers, cookies, body, auth and
        finally hooks. ``data``, ``files`` and ``json`` are all forwarded to
        ``prepare_body``.
        """

        self.prepare_method(method)
        self.prepare_url(url, params)
        self.prepare_headers(headers)
        self.prepare_cookies(cookies)
        # The upload-related arguments (data / files / json) are consumed here.
        self.prepare_body(data, files, json)
        self.prepare_auth(auth, url)

        # Note that prepare_auth must be last to enable authentication schemes
        # such as OAuth to work on a fully prepared request.

        # This MUST go after prepare_auth. Authenticators could add a hook
        self.prepare_hooks(hooks)

 接着prepare_body:

    def prepare_body(self, data, files, json=None):
        """Prepares the given HTTP body data.

        Excerpt: the opening lines of the method, which initialise ``body``
        and ``content_type`` and compute ``is_stream``, are omitted from this
        listing; the complete method is quoted further down in the article.

        :param data: request payload (mapping, list of pairs, string or stream).
        :param files: files to upload; when truthy the body is built by
            ``self._encode_files``.
        :param json: not used in the lines shown here.
        :raises NotImplementedError: if a streamed ``data`` is combined with
            ``files``.
        """
        if is_stream:
            # Streamed body: ``data`` itself is used as the body.
            try:
                length = super_len(data)
            except (TypeError, AttributeError, UnsupportedOperation):
                length = None

            body = data

            if getattr(body, 'tell', None) is not None:
                # Record the current file position before reading.
                # This will allow us to rewind a file in the event
                # of a redirect.
                try:
                    self._body_position = body.tell()
                except (IOError, OSError):
                    # This differentiates from None, allowing us to catch
                    # a failed `tell()` later when trying to rewind the body
                    self._body_position = object()

            if files:
                raise NotImplementedError('Streamed bodies and files are mutually exclusive.')

            # Known length -> Content-Length; otherwise fall back to chunked.
            if length:
                self.headers['Content-Length'] = builtin_str(length)
            else:
                self.headers['Transfer-Encoding'] = 'chunked'
        else:
            # Multi-part file uploads.
            if files:
                (body, content_type) = self._encode_files(files, data)
            else:
                if data:
                    body = self._encode_params(data)
                    # Strings and file-like objects get no default content type;
                    # everything else is sent as a URL-encoded form.
                    if isinstance(data, basestring) or hasattr(data, 'read'):
                        content_type = None
                    else:
                        content_type = 'application/x-www-form-urlencoded'

            self.prepare_content_length(body)

            # Add content-type if it wasn't explicitly provided.
            if content_type and ('content-type' not in self.headers):
                self.headers['Content-Type'] = content_type

        self.body = body

  

 这个方法主要调用了 2 个静态方法:一个是 _encode_params(data),另一个是 _encode_files(files, data)。

@staticmethod
def _encode_files(files, data):
    """Build the body for a multipart/form-data request.

    Will successfully encode files when passed as a dict or a list of
    tuples. Order is retained if data is a list of tuples but arbitrary
    if parameters are supplied as a dict.
    The tuples may be 2-tuples (filename, fileobj), 3-tuples (filename, fileobj, contentype)
    or 4-tuples (filename, fileobj, contentype, custom_headers).

    :param files: dict or list of ``(field, value)`` pairs; ``value`` is a
        file object / string / bytes or one of the tuples described above.
    :param data: ordinary form fields sent alongside the files (must not be
        a string).
    :returns: ``(body, content_type)`` as produced by
        ``encode_multipart_formdata``.
    :raises ValueError: if ``files`` is empty or ``data`` is a string.
    """
    if (not files):
        raise ValueError("Files must be provided.")
    elif isinstance(data, basestring):
        raise ValueError("Data must not be a string.")

    new_fields = []
    fields = to_key_val_list(data or {})
    files = to_key_val_list(files or {})

    # 1) Plain form fields: every non-None value becomes a (name, bytes) pair.
    for field, val in fields:
        if isinstance(val, basestring) or not hasattr(val, '__iter__'):
            val = [val]
        for v in val:
            if v is not None:
                # Don't call str() on bytestrings: in Py3 it all goes wrong.
                if not isinstance(v, bytes):
                    v = str(v)

                new_fields.append(
                    (field.decode('utf-8') if isinstance(field, bytes) else field,
                     v.encode('utf-8') if isinstance(v, str) else v))

    # 2) File fields: one RequestField is appended per (k, v) pair, so a list
    #    that repeats the same field name yields several parts for that field.
    for (k, v) in files:
        # support for explicit filename
        ft = None
        fh = None
        if isinstance(v, (tuple, list)):
            if len(v) == 2:
                fn, fp = v
            elif len(v) == 3:
                fn, fp, ft = v
            else:
                fn, fp, ft, fh = v
        else:
            # Bare value: derive the filename from it, else use the field name.
            fn = guess_filename(v) or k
            fp = v

        if isinstance(fp, (str, bytes, bytearray)):
            fdata = fp
        elif hasattr(fp, 'read'):
            # File-like object: its whole content is read into memory here.
            fdata = fp.read()
        elif fp is None:
            continue
        else:
            fdata = fp

        rf = RequestField(name=k, data=fdata, filename=fn, headers=fh)
        rf.make_multipart(content_type=ft)
        new_fields.append(rf)

    body, content_type = encode_multipart_formdata(new_fields)

    return body, content_type
    @staticmethod
    def _encode_params(data):
        """
        Encode request parameters.

        Strings, bytes and file-like objects (anything with ``read``) are
        returned unchanged. Other iterables, such as a list of tuples or a
        dict, are returned URL-encoded, e.g. ``a=2&c=4``. Any other value is
        returned as-is.
        """

        if isinstance(data, (str, bytes)):
            return data
        elif hasattr(data, 'read'):
            return data
        elif hasattr(data, '__iter__'):
            result = []
            for k, vs in to_key_val_list(data):
                # Normalise a scalar value to a one-element list so that
                # single and multiple values share the loop below.
                if isinstance(vs, basestring) or not hasattr(vs, '__iter__'):
                    vs = [vs]
                for v in vs:
                    # None values are dropped from the encoded output.
                    if v is not None:
                        result.append(
                            (k.encode('utf-8') if isinstance(k, str) else k,
                             v.encode('utf-8') if isinstance(v, str) else v))
            return urlencode(result, doseq=True)
        else:
            return data

  

from requests.models import RequestEncodingMixin
import json
# Demo: feed _encode_params a JSON string, a list of pairs and a dict.
encode = RequestEncodingMixin._encode_params
dict_payload = {"key": 2222, "name": "test"}
pair_payload = [('a', 1), ('b', 2)]

# Printed in the same order as the output shown below:
# JSON string, list of tuples, dict.
for sample in (json.dumps(dict_payload), pair_payload, dict_payload):
    print(encode(sample))

{"key": 2222, "name": "test"}
a=1&b=2
key=2222&name=test

 到这里问题来了:源码中什么时候使用 multipart/form-data,什么时候使用 'application/x-www-form-urlencoded'?我们回到 models.py 的 prepare_body 方法:

def prepare_body(self, data, files, json=None):
    """Prepares the given HTTP body data.

    Chooses the body and its Content-Type from the arguments:

    * no ``data`` but ``json`` given -> JSON body, ``application/json``;
    * ``data`` is a stream (iterable that is not a string, list, tuple or
      mapping) -> sent as-is, with Content-Length or chunked encoding;
    * ``files`` given -> multipart body built by ``self._encode_files``;
    * otherwise ``data`` -> ``self._encode_params``, sent as
      ``application/x-www-form-urlencoded`` unless it is a string or
      file-like object.

    The ``print`` calls are debugging output added for this walkthrough.

    :raises NotImplementedError: if a streamed ``data`` is combined with
        ``files``.
    """

    # Check if file, fo, generator, iterator.
    # If not, run through normal process.

    # Nottin' on you.
    body = None
    content_type = None
    # No `data` but a `json` argument: the body is JSON and the content type
    # is 'application/json'.
    if not data and json is not None:
        # urllib3 requires a bytes-like body. Python 2's json.dumps
        # provides this natively, but Python 3 gives a Unicode string.
        content_type = 'application/json'
        # Serialise the object to a JSON string.
        body = complexjson.dumps(json)
        # If the serialised result is not bytes, encode it as UTF-8.
        if not isinstance(body, bytes):
            body = body.encode('utf-8')

    is_stream = all([
        hasattr(data, '__iter__'),
        not isinstance(data, (basestring, list, tuple, Mapping))
    ])

    if is_stream:
        try:
            length = super_len(data)
        except (TypeError, AttributeError, UnsupportedOperation):
            length = None

        body = data

        if getattr(body, 'tell', None) is not None:
            # Record the current file position before reading.
            # This will allow us to rewind a file in the event
            # of a redirect.
            try:
                self._body_position = body.tell()
            except (IOError, OSError):
                # This differentiates from None, allowing us to catch
                # a failed `tell()` later when trying to rewind the body
                self._body_position = object()

        if files:
            raise NotImplementedError('Streamed bodies and files are mutually exclusive.')

        if length:
            self.headers['Content-Length'] = builtin_str(length)
        else:
            self.headers['Transfer-Encoding'] = 'chunked'
    else:
        # Multi-part file uploads.
        if files:
            print("#########enter mutil-formdata#########")
            (body, content_type) = self._encode_files(files, data)
            print('#### body of muti-formdata is %s'%body)
        else:
            if data:
                # A dict / list of pairs is turned into a query string such
                # as a=1&b=2.
                print("#### enter 'application/x-www-form-urlencoded'############")
                body = self._encode_params(data)
                print("body of 'application/x-www-form-urlencoded' is %s"%body)
                if isinstance(data, basestring) or hasattr(data, 'read'):
                    content_type = None
                else:
                    content_type = 'application/x-www-form-urlencoded'

        self.prepare_content_length(body)

        # Add content-type if it wasn't explicitly provided.
        if content_type and ('content-type' not in self.headers):
            self.headers['Content-Type'] = content_type

    self.body = body

如果传入files对象:

 手动实现 multipart/form-data body 核心代码:

b'--f872e4372df27ae9bd51ebbecc6028d7
Content-Disposition: form-data; name="key"

2222
--f872e4372df27ae9bd51ebbecc6028d7
Content-Disposition: form-data; name="name"

test
--f872e4372df27ae9bd51ebbecc6028d7
Content-Disposition: form-data; name="filed1"; filename="test1.png"

\x89PNG\r\n\x1a\n\x00\x00\x00\rIHDR\x00\x00\x03\x89\x00\x00\x02X\x08\x02\x00\x00\x00@Y4<\x00\x00\x97\x1aIDATx\x9c\xe
........ multipart/form-data; boundary=f872e4372df27ae9bd51ebbecc6028d7

  

原文地址:https://www.cnblogs.com/SunshineKimi/p/13953580.html