实现数据同步:通过 POST / GET 请求读写码云(Gitee)私有仓库中的文件

# -*- coding: utf-8 -*-
import requests
import sqlite3
import time
from demo_handle import sql_handle
from bs4 import BeautifulSoup


class post:
    """Upload the local ``product`` table into ``test.py`` on Gitee.

    Constructing an instance does all the preparation work:

    1. reads every row of the ``product`` table from
       ``../db/record_price.db``;
    2. fetches the Gitee web "edit" page for ``test.py`` with a logged-in
       browser session to obtain the response cookies and the CSRF
       ``authenticity_token`` embedded in the page;
    3. builds the form payload stored in ``self.data``.

    ``post_data()`` then submits that payload.

    Attributes:
        header: Request headers replayed from a browser session.
        conn, cursor: The SQLite handles used for the read. The connection
            is already closed when ``__init__`` returns; the attributes are
            kept only for backward compatibility.
        cookie: Cookies returned by the edit-page request, as a dict.
        token: Value of the page's ``authenticity_token`` input.
        data: Form fields sent by ``post_data()``.
    """

    # Seconds to wait for Gitee before giving up; without a timeout
    # ``requests`` blocks forever on a stalled connection.
    REQUEST_TIMEOUT = 30

    EDIT_URL = "https://gitee.com/harmony_creation/quotationSystem/edit/master/test.py"

    def __init__(self):
        """Read the table, fetch the CSRF token and prepare the form data.

        Raises:
            sqlite3.Error: If the database cannot be read.
            requests.RequestException: If the edit page cannot be fetched.
            AttributeError: If the page has no ``authenticity_token`` input.
        """
        # NOTE(review): the Cookie below is a hard-coded, captured browser
        # session. It is a credential and it expires; it should be loaded
        # from configuration rather than committed to source.
        self.header = {
            "Accept": "text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,image/apng,*/*;q=0.8",
            "Accept-Encoding": "gzip, deflate, br",
            "Accept-Language": "zh-CN,zh;q=0.9",
            "Connection": "keep-alive",
            "Cookie": "user_locale=zh-CN; oschina_new_user=false; remote_way=http; aliyungf_tc=AQAAAK1x2x/QlgcAbjNye8bI5D9bzrkd; tz=Asia%2FShanghai; Hm_lvt_24f17767262929947cc3631f99bfd274=1543897444,1544083477,1544087096,1544145347; Hm_lpvt_24f17767262929947cc3631f99bfd274=1544151547; gitee-session-n=BAh7C0kiD3Nlc3Npb25faWQGOgZFVEkiJWY2NGViMDQ2NmQ2YzY5MGJmNDkwNDUwNDliNmFiNzQ0BjsAVEkiF21vYnlsZXR0ZV9vdmVycmlkZQY7AEY6CG5pbEkiGXdhcmRlbi51c2VyLnVzZXIua2V5BjsAVFsHWwZpA%2BebI0kiIiQyYSQxMCRjcmsvNGYxODNXSEMvYXo1emJHYk9PBjsAVEkiHXdhcmRlbi51c2VyLnVzZXIuc2Vzc2lvbgY7AFR7BkkiFGxhc3RfcmVxdWVzdF9hdAY7AFRJdToJVGltZQ3irB3Av9CU7Ak6DW5hbm9fbnVtaQIxAToNbmFub19kZW5pBjoNc3VibWljcm8iBzBQOgl6b25lSSIIVVRDBjsAVEkiF2FjdGl2ZV9lbWFpbF9ndWlkZQY7AEZGSSIQX2NzcmZfdG9rZW4GOwBGSSIxUzg5MDE1MFUzMVduK0IvY29FenZ6WG00TnJ0aUNEZm0yVjl2TFFLRzkxaz0GOwBG--9fc346f1c7abce3693c1480171a6914ac7a5aaff",
            "Host": "gitee.com",
            "Referer": "https://gitee.com/harmony_creation/quotationSystem/blob/master/test.py",
            "Upgrade-Insecure-Requests": "1",
            "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/70.0.3538.110 Safari/537.36"
        }
        content = self._read_products()

        result = requests.get(self.EDIT_URL, headers=self.header,
                              timeout=self.REQUEST_TIMEOUT)
        self.cookie = result.cookies.get_dict()
        b1 = BeautifulSoup(result.text, 'html.parser')
        token_input = b1.find(name='input', attrs={'name': "authenticity_token"})
        if token_input is None:
            # Same exception type the original raised implicitly
            # ('NoneType' object has no attribute 'get'), but with a
            # message that says what actually went wrong.
            raise AttributeError(
                "authenticity_token input not found in the edit page "
                "(HTTP %s); the session cookie has probably expired"
                % result.status_code
            )
        self.token = token_input.get('value')
        self.data = self._build_form(self.token, content)

    def _read_products(self):
        """Return all rows of ``product``, always closing the connection."""
        self.conn = sqlite3.connect("../db/record_price.db")
        try:
            self.cursor = self.conn.cursor()
            return self.cursor.execute("select * from product").fetchall()
        finally:
            # Close even when the query fails so the handle is not leaked.
            self.conn.close()

    @staticmethod
    def _build_form(token, content):
        """Build the form fields for the edit request.

        ``content`` is sent as its ``str()`` form, i.e. a Python literal
        of the row list.
        """
        return {
            "utf8": "",
            "_method": "put",
            "authenticity_token": token,
            "commit_message_header": "更新 test.py",
            "extended_information": "",
            "last_commit": "",
            "eol_crlf": "false",
            "content": str(content)
        }

    def post_data(self):
        """Submit the prepared form to Gitee and print the HTTP status code.

        Only the cookies returned by the edit-page request are sent;
        ``self.header`` is not attached to this request.
        """
        result = requests.post(self.EDIT_URL, data=self.data,
                               cookies=self.cookie,
                               timeout=self.REQUEST_TIMEOUT)
        print(result.status_code)


class get:
    """Download the row list stored in ``test.py`` on Gitee and re-insert it.

    Constructing an instance opens ``../db/record_price.db`` and
    immediately deletes every row of the ``product`` table.
    ``get_data()`` then fetches the file's web page, parses the first
    rendered line as a Python literal list of rows, and passes each row to
    ``sql_handle.sqlit_handle().edit_add``.

    Attributes:
        header: Request headers replayed from a browser session.
        conn, cursor: SQLite handles; ``conn`` is closed by ``get_data()``.
    """

    # Seconds to wait for Gitee before giving up; without a timeout
    # ``requests`` blocks forever on a stalled connection.
    REQUEST_TIMEOUT = 30

    BLOB_URL = "https://gitee.com/harmony_creation/quotationSystem/blob/master/test.py"

    def __init__(self):
        """Open the database and clear the ``product`` table."""
        # NOTE(review): the Cookie below is a hard-coded, captured browser
        # session. It is a credential and it expires; it should be loaded
        # from configuration rather than committed to source.
        self.header = {
            "Accept": "text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,image/apng,*/*;q=0.8",
            "Accept-Encoding": "gzip, deflate, br",
            "Accept-Language": "zh-CN,zh;q=0.9",
            "Cache-Control": "max-age=0",
            "Connection": "keep-alive",
            "Cookie": "user_locale=zh-CN; oschina_new_user=false; remote_way=http; aliyungf_tc=AQAAADjFJiu/5wQA8kTEb8iDuTsmpAD3; tz=Asia%2FShanghai; Hm_lvt_24f17767262929947cc3631f99bfd274=1544153817,1544415700,1544491905,1544497469; Hm_lpvt_24f17767262929947cc3631f99bfd274=1544498693; gitee-session-n=BAh7DEkiD3Nlc3Npb25faWQGOgZFVEkiJThlM2YwM2NiNzE4MDhlNTkzNTAxOTRhYTQ3ZWI2NjA0BjsAVEkiF21vYnlsZXR0ZV9vdmVycmlkZQY7AEY6CG5pbEkiGXdhcmRlbi51c2VyLnVzZXIua2V5BjsAVFsHWwZpA%2BebI0kiIiQyYSQxMCRjcmsvNGYxODNXSEMvYXo1emJHYk9PBjsAVEkiHXdhcmRlbi51c2VyLnVzZXIuc2Vzc2lvbgY7AFR7BkkiFGxhc3RfcmVxdWVzdF9hdAY7AFRJdToJVGltZQ1jrR3AO0%2B5ZAk6DW5hbm9fbnVtaQJ5AzoNbmFub19kZW5pBjoNc3VibWljcm8iB4iQOgl6b25lSSIIVVRDBjsAVEkiF2FjdGl2ZV9lbWFpbF9ndWlkZQY7AEZGSSIQcGhvbmVfZ3VpZGUGOwBGVEkiEF9jc3JmX3Rva2VuBjsARkkiMVBBNzAwczJuNWo3aW5UOUJkekt1ekxxK2VLKzdlQklmeVJhaFllVlpKRDA9BjsARg%3D%3D--304d5a4a63f6a1b1d0980d281b6a789353be8e3e",
            "Host": "gitee.com",
            "Upgrade-Insecure-Requests": "1",
            "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/70.0.3538.110 Safari/537.36"
        }
        self.conn = sqlite3.connect("../db/record_price.db")
        self.cursor = self.conn.cursor()
        # NOTE(review): the table is wiped and committed here, before the
        # remote copy has been fetched. If get_data() later fails, the local
        # rows are already gone. Kept as-is because construction-time
        # deletion is existing behavior callers may rely on.
        self.cursor.execute("delete from product")
        self.conn.commit()

    @staticmethod
    def _parse_rows(text):
        """Parse ``text`` as a Python literal and return the resulting value.

        SECURITY: the original code called ``eval()`` on text scraped from a
        remote web page, which executes arbitrary code if the page content
        is ever tampered with. ``ast.literal_eval`` accepts only literals
        (lists, tuples, numbers, strings, bytes, ``None``, booleans) and
        raises ``ValueError``/``SyntaxError`` for anything else.
        """
        import ast  # local import: the file's import block is left untouched

        # strip() mirrors eval()'s tolerance of surrounding whitespace.
        return ast.literal_eval(text.strip())

    def get_data(self):
        """Fetch the remote row list and insert each row locally.

        Prints the HTTP status code. The SQLite connection opened in
        ``__init__`` is closed on exit, whether or not an error occurred.

        Raises:
            requests.RequestException: If the page cannot be fetched.
            IndexError: If the page contains no ``.line`` element.
            ValueError, SyntaxError: If the first line is not a literal.
        """
        try:
            result = requests.get(self.BLOB_URL, headers=self.header,
                                  timeout=self.REQUEST_TIMEOUT)
            print(result.status_code)
            b2 = BeautifulSoup(result.text, "html.parser")
            lines = b2.select(".line")
            if not lines:
                # Same exception type the original raised implicitly via
                # content[0], with an explanatory message.
                raise IndexError(
                    "no '.line' element found in the Gitee page (HTTP %s); "
                    "the session cookie has probably expired"
                    % result.status_code
                )

            data_list = self._parse_rows(lines[0].text)
            for row in data_list:
                # edit_add is defined in demo_handle.sql_handle; presumably
                # it inserts one product row — verify against that module.
                sql_handle.sqlit_handle().edit_add(row)
            self.conn.commit()
        finally:
            # Release the handle even when fetching or parsing fails.
            self.conn.close()

# post().post_data()
# import time
# time.sleep(5)
# get().get_data()

当爬虫的(递归)请求被服务器拒绝时,可以在浏览器中刷新需要爬取的页面,在开发者工具的 Network 面板里捕获对应的请求包,然后把其中的 Request Headers 全部写入代码的请求头即可。

原文地址:https://www.cnblogs.com/cjj-zyj/p/10107377.html