爬虫

一、完善球赛程序,测试球赛程序,并给出所有函数的测试结果:

from random import random
def printIntro():
    """Print the two-line banner describing the simulation and its inputs."""
    banner = (
        "这个程序模拟两个选手A和B的排球竞技比赛",
        "程序运行需要A和B的能力值(以0到1之间的小数表示)",
    )
    for line in banner:
        print(line)
def getInputs():
    """Prompt for both players' ability values and the number of games.

    Returns:
        A tuple ``(a, b, n)``: ``a`` and ``b`` are the floats entered for
        players A and B, and ``n`` is the integer number of games.

    Raises:
        ValueError: if an entry cannot be parsed as a number.
    """
    # float()/int() are used instead of eval(): eval would execute any
    # expression the user types, and could hand a non-integer n to range().
    a = float(input("请输入选手A的能力值(0-1): "))
    b = float(input("请输入选手B的能力值(0-1): "))
    n = int(input("模拟比赛的场次: "))
    return a, b, n
def simNGames(n, probA, probB):
    """Simulate ``n`` games and tally how many each player wins.

    Each game is played by ``simOneGame(probA, probB)``. A game is credited
    to A only when A's score is strictly higher; otherwise it goes to B.

    Returns:
        A tuple ``(winsA, winsB)``.
    """
    tally = {"A": 0, "B": 0}
    for _ in range(n):
        a_pts, b_pts = simOneGame(probA, probB)
        winner = "A" if a_pts > b_pts else "B"
        tally[winner] += 1
    return tally["A"], tally["B"]
def gameOver(a,b):
    """Return True when either score is exactly 15, otherwise False."""
    return 15 in (a, b)
def simOneGame(probA, probB):
    """Play one game rally by rally and return ``(scoreA, scoreB)``.

    A serves first. On each rally the server scores a point when
    ``random()`` is below that player's probability; otherwise the serve
    passes to the opponent and nobody scores. Play stops as soon as
    ``gameOver`` accepts the current scores.
    """
    points = {"A": 0, "B": 0}
    chance = {"A": probA, "B": probB}
    opponent = {"A": "B", "B": "A"}
    server = "A"
    while not gameOver(points["A"], points["B"]):
        if random() < chance[server]:
            points[server] += 1
        else:
            server = opponent[server]
    return points["A"], points["B"]
def printSummary(winsA, winsB):
    """Print the number of simulated games and each player's wins and share.

    Args:
        winsA: number of games won by player A.
        winsB: number of games won by player B.

    When no games were played, both shares are reported as 0.0% instead of
    raising ZeroDivisionError.
    """
    n = winsA + winsB
    # Guard the division: n is 0 when the simulation ran for zero games.
    shareA = winsA / n if n else 0.0
    shareB = winsB / n if n else 0.0
    print("竞技分析开始,共模拟{}场比赛".format(n))
    print("选手A获胜{}场比赛,占比{:0.1%}".format(winsA, shareA))
    print("选手B获胜{}场比赛,占比{:0.1%}".format(winsB, shareB))
    print("2019310143016  卢佳纯")
def main():
    """Run the program: banner, input prompts, simulation, then the summary."""
    printIntro()
    abilityA, abilityB, games = getInputs()
    # simNGames returns (winsA, winsB), which is exactly printSummary's
    # argument list.
    printSummary(*simNGames(games, abilityA, abilityB))
# Print the competition rules (one per line), then start the interactive
# simulation.
print(
    "排球竞技比赛赛制规定:",
    "1.前4局比赛采用25分制,每个队只有赢得至少25分,并同时超过对方2分时,才胜1局;",
    "2.正式比赛采用5局3胜制,决胜局的比赛采用15分制,一队先得8分后,两队交换场区,按原位置顺序继续比赛到结束;",
    "3.在决胜局(第五局)之比赛,先获15分并领先对方2分为胜;",
    sep="\n",
)
main()

测试函数:

首先对GameOver(N,scoreA,scoreB)函数进行测试

再对simOneGame(probA,probB)函数进行测试

最后对simNGames(n,probA,probB)函数进行测试

def GameOver(N,scoreA,scoreB):
    """Return True when set number ``N`` is finished at the given score.

    Sets 1-4 are played to 25 points and later sets (the decider) to 15.
    In both cases a set is over only when the leader has reached the target
    and is at least 2 points ahead.

    Args:
        N: 1-based set number.
        scoreA: current points of side A.
        scoreB: current points of side B.
    """
    target = 25 if N <= 4 else 15
    # Only the leader has to reach the target. Requiring both sides to have
    # 25 points would leave a 25:10 set unfinished.
    return max(scoreA, scoreB) >= target and abs(scoreA - scoreB) >= 2
# Exercise GameOver on sample scores, treating each pair as a score in
# set 1, and collect the pairs it reports as finished.
ai=[]
bi=[]
try:
    for scoreA,scoreB in ((1,25),(1,26),(25,25),(16,17),(28,30)):
        # GameOver takes the set number as its first argument; calling it
        # with only the two scores raises TypeError on every case.
        if GameOver(1,scoreA,scoreB):
            ai.append(scoreA)
            bi.append(scoreB)
except TypeError as exc:
    # Show the cause instead of a bare 'Error' so a bad call is visible.
    print('Error', exc)

print(ai)
print(bi)

# Test the rally logic of simOneGame(probA, probB): play a single rally with
# A serving and print both scores afterwards.
from random import random
try:
    probA,probB=0.5,0.5
    scoreA,scoreB=0,0
    serving = "A"
    if serving == "A":
        # The server scores on a successful rally, otherwise the serve moves.
        if random() < probA:
            scoreA += 1
        else:
            serving="B"
    else:
        if random() < probB:
            scoreB += 1
        else:
            serving="A"
    print(scoreA)
    print(scoreB)
except Exception as exc:
    # A bare except would also trap KeyboardInterrupt/SystemExit and hide
    # the cause; report what actually went wrong.
    print('Error', exc)

# Test the win-tallying logic of simNGames(n, probA, probB) with a fixed
# final score of 1:21 for a single game.
try:
    n,scoreA,scoreB=1,1,21
    winsA, winsB = 0, 0
    scoreA_ls=[]
    scoreB_ls=[]
    for i in range(n):
        scoreA_ls.append(scoreA)
        scoreB_ls.append(scoreB)
        # A game counts for A only on a strictly higher score.
        if scoreA > scoreB:
            winsA += 1
        else:
            winsB += 1
    print(winsA, winsB)
    print(scoreA_ls,scoreB_ls)
except Exception as exc:
    # A bare except would also trap KeyboardInterrupt/SystemExit and hide
    # the cause; report what actually went wrong.
    print('Error', exc)

二、用requests库的get()函数访问百度网站,打印返回状态和text属性的内容,并计算text属性和content属性所返回的页面内容的长度:

import requests
def getHTMLText(url):
    """Request ``url`` 20 times and summarise the last response.

    Returns:
        ``(status_code, text, content, len(text), len(content))`` for the
        final response, with the text decoded as UTF-8; or ``""`` when a
        request fails or the final response has an HTTP error status.
    """
    try:
        for _ in range(20):
            r = requests.get(url, timeout=30)
        # Only the last response is checked and reported.
        r.raise_for_status()
        r.encoding = 'utf-8'
        return r.status_code,r.text,r.content,len(r.text),len(r.content)
    except requests.RequestException:
        # Covers connection errors, timeouts and the HTTPError raised by
        # raise_for_status(), without hiding unrelated programming errors.
        return ""
url = 'http://www.baidu.com.cn/'
print(getHTMLText(url))

结果为:

三、制作一个简单的html页面:a、获取body标签的内容;b、获取id为first的标签对象;c、获取并打印html页面中的中文字符

<!DOCTYPE html>
<html>
<head>
<meta charset="utf-8">
<title>菜鸟教程(runoob.com)</title>
</head>
<body>
    <!-- Heading line, followed by the paragraph carrying id="first". -->
    <b>第一个html页面————来自学号2019310143016</b><br><br>

    <p id="first">制作者的基本信息:</p>

    <!-- Author details table; it sits inside <body> so that it is part of the body content. -->
    <table border="1">
        <tr>
            <td>班级</td>
            <td>姓名</td>
            <td>年级</td>
        </tr>
        <tr>
            <td>信计1班</td>
            <td>纯牛奶</td>
            <td>19级</td>
        </tr>
    </table>
</body>
</html>

四、利用爬虫获取2017中国大学排名:

import requests
from bs4 import BeautifulSoup 
import bs4
import pandas as pd

# Scrape the ranking table, print up to the first 50 rows and save every
# scraped row to a CSV file.
info = []  # one [rank, school name, total score] list per table row
url ="http://www.zuihaodaxue.com/Greater_China_ranking2017_0.html"
try:
    r=requests.get(url,timeout=100)
    r.raise_for_status()
    r.encoding=r.apparent_encoding
    soup  = BeautifulSoup(r.text,"html.parser")
    for tr in soup.find("tbody").children:
        # Skip children that are not element tags.
        if isinstance(tr,bs4.element.Tag):
            tds=tr.find_all("td")
            info.append([tds[0].string,tds[1].string,tds[3].string])
    # Argument {3} is the fill character used to pad the school-name column.
    row_fmt = "{0:^10}\t{1:{3}^10}\t{2:^10}"
    print(row_fmt.format("排名","学校名称","总分",chr(12288)))
    # Slice instead of range(50) so a shorter table does not raise IndexError.
    for row in info[:50]:
        print(row_fmt.format(row[0],row[1],row[2],chr(12288)))
    name = ["排名","学校名称","总分"]
    test = pd.DataFrame(columns=name,data=info)
    # Raw string keeps the Windows path separators literal; without them the
    # path contained a tab character in place of "\t".
    test.to_csv(r"C:\code_python\test1.csv")
    print("保存成功")
except Exception as e :
    print(e)

原文地址:https://www.cnblogs.com/cnn-ljc/p/12905635.html