"""Fetch a university-ranking page and print rows scraped from its HTML table."""
from bs4 import BeautifulSoup
import requests

# Scraped table rows: one list of cell strings per university, filled by
# fillUnivList() and read by printUnivList().
allUniv = []


def getHTMLText(url):
    """Return the body of *url* decoded as UTF-8, or "" if the request fails.

    The request uses a 30 second timeout. Any ``requests`` failure
    (connection problem, timeout, invalid URL, HTTP error status reported by
    ``raise_for_status``) yields the empty string instead of an exception.
    """
    try:
        r = requests.get(url, timeout=30)
        r.raise_for_status()
    except requests.RequestException:
        # Only network/HTTP failures are treated as "no content"; programming
        # errors and KeyboardInterrupt are no longer silently swallowed.
        return ""
    r.encoding = 'utf-8'
    return r.text


def fillUnivList(soup):
    """Append one list of cell texts to ``allUniv`` for every table row in *soup*.

    *soup* must offer ``find_all`` the way a BeautifulSoup document does.
    Rows that contain no ``<td>`` cells are skipped. Cells whose raw text is
    the empty string are dropped; every remaining cell is stored stripped of
    surrounding whitespace.
    """
    for tr in soup.find_all('tr'):
        cells = tr.find_all('td')
        if not cells:
            continue
        raw_texts = [td.get_text() for td in cells]
        # The emptiness test is applied before stripping, so a cell that holds
        # only whitespace is kept as '' and column positions do not shift.
        allUniv.append([text.strip() for text in raw_texts if text != ''])


def printUnivList(num):
    """Print a header followed by the first *num* rows of ``allUniv``.

    When fewer than *num* rows are available, only the available rows are
    printed. Raises ``IndexError`` if a row has fewer than seven cells.
    """
    row_format = "{:^4}{:^10}{:^5}{:^8}{:^10}"
    print(row_format.format("排名", "学校名称", "省市", "总分", "培养规模"))
    # Slicing (instead of indexing over range(num)) keeps a too-large num from
    # raising IndexError; max() keeps a negative num printing nothing.
    for u in allUniv[:max(num, 0)]:
        # NOTE(review): two lines are printed per university with different
        # column picks (cells 4/5 versus cells 3/6). Presumably only one of
        # them matches the header - confirm against the real table layout
        # and drop the other.
        print(row_format.format(u[0], u[1], u[2], u[4], u[5]))
        print(row_format.format(u[0], u[1], u[2], u[3], u[6]))


def main():
    """Download the ranking resource and print its parsed form."""
    url = 'https://www.shanghairanking.cn/api/pub/v1/bcur?bcur_type=11&year=2020'
    html = getHTMLText(url)
    soup = BeautifulSoup(html, "html.parser")
    print(soup)
    # NOTE(review): the table extraction below is disabled. The URL looks like
    # an API endpoint rather than an HTML page, so there may be no <tr> rows
    # to collect - confirm before re-enabling.
    # fillUnivList(soup)
    # printUnivList(5)


if __name__ == "__main__":
    main()
# Print the top of a university ranking fetched from a JSON API.
import requests
from bs4 import BeautifulSoup
import bs4


def getJson(url):
    """Fetch *url* and return its body parsed as JSON.

    The request uses a 30 second timeout. If the request fails, the server
    answers with an HTTP error status, or the body is not valid JSON, the
    error is printed and the fallback ``{'code': '0', 'msg': 'error'}`` is
    returned instead of raising.
    """
    try:
        r = requests.get(url, timeout=30)
        r.raise_for_status()
        # JSON is decoded as UTF-8 explicitly rather than via a charset
        # guess, which could mis-detect the encoding of the body.
        r.encoding = 'utf-8'
        return r.json()
    except (requests.RequestException, ValueError) as e:
        # ValueError covers JSON decoding failures across requests versions.
        print(e)
        return {'code': '0', 'msg': 'error'}


def printUnivList(rankings, limit):
    """Print a header and at most *limit* entries of *rankings*.

    Each entry is a mapping read through the keys ``ranking``,
    ``univNameCn``, ``province`` and ``score``. When *rankings* holds fewer
    than *limit* entries, only the available ones are printed.
    """
    print("{:<4}\t{:^10}\t{:^10}\t{:^8}".format("排名", "学校名称", "省市", "总分"))
    # Slicing (instead of indexing over range(limit)) keeps a short result
    # list from raising IndexError; max() keeps a negative limit printing
    # nothing.
    for u in rankings[:max(limit, 0)]:
        print("{:^4}\t{:^10}\t{:^10}\t{:^8}".format(
            u["ranking"], u["univNameCn"], u["province"], u["score"]))


def main():
    """Fetch the ranking and print its first 20 universities."""
    url = 'https://www.shanghairanking.cn/api/pub/v1/bcur?bcur_type=11&year=2020'
    payload = getJson(url)
    # Anything other than code 200 (including getJson's fallback dict) is
    # reported through its message and ends the run.
    if payload.get("code") != 200:
        print(payload.get("msg", "error"))
        return
    rankings = payload["data"]["rankings"]
    printUnivList(rankings, 20)  # 20 univs


if __name__ == "__main__":
    main()