Python爬虫---豆瓣Top250电影采集
代码:
import time

import requests
from bs4 import BeautifulSoup as bs

# Browser-like request headers (the User-Agent mimics Chrome/Edge on Windows),
# presumably so the site serves its regular HTML page -- confirm if changed.
_REQUEST_HEADERS = {
    "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/96.0.4664.110 Safari/537.36 Edg/96.0.1054.62",
    "Accept": "text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,image/apng,*/*;q=0.8,application/signed-exchange;v=b3;q=0.9",
}

# Seconds to wait for the server before giving up; without a timeout
# requests.get() can block indefinitely on a stalled connection.
_REQUEST_TIMEOUT = 10


def get_movie(url):
    """Download one listing page and print the title and link of each movie.

    Every ``<div class="hd">`` element on the page is treated as one movie
    entry: the link comes from its first ``<a>`` tag and the name from the
    first element with class ``title`` inside that tag. Entries missing any
    of these pieces are skipped instead of raising.

    Args:
        url: Address of the listing page to fetch.

    Raises:
        requests.RequestException: If the request fails, times out, or the
            server responds with an HTTP error status.
    """
    response = requests.get(
        url, headers=_REQUEST_HEADERS, timeout=_REQUEST_TIMEOUT
    )
    # Fail loudly on 4xx/5xx instead of parsing an error page and silently
    # printing nothing.
    response.raise_for_status()

    soup = bs(response.text, "html.parser")
    for item in soup.find_all("div", class_="hd"):
        tag = item.find("a")
        if tag is None:
            continue
        title = tag.find(class_="title")
        link = tag.get("href")
        if title is None or link is None:
            continue
        print("电影名称:%s,电影地址:%s" % (title.text, link))


# Page URL template; ``start`` is the offset of the first movie on the page.
url = "https://movie.douban.com/top250?start={}"
# Ten pages at offsets 0, 25, ..., 225.
urls = [url.format(num * 25) for num in range(10)]

if __name__ == "__main__":
    for link in urls:
        get_movie(link)
        # Pause one second between pages to avoid hammering the server.
        time.sleep(1)