Python requests详解
- Requests
参考文档:http://docs.python-requests.org/zh_CN/latest/user/quickstart.html
import requests
response = requests.get('http://www.baidu.com')

#文本形式的网页源码
print(response.text)

#二进制流形式打印
print(response.content)

#返回JSON格式,可能抛出异常
print(response.json())

#状态码
print(response.status_code)

#请求url
print(response.url)

#头信息
print(response.headers)

#cookie信息
print(response.cookies)
- 传递参数
import requests

payload = {'key1': 'value1', 'key2': 'value2', 'key3': None}
r = requests.get('http://httpbin.org/get', params=payload)
print(r.url)
- 请求方式
requests.get('http://httpbin.org/get')
requests.post('http://httpbin.org/post')
requests.put('http://httpbin.org/put')
requests.delete('http://httpbin.org/delete')
requests.head('http://httpbin.org/get')
requests.options('http://httpbin.org/get')
- post访问
import requests
payload = {'key1': 'value1', 'key2': 'value2'}
r = requests.post("http://httpbin.org/post", data=payload)
print(r.text)
- 传递文件
import requests
url = 'http://httpbin.org/post'
files = {'file': open('wyl.xls', 'rb')}
r = requests.post(url, files=files)
- 传递字符串
import requests
import json
url = 'https://xxxxxxxx'
payload = {'some': 'data'}
r = requests.post(url, data=json.dumps(payload))
#或者
r = requests.post(url, json=payload)
- 超时设置
requests.get('http://xxxxx.com', timeout=1)
- 代理
import requests
proxies = {
    'http': 'http://10.10.1.10:3128',
    'https': 'http://10.10.1.10:1080',
}
requests.get('http://xxxx.com', proxies=proxies)
- 重定向与请求历史
import requests
s=requests.get('http://github.com')
print(s.url)
print(s.status_code)
r=requests.get('http://github.com', allow_redirects=False)
print(r.url)
print(r.history)
r=requests.head('http://github.com')
print(r.url)
print(r.status_code)
r=requests.head('http://github.com', allow_redirects=True)
print(r.url)
print(r.history)
- 异常处理
#所有Requests显式抛出的异常都继承自 requests.exceptions.RequestException
import requests
from requests.exceptions import ReadTimeout,HTTPError,RequestException
try:
    response = requests.get('http://www.baidu.com',timeout=0.5)
    print(response.status_code)
except ReadTimeout:
    print('timeout')
except HTTPError:
    print('httperror')
except RequestException:
    print('reqerror')
HTTPError
:如果 HTTP 请求返回了不成功的状态码(4XX/5XX),调用 Response.raise_for_status() 时会抛出该异常
Timeout
:请求超时
ConnectionError
:遇到网络问题(如:DNS 查询失败、拒绝连接等)
TooManyRedirects
:若请求超过了设定的最大重定向次数
RequestException:所有 Requests 异常的基类
- requests登录的几种方法
#通过账号和密码登录
import requests as req
loginurl='https://xxxxx.com/check'
formData={'username':'*****', 'password':'*****'}
headers={'User-Agent':'Mozilla/5.0 (Windows NT 6.1; WOW64; rv:9.0.1) Gecko/20100101 Firefox/52.0'}
res=req.post(loginurl,data=formData,headers=headers)
#通过COOKIES
import requests as req
raw_cookies="k1=v1; k2=v2; k3=v3"
cookies={}
for line in raw_cookies.split(';'):
    key,value=line.split('=',1)
    cookies[key]=value
loginurl='http://xxxxxx.com'
res=req.post(loginurl,cookies=cookies)
print(res.content)
#访问其它的页面
logi1="http://xxxxx.htm"
print(req.post(logi1,cookies=cookies).content)
#通过session
import requests as req
s=req.Session()
param={'username':'****', 'password':'***'}
url='https://xxxxxx'
r=s.post(url,data=param,verify=False) #登录获取登录后的session
print(r.content)
print(s.get('http://xxxxxxx',verify=False).content) #通过session访问其它url