Python requests详解


  •  Requests

参考文档:http://docs.python-requests.org/zh_CN/latest/user/quickstart.html

 1 import requests
 2 response = requests.get('http://www.baidu.com')
 3 
 4 #文本形式的网页源码
 5 print(response.text)
 6 
 7 #二进制流形式打印
 8 print(response.content)
 9 
10 #返回解析后的JSON数据;若响应体不是合法JSON会抛出异常
11 print(response.json())
12 
13 #状态码
14 print(response.status_code)
15 
16 #请求url
17 print(response.url)
18 
19 #头信息
20 print(response.headers)
21 
22 #cookie信息
23 print(response.cookies)
  • 传递参数

   

1 import requests
2 
3 payload = {'key1': 'value1', 'key2': 'value2', 'key3': None}
4 r = requests.get('http://httpbin.org/get', params=payload)
5 print(r.url)
  • 请求方式

 

1 requests.get('http://httpbin.org/get')
2 requests.post('http://httpbin.org/post')
3 requests.put('http://httpbin.org/put')
4 requests.delete('http://httpbin.org/delete')
5 requests.head('http://httpbin.org/get')
6 requests.options('http://httpbin.org/get')
  • post访问
1 import requests
2 payload = {'key1': 'value1', 'key2': 'value2'}
3 r = requests.post("http://httpbin.org/post", data=payload)
4 print(r.text)
  • 传递文件
1 import requests
2 url = 'http://httpbin.org/post'
3 files = {'file': open('wyl.xls', 'rb')}
4 r = requests.post(url, files=files)
  • 传递字符串
1 import requests
2 import json
3 url = 'https://xxxxxxxx'
4 payload = {'some': 'data'}
5 r = requests.post(url, data=json.dumps(payload))
6 #或者
7 r = requests.post(url, json=payload)
  • 超时设置

 1 requests.get('http://xxxxx.com', timeout=1) 

  • 代理
import requests
proxies = {
  'http': 'http://10.10.1.10:3128',
  'https': 'http://10.10.1.10:1080',
}
requests.get('http://xxxx.com', proxies=proxies)
  • 重定向与请求历史
import requests
s=requests.get('http://github.com')
print(s.url)
print(s.status_code)
r=requests.get('http://github.com', allow_redirects=False)
print(r.url)
print(r.history)
r=requests.head('http://github.com')
print(r.url)
print(r.status_code)
r=requests.head('http://github.com', allow_redirects=True)
print(r.url)
print(r.history)
  • 异常处理
#所有Requests显式抛出的异常都继承自 requests.exceptions.RequestException

import requests
from requests.exceptions import ReadTimeout,HTTPError,RequestException
try:
    response = requests.get('http://www.baidu.com',timeout=0.5)
    print(response.status_code)
except ReadTimeout:
    print('timeout')
except HTTPError:
    print('httperror')
except RequestException:
    print('reqerror')

HTTPError:如果 HTTP 请求返回了不成功的状态码

Timeout:请求超时

ConnectionError:遇到网络问题(如:DNS 查询失败、拒绝连接等)

TooManyRedirects:若请求超过了设定的最大重定向次数

RequestException:所有 Requests 异常的基类,可用于捕获上述全部异常

  • requests登录的几种方法
#通过账号和密码登录

import requests as req
loginurl='https://xxxxx.com/check'
formData={'username':'*****',  
          'password':'*****'}  
headers={'User-Agent':'Mozilla/5.0 (Windows NT 6.1; WOW64; rv:9.0.1) Gecko/20100101 Firefox/52.0'}  
res=req.post(loginurl,data=formData,headers=headers) 
#通过COOKIES

raw_cookies="k1=v1; k2=v2; k3=v3"
cookies={}
for line in raw_cookies.split(';'):
      key,value=line.strip().split('=',1)
      cookies[key]=value
loginurl='http://xxxxxx.com'
res=req.post(loginurl,cookies=cookies)
print(res.content)
#访问其它的页面
logi1="http://xxxxx.htm"
print(req.post(logi1,cookies=cookies).content)
#通过session

import requests as req  
s=req.Session()  
param={'username':'****',  
       'password':'***'}  
url='https://xxxxxx'  
r=s.post(url,data=param,verify=False)  #登录获取登录后的session  
print(r.content)
print(s.get('http://xxxxxxx',verify=False).content)  #通过session访问其它url