python 邮件读取
1. 开通pop之后仍然报错的解决方法
python - POPLIB poplib.error_proto: b'-ERR [AUTH] Username and password not accepted.' - Stack Overflow
2. 邮件生成
How to Read Email From Gmail Using Python 3 | CodeHandbook
可以生成全部的邮件
3. 但是Gmail经过加密所以看不了,最终解决方案依然是API,参考文献
How to read Emails from Gmail using Gmail API in Python ? - GeeksforGeeks
4. 为了看起来更方便,在每行之间添加了一个空行
python创建txt文件换行输入_python中写入txt文件需要换行,以及\r 和\n_weixin_39996742的博客-CSDN博客
5. 写入txt
使用python向txt文件写入内容 - chuyaoxin - 博客园 (cnblogs.com)
6. bs4内容提取
python爬虫数据提取之bs4的使用方法 - WillWeson - 博客园 (cnblogs.com)
7. 因为只需要未读,代码修改为:
# List only unread messages: the Gmail API list call is restricted to the
# INBOX label and filtered with the search query "is:unread".
# `service` is the Gmail API client built elsewhere in these notes.
result = service.users().messages().list(userId='me',labelIds=['INBOX'],q="is:unread").execute()
先用txt格式实现了读取,但解析过于依赖文本的固定格式,所以后来改为解析HTML;原始代码记录如下:
1)实现读取并存入txt
# import the required libraries
from googleapiclient.discovery import build
from google_auth_oauthlib.flow import InstalledAppFlow
from google.auth.transport.requests import Request
import pickle
import os.path
import base64
import email
import sys
from bs4 import BeautifulSoup

# Define the SCOPES. If modifying it, delete the token.pickle file.
SCOPES = ['https://www.googleapis.com/auth/gmail.readonly']


def getEmails(output_path='C:\\Users\\Eleni\\test.txt'):
    """Append the subject and body text of every unread INBOX message to a text file.

    For each unread message the function appends, in order: a fixed
    "next letter" separator, the Subject header value, ``'\\r\\n'``, the text
    of the first MIME part (parsed with BeautifulSoup/lxml), and ``'\\r\\n'``.

    Args:
        output_path: File the messages are appended to. Defaults to the path
            that was previously hard-coded.

    Raises:
        OSError: If ``output_path`` cannot be opened for appending.

    Messages whose payload cannot be parsed (no ``parts``, no body data, bad
    base64, no ``<body>`` after parsing) are skipped with a note on stderr
    instead of being silently swallowed.
    """
    # Variable creds will store the user access token.
    # If no valid token is found, we will create one.
    creds = None

    # The file token.pickle contains the user access token.
    if os.path.exists('token.pickle'):
        # NOTE: unpickling executes arbitrary code from the file. This is only
        # safe as long as token.pickle is written exclusively by this script.
        with open('token.pickle', 'rb') as token:
            creds = pickle.load(token)

    # If credentials are not available or are invalid, ask the user to log in.
    if not creds or not creds.valid:
        if creds and creds.expired and creds.refresh_token:
            creds.refresh(Request())
        else:
            flow = InstalledAppFlow.from_client_secrets_file('credentials.json', SCOPES)
            creds = flow.run_local_server(port=0)

        # Save the access token in token.pickle file for the next run.
        with open('token.pickle', 'wb') as token:
            pickle.dump(creds, token)

    # Connect to the Gmail API.
    service = build('gmail', 'v1', credentials=creds)

    # Request the unread messages of the inbox. maxResults can be passed to
    # list() as well to change how many messages are returned.
    result = service.users().messages().list(userId='me', labelIds=['INBOX'], q="is:unread").execute()

    # 'messages' is a list of dicts holding a message id each. The key is
    # missing when nothing matches, so fall back to an empty list instead of
    # iterating over None.
    messages = result.get('messages') or []

    for msg in messages:
        # Get the message from its id.
        txt = service.users().messages().get(userId='me', id=msg['id']).execute()

        try:
            payload = txt['payload']

            # Reset per message so a missing Subject header can neither raise
            # NameError nor reuse the subject of the previous message.
            subject = ''
            for d in payload['headers']:
                if d['name'] == 'Subject':
                    subject = d['value']

            # The body is base64url-encoded; only the first part is used.
            parts = payload.get('parts')
            if not parts:
                raise ValueError('message has no parts')
            decoded_data = base64.urlsafe_b64decode(parts[0]['body']['data'])

            # Parse the decoded data with BeautifulSoup and keep its text.
            soup = BeautifulSoup(decoded_data, "lxml")
            if soup.body is None:
                raise ValueError('decoded part has no body')
            a = soup.body.text
        except (KeyError, IndexError, TypeError, ValueError) as err:
            print('skipping message %s: %r' % (msg.get('id'), err), file=sys.stderr)
            continue

        # The original also tried f.write(soup.body()), but calling a tag
        # returns a list of tags, so that write always raised TypeError (which
        # was swallowed) and never reached the file. It is dropped here so the
        # file keeps exactly the layout it always had.
        with open(output_path, 'a', encoding='utf-8') as f:
            f.write('---------------------------------------------next letter---------------------------------------------')
            f.write(subject)
            f.write('\r\n')
            f.write(a)
            f.write('\r\n')


getEmails()
2) 找到所有domain发来的消息,对转发邮件进行处理
# Read the exported mail dump and pull contact fields out of the forwarded
# messages that mention "Domain".
# Relies on is_number(), which is defined elsewhere.

# Use a context manager so the file handle is closed after reading.
with open('C:/Users/Eleni/test.txt', 'r', encoding='utf-8') as f:
    data = f.read()

# One chunk per message; the separator must match the one written by the export.
s = data.split('---------------------------------------------next letter---------------------------------------------')

# Indices (into s) of the chunks that contain the word "Domain".
domain_list = [idx for idx, chunk in enumerate(s) if 'Domain' in chunk]

final = []  # kept from the original; nothing in this block fills it

for i in domain_list:
    new_list = s[i].split('\n')
    word_list = []
    for pos, line in enumerate(new_list):
        # The first line of a chunk is always kept (the export writes the
        # subject right after the separator).
        if pos == 0:
            word_list.append(line)

        # Each value is read two lines below its label. Skip labels that are
        # too close to the end of the chunk instead of raising IndexError.
        if pos + 2 >= len(new_list):
            continue
        value = new_list[pos + 2]

        if line == 'From:':
            word_list.append(value)
        if line == 'Email:' and '@' in value:
            word_list.append(value)
        if line == 'Phone:' and is_number(value):
            word_list.append(value)

    # NOTE(review): the original indentation was lost; printing once per
    # message is assumed here - confirm.
    print(word_list)
3)对直接发过来的邮件进行处理
# Print the interesting lines of messages that were sent directly (not
# forwarded). Uses `s` (message chunks) and `domain_list` (indices into `s`)
# built by the previous step.
for i in domain_list:
    # `raw_lines` replaces the original local named `list`, which shadowed the
    # builtin for the rest of the script.
    raw_lines = s[i].split('\n')

    # Drop empty lines so the fixed positions below line up.
    new_list = [line for line in raw_lines if line != '']

    # The layout below needs at least 15 non-empty lines; shorter messages do
    # not match it and used to raise IndexError.
    if len(new_list) < 15:
        continue

    print(new_list[0])
    # Presumably these slices strip a fixed-width label prefix from each
    # line - verify against a real message.
    print(new_list[12][7:])
    print(new_list[13][9:])
    print(new_list[14][9:])

    # Print everything from line 15 up to (not including) each
    # '* Security Policy *' marker line.
    for end, line in enumerate(new_list):
        if line == '* Security Policy *':
            for a in range(15, end):
                print(new_list[a])

    # NOTE(review): the original indentation was lost; one blank separator
    # per message is assumed here - confirm.
    print('\n')