python 邮件读取


1. 开通pop之后仍然报错的解决方法

python - POPLIB poplib.error_proto: b'-ERR [AUTH] Username and password not accepted.' - Stack Overflow

2. 邮件生成

How to Read Email From Gmail Using Python 3 | CodeHandbook

可以生成全部的邮件

3. 但是Gmail的邮件内容经过加密,所以无法直接查看,最终的解决方案仍然是使用Gmail API,参考文献如下:

How to read Emails from Gmail using Gmail API in Python ? - GeeksforGeeks

4. 为了看起来更方便,在每行之间添加了一个空行

python创建txt文件换行输入_python中写入txt文件需要换行,以及\r 和\n_weixin_39996742的博客-CSDN博客

5. 写入txt

使用python向txt文件写入内容 - chuyaoxin - 博客园 (cnblogs.com)

6. bs4内容提取

python爬虫数据提取之bs4的使用方法 - WillWeson - 博客园 (cnblogs.com)

7. 因为只需要未读,代码修改为:

result = service.users().messages().list(userId='me',labelIds=['INBOX'],q="is:unread").execute()

用txt格式实现了读取,但是这种方式太过依赖文本格式,所以后来转换成了HTML,原始代码记录如下:

1)实现读取并存入txt

# import the required libraries 
from googleapiclient.discovery import build 
from google_auth_oauthlib.flow import InstalledAppFlow 
from google.auth.transport.requests import Request 
import pickle 
import os.path 
import base64 
import email 
from bs4 import BeautifulSoup 
  
# Define the SCOPES. If modifying it, delete the token.pickle file. 
SCOPES = ['https://www.googleapis.com/auth/gmail.readonly'] 
  
def _header_value(headers, name, default=''):
    """Return the value of the last header called *name*, or *default*."""
    value = default
    for header in headers:
        if header.get('name') == name:
            value = header.get('value', default)
    return value


def _decode_body_text(payload):
    """Return the text of the message body, or None if there is none.

    Uses the first entry of ``payload['parts']`` when the message is
    multipart, otherwise the payload's own body. The data is base64url
    text; it is decoded and parsed with BeautifulSoup ("lxml"), and the text
    of the resulting ``<body>`` element is returned.

    Raises:
        ValueError: if the body data is not valid base64.
    """
    parts = payload.get('parts')
    part = parts[0] if parts else payload
    data = (part.get('body') or {}).get('data')
    if not data:
        return None

    # urlsafe_b64decode handles the '-'/'_' alphabet; restore any padding
    # that was stripped so the decoder does not reject the input.
    padded = data + '=' * (-len(data) % 4)
    decoded_data = base64.urlsafe_b64decode(padded)

    soup = BeautifulSoup(decoded_data, "lxml")
    if soup.body is None:
        return None
    return soup.body.text


def getEmails(output_path='C:\\Users\\Eleni\\test.txt', query="is:unread"):
    """Append the subject and body text of matching INBOX messages to a file.

    Credentials are loaded from ``token.pickle`` when present; otherwise (or
    when they are invalid) they are refreshed or obtained through the
    installed-app OAuth flow using ``credentials.json`` and then saved back
    to ``token.pickle``.

    For every message returned by the list request, one record is appended
    to *output_path*: the "next letter" separator, the subject, a line
    break, the body text and another line break. Messages without a
    decodable body are skipped and reported on stdout.

    Args:
        output_path: Text file the records are appended to (UTF-8).
        query: Gmail search query passed as ``q`` to the list request.
    """
    # Variable creds will store the user access token.
    # If no valid token is found, we will create one.
    creds = None

    if os.path.exists('token.pickle'):
        # NOTE(security): pickle.load can execute arbitrary code from the
        # file; token.pickle must only ever be a file written by this script.
        with open('token.pickle', 'rb') as token:
            creds = pickle.load(token)

    # If credentials are not available or are invalid, ask the user to log in.
    if not creds or not creds.valid:
        if creds and creds.expired and creds.refresh_token:
            creds.refresh(Request())
        else:
            flow = InstalledAppFlow.from_client_secrets_file('credentials.json', SCOPES)
            creds = flow.run_local_server(port=0)

        # Save the access token in token.pickle file for the next run
        with open('token.pickle', 'wb') as token:
            pickle.dump(creds, token)

    # Connect to the Gmail API
    service = build('gmail', 'v1', credentials=creds)

    # Request the list of matching messages. maxResults can be passed to
    # list() as well to change how many ids come back.
    result = service.users().messages().list(
        userId='me', labelIds=['INBOX'], q=query).execute()

    # The 'messages' key is absent when nothing matches; treat that as empty
    # instead of iterating over None.
    messages = result.get('messages') or []

    # messages is a list of dictionaries where each dictionary contains a
    # message id.
    for msg in messages:
        # Get the message from its id
        txt = service.users().messages().get(userId='me', id=msg['id']).execute()

        payload = txt.get('payload') or {}
        # Reset per message so a missing Subject header never reuses the
        # previous message's subject.
        subject = _header_value(payload.get('headers') or [], 'Subject')

        try:
            text = _decode_body_text(payload)
        except (ValueError, TypeError) as err:
            print('Skipping message %s: cannot decode body (%s)' % (msg['id'], err))
            continue
        if text is None:
            print('Skipping message %s: no body data found' % msg['id'])
            continue

        # The previous version also tried to write the result of
        # soup.body(), which is a list and always raised TypeError (hidden
        # by a bare except). That write is dropped, so the file content is
        # the same as what was effectively produced before.
        record = ''.join([
            '---------------------------------------------next letter---------------------------------------------',
            subject,
            '\r\n',
            text,
            '\r\n',
        ])
        with open(output_path, 'a', encoding='utf-8') as f:
            f.write(record)
  
  
# Run the Gmail export as soon as this snippet is executed.
getEmails()

2) 找到所有domain发来的消息,对转发邮件进行处理

# Read back the text dump and split it into one chunk per e-mail, using the
# separator line that precedes every record. The context manager makes sure
# the file handle is closed after reading.
with open('C:/Users/Eleni/test.txt', 'r', encoding='utf-8') as f:
    data = f.read()
s = data.split('---------------------------------------------next letter---------------------------------------------')

# Indices (into s) of the chunks that contain the word 'Domain'.
domain_list = [idx for idx, chunk in enumerate(s) if 'Domain' in chunk]

# NOTE(review): `final` is never filled in the visible code; kept in case
# later code relies on the name.
final = []

for idx in domain_list:
    new_list = s[idx].split('\n')
    word_list = []
    for pos, line in enumerate(new_list):
        # The first line of the chunk is always kept.
        if pos == 0:
            word_list.append(line)

        # Each label's value is read two lines below the label. Skip labels
        # so close to the end that no such line exists (this used to raise
        # IndexError).
        if pos + 2 >= len(new_list):
            continue
        value = new_list[pos + 2]

        if line == 'From:':
            word_list.append(value)
        if line == 'Email:' and '@' in value:
            word_list.append(value)
        # NOTE(review): is_number is not defined in this snippet; it must be
        # provided elsewhere before this code runs.
        if line == 'Phone:' and is_number(value):
            word_list.append(value)
    print(word_list)

3)对直接发过来的邮件进行处理

# For every chunk listed in domain_list, print selected lines of the chunk
# after dropping empty lines. Relies on `s` and `domain_list` built by the
# earlier parsing snippet.
for idx in domain_list:
    # Named raw_lines (not `list`) so the builtin is not overwritten.
    raw_lines = s[idx].split('\n')
    new_list = [line for line in raw_lines if line != '']

    # The fixed positions below need at least 15 non-empty lines; chunks with
    # a different layout are skipped instead of raising IndexError.
    if len(new_list) < 15:
        continue

    print(new_list[0])
    # NOTE(review): the slices presumably strip a fixed-width label prefix
    # from lines 12-14 - confirm against a real message.
    print(new_list[12][7:])
    print(new_list[13][9:])
    print(new_list[14][9:])

    # Print everything from line 15 up to (not including) each
    # '* Security Policy *' marker line.
    for pos, line in enumerate(new_list):
        if line == '* Security Policy *':
            for extra in new_list[15:pos]:
                print(extra)
    print('\n')