第一次用python爬取M站音乐


目标网站: https://www.missevan.com (M站)

工具使用

开发工具: Visual Studio Code(VS Code)

开发环境:python-3.9.7-amd64, Windows10

使用工具包:requests,json,os,tqdm

项目思路解析

第一步:通过 M 站首页(index)搜索框对应的接口,获取搜索结果中歌曲的 ID。接口 url 及对应代码如下:

https://www.missevan.com/sound/getsearch?s=
 1 url = "https://www.missevan.com/sound/getsearch?s="
 2     # userinput = input("请输入搜索内容:")
 3         param = {
 4         's': '一向纯荣',
 5         'page': 1,
 6         'type': 3,
 7         'page_size': 30
 8         }
 9         #请求链接
10         searchresult = requests.get(url=url, headers=self.headers, params=param).text
11         # 将请求内容转换为json   json.loads函数的使用,将字符串转化为字典
12         songidjson = json.loads(searchresult)
13         # 循环6次数据
14         for list_song in range(0,6):
15            
16             # 获取歌曲标题
17             songtitle = songidjson['info']['Datas'][list_song]['soundstr']
18             # # 添加到数组
19             songarrname.append(songtitle)
20             # 获取歌曲id
21             songid = songidjson['info']['Datas'][list_song]['id']
22             # 添加到数组
23             songarrid.append(songid)
第二步:拿到歌曲 ID 之后,再去爬取每首歌曲的链接(url: https://www.missevan.com/sound/getsound? ),对应代码如下:
 1   url = "https://www.missevan.com/sound/getsound?"
 2           
 3             param = {
 4                 # 发送歌曲ID数据
 5            'soundid': result[i]
 6             }
 7          #获取歌曲json
 8             list_song  = requests.get(url, headers=self.headers, params=param).json()
 9 
10          # 获取链接
11             songurl = list_song['info']['sound']['soundurl']
12             list_song_url.append(songurl)
13             # print(songurl)
14             song_list = list_song['info']['sound']['soundstr']
15             list_song_title.append(song_list)
最后进行批量下载。    

 完整代码:

  1 import requests
  2 import json
  3 import os
  4 # 下载进度条
  5 from tqdm import tqdm
  6 
  7 
  8 class Songinfo():
  9     headers = {
 10         'user-agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/94.0.4606.81 Safari/537.36'
 11         }
 12         #获取歌曲ID函数
 13     def Getsong_id(self):
 14         songarrid =[]
 15         songarrname=[]
 16 
 17         url = "https://www.missevan.com/sound/getsearch?s="
 18      userinput = input("请输入搜索内容:")
 19         param = {
 20         's':userinput ,
 21         'page': 1,
 22         'type': 3,
 23         'page_size': 30
 24         }
 25         #请求链接
 26         searchresult = requests.get(url=url, headers=self.headers, params=param).text
 27         # 将请求内容转换为json   json.loads函数的使用,将字符串转化为字典
 28         songidjson = json.loads(searchresult)
 29         # 循环6次数据
 30         for list_song in range(0,6):
 31            
 32             # 获取歌曲标题
 33             songtitle = songidjson['info']['Datas'][list_song]['soundstr']
 34             # # 添加到数组
 35             songarrname.append(songtitle)
 36             # 获取歌曲id
 37             songid = songidjson['info']['Datas'][list_song]['id']
 38             # 添加到数组
 39             songarrid.append(songid)
 40 
 41         return songarrid,songarrname
 42 
 43     #程序跳到第二步,会第一个调用这个程序,然后才会调用到上面的def函数
 44     def Get_songurl(self):
 45 
 46          # 创建一个文件夹
 47         if not os.path.exists('./M站爬取的音乐'):
 48 
 49             os.mkdir('./M站爬取的音乐')
 50         # 获取歌曲id
 51         result =self.Getsong_id()[0]
 52 
 53         # int类型的长度不能作为循环 只能固定循环
 54         list_song_url =[]
 55         list_song_title =[]
 56         for i in range(0,6):
 57             
 58             url = "https://www.missevan.com/sound/getsound?"
 59           
 60             param = {
 61                 # 发送歌曲ID数据
 62            'soundid': result[i]
 63             }
 64          #获取歌曲json
 65             list_song  = requests.get(url, headers=self.headers, params=param).json()
 66 
 67          # 获取链接
 68             songurl = list_song['info']['sound']['soundurl']
 69             list_song_url.append(songurl)
 70             # print(songurl)
 71             song_list = list_song['info']['sound']['soundstr']
 72             list_song_title.append(song_list)
 73             # print(song_list)
 74             # qingqiurl = songurl
 75         return list_song_url,list_song_title
 76 
 77     def __init__(self):
 78         # 从Get_songurl()函数拿返回值list_song_url
 79         respones_song_url= self.Get_songurl()[0]
 80         # print(respones_song_url)
 81         # 从Get_songurl()函数拿返回值list_song_title
 82         respones_song_title= self.Get_songurl()[1]
 83 
 84         # print(respones_song_title)
 85 
 86         # tqdm下载进度条
 87         for i in tqdm(range(0,6)):
 88             # 赋值歌曲链接
 89             qingqiurl = respones_song_url[i]
 90             # 赋值歌曲名字
 91             qingqiutitile = respones_song_title[i]
 92             # print(qingqiutitile)
 93             qingqiu = requests.get(url=qingqiurl,headers=self.headers).content
 94             # 下载的歌曲目录地址
 95             songname = "M站爬取的歌曲/"+qingqiutitile+".mp3"
 96 
 97             with open(songname,"wb")as fp:
 98                 fp.write(qingqiu)
 99                 # print(qingqiutitile)
100             print("下载完成!!!!")
101 
102 # __init__()方法又被称为构造器(constructor)  两个下划线开头的函数是声明该属性为私有
103  #程序跳到第一步
104  
105 if __name__ == "__main__":
106      Songinfo()

没看过 Python 基础教程就直接去看爬虫教程,遇到的问题真的让人头大。学一门语言,还是得先把语法基础打牢。另外,贪图方便用 VS Code 写代码,体验也不怎么人性化 = =