第一次用python爬取M站音乐
目标网站:https://www.missevan.com/
工具使用
开发工具: Visual Studio Code
开发环境:python-3.9.7-amd64, Windows10
使用工具包:requests,json,os,tqdm
项目思路解析
从M站的index搜索框获取搜索歌曲的ID(url: https://www.missevan.com/sound/getsearch?s=):

    url = "https://www.missevan.com/sound/getsearch?s="
    # userinput = input("请输入搜索内容:")
    param = {
        's': '一向纯荣',
        'page': 1,
        'type': 3,
        'page_size': 30
    }
    #请求链接
    searchresult = requests.get(url=url, headers=self.headers, params=param).text
    # 将请求内容转换为json json.loads函数的使用,将字符串转化为字典
    songidjson = json.loads(searchresult)
    # 循环6次数据
    for list_song in range(0,6):

        # 获取歌曲标题
        songtitle = songidjson['info']['Datas'][list_song]['soundstr']
        # # 添加到数组
        songarrname.append(songtitle)
        # 获取歌曲id
        songid = songidjson['info']['Datas'][list_song]['id']
        # 添加到数组
        songarrid.append(songid)

拿到歌曲ID就进行歌曲链接爬取(url: https://www.missevan.com/sound/getsound?):
    url = "https://www.missevan.com/sound/getsound?"

    param = {
        # 发送歌曲ID数据
        'soundid': result[i]
    }
    #获取歌曲json
    list_song = requests.get(url, headers=self.headers, params=param).json()

    # 获取链接
    songurl = list_song['info']['sound']['soundurl']
    list_song_url.append(songurl)
    # print(songurl)
    song_list = list_song['info']['sound']['soundstr']
    list_song_title.append(song_list)

最后进行批量下载。
完整代码:
import requests
import json
import os
# Download progress bar
from tqdm import tqdm


class Songinfo:
    """Search missevan.com for sounds and download the first hits as mp3 files.

    Instantiating the class runs the whole workflow: it prompts for a search
    term, looks up the matching sound ids, resolves each id to its audio URL
    and title, and writes every file into ``download_dir``.
    """

    headers = {
        'user-agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/94.0.4606.81 Safari/537.36'
    }
    # Single source of truth for the output folder, so the folder that is
    # created and the folder that is written to can never drift apart.
    download_dir = './M站爬取的音乐'
    # Upper bound on how many search hits are processed.
    max_songs = 6
    # Characters that Windows does not allow in file names, mapped to '_'.
    _ILLEGAL_FILENAME_CHARS = str.maketrans({c: '_' for c in '\\/:*?"<>|'})

    @staticmethod
    def _safe_filename(title):
        """Return *title* with characters that are illegal in file names replaced."""
        return str(title).translate(Songinfo._ILLEGAL_FILENAME_CHARS)

    def Getsong_id(self):
        """Prompt for a search term and return the ids and titles of the hits.

        Returns:
            A ``(ids, titles)`` tuple of two parallel lists holding at most
            ``max_songs`` entries each.
        """
        songarrid = []
        songarrname = []

        # NOTE(review): the URL already ends in "?s=" and 's' is passed again
        # through ``params`` -- presumably the server honours the params
        # value; confirm before cleaning the URL up.
        url = "https://www.missevan.com/sound/getsearch?s="
        userinput = input("请输入搜索内容:")
        param = {
            's': userinput,
            'page': 1,
            'type': 3,
            'page_size': 30,
        }
        searchresult = requests.get(url=url, headers=self.headers, params=param).text
        # json.loads turns the response text into a dict.
        songidjson = json.loads(searchresult)

        # Slice instead of indexing a fixed range so that a search returning
        # fewer than ``max_songs`` hits does not raise IndexError.
        for entry in songidjson['info']['Datas'][:self.max_songs]:
            songarrname.append(entry['soundstr'])
            songarrid.append(entry['id'])

        return songarrid, songarrname

    def Get_songurl(self):
        """Resolve the searched sound ids to download URLs and titles.

        Creates ``download_dir`` if needed, calls ``Getsong_id`` (which
        prompts the user) and queries the sound-detail endpoint once per id.

        Returns:
            A ``(urls, titles)`` tuple of two parallel lists.
        """
        os.makedirs(self.download_dir, exist_ok=True)

        result = self.Getsong_id()[0]

        list_song_url = []
        list_song_title = []
        url = "https://www.missevan.com/sound/getsound?"
        # Iterate over the ids actually returned rather than a fixed count.
        for soundid in result:
            param = {'soundid': soundid}
            list_song = requests.get(url, headers=self.headers, params=param).json()

            sound = list_song['info']['sound']
            list_song_url.append(sound['soundurl'])
            list_song_title.append(sound['soundstr'])

        return list_song_url, list_song_title

    def __init__(self):
        """Run the search-and-download workflow."""
        # Call Get_songurl() exactly once: each call prompts the user and
        # re-issues every network request.
        respones_song_url, respones_song_title = self.Get_songurl()

        # ``total`` lets tqdm draw a proper bar even though zip() has no len().
        songs = zip(respones_song_url, respones_song_title)
        for qingqiurl, qingqiutitile in tqdm(songs, total=len(respones_song_url)):
            qingqiu = requests.get(url=qingqiurl, headers=self.headers).content
            # Write into the same directory that Get_songurl() created.
            songname = os.path.join(
                self.download_dir, self._safe_filename(qingqiutitile) + ".mp3"
            )
            with open(songname, "wb") as fp:
                fp.write(qingqiu)
        print("下载完成!!!!")


# Entry point: constructing Songinfo triggers __init__, which drives the
# two helper methods above.
if __name__ == "__main__":
    Songinfo()
没看过Python基础教程就直接看爬虫教程,遇到的问题真的让人头大。学一门语言,还是得先把语法好好学扎实。另外,贪图方便用VS Code写代码,体验也不怎么人性化= =