批量下载小说网站文章


 1 using HtmlAgilityPack;
 2 using System;
 3 using System.Collections.Generic;
 4 using System.IO;
 5 using System.Net;
 6 using System.Text;
 7 
 8 namespace DownloadNovel
 9 {
10     class Program
11     {
12         static void Main(string[] args)
13         {
14             //小说网站的网址
15             string webSiteUrl = "http://www.biqugew.com";
16             //指定小说目录的网址
17             string NovelUrl = "http://www.biqugew.com/book/10/";
18             
19            
20 
21             DownNovel(webSiteUrl, NovelUrl);
22         }
23         private static void DownNovel(string webSiteUrl, string NovelUrl)
24         {
25             string[] split = { "
", "\r\n" }; 26 //指定小说的目录的 Xpath 27 string TableXpath = "/body[1]/div[1]/div[5]/div[1]/dl[1]/dd"; 28 //获取小说标题的 XPath 29 string TitleXpath = "/html[1]/body[1]/div[1]/div[3]/div[1]/div[2]/h1[1]"; 30 //获取指定小说的内容的 Xpath 31 string ContentsXpath = "/html/body/div/div[3]/div/div[3]"; 32 WebClient client = new WebClient { Encoding = Encoding.GetEncoding("GB2312") }; 33 HtmlNodeCollection nodes = null; 34 { 35 HtmlDocument doc = new HtmlDocument(); 36 //获取目录页 37 doc.LoadHtml(client.DownloadString(NovelUrl)); 38 nodes = doc.DocumentNode.SelectNodes(TableXpath); 39 } 40 //解析目录页 41 foreach (HtmlNode node in nodes) 42 { 43 HtmlDocument doc = new HtmlDocument(); 44 //获取小说单章的网站 45 string url = webSiteUrl + node.SelectSingleNode("a").Attributes["href"].Value; 46 //获取小说单章整个网页 47 doc.LoadHtml(client.DownloadString(url)); 48 //获取本章小说的标题 49 string title = doc.DocumentNode.SelectSingleNode(TitleXpath).InnerHtml; 50 //获取小说文本内容 doc.DocumentNode.SelectSingleNode("/html/body/div/div[3]/div/div[3]").OuterHtml 51 string str = doc.DocumentNode.SelectSingleNode(ContentsXpath).InnerHtml.Replace(" ", ""); 52 //过滤文本中的特殊字符和字符串 53 string aticale = ""; 54 foreach (var txt in str.Split(split, StringSplitOptions.RemoveEmptyEntries)) 55 { 56 if (!txt.Contains("")) 57 aticale += txt; 58 } 59 Console.WriteLine(title); 60 WriteLog(title + Environment.NewLine + aticale); 61 } 62 63 } 64 65 static void WriteLog(string msg) 66 { 67 string path = Environment.CurrentDirectory + "/novel/"; 68 if (!Directory.Exists(path)) Directory.CreateDirectory(path); 69 string fileName = DateTime.Now.ToString("yyyy-MM-dd"); 70 string filepath = path + fileName + ".txt"; 71 Stream fileStream = File.Open(filepath, FileMode.Append, FileAccess.Write, FileShare.Write); 72 StreamWriter writeAdapter = new StreamWriter(fileStream, Encoding.Default); 73 writeAdapter.WriteLine(msg); 74 writeAdapter.WriteLine(); 75 writeAdapter.Close(); 76 } 77 78 } 79 }

相关