-
Notifications
You must be signed in to change notification settings - Fork 0
/
Program.cs
143 lines (115 loc) · 5 KB
/
Program.cs
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
using HtmlAgilityPack;
using System.Linq;
using System;
using System.Collections.Generic;
using System.Net;
using System.Threading.Tasks;
namespace VioletCrawler
{
class Program
{
// Site root: relative hrefs scraped from pages are resolved against it, and the
// download endpoint URL is built by appending to it.
const string BASE_URL = "https://baigiang.violet.vn/";
// Single fetcher shared by every page load and file download in this class.
// NOTE(review): "cookie.json" is presumably a saved session-cookie file read by
// PageFetcher — confirm against the PageFetcher implementation.
static PageFetcher htmlWeb = new PageFetcher("cookie.json");
/// <summary>
/// Entry point. Asks for a listing URL and an optional favoured author, then for
/// every sub-page found on the listing picks one presentation and downloads it
/// into the current working directory.
/// </summary>
/// <param name="args">Command-line arguments (not used).</param>
static async Task Main(string[] args)
{
    // UTF-16 console encodings so non-ASCII text (e.g. Vietnamese names) can be typed and printed.
    Console.InputEncoding = System.Text.Encoding.Unicode;
    Console.OutputEncoding = System.Text.Encoding.Unicode;

    string url = GetVioletUrl();
    string authorName = GetFavoredAuthorName();
    Console.WriteLine(authorName);

    IEnumerable<Uri> urls = await GetSubUrls(url);
    foreach (Uri item in urls)
    {
        IEnumerable<Uri> pps = await GetPowerPointList(item.AbsoluteUri);
        string chosenPP = await ChooseFavoredAuthorOrMaxDownloadPP(pps, authorName);

        // The chooser returns an empty string when it found nothing to pick;
        // handing that to SavePP would fail, so skip this sub-page instead.
        if (string.IsNullOrEmpty(chosenPP))
        {
            Console.WriteLine($"Skipped (no presentation found): {item.AbsoluteUri}");
            continue;
        }

        await SavePP(chosenPP, Environment.CurrentDirectory);
    }
}
/// <summary>
/// Prompts on the console until the user enters a well-formed absolute URI.
/// </summary>
/// <returns>The accepted URL text with surrounding whitespace removed.</returns>
/// <exception cref="System.IO.EndOfStreamException">
/// Console input ended before a valid URL was entered.
/// </exception>
static string GetVioletUrl()
{
    Console.Write("Enter Violet subdirectory URL: ");
    while (true)
    {
        string line = Console.ReadLine();
        if (line == null)
        {
            // ReadLine returns null once input is exhausted; re-prompting would loop forever.
            throw new System.IO.EndOfStreamException("Console input ended before a valid URL was entered.");
        }

        // Stray spaces from a copy/paste should not make an otherwise valid URL fail.
        string url = line.Trim();
        if (Uri.IsWellFormedUriString(url, UriKind.Absolute))
        {
            return url;
        }

        Console.Write("Invalid URL! Enter Violet subdirectory URL: ");
    }
}
/// <summary>
/// Prompts for the name of the author whose presentations should be preferred.
/// </summary>
/// <returns>
/// The line exactly as typed, or an empty string when console input has ended.
/// Never null.
/// </returns>
static string GetFavoredAuthorName()
{
    Console.Write("Enter proper favored Author Name: ");

    // ReadLine returns null at end of input; return "" instead so callers can
    // safely call string methods on the result.
    return Console.ReadLine() ?? string.Empty;
}
/// <summary>
/// Collects the links listed on a subdirectory page: one per &lt;b&gt; element
/// that is a direct child of the page's content container.
/// </summary>
/// <param name="mainUrl">Address of the listing page to scan.</param>
/// <returns>Absolute URIs, resolved against <c>BASE_URL</c>, in document order.</returns>
/// <exception cref="InvalidOperationException">The expected page containers are missing.</exception>
static async Task<IEnumerable<Uri>> GetSubUrls(string mainUrl)
{
    return await GetContentLinks(mainUrl, "b");
}

/// <summary>
/// Collects the presentation links listed on a sub-page: one per &lt;li&gt; element
/// that is a direct child of the page's content container.
/// </summary>
/// <param name="url">Address of the page to scan.</param>
/// <returns>Absolute URIs, resolved against <c>BASE_URL</c>, in document order.</returns>
/// <exception cref="InvalidOperationException">The expected page containers are missing.</exception>
static async Task<IEnumerable<Uri>> GetPowerPointList(string url)
{
    return await GetContentLinks(url, "li");
}

/// <summary>
/// Shared scraper: loads a page, finds div.frame-main → div.content, and reads the
/// href of the first child of every direct child element named <paramref name="elementName"/>.
/// </summary>
static async Task<IEnumerable<Uri>> GetContentLinks(string pageUrl, string elementName)
{
    HtmlDocument doc = await htmlWeb.LoadPageAsync(pageUrl);

    HtmlNode mainFrame = doc.DocumentNode.Descendants("div").FirstOrDefault(d => d.HasClass("frame-main"));
    HtmlNode content = mainFrame?.Descendants("div").FirstOrDefault(d => d.HasClass("content"));
    if (content == null)
    {
        // Same exception type First() used to raise, but with a message that says what is wrong.
        throw new InvalidOperationException(
            $"Page layout not recognised (no div.frame-main / div.content container): {pageUrl}");
    }

    Uri baseUrl = new Uri(BASE_URL);
    List<Uri> urls = new List<Uri>();
    foreach (HtmlNode item in content.Elements(elementName))
    {
        string path = item.FirstChild?.GetAttributeValue("href", string.Empty);
        if (string.IsNullOrEmpty(path))
        {
            // No link here. Resolving "" would yield the site root, which is not a real entry.
            continue;
        }

        // TryCreate skips a malformed href instead of aborting the whole listing.
        if (Uri.TryCreate(baseUrl, path, out Uri link))
        {
            urls.Add(link);
        }
    }

    return urls;
}
/// <summary>
/// Picks one presentation page out of <paramref name="pps"/>. The first page whose
/// &lt;title&gt; contains the favoured author's name (case-insensitive) wins immediately;
/// otherwise the page with the highest download count is chosen.
/// </summary>
/// <param name="pps">Candidate presentation page addresses; each one is fetched.</param>
/// <param name="favoredAuthor">
/// Author name to look for in page titles. Null, empty or whitespace means "no preference".
/// </param>
/// <returns>
/// The absolute URL of the chosen page, or an empty string when <paramref name="pps"/> is empty.
/// </returns>
static async Task<string> ChooseFavoredAuthorOrMaxDownloadPP(IEnumerable<Uri> pps, string favoredAuthor)
{
    // Null can arrive when the console hit end-of-input; treat it like an empty name.
    string wantedAuthor = (favoredAuthor ?? string.Empty).Trim();
    string chosenPP = string.Empty;
    int maxDl = 0;

    foreach (Uri item in pps)
    {
        HtmlDocument doc = await htmlWeb.LoadPageAsync(item.AbsoluteUri);

        string title = doc.DocumentNode.SelectSingleNode("//head/title")?.InnerText ?? string.Empty;
        // Ordinal ignore-case avoids the culture-dependent surprises of ToLower().
        if (wantedAuthor.Length > 0
            && title.IndexOf(wantedAuthor, StringComparison.OrdinalIgnoreCase) >= 0)
        {
            return item.AbsoluteUri;
        }

        int dl = ReadDownloadCount(doc);
        // The first candidate is always accepted, so a list where every page has
        // 0 downloads still yields a page rather than an empty result.
        if (chosenPP.Length == 0 || dl > maxDl)
        {
            maxDl = dl;
            chosenPP = item.AbsoluteUri;
        }
    }

    return chosenPP;
}

/// <summary>
/// Reads the number that follows the "Số lượt tải: " (download count) label on a
/// presentation page. Returns 0 when the label or a parseable number is missing.
/// </summary>
static int ReadDownloadCount(HtmlDocument doc)
{
    HtmlNode label = doc.DocumentNode.Descendants("b")
        .FirstOrDefault(p => p.InnerText.Equals("Số lượt tải: "));
    string countText = label?.NextSibling?.InnerText;

    int count;
    bool parsed = int.TryParse(
        countText,
        System.Globalization.NumberStyles.Integer,
        System.Globalization.CultureInfo.InvariantCulture,
        out count);
    return parsed ? count : 0;
}
/// <summary>
/// Downloads the presentation shown on <paramref name="ppUrl"/>: loads the page, reads the
/// presentation id out of the download anchor's <c>onclick</c> script, builds the download
/// URL from it and hands that to the shared fetcher.
/// </summary>
/// <param name="ppUrl">Address of the presentation page.</param>
/// <param name="dirname">Directory passed to the fetcher as the download destination.</param>
/// <exception cref="ArgumentException"><paramref name="ppUrl"/> is null or empty.</exception>
/// <exception cref="InvalidOperationException">
/// The page does not contain the expected download anchor, or its script is too short
/// to contain an id.
/// </exception>
static async Task SavePP(string ppUrl, string dirname)
{
    if (string.IsNullOrEmpty(ppUrl))
    {
        throw new ArgumentException("A presentation page URL is required.", nameof(ppUrl));
    }

    // Fixed position and length of the id inside the onclick script.
    // NOTE(review): these offsets depend on the exact script text the site emits and
    // on ids being 7 characters long — confirm against a live page.
    const int IdOffset = 68;
    const int IdLength = 7;

    HtmlDocument doc = await htmlWeb.LoadPageAsync(ppUrl);

    // The title is only used for the progress message; fall back to the URL if it is absent.
    string title = doc.DocumentNode.SelectSingleNode("//head/title")?.InnerText ?? ppUrl;

    // div.frame-main → div.doc → h2 → label → a
    HtmlNode anchor = doc.DocumentNode.Descendants("div").FirstOrDefault(d => d.HasClass("frame-main"))
        ?.Descendants("div").FirstOrDefault(d => d.HasClass("doc"))
        ?.Element("h2")
        ?.Element("label")
        ?.Element("a");
    if (anchor == null)
    {
        throw new InvalidOperationException($"Download link not found on page: {ppUrl}");
    }

    string jsCode = anchor.GetAttributeValue("onclick", string.Empty);
    if (jsCode.Length < IdOffset + IdLength)
    {
        throw new InvalidOperationException($"Unexpected download script on page: {ppUrl}");
    }

    string id = jsCode.Substring(IdOffset, IdLength);
    string url = $"{BASE_URL}present/download/pr_id/{id}/t/{DateTimeOffset.UtcNow.ToUnixTimeSeconds()}";

    // NOTE(review): if PageFetcher.DownloadFile returns a Task it should be awaited here — confirm.
    htmlWeb.DownloadFile(url, dirname);
    Console.WriteLine($"Downloaded: {title}");
}
}
}