diff --git a/lib/brain/dio_manager.dart b/lib/brain/dio_manager.dart
index 3f4a59a..d573ef4 100644
--- a/lib/brain/dio_manager.dart
+++ b/lib/brain/dio_manager.dart
@@ -1,5 +1,22 @@
 import 'package:dio/dio.dart';
+import 'package:dio_smart_retry/dio_smart_retry.dart';
 
+/// Builds a [Dio] client that accepts every HTTP status (callers inspect
+/// `statusCode` themselves) and carries a retry interceptor (3 retries).
 Dio dio() {
-  return Dio()..options = BaseOptions(validateStatus: (status) => true);
+  final dio_ = Dio();
+  dio_.options = BaseOptions(validateStatus: (status) => true);
+  dio_.interceptors.add(
+    RetryInterceptor(
+      dio: dio_,
+      logPrint: print,
+      retries: 3,
+      retryDelays: const [
+        Duration(seconds: 2),
+        Duration(seconds: 3),
+        Duration(seconds: 5),
+      ],
+    ),
+  );
+  return dio_;
 }
diff --git a/lib/extractor/general/national/bangladesh/prothamalo.dart b/lib/extractor/general/national/bangladesh/prothamalo.dart
new file mode 100644
index 0000000..f8acfe0
--- /dev/null
+++ b/lib/extractor/general/national/bangladesh/prothamalo.dart
@@ -0,0 +1,159 @@
+import 'package:raven/brain/dio_manager.dart';
+import 'package:raven/model/article.dart';
+import 'package:raven/model/publisher.dart';
+
+/// Extractor for the Bengali edition of Prothom Alo, backed by the site's
+/// JSON endpoints (`/api/v1/collections` and `/route-data.json`).
+class ProthamAlo extends Publisher {
+  @override
+  String get name => "প্রথম আলো";
+
+  @override
+  String get homePage => "https://www.prothomalo.com";
+
+  @override
+  Future<Map<String, String>> get categories => extractCategories();
+
+  @override
+  Category get mainCategory => Category.bangladesh;
+
+  @override
+  bool get hasSearchSupport => true;
+
+  /// Returns the fixed table of display names to collection slugs.
+  Future<Map<String, String>> extractCategories() async {
+    return {
+      "সর্বশেষ": "latest",
+      "রাজনীতি": "politics",
+      "বাংলাদেশ": "bangladesh",
+      "অপরাধ": "crime-bangladesh",
+      "বিশ্ব": "world-all",
+      "বাণিজ্য": "business-all",
+      "মতামত": "opinion-all",
+      "খেলা": "sports-all",
+      "বিনোদন": "entertainment-all",
+      "জীবনযাপন": "lifestyle-all",
+    };
+  }
+
+  /// Fetches one page (10 items) of [category] and maps every item to a
+  /// [NewsArticle] with empty content; yields an empty set on a non-200 reply.
+  @override
+  Future<Set<NewsArticle>> categoryArticles(
+      {String category = "latest", int page = 1}) async {
+    Set<NewsArticle> articles = {};
+    var limit = 10;
+    var offset = limit * (page - 1);
+    String apiUrl =
+        "$homePage/api/v1/collections/$category?offset=$offset&limit=$limit";
+    await dio().get(apiUrl).then(
+      (response) {
+        if (response.statusCode == 200) {
+          var articlesData = response.data;
+          var data = articlesData["items"];
+          for (var element in data) {
+            var title = element['item']['headline'][0];
+            var author = element['story']["author-name"];
+            // Null-aware lookups: a story without an "alternative" image
+            // falls through to "" instead of throwing on a null receiver.
+            var thumbnail = element['story']["hero-image-s3-key"] ??
+                element['story']["alternative"]?["home"]?["default"]?["hero-image"]
+                    ?["hero-image-s3-key"] ??
+                "";
+            var time = element['story']["published-at"];
+            var articleUrl = element['story']['slug'];
+            var excerpt = element['story']['summary'];
+            var tags =
+                element['story']['sections'].map((e) => e['name']).toList();
+            articles.add(
+              NewsArticle(
+                publisher: name,
+                title: title ?? "",
+                content: "",
+                excerpt: excerpt ?? "",
+                author: author ?? "",
+                url: articleUrl,
+                thumbnail: thumbnail ?? "",
+                category: category,
+                publishedAt: time,
+                tags: List<String>.from(tags),
+              ),
+            );
+          }
+        }
+      },
+    );
+
+    return articles;
+  }
+
+  /// Searches stories for [searchQuery]; paging matches [categoryArticles]
+  /// and the raw query is stored as each article's category.
+  @override
+  Future<Set<NewsArticle>> searchedArticles(
+      {required String searchQuery, int page = 1}) async {
+    Set<NewsArticle> articles = {};
+    var limit = 10;
+    var offset = limit * (page - 1);
+    // Encode the query so spaces, '&' and non-ASCII text stay inside `q`.
+    String apiUrl =
+        "$homePage/route-data.json?path=/search&q=${Uri.encodeQueryComponent(searchQuery)}&offset=$offset&limit=$limit";
+    await dio().get(apiUrl).then(
+      (response) {
+        if (response.statusCode == 200) {
+          var articlesData = response.data;
+          var data = articlesData["data"]["stories"];
+          for (var element in data) {
+            var title = element['headline'][0];
+            var author = element["author-name"];
+            var thumbnail = element["hero-image-s3-key"] ?? "";
+            var time = element["published-at"];
+            var articleUrl = element['slug'];
+            var excerpt = element['summary'];
+            var tags = element['sections'].map((e) => e['name']).toList();
+            articles.add(NewsArticle(
+              publisher: name,
+              title: title ?? "",
+              content: "",
+              excerpt: excerpt ?? "",
+              author: author ?? "",
+              url: articleUrl,
+              thumbnail: thumbnail ?? "",
+              category: searchQuery,
+              publishedAt: time,
+              tags: List<String>.from(tags),),);
+          }
+        }
+      },
+    );
+
+    return articles;
+  }
+
+  /// Loads the story JSON for [newsArticle] and builds `content` from the
+  /// first element of each card: text is appended as-is, images as a tag.
+  @override
+  Future<NewsArticle> article(NewsArticle newsArticle) async {
+    await dio()
+        .get('$homePage/route-data.json?path=${newsArticle.url}')
+        .then((response) {
+      if (response.statusCode == 200) {
+        var data = (response.data);
+        var content = "";
+        var cards = data["data"]["story"]["cards"];
+        for (var card in cards) {
+          if (card["story-elements"][0]["type"] == "text") {
+            content += card["story-elements"][0]["text"];
+          } else if (card["story-elements"][0]["type"] == "image") {
+            var image = card["story-elements"][0]["image-s3-key"];
+            // NOTE(review): the markup inside this literal was lost when the
+            // patch was extracted; reconstructed — confirm tag and image host.
+            content += "<br><img src='$image'><br>";
+          }
+        }
+        newsArticle.content = content;
+      }
+    });
+    return newsArticle;
+  }
+}
diff --git a/lib/extractor/general/national/bangladesh/prothamalo_english.dart b/lib/extractor/general/national/bangladesh/prothamalo_english.dart
new file mode 100644
index 0000000..eec51ad
--- /dev/null
+++ b/lib/extractor/general/national/bangladesh/prothamalo_english.dart
@@ -0,0 +1,157 @@
+import 'package:raven/brain/dio_manager.dart';
+import 'package:raven/model/article.dart';
+import 'package:raven/model/publisher.dart';
+
+/// Extractor for the English edition of Prothom Alo, backed by the site's
+/// JSON endpoints (`/api/v1/collections` and `/route-data.json`).
+class ProthamAloEn extends Publisher {
+  @override
+  String get name => "Prothom Alo";
+
+  @override
+  String get homePage => "https://en.prothomalo.com";
+
+  @override
+  Future<Map<String, String>> get categories => extractCategories();
+
+  @override
+  Category get mainCategory => Category.bangladesh;
+
+  @override
+  bool get hasSearchSupport => true;
+
+  /// Returns the fixed table of display names to collection slugs.
+  Future<Map<String, String>> extractCategories() async {
+    return {
+      "Bangladesh": "bangladesh",
+      "International": "international",
+      "Sports": "sports",
+      "Opinion": "opinion",
+      "Business": "business",
+      "Youth": "youth",
+      "Entertainment": "entertainment",
+      "Lifestyle": "lifestyle",
+    };
+  }
+
+  /// Fetches one page (10 items) of [category] and maps every item to a
+  /// [NewsArticle] with empty content; yields an empty set on a non-200 reply.
+  @override
+  Future<Set<NewsArticle>> categoryArticles(
+      {String category = "latest", int page = 1}) async {
+    Set<NewsArticle> articles = {};
+    var limit = 10;
+    var offset = limit * (page - 1);
+    String apiUrl =
+        "$homePage/api/v1/collections/$category?offset=$offset&limit=$limit";
+    await dio().get(apiUrl).then(
+      (response) {
+        if (response.statusCode == 200) {
+          var articlesData = response.data;
+          var data = articlesData["items"];
+          for (var element in data) {
+            var title = element['item']['headline'][0];
+            var author = element['story']["author-name"];
+            // Null-aware lookups: a story without an "alternative" image
+            // falls through to "" instead of throwing on a null receiver.
+            var thumbnail = element['story']["hero-image-s3-key"] ??
+                element['story']["alternative"]?["home"]?["default"]?["hero-image"]
+                    ?["hero-image-s3-key"] ??
+                "";
+            var time = element['story']["published-at"];
+            var articleUrl = element['story']['slug'];
+            var excerpt = element['story']['summary'];
+            var tags =
+                element['story']['sections'].map((e) => e['name']).toList();
+            articles.add(
+              NewsArticle(
+                publisher: name,
+                title: title ?? "",
+                content: "",
+                excerpt: excerpt ?? "",
+                author: author ?? "",
+                url: articleUrl,
+                thumbnail: thumbnail ?? "",
+                category: category,
+                publishedAt: time,
+                tags: List<String>.from(tags),
+              ),
+            );
+          }
+        }
+      },
+    );
+
+    return articles;
+  }
+
+  /// Searches stories for [searchQuery]; paging matches [categoryArticles]
+  /// and the raw query is stored as each article's category.
+  @override
+  Future<Set<NewsArticle>> searchedArticles(
+      {required String searchQuery, int page = 1}) async {
+    Set<NewsArticle> articles = {};
+    var limit = 10;
+    var offset = limit * (page - 1);
+    // Encode the query so spaces, '&' and non-ASCII text stay inside `q`.
+    String apiUrl =
+        "$homePage/route-data.json?path=/search&q=${Uri.encodeQueryComponent(searchQuery)}&offset=$offset&limit=$limit";
+    await dio().get(apiUrl).then(
+      (response) {
+        if (response.statusCode == 200) {
+          var articlesData = response.data;
+          var data = articlesData["data"]["stories"];
+          for (var element in data) {
+            var title = element['headline'][0];
+            var author = element["author-name"];
+            var thumbnail = element["hero-image-s3-key"] ?? "";
+            var time = element["published-at"];
+            var articleUrl = element['slug'];
+            var excerpt = element['summary'];
+            var tags = element['sections'].map((e) => e['name']).toList();
+            articles.add(NewsArticle(
+              publisher: name,
+              title: title ?? "",
+              content: "",
+              excerpt: excerpt ?? "",
+              author: author ?? "",
+              url: articleUrl,
+              thumbnail: thumbnail ?? "",
+              category: searchQuery,
+              publishedAt: time,
+              tags: List<String>.from(tags),),);
+          }
+        }
+      },
+    );
+
+    return articles;
+  }
+
+  /// Loads the story JSON for [newsArticle] and builds `content` from the
+  /// first element of each card: text is appended as-is, images as a tag.
+  @override
+  Future<NewsArticle> article(NewsArticle newsArticle) async {
+    await dio()
+        .get('$homePage/route-data.json?path=${newsArticle.url}')
+        .then((response) {
+      if (response.statusCode == 200) {
+        var data = (response.data);
+        var content = "";
+        var cards = data["data"]["story"]["cards"];
+        for (var card in cards) {
+          if (card["story-elements"][0]["type"] == "text") {
+            content += card["story-elements"][0]["text"];
+          } else if (card["story-elements"][0]["type"] == "image") {
+            var image = card["story-elements"][0]["image-s3-key"];
+            // NOTE(review): the markup inside this literal was lost when the
+            // patch was extracted; reconstructed — confirm tag and image host.
+            content += "<br><img src='$image'><br>";
+          }
+        }
+        newsArticle.content = content;
+      }
+    });
+    return newsArticle;
+  }
+}
diff --git a/lib/extractor/general/national/china/rfa_cantonese.dart b/lib/extractor/general/national/china/rfa_cantonese.dart
new file mode 100644
index 0000000..caa95f8
--- /dev/null
+++ b/lib/extractor/general/national/china/rfa_cantonese.dart
@@ -0,0 +1,187 @@
+import 'package:dio/dio.dart';
+import 'package:html/parser.dart' as html_parser;
+import 'package:raven/brain/dio_manager.dart';
+import 'package:raven/model/article.dart';
+import 'package:raven/model/publisher.dart';
+import 'package:raven/utils/time.dart';
+
+/// Extractor for the Cantonese service of Radio Free Asia; scrapes the
+/// site's HTML pages (navigation, story archive, search and story).
+class RfaCantonese extends Publisher {
+  @override
+  String get name => "RFA 自由亞洲電台粵語部";
+
+  @override
+  String get homePage => "https://www.rfa.org/cantonese";
+
+  /// Maps each navigation link label to its path relative to [homePage];
+  /// links without an `href` or pointing at [homePage] itself are skipped.
+  @override
+  Future<Map<String, String>> get categories async {
+    Map<String, String> map = {"Cantonese": "cantonese"};
+    await dio().get(homePage, options: options).then(
+      (response) {
+        if (response.statusCode == 200) {
+          var document = html_parser.parse(response.data);
+          var elements = document.querySelectorAll("*[class*='nav-'] a");
+          for (var element in elements) {
+            // Anchors may lack an href; skip them rather than assert non-null.
+            final href = element.attributes["href"];
+            if (href == null || href == homePage) {
+              continue;
+            }
+            map.putIfAbsent(
+              element.text,
+              () => href.replaceAll("$homePage/", ""),
+            );
+          }
+        }
+      },
+    );
+    return map..removeWhere((key, value) => ["video", "audio", "send_news_form"].contains(value) || value.contains("about/"),);
+  }
+
+  @override
+  Category get mainCategory => Category.world;
+
+  @override
+  bool get hasSearchSupport => true;
+
+  /// Browser-like headers sent with every request made by this extractor.
+  Options options = Options(headers: {
+    "User-Agent":
+        "Mozilla/5.0 (Linux; U; Android 4.0.3; ko-kr; LG-L160L Build/IML74K) AppleWebkit/534.30 (KHTML, like Gecko) Version/4.0 Mobile Safari/534.30",
+    "Accept": "*/*",
+    "Accept-Encoding": "gzip, deflate, br",
+    "Connection": "keep-alive",
+  });
+
+  /// Downloads the story page and fills in its content, author and thumbnail.
+  @override
+  Future<NewsArticle> article(NewsArticle newsArticle) async {
+    await dio().get(newsArticle.url, options: options).then((response) {
+      if (response.statusCode == 200) {
+        var document = html_parser.parse(response.data);
+        var content = document.querySelector('#storytext')?.text ?? "";
+        var author = document.querySelector("#story_byline")?.text ?? "";
+        // An <img> carries its URL in `src`; its text content is always empty.
+        var thumbnail = document.querySelector("#headerimg img")?.attributes["src"] ?? "";
+
+        newsArticle = newsArticle.fill(
+          content: content,
+          author: author,
+          tags: [],
+          thumbnail: thumbnail,
+        );
+      }
+    });
+
+    return newsArticle;
+  }
+
+  /// Lists one archive page of [category]; the offset advances 15 items per
+  /// [page]. Returns an empty set when the response status is not 200.
+  @override
+  Future<Set<NewsArticle>> categoryArticles({
+    String category = "news",
+    int page = 1,
+  }) async {
+    Set<NewsArticle> articles = {};
+    var limit = 15;
+    var offset = (page - 1) * limit;
+
+    await dio().get(
+      "$homePage/$category/story_archive?b_start:int=$offset",
+      options: options,
+    )
+        .then(
+      (response) {
+        if (response.statusCode == 200) {
+          var document = html_parser.parse(response.data);
+          var data = document.querySelectorAll(".sectionteaser");
+          for (var article in data) {
+            var title = article.querySelector("span")?.text ?? "";
+            var thumbnail =
+                article.querySelector("img")?.attributes["src"] ?? "";
+            var publishedAt = article.querySelector(".story_date")?.text ?? "";
+            // NOTE(review): class selector assumed (was a bare tag name) — confirm.
+            var excerpt =
+                article.querySelector(".story_description")?.text ?? "";
+            var url = article.querySelector("a")?.attributes["href"] ?? "";
+
+            articles.add(
+              NewsArticle(
+                publisher: name,
+                title: title,
+                content: "",
+                excerpt: excerpt,
+                author: "",
+                url: url,
+                tags: [],
+                thumbnail: thumbnail,
+                publishedAt: stringToUnix(publishedAt, format: "yyyy-MM-dd"),
+                category: category,
+              ),
+            );
+          }
+        }
+      },
+    );
+
+    return articles;
+  }
+
+  /// Runs the site search for [searchQuery]; offset advances 30 per [page].
+  @override
+  Future<Set<NewsArticle>> searchedArticles({
+    required String searchQuery,
+    int page = 1,
+  }) async {
+    Set<NewsArticle> articles = {};
+    var limit = 30;
+    var offset = (page - 1) * limit;
+    // Encode the query so spaces, '&' and non-ASCII text stay in the value.
+    await dio()
+        .get(
+            "$homePage/@@search?SearchableText=${Uri.encodeQueryComponent(searchQuery)}&sort_on=Date&b_start:int=$offset",
+            options: options)
+        .then(
+      (response) {
+        if (response.statusCode == 200) {
+          var document = html_parser.parse(response.data);
+          var data = document.querySelectorAll(".searchresult");
+          for (var article in data) {
+            var title = article.querySelector("a.state-published")?.text ?? "";
+            var thumbnail =
+                article.querySelector("img")?.attributes["src"] ?? "";
+            var publishedAt =
+                article.querySelector(".searchresultdate")?.text.trim() ?? "";
+            var excerpt =
+                article.querySelector(".croppedDescription")?.text ?? "";
+            var url = article
+                    .querySelector("a.state-published")
+                    ?.attributes["href"] ??
+                "";
+
+            articles.add(
+              NewsArticle(
+                publisher: name,
+                title: title,
+                content: "",
+                excerpt: excerpt,
+                author: "",
+                url: url,
+                tags: [],
+                thumbnail: thumbnail,
+                publishedAt: stringToUnix(publishedAt, format: "yyyy-MM-dd"),
+                category: searchQuery,
+              ),
+            );
+          }
+        }
+      },
+    );
+
+    return articles;
+  }
+}
diff --git a/lib/extractor/general/national/china/rfa_mandarin.dart b/lib/extractor/general/national/china/rfa_mandarin.dart
new file mode 100644
index 0000000..784a9ec
--- /dev/null
+++ b/lib/extractor/general/national/china/rfa_mandarin.dart
@@ -0,0 +1,187 @@
+import 'package:dio/dio.dart';
+import 'package:html/parser.dart' as html_parser;
+import 'package:raven/brain/dio_manager.dart';
+import 'package:raven/model/article.dart';
+import 'package:raven/model/publisher.dart';
+import 'package:raven/utils/time.dart';
+
+/// Extractor for the Mandarin service of Radio Free Asia; scrapes the
+/// site's HTML pages (navigation, story archive, search and story).
+class RfaMandarin extends Publisher {
+  @override
+  String get name => "自由亚洲电台";
+
+  @override
+  String get homePage => "https://www.rfa.org/mandarin";
+
+  /// Maps each navigation link label to its path relative to [homePage];
+  /// links without an `href` or pointing at [homePage] itself are skipped.
+  @override
+  Future<Map<String, String>> get categories async {
+    Map<String, String> map = {"Mandarin": "mandarin"};
+    await dio().get(homePage, options: options).then(
+      (response) {
+        if (response.statusCode == 200) {
+          var document = html_parser.parse(response.data);
+          var elements = document.querySelectorAll("*[class*='nav-'] a");
+          for (var element in elements) {
+            // Anchors may lack an href; skip them rather than assert non-null.
+            final href = element.attributes["href"];
+            if (href == null || href == homePage) {
+              continue;
+            }
+            map.putIfAbsent(
+              element.text,
+              () => href.replaceAll("$homePage/", ""),
+            );
+          }
+        }
+      },
+    );
+    return map;
+  }
+
+  @override
+  Category get mainCategory => Category.world;
+
+  @override
+  bool get hasSearchSupport => true;
+
+  /// Browser-like headers sent with every request made by this extractor.
+  Options options = Options(headers: {
+    "User-Agent":
+        "Mozilla/5.0 (Linux; U; Android 4.0.3; ko-kr; LG-L160L Build/IML74K) AppleWebkit/534.30 (KHTML, like Gecko) Version/4.0 Mobile Safari/534.30",
+    "Accept": "*/*",
+    "Accept-Encoding": "gzip, deflate, br",
+    "Connection": "keep-alive",
+  });
+
+  /// Downloads the story page and fills in its content, author and thumbnail.
+  @override
+  Future<NewsArticle> article(NewsArticle newsArticle) async {
+    await dio().get(newsArticle.url, options: options).then((response) {
+      if (response.statusCode == 200) {
+        var document = html_parser.parse(response.data);
+        var content = document.querySelector('#storytext')?.text ?? "";
+        var author = document.querySelector("#story_byline")?.text ?? "";
+        // An <img> carries its URL in `src`; its text content is always empty.
+        var thumbnail = document.querySelector("#headerimg img")?.attributes["src"] ?? "";
+
+        newsArticle = newsArticle.fill(
+          content: content,
+          author: author,
+          tags: [],
+          thumbnail: thumbnail,
+        );
+      }
+    });
+
+    return newsArticle;
+  }
+
+  /// Lists one archive page of [category]; the offset advances 15 items per
+  /// [page]. Returns an empty set when the response status is not 200.
+  @override
+  Future<Set<NewsArticle>> categoryArticles({
+    String category = "news",
+    int page = 1,
+  }) async {
+    Set<NewsArticle> articles = {};
+    var limit = 15;
+    var offset = (page - 1) * limit;
+
+    await dio().get(
+      "$homePage/$category/story_archive?b_start:int=$offset",
+      options: options,
+    )
+        .then(
+      (response) {
+        if (response.statusCode == 200) {
+          var document = html_parser.parse(response.data);
+          var data = document.querySelectorAll(".sectionteaser");
+          for (var article in data) {
+            var title = article.querySelector("span")?.text ?? "";
+            var thumbnail =
+                article.querySelector("img")?.attributes["src"] ?? "";
+            var publishedAt = article.querySelector(".story_date")?.text ?? "";
+            // NOTE(review): class selector assumed (was a bare tag name) — confirm.
+            var excerpt =
+                article.querySelector(".story_description")?.text ?? "";
+            var url = article.querySelector("a")?.attributes["href"] ?? "";
+
+            articles.add(
+              NewsArticle(
+                publisher: name,
+                title: title,
+                content: "",
+                excerpt: excerpt,
+                author: "",
+                url: url,
+                tags: [],
+                thumbnail: thumbnail,
+                publishedAt: stringToUnix(publishedAt, format: "yyyy-MM-dd"),
+                category: category,
+              ),
+            );
+          }
+        }
+      },
+    );
+
+    return articles;
+  }
+
+  /// Runs the site search for [searchQuery]; offset advances 30 per [page].
+  @override
+  Future<Set<NewsArticle>> searchedArticles({
+    required String searchQuery,
+    int page = 1,
+  }) async {
+    Set<NewsArticle> articles = {};
+    var limit = 30;
+    var offset = (page - 1) * limit;
+    // Encode the query so spaces, '&' and non-ASCII text stay in the value.
+    await dio()
+        .get(
+            "$homePage/@@search?SearchableText=${Uri.encodeQueryComponent(searchQuery)}&sort_on=Date&b_start:int=$offset",
+            options: options)
+        .then(
+      (response) {
+        if (response.statusCode == 200) {
+          var document = html_parser.parse(response.data);
+          var data = document.querySelectorAll(".searchresult");
+          for (var article in data) {
+            var title = article.querySelector("a.state-published")?.text ?? "";
+            var thumbnail =
+                article.querySelector("img")?.attributes["src"] ?? "";
+            var publishedAt =
+                article.querySelector(".searchresultdate")?.text.trim() ?? "";
+            var excerpt =
+                article.querySelector(".croppedDescription")?.text ?? "";
+            var url = article
+                    .querySelector("a.state-published")
+                    ?.attributes["href"] ??
+                "";
+
+            articles.add(
+              NewsArticle(
+                publisher: name,
+                title: title,
+                content: "",
+                excerpt: excerpt,
+                author: "",
+                url: url,
+                tags: [],
+                thumbnail: thumbnail,
+                publishedAt: stringToUnix(publishedAt, format: "yyyy-MM-dd"),
+                category: searchQuery,
+              ),
+            );
+          }
+        }
+      },
+    );
+
+    return articles;
+  }
+}
diff --git a/lib/extractor/general/national/china/rfa_tibetan.dart b/lib/extractor/general/national/china/rfa_tibetan.dart
new file mode 100644
index 0000000..5c75c0f
--- /dev/null
+++ b/lib/extractor/general/national/china/rfa_tibetan.dart
@@ -0,0 +1,187 @@
+import 'package:dio/dio.dart';
+import 'package:html/parser.dart' as html_parser;
+import 'package:raven/brain/dio_manager.dart';
+import 'package:raven/model/article.dart';
+import 'package:raven/model/publisher.dart';
+import 'package:raven/utils/time.dart';
+
+/// Extractor for the Tibetan service of Radio Free Asia; scrapes the
+/// site's HTML pages (navigation, story archive, search and story).
+class RfaTibetan extends Publisher {
+  @override
+  String get name => "ཨེ་ཤེ་ཡ་རང་དབང་རླུང་འཕྲིན་ཁང་";
+
+  @override
+  String get homePage => "https://www.rfa.org/tibetan";
+
+  /// Maps each navigation link label to its path relative to [homePage];
+  /// links without an `href` or pointing at [homePage] itself are skipped.
+  @override
+  Future<Map<String, String>> get categories async {
+    Map<String, String> map = {"Tibetan": "tibetan"};
+    await dio().get(homePage, options: options).then(
+      (response) {
+        if (response.statusCode == 200) {
+          var document = html_parser.parse(response.data);
+          var elements = document.querySelectorAll(".header_top li a");
+          for (var element in elements) {
+            // Anchors may lack an href; skip them rather than assert non-null.
+            final href = element.attributes["href"];
+            if (href == null || href == homePage) {
+              continue;
+            }
+            map.putIfAbsent(
+              element.text,
+              () => href.replaceAll("$homePage/", ""),
+            );
+          }
+        }
+      },
+    );
+    return map..removeWhere((key, value) => value=="video",);
+  }
+
+  @override
+  Category get mainCategory => Category.world;
+
+  @override
+  bool get hasSearchSupport => true;
+
+  /// Browser-like headers sent with every request made by this extractor.
+  Options options = Options(headers: {
+    "User-Agent":
+        "Mozilla/5.0 (Linux; U; Android 4.0.3; ko-kr; LG-L160L Build/IML74K) AppleWebkit/534.30 (KHTML, like Gecko) Version/4.0 Mobile Safari/534.30",
+    "Accept": "*/*",
+    "Accept-Encoding": "gzip, deflate, br",
+    "Connection": "keep-alive",
+  });
+
+  /// Downloads the story page and fills in its content, author and thumbnail.
+  @override
+  Future<NewsArticle> article(NewsArticle newsArticle) async {
+    await dio().get(newsArticle.url, options: options).then((response) {
+      if (response.statusCode == 200) {
+        var document = html_parser.parse(response.data);
+        var content = document.querySelector('#storytext')?.text ?? "";
+        var author = document.querySelector("#story_byline")?.text ?? "";
+        // An <img> carries its URL in `src`; its text content is always empty.
+        var thumbnail = document.querySelector("#headerimg img")?.attributes["src"] ?? "";
+
+        newsArticle = newsArticle.fill(
+          content: content,
+          author: author,
+          tags: [],
+          thumbnail: thumbnail,
+        );
+      }
+    });
+
+    return newsArticle;
+  }
+
+  /// Lists one archive page of [category]; the offset advances 15 items per
+  /// [page]. Returns an empty set when the response status is not 200.
+  @override
+  Future<Set<NewsArticle>> categoryArticles({
+    String category = "news",
+    int page = 1,
+  }) async {
+    Set<NewsArticle> articles = {};
+    var limit = 15;
+    var offset = (page - 1) * limit;
+
+    await dio().get(
+      "$homePage/$category/story_archive?b_start:int=$offset",
+      options: options,
+    )
+        .then(
+      (response) {
+        if (response.statusCode == 200) {
+          var document = html_parser.parse(response.data);
+          var data = document.querySelectorAll(".sectionteaser");
+          for (var article in data) {
+            var title = article.querySelector("span")?.text ?? "";
+            var thumbnail =
+                article.querySelector("img")?.attributes["src"] ?? "";
+            var publishedAt = article.querySelector(".story_date")?.text ?? "";
+            // NOTE(review): class selector assumed (was a bare tag name) — confirm.
+            var excerpt =
+                article.querySelector(".story_description")?.text ?? "";
+            var url = article.querySelector("a")?.attributes["href"] ?? "";
+
+            articles.add(
+              NewsArticle(
+                publisher: name,
+                title: title,
+                content: "",
+                excerpt: excerpt,
+                author: "",
+                url: url,
+                tags: [],
+                thumbnail: thumbnail,
+                publishedAt: stringToUnix(publishedAt, format: "yyyy-MM-dd"),
+                category: category,
+              ),
+            );
+          }
+        }
+      },
+    );
+
+    return articles;
+  }
+
+  /// Runs the site search for [searchQuery]; offset advances 30 per [page].
+  @override
+  Future<Set<NewsArticle>> searchedArticles({
+    required String searchQuery,
+    int page = 1,
+  }) async {
+    Set<NewsArticle> articles = {};
+    var limit = 30;
+    var offset = (page - 1) * limit;
+    // Encode the query so spaces, '&' and non-ASCII text stay in the value.
+    await dio()
+        .get(
+            "$homePage/@@search?SearchableText=${Uri.encodeQueryComponent(searchQuery)}&sort_on=Date&b_start:int=$offset",
+            options: options)
+        .then(
+      (response) {
+        if (response.statusCode == 200) {
+          var document = html_parser.parse(response.data);
+          var data = document.querySelectorAll(".searchresult");
+          for (var article in data) {
+            var title = article.querySelector("a.state-published")?.text ?? "";
+            var thumbnail =
+                article.querySelector("img")?.attributes["src"] ?? "";
+            var publishedAt =
+                article.querySelector(".searchresultdate")?.text.trim() ?? "";
+            var excerpt =
+                article.querySelector(".croppedDescription")?.text ?? "";
+            var url = article
+                    .querySelector("a.state-published")
+                    ?.attributes["href"] ??
+                "";
+
+            articles.add(
+              NewsArticle(
+                publisher: name,
+                title: title,
+                content: "",
+                excerpt: excerpt,
+                author: "",
+                url: url,
+                tags: [],
+                thumbnail: thumbnail,
+                publishedAt: stringToUnix(publishedAt, format: "yyyy-MM-dd"),
+                category: searchQuery,
+              ),
+            );
+          }
+        }
+      },
+    );
+
+    return articles;
+  }
+}
diff --git a/lib/extractor/general/national/myanmar/rfa_burmese.dart b/lib/extractor/general/national/myanmar/rfa_burmese.dart
new file mode 100644
index 0000000..b1d02e2
--- /dev/null
+++ b/lib/extractor/general/national/myanmar/rfa_burmese.dart
@@ -0,0 +1,187 @@
+import 'package:dio/dio.dart';
+import 'package:html/parser.dart' as html_parser;
+import 'package:raven/brain/dio_manager.dart';
+import 'package:raven/model/article.dart';
+import 'package:raven/model/publisher.dart';
+import 'package:raven/utils/time.dart';
+
+/// Extractor for the Burmese service of Radio Free Asia; scrapes the
+/// site's HTML pages (navigation, story archive, search and story).
+class RfaBurmese extends Publisher {
+  @override
+  String get name => "မြန်မာဌာန";
+
+  @override
+  String get homePage => "https://www.rfa.org/burmese";
+
+  /// Maps each navigation link label to its path relative to [homePage];
+  /// links without an `href` or pointing at [homePage] itself are skipped.
+  @override
+  Future<Map<String, String>> get categories async {
+    Map<String, String> map = {"Burmese": "burmese"};
+    await dio().get(homePage, options: options).then(
+      (response) {
+        if (response.statusCode == 200) {
+          var document = html_parser.parse(response.data);
+          var elements = document.querySelectorAll(".header_top li a");
+          for (var element in elements) {
+            // Anchors may lack an href; skip them rather than assert non-null.
+            final href = element.attributes["href"];
+            if (href == null || href == homePage) {
+              continue;
+            }
+            map.putIfAbsent(
+              element.text,
+              () => href.replaceAll("$homePage/", ""),
+            );
+          }
+        }
+      },
+    );
+    return map..removeWhere((key, value) => value=="video",);
+  }
+
+  @override
+  Category get mainCategory => Category.world;
+
+  @override
+  bool get hasSearchSupport => true;
+
+  /// Browser-like headers sent with every request made by this extractor.
+  Options options = Options(headers: {
+    "User-Agent":
+        "Mozilla/5.0 (Linux; U; Android 4.0.3; ko-kr; LG-L160L Build/IML74K) AppleWebkit/534.30 (KHTML, like Gecko) Version/4.0 Mobile Safari/534.30",
+    "Accept": "*/*",
+    "Accept-Encoding": "gzip, deflate, br",
+    "Connection": "keep-alive",
+  });
+
+  /// Downloads the story page and fills in its content, author and thumbnail.
+  @override
+  Future<NewsArticle> article(NewsArticle newsArticle) async {
+    await dio().get(newsArticle.url, options: options).then((response) {
+      if (response.statusCode == 200) {
+        var document = html_parser.parse(response.data);
+        var content = document.querySelector('#storytext')?.text ?? "";
+        var author = document.querySelector("#story_byline")?.text ?? "";
+        // An <img> carries its URL in `src`; its text content is always empty.
+        var thumbnail = document.querySelector("#headerimg img")?.attributes["src"] ?? "";
+
+        newsArticle = newsArticle.fill(
+          content: content,
+          author: author,
+          tags: [],
+          thumbnail: thumbnail,
+        );
+      }
+    });
+
+    return newsArticle;
+  }
+
+  /// Lists one archive page of [category]; the offset advances 15 items per
+  /// [page]. Returns an empty set when the response status is not 200.
+  @override
+  Future<Set<NewsArticle>> categoryArticles({
+    String category = "news",
+    int page = 1,
+  }) async {
+    Set<NewsArticle> articles = {};
+    var limit = 15;
+    var offset = (page - 1) * limit;
+
+    await dio().get(
+      "$homePage/$category/story_archive?b_start:int=$offset",
+      options: options,
+    )
+        .then(
+      (response) {
+        if (response.statusCode == 200) {
+          var document = html_parser.parse(response.data);
+          var data = document.querySelectorAll(".sectionteaser");
+          for (var article in data) {
+            var title = article.querySelector("span")?.text ?? "";
+            var thumbnail =
+                article.querySelector("img")?.attributes["src"] ?? "";
+            var publishedAt = article.querySelector(".story_date")?.text ?? "";
+            // NOTE(review): class selector assumed (was a bare tag name) — confirm.
+            var excerpt =
+                article.querySelector(".story_description")?.text ?? "";
+            var url = article.querySelector("a")?.attributes["href"] ?? "";
+
+            articles.add(
+              NewsArticle(
+                publisher: name,
+                title: title,
+                content: "",
+                excerpt: excerpt,
+                author: "",
+                url: url,
+                tags: [],
+                thumbnail: thumbnail,
+                publishedAt: stringToUnix(publishedAt, format: "yyyy-MM-dd"),
+                category: category,
+              ),
+            );
+          }
+        }
+      },
+    );
+
+    return articles;
+  }
+
+  /// Runs the site search for [searchQuery]; offset advances 30 per [page].
+  @override
+  Future<Set<NewsArticle>> searchedArticles({
+    required String searchQuery,
+    int page = 1,
+  }) async {
+    Set<NewsArticle> articles = {};
+    var limit = 30;
+    var offset = (page - 1) * limit;
+    // Encode the query so spaces, '&' and non-ASCII text stay in the value.
+    await dio()
+        .get(
+            "$homePage/@@search?SearchableText=${Uri.encodeQueryComponent(searchQuery)}&sort_on=Date&b_start:int=$offset",
+            options: options)
+        .then(
+      (response) {
+        if (response.statusCode == 200) {
+          var document = html_parser.parse(response.data);
+          var data = document.querySelectorAll(".searchresult");
+          for (var article in data) {
+            var title = article.querySelector("a.state-published")?.text ?? "";
+            var thumbnail =
+                article.querySelector("img")?.attributes["src"] ?? "";
+            var publishedAt =
+                article.querySelector(".searchresultdate")?.text.trim() ?? "";
+            var excerpt =
+                article.querySelector(".croppedDescription")?.text ?? "";
+            var url = article
+                    .querySelector("a.state-published")
+                    ?.attributes["href"] ??
+                "";
+
+            articles.add(
+              NewsArticle(
+                publisher: name,
+                title: title,
+                content: "",
+                excerpt: excerpt,
+                author: "",
+                url: url,
+                tags: [],
+                thumbnail: thumbnail,
+                publishedAt: stringToUnix(publishedAt, format: "yyyy-MM-dd"),
+                category: searchQuery,
+              ),
+            );
+          }
+        }
+      },
+    );
+
+    return articles;
+  }
+}
diff --git a/lib/extractor/general/world/rfa_english.dart b/lib/extractor/general/world/rfa_english.dart
new file mode 100644
index 0000000..65bfe8e
--- /dev/null
+++ b/lib/extractor/general/world/rfa_english.dart
@@ -0,0 +1,187 @@
+import 'package:dio/dio.dart';
+import 'package:html/parser.dart' as html_parser;
+import 'package:raven/brain/dio_manager.dart';
+import 'package:raven/model/article.dart';
+import 'package:raven/model/publisher.dart';
+import 'package:raven/utils/time.dart';
+
+/// Extractor for the English service of Radio Free Asia; scrapes the
+/// site's HTML pages (navigation, story archive, search and story).
+class RfaEnglish extends Publisher {
+  @override
+  String get name => "Radio Free Asia";
+
+  @override
+  String get homePage => "https://www.rfa.org/english";
+
+  /// Maps each navigation link label to its path relative to [homePage],
+  /// skipping links without an `href` and dropping unsupported sections.
+  @override
+  Future<Map<String, String>> get categories async {
+    Map<String, String> map = {"News": "news"};
+    await dio().get(homePage, options: options).then(
+      (response) {
+        if (response.statusCode == 200) {
+          var document = html_parser.parse(response.data);
+          for (var element in document.querySelectorAll(".nav-items a")) {
+            final href = element.attributes["href"];
+            if (href == null) continue;
+            map.putIfAbsent(
+              element.text,
+              () => href.replaceAll("$homePage/", ""),
+            );
+          }
+        }
+      },
+    );
+    var unsupported = ["Press Room", "Contact", "Jobs and internships"];
+    return map
+      ..removeWhere(
+        (key, value) => unsupported.contains(key),
+      );
+  }
+
+  @override
+  Category get mainCategory => Category.world;
+
+  @override
+  bool get hasSearchSupport => true;
+
+  /// Browser-like headers sent with every request made by this extractor.
+  Options options = Options(headers: {
+    "User-Agent":
+        "Mozilla/5.0 (Linux; U; Android 4.0.3; ko-kr; LG-L160L Build/IML74K) AppleWebkit/534.30 (KHTML, like Gecko) Version/4.0 Mobile Safari/534.30",
+    "Accept": "*/*",
+    "Accept-Encoding": "gzip, deflate, br",
+    "Connection": "keep-alive",
+  });
+
+  /// Downloads the story page and fills in its content, author and thumbnail.
+  @override
+  Future<NewsArticle> article(NewsArticle newsArticle) async {
+    await dio().get(newsArticle.url, options: options).then((response) {
+      if (response.statusCode == 200) {
+        var document = html_parser.parse(response.data);
+        var content = document.querySelector('#storytext')?.text ?? "";
+        var author = document.querySelector("#story_byline")?.text ?? "";
+        // An <img> carries its URL in `src`; its text content is always empty.
+        var thumbnail = document.querySelector("#headerimg img")?.attributes["src"] ?? "";
+
+        newsArticle = newsArticle.fill(
+          content: content,
+          author: author,
+          tags: [],
+          thumbnail: thumbnail,
+        );
+      }
+    });
+
+    return newsArticle;
+  }
+
+  /// Lists one archive page of [category]; the offset advances 15 items per
+  /// [page]. Returns an empty set when the response status is not 200.
+  @override
+  Future<Set<NewsArticle>> categoryArticles({
+    String category = "news",
+    int page = 1,
+  }) async {
+    Set<NewsArticle> articles = {};
+    var limit = 15;
+    var offset = (page - 1) * limit;
+
+    await dio().get(
+      "$homePage/$category/story_archive?b_start:int=$offset",
+      options: options,
+    )
+        .then(
+      (response) {
+        if (response.statusCode == 200) {
+          var document = html_parser.parse(response.data);
+          var data = document.querySelectorAll(".sectionteaser");
+          for (var article in data) {
+            var title = article.querySelector("span")?.text ?? "";
+            var thumbnail =
+                article.querySelector("img")?.attributes["src"] ?? "";
+            var publishedAt = article.querySelector(".story_date")?.text ?? "";
+            // NOTE(review): class selector assumed (was a bare tag name) — confirm.
+            var excerpt =
+                article.querySelector(".story_description")?.text ?? "";
+            var url = article.querySelector("a")?.attributes["href"] ?? "";
+
+            articles.add(
+              NewsArticle(
+                publisher: name,
+                title: title,
+                content: "",
+                excerpt: excerpt,
+                author: "",
+                url: url,
+                tags: [],
+                thumbnail: thumbnail,
+                publishedAt: stringToUnix(publishedAt, format: "yyyy-MM-dd"),
+                category: category,
+              ),
+            );
+          }
+        }
+      },
+    );
+
+    return articles;
+  }
+
+  /// Runs the site search for [searchQuery]; offset advances 30 per [page].
+  @override
+  Future<Set<NewsArticle>> searchedArticles({
+    required String searchQuery,
+    int page = 1,
+  }) async {
+    Set<NewsArticle> articles = {};
+    var limit = 30;
+    var offset = (page - 1) * limit;
+    // Encode the query so spaces, '&' and non-ASCII text stay in the value.
+    await dio()
+        .get(
+            "$homePage/@@search?SearchableText=${Uri.encodeQueryComponent(searchQuery)}&sort_on=Date&b_start:int=$offset",
+            options: options)
+        .then(
+      (response) {
+        if (response.statusCode == 200) {
+          var document = html_parser.parse(response.data);
+          var data = document.querySelectorAll(".searchresult");
+          for (var article in data) {
+            var title = article.querySelector("a.state-published")?.text ?? "";
+            var thumbnail =
+                article.querySelector("img")?.attributes["src"] ?? "";
+            var publishedAt =
+                article.querySelector(".searchresultdate")?.text.trim() ?? "";
+            var excerpt =
+                article.querySelector(".croppedDescription")?.text ?? "";
+            var url = article
+                    .querySelector("a.state-published")
+                    ?.attributes["href"] ??
+                "";
+
+            articles.add(
+              NewsArticle(
+                publisher: name,
+                title: title,
+                content: "",
+                excerpt: excerpt,
+                author: "",
+                url: url,
+                tags: [],
+                thumbnail: thumbnail,
+                publishedAt: stringToUnix(publishedAt, format: "yyyy-MM-dd"),
+                category: searchQuery,
+              ),
+            );
+          }
+        }
+      },
+    );
+
+    return articles;
+  }
+}
diff --git a/lib/model/publisher.dart b/lib/model/publisher.dart
index d9c056b..21459ba 100644
--- a/lib/model/publisher.dart
+++ b/lib/model/publisher.dart
@@ -1,14 +1,21 @@
 import 'package:raven/extractor/custom/morss.dart';
 import 'package:raven/extractor/custom/rss.dart';
+import 'package:raven/extractor/general/national/bangladesh/prothamalo.dart';
+import 'package:raven/extractor/general/national/bangladesh/prothamalo_english.dart';
+import 'package:raven/extractor/general/national/china/rfa_cantonese.dart';
+import 'package:raven/extractor/general/national/china/rfa_mandarin.dart';
+import 'package:raven/extractor/general/national/china/rfa_tibetan.dart';
 import 'package:raven/extractor/general/national/india/thehindu.dart';
 import 'package:raven/extractor/general/national/india/theindianexpress.dart';
 import 'package:raven/extractor/general/national/india/thequint.dart';
 import 'package:raven/extractor/general/national/india/thewire.dart';
+import 'package:raven/extractor/general/national/myanmar/rfa_burmese.dart';
 import 'package:raven/extractor/general/world/aljazeera.dart';
 import 'package:raven/extractor/general/world/apnews.dart';
 import 'package:raven/extractor/general/world/bbc.dart';
 import 'package:raven/extractor/general/world/cnn.dart';
 import 'package:raven/extractor/general/world/reuters.dart';
+import 'package:raven/extractor/general/world/rfa_english.dart';
 import 'package:raven/extractor/general/world/theguardian.dart';
 import 'package:raven/extractor/technology/androidpolice.dart';
 import 'package:raven/extractor/technology/arstechnica.dart';
@@ -30,7 +37,9 @@ Map publishers = {
   "CNN": CNN(),
   "Engadget": Engadget(),
   "morss": Morss(),
+  "Prothom Alo": ProthamAloEn(),
   "Reuters": Reuters(),
+  "Radio Free Asia": RfaEnglish(),
   "RSS Feed": RSSFeed(),
   "The Guardian": TheGuardian(),
   "The Hindu": TheHindu(),
@@ -40,12 +49,24 @@ Map publishers = {
   "The Wire": TheWire(),
   "TorrentFreak": TorrentFreak(),
   "XDA Developers": XDAdevelopers(),
+
+  "প্রথম আলো": ProthamAlo(),
+  "မြန်မာဌာန": RfaBurmese(),
+  "RFA 自由亞洲電台粵語部": RfaCantonese(),
+  "自由亚洲电台": RfaMandarin(),
+  "ཨེ་ཤེ་ཡ་རང་དབང་རླུང་འཕྲིན་ཁང་": RfaTibetan(),
 };
 
 enum Category {
-  world,
   technology,
+  world,
+
+  // countries
+  bangladesh,
+  china,
   india,
+
+  // misc
   custom,
 }
 
diff --git a/pubspec.lock b/pubspec.lock
index ee5e62a..d168d06 100644
--- a/pubspec.lock
+++ b/pubspec.lock
@@ -294,6 +294,14 @@ packages:
       url: "https://pub.dev"
     source: hosted
     version: "3.2.2"
+  dio_smart_retry:
+    dependency: "direct main"
+    description:
+      name: dio_smart_retry
+      sha256: "3d71450c19b4d91ef4c7d726a55a284bfc11eb3634f1f25006cdfab3f8595653"
+      url: "https://pub.dev"
+    source: hosted
+    version: "6.0.0"
   dynamic_color:
     dependency: "direct main"
     description:
diff --git a/pubspec.yaml b/pubspec.yaml
index 8ecb5b3..d013424 100644
--- a/pubspec.yaml
+++ b/pubspec.yaml
@@ -14,6 +14,7 @@ dependencies:
   device_info_plus: ^10.1.0
   dio: ^5.4.3+1
   dio_cache_interceptor_hive_store: ^3.2.2
+  dio_smart_retry: ^6.0.0
   dynamic_color: ^1.7.0
   flutter:
     sdk: flutter
diff --git a/test/extractor/general/national/bangladesh/prothamalo_en_test.dart b/test/extractor/general/national/bangladesh/prothamalo_en_test.dart
new file mode 100644
index 0000000..a302c8e
--- /dev/null
+++ b/test/extractor/general/national/bangladesh/prothamalo_en_test.dart
@@ -0,0 +1,22 @@
+import 'package:raven/extractor/general/national/bangladesh/prothamalo_english.dart';
+import 'package:test/test.dart';
+import 'package:raven/model/publisher.dart';
+
+import '../../../common.dart';
+
+
+void main() {
+  Publisher publisher = ProthamAloEn();
+
+  test('Extract Categories Test', () async {
+    await ExtractorTest.categoriesTest(publisher);
+  });
+
+  test('Category Articles Test', () async {
+    await ExtractorTest.categoryArticlesTest(publisher);
+  });
+
+  test('Search Articles Test', () async {
+    await ExtractorTest.searchedArticlesTest(publisher, 'world');
+  });
+}
diff --git a/test/extractor/general/national/bangladesh/prothamalo_test.dart b/test/extractor/general/national/bangladesh/prothamalo_test.dart
new file mode 100644
index 0000000..a302c8e
--- /dev/null
+++ b/test/extractor/general/national/bangladesh/prothamalo_test.dart
@@ -0,0 +1,22 @@
+import 'package:raven/extractor/general/national/bangladesh/prothamalo.dart';
+import 'package:test/test.dart';
+import 'package:raven/model/publisher.dart';
+
+import '../../../common.dart';
+
+
+void main() {
+  Publisher publisher = ProthamAlo();
+
+  test('Extract Categories Test', () async {
+    await ExtractorTest.categoriesTest(publisher);
+  });
+
+  test('Category Articles Test', () async {
+    await ExtractorTest.categoryArticlesTest(publisher);
+  });
+
+  test('Search Articles Test', () async {
+    await ExtractorTest.searchedArticlesTest(publisher, 'ওয়ার্ল্ড');
+  });
+}
diff --git a/test/extractor/general/national/china/rfa_cantonese_test.dart b/test/extractor/general/national/china/rfa_cantonese_test.dart
new file mode 100644
index 0000000..c1edc8a
--- /dev/null
+++ b/test/extractor/general/national/china/rfa_cantonese_test.dart
@@ -0,0 +1,21 @@
+import 'package:raven/extractor/general/national/china/rfa_cantonese.dart';
+import 'package:test/test.dart';
+import 'package:raven/model/publisher.dart';
+
+import '../../../common.dart';
+
+void main() {
+  Publisher publisher = RfaCantonese();
+
+  test('Extract Categories Test', () async {
+    await ExtractorTest.categoriesTest(publisher);
+  });
+
+  test('Category Articles Test', () async {
+    await ExtractorTest.categoryArticlesTest(publisher);
+  });
+
+  test('Search Articles Test', () async {
+    await ExtractorTest.searchedArticlesTest(publisher, 'world');
+  });
+}
diff --git a/test/extractor/general/national/china/rfa_mandarin_test.dart b/test/extractor/general/national/china/rfa_mandarin_test.dart
new file mode 100644
index 0000000..8b11750
--- /dev/null
+++ b/test/extractor/general/national/china/rfa_mandarin_test.dart
@@ -0,0 +1,22 @@
+import 'package:raven/extractor/general/national/china/rfa_mandarin.dart';
+import 'package:raven/model/publisher.dart';
+import 'package:test/test.dart';
+
+import '../../../common.dart';
+
+// Runs the shared ExtractorTest checks against the RfaMandarin extractor.
+void main() {
+  Publisher publisher = RfaMandarin();
+
+  test('Extract Categories Test', () async {
+    await ExtractorTest.categoriesTest(publisher);
+  });
+
+  test('Category Articles Test', () async {
+    await ExtractorTest.categoryArticlesTest(publisher);
+  });
+
+  test('Search Articles Test', () async {
+    await ExtractorTest.searchedArticlesTest(publisher, 'world');
+  });
+}
diff --git a/test/extractor/general/national/china/rfa_tibetan_test.dart b/test/extractor/general/national/china/rfa_tibetan_test.dart
new file mode 100644
index 0000000..98969e1
--- /dev/null
+++ b/test/extractor/general/national/china/rfa_tibetan_test.dart
@@ -0,0 +1,22 @@
+import 'package:raven/extractor/general/national/china/rfa_tibetan.dart';
+import 'package:raven/model/publisher.dart';
+import 'package:test/test.dart';
+
+import '../../../common.dart';
+
+// Runs the shared ExtractorTest checks against the RfaTibetan extractor.
+void main() {
+  Publisher publisher = RfaTibetan();
+
+  test('Extract Categories Test', () async {
+    await ExtractorTest.categoriesTest(publisher);
+  });
+
+  test('Category Articles Test', () async {
+    await ExtractorTest.categoryArticlesTest(publisher);
+  });
+
+  test('Search Articles Test', () async {
+    await ExtractorTest.searchedArticlesTest(publisher, 'world');
+  });
+}
diff --git a/test/extractor/general/national/myanmar/rfa_burmese_test.dart b/test/extractor/general/national/myanmar/rfa_burmese_test.dart
new file mode 100644
index 0000000..86a5226
--- /dev/null
+++ b/test/extractor/general/national/myanmar/rfa_burmese_test.dart
@@ -0,0 +1,22 @@
+import 'package:raven/extractor/general/national/myanmar/rfa_burmese.dart';
+import 'package:raven/model/publisher.dart';
+import 'package:test/test.dart';
+
+import '../../../common.dart';
+
+// Runs the shared ExtractorTest checks against the RfaBurmese extractor.
+void main() {
+  Publisher publisher = RfaBurmese();
+
+  test('Extract Categories Test', () async {
+    await ExtractorTest.categoriesTest(publisher);
+  });
+
+  test('Category Articles Test', () async {
+    await ExtractorTest.categoryArticlesTest(publisher);
+  });
+
+  test('Search Articles Test', () async {
+    await ExtractorTest.searchedArticlesTest(publisher, 'world');
+  });
+}
diff --git a/test/extractor/general/world/rfa_english_test.dart b/test/extractor/general/world/rfa_english_test.dart
new file mode 100644
index 0000000..8c68341
--- /dev/null
+++ b/test/extractor/general/world/rfa_english_test.dart
@@ -0,0 +1,22 @@
+import 'package:raven/extractor/general/world/rfa_english.dart';
+import 'package:raven/model/publisher.dart';
+import 'package:test/test.dart';
+
+import '../../common.dart';
+
+// Runs the shared ExtractorTest checks against the RfaEnglish extractor.
+void main() {
+  Publisher publisher = RfaEnglish();
+
+  test('Extract Categories Test', () async {
+    await ExtractorTest.categoriesTest(publisher);
+  });
+
+  test('Category Articles Test', () async {
+    await ExtractorTest.categoryArticlesTest(publisher);
+  });
+
+  test('Search Articles Test', () async {
+    await ExtractorTest.searchedArticlesTest(publisher, 'world');
+  });
+}