From 8769dc0e671d2c2f362feda9df2bed6e5bdd3b44 Mon Sep 17 00:00:00 2001 From: n0099 Date: Wed, 15 May 2024 22:52:10 +0800 Subject: [PATCH] * flip operands to make param `newValue` preceding `oldValue` @ (Sub)ReplySaver.cs * split into two partial classes @ ReplySaver.cs @ crawler * reorder methods * expand list with newline @ c# --- .../src/Tieba/Crawl/Crawler/ThreadCrawler.cs | 13 ++++----- .../src/Tieba/Crawl/Parser/UserParser.cs | 8 +++++- .../src/Tieba/Crawl/Saver/Post/ReplySaver.cs | 28 ++++++++++--------- .../Tieba/Crawl/Saver/Post/SubReplySaver.cs | 6 ++-- .../src/Tieba/Crawl/Saver/Post/ThreadSaver.cs | 10 +++---- c#/shared/src/Db/TbmDbContext.cs | 2 -- 6 files changed, 36 insertions(+), 31 deletions(-) diff --git a/c#/crawler/src/Tieba/Crawl/Crawler/ThreadCrawler.cs b/c#/crawler/src/Tieba/Crawl/Crawler/ThreadCrawler.cs index b9a00a6d..d5e09b75 100644 --- a/c#/crawler/src/Tieba/Crawl/Crawler/ThreadCrawler.cs +++ b/c#/crawler/src/Tieba/Crawl/Crawler/ThreadCrawler.cs @@ -48,11 +48,10 @@ protected override IEnumerable GetRequestsForPage(Page page, Cancellati ]; } - protected ThreadRequest.Types.Data GetRequestDataForClientVersion602(Page page) => - new() - { - Kw = forumName, - Pn = (int)page, - Rn = 30 - }; + protected ThreadRequest.Types.Data GetRequestDataForClientVersion602(Page page) => new() + { + Kw = forumName, + Pn = (int)page, + Rn = 30 + }; } diff --git a/c#/crawler/src/Tieba/Crawl/Parser/UserParser.cs b/c#/crawler/src/Tieba/Crawl/Parser/UserParser.cs index 6d2f3e87..b3de7893 100644 --- a/c#/crawler/src/Tieba/Crawl/Parser/UserParser.cs +++ b/c#/crawler/src/Tieba/Crawl/Parser/UserParser.cs @@ -17,7 +17,13 @@ public void Parse(IEnumerable inUsers) => var (portrait, portraitUpdatedAt) = ExtractPortrait(el.Portrait); if (uid < 0) // historical anonymous user { - return new() {Uid = uid, Name = el.NameShow, Portrait = portrait, PortraitUpdatedAt = portraitUpdatedAt}; + return new() + { + Uid = uid, + Name = el.NameShow, + Portrait = portrait, + PortraitUpdatedAt = portraitUpdatedAt + }; } // will be an empty string when the user hasn't set a username for their baidu account yet diff --git a/c#/crawler/src/Tieba/Crawl/Saver/Post/ReplySaver.cs b/c#/crawler/src/Tieba/Crawl/Saver/Post/ReplySaver.cs index e9f02955..9fadb3db 100644 --- a/c#/crawler/src/Tieba/Crawl/Saver/Post/ReplySaver.cs +++ b/c#/crawler/src/Tieba/Crawl/Saver/Post/ReplySaver.cs @@ -2,7 +2,7 @@ namespace tbm.Crawler.Tieba.Crawl.Saver; -public class ReplySaver( +public partial class ReplySaver( ILogger logger, ConcurrentDictionary posts, ReplySignatureSaver replySignatureSaver, @@ -21,13 +21,13 @@ protected override bool FieldUpdateIgnorance protected override bool UserFieldUpdateIgnorance(string propName, object? oldValue, object? newValue) => propName switch { // FansNickname in reply response will always be null - nameof(User.FansNickname) when oldValue is not null && newValue is null => true, + nameof(User.FansNickname) when newValue is null && oldValue is not null => true, _ => false }; protected override bool UserFieldRevisionIgnorance(string propName, object? oldValue, object? newValue) => propName switch { // user icon will be null after UserParser.ResetUsersIcon() get invoked - nameof(User.Icon) when oldValue is null && newValue is not null => true, + nameof(User.Icon) when newValue is not null && oldValue is null => true, _ => false }; @@ -51,6 +51,15 @@ protected override Dictionary } }; + [SuppressMessage("StyleCop.CSharp.SpacingRules", "SA1025:Code should not contain multiple whitespace in a row")] + protected override NullFieldsBitMask GetRevisionNullFieldBitMask(string fieldName) => fieldName switch + { + nameof(ReplyPost.IsFold) => 1 << 2, + nameof(ReplyPost.DisagreeCount) => 1 << 4, + nameof(ReplyPost.Geolocation) => 1 << 5, + _ => 0 + }; + public override SaverChangeSet Save(CrawlerDbContext db) { var changeSet = Save(db, r => r.Pid, @@ -65,16 +74,9 @@ public override SaverChangeSet Save(CrawlerDbContext db) return changeSet; } - - [SuppressMessage("StyleCop.CSharp.SpacingRules", "SA1025:Code should not contain multiple whitespace in a row")] - protected override NullFieldsBitMask GetRevisionNullFieldBitMask(string fieldName) => fieldName switch - { - nameof(ReplyPost.IsFold) => 1 << 2, - nameof(ReplyPost.DisagreeCount) => 1 << 4, - nameof(ReplyPost.Geolocation) => 1 << 5, - _ => 0 - }; - +} +public partial class ReplySaver +{ private static void SaveReplyContentImages(CrawlerDbContext db, IEnumerable replies) { var pidAndImageList = ( diff --git a/c#/crawler/src/Tieba/Crawl/Saver/Post/SubReplySaver.cs b/c#/crawler/src/Tieba/Crawl/Saver/Post/SubReplySaver.cs index b4d51781..3614b860 100644 --- a/c#/crawler/src/Tieba/Crawl/Saver/Post/SubReplySaver.cs +++ b/c#/crawler/src/Tieba/Crawl/Saver/Post/SubReplySaver.cs @@ -18,7 +18,7 @@ protected override bool UserFieldUpdateIgnorance nameof(User.Icon) => true, // FansNickname in sub reply response will always be null - nameof(User.FansNickname) when oldValue is not null && newValue is null => true, + nameof(User.FansNickname) when newValue is null && oldValue is not null => true, // DisplayName in users embedded in sub replies from response will be the legacy nickname nameof(User.DisplayName) => true, @@ -40,6 +40,8 @@ protected override Dictionary } }; + protected override NullFieldsBitMask GetRevisionNullFieldBitMask(string fieldName) => 0; + public override SaverChangeSet Save(CrawlerDbContext db) { var changeSet = Save(db, sr => sr.Spid, @@ -52,6 +54,4 @@ public override SaverChangeSet Save(CrawlerDbContext db) return changeSet; } - - protected override NullFieldsBitMask GetRevisionNullFieldBitMask(string fieldName) => 0; } diff --git a/c#/crawler/src/Tieba/Crawl/Saver/Post/ThreadSaver.cs b/c#/crawler/src/Tieba/Crawl/Saver/Post/ThreadSaver.cs index fe94ab47..0f192b86 100644 --- a/c#/crawler/src/Tieba/Crawl/Saver/Post/ThreadSaver.cs +++ b/c#/crawler/src/Tieba/Crawl/Saver/Post/ThreadSaver.cs @@ -67,11 +67,6 @@ protected override Dictionary } }; - public override SaverChangeSet Save(CrawlerDbContext db) => - Save(db, th => th.Tid, - th => new ThreadRevision {TakenAt = th.UpdatedAt ?? th.CreatedAt, Tid = th.Tid}, - PredicateBuilder.New(th => Posts.Keys.Contains(th.Tid))); - [SuppressMessage("StyleCop.CSharp.SpacingRules", "SA1025:Code should not contain multiple whitespace in a row")] protected override NullFieldsBitMask GetRevisionNullFieldBitMask(string fieldName) => fieldName switch { @@ -86,4 +81,9 @@ public override SaverChangeSet Save(CrawlerDbContext db) => nameof(ThreadPost.Geolocation) => 1 << 10, _ => 0 }; + + public override SaverChangeSet Save(CrawlerDbContext db) => + Save(db, th => th.Tid, + th => new ThreadRevision {TakenAt = th.UpdatedAt ?? th.CreatedAt, Tid = th.Tid}, + PredicateBuilder.New(th => Posts.Keys.Contains(th.Tid))); } diff --git a/c#/shared/src/Db/TbmDbContext.cs b/c#/shared/src/Db/TbmDbContext.cs index 6a763e1f..752ac182 100644 --- a/c#/shared/src/Db/TbmDbContext.cs +++ b/c#/shared/src/Db/TbmDbContext.cs @@ -139,9 +139,7 @@ protected void OnModelCreatingWithFid(ModelBuilder b, uint fid) => b.Entity().ToTable($"tbmc_f{fid}_reply_content_image"); protected virtual void OnConfiguringNpgsql(NpgsqlDbContextOptionsBuilder builder) { } - protected virtual void OnBuildingNpgsqlDataSource(NpgsqlDataSourceBuilder builder) { } - private Lazy GetNpgsqlDataSource(string? connectionString) => _dataSourceSingleton ??= new(() => {