Skip to content

Commit

Permalink
* reuse the first entity across attached entities that match …
Browse files Browse the repository at this point in the history
…the same `UniqueLatestReplier` to re-achieve c8f4920

* fix entities that already exist still being attached in `DbContext.ChangeTracker`
+ local function `DetachAndReplace()`
@ `ThreadLatestReplierSaver.SaveFromThread()`

- field `_latestRepliersKeyByUnique` to let `FillFromRequestingWith602()` directly assign new instances of related entity `LatestReplier` @ ThreadCrawlFacade.cs
@ c#/crawler
  • Loading branch information
n0099 committed Jul 11, 2024
1 parent 654feca commit da0c4c3
Show file tree
Hide file tree
Showing 2 changed files with 32 additions and 17 deletions.
14 changes: 4 additions & 10 deletions c#/crawler/src/Tieba/Crawl/Facade/ThreadCrawlFacade.cs
Original file line number Diff line number Diff line change
Expand Up @@ -14,8 +14,6 @@ public class ThreadCrawlFacade(
postParser, postSaverFactory.Invoke,
userParserFactory.Invoke, userSaverFactory.Invoke)
{
private readonly Dictionary<ThreadLatestReplierSaver.UniqueLatestReplier, LatestReplier?> _latestRepliersKeyByUnique = [];

public delegate ThreadCrawlFacade New(Fid fid, string forumName);

protected override void OnPostParse(
Expand Down Expand Up @@ -44,21 +42,17 @@ join parsed in Posts.Values on (Tid)inResponse.Tid equals parsed.Tid
{ // replace with more detailed location.name in the 6.0.2 response
t.parsed.Geolocation = Helper.SerializedProtoBufOrNullIfEmpty(t.inResponse.Location);
}
var name = t.inResponse.LastReplyer.Name.NullIfEmpty();
var nameShow = t.inResponse.LastReplyer.NameShow.NullIfEmpty();
var lastReplyer = t.inResponse.LastReplyer;
var name = lastReplyer?.Name.NullIfEmpty();
var nameShow = lastReplyer?.NameShow.NullIfEmpty();
// LastReplyer will be null when LivePostType != "", but LastTimeInt will have expected timestamp value
var latestReplierEntity = t.inResponse.LastReplyer == null ? null : new LatestReplier
t.parsed.LatestReplier = lastReplyer == null ? null : new LatestReplier
{
Name = name,
#pragma warning disable S3358 // Ternary operators should not be nested
DisplayName = name == nameShow ? null : nameShow
#pragma warning restore S3358 // Ternary operators should not be nested
};
var uniqueLatestReplier = ThreadLatestReplierSaver.UniqueLatestReplier.FromLatestReplier(latestReplierEntity);
var isExists = _latestRepliersKeyByUnique.TryGetValue(uniqueLatestReplier, out var existingLatestReplier);
if (!isExists) _latestRepliersKeyByUnique[uniqueLatestReplier] = latestReplierEntity;
t.parsed.LatestReplier = isExists ? existingLatestReplier : latestReplierEntity;
});
}
Original file line number Diff line number Diff line change
@@ -1,3 +1,5 @@
using Microsoft.EntityFrameworkCore.ChangeTracking;

namespace tbm.Crawler.Tieba.Crawl.Saver.Related;

public class ThreadLatestReplierSaver(
Expand All @@ -10,19 +12,38 @@ public class ThreadLatestReplierSaver(

public Action SaveFromThread(CrawlerDbContext db, IReadOnlyCollection<ThreadPost> threads)
{
var uniqueLatestRepliers = threads
static void DetachAndReplace(

Check failure on line 15 in c#/crawler/src/Tieba/Crawl/Saver/Related/ThreadLatestReplierSaver.cs

View workflow job for this annotation

GitHub Actions / runs-on (ubuntu-latest) / build (crawler)

Local function 'DetachAndReplace' contains the word 'and', which suggests doing multiple things (https://github.com/dennisdoomen/CSharpGuidelines/blob/5.7.0/_rules/1115.md)

Check failure on line 15 in c#/crawler/src/Tieba/Crawl/Saver/Related/ThreadLatestReplierSaver.cs

View workflow job for this annotation

GitHub Actions / runs-on (ubuntu-latest) / build (crawler)

Local function 'DetachAndReplace' contains the word 'and', which suggests doing multiple things (https://github.com/dennisdoomen/CSharpGuidelines/blob/5.7.0/_rules/1115.md)

Check failure on line 15 in c#/crawler/src/Tieba/Crawl/Saver/Related/ThreadLatestReplierSaver.cs

View workflow job for this annotation

GitHub Actions / runs-on (macos-latest) / build (crawler)

Local function 'DetachAndReplace' contains the word 'and', which suggests doing multiple things (https://github.com/dennisdoomen/CSharpGuidelines/blob/5.7.0/_rules/1115.md)

Check failure on line 15 in c#/crawler/src/Tieba/Crawl/Saver/Related/ThreadLatestReplierSaver.cs

View workflow job for this annotation

GitHub Actions / runs-on (macos-latest) / build (crawler)

Local function 'DetachAndReplace' contains the word 'and', which suggests doing multiple things (https://github.com/dennisdoomen/CSharpGuidelines/blob/5.7.0/_rules/1115.md)

Check failure on line 15 in c#/crawler/src/Tieba/Crawl/Saver/Related/ThreadLatestReplierSaver.cs

View workflow job for this annotation

GitHub Actions / runs-on (windows-latest) / build (crawler)

Local function 'DetachAndReplace' contains the word 'and', which suggests doing multiple things (https://github.com/dennisdoomen/CSharpGuidelines/blob/5.7.0/_rules/1115.md)

Check failure on line 15 in c#/crawler/src/Tieba/Crawl/Saver/Related/ThreadLatestReplierSaver.cs

View workflow job for this annotation

GitHub Actions / runs-on (windows-latest) / build (crawler)

Local function 'DetachAndReplace' contains the word 'and', which suggests doing multiple things (https://github.com/dennisdoomen/CSharpGuidelines/blob/5.7.0/_rules/1115.md)
EntityEntry<LatestReplier> entityEntry,
ThreadPost thread,
LatestReplier newLatestReplier)
{
entityEntry.State = EntityState.Detached;
thread.LatestReplier = newLatestReplier;
};

Check failure on line 22 in c#/crawler/src/Tieba/Crawl/Saver/Related/ThreadLatestReplierSaver.cs

View workflow job for this annotation

GitHub Actions / runs-on (ubuntu-latest) / build (crawler)

Check failure on line 22 in c#/crawler/src/Tieba/Crawl/Saver/Related/ThreadLatestReplierSaver.cs

View workflow job for this annotation

GitHub Actions / runs-on (ubuntu-latest) / build (crawler)

Remove this empty statement. (https://rules.sonarsource.com/csharp/RSPEC-1116)

Check failure on line 22 in c#/crawler/src/Tieba/Crawl/Saver/Related/ThreadLatestReplierSaver.cs

View workflow job for this annotation

GitHub Actions / runs-on (ubuntu-latest) / build (crawler)

Check failure on line 22 in c#/crawler/src/Tieba/Crawl/Saver/Related/ThreadLatestReplierSaver.cs

View workflow job for this annotation

GitHub Actions / runs-on (ubuntu-latest) / build (crawler)

Check failure on line 22 in c#/crawler/src/Tieba/Crawl/Saver/Related/ThreadLatestReplierSaver.cs

View workflow job for this annotation

GitHub Actions / runs-on (ubuntu-latest) / build (crawler)

Check failure on line 22 in c#/crawler/src/Tieba/Crawl/Saver/Related/ThreadLatestReplierSaver.cs

View workflow job for this annotation

GitHub Actions / runs-on (ubuntu-latest) / build (crawler)

Remove this empty statement. (https://rules.sonarsource.com/csharp/RSPEC-1116)

Check failure on line 22 in c#/crawler/src/Tieba/Crawl/Saver/Related/ThreadLatestReplierSaver.cs

View workflow job for this annotation

GitHub Actions / runs-on (ubuntu-latest) / build (crawler)

Check failure on line 22 in c#/crawler/src/Tieba/Crawl/Saver/Related/ThreadLatestReplierSaver.cs

View workflow job for this annotation

GitHub Actions / runs-on (ubuntu-latest) / build (crawler)

Check failure on line 22 in c#/crawler/src/Tieba/Crawl/Saver/Related/ThreadLatestReplierSaver.cs

View workflow job for this annotation

GitHub Actions / runs-on (macos-latest) / build (crawler)

Check failure on line 22 in c#/crawler/src/Tieba/Crawl/Saver/Related/ThreadLatestReplierSaver.cs

View workflow job for this annotation

GitHub Actions / runs-on (macos-latest) / build (crawler)

Remove this empty statement. (https://rules.sonarsource.com/csharp/RSPEC-1116)

Check failure on line 22 in c#/crawler/src/Tieba/Crawl/Saver/Related/ThreadLatestReplierSaver.cs

View workflow job for this annotation

GitHub Actions / runs-on (macos-latest) / build (crawler)

Check failure on line 22 in c#/crawler/src/Tieba/Crawl/Saver/Related/ThreadLatestReplierSaver.cs

View workflow job for this annotation

GitHub Actions / runs-on (macos-latest) / build (crawler)

Check failure on line 22 in c#/crawler/src/Tieba/Crawl/Saver/Related/ThreadLatestReplierSaver.cs

View workflow job for this annotation

GitHub Actions / runs-on (macos-latest) / build (crawler)

Check failure on line 22 in c#/crawler/src/Tieba/Crawl/Saver/Related/ThreadLatestReplierSaver.cs

View workflow job for this annotation

GitHub Actions / runs-on (macos-latest) / build (crawler)

Remove this empty statement. (https://rules.sonarsource.com/csharp/RSPEC-1116)

Check failure on line 22 in c#/crawler/src/Tieba/Crawl/Saver/Related/ThreadLatestReplierSaver.cs

View workflow job for this annotation

GitHub Actions / runs-on (macos-latest) / build (crawler)

Check failure on line 22 in c#/crawler/src/Tieba/Crawl/Saver/Related/ThreadLatestReplierSaver.cs

View workflow job for this annotation

GitHub Actions / runs-on (macos-latest) / build (crawler)

Check warning on line 22 in c#/crawler/src/Tieba/Crawl/Saver/Related/ThreadLatestReplierSaver.cs

View workflow job for this annotation

GitHub Actions / runs-on (macos-latest) / ReSharper

"[EmptyStatement] Empty statement is redundant" on /Users/runner/work/open-tbm/open-tbm/c#/crawler/src/Tieba/Crawl/Saver/Related/ThreadLatestReplierSaver.cs(22,896)

Check warning on line 22 in c#/crawler/src/Tieba/Crawl/Saver/Related/ThreadLatestReplierSaver.cs

View workflow job for this annotation

GitHub Actions / runs-on (ubuntu-latest) / ReSharper

"[EmptyStatement] Empty statement is redundant" on /home/runner/work/open-tbm/open-tbm/c#/crawler/src/Tieba/Crawl/Saver/Related/ThreadLatestReplierSaver.cs(22,896)

Check failure on line 22 in c#/crawler/src/Tieba/Crawl/Saver/Related/ThreadLatestReplierSaver.cs

View workflow job for this annotation

GitHub Actions / runs-on (windows-latest) / build (crawler)

Check failure on line 22 in c#/crawler/src/Tieba/Crawl/Saver/Related/ThreadLatestReplierSaver.cs

View workflow job for this annotation

GitHub Actions / runs-on (windows-latest) / build (crawler)

Remove this empty statement. (https://rules.sonarsource.com/csharp/RSPEC-1116)

Check failure on line 22 in c#/crawler/src/Tieba/Crawl/Saver/Related/ThreadLatestReplierSaver.cs

View workflow job for this annotation

GitHub Actions / runs-on (windows-latest) / build (crawler)

Check failure on line 22 in c#/crawler/src/Tieba/Crawl/Saver/Related/ThreadLatestReplierSaver.cs

View workflow job for this annotation

GitHub Actions / runs-on (windows-latest) / build (crawler)

Check failure on line 22 in c#/crawler/src/Tieba/Crawl/Saver/Related/ThreadLatestReplierSaver.cs

View workflow job for this annotation

GitHub Actions / runs-on (windows-latest) / build (crawler)

Check failure on line 22 in c#/crawler/src/Tieba/Crawl/Saver/Related/ThreadLatestReplierSaver.cs

View workflow job for this annotation

GitHub Actions / runs-on (windows-latest) / build (crawler)

Remove this empty statement. (https://rules.sonarsource.com/csharp/RSPEC-1116)

Check failure on line 22 in c#/crawler/src/Tieba/Crawl/Saver/Related/ThreadLatestReplierSaver.cs

View workflow job for this annotation

GitHub Actions / runs-on (windows-latest) / build (crawler)

Check failure on line 22 in c#/crawler/src/Tieba/Crawl/Saver/Related/ThreadLatestReplierSaver.cs

View workflow job for this annotation

GitHub Actions / runs-on (windows-latest) / build (crawler)

Check warning on line 22 in c#/crawler/src/Tieba/Crawl/Saver/Related/ThreadLatestReplierSaver.cs

View workflow job for this annotation

GitHub Actions / runs-on (windows-latest) / ReSharper

"[EmptyStatement] Empty statement is redundant" on D:\a\open-tbm\open-tbm\c#\crawler\src\Tieba\Crawl\Saver\Related\ThreadLatestReplierSaver.cs(22,896)

var threadsGroupByUniqueLatestReplier = threads
.Where(th => th.LatestReplier != null)
.Select(UniqueLatestReplier.FromThread).ToList();
.GroupBy(UniqueLatestReplier.FromThread).ToList();
threadsGroupByUniqueLatestReplier.ForEach(g =>
(from thread in g.Skip(1)
join entityEntry in db.ChangeTracker.Entries<LatestReplier>()
on thread.LatestReplier equals entityEntry.Entity
select (thread, entityEntry))
.ForEach(t => DetachAndReplace(t.entityEntry, t.thread, g.First().LatestReplier!)));

var uniqueLatestRepliers = threadsGroupByUniqueLatestReplier.Select(g => g.Key).ToList();
var existingLatestRepliers = db.LatestRepliers.AsNoTracking().FilterByItems(
uniqueLatestRepliers, (latestReplier, uniqueLatestReplier) =>
latestReplier.Name == uniqueLatestReplier.Name
&& latestReplier.DisplayName == uniqueLatestReplier.DisplayName)
uniqueLatestRepliers, (latestReplier, uniqueLatestReplier) =>
latestReplier.Name == uniqueLatestReplier.Name
&& latestReplier.DisplayName == uniqueLatestReplier.DisplayName)
.ToList();
(from existing in existingLatestRepliers
join thread in threads
on UniqueLatestReplier.FromLatestReplier(existing) equals UniqueLatestReplier.FromThread(thread)
select (existing, thread))
.ForEach(t => t.thread.LatestReplier = t.existing);
join entityEntry in db.ChangeTracker.Entries<LatestReplier>()
on thread.LatestReplier equals entityEntry.Entity // Object.ReferenceEquals()
select (existing, thread, entityEntry))
.ForEach(t => DetachAndReplace(t.entityEntry, t.thread, t.existing));

_ = _saverLocks.Value.Acquire(uniqueLatestRepliers
.Except(existingLatestRepliers.Select(UniqueLatestReplier.FromLatestReplier))
Expand Down

0 comments on commit da0c4c3

Please sign in to comment.