Skip to content

Commit

Permalink
* reuse the first entity across attached entities that match …
Browse files Browse the repository at this point in the history
…the same `UniqueLatestReplier` to re-achieve c8f4920

* fix entities that already exist remaining attached in `DbContext.ChangeTracker`
+ local function `DetachThenReplace()`
@ `ThreadLatestReplierSaver.SaveFromThread()`

- field `_latestRepliersKeyByUnique` to let `FillFromRequestingWith602()` directly assign new instances of related entity `LatestReplier` @ ThreadCrawlFacade.cs
@ c#/crawler
  • Loading branch information
n0099 committed Jul 11, 2024
1 parent 654feca commit f2388a7
Show file tree
Hide file tree
Showing 2 changed files with 34 additions and 17 deletions.
14 changes: 4 additions & 10 deletions c#/crawler/src/Tieba/Crawl/Facade/ThreadCrawlFacade.cs
Original file line number Diff line number Diff line change
Expand Up @@ -14,8 +14,6 @@ public class ThreadCrawlFacade(
postParser, postSaverFactory.Invoke,
userParserFactory.Invoke, userSaverFactory.Invoke)
{
private readonly Dictionary<ThreadLatestReplierSaver.UniqueLatestReplier, LatestReplier?> _latestRepliersKeyByUnique = [];

public delegate ThreadCrawlFacade New(Fid fid, string forumName);

protected override void OnPostParse(
Expand Down Expand Up @@ -44,21 +42,17 @@ join parsed in Posts.Values on (Tid)inResponse.Tid equals parsed.Tid
{ // replace with more detailed location.name in the 6.0.2 response
t.parsed.Geolocation = Helper.SerializedProtoBufOrNullIfEmpty(t.inResponse.Location);
}
var name = t.inResponse.LastReplyer.Name.NullIfEmpty();
var nameShow = t.inResponse.LastReplyer.NameShow.NullIfEmpty();
var lastReplyer = t.inResponse.LastReplyer;
var name = lastReplyer?.Name.NullIfEmpty();
var nameShow = lastReplyer?.NameShow.NullIfEmpty();
// LastReplyer will be null when LivePostType != "", but LastTimeInt will have expected timestamp value
var latestReplierEntity = t.inResponse.LastReplyer == null ? null : new LatestReplier
t.parsed.LatestReplier = lastReplyer == null ? null : new LatestReplier
{
Name = name,
#pragma warning disable S3358 // Ternary operators should not be nested
DisplayName = name == nameShow ? null : nameShow
#pragma warning restore S3358 // Ternary operators should not be nested
};
var uniqueLatestReplier = ThreadLatestReplierSaver.UniqueLatestReplier.FromLatestReplier(latestReplierEntity);
var isExists = _latestRepliersKeyByUnique.TryGetValue(uniqueLatestReplier, out var existingLatestReplier);
if (!isExists) _latestRepliersKeyByUnique[uniqueLatestReplier] = latestReplierEntity;
t.parsed.LatestReplier = isExists ? existingLatestReplier : latestReplierEntity;
});
}
Original file line number Diff line number Diff line change
@@ -1,3 +1,5 @@
using Microsoft.EntityFrameworkCore.ChangeTracking;

namespace tbm.Crawler.Tieba.Crawl.Saver.Related;

public class ThreadLatestReplierSaver(
Expand All @@ -10,19 +12,38 @@ public class ThreadLatestReplierSaver(

public Action SaveFromThread(CrawlerDbContext db, IReadOnlyCollection<ThreadPost> threads)
{
var uniqueLatestRepliers = threads
static void DetachThenReplace(
EntityEntry<LatestReplier> entityEntry,
ThreadPost thread,
LatestReplier newLatestReplier)
{
entityEntry.State = EntityState.Detached;
thread.LatestReplier = newLatestReplier;
}

var threadsGroupByUniqueLatestReplier = threads
.Where(th => th.LatestReplier != null)
.Select(UniqueLatestReplier.FromThread).ToList();
.GroupBy(UniqueLatestReplier.FromThread).ToList();
threadsGroupByUniqueLatestReplier.ForEach(g =>
(from thread in g.Skip(1)
join entityEntry in db.ChangeTracker.Entries<LatestReplier>()
on thread.LatestReplier equals entityEntry.Entity
select (thread, entityEntry))
.ForEach(t => DetachThenReplace(t.entityEntry, t.thread, g.First().LatestReplier!)));

var uniqueLatestRepliers = threadsGroupByUniqueLatestReplier.Select(g => g.Key).ToList();
var existingLatestRepliers = db.LatestRepliers.AsNoTracking().FilterByItems(
uniqueLatestRepliers, (latestReplier, uniqueLatestReplier) =>
latestReplier.Name == uniqueLatestReplier.Name
&& latestReplier.DisplayName == uniqueLatestReplier.DisplayName)
uniqueLatestRepliers, (latestReplier, uniqueLatestReplier) =>
latestReplier.Name == uniqueLatestReplier.Name
&& latestReplier.DisplayName == uniqueLatestReplier.DisplayName)
.ToList();
(from existing in existingLatestRepliers
join thread in threads
on UniqueLatestReplier.FromLatestReplier(existing) equals UniqueLatestReplier.FromThread(thread)
select (existing, thread))
.ForEach(t => t.thread.LatestReplier = t.existing);
join entityEntry in db.ChangeTracker.Entries<LatestReplier>()
on thread.LatestReplier equals entityEntry.Entity // Object.ReferenceEquals()
select (existing, thread, entityEntry))
.ForEach(t => DetachThenReplace(t.entityEntry, t.thread, t.existing));

_ = _saverLocks.Value.Acquire(uniqueLatestRepliers
.Except(existingLatestRepliers.Select(UniqueLatestReplier.FromLatestReplier))
Expand All @@ -44,6 +65,8 @@ public Action SaveFromUser(CrawlerDbContext db, Tid tid, IEnumerable<User> users
.Where(u => u.Name == threadLatestReplier.Name
&& u.DisplayName == threadLatestReplier.DisplayName)
.DistinctBy(u => u.Uid).ToList();

// ReSharper disable once ConvertIfStatementToSwitchStatement
if (matchedUsers.Count == 0) return () => { };
if (matchedUsers.Count > 1)
Helper.LogDifferentValuesSharingTheSameKeyInEntities(logger, matchedUsers,
Expand Down

0 comments on commit f2388a7

Please sign in to comment.