Skip to content

Commit

Permalink
+ abstract class BaseUser to extract fields (Display)?Name from e…
Browse files Browse the repository at this point in the history
…ntity class `User`

* fix false positives of signature existence, since `WHERE signatureId IN (s1, s2) AND xxHash3 IN (x1, x2)` is not equivalent to `WHERE (signatureId = s1 AND xxHash3 = x1) OR (signatureId = s2 AND xxHash3 = x2)` @ `ReplySignatureSaver.Save()`
* fix username of historical anonymous user not falling back to null when it's an empty string @ `UserParser.Parse()`
* fix outdated comments since a0f48f3 @ `UserSaver.ShouldIgnoreEntityRevision()`
@ fe
  • Loading branch information
n0099 committed Jul 10, 2024
1 parent 9d5f4fd commit 59f7067
Show file tree
Hide file tree
Showing 7 changed files with 23 additions and 25 deletions.
7 changes: 7 additions & 0 deletions c#/crawler/src/Db/BaseUser.cs
Original file line number Diff line number Diff line change
@@ -0,0 +1,7 @@
namespace tbm.Crawler.Db;

/// <summary>
/// Abstract base for user entities that carries the <see cref="Name"/> and
/// <see cref="DisplayName"/> properties shared by derived classes.
/// Derives from <c>TimestampedEntity</c>.
/// </summary>
public abstract class BaseUser : TimestampedEntity
{
/// <summary>User name; nullable, so a user may have none.</summary>
public string? Name { get; set; }
/// <summary>Display name; nullable, so a user may have none.</summary>
public string? DisplayName { get; set; }
}
4 changes: 1 addition & 3 deletions c#/crawler/src/Db/User.cs
Original file line number Diff line number Diff line change
@@ -1,10 +1,8 @@
namespace tbm.Crawler.Db;

public class User : TimestampedEntity
public class User : BaseUser
{
[Key] public long Uid { get; set; }

Check notice on line 5 in c#/crawler/src/Db/User.cs

View workflow job for this annotation

GitHub Actions / runs-on (ubuntu-latest) / ReSharper

"[UseSymbolAlias] Use type alias 'Uid'" on /home/runner/work/open-tbm/open-tbm/c#/crawler/src/Db/User.cs(5,75)

Check notice on line 5 in c#/crawler/src/Db/User.cs

View workflow job for this annotation

GitHub Actions / runs-on (macos-latest) / ReSharper

"[UseSymbolAlias] Use type alias 'Uid'" on /Users/runner/work/open-tbm/open-tbm/c#/crawler/src/Db/User.cs(5,75)

Check notice on line 5 in c#/crawler/src/Db/User.cs

View workflow job for this annotation

GitHub Actions / runs-on (windows-latest) / ReSharper

"[UseSymbolAlias] Use type alias 'Uid'" on D:\a\open-tbm\open-tbm\c#\crawler\src\Db\User.cs(5,75)
public string? Name { get; set; }
public string? DisplayName { get; set; }
public required string Portrait { get; set; }
public uint? PortraitUpdatedAt { get; set; }
public byte? Gender { get; set; }
Expand Down
2 changes: 1 addition & 1 deletion c#/crawler/src/Tieba/Crawl/Parser/UserParser.cs
Original file line number Diff line number Diff line change
Expand Up @@ -20,7 +20,7 @@ public void Parse(IEnumerable<TbClient.User> inUsers) =>
return new()
{
Uid = uid,
Name = el.NameShow,
Name = el.NameShow.NullIfEmpty(),
Portrait = portrait,
PortraitUpdatedAt = portraitUpdatedAt
};
Expand Down
2 changes: 1 addition & 1 deletion c#/crawler/src/Tieba/Crawl/Saver/Post/ReplySaver.cs
Original file line number Diff line number Diff line change
Expand Up @@ -17,7 +17,7 @@ public override SaverChangeSet<ReplyPost> Save(CrawlerDbContext db)
r => new ReplyRevision {TakenAt = r.UpdatedAt ?? r.CreatedAt, Pid = r.Pid},
LinqKit.PredicateBuilder.New<ReplyPost>(r => Posts.Keys.Contains(r.Pid)));

db.ReplyContents.AddRange(changeSet.NewlyAdded
db.ReplyContents.AddRange(changeSet.NewlyAdded // https://github.com/dotnet/efcore/issues/33945
.Select(r => new ReplyContent {Pid = r.Pid, ProtoBufBytes = r.Content}));
PostSaveHandlers += replyContentImageSaver.Save(db, changeSet.NewlyAdded).Invoke;
PostSaveHandlers += AuthorRevisionSaver.SaveAuthorExpGradeRevisions(db, changeSet.AllAfter).Invoke;
Expand Down
2 changes: 1 addition & 1 deletion c#/crawler/src/Tieba/Crawl/Saver/Post/SubReplySaver.cs
Original file line number Diff line number Diff line change
Expand Up @@ -15,7 +15,7 @@ public override SaverChangeSet<SubReplyPost> Save(CrawlerDbContext db)
sr => new SubReplyRevision {TakenAt = sr.UpdatedAt ?? sr.CreatedAt, Spid = sr.Spid},
LinqKit.PredicateBuilder.New<SubReplyPost>(sr => Posts.Keys.Contains(sr.Spid)));

db.SubReplyContents.AddRange(changeSet.NewlyAdded.Select(sr =>
db.SubReplyContents.AddRange(changeSet.NewlyAdded.Select(sr => // https://github.com/dotnet/efcore/issues/33945
new SubReplyContent {Spid = sr.Spid, ProtoBufBytes = sr.Content}));
PostSaveHandlers += AuthorRevisionSaver.SaveAuthorExpGradeRevisions(db, changeSet.AllAfter).Invoke;

Expand Down
18 changes: 10 additions & 8 deletions c#/crawler/src/Tieba/Crawl/Saver/ReplySignatureSaver.cs
Original file line number Diff line number Diff line change
Expand Up @@ -40,14 +40,16 @@ public Action Save(CrawlerDbContext db, IEnumerable<ReplyPost> replies)
r => r.Signature,
SignatureIdAndValueEqualityComparer.Instance);

var existingSignatures = (
from s in db.ReplySignatures.AsTracking()
where signatures.Select(s2 => s2.SignatureId).Contains(s.SignatureId)

// server side eval doesn't need ByteArrayEqualityComparer
&& signatures.Select(s2 => s2.XxHash3).Contains(s.XxHash3)
select s
).ToList();
var existingSignatures = db.ReplySignatures.AsTracking()
.Where(signatures.Aggregate(
LinqKit.PredicateBuilder.New<ReplySignature>(),
(predicate, newOrExisting) =>
predicate.Or(LinqKit.PredicateBuilder
.New<ReplySignature>(existing =>
existing.SignatureId == newOrExisting.SignatureId)
.And(existing =>
existing.XxHash3 == newOrExisting.XxHash3))))
.ToList();
(from existing in existingSignatures
join newInReply in signatures on existing.SignatureId equals newInReply.SignatureId
select (existing, newInReply))
Expand Down
13 changes: 2 additions & 11 deletions c#/crawler/src/Tieba/Crawl/Saver/UserSaver.cs
Original file line number Diff line number Diff line change
Expand Up @@ -72,22 +72,13 @@ public partial class UserSaver
{
protected override bool ShouldIgnoreEntityRevision(string propName, PropertyEntry propEntry, EntityEntry entityEntry)
{
// ThreadCrawlFacade.ParseLatestRepliers() will save users with empty string as portrait
// they may soon be updated by (sub) reply crawler after it find out the latest reply
// ThreadCrawlFacade.ParseLatestRepliers() will save partial filled user of latest repliers for livepost thread
// they may later get updated by (sub) reply crawler after it find out the latest reply
// so we should ignore its revision update for all fields
// ignore entire record is not possible via IFieldChangeIgnorance.GlobalFieldChangeIgnorance.Revision()
// since it can only determine one field at the time
if (propName != nameof(User.Portrait) || propEntry.OriginalValue is not "") return false;

// invokes OriginalValues.ToObject() to get a new instance
// since entityInTracking is reference to the changed one
var user = (User)entityEntry.OriginalValues.ToObject();

// create another user instance with only fields of latest replier filled
var latestReplier = User.CreateLatestReplier(user.Uid, user.Name, user.DisplayName);

// if they are same by fields values, the original one is the latest replier
// that previously generated by ParseLatestRepliers()
return User.EqualityComparer.Instance.Equals(user, latestReplier);
}

Expand Down

0 comments on commit 59f7067

Please sign in to comment.