From 2e9c2fe0249775e332dd3db777c7bcd4f464a88c Mon Sep 17 00:00:00 2001
From: Aaron Vaage <vaage@google.com>
Date: Mon, 14 May 2018 13:46:03 -0700
Subject: [PATCH] Fix Cue Insertion at Text Stream End

Problem : Text samples have variable length and therefore act
          more like continuous samples whereas audio and video
          act more like discrete samples. Since we use sample
          start time, a cue event could be inserted after the
          start time of the last text sample and never get
          inserted as there are no more samples.

Change : After all streams have requested flushing, we make sure
         to collect all remaining cue events from the sync point
         queue and insert them into each stream.

Issue #362

Change-Id: Id8f136f7ef53531f7a7f412613eac352324e0130
---
 .../bear-subtitle-english-text-6.vtt          |  5 ++
 .../bear-subtitle-english-text.m3u8           |  5 +-
 .../media/chunking/cue_alignment_handler.cc   | 39 ++++++++----
 .../cue_alignment_handler_unittest.cc         | 59 +++++++++++++++++++
 packager/media/chunking/sync_point_queue.cc   |  4 ++
 packager/media/chunking/sync_point_queue.h    |  5 ++
 6 files changed, 106 insertions(+), 11 deletions(-)
 create mode 100644 packager/app/test/testdata/hls-audio-video-text-with-ad-cues/bear-subtitle-english-text-6.vtt
diff --git a/packager/app/test/testdata/hls-audio-video-text-with-ad-cues/bear-subtitle-english-text-6.vtt b/packager/app/test/testdata/hls-audio-video-text-with-ad-cues/bear-subtitle-english-text-6.vtt
new file mode 100644
index 00000000000..933e1539590
--- /dev/null
+++ b/packager/app/test/testdata/hls-audio-video-text-with-ad-cues/bear-subtitle-english-text-6.vtt
@@ -0,0 +1,5 @@
+WEBVTT
+
+00:00:01.000 --> 00:00:04.700
+He 's... um... doing bear-like stuff.
+
diff --git a/packager/app/test/testdata/hls-audio-video-text-with-ad-cues/bear-subtitle-english-text.m3u8 b/packager/app/test/testdata/hls-audio-video-text-with-ad-cues/bear-subtitle-english-text.m3u8
index 6ddb2b074ae..e4c4708d942 100644
--- a/packager/app/test/testdata/hls-audio-video-text-with-ad-cues/bear-subtitle-english-text.m3u8
+++ b/packager/app/test/testdata/hls-audio-video-text-with-ad-cues/bear-subtitle-english-text.m3u8
@@ -7,10 +7,13 @@
 bear-subtitle-english-text-1.vtt
 #EXTINF:1.000,
 bear-subtitle-english-text-2.vtt
-#EXTINF:1.000,
+#EXTINF:0.068,
 bear-subtitle-english-text-3.vtt
+#EXT-X-PLACEMENT-OPPORTUNITY
 #EXTINF:1.000,
 bear-subtitle-english-text-4.vtt
 #EXTINF:1.000,
 bear-subtitle-english-text-5.vtt
+#EXTINF:1.000,
+bear-subtitle-english-text-6.vtt
 #EXT-X-ENDLIST
diff --git a/packager/media/chunking/cue_alignment_handler.cc b/packager/media/chunking/cue_alignment_handler.cc
index 6fa29da69e2..f209e869d96 100644
--- a/packager/media/chunking/cue_alignment_handler.cc
+++ b/packager/media/chunking/cue_alignment_handler.cc
@@ -50,6 +50,19 @@ double TimeInSeconds(const StreamInfo& info, const StreamData& data) {
 
   return static_cast<double>(scaled_time) / time_scale;
 }
+
+Status GetNextCue(double hint,
+                  SyncPointQueue* sync_points,
+                  std::shared_ptr<const CueEvent>* out_cue) {
+  DCHECK(sync_points);
+  DCHECK(out_cue);
+
+  *out_cue = sync_points->GetNext(hint);
+
+  // |*out_cue| will only be null if the job was cancelled.
+  return *out_cue ? Status::OK
+                  : Status(error::CANCELLED, "SyncPointQueue is cancelled.");
+}
 }  // namespace
 
 CueAlignmentHandler::CueAlignmentHandler(SyncPointQueue* sync_points)
@@ -105,14 +118,24 @@ Status CueAlignmentHandler::OnFlushRequest(size_t stream_index) {
       DCHECK_EQ(stream.cues.size(), 0u)
           << "Video streams should not store cues";
     }
+  }
 
-    if (!stream.samples.empty()) {
-      LOG(WARNING) << "Unexpected samples seen on stream. Skipping samples";
-    }
+  // It is possible that we did not get all the cues. |hint_| will get updated
+  // when we call |UseNextSyncPoint|.
+  while (sync_points_->HasMore(hint_)) {
+    std::shared_ptr<const CueEvent> next_cue;
+    RETURN_IF_ERROR(GetNextCue(hint_, sync_points_, &next_cue));
+    RETURN_IF_ERROR(UseNewSyncPoint(std::move(next_cue)));
   }
 
-  // Go through all the streams and dispatch any remaining cues.
+  // Now that there are new cues, it may be possible to dispatch some of the
+  // samples that may be left waiting.
   for (StreamState& stream : stream_states_) {
+    RETURN_IF_ERROR(RunThroughSamples(&stream));
+    DCHECK_EQ(stream.samples.size(), 0u);
+
+    // It is possible for there to be cues that come after all the samples. Make
+    // sure to send them out too.
     while (stream.cues.size()) {
       RETURN_IF_ERROR(Dispatch(std::move(stream.cues.front())));
       stream.cues.pop_front();
@@ -177,12 +200,8 @@ Status CueAlignmentHandler::OnNonVideoSample(
   // to wait for all streams to converge on a hint so that we can get the next
   // sync point.
   if (EveryoneWaitingAtHint()) {
-    std::shared_ptr<const CueEvent> next_sync = sync_points_->GetNext(hint_);
-    if (!next_sync) {
-      // This happens only if the job is cancelled.
-      return Status(error::CANCELLED, "SyncPointQueue is cancelled.");
-    }
-
+    std::shared_ptr<const CueEvent> next_sync;
+    RETURN_IF_ERROR(GetNextCue(hint_, sync_points_, &next_sync));
     RETURN_IF_ERROR(UseNewSyncPoint(next_sync));
   }
 
diff --git a/packager/media/chunking/cue_alignment_handler_unittest.cc b/packager/media/chunking/cue_alignment_handler_unittest.cc
index fd256a8f35a..5bba5311b8c 100644
--- a/packager/media/chunking/cue_alignment_handler_unittest.cc
+++ b/packager/media/chunking/cue_alignment_handler_unittest.cc
@@ -493,6 +493,65 @@ TEST_F(CueAlignmentHandlerTest, TextInputWithCues) {
   ASSERT_OK(FlushAll({kTextStream}));
 }
 
+TEST_F(CueAlignmentHandlerTest, TextInputWithCueAfterLastStart) {
+  const size_t kTextStream = 0;
+
+  const int64_t kSampleDuration = 1000;
+
+  const int64_t kSample0Start = 0;
+  const int64_t kSample0End = kSample0Start + kSampleDuration;
+  const int64_t kSample1Start = kSample0End;
+  const int64_t kSample1End = kSample1Start + kSampleDuration;
+  const int64_t kSample2Start = kSample1End;
+  const int64_t kSample2End = kSample2Start + kSampleDuration;
+
+  const double kCueTimeInSeconds =
+      static_cast<double>(kSample2Start + kSample2End) / kMsTimeScale;
+
+  // Put the cue between the start and end of the last sample.
+  Cuepoint cue;
+  cue.start_time_in_seconds = kCueTimeInSeconds;
+
+  AdCueGeneratorParams params;
+  params.cue_points.push_back(cue);
+
+  SyncPointQueue sync_points(params);
+  std::shared_ptr<MediaHandler> handler =
+      std::make_shared<CueAlignmentHandler>(&sync_points);
+  SetUpAndInitializeGraph(handler, kOneInput, kOneOutput);
+
+  {
+    testing::InSequence s;
+
+    EXPECT_CALL(*Output(kTextStream),
+                OnProcess(IsStreamInfo(kParent, kMsTimeScale, !kEncrypted)));
+    EXPECT_CALL(*Output(kTextStream),
+                OnProcess(IsTextSample(kNoId, kSample0Start, kSample0End,
+                                       kNoSettings, kNoPayload)));
+    EXPECT_CALL(*Output(kTextStream),
+                OnProcess(IsTextSample(kNoId, kSample1Start, kSample1End,
+                                       kNoSettings, kNoPayload)));
+    EXPECT_CALL(*Output(kTextStream),
+                OnProcess(IsTextSample(kNoId, kSample2Start, kSample2End,
+                                       kNoSettings, kNoPayload)));
+    EXPECT_CALL(*Output(kTextStream),
+                OnProcess(IsCueEvent(kParent, kCueTimeInSeconds)));
+    EXPECT_CALL(*Output(kTextStream), OnFlush(kParent));
+  }
+
+  auto stream_info = GetTextStreamInfo();
+  auto sample_0 = GetTextSample(kNoId, kSample0Start, kSample0End, kNoPayload);
+  auto sample_1 = GetTextSample(kNoId, kSample1Start, kSample1End, kNoPayload);
+  auto sample_2 = GetTextSample(kNoId, kSample2Start, kSample2End, kNoPayload);
+
+  ASSERT_OK(DispatchStreamInfo(kTextStream, std::move(stream_info)));
+  ASSERT_OK(DispatchTextSample(kTextStream, std::move(sample_0)));
+  ASSERT_OK(DispatchTextSample(kTextStream, std::move(sample_1)));
+  ASSERT_OK(DispatchTextSample(kTextStream, std::move(sample_2)));
+
+  ASSERT_OK(FlushAll({kTextStream}));
+}
+
 TEST_F(CueAlignmentHandlerTest, TextAudioVideoInputWithCues) {
   const size_t kTextStream = 0;
   const size_t kAudioStream = 1;
diff --git a/packager/media/chunking/sync_point_queue.cc b/packager/media/chunking/sync_point_queue.cc
index 58ebfb2621d..dff5faf23db 100644
--- a/packager/media/chunking/sync_point_queue.cc
+++ b/packager/media/chunking/sync_point_queue.cc
@@ -87,6 +87,10 @@ std::shared_ptr<const CueEvent> SyncPointQueue::PromoteAt(
   return PromoteAtNoLocking(time_in_seconds);
 }
 
+bool SyncPointQueue::HasMore(double hint_in_seconds) const {
+  return hint_in_seconds < std::numeric_limits<double>::max();
+}
+
 std::shared_ptr<const CueEvent> SyncPointQueue::PromoteAtNoLocking(
     double time_in_seconds) {
   lock_.AssertAcquired();
diff --git a/packager/media/chunking/sync_point_queue.h b/packager/media/chunking/sync_point_queue.h
index a8ccb93618f..45e8a6fd0b6 100644
--- a/packager/media/chunking/sync_point_queue.h
+++ b/packager/media/chunking/sync_point_queue.h
@@ -47,6 +47,11 @@ class SyncPointQueue {
   /// unpromoted cues before the cue will be discarded.
   std::shared_ptr<const CueEvent> PromoteAt(double time_in_seconds);
 
+  /// @return True if there are more cues after the given hint. The hint must
+  ///         be a hint returned from |GetHint|. Using any other value results
+  ///         in undefined behavior.
+  bool HasMore(double hint_in_seconds) const;
+
  private:
   SyncPointQueue(const SyncPointQueue&) = delete;
   SyncPointQueue& operator=(const SyncPointQueue&) = delete;