twitter · johnynek · May 15, 2018 · May 15, 2018 · May 15, 2018 · MansurAshraf
diff --git a/algebird-generic/src/main/scala/com/twitter/algebird/generic/ApplicativeAggregators.scala b/algebird-generic/src/main/scala/com/twitter/algebird/generic/ApplicativeAggregators.scala
@@ -0,0 +1,72 @@
+package com.twitter.algebird.generic
+
+import shapeless._
+import com.twitter.algebird._
+
+import Shapeless._
+
+/**
+ * This object allows combining disparate aggregators that share
+ * a common input type.
+ *
+ * This diagram illustrates the "shape" of this combinator:
+ *
+ *      / b1 -> z1
+ *  a1 -> b2 -> z2
+ *      \ b3 -> z3
+ *
+ * Let's use the following example code to demonstrate the usage here:
+ *
+ *   val a: MonoidAggregator[Animal, ColorStats, ColorResult] = ...
+ *   val b: MonoidAggregator[Animal, ShapeStats, ShapeResult] = ...
+ *
+ *   val m1: Aggregator[Animal, ColorStats :: ShapeStats :: HNil, ColorResult :: ShapeResult :: HNil] =
+ *     ApplicativeAggregators(a :: b :: HNil)
+ *
+ *  The name comes from the fact that this is the standard "Applicative"
+ *  product operation (which algebird calls .join). For non-monoid Aggregators
+ *  algebird GeneratedTupleAggregator.fromN functions do the same thing
+ */
+object ApplicativeAggregators {
+  /** Joins every MonoidAggregator in `hlist` (all reading the same `A0`) into one, using the implicit evidence. */
+  def apply[A0, A1 <: HList, A2 <: HList, H <: HList](hlist: H)(
+      implicit witness: Evidence[H, A0, A1, A2]): MonoidAggregator[A0, A1, A2] = witness.apply(hlist)
+
+  /**
+   * Compile-time proof that every element of `H` is a MonoidAggregator reading `B0`, with `B1`
+   * listing their intermediate types and `B2` their presented types; `apply` builds the join.
+   */
+  sealed abstract class Evidence[H <: HList, B0, B1 <: HList, B2 <: HList] {
+    def apply(h: H): MonoidAggregator[B0, B1, B2]
+  }
+
+  object Evidence {
+    implicit def hsingle[A0, A1, A2]
+      : Evidence[MonoidAggregator[A0, A1, A2] :: HNil, A0, A1 :: HNil, A2 :: HNil] =
+      new Evidence[MonoidAggregator[A0, A1, A2] :: HNil, A0, A1 :: HNil, A2 :: HNil] {
+        def apply(
+            aggs: MonoidAggregator[A0, A1, A2] :: HNil): MonoidAggregator[A0, A1 :: HNil, A2 :: HNil] =
+          new MonoidAggregator[A0, A1 :: HNil, A2 :: HNil] {
+            val sole = aggs.head // declared before `monoid`, which reads it during construction
+            def prepare(in: A0): A1 :: HNil = sole.prepare(in) :: HNil
+            def present(mid: A1 :: HNil): A2 :: HNil = sole.present(mid.head) :: HNil
+            val monoid: Monoid[A1 :: HNil] = hconsMonoid(sole.monoid, hnilRing)
+          }
+      }
+
+    implicit def cons[A0, A1, B1 <: HList, A2, B2 <: HList, T <: HList](
+        implicit rest: Evidence[T, A0, B1, B2])
+      : Evidence[MonoidAggregator[A0, A1, A2] :: T, A0, A1 :: B1, A2 :: B2] =
+      new Evidence[MonoidAggregator[A0, A1, A2] :: T, A0, A1 :: B1, A2 :: B2] {
+        def apply(aggs: MonoidAggregator[A0, A1, A2] :: T): MonoidAggregator[A0, A1 :: B1, A2 :: B2] = {
+          val headAgg = aggs.head
+          val tailAgg = rest(aggs.tail)
+          new MonoidAggregator[A0, A1 :: B1, A2 :: B2] {
+            def prepare(in: A0): A1 :: B1 = headAgg.prepare(in) :: tailAgg.prepare(in)
+            def present(mid: A1 :: B1): A2 :: B2 = headAgg.present(mid.head) :: tailAgg.present(mid.tail)
+            val monoid: Monoid[A1 :: B1] = hconsMonoid(headAgg.monoid, tailAgg.monoid)
+          }
+        }
+      }
+  }
+}
diff --git a/algebird-generic/src/main/scala/com/twitter/algebird/generic/CinchedAggregators.scala b/algebird-generic/src/main/scala/com/twitter/algebird/generic/CinchedAggregators.scala
@@ -0,0 +1,183 @@
+package com.twitter.algebird.generic
+
+import shapeless._
+import com.twitter.algebird._
+
+/**
+ * This object allows combining disparate aggregators that share
+ * a common middle aggregation type (and Monoid, which we assume
+ * to be the same).
+ *
+ * This diagram illustrates the "shape" of this combinator:
+ *
+ *  a1 \      / z1
+ *  a2 -> b* -> z2
+ *  a3 /      \ z3
+ *
+ * Cinched may be useful in cases where the B type is something like
+ * Moments, where we want to compute things like Z-score, mean,
+ * std-dev, etc. from moments.
+ *
+ * Let's use the following example code to demonstrate the usage
+ * here:
+ *
+ *   val a: MonoidAggregator[Dog, Stats, DogResult] = ...
+ *   val b: MonoidAggregator[Cat, Stats, CatResult] = ...
+ *
+ * For products (things like tuples or heterogeneous lists) we use
+ * .allOf to create a new aggregator:
+ *
+ *   val m1: Aggregator[Dog :: Cat :: HNil, Stats, DogResult :: CatResult :: HNil] =
+ *     CinchedAggregators.allOf(a :: b :: HNil)
+ *
+ * For coproducts (things like Either or sealed traits) we can do the following:
+ *
+ *   val m2: MonoidAggregator[Dog :+: Cat :+: CNil, Stats, DogResult :: CatResult :: HNil] =
+ *     CinchedAggregators.oneOf(a :: b :: HNil)
+ *
+ * ...and get a single aggregator that works for both dogs and cats (wrapped in a coproduct)!
+ *
+ * We can also say:
+ *
+ *   val m2: MonoidAggregator[Animal, Stats, DogResult :: CatResult :: HNil] =
+ *     CinchedAggregators.oneOf(a :: b :: HNil).composePrepare(Generic[Animal].to)
+ *
+ * ...and get a single aggregator that works for all animals directly!
+ *
+ * You may need to fiddle with the order of your HList (e.g. a :: b ::
+ * HNil) to get this to compile -- it must match the order that
+ * Generic[Animal].to expects.
+ */
+object CinchedAggregators {
+
+  /**
+   * Builds an aggregator for a union/sealed-trait input type out of
+   * aggregators for the individual branches of that union.
+   *
+   * The input is a Coproduct, which in shapeless is a generalization
+   * of Either (similar to how HList is a generalization of Tuple2).
+   *
+   * To create an instance of `A :+: B :+: CNil` you do:
+   * `shapeless.Inl(a)` or `shapeless.Inr(shapeless.Inl(b))` (CNil has
+   * no actual instance and is like another name for Nothing, in
+   * the same way that HNil is like another name for Unit).
+   *
+   * Typically, we expect you to use `shapeless.Generic[T].to` to create
+   * these from sealed traits.
+   *
+   */
+  def oneOf[A0 <: Coproduct, A1, A2 <: HList, H <: HList](hlist: H)(
+      implicit witness: OneOfEvidence[H, A0, A1, A2]): MonoidAggregator[A0, A1, A2] =
+    witness.apply(hlist)
+
+  /**
+   * Builds an aggregator whose input is an HList holding one value per member aggregator.
+   * Such an HList can be obtained from a tuple `t` with:
+   *
+   *   import shapeless._
+   *   import syntax.std.product._
+   *
+   *   t.productElements
+   */
+  def allOf[A0 <: HList, A1, A2 <: HList, H <: HList](hlist: H)(
+      implicit witness: AllOfEvidence[H, A0, A1, A2]): MonoidAggregator[A0, A1, A2] =
+    witness.apply(hlist)
+
+  // Coherence note for the two combinators above:
+  // - allOf (product input) sums the prepared values of all members, so they must share one coherent Monoid[A1];
+  // - oneOf (coproduct input) prepares each input through exactly one member, so prepare itself combines nothing.
+
+  /**
+   * Witness that the aggregators in H can be fused into one aggregator over a coproduct input.
+   *
+   * 1. H is a heterogeneous list of aggregators.
+   * 2. B0 is the coproduct of their input types.
+   * 3. B1 is the single intermediate type they all share; B2 is the heterogeneous list of their outputs.
+   */
+  sealed abstract class OneOfEvidence[H <: HList, B0 <: Coproduct, B1, B2 <: HList] {
+    def apply(h: H): MonoidAggregator[B0, B1, B2]
+  }
+
+  object OneOfEvidence {
+    implicit def hsingle[A0, A1, A2]
+      : OneOfEvidence[MonoidAggregator[A0, A1, A2] :: HNil, A0 :+: CNil, A1, A2 :: HNil] =
+      new OneOfEvidence[MonoidAggregator[A0, A1, A2] :: HNil, A0 :+: CNil, A1, A2 :: HNil] {
+        def apply(
+            aggs: MonoidAggregator[A0, A1, A2] :: HNil): MonoidAggregator[A0 :+: CNil, A1, A2 :: HNil] =
+          new MonoidAggregator[A0 :+: CNil, A1, A2 :: HNil] {
+            val sole = aggs.head
+            def prepare(in: A0 :+: CNil): A1 = in match {
+              case Inr(absurd) => absurd.impossible // CNil has no values, so this arm cannot run
+              case Inl(value)  => sole.prepare(value)
+            }
+            def present(mid: A1): A2 :: HNil = sole.present(mid) :: HNil
+            def monoid: Monoid[A1] = sole.monoid
+          }
+      }
+
+    implicit def cons[A0, B0 <: Coproduct, A1, A2, B2 <: HList, T <: HList](
+        implicit rest: OneOfEvidence[T, B0, A1, B2])
+      : OneOfEvidence[MonoidAggregator[A0, A1, A2] :: T, A0 :+: B0, A1, A2 :: B2] =
+      new OneOfEvidence[MonoidAggregator[A0, A1, A2] :: T, A0 :+: B0, A1, A2 :: B2] {
+        def apply(aggs: MonoidAggregator[A0, A1, A2] :: T): MonoidAggregator[A0 :+: B0, A1, A2 :: B2] = {
+          val headAgg = aggs.head
+          val tailAgg = rest(aggs.tail)
+          new MonoidAggregator[A0 :+: B0, A1, A2 :: B2] {
+            def prepare(in: A0 :+: B0): A1 = in match {
+              case Inl(here)  => headAgg.prepare(here)
+              case Inr(there) => tailAgg.prepare(there)
+            }
+            def present(mid: A1): A2 :: B2 =
+              headAgg.present(mid) :: tailAgg.present(mid)
+            def monoid: Monoid[A1] =
+              headAgg.monoid // assumes headAgg.monoid == tailAgg.monoid
+          }
+        }
+      }
+  }
+
+  /**
+   * Witness that the aggregators in H can be fused into one aggregator over a product input.
+   *
+   * 1. H is a heterogeneous list of aggregators.
+   * 2. B0 is the heterogeneous list of their input types.
+   * 3. B1 is the single intermediate type they all share; B2 is the heterogeneous list of their outputs.
+   */
+  sealed abstract class AllOfEvidence[H <: HList, B0 <: HList, B1, B2 <: HList] {
+    def apply(h: H): MonoidAggregator[B0, B1, B2]
+  }
+
+  object AllOfEvidence {
+    implicit def hsingle[A0, A1, A2]
+      : AllOfEvidence[MonoidAggregator[A0, A1, A2] :: HNil, A0 :: HNil, A1, A2 :: HNil] =
+      new AllOfEvidence[MonoidAggregator[A0, A1, A2] :: HNil, A0 :: HNil, A1, A2 :: HNil] {
+        def apply(
+            aggs: MonoidAggregator[A0, A1, A2] :: HNil): MonoidAggregator[A0 :: HNil, A1, A2 :: HNil] =
+          new MonoidAggregator[A0 :: HNil, A1, A2 :: HNil] {
+            val sole = aggs.head
+            def prepare(in: A0 :: HNil): A1 = sole.prepare(in.head)
+            def present(mid: A1): A2 :: HNil = sole.present(mid) :: HNil
+            def monoid: Monoid[A1] = sole.monoid
+          }
+      }
+
+    implicit def cons[A0, B0 <: HList, A1, A2, B2 <: HList, T <: HList](
+        implicit rest: AllOfEvidence[T, B0, A1, B2])
+      : AllOfEvidence[MonoidAggregator[A0, A1, A2] :: T, A0 :: B0, A1, A2 :: B2] =
+      new AllOfEvidence[MonoidAggregator[A0, A1, A2] :: T, A0 :: B0, A1, A2 :: B2] {
+        def apply(aggs: MonoidAggregator[A0, A1, A2] :: T): MonoidAggregator[A0 :: B0, A1, A2 :: B2] = {
+          val headAgg = aggs.head
+          val tailAgg = rest(aggs.tail)
+          new MonoidAggregator[A0 :: B0, A1, A2 :: B2] {
+            // assumes headAgg.monoid == tailAgg.monoid
+            def prepare(in: A0 :: B0): A1 =
+              headAgg.monoid.plus(headAgg.prepare(in.head), tailAgg.prepare(in.tail))
+            def present(mid: A1): A2 :: B2 =
+              headAgg.present(mid) :: tailAgg.present(mid)
+            def monoid: Monoid[A1] =
+              headAgg.monoid
+          }
+        }
+      }
+  }
+}
diff --git a/algebird-generic/src/main/scala/com/twitter/algebird/generic/CombinedAggregators.scala b/algebird-generic/src/main/scala/com/twitter/algebird/generic/CombinedAggregators.scala
@@ -0,0 +1,76 @@
+package com.twitter.algebird.generic
+
+import shapeless._
+import com.twitter.algebird._
+
+import Shapeless._
+
+/**
+ * This object allows combining disparate aggregators that share
+ * a common input type and a semigroup on the common output type.
+ *
+ * This diagram illustrates the "shape" of this combinator:
+ *
+ *       / b1 \
+ *   a -> b2 -> c
+ *       \ b3 /
+ *
+ * This is not as common as ApplicativeAggregators or ParallelAggregators,
+ * but could arise if for instance you wanted to evaluate a weighted threshold:
+ * `b1, b2, ...` might be the individual feature values, then the functions
+ * `b1 => c` could be weighting the feature into the return space. We then
+ * sum the weights (this would be something like a linear or logistic regression).
+ *
+ * Let's use the following example code to demonstrate the usage here:
+ *
+ *   val a: MonoidAggregator[Animal, ColorStats, Result] = ...
+ *   val b: MonoidAggregator[Animal, ShapeStats, Result] = ...
+ *
+ *   val m1: Aggregator[Animal, ColorStats :: ShapeStats :: HNil, Result] =
+ *     CombinedAggregators(a :: b :: HNil)
+ *
+ *  The name comes from the fact that "combine" is sometimes used as the name
+ *  for general semigroups.
+ *
+ */
+object CombinedAggregators {
+  /** Merges every MonoidAggregator in `hlist` (same input `A0`, same output `A2`) into one, using the evidence. */
+  def apply[A0, A1 <: HList, A2, H <: HList](hlist: H)(
+      implicit witness: Evidence[H, A0, A1, A2]): MonoidAggregator[A0, A1, A2] = witness.apply(hlist)
+
+  /**
+   * Compile-time proof that every element of `H` is a MonoidAggregator from `B0` to `B2`, with
+   * `B1` listing their intermediate types; `apply` builds the combined aggregator.
+   */
+  sealed abstract class Evidence[H <: HList, B0, B1 <: HList, B2] {
+    def apply(h: H): MonoidAggregator[B0, B1, B2]
+  }
+
+  object Evidence {
+    implicit def hsingle[A0, A1, A2]: Evidence[MonoidAggregator[A0, A1, A2] :: HNil, A0, A1 :: HNil, A2] =
+      new Evidence[MonoidAggregator[A0, A1, A2] :: HNil, A0, A1 :: HNil, A2] {
+        def apply(aggs: MonoidAggregator[A0, A1, A2] :: HNil): MonoidAggregator[A0, A1 :: HNil, A2] =
+          new MonoidAggregator[A0, A1 :: HNil, A2] {
+            val sole = aggs.head // declared before `monoid`, which reads it during construction
+            def prepare(in: A0): A1 :: HNil = sole.prepare(in) :: HNil
+            def present(mid: A1 :: HNil): A2 = sole.present(mid.head)
+            val monoid: Monoid[A1 :: HNil] = hconsMonoid(sole.monoid, hnilRing)
+          }
+      }
+
+    implicit def cons[A0, A1, B1 <: HList, A2, T <: HList](
+        implicit rest: Evidence[T, A0, B1, A2],
+        z: Semigroup[A2]): Evidence[MonoidAggregator[A0, A1, A2] :: T, A0, A1 :: B1, A2] =
+      new Evidence[MonoidAggregator[A0, A1, A2] :: T, A0, A1 :: B1, A2] {
+        def apply(aggs: MonoidAggregator[A0, A1, A2] :: T): MonoidAggregator[A0, A1 :: B1, A2] = {
+          val headAgg = aggs.head
+          val tailAgg = rest(aggs.tail)
+          new MonoidAggregator[A0, A1 :: B1, A2] {
+            def prepare(in: A0): A1 :: B1 = headAgg.prepare(in) :: tailAgg.prepare(in)
+            def present(mid: A1 :: B1): A2 = z.plus(headAgg.present(mid.head), tailAgg.present(mid.tail))
+            val monoid: Monoid[A1 :: B1] = hconsMonoid(headAgg.monoid, tailAgg.monoid)
+          }
+        }
+      }
+  }
+}