From adc503cc370273a55c404c0f85bdd0c8df9faaf0 Mon Sep 17 00:00:00 2001 From: Menooker Date: Tue, 8 Oct 2019 11:05:12 +0800 Subject: [PATCH] add maskutils (#2921) * add maskutils * update tests & docs * fix typo in document --- .../dataset/segmentation/MaskUtils.scala | 362 ++++++++++++++++++ .../SegmentationDatasetSpec.scala | 154 ++++++++ 2 files changed, 516 insertions(+) create mode 100644 scala/dllib/src/main/scala/com/intel/analytics/bigdl/dllib/feature/dataset/segmentation/MaskUtils.scala create mode 100644 scala/dllib/src/test/scala/com/intel/analytics/bigdl/dllib/dataset/segmentation/SegmentationDatasetSpec.scala diff --git a/scala/dllib/src/main/scala/com/intel/analytics/bigdl/dllib/feature/dataset/segmentation/MaskUtils.scala b/scala/dllib/src/main/scala/com/intel/analytics/bigdl/dllib/feature/dataset/segmentation/MaskUtils.scala new file mode 100644 index 00000000000..292863551d1 --- /dev/null +++ b/scala/dllib/src/main/scala/com/intel/analytics/bigdl/dllib/feature/dataset/segmentation/MaskUtils.scala @@ -0,0 +1,362 @@ +/* + * Copyright 2016 The BigDL Authors. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +package com.intel.analytics.bigdl.dataset.segmentation + +import com.intel.analytics.bigdl.tensor.Tensor +import scala.collection.mutable.ArrayBuffer + + +abstract class SegmentationMasks extends Serializable { + /** + * Convert to an RLE encoded tensor + */ + def toRLETensor: Tensor[Float] +} + +/** + * A mask of regions defined by one or more polygons. The masked object(s) should have the same + * label. + * @param poly An array of polygons. The inner array defines one polygon, with [x1,y1,x2,y2,...] + * @param height the height of the image + * @param width the width of the image + */ +class PolyMasks(val poly: Array[Array[Float]], val height: Int, val width: Int) extends + SegmentationMasks { + override def toRLETensor: Tensor[Float] = { + require(height > 0 && width > 0, "the height and width must > 0 for toRLETensor()") + MaskUtils.mergeRLEs(MaskUtils.poly2RLE(this, height, width), false).toRLETensor + } +} + +object PolyMasks { + def apply(poly: Array[Array[Float]], height: Int, width: Int): PolyMasks = + new PolyMasks(poly, height, width) +} + +/** + * A mask of regions defined by RLE. The masked object(s) should have the same label. + * This class corresponds to "uncompressed RLE" of COCO dataset. + * RLE is a compact format for binary masks. Binary masks define the region by assigning a boolean + * to every pixel of the image. RLE compresses the binary masks by instead recording the runs of + * trues and falses in the binary masks. RLE is an array of integers. + * The first element is the length of the run of falses starting from the first pixel. + * The second element of RLE is the length of the first run of trues. + * e.g. binary masks: 00001110000011 + * RLE: ---4--3----5-2 ====> 4,3,5,2 + * + * Also note that we don't use COCO's "compact" RLE string here because this RLE class has better + * time & space performance. 
+ * + * @param counts the RLE counts, each an unsigned 32-bit value stored in an Int + * @param height height of the image + * @param width width of the image + */ +class RLEMasks(val counts: Array[Int], val height: Int, val width: Int) extends SegmentationMasks { + override def toRLETensor: Tensor[Float] = { + Tensor(counts.map(MaskUtils.uint2long(_).toFloat), Array(counts.length)) + } + + /** + * Get an element in the counts. Process the overflowed int + * + * @param idx index into counts + * @return the count at idx, read as an unsigned 32-bit value + */ + def get(idx: Int): Long = { + MaskUtils.uint2long(counts(idx)) + } +} + +object RLEMasks { + def apply(counts: Array[Int], height: Int, width: Int): RLEMasks = + new RLEMasks(counts, height, width) +} + + +object MaskUtils { + + /** + * Convert an unsigned int to long (note: int may overflow) + * + * @param i an Int whose bits hold an unsigned 32-bit value + * @return i itself when i >= 0, otherwise i + 2^32 + */ + def uint2long(i: Int): Long = { + if (i >= 0) { + i + } else { + i.toLong - Int.MinValue.toLong + Int.MaxValue.toLong + 1 + } + } + + /** + * Convert "uncompressed" RLE to "compact" RLE string of COCO + * Implementation based on COCO's MaskApi.c + * @param rle the uncompressed RLE to encode + * @return RLE string + */ + // scalastyle:off methodName + def RLE2String(rle: RLEMasks): String = { + // Similar to LEB128 but using 6 bits/char and ascii chars 48-111. 
+ val m = rle.counts.length + val s = new ArrayBuffer[Char]() + for (i <- 0 until m) { + var x = rle.get(i) + if (i > 2) x -= rle.get(i - 2) + var more = true + while (more) { + var c = (x & 0x1f) + x >>= 5 + more = if ((c & 0x10) != 0) x != -1 else x != 0 + if (more) c |= 0x20 + c += 48 + s += c.toChar + } + } + new String(s.toArray) + } + // scalastyle:on methodName + + /** + * Convert "compact" RLE string of COCO to "uncompressed" RLE + * Implementation based on COCO's MaskApi.c + * @param s the compact RLE string + * @param h height of the image + * @param w width of the image + * @return the uncompressed RLE + */ + def string2RLE(s: String, h: Int, w: Int): RLEMasks = { + val cnts = new ArrayBuffer[Int]() + var m = 0 + var p = 0 + while (p < s.length) { + var x = 0L + var k = 0 + var more = true + while (more) { + val c = s(p).toLong - 48 + x |= (c & 0x1f) << (5 * k) + more = (c & 0x20) != 0 + k += 1 + p += 1 + if (!more && (c & 0x10) != 0) x |= -1L << (5 * k) // Long: Int shift wraps at 32 bits + } + if (m > 2) x += uint2long(cnts(m - 2)) + cnts += x.toInt + m += 1 + } + RLEMasks(cnts.toArray, h, w) + } + + /** + * Convert a PolyMasks to an array of RLEMasks. Note that a PolyMasks may have multiple + * polygons. This function does not merge them. Instead, it returns the RLE for each polygon. 
+ * Implementation based on COCO's MaskApi.c + * @param poly the PolyMasks whose polygons are converted + * @param height height of the image + * @param width width of the image + * @return The converted RLEs, one per polygon + */ + def poly2RLE(poly: PolyMasks, height: Int, width: Int): Array[RLEMasks] = { + poly.poly.map(xy => { + // upsample and get discrete points densely along entire boundary + val scale = 5d + val (u, v, upsamplePoints) = { + val nPoints = xy.length / 2 + val x = new Array[Long](nPoints + 1) + val y = new Array[Long](nPoints + 1) + for (j <- 0 until nPoints) { + x(j) = Math.floor(scale * xy(j * 2 + 0) + .5).toLong + y(j) = Math.floor(scale * xy(j * 2 + 1) + .5).toLong + } + x(nPoints) = x(0) + y(nPoints) = y(0) + val m1 = (0 until nPoints).map { case j => + Math.max(Math.abs(x(j) - x(j + 1)), Math.abs(y(j) - y(j + 1))) + 1 + }.sum.toInt + val u = new Array[Long](m1) + val v = new Array[Long](m1) + + var m = 0 + for (j <- 0 until nPoints) { + val (xs, xe, ys, ye, dx, dy, flip) = { + val _xs = x(j) + val _xe = x(j + 1) + val _ys = y(j) + val _ye = y(j + 1) + val _dx = Math.abs(_xe - _xs) + val _dy = Math.abs(_ys - _ye) + val _flip = (_dx >= _dy && _xs > _xe) || (_dx < _dy && _ys > _ye) + if (_flip) (_xe, _xs, _ye, _ys, _dx, _dy, _flip) + else (_xs, _xe, _ys, _ye, _dx, _dy, _flip) + } + + if (dx >= dy) { + for (d <- 0 to dx.toInt) { + val s = (ye - ys).toDouble / dx + val t = if (flip) dx - d else d + u(m) = t + xs + v(m) = Math.floor(ys + s * t + .5).toLong + m += 1 + } + } + else { + for (d <- 0 to dy.toInt) { + val s = (xe - xs).toDouble / dy + val t = if (flip) dy - d else d + v(m) = t + ys + u(m) = Math.floor(xs + s * t + .5).toLong + m += 1 + } + } + } + (u, v, m) + } + // get points along y-boundary and downsample + val (downsampleX, downsampleY, downsamplePoints) = { + // use an independent scope + val nPoints = upsamplePoints + var m = 0 + val x = new Array[Long](nPoints) + val y = new Array[Long](nPoints) + for (j <- 1 until nPoints) { + if (u(j) != u(j - 1)) { + // Should u(j) - 1 be u(j - 1) 
???? + val _xd = if (u(j) < u(j - 1)) u(j) else u(j) - 1 + val xd = (_xd.toDouble + .5) / scale - .5 + if (Math.floor(xd) != xd || xd < 0 || xd > width - 1) { + // continue + } else { + var yd = (if (v(j) < v(j - 1)) v(j) else v(j - 1)).toDouble + yd = (yd + .5) / scale - .5 + if (yd < 0) { + yd = 0 + } else if (yd > height) { + yd = height + } + yd = Math.ceil(yd) + x(m) = xd.toInt + y(m) = yd.toInt + m += 1 + } + } + } + (x, y, m) + } + + { + // compute rle encoding given y-boundary points + val x = downsampleX + val y = downsampleY + val nPoints = downsamplePoints + 1 + val a = new Array[Long](nPoints) + for (j <- 0 until nPoints - 1) + a(j) = x(j) * height + y(j) + a(nPoints - 1) = height.toLong * width // widen first: an Int product can overflow + scala.util.Sorting.quickSort(a) + + var p = 0L + for (j <- 0 until nPoints) { + val t = a(j) + a(j) -= p + p = t + } + val b = new ArrayBuffer[Int]() + var j = 1 + var m = 1 + b += a(0).toInt + while (j < nPoints) { + if (a(j) > 0) { + b += a(j).toInt + m += 1 + j += 1 + } + else { + j += 1 + if (j < nPoints) { + b(m - 1) += a(j).toInt + j += 1 + } + } + } + RLEMasks(b.toArray, height, width) + } + }) + } + + /** + * Merge multiple RLEs into one (union or intersect) + * Implementation based on COCO's MaskApi.c + * @param R the RLEs to merge; must be non-empty and share one height and width + * @param intersect if true, do intersection; else find union + * @return the merged RLE + */ + def mergeRLEs(R: Array[RLEMasks], intersect: Boolean): RLEMasks = { + val n = R.length + if (n == 1) return R(0) + val h = R(0).height + val w = R(0).width + val cnts = new ArrayBuffer[Int]() + cnts.appendAll(R(0).counts) + for(i <- 1 until n) { + val B = R(i) + require(B.height == h && B.width == w, "The height and width of the merged RLEs must" + + " be the same") + val acnt = cnts.toArray + val am = cnts.length + cnts.clear() + var ca = uint2long(acnt(0)) + var cb = uint2long(B.counts(0)) + var (v, va, vb) = (false, false, false) + var a = 1 + var b = 1 + var cc = 0L + var ct = 1L + + while (ct > 0) { + val c = Math.min(ca, cb) + cc += c + 
ct = 0 + ca -= c + if (ca == 0 && a < am) { + ca = uint2long(acnt(a)) + a += 1 + va = !va + } + ct += ca + cb -= c + if (cb == 0 && b < B.counts.length) { + cb = B.get(b) + b += 1 + vb = !vb + } + ct += cb + val vp = v + if (intersect) { + v = va && vb + } else { + v = va || vb + } + if (v != vp || ct == 0) { + cnts += cc.toInt + cc = 0 + } + } + } + RLEMasks(cnts.toArray, h, w) + } +} diff --git a/scala/dllib/src/test/scala/com/intel/analytics/bigdl/dllib/dataset/segmentation/SegmentationDatasetSpec.scala b/scala/dllib/src/test/scala/com/intel/analytics/bigdl/dllib/dataset/segmentation/SegmentationDatasetSpec.scala new file mode 100644 index 00000000000..9cc81d7810d --- /dev/null +++ b/scala/dllib/src/test/scala/com/intel/analytics/bigdl/dllib/dataset/segmentation/SegmentationDatasetSpec.scala @@ -0,0 +1,154 @@ +/* + * Copyright 2016 The BigDL Authors. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +package com.intel.analytics.bigdl.dataset.segmentation + +import org.scalatest.{BeforeAndAfter, FlatSpec, Matchers} + +class SegmentationDatasetSpec extends FlatSpec with Matchers with BeforeAndAfter { + + val compressed1 = "Q:XX24Sm0e:jQ1]EajKSV69iZJV9T_OdF[>^NmB`2Z`0Y?a^OmR5lc6Zj2m[IckG0ZEXdl9l0j" + + "[SFT9_\\e1Z:XgZNh[OPM:d\\4O" + val compressed2 = "iaj0]TWZ2j9XZleMYG_oLf9U`]7mQbWg0b[gHlV[RYOQmZQ2TTU1oj]PNeVbEl[VNnZ" + + "]OkYLgfMja01fgW=\\1TofBY6c:Sheb0`n1Q[dol1PXc`0YQh]RNi^Z_OZeMOb?30nbR1^P^g0ShmNfPkYO" + + "^LWkkNXW3]_m0gUQ[2kdb?ZePeMhPZB^[NaQQMgZLkVlU54aUSjJ32" + + val arr1 = Array(321, 2312, 4, 3243, 345, 4325, 6, 54, 6345, 63, 546, 357, 6, 57, + 465, 7, + 46, 87, 568, 576, 9, 5789, 6789, 8679, 2, 346, 2, 4, 324234, 32, 4, 324, 54675, 654, 123, + 6, 27, 16, 4527, 15) + + val arr2 = Array(27193, 2432141, 314, 3541, 35, 452, 345, 243657, 24365462, 5435, + 325234, 2146524, 363254, 63547, 21451, 4535, 2345, 754, 0, 1324, 1, 435234, 45, 6, 246, + 345, 612345, 2345, 64563546, 546345, 2435, 2, 45, 1, 543, 4, 543, 35426, 768557, 357, + 42563, 243, 5546, 3547, 35735, 2462354, 546354, 5436, 97866, 3754, 635, 1, 5436246, + 5, 7, 8, 9) + "string2RLE" should "run well" in { + + val result = MaskUtils.string2RLE(compressed1, 100, 200) + result.counts should be(arr1) + result.height should be (100) + result.width should be (200) + + val result2 = MaskUtils.string2RLE(compressed2, 100, 200) + result2.counts should be (arr2) + result2.height should be (100) + result2.width should be (200) + } + + "RLE2String" should "run well" in { + MaskUtils.RLE2String(RLEMasks(arr2, 100, 200)) should be (compressed2) + MaskUtils.RLE2String(RLEMasks(arr1, 100, 200)) should be (compressed1) + } + + // real polygon data taken from instances_val2014.json + // annId = 455475 + val poly1 = Array(426.91, 58.24, 434.49, 77.74, 467.0, 80.99, 485.42, 86.41, 493.0, 129.75, + 521.17, 128.67, 532.01, 144.92, 545.01, 164.42, 552.6, 170.93, 588.35, 178.51, 629.53, + 165.51, 629.53, 177.43, 578.6, 214.27, 
558.01, 241.35, 526.59, 329.12, 512.51, 370.29, + 502.75, 415.8, 418.24, 409.3, 399.82, 414.72, 388.98, 420.14, 382.48, 424.47, 391.15, 430.97, + 414.99, 425.55, 447.49, 427.72, 449.66, 435.3, 431.24, 438.56, 421.49, 452.64, 422.57, + 456.98, 432.33, 464.56, 439.91, 458.06, 481.08, 465.64, 502.75, 464.56, 507.09, 473.23, + 639.28, 474.31, 639.28, 1.9, 431.24, 0.0 + ).map(_.toFloat) + + // annId = 692513 + val poly2 = Array( + 416.41, 449.28, 253.36, 422.87, 234.06, 412.2, 277.23, 406.61, 343.77, 411.69, 379.84, + 414.23, 384.41, 424.9, 397.11, 427.95, 410.31, 427.95, 445.36, 429.98, 454.0, 438.61, 431.65, + 438.61, 423.01, 449.28 + ).map(_.toFloat) + + "poly2RLE" should "run well" in { + val rle = MaskUtils.poly2RLE(PolyMasks(Array(poly1), 480, 640), 480, 640) + val targetRle = MaskUtils.string2RLE( + "Xnc51n>2N2O0O2N2O1N101N10O0100O100O01000O10O10O100000O010000O01000O1000000O1001O00ZBAk" + + "T1k9hNfE64_1S:[NhE84`1Q:X" + + "NjE95a1P:UNkE:5d1m9RNnE96e1l9RNnE87f1k9RNnE78g1j9RNmE7:g1i9RNmE6;h1h9RNmE5k6EUI;h6HXI8e6K[I5b6N^I2_61aI0[63eIOV64jINQ6" + + "5oILm57SJKh58XJIe59[JI`5:`JG\\5Z3BeL>\\3BdL>\\3BdL=]3CcL<^3DaL=_3CaL<`3D`L;a3E^L;c3E]" + + "L;c3E]L:d3F[L:f3FZL:f3FZL9g3GXL9i3GWL8j3HVL8j3HUL8l3HTL7m3ISL6n3JQL7o3IQL6P4JPL5Q4Ko" + + "K5Q4KnK5S4KmK4T4LlK3U4M_50000000000000000n>", 480, 640 + ) + rle(0).counts.length should be (targetRle.counts.length) + rle(0).counts.zip(targetRle.counts).foreach{case (rleCount, targetCount) => + rleCount should be (targetCount +- 1) + } + + val rle2 = MaskUtils.poly2RLE(PolyMasks(Array(poly2), 480, 640), 480, 640) + MaskUtils.RLE2String(rle2(0)) should be( + "la^31o>1O001N101O001O001O001N2O001O001O001O0O1000001O00000O10001O000000000O2O0000000000" + + "1O0000000000001O00000000010O00000000001O00000000001O01O00000001O00000000001O0001O0000" + + "0001O00000000001O0001O000001O00000000001O00000001O01O00000000001O0000000001O01O000000" + + "00001O000000000010O0000000001O00000002N2O1N3M2N000010O000001O0000010O0000000001O00000" + + 
"000001O00000000001O000000000001O0000000O1O1O1O1O1N2O1O1O10000000001O00000000000000001" + + "O1O1O1O1O1O1O1OZhf2") + + } + + "mergeRLEs" should "run well" in { + val rle1 = MaskUtils.poly2RLE(PolyMasks(Array(poly1), 480, 640), 480, 640)(0) + val rle2 = MaskUtils.poly2RLE(PolyMasks(Array(poly2), 480, 640), 480, 640)(0) + val merged = MaskUtils.mergeRLEs(Array(rle1, rle2), false) + val targetRle = MaskUtils.string2RLE( + "la^31o>1O001N101O001O001O001N2O001O001O001O0O1000001O00000O10001O000000000O2O0000000000" + + "1O0000000000001O00000000010O00000000001O00000000001O01O00000001O00000000001O0001O0000" + + "0001O00000000001O0001O000001O00000000001O00000001O01O00000000001O0000000001O01O000000" + + "00001O000000000010O0000000001O00000002N2O1N3M00O100O2N100O100O100O2O0O100O10001N10000" + + "O10001O[OTB6k=KUB5k=KUB5k=KUB5j=KWB5i=KWB6h=JXB6g=KYB5g=KYB5g=KYB5g=KYB5f=LZB4f=L[B3f" + + "=LZB4f=LZB40]Ob=?^B4OB_=:bB3OE^=8cB2NH_=6cB1NK^=4dB?T2YOh88TE`0e1IT9HVE?X1:_9WOYE`0j0" + + "h0k9hN[Ea0?T1S:[N^Eb0>V1Q:XNaEb0>X1P:UNbEc0>[1m9RNeEb0?\\1l9RNeEa0`0]1k9RNeE`0a0^1j9R" + + "NfE>a0`1i9RNfE=b0a1h9RNfEk6EUI;" + + "h6HXI8e6K[I5b6N^I2_61aI0[63eIOV64jINQ65oILm57SJKh58XJIe59[JI`5:`JG\\5Z3BeL>\\3BdL>\\3BdL=]3CcL" + + "<^3DaL=_3CaL<`3D`L;a3E^L;c3E]L;c3E]L:d3F[L:f3FZL:f3FZL9g3GXL9i3GWL8j3HVL8j3HUL8l3HTL7" + + "m3ISL6n3JQL7o3IQL6P4JPL5Q4KoK5Q4KnK5S4KmK4T4LlK3U4M_50000000000000000n>", 480, 640 + ) + merged.counts.length should be (targetRle.counts.length) + merged.counts.zip(targetRle.counts).foreach{case (rleCount, targetCount) => + rleCount should be (targetCount +- 1) + } + } + +} +