Skip to content

Commit

Permalink
add callZooFunc and change all callBigDlFunc to callZooFunc (intel-an…
Browse files Browse the repository at this point in the history
  • Loading branch information
qiuxin2012 committed Nov 26, 2019
1 parent 88fb510 commit 61826bf
Show file tree
Hide file tree
Showing 5 changed files with 116 additions and 89 deletions.
47 changes: 30 additions & 17 deletions python/dllib/src/bigdl/dllib/feature/common.py
Original file line number Diff line number Diff line change
Expand Up @@ -15,6 +15,7 @@
#

from bigdl.util.common import *
from zoo.common.utils import callZooFunc
from bigdl.dataset.dataset import DataSet
import sys

Expand All @@ -27,6 +28,7 @@ class Relation(object):
"""
It represents the relationship between two items.
"""

def __init__(self, id1, id2, label, bigdl_type="float"):
self.id1 = id1
self.id2 = id2
Expand Down Expand Up @@ -64,10 +66,10 @@ def read(path, sc=None, min_partitions=1, bigdl_type="float"):
texts. Only need to specify this when sc is not None. Default is 1.
"""
if sc:
jvalue = callBigDlFunc(bigdl_type, "readRelations", path, sc, min_partitions)
jvalue = callZooFunc(bigdl_type, "readRelations", path, sc, min_partitions)
res = jvalue.map(lambda x: Relation(str(x[0]), str(x[1]), int(x[2])))
else:
jvalue = callBigDlFunc(bigdl_type, "readRelations", path)
jvalue = callZooFunc(bigdl_type, "readRelations", path)
res = [Relation(str(x[0]), str(x[1]), int(x[2])) for x in jvalue]
return res

Expand All @@ -82,7 +84,7 @@ def read_parquet(path, sc, bigdl_type="float"):
:param sc: An instance of SparkContext.
:return: RDD of Relation.
"""
jvalue = callBigDlFunc(bigdl_type, "readRelationsParquet", path, sc)
jvalue = callZooFunc(bigdl_type, "readRelationsParquet", path, sc)
return jvalue.map(lambda x: Relation(str(x[0]), str(x[1]), int(x[2])))


Expand All @@ -91,9 +93,10 @@ class Preprocessing(JavaValue):
Preprocessing defines a data transformation applied during feature preprocessing. Python wrapper
for the Scala Preprocessing.
"""

def __init__(self, bigdl_type="float", *args):
self.bigdl_type = bigdl_type
self.value = callBigDlFunc(bigdl_type, JavaValue.jvm_class_constructor(self), *args)
self.value = callZooFunc(bigdl_type, JavaValue.jvm_class_constructor(self), *args)

def __call__(self, input):
"""
Expand All @@ -106,10 +109,10 @@ def __call__(self, input):
from zoo.feature.text import TextSet
# if type(input) is ImageSet:
if isinstance(input, ImageSet):
jset = callBigDlFunc(self.bigdl_type, "transformImageSet", self.value, input)
jset = callZooFunc(self.bigdl_type, "transformImageSet", self.value, input)
return ImageSet(jvalue=jset)
elif isinstance(input, TextSet):
jset = callBigDlFunc(self.bigdl_type, "transformTextSet", self.value, input)
jset = callZooFunc(self.bigdl_type, "transformTextSet", self.value, input)
return TextSet(jvalue=jset)


Expand All @@ -118,6 +121,7 @@ class ChainedPreprocessing(Preprocessing):
chains two Preprocessing together. The output type of the first
Preprocessing should be the same as the input type of the second Preprocessing.
"""

def __init__(self, transformers, bigdl_type="float"):
for transfomer in transformers:
assert isinstance(transfomer, Preprocessing), \
Expand All @@ -130,6 +134,7 @@ class ScalarToTensor(Preprocessing):
"""
a Preprocessing that converts a number to a Tensor.
"""

def __init__(self, bigdl_type="float"):
super(ScalarToTensor, self).__init__(bigdl_type)

Expand All @@ -139,6 +144,7 @@ class SeqToTensor(Preprocessing):
a Transformer that converts an Array[_] or Seq[_] to a Tensor.
:param size: dimensions of the target Tensor.
"""

def __init__(self, size=[], bigdl_type="float"):
super(SeqToTensor, self).__init__(bigdl_type, size)

Expand All @@ -148,6 +154,7 @@ class SeqToMultipleTensors(Preprocessing):
a Transformer that converts an Array[_] or Seq[_] or ML Vector to several tensors.
:param size: list of int lists, the dimensions of the target Tensors, e.g. [[2],[4]]
"""

def __init__(self, size=[], bigdl_type="float"):
super(SeqToMultipleTensors, self).__init__(bigdl_type, size)

Expand All @@ -157,6 +164,7 @@ class ArrayToTensor(Preprocessing):
a Transformer that converts an Array[_] to a Tensor.
:param size: dimensions of the target Tensor.
"""

def __init__(self, size, bigdl_type="float"):
super(ArrayToTensor, self).__init__(bigdl_type, size)

Expand All @@ -167,6 +175,7 @@ class MLlibVectorToTensor(Preprocessing):
.. note:: Deprecated in 0.4.0. NNEstimator will automatically extract Vectors now.
:param size: dimensions of the target Tensor.
"""

def __init__(self, size, bigdl_type="float"):
super(MLlibVectorToTensor, self).__init__(bigdl_type, size)

Expand All @@ -179,6 +188,7 @@ class FeatureLabelPreprocessing(Preprocessing):
:param feature_transformer: transformer for the feature, transforms F to Tensor[T]
:param label_transformer: transformer for the label, transforms L to Tensor[T]
"""

def __init__(self, feature_transformer, label_transformer, bigdl_type="float"):
super(FeatureLabelPreprocessing, self).__init__(bigdl_type,
feature_transformer, label_transformer)
Expand All @@ -188,6 +198,7 @@ class TensorToSample(Preprocessing):
"""
a Transformer that converts Tensor to Sample.
"""

def __init__(self, bigdl_type="float"):
super(TensorToSample, self).__init__(bigdl_type)

Expand All @@ -206,6 +217,7 @@ class ToTuple(Preprocessing):
"""
a Transformer that converts Feature to (Feature, None).
"""

def __init__(self, bigdl_type="float"):
super(ToTuple, self).__init__(bigdl_type)

Expand All @@ -218,6 +230,7 @@ class FeatureSet(DataSet):
Different from BigDL's DataSet, this FeatureSet could be cached to Intel Optane DC Persistent
Memory, if you set memory_type to PMEM when creating FeatureSet.
"""

def __init__(self, jvalue=None, bigdl_type="float"):
self.bigdl_type = bigdl_type
if jvalue:
Expand All @@ -244,8 +257,8 @@ def image_frame(cls, image_frame, memory_type="DRAM",
:param bigdl_type: numeric type
:return: A feature set
"""
jvalue = callBigDlFunc(bigdl_type, "createFeatureSetFromImageFrame",
image_frame, memory_type, sequential_order, shuffle)
jvalue = callZooFunc(bigdl_type, "createFeatureSetFromImageFrame",
image_frame, memory_type, sequential_order, shuffle)
return cls(jvalue=jvalue)

@classmethod
Expand All @@ -269,9 +282,9 @@ def image_set(cls, imageset, memory_type="DRAM",
:param bigdl_type: numeric type
:return: A feature set
"""
jvalue = callBigDlFunc(bigdl_type, "createFeatureSetFromImageFrame",
imageset.to_image_frame(), memory_type,
sequential_order, shuffle)
jvalue = callZooFunc(bigdl_type, "createFeatureSetFromImageFrame",
imageset.to_image_frame(), memory_type,
sequential_order, shuffle)
return cls(jvalue=jvalue)

@classmethod
Expand All @@ -295,8 +308,8 @@ def sample_rdd(cls, rdd, memory_type="DRAM",
:param bigdl_type: numeric type
:return: A feature set
"""
jvalue = callBigDlFunc(bigdl_type, "createSampleFeatureSetFromRDD", rdd,
memory_type, sequential_order, shuffle)
jvalue = callZooFunc(bigdl_type, "createSampleFeatureSetFromRDD", rdd,
memory_type, sequential_order, shuffle)
return cls(jvalue=jvalue)

@classmethod
Expand All @@ -319,8 +332,8 @@ def rdd(cls, rdd, memory_type="DRAM", sequential_order=False,
:param bigdl_type: numeric type
:return: A feature set
"""
jvalue = callBigDlFunc(bigdl_type, "createFeatureSetFromRDD", rdd,
memory_type, sequential_order, shuffle)
jvalue = callZooFunc(bigdl_type, "createFeatureSetFromRDD", rdd,
memory_type, sequential_order, shuffle)
return cls(jvalue=jvalue)

def transform(self, transformer):
Expand All @@ -329,13 +342,13 @@ def transform(self, transformer):
:param transformer: the transformers to transform this feature set.
:return: A feature set
"""
jvalue = callBigDlFunc(self.bigdl_type, "transformFeatureSet", self.value, transformer)
jvalue = callZooFunc(self.bigdl_type, "transformFeatureSet", self.value, transformer)
return FeatureSet(jvalue=jvalue)

def to_dataset(self):
"""
Convert this FeatureSet to a BigDL-compatible DataSet.
:return: a FeatureSet wrapping the value returned by "featureSetToDataSet"
"""
jvalue = callBigDlFunc(self.bigdl_type, "featureSetToDataSet", self.value)
jvalue = callZooFunc(self.bigdl_type, "featureSetToDataSet", self.value)
return FeatureSet(jvalue=jvalue)
58 changes: 30 additions & 28 deletions python/dllib/src/bigdl/dllib/feature/image/imageset.py
Original file line number Diff line number Diff line change
Expand Up @@ -15,6 +15,7 @@
#
from bigdl.transform.vision.image import ImageFrame
from bigdl.util.common import *
from zoo.common.utils import callZooFunc


class ImageSet(JavaValue):
Expand All @@ -34,20 +35,20 @@ def is_local(self):
"""
whether this is a LocalImageSet
"""
return callBigDlFunc(self.bigdl_type, "isLocalImageSet", self.value)
return callZooFunc(self.bigdl_type, "isLocalImageSet", self.value)

def is_distributed(self):
"""
whether this is a DistributedImageSet
"""
return callBigDlFunc(self.bigdl_type, "isDistributedImageSet", self.value)
return callZooFunc(self.bigdl_type, "isDistributedImageSet", self.value)

@property
def label_map(self):
"""
:return: the labelMap of this ImageSet, None if the ImageSet does not have a labelMap
"""
return callBigDlFunc(self.bigdl_type, "imageSetGetLabelMap", self.value)
return callZooFunc(self.bigdl_type, "imageSetGetLabelMap", self.value)

@classmethod
def read(cls, path, sc=None, min_partitions=1, resize_height=-1,
Expand Down Expand Up @@ -77,14 +78,14 @@ class should be put into the same class folder. So each image in the path is lab
:param one_based_label: whether to use one-based labels
:return: ImageSet
"""
return ImageSet(jvalue=callBigDlFunc(bigdl_type, "readImageSet", path,
sc, min_partitions, resize_height,
resize_width, image_codec, with_label,
one_based_label))
return ImageSet(jvalue=callZooFunc(bigdl_type, "readImageSet", path,
sc, min_partitions, resize_height,
resize_width, image_codec, with_label,
one_based_label))

@classmethod
def from_image_frame(cls, image_frame, bigdl_type="float"):
return ImageSet(jvalue=callBigDlFunc(bigdl_type, "imageFrameToImageSet", image_frame))
return ImageSet(jvalue=callZooFunc(bigdl_type, "imageFrameToImageSet", image_frame))

@classmethod
def from_rdds(cls, image_rdd, label_rdd=None, bigdl_type="float"):
Expand All @@ -98,15 +99,15 @@ def from_rdds(cls, image_rdd, label_rdd=None, bigdl_type="float"):
image_rdd = image_rdd.map(lambda x: JTensor.from_ndarray(x))
if label_rdd is not None:
label_rdd = label_rdd.map(lambda x: JTensor.from_ndarray(x))
return ImageSet(jvalue=callBigDlFunc(bigdl_type, "createDistributedImageSet",
image_rdd, label_rdd), bigdl_type=bigdl_type)
return ImageSet(jvalue=callZooFunc(bigdl_type, "createDistributedImageSet",
image_rdd, label_rdd), bigdl_type=bigdl_type)

def transform(self, transformer):
"""
Apply the given transformer to this ImageSet and return the result as a new ImageSet.
"""
return ImageSet(callBigDlFunc(self.bigdl_type, "transformImageSet",
transformer, self.value), self.bigdl_type)
return ImageSet(callZooFunc(self.bigdl_type, "transformImageSet",
transformer, self.value), self.bigdl_type)

def get_image(self, key="floats", to_chw=True):
"""
Expand All @@ -127,46 +128,47 @@ def get_predict(self, key="predict"):
return self.image_set.get_predict(key)

def to_image_frame(self, bigdl_type="float"):
return ImageFrame(callBigDlFunc(bigdl_type, "imageSetToImageFrame", self.value), bigdl_type)
return ImageFrame(callZooFunc(bigdl_type, "imageSetToImageFrame", self.value), bigdl_type)


class LocalImageSet(ImageSet):
"""
LocalImageSet wraps a list of ImageFeature
"""

def __init__(self, image_list=None, label_list=None, jvalue=None, bigdl_type="float"):
assert jvalue or image_list, "jvalue and image_list cannot be None in the same time"
if jvalue:
self.value = jvalue
else:
# init from image ndarray list and label ndarray list (optional)
image_tensor_list = list(map(lambda image: JTensor.from_ndarray(image), image_list))
label_tensor_list = list(map(lambda label: JTensor.from_ndarray(label), label_list))\
label_tensor_list = list(map(lambda label: JTensor.from_ndarray(label), label_list)) \
if label_list else None
self.value = callBigDlFunc(bigdl_type, JavaValue.jvm_class_constructor(self),
image_tensor_list, label_tensor_list)
self.value = callZooFunc(bigdl_type, JavaValue.jvm_class_constructor(self),
image_tensor_list, label_tensor_list)
self.bigdl_type = bigdl_type

def get_image(self, key="floats", to_chw=True):
"""
get image list from ImageSet
"""
tensors = callBigDlFunc(self.bigdl_type, "localImageSetToImageTensor",
self.value, key, to_chw)
tensors = callZooFunc(self.bigdl_type, "localImageSetToImageTensor",
self.value, key, to_chw)
return list(map(lambda tensor: tensor.to_ndarray(), tensors))

def get_label(self):
"""
get label list from ImageSet
"""
labels = callBigDlFunc(self.bigdl_type, "localImageSetToLabelTensor", self.value)
labels = callZooFunc(self.bigdl_type, "localImageSetToLabelTensor", self.value)
return map(lambda tensor: tensor.to_ndarray(), labels)

def get_predict(self, key="predict"):
"""
get prediction list from ImageSet
"""
predicts = callBigDlFunc(self.bigdl_type, "localImageSetToPredict", self.value, key)
predicts = callZooFunc(self.bigdl_type, "localImageSetToPredict", self.value, key)
return list(map(lambda predict:
(predict[0], list(map(lambda x: x.to_ndarray(), predict[1]))) if predict[1]
else (predict[0], None), predicts))
Expand All @@ -184,33 +186,33 @@ def __init__(self, image_rdd=None, label_rdd=None, jvalue=None, bigdl_type="floa
else:
# init from image ndarray rdd and label rdd(optional)
image_tensor_rdd = image_rdd.map(lambda image: JTensor.from_ndarray(image))
label_tensor_rdd = label_rdd.map(lambda label: JTensor.from_ndarray(label))\
label_tensor_rdd = label_rdd.map(lambda label: JTensor.from_ndarray(label)) \
if label_rdd else None
self.value = callBigDlFunc(bigdl_type, JavaValue.jvm_class_constructor(self),
image_tensor_rdd, label_tensor_rdd)
self.value = callZooFunc(bigdl_type, JavaValue.jvm_class_constructor(self),
image_tensor_rdd, label_tensor_rdd)
self.bigdl_type = bigdl_type

def get_image(self, key="floats", to_chw=True):
"""
get image rdd from ImageSet
"""
tensor_rdd = callBigDlFunc(self.bigdl_type, "distributedImageSetToImageTensorRdd",
self.value, key, to_chw)
tensor_rdd = callZooFunc(self.bigdl_type, "distributedImageSetToImageTensorRdd",
self.value, key, to_chw)
return tensor_rdd.map(lambda tensor: tensor.to_ndarray())

def get_label(self):
"""
get label rdd from ImageSet
"""
tensor_rdd = callBigDlFunc(self.bigdl_type, "distributedImageSetToLabelTensorRdd",
self.value)
tensor_rdd = callZooFunc(self.bigdl_type, "distributedImageSetToLabelTensorRdd",
self.value)
return tensor_rdd.map(lambda tensor: tensor.to_ndarray())

def get_predict(self, key="predict"):
"""
get prediction rdd from ImageSet
"""
predicts = callBigDlFunc(self.bigdl_type, "distributedImageSetToPredict", self.value, key)
predicts = callZooFunc(self.bigdl_type, "distributedImageSetToPredict", self.value, key)
return predicts.map(lambda predict:
(predict[0],
list(map(lambda x: x.to_ndarray(), predict[1]))) if predict[1]
Expand Down
Loading

0 comments on commit 61826bf

Please sign in to comment.