From 61826bfd5812dcccceaad49276a5a46f0f8682af Mon Sep 17 00:00:00 2001 From: Xin Qiu Date: Tue, 26 Nov 2019 13:52:40 +0800 Subject: [PATCH] add callZooFunc and change all callBigDlFunc to callZooFunc (#1793) --- .../dllib/src/bigdl/dllib/feature/common.py | 47 ++++++++----- .../src/bigdl/dllib/feature/image/imageset.py | 58 +++++++-------- .../bigdl/dllib/feature/text/text_feature.py | 20 +++--- .../src/bigdl/dllib/feature/text/text_set.py | 70 ++++++++++--------- .../bigdl/dllib/feature/text/transformer.py | 10 ++- 5 files changed, 116 insertions(+), 89 deletions(-) diff --git a/python/dllib/src/bigdl/dllib/feature/common.py b/python/dllib/src/bigdl/dllib/feature/common.py index 15e90a66d7a..5bd8503cc4e 100644 --- a/python/dllib/src/bigdl/dllib/feature/common.py +++ b/python/dllib/src/bigdl/dllib/feature/common.py @@ -15,6 +15,7 @@ # from bigdl.util.common import * +from zoo.common.utils import callZooFunc from bigdl.dataset.dataset import DataSet import sys @@ -27,6 +28,7 @@ class Relation(object): """ It represents the relationship between two items. """ + def __init__(self, id1, id2, label, bigdl_type="float"): self.id1 = id1 self.id2 = id2 @@ -64,10 +66,10 @@ def read(path, sc=None, min_partitions=1, bigdl_type="float"): texts. Only need to specify this when sc is not None. Default is 1. """ if sc: - jvalue = callBigDlFunc(bigdl_type, "readRelations", path, sc, min_partitions) + jvalue = callZooFunc(bigdl_type, "readRelations", path, sc, min_partitions) res = jvalue.map(lambda x: Relation(str(x[0]), str(x[1]), int(x[2]))) else: - jvalue = callBigDlFunc(bigdl_type, "readRelations", path) + jvalue = callZooFunc(bigdl_type, "readRelations", path) res = [Relation(str(x[0]), str(x[1]), int(x[2])) for x in jvalue] return res @@ -82,7 +84,7 @@ def read_parquet(path, sc, bigdl_type="float"): :param sc: An instance of SparkContext. :return: RDD of Relation. """ - jvalue = callBigDlFunc(bigdl_type, "readRelationsParquet", path, sc) + jvalue = callZooFunc(bigdl_type, "readRelationsParquet", path, sc) return jvalue.map(lambda x: Relation(str(x[0]), str(x[1]), int(x[2]))) @@ -91,9 +93,10 @@ class Preprocessing(JavaValue): Preprocessing defines data transform action during feature preprocessing. Python wrapper for the scala Preprocessing """ + def __init__(self, bigdl_type="float", *args): self.bigdl_type = bigdl_type - self.value = callBigDlFunc(bigdl_type, JavaValue.jvm_class_constructor(self), *args) + self.value = callZooFunc(bigdl_type, JavaValue.jvm_class_constructor(self), *args) def __call__(self, input): """ @@ -106,10 +109,10 @@ def __call__(self, input): from zoo.feature.text import TextSet # if type(input) is ImageSet: if isinstance(input, ImageSet): - jset = callBigDlFunc(self.bigdl_type, "transformImageSet", self.value, input) + jset = callZooFunc(self.bigdl_type, "transformImageSet", self.value, input) return ImageSet(jvalue=jset) elif isinstance(input, TextSet): - jset = callBigDlFunc(self.bigdl_type, "transformTextSet", self.value, input) + jset = callZooFunc(self.bigdl_type, "transformTextSet", self.value, input) return TextSet(jvalue=jset) @@ -118,6 +121,7 @@ class ChainedPreprocessing(Preprocessing): chains two Preprocessing together. The output type of the first Preprocessing should be the same with the input type of the second Preprocessing. """ + def __init__(self, transformers, bigdl_type="float"): for transfomer in transformers: assert isinstance(transfomer, Preprocessing), \ @@ -130,6 +134,7 @@ class ScalarToTensor(Preprocessing): """ a Preprocessing that converts a number to a Tensor. """ + def __init__(self, bigdl_type="float"): super(ScalarToTensor, self).__init__(bigdl_type) @@ -139,6 +144,7 @@ class SeqToTensor(Preprocessing): a Transformer that converts an Array[_] or Seq[_] to a Tensor. :param size dimensions of target Tensor. """ + def __init__(self, size=[], bigdl_type="float"): super(SeqToTensor, self).__init__(bigdl_type, size) @@ -148,6 +154,7 @@ class SeqToMultipleTensors(Preprocessing): a Transformer that converts an Array[_] or Seq[_] or ML Vector to several tensors. :param size, list of int list, dimensions of target Tensors, e.g. [[2],[4]] """ + def __init__(self, size=[], bigdl_type="float"): super(SeqToMultipleTensors, self).__init__(bigdl_type, size) @@ -157,6 +164,7 @@ class ArrayToTensor(Preprocessing): a Transformer that converts an Array[_] to a Tensor. :param size dimensions of target Tensor. """ + def __init__(self, size, bigdl_type="float"): super(ArrayToTensor, self).__init__(bigdl_type, size) @@ -167,6 +175,7 @@ class MLlibVectorToTensor(Preprocessing): .. note:: Deprecated in 0.4.0. NNEstimator will automatically extract Vectors now. :param size dimensions of target Tensor. """ + def __init__(self, size, bigdl_type="float"): super(MLlibVectorToTensor, self).__init__(bigdl_type, size) @@ -179,6 +188,7 @@ class FeatureLabelPreprocessing(Preprocessing): :param feature_transformer transformer for feature, transform F to Tensor[T] :param label_transformer transformer for label, transform L to Tensor[T] """ + def __init__(self, feature_transformer, label_transformer, bigdl_type="float"): super(FeatureLabelPreprocessing, self).__init__(bigdl_type, feature_transformer, label_transformer) @@ -188,6 +198,7 @@ class TensorToSample(Preprocessing): """ a Transformer that converts Tensor to Sample. """ + def __init__(self, bigdl_type="float"): super(TensorToSample, self).__init__(bigdl_type) @@ -206,6 +217,7 @@ class ToTuple(Preprocessing): """ a Transformer that converts Feature to (Feature, None). """ + def __init__(self, bigdl_type="float"): super(ToTuple, self).__init__(bigdl_type) @@ -218,6 +230,7 @@ class FeatureSet(DataSet): Different from BigDL's DataSet, this FeatureSet could be cached to Intel Optane DC Persistent Memory, if you set memory_type to PMEM when creating FeatureSet. """ + def __init__(self, jvalue=None, bigdl_type="float"): self.bigdl_type = bigdl_type if jvalue: @@ -244,8 +257,8 @@ def image_frame(cls, image_frame, memory_type="DRAM", :param bigdl_type: numeric type :return: A feature set """ - jvalue = callBigDlFunc(bigdl_type, "createFeatureSetFromImageFrame", - image_frame, memory_type, sequential_order, shuffle) + jvalue = callZooFunc(bigdl_type, "createFeatureSetFromImageFrame", + image_frame, memory_type, sequential_order, shuffle) return cls(jvalue=jvalue) @classmethod @@ -269,9 +282,9 @@ def image_set(cls, imageset, memory_type="DRAM", :param bigdl_type: numeric type :return: A feature set """ - jvalue = callBigDlFunc(bigdl_type, "createFeatureSetFromImageFrame", - imageset.to_image_frame(), memory_type, - sequential_order, shuffle) + jvalue = callZooFunc(bigdl_type, "createFeatureSetFromImageFrame", + imageset.to_image_frame(), memory_type, + sequential_order, shuffle) return cls(jvalue=jvalue) @classmethod @@ -295,8 +308,8 @@ def sample_rdd(cls, rdd, memory_type="DRAM", :param bigdl_type:numeric type :return: A feature set """ - jvalue = callBigDlFunc(bigdl_type, "createSampleFeatureSetFromRDD", rdd, - memory_type, sequential_order, shuffle) + jvalue = callZooFunc(bigdl_type, "createSampleFeatureSetFromRDD", rdd, + memory_type, sequential_order, shuffle) return cls(jvalue=jvalue) @classmethod @@ -319,8 +332,8 @@ def rdd(cls, rdd, memory_type="DRAM", sequential_order=False, :param bigdl_type:numeric type :return: A feature set """ - jvalue = callBigDlFunc(bigdl_type, "createFeatureSetFromRDD", rdd, - memory_type, sequential_order, shuffle) + jvalue = callZooFunc(bigdl_type, "createFeatureSetFromRDD", rdd, + memory_type, sequential_order, shuffle) return cls(jvalue=jvalue) def transform(self, transformer): @@ -329,7 +342,7 @@ def transform(self, transformer): :param transformer: the transformers to transform this feature set. :return: A feature set """ - jvalue = callBigDlFunc(self.bigdl_type, "transformFeatureSet", self.value, transformer) + jvalue = callZooFunc(self.bigdl_type, "transformFeatureSet", self.value, transformer) return FeatureSet(jvalue=jvalue) def to_dataset(self): @@ -337,5 +350,5 @@ def to_dataset(self): To BigDL compatible DataSet :return: """ - jvalue = callBigDlFunc(self.bigdl_type, "featureSetToDataSet", self.value) + jvalue = callZooFunc(self.bigdl_type, "featureSetToDataSet", self.value) return FeatureSet(jvalue=jvalue) diff --git a/python/dllib/src/bigdl/dllib/feature/image/imageset.py b/python/dllib/src/bigdl/dllib/feature/image/imageset.py index 319610ece66..0df26437496 100644 --- a/python/dllib/src/bigdl/dllib/feature/image/imageset.py +++ b/python/dllib/src/bigdl/dllib/feature/image/imageset.py @@ -15,6 +15,7 @@ # from bigdl.transform.vision.image import ImageFrame from bigdl.util.common import * +from zoo.common.utils import callZooFunc class ImageSet(JavaValue): @@ -34,20 +35,20 @@ def is_local(self): """ whether this is a LocalImageSet """ - return callBigDlFunc(self.bigdl_type, "isLocalImageSet", self.value) + return callZooFunc(self.bigdl_type, "isLocalImageSet", self.value) def is_distributed(self): """ whether this is a DistributedImageSet """ - return callBigDlFunc(self.bigdl_type, "isDistributedImageSet", self.value) + return callZooFunc(self.bigdl_type, "isDistributedImageSet", self.value) @property def label_map(self): """ :return: the labelMap of this ImageSet, None if the ImageSet does not have a labelMap """ - return callBigDlFunc(self.bigdl_type, "imageSetGetLabelMap", self.value) + return callZooFunc(self.bigdl_type, "imageSetGetLabelMap", self.value) @classmethod def read(cls, path, sc=None, min_partitions=1, resize_height=-1, @@ -77,14 +78,14 @@ class should be put into the same class folder. So each image in the path is lab :param one_based_label whether to use one based label :return ImageSet """ - return ImageSet(jvalue=callBigDlFunc(bigdl_type, "readImageSet", path, - sc, min_partitions, resize_height, - resize_width, image_codec, with_label, - one_based_label)) + return ImageSet(jvalue=callZooFunc(bigdl_type, "readImageSet", path, + sc, min_partitions, resize_height, + resize_width, image_codec, with_label, + one_based_label)) @classmethod def from_image_frame(cls, image_frame, bigdl_type="float"): - return ImageSet(jvalue=callBigDlFunc(bigdl_type, "imageFrameToImageSet", image_frame)) + return ImageSet(jvalue=callZooFunc(bigdl_type, "imageFrameToImageSet", image_frame)) @classmethod def from_rdds(cls, image_rdd, label_rdd=None, bigdl_type="float"): @@ -98,15 +99,15 @@ def from_rdds(cls, image_rdd, label_rdd=None, bigdl_type="float"): image_rdd = image_rdd.map(lambda x: JTensor.from_ndarray(x)) if label_rdd is not None: label_rdd = label_rdd.map(lambda x: JTensor.from_ndarray(x)) - return ImageSet(jvalue=callBigDlFunc(bigdl_type, "createDistributedImageSet", - image_rdd, label_rdd), bigdl_type=bigdl_type) + return ImageSet(jvalue=callZooFunc(bigdl_type, "createDistributedImageSet", + image_rdd, label_rdd), bigdl_type=bigdl_type) def transform(self, transformer): """ transformImageSet """ - return ImageSet(callBigDlFunc(self.bigdl_type, "transformImageSet", - transformer, self.value), self.bigdl_type) + return ImageSet(callZooFunc(self.bigdl_type, "transformImageSet", + transformer, self.value), self.bigdl_type) def get_image(self, key="floats", to_chw=True): """ @@ -127,13 +128,14 @@ def get_predict(self, key="predict"): return self.image_set.get_predict(key) def to_image_frame(self, bigdl_type="float"): - return ImageFrame(callBigDlFunc(bigdl_type, "imageSetToImageFrame", self.value), bigdl_type) + return ImageFrame(callZooFunc(bigdl_type, "imageSetToImageFrame", self.value), bigdl_type) class LocalImageSet(ImageSet): """ LocalImageSet wraps a list of ImageFeature """ + def __init__(self, image_list=None, label_list=None, jvalue=None, bigdl_type="float"): assert jvalue or image_list, "jvalue and image_list cannot be None in the same time" if jvalue: @@ -141,32 +143,32 @@ def __init__(self, image_list=None, label_list=None, jvalue=None, bigdl_type="fl else: # init from image ndarray list and label rdd(optional) image_tensor_list = list(map(lambda image: JTensor.from_ndarray(image), image_list)) - label_tensor_list = list(map(lambda label: JTensor.from_ndarray(label), label_list))\ + label_tensor_list = list(map(lambda label: JTensor.from_ndarray(label), label_list)) \ if label_list else None - self.value = callBigDlFunc(bigdl_type, JavaValue.jvm_class_constructor(self), - image_tensor_list, label_tensor_list) + self.value = callZooFunc(bigdl_type, JavaValue.jvm_class_constructor(self), + image_tensor_list, label_tensor_list) self.bigdl_type = bigdl_type def get_image(self, key="floats", to_chw=True): """ get image list from ImageSet """ - tensors = callBigDlFunc(self.bigdl_type, "localImageSetToImageTensor", - self.value, key, to_chw) + tensors = callZooFunc(self.bigdl_type, "localImageSetToImageTensor", + self.value, key, to_chw) return list(map(lambda tensor: tensor.to_ndarray(), tensors)) def get_label(self): """ get label list from ImageSet """ - labels = callBigDlFunc(self.bigdl_type, "localImageSetToLabelTensor", self.value) + labels = callZooFunc(self.bigdl_type, "localImageSetToLabelTensor", self.value) return map(lambda tensor: tensor.to_ndarray(), labels) def get_predict(self, key="predict"): """ get prediction list from ImageSet """ - predicts = callBigDlFunc(self.bigdl_type, "localImageSetToPredict", self.value, key) + predicts = callZooFunc(self.bigdl_type, "localImageSetToPredict", self.value, key) return list(map(lambda predict: (predict[0], list(map(lambda x: x.to_ndarray(), predict[1]))) if predict[1] else (predict[0], None), predicts)) @@ -184,33 +186,33 @@ def __init__(self, image_rdd=None, label_rdd=None, jvalue=None, bigdl_type="floa else: # init from image ndarray rdd and label rdd(optional) image_tensor_rdd = image_rdd.map(lambda image: JTensor.from_ndarray(image)) - label_tensor_rdd = label_rdd.map(lambda label: JTensor.from_ndarray(label))\ + label_tensor_rdd = label_rdd.map(lambda label: JTensor.from_ndarray(label)) \ if label_rdd else None - self.value = callBigDlFunc(bigdl_type, JavaValue.jvm_class_constructor(self), - image_tensor_rdd, label_tensor_rdd) + self.value = callZooFunc(bigdl_type, JavaValue.jvm_class_constructor(self), + image_tensor_rdd, label_tensor_rdd) self.bigdl_type = bigdl_type def get_image(self, key="floats", to_chw=True): """ get image rdd from ImageSet """ - tensor_rdd = callBigDlFunc(self.bigdl_type, "distributedImageSetToImageTensorRdd", - self.value, key, to_chw) + tensor_rdd = callZooFunc(self.bigdl_type, "distributedImageSetToImageTensorRdd", + self.value, key, to_chw) return tensor_rdd.map(lambda tensor: tensor.to_ndarray()) def get_label(self): """ get label rdd from ImageSet """ - tensor_rdd = callBigDlFunc(self.bigdl_type, "distributedImageSetToLabelTensorRdd", - self.value) + tensor_rdd = callZooFunc(self.bigdl_type, "distributedImageSetToLabelTensorRdd", + self.value) return tensor_rdd.map(lambda tensor: tensor.to_ndarray()) def get_predict(self, key="predict"): """ get prediction rdd from ImageSet """ - predicts = callBigDlFunc(self.bigdl_type, "distributedImageSetToPredict", self.value, key) + predicts = callZooFunc(self.bigdl_type, "distributedImageSetToPredict", self.value, key) return predicts.map(lambda predict: (predict[0], list(map(lambda x: x.to_ndarray(), predict[1]))) if predict[1] diff --git a/python/dllib/src/bigdl/dllib/feature/text/text_feature.py b/python/dllib/src/bigdl/dllib/feature/text/text_feature.py index 9d3144ca2a8..b03d0f31ae5 100644 --- a/python/dllib/src/bigdl/dllib/feature/text/text_feature.py +++ b/python/dllib/src/bigdl/dllib/feature/text/text_feature.py @@ -16,7 +16,8 @@ import sys import six -from bigdl.util.common import JavaValue, callBigDlFunc +from bigdl.util.common import JavaValue +from zoo.common.utils import callZooFunc if sys.version >= '3': long = int @@ -30,6 +31,7 @@ class TextFeature(JavaValue): e.g. original text content, uri, category label, tokens, index representation of tokens, BigDL Sample representation, prediction result and so on. """ + def __init__(self, text=None, label=None, uri=None, jvalue=None, bigdl_type="float"): if text is not None: assert isinstance(text, six.string_types), "text of a TextFeature should be a string" @@ -46,7 +48,7 @@ def get_text(self): :return: String """ - return callBigDlFunc(self.bigdl_type, "textFeatureGetText", self.value) + return callZooFunc(self.bigdl_type, "textFeatureGetText", self.value) def get_label(self): """ @@ -55,7 +57,7 @@ def get_label(self): :return: Int """ - return callBigDlFunc(self.bigdl_type, "textFeatureGetLabel", self.value) + return callZooFunc(self.bigdl_type, "textFeatureGetLabel", self.value) def get_uri(self): """ @@ -64,7 +66,7 @@ def get_uri(self): :return: String """ - return callBigDlFunc(self.bigdl_type, "textFeatureGetURI", self.value) + return callZooFunc(self.bigdl_type, "textFeatureGetURI", self.value) def has_label(self): """ @@ -72,7 +74,7 @@ def has_label(self): :return: Boolean """ - return callBigDlFunc(self.bigdl_type, "textFeatureHasLabel", self.value) + return callZooFunc(self.bigdl_type, "textFeatureHasLabel", self.value) def set_label(self, label): """ @@ -81,7 +83,7 @@ def set_label(self, label): :param label: Int :return: The TextFeature with label. """ - self.value = callBigDlFunc(self.bigdl_type, "textFeatureSetLabel", self.value, int(label)) + self.value = callZooFunc(self.bigdl_type, "textFeatureSetLabel", self.value, int(label)) return self def get_tokens(self): @@ -91,7 +93,7 @@ def get_tokens(self): :return: List of String """ - return callBigDlFunc(self.bigdl_type, "textFeatureGetTokens", self.value) + return callZooFunc(self.bigdl_type, "textFeatureGetTokens", self.value) def get_sample(self): """ @@ -100,7 +102,7 @@ def get_sample(self): :return: BigDL Sample """ - return callBigDlFunc(self.bigdl_type, "textFeatureGetSample", self.value) + return callZooFunc(self.bigdl_type, "textFeatureGetSample", self.value) def keys(self): """ @@ -108,4 +110,4 @@ def keys(self): :return: List of String """ - return callBigDlFunc(self.bigdl_type, "textFeatureGetKeys", self.value) + return callZooFunc(self.bigdl_type, "textFeatureGetKeys", self.value) diff --git a/python/dllib/src/bigdl/dllib/feature/text/text_set.py b/python/dllib/src/bigdl/dllib/feature/text/text_set.py index cd6bebe07a6..7f3e15a9672 100644 --- a/python/dllib/src/bigdl/dllib/feature/text/text_set.py +++ b/python/dllib/src/bigdl/dllib/feature/text/text_set.py @@ -15,7 +15,8 @@ # import six -from bigdl.util.common import JavaValue, callBigDlFunc +from bigdl.util.common import JavaValue +from zoo.common.utils import callZooFunc from pyspark import RDD @@ -23,6 +24,7 @@ class TextSet(JavaValue): """ TextSet wraps a set of texts with status. """ + def __init__(self, jvalue, bigdl_type="float", *args): super(TextSet, self).__init__(jvalue, bigdl_type, *args) @@ -32,7 +34,7 @@ def is_local(self): :return: Boolean """ - return callBigDlFunc(self.bigdl_type, "textSetIsLocal", self.value) + return callZooFunc(self.bigdl_type, "textSetIsLocal", self.value) def is_distributed(self): """ @@ -40,7 +42,7 @@ def is_distributed(self): :return: Boolean """ - return callBigDlFunc(self.bigdl_type, "textSetIsDistributed", self.value) + return callZooFunc(self.bigdl_type, "textSetIsDistributed", self.value) def to_distributed(self, sc=None, partition_num=4): """ @@ -55,8 +57,8 @@ def to_distributed(self, sc=None, partition_num=4): jvalue = self.value else: assert sc, "sc cannot be null to transform a LocalTextSet to a DistributedTextSet" - jvalue = callBigDlFunc(self.bigdl_type, "textSetToDistributed", self.value, - sc, partition_num) + jvalue = callZooFunc(self.bigdl_type, "textSetToDistributed", self.value, + sc, partition_num) return DistributedTextSet(jvalue=jvalue) def to_local(self): @@ -68,7 +70,7 @@ def to_local(self): if self.is_local(): jvalue = self.value else: - jvalue = callBigDlFunc(self.bigdl_type, "textSetToLocal", self.value) + jvalue = callZooFunc(self.bigdl_type, "textSetToLocal", self.value) return LocalTextSet(jvalue=jvalue) def get_word_index(self): @@ -78,7 +80,7 @@ def get_word_index(self): :return: Dictionary {word: id} """ - return callBigDlFunc(self.bigdl_type, "textSetGetWordIndex", self.value) + return callZooFunc(self.bigdl_type, "textSetGetWordIndex", self.value) def save_word_index(self, path): """ @@ -90,7 +92,7 @@ def save_word_index(self, path): :param path: The path to the text file. """ - callBigDlFunc(self.bigdl_type, "textSetSaveWordIndex", self.value, path) + callZooFunc(self.bigdl_type, "textSetSaveWordIndex", self.value, path) def load_word_index(self, path): """ @@ -107,7 +109,7 @@ def load_word_index(self, path): :return: TextSet with the loaded word_index. """ - jvalue = callBigDlFunc(self.bigdl_type, "textSetLoadWordIndex", self.value, path) + jvalue = callZooFunc(self.bigdl_type, "textSetLoadWordIndex", self.value, path) return TextSet(jvalue=jvalue) def set_word_index(self, vocab): @@ -118,7 +120,7 @@ def set_word_index(self, vocab): :return: TextSet with the word_index set. """ - jvalue = callBigDlFunc(self.bigdl_type, "textSetSetWordIndex", self.value, vocab) + jvalue = callZooFunc(self.bigdl_type, "textSetSetWordIndex", self.value, vocab) return TextSet(jvalue=jvalue) def generate_word_index_map(self, remove_topN=0, max_words_num=-1, @@ -131,8 +133,8 @@ def generate_word_index_map(self, remove_topN=0, max_words_num=-1, :return: Dictionary {word: id} """ - return callBigDlFunc(self.bigdl_type, "textSetGenerateWordIndexMap", self.value, - remove_topN, max_words_num, min_freq, existing_map) + return callZooFunc(self.bigdl_type, "textSetGenerateWordIndexMap", self.value, + remove_topN, max_words_num, min_freq, existing_map) def get_texts(self): """ @@ -141,7 +143,7 @@ def get_texts(self): :return: List of String for LocalTextSet. RDD of String for DistributedTextSet. """ - return callBigDlFunc(self.bigdl_type, "textSetGetTexts", self.value) + return callZooFunc(self.bigdl_type, "textSetGetTexts", self.value) def get_uris(self): """ @@ -151,7 +153,7 @@ def get_uris(self): :return: List of String for LocalTextSet. RDD of String for DistributedTextSet. """ - return callBigDlFunc(self.bigdl_type, "textSetGetURIs", self.value) + return callZooFunc(self.bigdl_type, "textSetGetURIs", self.value) def get_labels(self): """ @@ -161,7 +163,7 @@ def get_labels(self): :return: List of int for LocalTextSet. RDD of int for DistributedTextSet. """ - return callBigDlFunc(self.bigdl_type, "textSetGetLabels", self.value) + return callZooFunc(self.bigdl_type, "textSetGetLabels", self.value) def get_predicts(self): """ @@ -171,7 +173,7 @@ def get_predicts(self): :return: List of list of numpy array for LocalTextSet. RDD of list of numpy array for DistributedTextSet. """ - predicts = callBigDlFunc(self.bigdl_type, "textSetGetPredicts", self.value) + predicts = callZooFunc(self.bigdl_type, "textSetGetPredicts", self.value) if isinstance(predicts, RDD): return predicts.map(lambda predict: _process_predict_result(predict)) else: @@ -185,7 +187,7 @@ def get_samples(self): :return: List of Sample for LocalTextSet. RDD of Sample for DistributedTextSet. """ - return callBigDlFunc(self.bigdl_type, "textSetGetSamples", self.value) + return callZooFunc(self.bigdl_type, "textSetGetSamples", self.value) def random_split(self, weights): """ @@ -194,7 +196,7 @@ def random_split(self, weights): :param weights: List of float indicating the split portions. """ - jvalues = callBigDlFunc(self.bigdl_type, "textSetRandomSplit", self.value, weights) + jvalues = callZooFunc(self.bigdl_type, "textSetRandomSplit", self.value, weights) return [TextSet(jvalue=jvalue) for jvalue in list(jvalues)] def tokenize(self): @@ -204,7 +206,7 @@ def tokenize(self): :return: TextSet after tokenization. """ - jvalue = callBigDlFunc(self.bigdl_type, "textSetTokenize", self.value) + jvalue = callZooFunc(self.bigdl_type, "textSetTokenize", self.value) return TextSet(jvalue=jvalue) def normalize(self): @@ -215,7 +217,7 @@ def normalize(self): :return: TextSet after normalization. """ - jvalue = callBigDlFunc(self.bigdl_type, "textSetNormalize", self.value) + jvalue = callZooFunc(self.bigdl_type, "textSetNormalize", self.value) return TextSet(jvalue=jvalue) def word2idx(self, remove_topN=0, max_words_num=-1, min_freq=1, existing_map=None): @@ -263,8 +265,8 @@ def word2idx(self, remove_topN=0, max_words_num=-1, min_freq=1, existing_map=Non :return: TextSet after word2idx. """ - jvalue = callBigDlFunc(self.bigdl_type, "textSetWord2idx", self.value, - remove_topN, max_words_num, min_freq, existing_map) + jvalue = callZooFunc(self.bigdl_type, "textSetWord2idx", self.value, + remove_topN, max_words_num, min_freq, existing_map) return TextSet(jvalue=jvalue) def shape_sequence(self, len, trunc_mode="pre", pad_element=0): @@ -276,8 +278,8 @@ def shape_sequence(self, len, trunc_mode="pre", pad_element=0): :return: TextSet after sequence shaping. """ assert isinstance(pad_element, int), "pad_element should be an int" - jvalue = callBigDlFunc(self.bigdl_type, "textSetShapeSequence", self.value, - len, trunc_mode, pad_element) + jvalue = callZooFunc(self.bigdl_type, "textSetShapeSequence", self.value, + len, trunc_mode, pad_element) return TextSet(jvalue=jvalue) def generate_sample(self): @@ -288,12 +290,12 @@ def generate_sample(self): :return: TextSet with Samples. """ - jvalue = callBigDlFunc(self.bigdl_type, "textSetGenerateSample", self.value) + jvalue = callZooFunc(self.bigdl_type, "textSetGenerateSample", self.value) return TextSet(jvalue=jvalue) def transform(self, transformer): - return TextSet(callBigDlFunc(self.bigdl_type, "transformTextSet", - transformer, self.value), self.bigdl_type) + return TextSet(callZooFunc(self.bigdl_type, "transformTextSet", + transformer, self.value), self.bigdl_type) @classmethod def read(cls, path, sc=None, min_partitions=1, bigdl_type="float"): @@ -322,7 +324,7 @@ def read(cls, path, sc=None, min_partitions=1, bigdl_type="float"): :return: TextSet. """ - jvalue = callBigDlFunc(bigdl_type, "readTextSet", path, sc, min_partitions) + jvalue = callZooFunc(bigdl_type, "readTextSet", path, sc, min_partitions) return TextSet(jvalue=jvalue) @classmethod @@ -344,7 +346,7 @@ def read_csv(cls, path, sc=None, min_partitions=1, bigdl_type="float"): :return: TextSet. """ - jvalue = callBigDlFunc(bigdl_type, "textSetReadCSV", path, sc, min_partitions) + jvalue = callZooFunc(bigdl_type, "textSetReadCSV", path, sc, min_partitions) return TextSet(jvalue=jvalue) @classmethod @@ -359,7 +361,7 @@ def read_parquet(cls, path, sc, bigdl_type="float"): :return: DistributedTextSet. """ - jvalue = callBigDlFunc(bigdl_type, "textSetReadParquet", path, sc) + jvalue = callZooFunc(bigdl_type, "textSetReadParquet", path, sc) return DistributedTextSet(jvalue=jvalue) @classmethod @@ -391,7 +393,7 @@ def from_relation_pairs(cls, relations, corpus1, corpus2, bigdl_type="float"): relations = [relation.to_tuple() for relation in relations] else: raise TypeError("relations should be RDD or list of Relation") - jvalue = callBigDlFunc(bigdl_type, "textSetFromRelationPairs", relations, corpus1, corpus2) + jvalue = callZooFunc(bigdl_type, "textSetFromRelationPairs", relations, corpus1, corpus2) return TextSet(jvalue=jvalue) @classmethod @@ -425,7 +427,7 @@ def from_relation_lists(cls, relations, corpus1, corpus2, bigdl_type="float"): relations = [relation.to_tuple() for relation in relations] else: raise TypeError("relations should be RDD or list of Relation") - jvalue = callBigDlFunc(bigdl_type, "textSetFromRelationLists", relations, corpus1, corpus2) + jvalue = callZooFunc(bigdl_type, "textSetFromRelationLists", relations, corpus1, corpus2) return TextSet(jvalue=jvalue) @@ -433,6 +435,7 @@ class LocalTextSet(TextSet): """ LocalTextSet is comprised of lists. """ + def __init__(self, texts=None, labels=None, jvalue=None, bigdl_type="float"): """ Create a LocalTextSet using texts and labels. @@ -442,7 +445,7 @@ def __init__(self, texts=None, labels=None, jvalue=None, bigdl_type="float"): labels: List of int or None if texts don't have labels. """ if texts is not None: - assert all(isinstance(text, six.string_types) for text in texts),\ + assert all(isinstance(text, six.string_types) for text in texts), \ "texts for LocalTextSet should be list of string" if labels is not None: labels = [int(label) for label in labels] @@ -453,6 +456,7 @@ class DistributedTextSet(TextSet): """ DistributedTextSet is comprised of RDDs. """ + def __init__(self, texts=None, labels=None, jvalue=None, bigdl_type="float"): """ Create a DistributedTextSet using texts and labels. diff --git a/python/dllib/src/bigdl/dllib/feature/text/transformer.py b/python/dllib/src/bigdl/dllib/feature/text/transformer.py index 6b1e274173c..98c9daa918c 100644 --- a/python/dllib/src/bigdl/dllib/feature/text/transformer.py +++ b/python/dllib/src/bigdl/dllib/feature/text/transformer.py @@ -18,7 +18,7 @@ import six from zoo.feature.common import Preprocessing from zoo.feature.text import TextFeature -from bigdl.util.common import callBigDlFunc +from zoo.common.utils import callZooFunc if sys.version >= '3': long = int @@ -29,6 +29,7 @@ class TextTransformer(Preprocessing): """ Base class of Transformers that transform TextFeature. """ + def __init__(self, bigdl_type="float", *args): super(TextTransformer, self).__init__(bigdl_type, *args) @@ -36,7 +37,7 @@ def transform(self, text_feature): """ Transform a TextFeature. """ - res = callBigDlFunc(self.bigdl_type, "transformTextFeature", self.value, text_feature.value) + res = callZooFunc(self.bigdl_type, "transformTextFeature", self.value, text_feature.value) return TextFeature(jvalue=res) @@ -47,6 +48,7 @@ class Tokenizer(TextTransformer): >>> tokenizer = Tokenizer() creating: createTokenizer """ + def __init__(self, bigdl_type="float"): super(Tokenizer, self).__init__(bigdl_type) @@ -60,6 +62,7 @@ class Normalizer(TextTransformer): >>> normalizer = Normalizer() creating: createNormalizer """ + def __init__(self, bigdl_type="float"): super(Normalizer, self).__init__(bigdl_type) @@ -76,6 +79,7 @@ class WordIndexer(TextTransformer): >>> word_indexer = WordIndexer(map={"it": 1, "me": 2}) creating: createWordIndexer """ + def __init__(self, map, bigdl_type="float"): super(WordIndexer, self).__init__(bigdl_type, map) @@ -100,6 +104,7 @@ class SequenceShaper(TextTransformer): >>> sequence_shaper = SequenceShaper(len=6, trunc_mode="post", pad_element=10000) creating: createSequenceShaper """ + def __init__(self, len, trunc_mode="pre", pad_element=0, bigdl_type="float"): assert isinstance(pad_element, int), "pad_element should be an int" super(SequenceShaper, self).__init__(bigdl_type, len, trunc_mode, pad_element) @@ -113,5 +118,6 @@ class TextFeatureToSample(TextTransformer): >>> to_sample = TextFeatureToSample() creating: createTextFeatureToSample """ + def __init__(self, bigdl_type="float"): super(TextFeatureToSample, self).__init__(bigdl_type)