Skip to content

Commit

Permalink
Support iterate a dataset in sequential order when training (intel-an…
Browse files Browse the repository at this point in the history
…alytics#1743)

* support iterate a dataset in sequential order when training

add unit test

fix style

* unpersist

* fix bug
  • Loading branch information
yangw1234 committed Nov 8, 2019
1 parent 1e5c298 commit 88fb510
Showing 1 changed file with 34 additions and 8 deletions.
42 changes: 34 additions & 8 deletions python/dllib/src/bigdl/dllib/feature/common.py
Original file line number Diff line number Diff line change
Expand Up @@ -224,7 +224,9 @@ def __init__(self, jvalue=None, bigdl_type="float"):
self.value = jvalue

@classmethod
def image_frame(cls, image_frame, memory_type="DRAM", bigdl_type="float"):
def image_frame(cls, image_frame, memory_type="DRAM",
sequential_order=False,
shuffle=True, bigdl_type="float"):
"""
Create FeatureSet from ImageFrame.
:param image_frame: ImageFrame
Expand All @@ -235,15 +237,21 @@ def image_frame(cls, image_frame, memory_type="DRAM", bigdl_type="float"):
of the data into memory during the training. After going through the
1/n, we will release the current cache, and load another 1/n into
memory.
:param sequential_order: whether to iterate over the elements of the feature set
in sequential order when training.
:param shuffle: whether to shuffle the elements in each partition before each epoch
when training.
:param bigdl_type: numeric type
:return: A feature set
"""
jvalue = callBigDlFunc(bigdl_type, "createFeatureSetFromImageFrame",
image_frame, memory_type)
image_frame, memory_type, sequential_order, shuffle)
return cls(jvalue=jvalue)

@classmethod
def image_set(cls, imageset, memory_type="DRAM", bigdl_type="float"):
def image_set(cls, imageset, memory_type="DRAM",
sequential_order=False,
shuffle=True, bigdl_type="float"):
"""
Create FeatureSet from ImageSet.
:param imageset: ImageSet
Expand All @@ -254,15 +262,22 @@ def image_set(cls, imageset, memory_type="DRAM", bigdl_type="float"):
of the data into memory during the training. After going through the
1/n, we will release the current cache, and load another 1/n into
memory.
:param sequential_order: whether to iterate over the elements of the feature set
in sequential order when training.
:param shuffle: whether to shuffle the elements in each partition before each epoch
when training.
:param bigdl_type: numeric type
:return: A feature set
"""
jvalue = callBigDlFunc(bigdl_type, "createFeatureSetFromImageFrame",
imageset.to_image_frame(), memory_type)
imageset.to_image_frame(), memory_type,
sequential_order, shuffle)
return cls(jvalue=jvalue)

@classmethod
def sample_rdd(cls, rdd, memory_type="DRAM", bigdl_type="float"):
def sample_rdd(cls, rdd, memory_type="DRAM",
sequential_order=False,
shuffle=True, bigdl_type="float"):
"""
Create FeatureSet from RDD[Sample].
:param rdd: A RDD[Sample]
Expand All @@ -273,14 +288,20 @@ def sample_rdd(cls, rdd, memory_type="DRAM", bigdl_type="float"):
of the data into memory during the training. After going through the
1/n, we will release the current cache, and load another 1/n into
memory.
:param sequential_order: whether to iterate the elements in the feature set
in sequential order when training.
:param shuffle: whether to shuffle the elements in each partition before each epoch
when training
:param bigdl_type: numeric type
:return: A feature set
"""
jvalue = callBigDlFunc(bigdl_type, "createSampleFeatureSetFromRDD", rdd, memory_type)
jvalue = callBigDlFunc(bigdl_type, "createSampleFeatureSetFromRDD", rdd,
memory_type, sequential_order, shuffle)
return cls(jvalue=jvalue)

@classmethod
def rdd(cls, rdd, memory_type="DRAM", bigdl_type="float"):
def rdd(cls, rdd, memory_type="DRAM", sequential_order=False,
shuffle=True, bigdl_type="float"):
"""
Create FeatureSet from RDD.
:param rdd: A RDD
Expand All @@ -291,10 +312,15 @@ def rdd(cls, rdd, memory_type="DRAM", bigdl_type="float"):
of the data into memory during the training. After going through the
1/n, we will release the current cache, and load another 1/n into
memory.
:param sequential_order: whether to iterate the elements in the feature set
in sequential order when training.
:param shuffle: whether to shuffle the elements in each partition before each epoch
when training
:param bigdl_type: numeric type
:return: A feature set
"""
jvalue = callBigDlFunc(bigdl_type, "createFeatureSetFromRDD", rdd, memory_type)
jvalue = callBigDlFunc(bigdl_type, "createFeatureSetFromRDD", rdd,
memory_type, sequential_order, shuffle)
return cls(jvalue=jvalue)

def transform(self, transformer):
Expand Down

0 comments on commit 88fb510

Please sign in to comment.