-
Notifications
You must be signed in to change notification settings - Fork 95
/
davis.py
114 lines (97 loc) · 4.24 KB
/
davis.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
from __future__ import division
import torch
from torch.utils import data
# general libs
import numpy as np
import math
import time
import os
import random
import argparse
import glob
import json
# image io libs
import cv2
from PIL import Image
from scipy import ndimage, signal
def temporal_transform(frame_indices, sample_range):
    """Randomly sample a contiguous clip of ``sample_range`` frame indices.

    Args:
        frame_indices: sequence of frame indices for one video.
        sample_range: desired clip length (number of consecutive frames).

    Returns:
        A contiguous slice of ``frame_indices`` of length ``sample_range``,
        chosen uniformly among all valid start positions.

    Raises:
        ValueError: if ``sample_range`` exceeds ``len(frame_indices)``
            (propagated from ``np.random.randint`` on an empty range).
    """
    # BUG FIX: randint's upper bound is exclusive, so the original
    # `randint(0, len - sample_range)` could never pick the last valid
    # window, and crashed when len == sample_range. Using
    # `max_start + 1` covers every valid start, including start == 0
    # when the clip spans the whole video.
    max_start = len(frame_indices) - sample_range
    start = np.random.randint(0, max_start + 1)
    return frame_indices[start:start + sample_range]
# Video names belonging to the single-object DAVIS 2016 benchmark
# (train + val splits). __getitem__ uses membership here to decide
# whether a mask is binarized as "any non-zero label" (2016) or
# restricted to a single object id (2017-style multi-object).
# BUG FIX: 'blackswan' previously had a stray leading space
# (' blackswan'), so the membership test never matched that video.
DAVIS_2016 = ['bear', 'bmx-bumps', 'boat', 'breakdance-flare', 'bus',
              'car-turn', 'dance-jump', 'dog-agility', 'drift-turn', 'elephant',
              'flamingo', 'hike', 'hockey', 'horsejump-low', 'kite-walk',
              'lucia', 'mallard-fly', 'mallard-water', 'motocross-bumps',
              'motorbike', 'paragliding', 'rhino', 'rollerblade',
              'scooter-gray', 'soccerball', 'stroller', 'surf', 'swing',
              'tennis', 'train', 'blackswan', 'bmx-trees', 'breakdance',
              'camel', 'car-roundabout', 'car-shadow', 'cows', 'dance-twirl',
              'dog', 'drift-chicane', 'drift-straight', 'goat',
              'horsejump-high', 'kite-surf', 'libby', 'motocross-jump',
              'paragliding-launch', 'parkour', 'scooter-black', 'soapbox']
class DAVIS(data.Dataset):
    """PyTorch dataset for the DAVIS video-segmentation benchmark.

    Expects the standard DAVIS directory layout:
    ``<root>/JPEGImages/<resolution>/<video>/00000.jpg, ...`` and
    ``<root>/Annotations/<resolution>/<video>/00000.png, ...``.

    Args:
        root: DAVIS root directory.
        imset: split list file, relative to ``<root>/ImageSets``.
        resolution: resolution sub-directory name (e.g. ``'480p'``).
        size: ``(width, height)`` every frame and mask is resized to.
        sample_duration: if > 0, a random contiguous clip of this many
            frames is drawn per video; otherwise all frames are returned.
    """

    def __init__(self, root, imset='2016/train.txt',
                 resolution='480p', size=(256, 256), sample_duration=0):
        self.sample_duration = sample_duration
        self.root = root
        self.mask_dir = os.path.join(root, 'Annotations', resolution)
        self.image_dir = os.path.join(root, 'JPEGImages', resolution)
        _imset_dir = os.path.join(root, 'ImageSets')
        _imset_f = os.path.join(_imset_dir, imset)

        self.size = size
        self.videos = []       # video names, in split-file order
        self.num_frames = {}   # video -> number of JPEG frames on disk
        self.num_objects = {}  # video -> max label id in first-frame mask
        self.shape = {}        # video -> native (H, W) of first-frame mask
        with open(_imset_f, "r") as lines:
            for line in lines:
                _video = line.rstrip('\n')
                self.videos.append(_video)
                self.num_frames[_video] = len(glob.glob(os.path.join(
                    self.image_dir, _video, '*.jpg')))
                # The first-frame annotation determines object count and
                # the video's native resolution.
                _mask = np.array(Image.open(os.path.join(
                    self.mask_dir, _video, '00000.png')).convert("P"))
                self.num_objects[_video] = np.max(_mask)
                self.shape[_video] = np.shape(_mask)

    def __len__(self):
        """Number of videos in the split."""
        return len(self.videos)

    def __getitem__(self, index):
        """Load one video clip.

        Returns:
            images: float32 tensor of shape (3, T, H, W), values in [0, 1]
                (channels are BGR, as read by cv2.imread).
            masks: float32 binary tensor of shape (1, T, H, W) — the
                foreground mask dilated by a 10x6 rectangular kernel.
            info: dict with 'name', 'num_frames', 'num_objects' (always 1
                here: the loader collapses annotations to a single object).
        """
        video = self.videos[index]
        info = {}
        info['name'] = video
        info['num_frames'] = self.num_frames[video]
        # This loader always produces a single binary foreground mask,
        # regardless of self.num_objects[video].
        info['num_objects'] = 1

        images = []
        masks = []
        f_list = list(range(self.num_frames[video]))
        if self.sample_duration > 0:
            f_list = temporal_transform(f_list, self.sample_duration)
        for f in f_list:
            img_file = os.path.join(
                self.image_dir, video, '{:05d}.jpg'.format(f))
            # BUG FIX: interpolation must be a keyword argument — passed
            # positionally it lands in cv2.resize's `dst` slot and the
            # cubic flag is never applied.
            image_ = cv2.resize(cv2.imread(img_file), self.size,
                                interpolation=cv2.INTER_CUBIC)
            image_ = np.float32(image_) / 255.0
            images.append(torch.from_numpy(image_))

            mask_file = os.path.join(
                self.mask_dir, video, '{:05d}.png'.format(f))
            # BUG FIX: the original wrapped os.path.join in try/except,
            # but os.path.join never raises, so the intended fallback to
            # the first-frame mask was dead code. Check existence instead.
            if not os.path.isfile(mask_file):
                mask_file = os.path.join(self.mask_dir, video, '00000.png')
            mask_ = np.array(Image.open(mask_file).convert('P'), np.uint8)
            mask_ = cv2.resize(mask_, self.size,
                               interpolation=cv2.INTER_NEAREST)
            if video in DAVIS_2016:
                # Single-object benchmark: any non-zero label is foreground.
                mask_ = (mask_ != 0)
            else:
                # Multi-object annotation: keep object id 1 only.
                # NOTE(review): when the frame has no labels at all,
                # mask_.max() == 0 selects the background as "foreground" —
                # preserved from the original; confirm this is intended.
                select_mask = min(1, mask_.max())
                # np.float was removed from NumPy 1.20+; float64 is the
                # value it aliased.
                mask_ = (mask_ == select_mask).astype(np.float64)
            # Dilate the mask: any pixel whose 10x6 neighborhood contains
            # foreground becomes foreground.
            w_k = np.ones((10, 6))
            mask2 = signal.convolve2d(mask_.astype(np.float64), w_k, 'same')
            mask2 = 1 - (mask2 == 0)
            mask_ = np.float32(mask2)
            masks.append(torch.from_numpy(mask_))

        masks = torch.stack(masks)
        masks = (masks == 1).type(torch.FloatTensor).unsqueeze(0)
        # (T, H, W, C) -> (C, T, H, W)
        images = torch.stack(images).permute(3, 0, 1, 2)
        return images, masks, info