# Source code for owl.net.netio

import sys,os,gc
import time
import lmdb
import numpy as np
import numpy.random
import scipy.io as si
from PIL import Image
from google.protobuf import text_format

from caffe import *

class ImageWindowDataProvider:
    '''
    Class for Image Window Data Provider. This data provider will read the
    original image and crop out patches according to the given box position,
    then resize the patch to form batch.

    .. note::

        Layer type in Caffe's configure file: WINDOW_DATA

        Data format for each image::

            # window meta data
            [Img_ind][Img_path][C][H][W][Window_num]
            # windows
            [label][overlap_ratio][upper][left][lower][right]
            [label][overlap_ratio][upper][left][lower][right]
            ......
            [label][overlap_ratio][upper][left][lower][right]

        - ``Img_ind``: image index
        - ``Img_path``: image path
        - ``C``: number of image channels (feature maps)
        - ``H``: image height
        - ``W``: image width
        - ``Window_num``: number of window patches
        - ``label``: label
        - ``overlap_ratio``: overlap ratio between the window and object bounding box
        - ``upper left lower right``: position of the window

        As parsed by :py:meth:`get_mb`, every record starts with a line whose
        first character is ``#``, followed by one line each for the image
        path, ``C``, ``H``, ``W`` and ``Window_num``, and then ``Window_num``
        space-separated window lines.
    '''

    def __init__(self, window_data_param, mm_batch_num):
        '''
        :param window_data_param: window data layer parameters; the fields read
                                  are ``source``, ``batch_size``, ``crop_size``,
                                  ``mirror``, ``mean_file`` and ``mean_value``
        :param mm_batch_num: divisor applied to ``batch_size`` to obtain the
                             size of the batches produced by this provider
        '''
        self.source = window_data_param.source
        # Floor division keeps the batch size an integer on Python 2 and 3.
        self.batch_size = window_data_param.batch_size // mm_batch_num
        self.crop_size = window_data_param.crop_size
        self.mirror = window_data_param.mirror
        if len(window_data_param.mean_file) == 0:
            # No mean image: build a constant per-channel mean plane.
            assert(len(window_data_param.mean_value) == 3)
            self.mean_data = np.ones([3, self.crop_size, self.crop_size], dtype=np.float32)
            for channel in range(3):
                self.mean_data[channel] = window_data_param.mean_value[channel]
        else:
            # The protobuf message is only needed when a mean file is given.
            bp = BlobProto()
            with open(window_data_param.mean_file, 'rb') as f:
                bp.ParseFromString(f.read())
            np_mean = np.array(bp.data, dtype=np.float32)
            # The mean blob is treated as 3 square planes; indices must be ints.
            mean_size = int(round(np.sqrt(np_mean.shape[0] // 3)))
            mean_range_st = (mean_size - self.crop_size) // 2
            # start + crop_size guarantees an exact crop_size window even when
            # (mean_size - crop_size) is odd.
            mean_range_ed = mean_range_st + self.crop_size
            self.mean_data = np_mean.reshape([3, mean_size, mean_size])
            self.mean_data = self.mean_data[:, mean_range_st:mean_range_ed, mean_range_st:mean_range_ed]

    def get_mb(self, phase = 'TRAIN'):
        '''
        Get next minibatch

        Generator yielding ``(samples, labels)`` tuples. ``samples`` has one
        flattened, mean-subtracted BGR patch of ``crop_size ** 2 * 3`` floats
        per row, ``labels`` has one column holding the window label.
        Remaining windows are flushed as a smaller batch at the end of every
        image, so batches never mix windows of different images.

        Full batches share one ``samples`` buffer that is overwritten by later
        iterations; copy it if a batch has to outlive the next ``next()`` call.

        :param phase: accepted for interface parity with the other providers;
                      not used by this provider
        '''
        crop = self.crop_size
        flat_len = crop ** 2 * 3
        # ANTIALIAS was renamed to LANCZOS and later removed from Pillow.
        resample = getattr(Image, 'LANCZOS', None) or Image.ANTIALIAS
        samples = np.zeros([self.batch_size, flat_len], dtype=np.float32)
        labels = np.zeros([self.batch_size, 1], dtype=np.float32)
        count = 0

        # The context manager closes the file even if the generator is abandoned.
        with open(self.source, 'r') as sourcefile:
            line = sourcefile.readline()
            while line:
                # process each image
                assert(line[0] == '#')
                path = sourcefile.readline().rstrip('\r\n')
                # C, H and W are part of the record but not needed for cropping;
                # they are still parsed so malformed records fail loudly.
                for _ in range(3):
                    int(sourcefile.readline())
                boxnum = int(sourcefile.readline())

                # open image and convert to rgb
                try:
                    img = Image.open(path)
                    if img.mode != 'RGB':
                        img = img.convert('RGB')
                except IOError as e:
                    print(e)
                    print("not an image file %s" % (path))
                    img = None

                # read boxes
                for _ in range(boxnum):
                    line = sourcefile.readline()
                    if img is None:
                        # Unreadable image: consume its window lines so the
                        # parser stays aligned with the next record.
                        continue
                    box_info = line.split()
                    # Stored coordinates are 1-based and inclusive.
                    upper = int(box_info[2]) - 1
                    left = int(box_info[3]) - 1
                    lower = int(box_info[4])
                    right = int(box_info[5])
                    # crop image, then resize
                    try:
                        patch = img.crop((left, upper, right, lower))
                        patch = patch.resize((crop, crop), resample)
                    except IOError as e:
                        print(e)
                        print("resize error occur %s" % (path))
                        continue
                    # HWC RGB -> CHW BGR
                    npimg = np.asarray(patch, dtype=np.uint8).transpose(2, 0, 1)[::-1, :, :]
                    pixels = npimg - self.mean_data
                    if self.mirror and np.random.rand() > 0.5:
                        pixels = pixels[:, :, ::-1]
                    samples[count, :] = pixels.reshape(flat_len).astype(np.float32)
                    labels[count, 0] = float(box_info[0])
                    count = count + 1
                    if count == self.batch_size:
                        yield (samples, labels)
                        labels = np.zeros([self.batch_size, 1], dtype=np.float32)
                        count = 0

                # finish one image: flush the partially filled batch
                if count > 0:
                    yield (samples[:count].copy(), labels[:count].copy())
                    count = 0
                line = sourcefile.readline()
class ImageListDataProvider:
    '''
    Class for Image Data Provider. This data provider will read from original
    data into RGB value, then resize the patch to form batch.

    .. note::

        Layer type in Caffe's configure file: IMAGE_DATA

        Data format for each image::

            [Img_path][label_0][label_1]...[label_n]

        - ``Img_path``: image path
        - ``label_0 label_1 ... label_n``: we support multi-label for a single image
    '''

    def __init__(self, image_data_param, transform_param, mm_batch_num):
        '''
        :param image_data_param: image data layer parameters; the fields read
                                 are ``source``, ``batch_size``, ``new_height``
                                 and ``new_width``
        :param transform_param: transformation parameters; the fields read are
                                ``mean_file``, ``mean_value``, ``crop_size``
                                and ``mirror``
        :param mm_batch_num: divisor applied to ``batch_size`` to obtain the
                             size of the batches produced by this provider
        '''
        self.source = image_data_param.source
        self.new_height = image_data_param.new_height
        self.new_width = image_data_param.new_width
        # Floor division keeps the batch size an integer on Python 2 and 3.
        self.batch_size = image_data_param.batch_size // mm_batch_num
        self.crop_size = transform_param.crop_size
        self.mirror = transform_param.mirror

        if len(transform_param.mean_file) == 0:
            # No mean image: build a constant per-channel mean plane.
            assert(len(transform_param.mean_value) == 3)
            self.mean_data = np.ones([3, self.new_height, self.new_width], dtype=np.float32)
            for channel in range(3):
                self.mean_data[channel] = transform_param.mean_value[channel]
        else:
            # The protobuf message is only needed when a mean file is given.
            bp = BlobProto()
            with open(transform_param.mean_file, 'rb') as f:
                bp.ParseFromString(f.read())
            np_mean = np.array(bp.data, dtype=np.float32)
            # The mean blob is treated as 3 square planes; indices must be ints.
            mean_size = int(round(np.sqrt(np_mean.shape[0] // 3)))
            self.mean_data = np_mean.reshape([3, mean_size, mean_size])
            # Centered window of exactly new_height x new_width.
            mean_height_st = (mean_size - self.new_height) // 2
            mean_height_ed = mean_height_st + self.new_height
            mean_width_st = (mean_size - self.new_width) // 2
            mean_width_ed = mean_width_st + self.new_width
            self.mean_data = self.mean_data[:, mean_height_st:mean_height_ed, mean_width_st:mean_width_ed]

    def get_mb(self, phase = 'TRAIN'):
        '''
        Get next minibatch

        Generator yielding ``(samples, labels)`` tuples. ``samples`` has one
        flattened, mean-subtracted BGR crop of ``crop_size ** 2 * 3`` floats
        per row; ``labels`` has one column per label found on the first
        listed line. The last batch is smaller when the number of usable
        images is not a multiple of the batch size; no empty batch is
        produced.

        Full batches share one ``samples`` buffer that is overwritten by later
        iterations; copy it if a batch has to outlive the next ``next()`` call.

        :param phase: ``'TRAIN'`` picks a random crop position, any other
                      value picks the central crop
        '''
        crop = self.crop_size
        flat_len = crop ** 2 * 3
        # ANTIALIAS was renamed to LANCZOS and later removed from Pillow.
        resample = getattr(Image, 'LANCZOS', None) or Image.ANTIALIAS
        samples = np.zeros([self.batch_size, flat_len], dtype=np.float32)
        labels = None
        num_label = -1
        count = 0

        # The context manager closes the file even if the generator is abandoned.
        with open(self.source, 'r') as sourcefile:
            for line in sourcefile:
                line_info = line.split()
                if not line_info:
                    # tolerate blank lines (e.g. at the end of the list)
                    continue
                assert(len(line_info) >= 2)
                if num_label == -1:
                    num_label = len(line_info) - 1
                    labels = np.zeros([self.batch_size, num_label], dtype=np.float32)

                # read img and convert to rgb
                try:
                    img = Image.open(line_info[0])
                    if img.mode != 'RGB':
                        img = img.convert('RGB')
                except IOError as e:
                    print(e)
                    print("not an image file %s" % (line_info[0]))
                    # Skip the entry instead of reusing a stale/undefined image.
                    continue
                # resize; PIL expects the size as (width, height)
                try:
                    img = img.resize((self.new_width, self.new_height), resample)
                except IOError as e:
                    print(e)
                    print("resize error occur %s" % (line_info[0]))
                    continue

                # labels
                for labelidx in range(num_label):
                    labels[count, labelidx] = float(line_info[labelidx + 1])

                # HWC RGB -> CHW BGR
                npimg = np.asarray(img, dtype=np.uint8).transpose(2, 0, 1)[::-1, :, :]
                pixels = npimg - self.mean_data

                # crop
                max_h = pixels.shape[1] - crop
                max_w = pixels.shape[2] - crop
                if phase == 'TRAIN':
                    # randint's upper bound is exclusive: +1 makes the last
                    # offset reachable and allows max_h == 0 / max_w == 0.
                    crop_h = np.random.randint(max_h + 1)
                    crop_w = np.random.randint(max_w + 1)
                else:
                    crop_h = max_h // 2
                    crop_w = max_w // 2
                im_cropped = pixels[:, crop_h:crop_h + crop, crop_w:crop_w + crop]
                if self.mirror and np.random.rand() > 0.5:
                    im_cropped = im_cropped[:, :, ::-1]
                samples[count, :] = im_cropped.reshape(flat_len).astype(np.float32)

                count = count + 1
                if count == self.batch_size:
                    yield (samples, labels)
                    labels = np.zeros([self.batch_size, num_label], dtype=np.float32)
                    count = 0

        # Flush the partially filled batch, if any.
        if count > 0:
            yield (samples[:count].copy(), labels[:count].copy())
class LMDBDataProvider:
    '''
    Class for LMDB Data Provider.

    .. note::

        Layer type in Caffe's configure file: DATA
    '''

    def __init__(self, data_param, transform_param, mm_batch_num):
        '''
        :param data_param: data layer parameters; the fields read are
                           ``source`` and ``batch_size``
        :param transform_param: transformation parameters; the fields read are
                                ``mean_file``, ``mean_value``, ``crop_size``
                                and ``mirror``
        :param mm_batch_num: divisor applied to ``batch_size`` to obtain the
                             size of the batches produced by this provider
        '''
        if len(transform_param.mean_file) == 0:
            # No mean image: constant per-channel planes of a fixed 256x256
            # size, so stored images must be 256x256 in this mode.
            assert(len(transform_param.mean_value) == 3)
            self.mean_data = np.ones([3, 256, 256], dtype=np.float32)
            for channel in range(3):
                self.mean_data[channel] = transform_param.mean_value[channel]
        else:
            # The protobuf message is only needed when a mean file is given.
            bp = BlobProto()
            with open(transform_param.mean_file, 'rb') as f:
                bp.ParseFromString(f.read())
            mean_narray = np.array(bp.data, dtype=np.float32)
            # The mean blob is treated as 3 square planes; the side must be an int.
            h_w = int(round(np.sqrt(mean_narray.shape[0] // 3)))
            self.mean_data = mean_narray.reshape([3, h_w, h_w])

        self.source = data_param.source
        # Floor division keeps the batch size an integer on Python 2 and 3.
        self.batch_size = data_param.batch_size // mm_batch_num
        self.crop_size = transform_param.crop_size
        self.mirror = transform_param.mirror

    def get_mb(self, phase = 'TRAIN'):
        '''
        Get next minibatch

        Generator yielding ``(samples, labels)`` tuples. ``samples`` has one
        flattened, mean-subtracted crop of ``crop_size ** 2 * 3`` floats per
        row; ``labels`` has ``len(d.label)`` columns. The last batch is
        smaller when the number of records is not a multiple of the batch
        size; no empty batch is produced.

        Full batches share one ``samples`` buffer that is overwritten by later
        iterations; copy it if a batch has to outlive the next ``next()`` call.

        :param phase: ``'TRAIN'`` picks a random crop position, any other
                      value picks the central crop
        '''
        crop = self.crop_size
        flat_len = crop ** 2 * 3
        samples = np.zeros([self.batch_size, flat_len], dtype=np.float32)
        labels = None
        num_label = -1
        count = 0

        env = lmdb.open(self.source, readonly=True)
        try:
            with env.begin(write=False, buffers=False) as txn:
                for _, value in txn.cursor():
                    d = Datum()
                    d.ParseFromString(value)
                    # Each record is decoded as 3 square uint8 planes.
                    ori_size = int(round(np.sqrt(len(d.data) // 3)))
                    im = np.frombuffer(d.data, dtype=np.uint8).reshape([3, ori_size, ori_size]) - self.mean_data

                    max_off = ori_size - crop
                    if phase == 'TRAIN':
                        # randint's upper bound is exclusive: +1 makes the last
                        # offset reachable and allows ori_size == crop_size.
                        crop_h, crop_w = np.random.randint(max_off + 1, size=2)
                    else:
                        crop_h = max_off // 2
                        crop_w = max_off // 2
                    im_cropped = im[:, crop_h:crop_h + crop, crop_w:crop_w + crop]
                    if self.mirror and np.random.rand() > 0.5:
                        im_cropped = im_cropped[:, :, ::-1]
                    samples[count, :] = im_cropped.reshape(flat_len).astype(np.float32)

                    if num_label == -1:
                        num_label = len(d.label)
                        labels = np.zeros([self.batch_size, num_label], dtype=np.float32)
                    labels[count, :] = d.label

                    count = count + 1
                    if count == self.batch_size:
                        yield (samples, labels)
                        labels = np.zeros([self.batch_size, num_label], dtype=np.float32)
                        count = 0

                # Flush the partially filled batch, if any.
                if count > 0:
                    yield (samples[:count].copy(), labels[:count].copy())
        finally:
            # Release the environment even if the generator is abandoned.
            env.close()

    def get_multiview_mb(self):
        '''
        Multiview testing will get better accuracy than single view testing.
        For each image, it will crop out the left-top, right-top, left-down,
        right-down, central patches and their horizontal flipped version. The
        final prediction is averaged according to the 10 views. Thus, for each
        original batch, get_multiview_mb will produce 10 consecutive batches
        for the batch.

        Each yielded item is a ``(samples, labels)`` tuple for one view; the
        same ``labels`` array accompanies all 10 views of a batch.
        '''
        view_num = 10
        crop = self.crop_size
        flat_len = crop ** 2 * 3
        samples = np.zeros([view_num, self.batch_size, flat_len], dtype=np.float32)
        labels = None
        num_label = -1
        ori_size = -1
        count = 0

        env = lmdb.open(self.source, readonly=True)
        try:
            with env.begin(write=False, buffers=False) as txn:
                for _, value in txn.cursor():
                    d = Datum()
                    d.ParseFromString(value)
                    if ori_size == -1:
                        # The crop positions are derived once, from the first record.
                        ori_size = int(round(np.sqrt(len(d.data) // 3)))
                        diff_size = ori_size - crop
                        start_h = [0, diff_size, 0, diff_size, diff_size // 2]
                        start_w = [0, 0, diff_size, diff_size, diff_size // 2]
                    im = np.frombuffer(d.data, dtype=np.uint8).reshape([3, ori_size, ori_size]) - self.mean_data

                    for i in range(view_num):
                        # Views 2k and 2k+1 share crop position k; odd views are flipped.
                        crop_h = start_h[i // 2]
                        crop_w = start_w[i // 2]
                        im_cropped = im[:, crop_h:crop_h + crop, crop_w:crop_w + crop]
                        if i % 2 == 1:
                            im_cropped = im_cropped[:, :, ::-1]
                        samples[i, count, :] = im_cropped.reshape(flat_len).astype(np.float32)

                    if num_label == -1:
                        num_label = len(d.label)
                        labels = np.zeros([self.batch_size, num_label], dtype=np.float32)
                    labels[count, :] = d.label

                    count = count + 1
                    if count == self.batch_size:
                        for i in range(view_num):
                            yield (samples[i, :, :], labels)
                        labels = np.zeros([self.batch_size, num_label], dtype=np.float32)
                        count = 0

                # Flush the partially filled batch, if any, one view at a time.
                if count > 0:
                    left_samples = samples[:, :count, :].copy()
                    left_labels = labels[:count].copy()
                    for i in range(view_num):
                        yield (left_samples[i, :, :], left_labels)
        finally:
            # Release the environment even if the generator is abandoned.
            env.close()
if __name__ == '__main__':
    # Manual smoke test: iterate over the minibatches of the window data
    # provider and print, for every batch, its index, the shape of the sample
    # matrix and the seconds elapsed since the previous batch.
    #
    # Disabled variants for the other providers, kept for reference:
    #
    # if sys.argv[1] == 'lmdb':
    #     #Test
    #     net_file = '/home/tianjun/configfile/Googmodel/train_val_CUB_lmdb.prototxt'
    #     with open(net_file, 'r') as f:
    #         netconfig = NetParameter()
    #         text_format.Merge(str(f.read()), netconfig)
    #     layerinfo = netconfig.layers[0]
    #     dp = LMDBDataProvider(layerinfo.data_param, layerinfo.transform_param, 1)
    #     count = 0
    #     for (samples, labels) in dp.get_mb():
    #         print count, ':', samples.shape
    #         count = count + 1
    # else:
    #     #Test
    #     net_file = '/home/tianjun/configfile/Googmodel/train_val_CUB_list.prototxt'
    #     with open(net_file, 'r') as f:
    #         netconfig = NetParameter()
    #         text_format.Merge(str(f.read()), netconfig)
    #     layerinfo = netconfig.layers[0]
    #     dp = ImageListDataProvider(layerinfo.image_data_param, layerinfo.transform_param, 1)
    #     count = 0
    #     for (samples, labels) in dp.get_mb():
    #         print count, ':', samples.shape
    #         count = count + 1

    net_file = '/home/tianjun/configfile/Alexmodel/filternet.prototxt'
    with open(net_file, 'r') as f:
        netconfig = NetParameter()
        text_format.Merge(str(f.read()), netconfig)
    layerinfo = netconfig.layers[0]
    dp = ImageWindowDataProvider(layerinfo.window_data_param, 1)
    count = 0
    last = time.time()
    for (samples, labels) in dp.get_mb():
        # Single-argument print() calls produce the same output on Python 2 and 3.
        print('%s : %s' % (count, samples.shape))
        thistime = time.time() - last
        print(thistime)
        last = time.time()
        count = count + 1