Job Script Example 06 Caffe

Description

This is an example of a job to run a Python Caffe deep learning computation. The original code is from Google deepdream.

The full code for this job, plus some documentation, is in:

/mnt/HA/opt/Examples/Example06

Python Caffe Script

The Python Caffe script to perform Google DeepDream computations is too long to include in the wiki. Please see the full code on Proteus. Below is a condensed version which removes most descriptive comments. This script should work both with and without CUDA/GPU.

#!/usr/bin/env python3.6
from io import StringIO
import numpy as np
import scipy.ndimage as nd
import PIL.Image
from IPython.display import clear_output, Image, display
from google.protobuf import text_format

import sys
import os
import pathlib

import caffe

# GPU mode is enabled only when the environment provides SGE_HGR_ngpus;
# otherwise Caffe is left in its default mode.
if os.environ.get('SGE_HGR_ngpus') is not None:
    print("dream.py: setting GPU mode ...")
    caffe.set_mode_gpu()

    # The variable holds whitespace-separated device names of the form GPUN,
    # where N = {0, 1}. Original Caffe can drive only one GPU at a time, so a
    # device is selected explicitly only when exactly one name is listed.
    gpuname = os.environ['SGE_HGR_ngpus'].split()
    if len(gpuname) == 1:
        # the trailing character of the device name is its numeric id
        gpuid = int(gpuname[0][-1])
        print("... using device {}".format(gpuid))
        caffe.set_device(gpuid)  # select GPU device if multiple devices exist

def showarray(a, fmt='jpeg'):
    """Display an array inline as an encoded image via IPython's display().

    Values are clipped to the range [0, 255] and converted to uint8 before
    being encoded.

    Args:
        a: image data; after uint8 conversion it must be accepted by
            ``PIL.Image.fromarray``.
        fmt: image format name handed to ``PIL.Image.Image.save``.
    """
    # Encoded image data is binary. Under Python 3 a text StringIO rejects the
    # bytes that PIL writes, so an in-memory bytes buffer is required.
    from io import BytesIO

    a = np.uint8(np.clip(a, 0, 255))
    f = BytesIO()
    PIL.Image.fromarray(a).save(f, fmt)
    display(Image(data=f.getvalue()))

model_path = '/mnt/HA/opt/Examples/Caffe/caffe/models/bvlc_googlenet/' # substitute your path here
net_fn   = model_path + 'deploy.prototxt'
param_fn = model_path + 'bvlc_googlenet.caffemodel'

# Read the deploy definition, switch on force_backward (make_step() calls
# net.backward()), and write the patched definition to tmp.prototxt in the
# current working directory.
model = caffe.io.caffe_pb2.NetParameter()
with open(net_fn) as proto_in:
    text_format.Merge(proto_in.read(), model)
model.force_backward = True
# The with-block guarantees the patched file is flushed and closed before
# caffe.Classifier opens it below.
with open('tmp.prototxt', 'w') as proto_out:
    proto_out.write(str(model))

net = caffe.Classifier('tmp.prototxt', param_fn,
                       mean = np.float32([104.0, 116.0, 122.0]), # ImageNet mean, training set dependent
                       channel_swap = (2,1,0)) # the reference model has channels in BGR order instead of RGB

def preprocess(net, img):
    """Convert an image array into the layout used for the net's 'data' blob.

    Axis 2 of ``img`` is moved to the front, the order along that new leading
    axis is reversed (presumably RGB -> BGR, matching the net's channel_swap),
    the result is cast to float32, and ``net.transformer.mean['data']`` is
    subtracted.
    """
    channels_first = np.rollaxis(img, 2)
    reversed_channels = channels_first[::-1]
    mean = net.transformer.mean['data']
    return np.float32(reversed_channels) - mean

def deprocess(net, img):
    """Invert preprocess(): add the mean back and restore the image layout.

    ``net.transformer.mean['data']`` is added to ``img``, the order along the
    leading axis is reversed, and the resulting planes are stacked along a new
    trailing axis.
    """
    with_mean = img + net.transformer.mean['data']
    planes = with_mean[::-1]
    return np.dstack(planes)

def objective_L2(dst):
    """Default objective: copy the blob's activations into its gradient.

    Writing ``dst.data`` into ``dst.diff`` in place is the gradient of half the
    squared L2 norm of the activations.
    """
    activations = dst.data
    dst.diff[...] = activations

def make_step(net, step_size=1.5, end='inception_4c/output',
              jitter=32, clip=True, objective=objective_L2):
    """Perform one gradient-ascent update on the image held in the 'data' blob.

    Args:
        net: network exposing ``blobs``, ``forward``, ``backward`` and
            ``transformer.mean``.
        step_size: ascent step, applied after normalising the gradient by its
            mean absolute value.
        end: name of the blob whose activations drive the objective.
        jitter: maximum random shift (inclusive) applied along each of the last
            two axes before the step, and undone afterwards.
        clip: when true, clamp the image to [-mean, 255 - mean].
        objective: callable that receives the ``end`` blob and fills its diff.
    """
    data_blob = net.blobs['data']  # the input image lives in the net's 'data' blob
    target_blob = net.blobs[end]

    # Shift the image by a random offset along its last two axes.
    shift_x, shift_y = np.random.randint(-jitter, jitter+1, 2)
    rolled = np.roll(data_blob.data[0], shift_x, -1)
    data_blob.data[0] = np.roll(rolled, shift_y, -2)

    net.forward(end=end)
    objective(target_blob)  # let the objective write the gradient at the target blob
    net.backward(start=end)

    # Normalised ascent step: scale the gradient by step_size / mean(|gradient|).
    grad = data_blob.diff[0]
    scale = step_size/np.abs(grad).mean()
    data_blob.data[:] += scale * grad

    # Undo the jitter shift.
    unrolled = np.roll(data_blob.data[0], -shift_x, -1)
    data_blob.data[0] = np.roll(unrolled, -shift_y, -2)

    if clip:
        mean = net.transformer.mean['data']
        data_blob.data[:] = np.clip(data_blob.data, -mean, 255-mean)

def deepdream(net, base_img, iter_n=10, octave_n=4, octave_scale=1.4,
              end='inception_4c/output', clip=True, **step_params):
    """Run multi-scale gradient ascent on ``base_img`` and return the result.

    The preprocessed image is repeatedly shrunk by ``octave_scale`` to build
    ``octave_n`` octaves. Starting from the smallest, each octave gets
    ``iter_n`` calls to make_step(); the detail produced (network input minus
    the octave's base image) is upscaled and added to the next, larger octave.

    Args:
        net: network passed through to preprocess(), make_step(), deprocess().
        base_img: input image, in the form accepted by preprocess().
        iter_n: number of make_step() calls per octave.
        octave_n: number of octaves.
        octave_scale: size ratio between successive octaves.
        end: target blob name forwarded to make_step().
        clip: forwarded to make_step().
        **step_params: extra keyword arguments forwarded to make_step().

    Returns:
        The final contents of the 'data' blob, passed through deprocess().
    """
    # Build the pyramid of base images, largest first.
    pyramid = [preprocess(net, base_img)]
    for _ in range(octave_n-1):
        zoom_factors = (1, 1.0/octave_scale, 1.0/octave_scale)
        pyramid.append(nd.zoom(pyramid[-1], zoom_factors, order=1))

    data_blob = net.blobs['data']
    detail = np.zeros_like(pyramid[-1])  # network-produced detail, initially empty
    for level, level_base in enumerate(reversed(pyramid)):
        height, width = level_base.shape[-2:]
        if level > 0:
            # Bring the previous octave's detail up to the current octave size.
            prev_height, prev_width = detail.shape[-2:]
            detail = nd.zoom(detail,
                             (1, 1.0*height/prev_height, 1.0*width/prev_width),
                             order=1)

        data_blob.reshape(1, 3, height, width)  # resize the network's input image size
        data_blob.data[0] = level_base+detail
        for _ in range(iter_n):
            make_step(net, end=end, clip=clip, **step_params)

        # Keep only what the network added on this octave.
        detail = data_blob.data[0]-level_base

    return deprocess(net, data_blob.data[0])

# Load the base image as a float32 array.
img = np.float32(PIL.Image.open('sky1024px.jpg'))

print("Starting deepdream...")

# Two standalone runs, one with the default target layer and one with a
# lower layer; their results are discarded.
_ = deepdream(net, img)

_ = deepdream(net, img, end='inception_3b/5x5_reduce')

# Frames go to a per-job directory named after the JOB_ID environment variable
# (the name is 'frames_None' when JOB_ID is unset).
framesdir = 'frames_{}'.format(os.getenv('JOB_ID'))
pathlib.Path(framesdir).mkdir(parents=True, exist_ok=True)
frame = img

h, w = frame.shape[:2]
s = 0.05 # scale coefficient
# Feed each output back in as the next input. The loop index doubles as the
# frame number, so no separate hand-incremented counter is needed.
for frame_i in range(100):
    print("Frame %04d..." % (frame_i))
    frame = deepdream(net, frame)
    PIL.Image.fromarray(np.uint8(frame)).save("%s/%04d.jpg" % (framesdir, frame_i))
    # Zoom in slightly about the image centre before the next iteration.
    frame = nd.affine_transform(frame, [1-s,1-s,1], [h*s/2,w*s/2,0], order=1)

# Load the guide image as a float32 array.
guide = np.float32(PIL.Image.open('flowers.jpg'))

# Run the guide image forward up to the chosen layer and keep a copy of the
# activations there; objective_guide() reads them as guide_features.
end = 'inception_3b/output'
h, w = guide.shape[0], guide.shape[1]
src = net.blobs['data']
dst = net.blobs[end]
src.reshape(1, 3, h, w)
src.data[0] = preprocess(net, guide)
net.forward(end=end)
guide_features = dst.data[0].copy()

def objective_guide(dst):
    """Objective that steers activations toward the stored guide features.

    Each spatial position of the current activations is compared by dot
    product with every position of the module-level ``guide_features``. The
    best-matching guide feature vector is written into ``dst.diff[0]`` at that
    position.
    """
    current = dst.data[0].copy()
    n_channels = current.shape[0]
    current_flat = current.reshape(n_channels, -1)
    guide_flat = guide_features.reshape(n_channels, -1)
    # rows: positions in the current activations; columns: guide positions
    similarity = current_flat.T.dot(guide_flat)
    best_match = similarity.argmax(1)
    # write through the reshaped diff so the blob's gradient is updated in place
    dst.diff[0].reshape(n_channels, -1)[:] = guide_flat[:, best_match]

Job Script

CPU-only

NOTE: the Python script above has been converted to Python 3.6. Some small changes should be made to use Python 2.7.

replace "from io import StringIO" with "from cStringIO import StringIO"
replace "range" with "xrange"
replace the pathlib.Path().mkdir() with the appropriate os.mkdir() call

#!/bin/bash
# Grid Engine batch script: CPU-only run of dream.py.
#
# Scheduler directives (lines beginning with "#$"):
#   -S /bin/bash       interpret the job script with bash
#   -q all.q           submit to the all.q queue
#   -P myrsrchPrj      project to charge -- replace with your own project
#   -cwd               run the job from the submission directory
#   -j y               merge stderr into the stdout file
#   -M ...             email address for notifications -- replace with your own
#   -l h_rt=3:00:00    hard run-time limit of 3 hours
#   -l h_vmem=4G       hard virtual-memory limit
#   -l vendor=intel    site-defined resource; presumably selects Intel nodes
#   -pe shm 16         request 16 slots in the "shm" parallel environment
#$ -S /bin/bash
#$ -q all.q
#$ -P myrsrchPrj
#$ -cwd
#$ -j y
#$ -M myname@drexel.edu
#$ -l h_rt=3:00:00
#$ -l h_vmem=4G
#$ -l vendor=intel
#$ -pe shm 16

# Initialise the "module" command, then load the toolchain and the Caffe build.
. /etc/profile.d/modules.sh
module load shared
module load sge/univa
module load proteus
module load gcc/4.8.1
module load intel/composerxe/2015.1.133
module load proteus-openmpi/intel/2015/1.8.1-mlnx-ofed
module load caffe/intel/1.0

# NOTE(review): this runs under Python 2.7, while the dream.py listed on this
# page targets Python 3.6 -- confirm the Python 2.7 changes have been applied.
python2.7 dream.py

CUDA-enabled

Python will use all available CPU cores, so request 16 (all cores on a single GPU node)
Caffe with CUDA can use only a single GPU device

#!/bin/bash -l
# Grid Engine batch script: CUDA-enabled run of dream.py on the GPU queue.
# Replace the FIXME placeholders (-P project, -M email address) before use.
#
# Scheduler directives (lines beginning with "#$"):
#   -S /bin/bash       interpret the job script with bash
#   -q gpu.q           submit to the gpu.q queue
#   -cwd               run the job from the submission directory
#   -j y               merge stderr into the stdout file
#   -pe shm 16         request 16 slots in the "shm" parallel environment
#   -l h_rt=3:00:00    hard run-time limit of 3 hours
#   -l h_vmem=8G       hard virtual-memory limit
#   -l gpu=1           site-defined resource; presumably requests one GPU device
#$ -S /bin/bash
#$ -q gpu.q
#$ -P FIXME
#$ -cwd
#$ -j y
#$ -M FIXME@drexel.edu
#$ -pe shm 16
#$ -l h_rt=3:00:00
#$ -l h_vmem=8G
#$ -l gpu=1

### activate the caffe environment
# NOTE(review): "conda activate" needs conda's shell initialisation; presumably
# the login shell ("-l" on the shebang line) provides it -- confirm.
conda activate caffe

python3.6 dream.py