Skip to content

Job Script Example 06 Caffe

Description

This is an example of a job to run a Python Caffe deep learning computation. The original code is from Google deepdream.

The full code for this job, plus some documentation, is in:

/mnt/HA/opt/Examples/Example06

Python Caffe Script

The Python Caffe script to perform Google DeepDream computations is too long to include in the wiki. Please see the full code on Proteus. Below is a condensed version which removes most descriptive comments. This script should work both with and without CUDA/GPU.

#!/usr/bin/env python3.6
from io import StringIO
import numpy as np
import scipy.ndimage as nd
import PIL.Image
from IPython.display import clear_output, Image, display
from google.protobuf import text_format

import sys
import os
import pathlib

import caffe

if 'SGE_HGR_ngpus' in os.environ:
    # SGE_HGR_ngpus is present in the job environment: run Caffe on the GPU.
    print("dream.py: setting GPU mode ...")
    caffe.set_mode_gpu()

    # Device names have the form GPUN with N in {0, 1}, so the last character
    # of the name is the numeric device id. Original Caffe can drive only one
    # GPU at a time, hence a device is chosen only when exactly one is listed.
    gpuname = os.environ['SGE_HGR_ngpus'].split()
    if len(gpuname) == 1:
        gpuid = int(gpuname[0][-1])
        print("... using device {}".format(gpuid))
        # pick the GPU explicitly in case the node has several devices
        caffe.set_device(gpuid)

def showarray(a, fmt='jpeg'):
    """Display array ``a`` inline as an image encoded in format ``fmt``.

    Values are clipped to the range [0, 255] and converted to unsigned 8-bit
    integers before the image is encoded.
    """
    # Encoded image data is bytes, so the in-memory buffer must be binary:
    # under Python 3 a text StringIO rejects the bytes that PIL writes.
    # Imported locally so this function does not depend on the file header.
    from io import BytesIO

    a = np.uint8(np.clip(a, 0, 255))
    f = BytesIO()
    PIL.Image.fromarray(a).save(f, fmt)
    display(Image(data=f.getvalue()))

model_path = '/mnt/HA/opt/Examples/Caffe/caffe/models/bvlc_googlenet/' # substitute your path here
net_fn   = model_path + 'deploy.prototxt'
param_fn = model_path + 'bvlc_googlenet.caffemodel'

# Load the deploy definition, switch on force_backward (make_step relies on
# net.backward() to obtain gradients on the input blob), and write the patched
# definition to tmp.prototxt in the current directory.
model = caffe.io.caffe_pb2.NetParameter()
with open(net_fn) as proto_in:
    text_format.Merge(proto_in.read(), model)
model.force_backward = True
# Close (and therefore flush) the patched file before Caffe reads it back.
with open('tmp.prototxt', 'w') as proto_out:
    proto_out.write(str(model))

net = caffe.Classifier('tmp.prototxt', param_fn,
                       mean = np.float32([104.0, 116.0, 122.0]), # ImageNet mean, training set dependent
                       channel_swap = (2,1,0)) # the reference model has channels in BGR order instead of RGB

def preprocess(net, img):
    """Convert an image array with channels on axis 2 to the network layout.

    The channel axis is moved to the front, the channel order is reversed,
    the data is cast to float32, and the mean stored in
    ``net.transformer.mean['data']`` is subtracted.
    """
    channels_first = np.rollaxis(img, 2)
    flipped_channels = channels_first[::-1]
    mean = net.transformer.mean['data']
    return np.float32(flipped_channels) - mean

def deprocess(net, img):
    """Undo ``preprocess``: return an array with channels on the last axis.

    The mean stored in ``net.transformer.mean['data']`` is added back, the
    channel order is reversed, and the channel planes are stacked depth-wise.
    """
    mean = net.transformer.mean['data']
    with_mean = img + mean
    flipped_channels = with_mean[::-1]
    return np.dstack(flipped_channels)

def objective_L2(dst):
    """Default objective: copy the blob's activations into its gradient buffer."""
    activations = dst.data
    dst.diff[...] = activations

def make_step(net, step_size=1.5, end='inception_4c/output',
              jitter=32, clip=True, objective=objective_L2):
    """Perform one gradient ascent step on the image held in the 'data' blob.

    The image is shifted by a random jitter, the net is run forward up to
    layer ``end``, ``objective`` fills that layer's diff, and the gradient is
    propagated back to the input. The input is then updated in place by a
    step normalized with the mean absolute gradient, the jitter is undone,
    and (when ``clip`` is true) the result is clipped to the valid range.
    """
    data_blob = net.blobs['data']   # the input image lives in the 'data' blob
    target_blob = net.blobs[end]

    # random shift along the last two axes of the image
    shift_x, shift_y = np.random.randint(-jitter, jitter+1, 2)
    shifted = np.roll(data_blob.data[0], shift_x, -1)
    data_blob.data[0] = np.roll(shifted, shift_y, -2)

    net.forward(end=end)
    objective(target_blob)   # the objective writes the gradient at the target layer
    net.backward(start=end)
    grad = data_blob.diff[0]
    # ascent step scaled by the mean absolute gradient
    data_blob.data[:] += step_size/np.abs(grad).mean() * grad

    # undo the jitter shift
    unshifted = np.roll(data_blob.data[0], -shift_x, -1)
    data_blob.data[0] = np.roll(unshifted, -shift_y, -2)

    if not clip:
        return
    # data is stored mean-subtracted, so the [0, 255] bounds are offset by the mean
    bias = net.transformer.mean['data']
    data_blob.data[:] = np.clip(data_blob.data, -bias, 255-bias)

def deepdream(net, base_img, iter_n=10, octave_n=4, octave_scale=1.4,
              end='inception_4c/output', clip=True, **step_params):
    """Run the multi-octave gradient ascent on ``base_img`` and return the result.

    A pyramid of ``octave_n`` progressively smaller copies of the preprocessed
    image is built (each ``octave_scale`` times smaller than the previous).
    Starting from the smallest, ``iter_n`` calls to ``make_step`` are run per
    octave; the detail produced is upscaled and added to the next octave.
    Extra keyword arguments are forwarded to ``make_step``. The final network
    input is passed through ``deprocess`` before being returned.
    """
    # build the pyramid, largest image first
    shrink = 1.0/octave_scale
    pyramid = [preprocess(net, base_img)]
    for _ in range(octave_n-1):
        pyramid.append(nd.zoom(pyramid[-1], (1, shrink, shrink), order=1))

    data_blob = net.blobs['data']
    detail = np.zeros_like(pyramid[-1])  # detail produced by the network so far
    for level, level_base in enumerate(reversed(pyramid)):
        height, width = level_base.shape[-2:]
        if level > 0:
            # bring the detail from the previous (smaller) octave up to this size
            prev_h, prev_w = detail.shape[-2:]
            detail = nd.zoom(detail, (1, 1.0*height/prev_h, 1.0*width/prev_w), order=1)

        data_blob.reshape(1, 3, height, width)  # match the net input to this octave
        data_blob.data[0] = level_base+detail
        for _ in range(iter_n):
            make_step(net, end=end, clip=clip, **step_params)

        # keep only what the network added on top of this octave's base image
        detail = data_blob.data[0]-level_base
    return deprocess(net, data_blob.data[0])

# Load the input image as a float32 array.
img = np.float32(PIL.Image.open('sky1024px.jpg'))

print("Starting deepdream...")

# Run once with the default end layer; the result is discarded.
_=deepdream(net, img)

# Run again targeting a different layer; the result is discarded as well.
_=deepdream(net, img, end='inception_3b/5x5_reduce')

# Frames are written to a per-job directory named after the JOB_ID environment
# variable (os.getenv returns None if it is unset, giving 'frames_None').
framesdir = 'frames_{}'.format(os.getenv('JOB_ID'))
pathlib.Path(framesdir).mkdir(parents=True, exist_ok=True)
frame = img
frame_i = 0

# Generate 100 frames: each frame is the deepdream of the previous one, which
# is then resampled with scale factor 1-s and an offset of s/2 of the image
# size on the first two axes before being fed back in.
h, w = frame.shape[:2]
s = 0.05 # scale coefficient
for i in range(100):
    print("Frame %04d..." % (frame_i))
    frame = deepdream(net, frame)
    PIL.Image.fromarray(np.uint8(frame)).save("%s/%04d.jpg" % (framesdir, frame_i))
    frame = nd.affine_transform(frame, [1-s,1-s,1], [h*s/2,w*s/2,0], order=1)
    frame_i += 1

# Load the guide image as a float32 array.
guide = np.float32(PIL.Image.open('flowers.jpg'))

# Run the guide image forward up to layer `end` and keep a copy of that
# layer's activations; objective_guide reads them as guide_features.
end = 'inception_3b/output'
h, w = guide.shape[:2]
src, dst = net.blobs['data'], net.blobs[end]
src.reshape(1,3,h,w)  # resize the input blob to the guide image size
src.data[0] = preprocess(net, guide)
net.forward(end=end)
guide_features = dst.data[0].copy()  # copy, so later forward passes do not overwrite it

def objective_guide(dst):
    """Guided objective: steer activations towards the best-matching guide features.

    Both the current activations and the module-level ``guide_features`` are
    flattened to (channels, positions). For every position of the current
    activations, the guide position with the largest dot product is selected
    and its feature vector is written into ``dst.diff[0]``.
    """
    current = dst.data[0].copy()
    n_channels = current.shape[0]
    current_flat = current.reshape(n_channels, -1)
    guide_flat = guide_features.reshape(n_channels, -1)
    # dot products between every current position and every guide position
    similarity = current_flat.T.dot(guide_flat)
    best_match = similarity.argmax(1)
    dst.diff[0].reshape(n_channels, -1)[:] = guide_flat[:, best_match]

Job Script

CPU-only

NOTE: the Python script above has been converted to Python 3.6. Some small changes should be made to use Python 2.7.

  • replace "from io import StringIO" with "from cStringIO import StringIO"
  • replace "range" with "xrange"
  • replace the pathlib.Path().mkdir() with the appropriate os.mkdir() call
#!/bin/bash
# Grid Engine job script: CPU-only run of dream.py.
# The "#$" lines below are scheduler directives (see qsub(1)): shell, queue,
# project, run from the submission directory, merge stderr into stdout, mail
# address, run-time limit, memory limit, Intel nodes only, and 16 slots in
# the "shm" parallel environment.
# Replace the project name and e-mail address with your own.
#$ -S /bin/bash
#$ -q all.q
#$ -P myrsrchPrj
#$ -cwd
#$ -j y
#$ -M myname@drexel.edu
#$ -l h_rt=3:00:00
#$ -l h_vmem=4G
#$ -l vendor=intel
#$ -pe shm 16

# Initialise the "module" command, then load the toolchain and the Caffe module.
. /etc/profile.d/modules.sh
module load shared
module load sge/univa
module load proteus
module load gcc/4.8.1
module load intel/composerxe/2015.1.133
module load proteus-openmpi/intel/2015/1.8.1-mlnx-ofed
module load caffe/intel/1.0

# This job runs the script under Python 2.7; the script as shown on this page
# targets Python 3.6, so apply the Python 2.7 changes listed above first.
python2.7 dream.py

CUDA-enabled

  • Python will use all available CPU cores, so request 16 (all cores on a single GPU node)
  • Caffe with CUDA can use only a single GPU device
#!/bin/bash -l
# Grid Engine job script: CUDA-enabled run of dream.py on the GPU queue.
# The "#$" lines below are scheduler directives (see qsub(1)).
# Replace the FIXME placeholders (-P project, -M address) before submitting.
# NOTE(review): dream.py switches to GPU mode only when SGE_HGR_ngpus is set
# in the job environment -- confirm that the "gpu=1" request below causes the
# scheduler to export that variable.
#$ -S /bin/bash
#$ -q gpu.q
#$ -P FIXME
#$ -cwd
#$ -j y
#$ -M FIXME@drexel.edu
#$ -pe shm 16
#$ -l h_rt=3:00:00
#$ -l h_vmem=8G
#$ -l gpu=1

### activate the caffe environment
conda activate caffe

# Run the Python 3.6 version of the script.
python3.6 dream.py