Added fooling code

pull/420/head
Anh Nguyen 9 years ago
parent 2d1fc7a6cd
commit a9c92b6391
  1. 29
      modules/dnns_easily_fooled/.gitignore
  2. BIN
      modules/dnns_easily_fooled/Installation_Guide.pdf
  3. 52
      modules/dnns_easily_fooled/README.md
  4. 65
      modules/dnns_easily_fooled/caffe/.gitignore
  5. 17
      modules/dnns_easily_fooled/caffe/CONTRIBUTORS.md
  6. 7
      modules/dnns_easily_fooled/caffe/INSTALL.md
  7. 22
      modules/dnns_easily_fooled/caffe/LICENSE
  8. 439
      modules/dnns_easily_fooled/caffe/Makefile
  9. 56
      modules/dnns_easily_fooled/caffe/Makefile.config
  10. 56
      modules/dnns_easily_fooled/caffe/Makefile.config.example
  11. 115
      modules/dnns_easily_fooled/caffe/README.md
  12. 66
      modules/dnns_easily_fooled/caffe/ascent/README.md
  13. 213
      modules/dnns_easily_fooled/caffe/ascent/deploy_1_forcebackward.prototxt
  14. 315
      modules/dnns_easily_fooled/caffe/ascent/find_fooling_image.py
  15. 97
      modules/dnns_easily_fooled/caffe/ascent/hyperparam_search.py
  16. 112
      modules/dnns_easily_fooled/caffe/ascent/misc_helper.py
  17. 0
      modules/dnns_easily_fooled/caffe/ascent/results/.gitignore
  18. 7
      modules/dnns_easily_fooled/caffe/ascent/run.sh
  19. 31
      modules/dnns_easily_fooled/caffe/ascent/run_chosen_supplementary.sh
  20. 32
      modules/dnns_easily_fooled/caffe/ascent/run_hyperparam_search.sh
  21. 53
      modules/dnns_easily_fooled/caffe/caffe.cloc
  22. 1
      modules/dnns_easily_fooled/caffe/docs/CNAME
  23. 3
      modules/dnns_easily_fooled/caffe/docs/README.md
  24. 52
      modules/dnns_easily_fooled/caffe/docs/_layouts/default.html
  25. 95
      modules/dnns_easily_fooled/caffe/docs/cifar10.md
  26. 63
      modules/dnns_easily_fooled/caffe/docs/development.md
  27. 71
      modules/dnns_easily_fooled/caffe/docs/feature_extraction.md
  28. 29
      modules/dnns_easily_fooled/caffe/docs/getting_pretrained_models.md
  29. 102
      modules/dnns_easily_fooled/caffe/docs/imagenet_training.md
  30. 79
      modules/dnns_easily_fooled/caffe/docs/index.md
  31. 182
      modules/dnns_easily_fooled/caffe/docs/installation.md
  32. 20
      modules/dnns_easily_fooled/caffe/docs/javascripts/scale.fix.js
  33. 91
      modules/dnns_easily_fooled/caffe/docs/mnist.md
  34. 153
      modules/dnns_easily_fooled/caffe/docs/mnist_prototxt.md
  35. 37
      modules/dnns_easily_fooled/caffe/docs/mnist_solver_prototxt.md
  36. 57
      modules/dnns_easily_fooled/caffe/docs/performance_hardware.md
  37. 69
      modules/dnns_easily_fooled/caffe/docs/stylesheets/pygment_trac.css
  38. 21
      modules/dnns_easily_fooled/caffe/docs/stylesheets/reset.css
  39. 393
      modules/dnns_easily_fooled/caffe/docs/stylesheets/styles.css
  40. 100
      modules/dnns_easily_fooled/caffe/include/caffe/blob.hpp
  41. 19
      modules/dnns_easily_fooled/caffe/include/caffe/caffe.hpp
  42. 169
      modules/dnns_easily_fooled/caffe/include/caffe/common.hpp
  43. 337
      modules/dnns_easily_fooled/caffe/include/caffe/data_layers.hpp
  44. 173
      modules/dnns_easily_fooled/caffe/include/caffe/filler.hpp
  45. 206
      modules/dnns_easily_fooled/caffe/include/caffe/layer.hpp
  46. 198
      modules/dnns_easily_fooled/caffe/include/caffe/loss_layers.hpp
  47. 157
      modules/dnns_easily_fooled/caffe/include/caffe/net.hpp
  48. 272
      modules/dnns_easily_fooled/caffe/include/caffe/neuron_layers.hpp
  49. 77
      modules/dnns_easily_fooled/caffe/include/caffe/solver.hpp
  50. 67
      modules/dnns_easily_fooled/caffe/include/caffe/syncedmem.hpp
  51. 39
      modules/dnns_easily_fooled/caffe/include/caffe/util/benchmark.hpp
  52. 25
      modules/dnns_easily_fooled/caffe/include/caffe/util/format.hpp
  53. 30
      modules/dnns_easily_fooled/caffe/include/caffe/util/im2col.hpp
  54. 31
      modules/dnns_easily_fooled/caffe/include/caffe/util/insert_splits.hpp
  55. 93
      modules/dnns_easily_fooled/caffe/include/caffe/util/io.hpp
  56. 253
      modules/dnns_easily_fooled/caffe/include/caffe/util/math_functions.hpp
  57. 97
      modules/dnns_easily_fooled/caffe/include/caffe/util/mkl_alternate.hpp
  58. 19
      modules/dnns_easily_fooled/caffe/include/caffe/util/rng.hpp
  59. 49
      modules/dnns_easily_fooled/caffe/include/caffe/util/upgrade_proto.hpp
  60. 479
      modules/dnns_easily_fooled/caffe/include/caffe/vision_layers.hpp
  61. BIN
      modules/dnns_easily_fooled/caffe/matlab/caffe/ilsvrc_2012_mean.mat
  62. 370
      modules/dnns_easily_fooled/caffe/matlab/caffe/matcaffe.cpp
  63. 76
      modules/dnns_easily_fooled/caffe/matlab/caffe/matcaffe_batch.m
  64. 110
      modules/dnns_easily_fooled/caffe/matlab/caffe/matcaffe_demo.m
  65. 44
      modules/dnns_easily_fooled/caffe/matlab/caffe/matcaffe_init.m
  66. 41
      modules/dnns_easily_fooled/caffe/matlab/caffe/prepare_batch.m
  67. 42
      modules/dnns_easily_fooled/caffe/matlab/caffe/print_cell.m
  68. 21
      modules/dnns_easily_fooled/caffe/matlab/caffe/read_cell.m
  69. 4
      modules/dnns_easily_fooled/caffe/python/caffe/__init__.py
  70. 357
      modules/dnns_easily_fooled/caffe/python/caffe/_caffe.cpp
  71. 86
      modules/dnns_easily_fooled/caffe/python/caffe/classifier.py
  72. 191
      modules/dnns_easily_fooled/caffe/python/caffe/detector.py
  73. 76
      modules/dnns_easily_fooled/caffe/python/caffe/draw.py
  74. BIN
      modules/dnns_easily_fooled/caffe/python/caffe/imagenet/ilsvrc_2012_mean.npy
  75. 159
      modules/dnns_easily_fooled/caffe/python/caffe/io.py
  76. 352
      modules/dnns_easily_fooled/caffe/python/caffe/pycaffe.py
  77. 120
      modules/dnns_easily_fooled/caffe/python/classify.py
  78. 158
      modules/dnns_easily_fooled/caffe/python/detect.py
  79. 25
      modules/dnns_easily_fooled/caffe/python/draw_net.py
  80. 14
      modules/dnns_easily_fooled/caffe/python/requirements.txt
  81. 171
      modules/dnns_easily_fooled/caffe/python/test.py
  82. 11
      modules/dnns_easily_fooled/caffe/scripts/build_docs.sh
  83. 4796
      modules/dnns_easily_fooled/caffe/scripts/cpp_lint.py
  84. 50
      modules/dnns_easily_fooled/caffe/scripts/deploy_docs.sh
  85. 214
      modules/dnns_easily_fooled/caffe/src/caffe/blob.cpp
  86. 198
      modules/dnns_easily_fooled/caffe/src/caffe/common.cpp
  87. 101
      modules/dnns_easily_fooled/caffe/src/caffe/layer_factory.cpp
  88. 64
      modules/dnns_easily_fooled/caffe/src/caffe/layers/accuracy_layer.cpp
  89. 55
      modules/dnns_easily_fooled/caffe/src/caffe/layers/argmax_layer.cpp
  90. 50
      modules/dnns_easily_fooled/caffe/src/caffe/layers/bnll_layer.cpp
  91. 65
      modules/dnns_easily_fooled/caffe/src/caffe/layers/bnll_layer.cu
  92. 101
      modules/dnns_easily_fooled/caffe/src/caffe/layers/concat_layer.cpp
  93. 75
      modules/dnns_easily_fooled/caffe/src/caffe/layers/concat_layer.cu
  94. 167
      modules/dnns_easily_fooled/caffe/src/caffe/layers/conv_layer.cpp
  95. 104
      modules/dnns_easily_fooled/caffe/src/caffe/layers/conv_layer.cu
  96. 367
      modules/dnns_easily_fooled/caffe/src/caffe/layers/data_layer.cpp
  97. 39
      modules/dnns_easily_fooled/caffe/src/caffe/layers/data_layer.cu
  98. 68
      modules/dnns_easily_fooled/caffe/src/caffe/layers/dropout_layer.cpp
  99. 78
      modules/dnns_easily_fooled/caffe/src/caffe/layers/dropout_layer.cu
  100. 100
      modules/dnns_easily_fooled/caffe/src/caffe/layers/dummy_data_layer.cpp
  101. Some files were not shown because too many files have changed in this diff Show More

@ -0,0 +1,29 @@
# Compiled Object files
*.slo
*.lo
*.o
*.obj
# Precompiled Headers
*.gch
*.pch
# Compiled Dynamic libraries
*.so
*.dylib
*.dll
*.pyc
# Fortran module files
*.mod
# Compiled Static libraries
*.lai
*.la
*.a
*.lib
# Executables
*.exe
*.out
*.app

@ -0,0 +1,52 @@
# Fooling Code
This is the code base used to reproduce the "fooling" images in the paper:
[Nguyen A](http://anhnguyen.me), [Yosinski J](http://yosinski.com/), [Clune J](http://jeffclune.com). ["Deep Neural Networks are Easily Fooled: High Confidence Predictions for Unrecognizable Images"](http://arxiv.org/abs/1412.1897). In Computer Vision and Pattern Recognition (CVPR '15), IEEE, 2015.
**If you use this software in an academic article, please cite:**
```
@inproceedings{nguyen2015deep,
  title={Deep Neural Networks are Easily Fooled: High Confidence Predictions for Unrecognizable Images},
  author={Nguyen, Anh and Yosinski, Jason and Clune, Jeff},
  booktitle={Computer Vision and Pattern Recognition (CVPR), 2015 IEEE Conference on},
  year={2015},
  organization={IEEE}
}
```
For more information regarding the paper, please visit www.evolvingai.org/fooling
## Requirements
Installation requires two main software packages (both included in this package):
1. Caffe: http://caffe.berkeleyvision.org
* Our libraries installed to work with Caffe
* Cuda 6.0
* Boost 1.52
* g++ 4.6
2. Sferes: https://github.com/jbmouret/sferes2
* Our libraries installed to work with Sferes
* OpenCV 2.4.10
* Boost 1.52
* g++ 4.9 (a C++ compiler compatible with C++11 standard)
Note: These are specific versions of the two frameworks, extended with the additional work needed to produce the images in the paper. They are not the same as the frameworks' master branches.
## Installation
Please see the [Installation_Guide](https://github.com/Evolving-AI-Lab/fooling/wiki/Installation-Guide) for more details.
## Usage
* An MNIST experiment (Fig. 4, 5 in the paper) can be run directly on a local machine (4-core) within a reasonable amount of time (about 5 minutes or less for 200 generations).
* An ImageNet experiment needs to be run in a cluster environment. It took us ~4 days on 128 cores to run 5000 generations and produce 1000 images (Fig. 8 in the paper).
* [How to configure an experiment to test the evolutionary framework quickly](https://github.com/Evolving-AI-Lab/fooling/wiki/How-to-test-the-evolutionary-framework-quickly)
* To reproduce the gradient ascent fooling images (Figures 13, S3, S4, S5, S6, and S7 from the paper), see the [documentation in the caffe/ascent directory](https://github.com/Evolving-AI-Lab/fooling/tree/ascent/caffe/ascent). You'll need to use the `ascent` branch instead of master, because the two required versions of Caffe are different.
## Updates
* Our [fork project](https://github.com/Evolving-AI-Lab/innovation-engine) supports the **latest Caffe** and includes experiments that create *recognizable* images instead of unrecognizable ones.
## License
Please refer to the licenses of Sferes and Caffe projects.

@ -0,0 +1,65 @@
## General
# Compiled Object files
*.slo
*.lo
*.o
*.cuo
*.png
*.jpg
*.jpeg
# Compiled Dynamic libraries
*.so
*.dylib
# Compiled Static libraries
*.lai
*.la
*.a
# Compiled protocol buffers
*.pb.h
*.pb.cc
*_pb2.py
# Compiled python
*.pyc
# Compiled MATLAB
*.mex*
# build, distribute, and bins
build
.build_debug/*
.build_release/*
distribute/*
*.testbin
*.bin
python/caffe/proto/
# Editor temporaries
*.swp
*~
# IPython notebook checkpoints
.ipynb_checkpoints
## Caffe
# User's build configuration
#Makefile.config
# Data and examples are either
# 1. reference, and not casually committed
# 2. custom, and live on their own unless they're deliberately contributed
data/*
examples/*
# Generated documentation
docs/_site
_site
# Sublime Text settings
*.sublime-workspace
*.sublime-project

@ -0,0 +1,17 @@
# Contributors
Caffe is developed by a core set of BVLC members and the open-source community.
We thank all of our [contributors](https://github.com/BVLC/caffe/graphs/contributors)!
**For the detailed history of contributions** of a given file, try
`git blame file`
to see line-by-line credits and
`git log --follow file`
to see the change log even across renames and rewrites.
Please refer to the [acknowledgements](http://caffe.berkeleyvision.org/#acknowledgements) on the Caffe site for further details.

@ -0,0 +1,7 @@
# Installation
See http://caffe.berkeleyvision.org/installation.html for the latest
installation instructions.
Check the issue tracker in case you need help:
https://github.com/BVLC/caffe/issues

@ -0,0 +1,22 @@
Copyright (c) 2014, The Regents of the University of California (Regents)
All rights reserved.
Redistribution and use in source and binary forms, with or without
modification, are permitted provided that the following conditions are met:
1. Redistributions of source code must retain the above copyright notice, this
list of conditions and the following disclaimer.
2. Redistributions in binary form must reproduce the above copyright notice,
this list of conditions and the following disclaimer in the documentation
and/or other materials provided with the distribution.
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR
ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
(INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

@ -0,0 +1,439 @@
# The makefile for caffe. Pretty hacky.
# Project name; the library names below (and the source paths and wrapper
# variables later in this file) are derived from it.
PROJECT := caffe
# User-editable build settings are read from this file.
CONFIG_FILE := Makefile.config
include $(CONFIG_FILE)
# The BUILD_DIR value from the config file is kept as the name of a symlink
# (see the .linked rule) that points at the directory actually built into.
BUILD_DIR_LINK := $(BUILD_DIR)
# Release and debug outputs go to separate hidden directories.
RELEASE_BUILD_DIR := .$(BUILD_DIR)_release
DEBUG_BUILD_DIR := .$(BUILD_DIR)_debug
# Default to a release build unless DEBUG has already been set.
DEBUG ?= 0
# From here on BUILD_DIR is the real output directory for the selected mode and
# OTHER_BUILD_DIR is the directory belonging to the opposite mode.
ifeq ($(DEBUG), 1)
BUILD_DIR := $(DEBUG_BUILD_DIR)
OTHER_BUILD_DIR := $(RELEASE_BUILD_DIR)
else
BUILD_DIR := $(RELEASE_BUILD_DIR)
OTHER_BUILD_DIR := $(DEBUG_BUILD_DIR)
endif
# The target static library and shared library name
LIB_BUILD_DIR := $(BUILD_DIR)/lib
NAME := $(LIB_BUILD_DIR)/lib$(PROJECT).so
STATIC_NAME := $(LIB_BUILD_DIR)/lib$(PROJECT).a
##############################
# Get all source files
##############################
# CXX_SRCS are the source files excluding the test ones.
CXX_SRCS := $(shell find src/$(PROJECT) ! -name "test_*.cpp" -name "*.cpp")
# HXX_SRCS are the header files
HXX_SRCS := $(shell find include/$(PROJECT) -name "*.hpp")
# CU_SRCS are the cuda source files
CU_SRCS := $(shell find src/$(PROJECT) -name "*.cu")
# TEST_SRCS are the test source files
TEST_MAIN_SRC := src/$(PROJECT)/test/test_caffe_main.cpp
TEST_SRCS := $(shell find src/$(PROJECT) -name "test_*.cpp")
TEST_SRCS := $(filter-out $(TEST_MAIN_SRC), $(TEST_SRCS))
GTEST_SRC := src/gtest/gtest-all.cpp
# TEST_HDRS are the test header files
TEST_HDRS := $(shell find src/$(PROJECT) -name "test_*.hpp")
# TOOL_SRCS are the source files for the tool binaries
TOOL_SRCS := $(shell find tools -name "*.cpp")
# EXAMPLE_SRCS are the source files for the example binaries
EXAMPLE_SRCS := $(shell find examples -name "*.cpp")
# BUILD_INCLUDE_DIR contains any generated header files we want to include.
BUILD_INCLUDE_DIR := $(BUILD_DIR)/src
# PROTO_SRCS are the protocol buffer definitions
PROTO_SRC_DIR := src/$(PROJECT)/proto
PROTO_SRCS := $(wildcard $(PROTO_SRC_DIR)/*.proto)
# PROTO_BUILD_DIR will contain the .cc and obj files generated from
# PROTO_SRCS; PROTO_BUILD_INCLUDE_DIR will contain the .h header files
PROTO_BUILD_DIR := $(BUILD_DIR)/$(PROTO_SRC_DIR)
PROTO_BUILD_INCLUDE_DIR := $(BUILD_INCLUDE_DIR)/$(PROJECT)/proto
# NONGEN_CXX_SRCS includes all source/header files except those generated
# automatically (e.g., by proto).
NONGEN_CXX_SRCS := $(shell find \
src/$(PROJECT) \
include/$(PROJECT) \
python/$(PROJECT) \
matlab/$(PROJECT) \
examples \
tools \
-name "*.cpp" -or -name "*.hpp" -or -name "*.cu" -or -name "*.cuh")
LINT_REPORT := $(BUILD_DIR)/cpp_lint.log
FAILED_LINT_REPORT := $(BUILD_DIR)/cpp_lint.error_log
# PY$(PROJECT)_SRC is the python wrapper for $(PROJECT)
PY$(PROJECT)_SRC := python/$(PROJECT)/_$(PROJECT).cpp
PY$(PROJECT)_SO := python/$(PROJECT)/_$(PROJECT).so
# MAT$(PROJECT)_SRC is the matlab wrapper for $(PROJECT)
MAT$(PROJECT)_SRC := matlab/$(PROJECT)/mat$(PROJECT).cpp
ifneq ($(MATLAB_DIR),)
MAT_SO_EXT := $(shell $(MATLAB_DIR)/bin/mexext)
endif
MAT$(PROJECT)_SO := matlab/$(PROJECT)/$(PROJECT).$(MAT_SO_EXT)
##############################
# Derive generated files
##############################
# The generated files for protocol buffers
PROTO_GEN_HEADER_SRCS := $(addprefix $(PROTO_BUILD_DIR)/, \
$(notdir ${PROTO_SRCS:.proto=.pb.h}))
PROTO_GEN_HEADER := $(addprefix $(PROTO_BUILD_INCLUDE_DIR)/, \
$(notdir ${PROTO_SRCS:.proto=.pb.h}))
HXX_SRCS += $(PROTO_GEN_HEADER)
PROTO_GEN_CC := $(addprefix $(BUILD_DIR)/, ${PROTO_SRCS:.proto=.pb.cc})
PY_PROTO_BUILD_DIR := python/$(PROJECT)/proto
PY_PROTO_INIT := python/$(PROJECT)/proto/__init__.py
PROTO_GEN_PY := $(foreach file,${PROTO_SRCS:.proto=_pb2.py}, \
$(PY_PROTO_BUILD_DIR)/$(notdir $(file)))
# The objects corresponding to the source files
# These objects will be linked into the final shared library, so we
# exclude the tool, example, and test objects.
CXX_OBJS := $(addprefix $(BUILD_DIR)/, ${CXX_SRCS:.cpp=.o})
CU_OBJS := $(addprefix $(BUILD_DIR)/, ${CU_SRCS:.cu=.cuo})
PROTO_OBJS := ${PROTO_GEN_CC:.cc=.o}
OBJ_BUILD_DIR := $(BUILD_DIR)/src/$(PROJECT)
LAYER_BUILD_DIR := $(OBJ_BUILD_DIR)/layers
UTIL_BUILD_DIR := $(OBJ_BUILD_DIR)/util
OBJS := $(PROTO_OBJS) $(CXX_OBJS) $(CU_OBJS)
# tool, example, and test objects
TOOL_OBJS := $(addprefix $(BUILD_DIR)/, ${TOOL_SRCS:.cpp=.o})
TOOL_BUILD_DIR := $(BUILD_DIR)/tools
TEST_BUILD_DIR := $(BUILD_DIR)/src/$(PROJECT)/test
TEST_OBJS := $(addprefix $(BUILD_DIR)/, ${TEST_SRCS:.cpp=.o})
GTEST_OBJ := $(addprefix $(BUILD_DIR)/, ${GTEST_SRC:.cpp=.o})
GTEST_BUILD_DIR := $(dir $(GTEST_OBJ))
EXAMPLE_OBJS := $(addprefix $(BUILD_DIR)/, ${EXAMPLE_SRCS:.cpp=.o})
EXAMPLE_BUILD_DIR := $(BUILD_DIR)/examples
EXAMPLE_BUILD_DIRS := $(EXAMPLE_BUILD_DIR)
EXAMPLE_BUILD_DIRS += $(foreach obj,$(EXAMPLE_OBJS),$(dir $(obj)))
# tool, example, and test bins
TOOL_BINS := ${TOOL_OBJS:.o=.bin}
EXAMPLE_BINS := ${EXAMPLE_OBJS:.o=.bin}
# Put the test binaries in build/test for convenience.
TEST_BIN_DIR := $(BUILD_DIR)/test
TEST_BINS := $(addsuffix .testbin,$(addprefix $(TEST_BIN_DIR)/, \
$(foreach obj,$(TEST_OBJS),$(basename $(notdir $(obj))))))
TEST_ALL_BIN := $(TEST_BIN_DIR)/test_all.testbin
##############################
# Derive include and lib directories
##############################
CUDA_INCLUDE_DIR := $(CUDA_DIR)/include
CUDA_LIB_DIR := $(CUDA_DIR)/lib64 $(CUDA_DIR)/lib
INCLUDE_DIRS += $(BUILD_INCLUDE_DIR)
INCLUDE_DIRS += ./src ./include $(CUDA_INCLUDE_DIR)
LIBRARY_DIRS += $(CUDA_LIB_DIR)
LIBRARIES := cudart cublas curand \
pthread \
glog protobuf leveldb snappy \
lmdb \
boost_system \
hdf5_hl hdf5 \
opencv_core opencv_highgui opencv_imgproc
PYTHON_LIBRARIES := boost_python python2.7
WARNINGS := -Wall
##############################
# Set build directories
##############################
DISTRIBUTE_SUBDIRS := $(DISTRIBUTE_DIR)/bin $(DISTRIBUTE_DIR)/lib
DIST_ALIASES := dist
ifneq ($(strip $(DISTRIBUTE_DIR)),distribute)
DIST_ALIASES += distribute
endif
ALL_BUILD_DIRS := $(sort \
$(BUILD_DIR) $(LIB_BUILD_DIR) $(OBJ_BUILD_DIR) \
$(LAYER_BUILD_DIR) $(UTIL_BUILD_DIR) $(TOOL_BUILD_DIR) \
$(TEST_BUILD_DIR) $(TEST_BIN_DIR) $(GTEST_BUILD_DIR) \
$(EXAMPLE_BUILD_DIRS) \
$(PROTO_BUILD_DIR) $(PROTO_BUILD_INCLUDE_DIR) $(PY_PROTO_BUILD_DIR) \
$(DISTRIBUTE_SUBDIRS))
##############################
# Configure build
##############################
# Determine platform
UNAME := $(shell uname -s)
ifeq ($(UNAME), Linux)
LINUX := 1
else ifeq ($(UNAME), Darwin)
OSX := 1
endif
ifeq ($(LINUX), 1)
CXX := /usr/bin/g++
endif
# OS X:
# clang++ instead of g++
# libstdc++ instead of libc++ for CUDA compatibility on 10.9
ifeq ($(OSX), 1)
CXX := /usr/bin/clang++
ifneq ($(findstring 10.9, $(shell sw_vers -productVersion)),)
CXXFLAGS += -stdlib=libstdc++
endif
endif
# Debugging
ifeq ($(DEBUG), 1)
COMMON_FLAGS := -DDEBUG -g -O0
else
COMMON_FLAGS := -DNDEBUG -O2
endif
# BLAS configuration (default = ATLAS)
# Values handled explicitly below: mkl, open, atlas.
BLAS ?= atlas
ifeq ($(BLAS), mkl)
# MKL
LIBRARIES += mkl_rt
COMMON_FLAGS += -DUSE_MKL
# Default MKL install prefix; BLAS_INCLUDE and BLAS_LIB fall back to paths
# under it only when they have not already been set.
MKL_DIR = /opt/intel/mkl
BLAS_INCLUDE ?= $(MKL_DIR)/include
BLAS_LIB ?= $(MKL_DIR)/lib $(MKL_DIR)/lib/intel64
else ifeq ($(BLAS), open)
# OpenBLAS
LIBRARIES += openblas
else
# ATLAS
# Any BLAS value other than mkl/open reaches this branch. On Linux the ATLAS
# libraries are added only when BLAS is exactly "atlas"; on OS X cblas and the
# vecLib framework are added regardless of the value.
ifeq ($(LINUX), 1)
ifeq ($(BLAS), atlas)
# Linux simply has cblas and atlas
LIBRARIES += cblas atlas
endif
else ifeq ($(OSX), 1)
# OS X packages atlas as the vecLib framework
BLAS_INCLUDE ?= /System/Library/Frameworks/vecLib.framework/Versions/Current/Headers/
LIBRARIES += cblas
LDFLAGS += -framework vecLib
endif
endif
# Append the BLAS paths (empty if never set) to the include and library
# search lists.
INCLUDE_DIRS += $(BLAS_INCLUDE)
LIBRARY_DIRS += $(BLAS_LIB)
# Complete build flags.
COMMON_FLAGS += $(foreach includedir,$(INCLUDE_DIRS),-I$(includedir))
CXXFLAGS += -pthread -fPIC $(COMMON_FLAGS)
NVCCFLAGS := -ccbin=$(CXX) -Xcompiler -fPIC $(COMMON_FLAGS)
LDFLAGS += $(foreach librarydir,$(LIBRARY_DIRS),-L$(librarydir)) \
$(foreach library,$(LIBRARIES),-l$(library))
PYTHON_LDFLAGS := $(LDFLAGS) $(foreach library,$(PYTHON_LIBRARIES),-l$(library))
# 'superclean' target recursively* deletes all files ending with an extension
# in $(SUPERCLEAN_EXTS) below. This may be useful if you've built older
# versions of Caffe that do not place all generated files in a location known
# to the 'clean' target.
#
# 'supercleanlist' will list the files to be deleted by make superclean.
#
# * Recursive with the exception that symbolic links are never followed, per the
# default behavior of 'find'.
SUPERCLEAN_EXTS := .so .a .o .bin .testbin .pb.cc .pb.h _pb2.py .cuo
##############################
# Define build targets
##############################
.PHONY: all test clean linecount lint tools examples $(DIST_ALIASES) \
py mat py$(PROJECT) mat$(PROJECT) proto runtest \
superclean supercleanlist supercleanfiles
all: $(NAME) $(STATIC_NAME) tools examples
linecount: clean
cloc --read-lang-def=$(PROJECT).cloc src/$(PROJECT)/
lint: $(LINT_REPORT)
$(LINT_REPORT): $(NONGEN_CXX_SRCS) | $(BUILD_DIR)
@ (python ./scripts/cpp_lint.py $(NONGEN_CXX_SRCS) > $(LINT_REPORT) 2>&1 \
&& ($(RM) $(FAILED_LINT_REPORT); echo "No lint errors!")) || ( \
mv $(LINT_REPORT) $(FAILED_LINT_REPORT); \
grep -v "^Done processing " $(FAILED_LINT_REPORT); \
echo "Found 1 or more lint errors; see log at $(FAILED_LINT_REPORT)"; \
exit 1)
test: $(TEST_ALL_BIN) $(TEST_BINS)
tools: $(TOOL_BINS)
examples: $(EXAMPLE_BINS)
py$(PROJECT): py
py: $(PY$(PROJECT)_SO) $(PROTO_GEN_PY)
$(PY$(PROJECT)_SO): $(STATIC_NAME) $(PY$(PROJECT)_SRC)
$(CXX) -shared -o $@ $(PY$(PROJECT)_SRC) \
$(STATIC_NAME) $(CXXFLAGS) $(PYTHON_LDFLAGS)
@ echo
mat$(PROJECT): mat
mat: $(MAT$(PROJECT)_SO)
$(MAT$(PROJECT)_SO): $(MAT$(PROJECT)_SRC) $(STATIC_NAME)
@ if [ -z "$(MATLAB_DIR)" ]; then \
echo "MATLAB_DIR must be specified in $(CONFIG_FILE)" \
"to build mat$(PROJECT)."; \
exit 1; \
fi
$(MATLAB_DIR)/bin/mex $(MAT$(PROJECT)_SRC) $(STATIC_NAME) \
CXXFLAGS="\$$CXXFLAGS $(CXXFLAGS) $(WARNINGS)" \
CXXLIBS="\$$CXXLIBS $(LDFLAGS)" -o $@
@ echo
runtest: $(TEST_ALL_BIN)
$(TEST_ALL_BIN) $(TEST_GPUID) --gtest_shuffle
# The symlink named by BUILD_DIR_LINK depends on the ".linked" marker inside
# the active build directory.
$(BUILD_DIR_LINK): $(BUILD_DIR)/.linked
# Create a target ".linked" in this BUILD_DIR to tell Make that the "build" link
# is currently correct, then delete the one in the OTHER_BUILD_DIR in case it
# exists and $(DEBUG) is toggled later.
$(BUILD_DIR)/.linked:
@ mkdir -p $(BUILD_DIR)
@ $(RM) $(OTHER_BUILD_DIR)/.linked
@ $(RM) -r $(BUILD_DIR_LINK)
@ ln -s $(BUILD_DIR) $(BUILD_DIR_LINK)
@ touch $@
# Each build directory is created on demand. The order-only prerequisite
# ensures the symlink rule has run first without forcing rebuilds when the
# link's timestamp changes.
$(ALL_BUILD_DIRS): | $(BUILD_DIR_LINK)
@ mkdir -p $@
$(NAME): $(PROTO_OBJS) $(OBJS) | $(LIB_BUILD_DIR)
$(CXX) -shared -o $@ $(OBJS) $(CXXFLAGS) $(LDFLAGS) $(WARNINGS)
@ echo
$(STATIC_NAME): $(PROTO_OBJS) $(OBJS) | $(LIB_BUILD_DIR)
ar rcs $@ $(PROTO_OBJS) $(OBJS)
@ echo
$(TEST_BUILD_DIR)/%.o: src/$(PROJECT)/test/%.cpp $(HXX_SRCS) $(TEST_HDRS) \
| $(TEST_BUILD_DIR)
$(CXX) $< $(CXXFLAGS) -c -o $@
@ echo
$(TEST_ALL_BIN): $(TEST_MAIN_SRC) $(TEST_OBJS) $(GTEST_OBJ) $(STATIC_NAME) \
| $(TEST_BIN_DIR)
$(CXX) $(TEST_MAIN_SRC) $(TEST_OBJS) $(GTEST_OBJ) $(STATIC_NAME) \
-o $@ $(CXXFLAGS) $(LDFLAGS) $(WARNINGS)
@ echo
$(TEST_BIN_DIR)/%.testbin: $(TEST_BUILD_DIR)/%.o $(GTEST_OBJ) $(STATIC_NAME) \
| $(TEST_BIN_DIR)
$(CXX) $(TEST_MAIN_SRC) $< $(GTEST_OBJ) $(STATIC_NAME) \
-o $@ $(CXXFLAGS) $(LDFLAGS) $(WARNINGS)
@ echo
$(TOOL_BINS): %.bin : %.o $(STATIC_NAME)
$(CXX) $< $(STATIC_NAME) -o $@ $(CXXFLAGS) $(LDFLAGS) $(WARNINGS)
@ echo
$(EXAMPLE_BINS): %.bin : %.o $(STATIC_NAME)
$(CXX) $< $(STATIC_NAME) -o $@ $(CXXFLAGS) $(LDFLAGS) $(WARNINGS)
@ echo
$(LAYER_BUILD_DIR)/%.o: src/$(PROJECT)/layers/%.cpp $(HXX_SRCS) \
| $(LAYER_BUILD_DIR)
$(CXX) $< $(CXXFLAGS) -c -o $@
@ echo
$(PROTO_BUILD_DIR)/%.pb.o: $(PROTO_BUILD_DIR)/%.pb.cc $(PROTO_GEN_HEADER) \
| $(PROTO_BUILD_DIR)
$(CXX) $< $(CXXFLAGS) -c -o $@
@ echo
$(UTIL_BUILD_DIR)/%.o: src/$(PROJECT)/util/%.cpp $(HXX_SRCS) | $(UTIL_BUILD_DIR)
$(CXX) $< $(CXXFLAGS) -c -o $@
@ echo
$(GTEST_OBJ): $(GTEST_SRC) | $(GTEST_BUILD_DIR)
$(CXX) $< $(CXXFLAGS) -c -o $@
@ echo
# Compile the CUDA sources under layers/ into .cuo objects with nvcc; objects
# are rebuilt whenever any header in $(HXX_SRCS) changes.
$(LAYER_BUILD_DIR)/%.cuo: src/$(PROJECT)/layers/%.cu $(HXX_SRCS) \
| $(LAYER_BUILD_DIR)
$(CUDA_DIR)/bin/nvcc $(NVCCFLAGS) $(CUDA_ARCH) -c $< -o $@
@ echo
# Same compilation for the CUDA sources under util/.
# NOTE(review): unlike the layers rule above, this rule has no $(HXX_SRCS)
# prerequisite, so util .cuo objects are not rebuilt when headers change --
# confirm whether that is intended.
$(UTIL_BUILD_DIR)/%.cuo: src/$(PROJECT)/util/%.cu | $(UTIL_BUILD_DIR)
$(CUDA_DIR)/bin/nvcc $(NVCCFLAGS) $(CUDA_ARCH) -c $< -o $@
@ echo
$(TOOL_BUILD_DIR)/%.o: tools/%.cpp $(PROTO_GEN_HEADER) | $(TOOL_BUILD_DIR)
$(CXX) $< $(CXXFLAGS) -c -o $@
@ echo
$(EXAMPLE_BUILD_DIR)/%.o: examples/%.cpp $(PROTO_GEN_HEADER) \
| $(EXAMPLE_BUILD_DIRS)
$(CXX) $< $(CXXFLAGS) -c -o $@
@ echo
$(BUILD_DIR)/src/$(PROJECT)/%.o: src/$(PROJECT)/%.cpp $(HXX_SRCS)
$(CXX) $< $(CXXFLAGS) -c -o $@
@ echo
proto: $(PROTO_GEN_CC) $(PROTO_GEN_HEADER)
$(PROTO_BUILD_DIR)/%.pb.cc $(PROTO_BUILD_DIR)/%.pb.h : \
$(PROTO_SRC_DIR)/%.proto | $(PROTO_BUILD_DIR)
protoc --proto_path=src --cpp_out=$(BUILD_DIR)/src $<
@ echo
$(PY_PROTO_BUILD_DIR)/%_pb2.py : $(PROTO_SRC_DIR)/%.proto \
$(PY_PROTO_INIT) | $(PY_PROTO_BUILD_DIR)
protoc --proto_path=src --python_out=python $<
@ echo
$(PY_PROTO_INIT): | $(PY_PROTO_BUILD_DIR)
touch $(PY_PROTO_INIT)
clean:
@- $(RM) -rf $(ALL_BUILD_DIRS)
@- $(RM) -rf $(OTHER_BUILD_DIR)
@- $(RM) -rf $(BUILD_DIR_LINK)
@- $(RM) -rf $(DISTRIBUTE_DIR)
@- $(RM) $(PY$(PROJECT)_SO)
@- $(RM) $(MAT$(PROJECT)_SO)
# Helper target: collects into SUPERCLEAN_FILES every file found under "."
# whose name ends with one of $(SUPERCLEAN_EXTS), skipping anything under
# ./data/. The list is built with $(eval) so the other superclean targets can
# read it from their recipes.
supercleanfiles:
$(eval SUPERCLEAN_FILES := $(strip \
$(foreach ext,$(SUPERCLEAN_EXTS), $(shell find . -name '*$(ext)' \
-not -path './data/*'))))
# Print the collected files one per line (or a notice when there are none)
# without deleting anything.
supercleanlist: supercleanfiles
@ \
if [ -z "$(SUPERCLEAN_FILES)" ]; then \
echo "No generated files found."; \
else \
echo $(SUPERCLEAN_FILES) | tr ' ' '\n'; \
fi
# Run the regular "clean" first, then list and delete the collected files.
superclean: clean supercleanfiles
@ \
if [ -z "$(SUPERCLEAN_FILES)" ]; then \
echo "No generated files found."; \
else \
echo "Deleting the following generated files:"; \
echo $(SUPERCLEAN_FILES) | tr ' ' '\n'; \
$(RM) $(SUPERCLEAN_FILES); \
fi
# The names in DIST_ALIASES ("dist", plus "distribute" when DISTRIBUTE_DIR is
# not itself called that) are shorthand for building the distribution directory.
$(DIST_ALIASES): $(DISTRIBUTE_DIR)
# After "all" and "py" are built, copy the headers, the tool and example
# binaries, the shared and static libraries, and the python directory into
# $(DISTRIBUTE_DIR). Its bin/ and lib/ subdirectories are order-only
# prerequisites, so they are created first if missing.
$(DISTRIBUTE_DIR): all py $(HXX_SRCS) | $(DISTRIBUTE_SUBDIRS)
# add include
cp -r include $(DISTRIBUTE_DIR)/
# add tool and example binaries
cp $(TOOL_BINS) $(DISTRIBUTE_DIR)/bin
cp $(EXAMPLE_BINS) $(DISTRIBUTE_DIR)/bin
# add libraries
cp $(NAME) $(DISTRIBUTE_DIR)/lib
cp $(STATIC_NAME) $(DISTRIBUTE_DIR)/lib
# add python - it's not the standard way, indeed...
cp -r python $(DISTRIBUTE_DIR)/python

@ -0,0 +1,56 @@
## Refer to http://caffe.berkeleyvision.org/installation.html
# Contributions simplifying and improving our build system are welcome!
# CUDA directory contains bin/ and lib/ directories that we need.
CUDA_DIR := /usr/local/cuda
# CUDA architecture setting: going with all of them.
CUDA_ARCH := -gencode arch=compute_20,code=sm_20 \
-gencode arch=compute_20,code=sm_21 \
-gencode arch=compute_30,code=sm_30 \
-gencode arch=compute_35,code=sm_35
# BLAS choice:
# atlas for ATLAS (default)
# mkl for MKL
# open for OpenBlas
BLAS := atlas
# Custom (MKL/ATLAS/OpenBLAS) include and lib directories.
# Leave commented to accept the defaults for your choice of BLAS
# (which should work)!
# BLAS_INCLUDE := /path/to/your/blas
BLAS_INCLUDE := /usr/include/atlas
# BLAS_LIB := /path/to/your/blas
BLAS_LIB := /usr/lib/atlas-base
# This is required only if you will compile the matlab interface.
# MATLAB directory should contain the mex binary in /bin.
# MATLAB_DIR := /usr/local
# MATLAB_DIR := /Applications/MATLAB_R2012b.app
# NOTE: this is required only if you will compile the python interface.
# We need to be able to find Python.h and numpy/arrayobject.h.
PYTHON_INCLUDE := /usr/local/include/python2.7 \
/usr/include/python2.7 \
/usr/local/lib/python2.7/dist-packages/numpy/core/include
# Anaconda Python distribution is quite popular. Include path:
# PYTHON_INCLUDE := $(HOME)/anaconda/include \
# $(HOME)/anaconda/include/python2.7 \
# $(HOME)/anaconda/lib/python2.7/site-packages/numpy/core/include
# We need to be able to find libpythonX.X.so or .dylib.
PYTHON_LIB := /usr/local/lib
# PYTHON_LIB := $(HOME)/anaconda/lib
# Whatever else you find you need goes here.
INCLUDE_DIRS := $(PYTHON_INCLUDE) /usr/local/include
LIBRARY_DIRS := $(PYTHON_LIB) /usr/local/lib /usr/lib
BUILD_DIR := build
DISTRIBUTE_DIR := distribute
# Uncomment for debugging.
# DEBUG := 1
# The ID of the GPU that 'make runtest' will use to run unit tests.
TEST_GPUID := 0

@ -0,0 +1,56 @@
## Refer to http://caffe.berkeleyvision.org/installation.html
# Contributions simplifying and improving our build system are welcome!
# CUDA directory contains bin/ and lib/ directories that we need.
CUDA_DIR := /usr/local/cuda
# On Ubuntu 14.04, if cuda tools are installed via
# "sudo apt-get install nvidia-cuda-toolkit" then use this instead:
# CUDA_DIR := /usr
# CUDA architecture setting: going with all of them.
CUDA_ARCH := -gencode arch=compute_20,code=sm_20 \
-gencode arch=compute_20,code=sm_21 \
-gencode arch=compute_30,code=sm_30 \
-gencode arch=compute_35,code=sm_35
# BLAS choice:
# atlas for ATLAS (default)
# mkl for MKL
# open for OpenBlas
BLAS := atlas
# Custom (MKL/ATLAS/OpenBLAS) include and lib directories.
# Leave commented to accept the defaults for your choice of BLAS
# (which should work)!
# BLAS_INCLUDE := /path/to/your/blas
# BLAS_LIB := /path/to/your/blas
# This is required only if you will compile the matlab interface.
# MATLAB directory should contain the mex binary in /bin.
# MATLAB_DIR := /usr/local
# MATLAB_DIR := /Applications/MATLAB_R2012b.app
# NOTE: this is required only if you will compile the python interface.
# We need to be able to find Python.h and numpy/arrayobject.h.
PYTHON_INCLUDE := /usr/local/include/python2.7 \
/usr/local/lib/python2.7/dist-packages/numpy/core/include
# Anaconda Python distribution is quite popular. Include path:
# PYTHON_INCLUDE := $(HOME)/anaconda/include \
# $(HOME)/anaconda/include/python2.7 \
# $(HOME)/anaconda/lib/python2.7/site-packages/numpy/core/include
# We need to be able to find libpythonX.X.so or .dylib.
PYTHON_LIB := /usr/local/lib
# PYTHON_LIB := $(HOME)/anaconda/lib
# Whatever else you find you need goes here.
INCLUDE_DIRS := $(PYTHON_INCLUDE) /usr/local/include
LIBRARY_DIRS := $(PYTHON_LIB) /usr/local/lib /usr/lib
BUILD_DIR := build
DISTRIBUTE_DIR := distribute
# Uncomment for debugging.
# DEBUG := 1
# The ID of the GPU that 'make runtest' will use to run unit tests.
TEST_GPUID := 0

@ -0,0 +1,115 @@
[Caffe: Convolutional Architecture for Fast Feature Extraction](http://caffe.berkeleyvision.org)
Created by [Yangqing Jia](http://daggerfs.com), UC Berkeley EECS department.
In active development by the Berkeley Vision and Learning Center ([BVLC](http://bvlc.eecs.berkeley.edu/)).
## Introduction
Caffe aims to provide computer vision scientists with a **clean, modifiable
implementation** of state-of-the-art deep learning algorithms. Network structure
is easily specified in separate config files, with no mess of hard-coded
parameters in the code. Python and Matlab wrappers are provided.
At the same time, Caffe fits industry needs, with blazing fast C++/Cuda code for
GPU computation. Caffe is currently the fastest GPU CNN implementation publicly
available, and is able to process more than **40 million images per day** on a
single NVIDIA K40 GPU (or 20 million per day on a K20)\*.
Caffe also provides **seamless switching between CPU and GPU**, which allows one
to train models with fast GPUs and then deploy them on non-GPU clusters with one
line of code: `Caffe::set_mode(Caffe::CPU)`.
Even in CPU mode, computing predictions on an image takes only 20 ms when images
are processed in batch mode.
* [Caffe introductory presentation](https://www.dropbox.com/s/10fx16yp5etb8dv/caffe-presentation.pdf)
* [Installation instructions](http://caffe.berkeleyvision.org/installation.html)
\* When measured with the [SuperVision](http://www.image-net.org/challenges/LSVRC/2012/supervision.pdf) model that won the ImageNet Large Scale Visual Recognition Challenge 2012.
## License
Caffe is BSD 2-Clause licensed (refer to the
[LICENSE](http://caffe.berkeleyvision.org/license.html) for details).
The pretrained models published by the BVLC, such as the
[Caffe reference ImageNet model](https://www.dropbox.com/s/n3jups0gr7uj0dv/caffe_reference_imagenet_model)
are licensed for academic research / non-commercial use only. However, Caffe is
a full toolkit for model training, so start brewing your own Caffe model today!
## Citing Caffe
Please kindly cite Caffe in your publications if it helps your research:
@misc{Jia13caffe,
Author = {Yangqing Jia},
Title = { {Caffe}: An Open Source Convolutional Architecture for Fast Feature Embedding},
Year = {2013},
Howpublished = {\url{http://caffe.berkeleyvision.org/}}
}
## Documentation
Tutorials and general documentation are written in Markdown format in the `docs/` folder.
While the format is quite easy to read directly, you may prefer to view the whole thing as a website.
To do so, simply run `jekyll serve -s docs` and view the documentation website at `http://0.0.0.0:4000` (to get [jekyll](http://jekyllrb.com/), you must have ruby and do `gem install jekyll`).
We strive to provide lots of usage examples, and to document all code in docstrings.
We'd appreciate your contribution to this effort!
## Development
Caffe is developed with active participation of the community by the [Berkeley Vision and Learning Center](http://bvlc.eecs.berkeley.edu/).
We welcome all contributions!
### The release cycle
- The `dev` branch is for new development, including community contributions. We aim to keep it in a functional state, but large changes may occur and things may get broken every now and then. Use this if you want the "bleeding edge".
- The `master` branch is handled by BVLC, which will integrate changes from `dev` on a roughly monthly schedule, giving it a release tag. Use this if you want more stability.
### Setting priorities
- Make GitHub Issues for bugs, features you'd like to see, questions, etc.
- Development work is guided by [milestones](https://github.com/BVLC/caffe/issues?milestone=1), which are sets of issues selected for concurrent release (integration from `dev` to `master`).
- Please note that since the core developers are largely researchers, we may work on a feature in isolation from the open-source community for some time before releasing it, so as to claim honest academic contribution. We do release it as soon as a reasonable technical report may be written about the work, and we still aim to inform the community of ongoing development through Issues.
### Contributing
- Do new development in [feature branches](https://www.atlassian.com/git/workflows#!workflow-feature-branch) with descriptive names.
- Bring your work up-to-date by [rebasing](http://git-scm.com/book/en/Git-Branching-Rebasing) onto the latest `dev`. (Polish your changes by [interactive rebase](https://help.github.com/articles/interactive-rebase), if you'd like.)
- [Pull request](https://help.github.com/articles/using-pull-requests) your contribution to BVLC/caffe's `dev` branch for discussion and review.
* PRs should live fast, die young, and leave a beautiful merge. Pull request sooner than later so that discussion can guide development.
* Code must be accompanied by documentation and tests at all times.
* Only fast-forward merges will be accepted.
See our [development guidelines](http://caffe.berkeleyvision.org/development.html) for further details–the more closely these are followed, the sooner your work will be merged.
#### [Shelhamer's](https://github.com/shelhamer) “life of a branch in four acts”
Make the `feature` branch off of the latest `bvlc/dev`
```
git checkout dev
git pull upstream dev
git checkout -b feature
# do your work, make commits
```
Prepare to merge by rebasing your branch on the latest `bvlc/dev`
```
# make sure dev is fresh
git checkout dev
git pull upstream dev
# rebase your branch on the tip of dev
git checkout feature
git rebase dev
```
Push your branch to pull request it into `dev`
```
git push origin feature
# ...make pull request to dev...
```
Now make a pull request! You can do this from the command line (`git pull-request -b dev`) if you install [hub](https://github.com/github/hub).
The pull request of `feature` into `dev` will be a clean merge. Applause.

@ -0,0 +1,66 @@
### General
This directory contains the necessary code to reproduce the gradient
ascent images in the paper: Figures 13, S3, S4, S5, S6, and S7. This
is research code, and so it may contain paths and such that are
particular to our setup that will need to be changed for your own
setup.
**Important note: this code requires the slightly modified version of caffe in this repository's [ascent](https://github.com/Evolving-AI-Lab/fooling/tree/ascent) branch. If you try running on master, you'll get an error about `backward_from_layer`.** See the below steps for using the correct branch.
If you find any bugs, please submit a PR!
If you have any trouble getting the code to work, please get in touch, and we'll help where we can.
### Notes on running the gradient ascent code
* The gist of the gradient ascent code (along with a lot of
experimental bookkeeping) is in the
[find_image function in find_fooling_image.py](https://github.com/Evolving-AI-Lab/fooling/blob/master/caffe/ascent/find_fooling_image.py#L68-L274).
* If you happen to be working in a
cluster environment that uses ```qsub```, you may find the shell scripts
useful; otherwise they probably won't help you much.
* If you don't have a trained net around, you can download the trained model we used here: http://yosinski.cs.cornell.edu/yos_140311__caffenet_iter_450000
* A file containing class labels is also used by the script and can be downloaded here: http://s.yosinski.com/synset_words.txt
### Simple steps to generate one fooling image
We'll walk through exact steps to generate a fooling image of a lion (class 291) using gradient ascent on the output unit for lion.
First, clone the repo and checkout the ascent branch:
[~] $ git clone git@github.com:Evolving-AI-Lab/fooling.git
[~] $ cd fooling
[~/fooling] $ git checkout ascent
[~/fooling] $ cd caffe
Configure and compile caffe. See [installation instructions](http://caffe.berkeleyvision.org/installation.html). Make sure to compile the python bindings too:
[~/fooling/caffe] $ make -j && make -j pycaffe
Once Caffe is built, continue by fetching some auxiliary data (including the `synset_words.txt` class labels) and a pre-trained model:
[~/fooling/caffe] $ cd data/ilsvrc12
[~/fooling/caffe/data/ilsvrc12] $ ./get_ilsvrc_aux.sh
[~/fooling/caffe/data/ilsvrc12] $ cd ../../ascent
[~/fooling/caffe/ascent] $ wget 'http://yosinski.cs.cornell.edu/yos_140311__caffenet_iter_450000'
Now we're ready to run the optimization. To find a quick fooling image for the Lion class (idx 291) using only 3 gradient steps, run the following:
[~/fooling/caffe/ascent] $ ./find_fooling_image.py --push_idx 291 --N 3
...
0 Push idx: 291, val: 0.00209935 (n02129165 lion, king of beasts, Panthera leo)
Max idx: 815, val: 0.0114864 (n04275548 spider web, spider's web)
...
1 Push idx: 291, val: 0.00962483 (n02129165 lion, king of beasts, Panthera leo)
Max idx: 330, val: 0.0224016 (n02325366 wood rabbit, cottontail, cottontail rabbit)
...
2 Push idx: 291, val: 0.0518007 (n02129165 lion, king of beasts, Panthera leo)
Max idx: 291, val: 0.0518007 (n02129165 lion, king of beasts, Panthera leo)
...
Result: majority success

@ -0,0 +1,213 @@
name: "CaffeNet"
input: "data"
input_dim: 1
input_dim: 3
input_dim: 227
input_dim: 227
force_backward: true
layers {
name: "conv1"
type: CONVOLUTION
bottom: "data"
top: "conv1"
convolution_param {
num_output: 96
kernel_size: 11
stride: 4
}
}
layers {
name: "relu1"
type: RELU
bottom: "conv1"
top: "conv1"
}
layers {
name: "pool1"
type: POOLING
bottom: "conv1"
top: "pool1"
pooling_param {
pool: MAX
kernel_size: 3
stride: 2
}
}
layers {
name: "norm1"
type: LRN
bottom: "pool1"
top: "norm1"
lrn_param {
local_size: 5
alpha: 0.0001
beta: 0.75
}
}
layers {
name: "conv2"
type: CONVOLUTION
bottom: "norm1"
top: "conv2"
convolution_param {
num_output: 256
pad: 2
kernel_size: 5
group: 2
}
}
layers {
name: "relu2"
type: RELU
bottom: "conv2"
top: "conv2"
}
layers {
name: "pool2"
type: POOLING
bottom: "conv2"
top: "pool2"
pooling_param {
pool: MAX
kernel_size: 3
stride: 2
}
}
layers {
name: "norm2"
type: LRN
bottom: "pool2"
top: "norm2"
lrn_param {
local_size: 5
alpha: 0.0001
beta: 0.75
}
}
layers {
name: "conv3"
type: CONVOLUTION
bottom: "norm2"
top: "conv3"
convolution_param {
num_output: 384
pad: 1
kernel_size: 3
}
}
layers {
name: "relu3"
type: RELU
bottom: "conv3"
top: "conv3"
}
layers {
name: "conv4"
type: CONVOLUTION
bottom: "conv3"
top: "conv4"
convolution_param {
num_output: 384
pad: 1
kernel_size: 3
group: 2
}
}
layers {
name: "relu4"
type: RELU
bottom: "conv4"
top: "conv4"
}
layers {
name: "conv5"
type: CONVOLUTION
bottom: "conv4"
top: "conv5"
convolution_param {
num_output: 256
pad: 1
kernel_size: 3
group: 2
}
}
layers {
name: "relu5"
type: RELU
bottom: "conv5"
top: "conv5"
}
layers {
name: "pool5"
type: POOLING
bottom: "conv5"
top: "pool5"
pooling_param {
pool: MAX
kernel_size: 3
stride: 2
}
}
layers {
name: "fc6"
type: INNER_PRODUCT
bottom: "pool5"
top: "fc6"
inner_product_param {
num_output: 4096
}
}
layers {
name: "relu6"
type: RELU
bottom: "fc6"
top: "fc6"
}
layers {
name: "drop6"
type: DROPOUT
bottom: "fc6"
top: "fc6"
dropout_param {
dropout_ratio: 0.5
}
}
layers {
name: "fc7"
type: INNER_PRODUCT
bottom: "fc6"
top: "fc7"
inner_product_param {
num_output: 4096
}
}
layers {
name: "relu7"
type: RELU
bottom: "fc7"
top: "fc7"
}
layers {
name: "drop7"
type: DROPOUT
bottom: "fc7"
top: "fc7"
dropout_param {
dropout_ratio: 0.5
}
}
layers {
name: "fc8"
type: INNER_PRODUCT
bottom: "fc7"
top: "fc8"
inner_product_param {
num_output: 1000
}
}
layers {
name: "prob"
type: SOFTMAX
bottom: "fc8"
top: "prob"
}

@ -0,0 +1,315 @@
#! /usr/bin/env python
import argparse
import pickle
import pylab
from pylab import *
from scipy.ndimage.filters import gaussian_filter
from collections import OrderedDict
import ipdb as pdb
plt.rcParams['image.interpolation'] = 'nearest'
plt.rcParams['image.cmap'] = 'gray'
# Make sure that caffe is on the python path:
caffe_root = '../../' # this file is normally in {caffe_root}/ascent. If it's elsewhere, change this path.
import sys
sys.path.insert(0, caffe_root + 'python')
# If this next line fails, check the relevant paths.
import caffe
from misc_helper import *
def load_net_mean():
    '''Load the classifier, the cropped ImageNet mean, and the class labels.

    All paths are relative: the model definition and the pretrained weights
    are looked up in the current directory, the labels and the mean file
    below the module-level ``caffe_root``.

    Returns a tuple ``(net, mnirgb, mn4d, labels)``:
      net    -- caffe.Classifier built from deploy_1_forcebackward.prototxt,
                switched to test phase and CPU mode
      mnirgb -- 227x227 crop of the mean, axes transposed with (1,2,0) and the
                last axis reversed (RGB order)
      mn4d   -- the same crop in the mean file's own axis order, with a new
                leading axis
      labels -- list with one stripped line of synset_words.txt per entry
    '''
    # Pick which model to load, which image, etc.
    model_def_file = 'deploy_1_forcebackward.prototxt'

    # Can be downloaded from http://yosinski.cs.cornell.edu/yos_140311__caffenet_iter_450000
    pretrained_model = 'yos_140311__caffenet_iter_450000'

    # Can be downloaded from http://s.yosinski.com/synset_words.txt
    with open('%s/data/ilsvrc12/synset_words.txt' % caffe_root) as ff:
        labels = [line.strip() for line in ff]

    # Load mean
    inmean = np.load(caffe_root + 'python/caffe/imagenet/ilsvrc_2012_mean.npy')

    # Floor division keeps the offset an integer on every Python version, so
    # it stays usable as a slice bound.
    offset = (256 - 227) // 2
    mn = inmean[:, offset:offset+227, offset:offset+227]
    mni = mn.transpose((1,2,0))
    mnirgb = mni[:,:,::-1]      # convert to rgb order
    mn4d = mn[newaxis]

    net = caffe.Classifier(model_def_file, pretrained_model,
                           #mean=inmean,
                           channel_swap=(2,1,0),
                           #raw_scale=255.0,
                           #image_dims=(256, 256),
                           )
    net.set_phase_test()
    net.set_mode_cpu()

    return net, mnirgb, mn4d, labels
def update_result(result, suffix, ii, X, X0):
    '''Store a snapshot of image X in result under keys ending in suffix.

    Written keys: iter_<suffix> (the iteration ii), norm_<suffix> (norm of X),
    dist_<suffix> (norm of X - X0), std_<suffix> (standard deviation over all
    entries of X) and X_<suffix> (a copy of X).
    '''
    offset_from_start = X - X0
    snapshot = {
        'iter_': ii,
        'norm_': norm(X),
        'dist_': norm(offset_from_start),
        'std_': X.ravel().std(),
        'X_': X.copy(),
    }
    for stem, value in snapshot.items():
        result[stem + suffix] = value
def find_image(net, mnirgb, mn4d, labels, decay = .01, N = 300, rseed = 0,
push_layer = 'prob', push_idx = 278, start_at = 'mean_plus', prefix = 'junk',
lr_policy = 'progress',
lr_params = {'max_lr': 1e12, 'early_prog': .03, 'late_prog_mult': .1},
blur_radius = 0, # 0 or at least .3
blur_every = 1,
small_val_percentile = 0,
small_norm_percentile = 0,
px_benefit_percentile = 0,
px_abs_benefit_percentile = 0):
'''Find image for the given net using the specified start position, learning policies, etc.'''
np.random.seed(rseed)
#start_im = mnirgb[:] * 0
if start_at == 'mean_plus':
start_im = np.random.normal(0, 1, mnirgb.shape)
elif start_at == 'randu':
start_im = uniform(0, 255, mnirgb.shape) - mnirgb
elif start_at == 'zero':
start_im = zeros(mnirgb.shape)
else:
raise Exception('Unknown start conditions: %s' % start_at)
if lr_policy == 'progress':
assert 'max_lr' in lr_params
assert 'early_prog' in lr_params
assert 'late_prog_mult' in lr_params
elif lr_policy == 'constant':
assert 'lr' in lr_params
else:
raise Exception('Unknown lr_policy: %s' % lr_policy)
try:
push_idx = tuple(push_idx) # tuple or list given
except TypeError:
push_idx = (push_idx, 0, 0) # int given
assert len(push_idx) == 3, 'provide push_idx in the form: int or (channel, x, y) tuple'
#X0 = mn[newaxis,:]
#im255 = im01 * 255 -
tmp = net.preprocess('data', start_im) # converts rgb -> bgr
X0 = tmp[newaxis,:]
# What to change
#push_idx = 278 # kit fox
push_dir = 1.0
class_unit = push_layer in ('fc8', 'prob') # Whether or not the unit being optimized corresponds to one of the 1000 classes
push_label = labels[push_idx[0]] if class_unit else 'None'
X = X0.copy()
#figsize(20,8)
result = dict(
iter_maj = -1,
iter_99 = -1,
iter_999 = -1,
iter_9999 = -1,
iter_best = -1,
norm_maj = -1,
norm_99 = -1,
norm_999 = -1,
norm_9999 = -1,
norm_best = -1,
dist_maj = -1,
dist_99 = -1,
dist_999 = -1,
dist_9999 = -1,
dist_best = -1,
std_maj = -1,
std_99 = -1,
std_999 = -1,
std_9999 = -1,
std_best = -1,
act_best = -1,
X_maj = None,
X_99 = None,
X_999 = None,
X_9999 = None,
X_best = None,
decay = decay, N = N, push_idx = push_idx, push_dir = push_dir, push_layer = push_layer,
push_label = push_label,
lr_policy = lr_policy, lr_params = lr_params,
blur_radius = blur_radius, blur_every = blur_every,
small_val_percentile = small_val_percentile, small_norm_percentile = small_norm_percentile,
px_benefit_percentile = px_benefit_percentile, px_abs_benefit_percentile = px_abs_benefit_percentile,
)
print '\nParameters:'
for key in sorted(result.keys()):
print '%25s: %s' % (key, result[key])
print
for ii in range(N):
X = minimum(255.0, maximum(0.0, X + mn4d)) - mn4d # Crop all values to [0,255]
out = net.forward_all(data = X)
acts = net.blobs[push_layer].data
iimax = unravel_index(acts.argmax(), acts.shape)[1:] # chop off batch idx of 0
obj = acts[0][push_idx]
if ii > 0 and lr_policy == 'progress':
print ' pred_prog: ', pred_prog, 'actual:', obj - old_obj
if class_unit:
print '%-4d' % ii, 'Push idx: %d, val: %g (%s)\n Max idx: %d, val: %g (%s)' % (push_idx[0], acts[0][push_idx], push_label, iimax[0], acts.max(), labels[iimax[0]])
else:
print '%-4d' % ii, 'Push idx: %s, val: %g\n Max idx: %s, val: %g' % (push_idx, acts[0][push_idx], iimax, acts.max())
print ' X: ', X.min(), X.max(), norm(X)
if acts[0][push_idx] > result['act_best']:
update_result(result, 'best', ii, X, X0)
result['acts_best'] = acts[0][push_idx]
if iimax == push_idx and result['iter_maj'] == -1:
update_result(result, 'maj', ii, X, X0)
if acts[0][push_idx] > .99 and result['iter_99'] == -1:
update_result(result, '99', ii, X, X0)
if acts[0][push_idx] > .999 and result['iter_999'] == -1:
update_result(result, '999', ii, X, X0)
if acts[0][push_idx] > .9999 and result['iter_9999'] == -1:
update_result(result, '9999', ii, X, X0)
#break # Quit once confidence > .9999
diffs = net.blobs[push_layer].diff * 0
diffs[0][push_idx] = push_dir
backout = net.backward_from_layer(push_layer, diffs)
grad = backout['data'].copy()
print ' grad:', grad.min(), grad.max(), norm(grad)
if norm(grad) == 0:
print 'Grad 0, failed'
break
# progress-based lr
if lr_policy == 'progress':
late_prog = lr_params['late_prog_mult'] * (1-obj)
desired_prog = min(lr_params['early_prog'], late_prog)
prog_lr = desired_prog / norm(grad)**2
lr = min(lr_params['max_lr'], prog_lr)
print ' desired_prog:', desired_prog, 'prog_lr:', prog_lr, 'lr:', lr
pred_prog = lr * dot(grad.flatten(), grad.flatten())
elif lr_policy == 'constant':
lr = lr_params['lr']
else:
raise Exception('Unimlemented lr_policy')
print ' change size:', abs(lr * grad).max()
old_obj = obj
if ii < N-1:
X += lr * grad
X *= (1 - decay)
if blur_radius > 0:
if blur_radius < .3:
raise Exception('blur-radius of .3 or less works very poorly')
oldX = X.copy()
if ii % blur_every == 0:
for channel in range(3):
cimg = gaussian_filter(X[0,channel], blur_radius)
X[0,channel] = cimg
if small_val_percentile > 0:
small_entries = (abs(X) < percentile(abs(X), small_val_percentile))
X = X - X*small_entries # set smallest 50% of X to zero
if small_norm_percentile > 0:
pxnorms = norm(X, axis=1)
smallpx = pxnorms < percentile(pxnorms, small_norm_percentile)
smallpx3 = tile(smallpx[:,newaxis,:,:], (1,3,1,1))
X = X - X*smallpx3
if px_benefit_percentile > 0:
pred_0_benefit = grad * -X
px_benefit = pred_0_benefit.sum(1)
smallben = px_benefit < percentile(px_benefit, px_benefit_percentile)
smallben3 = tile(smallben[:,newaxis,:,:], (1,3,1,1))
X = X - X*smallben3
if px_abs_benefit_percentile > 0:
pred_0_benefit = grad * -X
px_benefit = pred_0_benefit.sum(1)
smallaben = abs(px_benefit) < percentile(abs(px_benefit), px_abs_benefit_percentile)
smallaben3 = tile(smallaben[:,newaxis,:,:], (1,3,1,1))
X = X - X*smallaben3
if class_unit:
if result['iter_maj'] != -1:
print 'Result: majority success'
else:
print 'Result: no convergence'
for suffix in ('maj', '99', '999', '9999', 'best'):
if result['X_'+suffix] is not None:
asimg = net.deprocess('data', result['X_'+suffix])
if suffix == 'best':
best_X = asimg.copy()
saveimagescc('%s_%s_X.jpg' % (prefix, suffix), asimg, 0)
saveimagesc('%s_%s_Xpm.jpg' % (prefix, suffix), asimg + mnirgb)
del result['X_'+suffix]
with open('%s_info.pkl' % prefix, 'w') as ff:
pickle.dump(result, ff)
with open('%s_info.txt' % prefix, 'w') as ff:
for key in sorted(result.keys()):
print >>ff, key, result[key]
return best_X
def main():
parser = argparse.ArgumentParser(description='Finds images that activate a network in various ways.')
parser.add_argument('--lr', type = float, default = .01)
parser.add_argument('--decay', type = float, default = .01)
parser.add_argument('--N', type = int, default = 300)
parser.add_argument('--rseed', type = int, default = 0)
parser.add_argument('--push_idx', type = int, default = -1)
parser.add_argument('--start_at', type = str, default = 'mean_plus')
parser.add_argument('--prefix', type = str, default = '%(push_idx)03d')
parser.add_argument('--multi_idx_start', type = int, default = -1)
parser.add_argument('--multi_idx_end', type = int, default = -1)
args = parser.parse_args()
assert (args.push_idx == -1) != (args.multi_idx_start == -1 and args.multi_idx_end == -1), 'Use push_idx xor multi*'
assert (args.multi_idx_start == -1) == (args.multi_idx_end == -1), 'Use all multi* or none'
net, mnirgb, mn4d, labels = load_net_mean()
if args.push_idx != -1:
range_start = args.push_idx
range_end = args.push_idx + 1
else:
range_start = args.multi_idx_start
range_end = args.multi_idx_end
for push_idx in range(range_start, range_end):
prefix_dict = vars(args)
prefix_dict['push_idx'] = push_idx
prefix_str = args.prefix % prefix_dict
print '\n\nFinding image'
print 'prefix_str', prefix_str
find_image(net, mnirgb, mn4d, labels,
lr = args.lr, decay = args.decay, N = args.N, rseed = args.rseed,
push_idx = args.push_idx, start_at = args.start_at,
prefix = prefix_str)
if __name__ == '__main__':
main()

@ -0,0 +1,97 @@
#! /usr/bin/env python
from pylab import *
import os
import argparse
import ipdb as pdb
from find_fooling_image import load_net_mean, find_image
def rchoose(choices, prob=None):
    '''Draw one element of choices with np.random.choice.

    prob holds one unnormalized weight per choice; when omitted, every
    choice gets weight 1.
    '''
    if prob is None:
        weights = np.ones(len(choices))
    else:
        weights = prob
    weights = np.array(weights, dtype='float')
    normalized = weights / weights.sum()
    return np.random.choice(choices, p=normalized)
def main():
parser = argparse.ArgumentParser(description='Hyperparam search')
parser.add_argument('--result_prefix', type = str, default = './junk')
parser.add_argument('--hp_seed', type = int, default = 0)
parser.add_argument('--start_seed', type = int, default = 0)
parser.add_argument('--push_idx', type = int, default = 278)
parser.add_argument('--layer', type = str, default = 'prob', choices = ('fc8', 'prob'))
parser.add_argument('--startat', type = int, default = 0, choices = (0, 1))
args = parser.parse_args()
push_idx = args.push_idx
small_val_percentile = 0
start_at = 'mean_plus' if args.startat == 0 else 'randu'
if args.hp_seed == -1:
# Special hp_seed of -1 to do gradient descent without any regularization
decay = 0
N = 500
early_prog = .02
late_prog_mult = .1
blur_radius = 0
blur_every = 1
small_norm_percentile = 0
px_benefit_percentile = 0
px_abs_benefit_percentile = 0
else:
np.random.seed(args.hp_seed)
# Choose hyperparameter values given this seed
decay = rchoose((0, .0001, .001, .01, .1, .2, .3),
(4, 1, 1, 2, 1, 1, 1))
N = rchoose((250, 500, 750, 1000, 1500))
early_prog = rchoose(
(.02, .03, .04),
(1, 2, 1))
late_prog_mult = rchoose((.02, .05, .1, .2))
blur_radius = rchoose(
(0, .3, .4, .5, 1.0),
(10, 2, 1, 1, 1))
blur_every = rchoose((1, 2, 3, 4))
small_norm_percentile = rchoose(
(0, 10, 20, 30, 50, 80, 90),
(10, 10, 5, 2, 2, 2, 2))
px_benefit_percentile = rchoose(
(0, 10, 20, 30, 50, 80, 90),
(20, 10, 5, 2, 2, 2, 2))
px_abs_benefit_percentile = rchoose(
(0, 10, 20, 30, 50, 80, 90),
(10, 10, 5, 2, 2, 2, 2))
prefix = args.result_prefix
print 'prefix is', prefix
net, mnirgb, mn4d, labels = load_net_mean()
find_image(net, mnirgb, mn4d, labels,
decay = decay,
N = N,
rseed = args.start_seed,
push_idx = push_idx,
start_at = start_at,
prefix = prefix,
lr_policy = 'progress',
lr_params = {'max_lr': 1e7,
'early_prog': early_prog,
'late_prog_mult': late_prog_mult},
blur_radius = blur_radius,
blur_every = blur_every,
small_val_percentile = small_val_percentile,
small_norm_percentile = small_norm_percentile,
px_benefit_percentile = px_benefit_percentile,
px_abs_benefit_percentile = px_abs_benefit_percentile,
)
if __name__ == '__main__':
main()

@ -0,0 +1,112 @@
#! /usr/bin/env python
from pylab import *
def figsize(width,height):
    '''Set the 'figure.figsize' entry of rcParams to (width, height).'''
    dimensions = (width, height)
    rcParams['figure.figsize'] = dimensions
def norm01(arr):
    '''Return a rescaled copy of arr whose minimum is 0 and maximum is 1.

    The input is never modified. Non-floating input is converted to float
    first so the division is a true division. A constant array has no range
    to stretch and is returned as all zeros instead of dividing by zero.
    '''
    arr = np.asarray(arr)
    if np.issubdtype(arr.dtype, np.floating):
        arr = arr.copy()
    else:
        arr = arr.astype(float)   # astype already returns a new array
    arr -= arr.min()
    span = arr.max()
    if span > 0:
        arr /= span
    return arr
def norm01c(arr, center):
    '''Maps the center value to .5

    Returns a copy of arr shifted by center and scaled symmetrically so all
    values lie in [0, 1]. Non-floating input is converted to float first.
    When every entry equals center there is nothing to scale, and the result
    is .5 everywhere instead of a division by zero.
    '''
    arr = np.asarray(arr)
    if np.issubdtype(arr.dtype, np.floating):
        arr = arr.copy()
    else:
        arr = arr.astype(float)   # astype already returns a new array
    arr -= center
    # Twice the largest deviation from center, so the result spans at most 1.
    scale = max(2 * arr.max(), -2 * arr.min())
    if scale > 0:
        arr /= scale
    arr += .5
    assert arr.min() >= 0
    assert arr.max() <= 1
    return arr
def showimage(im, c01=False, bgr=False):
    '''Show im with plt.imshow.

    c01 -- im is stored as (channel, 0, 1) and is transposed to (0, 1, channel)
    bgr -- for 3-dimensional images, reverse the last axis (BGR -> RGB)
    '''
    # switch order from c,0,1 -> 0,1,c when requested
    picture = im.transpose((1,2,0)) if c01 else im
    is_color = picture.ndim == 3
    if is_color and bgr:
        # Change from BGR -> RGB
        picture = picture[:, :, ::-1]
    plt.imshow(picture)
    #axis('tight')
def showimagesc(im, c01=False, bgr=False):
    '''Show im after rescaling it with norm01; c01 and bgr are forwarded to showimage.'''
    rescaled = norm01(im)
    showimage(rescaled, c01=c01, bgr=bgr)
def saveimage(filename, im):
    '''Write the array im to filename with matplotlib.image.imsave.'''
    write_image = matplotlib.image.imsave
    write_image(filename, im)
def saveimagesc(filename, im):
    '''Save im to filename after rescaling it with norm01.'''
    rescaled = norm01(im)
    saveimage(filename, rescaled)
def saveimagescc(filename, im, center):
    '''Save im to filename after rescaling it with norm01c around center.'''
    centered = norm01c(im, center)
    saveimage(filename, centered)
def tile_images(data, padsize=1, padval=0, c01=False, width=None):
    '''take an array of shape (n, height, width) or (n, height, width, channels)
    and visualize each (height, width) thing in a grid. If width = None, produce
    a square image of size approx. sqrt(n) by sqrt(n), else calculate height.

    data    -- stack of n images; with c01=True the layout is
               (n, channels, height, width) and is transposed first
    padsize -- gap in pixels between neighbouring tiles; 0 gives flush tiles
    padval  -- value used for the gaps and for unused grid cells
    width   -- number of tiles per row (int), or None for a square grid

    The values are rescaled to [0, 1] over the whole stack before tiling; a
    constant stack is left at 0. The input is not modified. Returns a single
    2-D (or 3-D with channels) array.
    '''
    data = np.asarray(data)
    if np.issubdtype(data.dtype, np.floating):
        data = data.copy()
    else:
        data = data.astype(float)   # integer input would break the in-place division

    if c01:
        # Convert c01 -> 01c
        data = data.transpose(0, 2, 3, 1)
    data -= data.min()
    peak = data.max()
    if peak > 0:
        data /= peak

    # force the number of filters to be square
    n_images = data.shape[0]
    if width is None:
        width = int(np.ceil(np.sqrt(n_images)))
        height = width
    else:
        assert isinstance(width, int)
        height = int(np.ceil(float(n_images) / width))

    # Pad the stack to a full grid and add padsize after each image's rows/columns.
    padding = ((0, width*height - n_images), (0, padsize), (0, padsize)) + ((0, 0),) * (data.ndim - 3)
    data = np.pad(data, padding, mode='constant', constant_values=(padval, padval))

    # tile the filters into an image
    # The trailing (channel) axes are computed from the rank BEFORE the
    # reshape below adds the grid axis.
    trailing_axes = tuple(range(4, data.ndim + 1))
    data = data.reshape((height, width) + data.shape[1:]).transpose((0, 2, 1, 3) + trailing_axes)
    data = data.reshape((height * data.shape[1], width * data.shape[3]) + data.shape[4:])

    # remove excess padding; explicit end indices because a slice ending at
    # -0 would be empty when padsize is 0
    rows = data.shape[0] - padsize
    cols = data.shape[1] - padsize
    data = data[0:rows, 0:cols]
    return data
def vis_square(data, padsize=1, padval=0, c01=False):
    '''Tile data into a square grid with tile_images and show the result.'''
    grid = tile_images(data, padsize, padval, c01)
    showimage(grid, c01=False)
def shownet(net):
'''Print some stats about a net and its activations'''
print '%-41s%-31s%s' % ('', 'acts', 'act diffs')
print '%-45s%-31s%s' % ('', 'params', 'param diffs')
for k, v in net.blobs.items():
if k in net.params:
params = net.params[k]
for pp, blob in enumerate(params):
if pp == 0:
print ' ', 'P: %-5s'%k,
else:
print ' ' * 11,
print '%-32s' % repr(blob.data.shape),
print '%-30s' % ('(%g, %g)' % (blob.data.min(), blob.data.max())),
print '(%g, %g)' % (blob.diff.min(), blob.diff.max())
print '%-5s'%k, '%-34s' % repr(v.data.shape),
print '%-30s' % ('(%g, %g)' % (v.data.min(), v.data.max())),
print '(%g, %g)' % (v.diff.min(), v.diff.max())

@ -0,0 +1,7 @@
#! /bin/bash
# Reference-only script: it prints a notice and exits immediately, so the
# example invocations below never run unless the `exit 0` line is removed.
echo "just for reference"
exit 0
# Example runs for class indices 0-4 with 1500 steps each: decay .03 first, then decay .00.
# The prefix template is expanded by find_fooling_image.py from its own arguments.
for idx in 0 1 2 3 4; do ./find_fooling_image.py --push_idx $idx --N 1500 --decay .03 --lr .001 --prefix 'result_idx3/idx_%(push_idx)03d_decay_%(decay).03f_lr_%(lr).03f_'; done
for idx in 0 1 2 3 4; do ./find_fooling_image.py --push_idx $idx --N 1500 --decay .00 --lr .001 --prefix 'result_idx3/idx_%(push_idx)03d_decay_%(decay).03f_lr_%(lr).03f_'; done

@ -0,0 +1,31 @@
#! /bin/bash -x
# Submits one qsub job per (hp_seed, push_idx, start_seed) combination.
# For each job a small wrapper script is generated under
# results/supplementary_imgs/seed_XXXX/ that changes into this script's
# directory and runs hyperparam_search.py with the matching arguments.

thisscript=$(readlink -f $0)
scriptdir=$(dirname $thisscript)

# Start condition passed as --startat; the same for every job.
startat=0

for hp_seed in -1 169 188 360; do
    #for push_idx in 278 543 251 99 906 805; do
    for push_idx in 200 207 215 279 366 367 390 414 445 500 509 580 643 657 704 713 782 805 826 906; do
        for start_seed in 0 1 2 3 4; do
            seed_dir=$(printf "seed_%04d" $hp_seed)
            result_dir="$scriptdir/results/supplementary_imgs/$seed_dir"
            mkdir -p $result_dir

            run_str=$(printf 's%04d_idx%03d_sa%d_ss%02d' $hp_seed $push_idx $startat $start_seed)
            jobname="job_${run_str}"
            script="$result_dir/run_${run_str}.sh"
            result_prefix="$result_dir/$run_str"

            # Generate the per-job wrapper script.
            {
                echo "#! /bin/bash"
                echo "cd $scriptdir"
                echo "./hyperparam_search.py --result_prefix $result_prefix --hp_seed $hp_seed --push_idx $push_idx --start_seed $start_seed --startat $startat 2>&1"
            } > $script
            chmod +x $script

            qsub -N "$jobname" -A ACCOUNT_NAME -l nodes=1:ppn=2 -l walltime="1:00:00" -d "$result_dir" $script
        done
    done
done

@ -0,0 +1,32 @@
#! /bin/bash -x
# Submits one qsub job per (hp_seed, push_idx) combination for the random
# hyperparameter search. For each job a small wrapper script is generated
# under results/seed_XXXX/ that changes into this script's directory and runs
# hyperparam_search.py with the matching arguments.

thisscript=$(readlink -f $0)
scriptdir=`dirname $thisscript`

for hp_seed in `seq 101 399`; do
#for hp_seed in 0; do
    for push_idx in 278 543 251 99 906 805; do
    #for push_idx in 278; do
        startat=0
        start_seed=0

        seed_dir=`printf "seed_%04d" $hp_seed`
        result_dir="$scriptdir/results/$seed_dir"
        mkdir -p $result_dir

        # Uses $startat, the variable set above. The previous $start_at was
        # never defined, which left printf one argument short and shifted
        # start_seed into the "sa" field.
        run_str=`printf 's%04d_idx%03d_sa%d_ss%02d' $hp_seed $push_idx $startat $start_seed`
        jobname="job_${run_str}"
        script="$result_dir/run_${run_str}.sh"
        result_prefix="$result_dir/$run_str"

        # Generate the per-job wrapper script.
        echo "#! /bin/bash" > $script
        echo "cd $scriptdir" >> $script
        echo "./hyperparam_search.py --result_prefix $result_prefix --hp_seed $hp_seed --push_idx $push_idx --start_seed $start_seed --startat $startat 2>&1" >> $script
        chmod +x $script

        qsub -N "$jobname" -A ACCOUNT_NAME -l nodes=1:ppn=2 -l walltime="1:00:00" -d "$result_dir" $script
        #sleep 1
    done
done

@ -0,0 +1,53 @@
Bourne Shell
filter remove_matches ^\s*#
filter remove_inline #.*$
extension sh
script_exe sh
C
filter remove_matches ^\s*//
filter call_regexp_common C
filter remove_inline //.*$
extension c
extension ec
extension pgc
C++
filter remove_matches ^\s*//
filter remove_inline //.*$
filter call_regexp_common C
extension C
extension cc
extension cpp
extension cxx
extension pcc
C/C++ Header
filter remove_matches ^\s*//
filter call_regexp_common C
filter remove_inline //.*$
extension H
extension h
extension hh
extension hpp
Cuda
filter remove_matches ^\s*//
filter remove_inline //.*$
filter call_regexp_common C
extension cu
Python
filter remove_matches ^\s*#
filter docstring_to_C
filter call_regexp_common C
filter remove_inline #.*$
extension py
make
filter remove_matches ^\s*#
filter remove_inline #.*$
extension Gnumakefile
extension Makefile
extension am
extension gnumakefile
extension makefile
filename Gnumakefile
filename Makefile
filename gnumakefile
filename makefile
script_exe make

@ -0,0 +1,3 @@
To generate stuff you can paste in an .md page from an IPython notebook, run
ipython nbconvert --to markdown <notebook_file>

@ -0,0 +1,52 @@
<!doctype html>
<html>
<head>
<meta charset="utf-8">
<meta http-equiv="X-UA-Compatible" content="chrome=1">
<title>Caffe</title>
<link rel="stylesheet" href="stylesheets/reset.css">
<link rel="stylesheet" href="stylesheets/styles.css">
<link rel="stylesheet" href="stylesheets/pygment_trac.css">
<script src="javascripts/scale.fix.js"></script>
<meta name="viewport" content="width=device-width, initial-scale=1, user-scalable=no">
<!--[if lt IE 9]>
<script src="//html5shiv.googlecode.com/svn/trunk/html5.js"></script>
<![endif]-->
</head>
<body>
<script>
(function(i,s,o,g,r,a,m){i['GoogleAnalyticsObject']=r;i[r]=i[r]||function(){
(i[r].q=i[r].q||[]).push(arguments)},i[r].l=1*new Date();a=s.createElement(o),
m=s.getElementsByTagName(o)[0];a.async=1;a.src=g;m.parentNode.insertBefore(a,m)
})(window,document,'script','//www.google-analytics.com/analytics.js','ga');
ga('create', 'UA-46255508-1', 'daggerfs.com');
ga('send', 'pageview');
</script>
<div class="wrapper">
<header>
<h1 class="header"><a href="index.html">Caffe</a></h1>
<p class="header">Convolutional Architecture for Fast Feature Embedding</p>
<ul>
<!--<li class="download"><a class="buttons" href="https://github.com/BVLC/caffe/zipball/master">Download ZIP</a></li>
<li class="download"><a class="buttons" href="https://github.com/BVLC/caffe/tarball/master">Download TAR</a></li>-->
<li><a class="buttons github" href="https://github.com/BVLC/caffe">View On GitHub</a></li>
</ul>
<p class="header">Maintained by<br><a class="header name" href="http://bvlc.eecs.berkeley.edu/">BVLC</a></p>
<p class="header">Created by<br><a class="header name" href="http://daggerfs.com/">Yangqing Jia</a></p>
</header>
<section>
{{ content }}
</section>
<footer>
<p><small>Hosted on <a href="http://pages.github.com">GitHub Pages</a>.</small></p>
</footer>
</div>
<!--[if !IE]><script>fixScale(document);</script><![endif]-->
</body>
</html>

@ -0,0 +1,95 @@
---
layout: default
title: Caffe
---
Alex's CIFAR-10 tutorial, Caffe style
=====================================
Alex Krizhevsky's [cuda-convnet](https://code.google.com/p/cuda-convnet/) details the model definitions, parameters, and training procedure for good performance on CIFAR-10. This example reproduces his results in Caffe.
We will assume that you have Caffe successfully compiled. If not, please refer to the [Installation page](installation.html). In this tutorial, we will assume that your caffe installation is located at `CAFFE_ROOT`.
We thank @chyojn for the pull request that defined the model schemas and solver configurations.
*This example is a work-in-progress. It would be nice to further explain details of the network and training choices and benchmark the full training.*
Prepare the Dataset
-------------------
You will first need to download and convert the data format from the [CIFAR-10 website](http://www.cs.toronto.edu/~kriz/cifar.html). To do this, simply run the following commands:
cd $CAFFE_ROOT/data/cifar10
./get_cifar10.sh
cd $CAFFE_ROOT/examples/cifar10
./create_cifar10.sh
If it complains that `wget` or `gunzip` is not installed, you need to install the missing tool first. After running the script there should be the dataset, `./cifar10-leveldb`, and the data set image mean `./mean.binaryproto`.
The Model
---------
The CIFAR-10 model is a CNN that composes layers of convolution, pooling, rectified linear unit (ReLU) nonlinearities, and local contrast normalization with a linear classifier on top of it all. We have defined the model in the `CAFFE_ROOT/examples/cifar10` directory's `cifar10_quick_train.prototxt`.
Training and Testing the "Quick" Model
--------------------------------------
Training the model is simple after you have written the network definition protobuf and solver protobuf files. Simply run `train_quick.sh`, or the following command directly:
cd $CAFFE_ROOT/examples/cifar10
./train_quick.sh
`train_quick.sh` is a simple script, so have a look inside. `GLOG_logtostderr=1` is the google logging flag that prints all the logging messages directly to stderr. The main tool for training is `train_net.bin`, with the solver protobuf text file as its argument.
When you run the code, you will see a lot of messages flying by like this:
I0317 21:52:48.945710 2008298256 net.cpp:74] Creating Layer conv1
I0317 21:52:48.945716 2008298256 net.cpp:84] conv1 <- data
I0317 21:52:48.945725 2008298256 net.cpp:110] conv1 -> conv1
I0317 21:52:49.298691 2008298256 net.cpp:125] Top shape: 100 32 32 32 (3276800)
I0317 21:52:49.298719 2008298256 net.cpp:151] conv1 needs backward computation.
These messages tell you the details about each layer, its connections and its output shape, which may be helpful in debugging. After the initialization, the training will start:
I0317 21:52:49.309370 2008298256 net.cpp:166] Network initialization done.
I0317 21:52:49.309376 2008298256 net.cpp:167] Memory required for Data 23790808
I0317 21:52:49.309422 2008298256 solver.cpp:36] Solver scaffolding done.
I0317 21:52:49.309447 2008298256 solver.cpp:47] Solving CIFAR10_quick_train
Based on the solver setting, we will print the training loss function every 100 iterations, and test the network every 500 iterations. You will see messages like this:
I0317 21:53:12.179772 2008298256 solver.cpp:208] Iteration 100, lr = 0.001
I0317 21:53:12.185698 2008298256 solver.cpp:65] Iteration 100, loss = 1.73643
...
I0317 21:54:41.150030 2008298256 solver.cpp:87] Iteration 500, Testing net
I0317 21:54:47.129461 2008298256 solver.cpp:114] Test score #0: 0.5504
I0317 21:54:47.129500 2008298256 solver.cpp:114] Test score #1: 1.27805
For each training iteration, `lr` is the learning rate of that iteration, and `loss` is the training loss function. For the output of the testing phase, **score 0 is the accuracy**, and **score 1 is the testing loss function**.
And after making yourself a cup of coffee, you are done!
I0317 22:12:19.666914 2008298256 solver.cpp:87] Iteration 5000, Testing net
I0317 22:12:25.580330 2008298256 solver.cpp:114] Test score #0: 0.7533
I0317 22:12:25.580379 2008298256 solver.cpp:114] Test score #1: 0.739837
I0317 22:12:25.587262 2008298256 solver.cpp:130] Snapshotting to cifar10_quick_iter_5000
I0317 22:12:25.590215 2008298256 solver.cpp:137] Snapshotting solver state to cifar10_quick_iter_5000.solverstate
I0317 22:12:25.592813 2008298256 solver.cpp:81] Optimization Done.
Our model achieved ~75% test accuracy. The model parameters are stored in binary protobuf format in
cifar10_quick_iter_5000
which is ready-to-deploy in CPU or GPU mode! Refer to the `CAFFE_ROOT/examples/cifar10/cifar10_quick.prototxt` for the deployment model definition that can be called on new data.
Why train on a GPU?
-------------------
CIFAR-10, while still small, has enough data to make GPU training attractive.
To compare CPU vs. GPU training speed, simply change one line in all the `cifar*solver.prototxt`:
# solver mode: CPU or GPU
solver_mode: CPU
and you will be using CPU for training.

@ -0,0 +1,63 @@
---
layout: default
title: Caffe
---
Developing & Contributing
=========================
Caffe is developed with active participation of the community by the [Berkeley Vision and Learning Center](http://bvlc.eecs.berkeley.edu/).
We welcome all contributions!
The [contributing workflow](https://github.com/BVLC/caffe#development) is explained in the README. These guidelines cover development practices in Caffe. This is a work-in-progress.
**Development Flow**
- `master` is golden.
- `dev` is for new development: it is the branching point for features and the base of pull requests.
* The history of `dev` is not rewritten.
* Contributions are shepherded from `dev` to `master` by BVLC by merge.
- To err is human. Accidents are fixed by reverts.
- Releases are marked with tags on merge from `dev` to `master`.
**Issues & Pull Request Protocol**
0. Make issues for [bugs](https://github.com/BVLC/caffe/issues?labels=bug&page=1&state=open), tentative proposals, and [questions](https://github.com/BVLC/caffe/issues?labels=question&page=1&state=open).
1. Make PRs to signal development:
a. Make PRs *as soon as development begins*. Create a feature branch, make your initial commit, push, and PR to let everyone know you are working on it and let discussion guide development instead of review development after-the-fact.
b. When a proposal from the first step earns enough interest to warrant development, make a PR, and reference and close the old issue to direct the conversation to the PR.
2. When a PR is ready, comment to request a maintainer be assigned to review and merge to `dev`.
A PR is only ready for review when the code is committed, documented, linted, and tested!
**Documentation**: the documentation is bundled with Caffe in `docs/`. This includes the site you are reading now. Contributions should be documented both inline in code and through usage examples. New documentation is published by BVLC with each release and between releases as-needed.
We'd appreciate your contribution to the documentation effort!
**Testing**: run `make runtest` to check the project tests. New code requires new tests. Pull requests that fail tests will not be accepted.
The `googletest` framework we use provides many additional options, which you can access by running the test binaries directly. One of the more useful options is `--gtest_filter`, which allows you to filter tests by name:
# run all tests with CPU in the name
build/test/test_all.testbin --gtest_filter='*CPU*'
# run all tests without GPU in the name (note the leading minus sign)
build/test/test_all.testbin --gtest_filter=-'*GPU*'
To get a list of all options `googletest` provides, simply pass the `--help` flag:
build/test/test_all.testbin --help
**Style**
- Follow [Google C++ style](http://google-styleguide.googlecode.com/svn/trunk/cppguide.xml) and [Google python style](http://google-styleguide.googlecode.com/svn/trunk/pyguide.html) + [PEP 8](http://legacy.python.org/dev/peps/pep-0008/).
- Wrap lines at 80 chars.
- Remember that “a foolish consistency is the hobgoblin of little minds,” so use your best judgement to write the clearest code for your particular case.
**Lint**: run `make lint` to check C++ code.
**Copyright**: assign copyright jointly to BVLC and contributors like so:
// Copyright 2014 BVLC and contributors.
The exact details of contributions are recorded by versioning and cited in our [acknowledgements](http://caffe.berkeleyvision.org/#acknowledgements). This method is impartial and always up-to-date.

@ -0,0 +1,71 @@
---
layout: default
title: Caffe
---
Extracting Features
===================
In this tutorial, we will extract features using a pre-trained model.
Follow instructions for [setting up caffe](installation.html) and for [getting](getting_pretrained_models.html) the pre-trained ImageNet model.
If you need detailed information about the tools below, please consult their source code, in which additional documentation is usually provided.
Select data to run on
---------------------
We'll make a temporary folder to store things into.
mkdir examples/_temp
Generate a list of the files to process.
We're going to use the images that ship with caffe.
find `pwd`/examples/images -type f -exec echo {} \; > examples/_temp/temp.txt
The `ImageDataLayer` we'll use expects a label after each filename, so let's add a 0 to the end of each line
sed "s/$/ 0/" examples/_temp/temp.txt > examples/_temp/file_list.txt
Define the Feature Extraction Network Architecture
--------------------------------------------------
In practice, subtracting the mean image from a dataset significantly improves classification accuracies.
Download the mean image of the ILSVRC dataset.
data/ilsvrc12/get_ilsvrc_aux.sh
We will use `data/ilsvrc12/imagenet_mean.binaryproto` in the network definition prototxt.
Let's copy and modify the network definition.
We'll be using the `ImageDataLayer`, which will load and resize images for us.
cp examples/feature_extraction/imagenet_val.prototxt examples/_temp
Edit `examples/_temp/imagenet_val.prototxt` to use the correct path for your setup (replace `$CAFFE_DIR`).
Extract Features
----------------
Now everything necessary is in place.
build/tools/extract_features.bin examples/imagenet/caffe_reference_imagenet_model examples/_temp/imagenet_val.prototxt fc7 examples/_temp/features 10
The name of feature blob that you extract is `fc7`, which represents the highest level feature of the reference model.
We can use any other layer, as well, such as `conv5` or `pool3`.
The last parameter above is the number of data mini-batches.
The features are stored to LevelDB `examples/_temp/features`, ready for access by some other code.
If you meet with the error "Check failed: status.ok() Failed to open leveldb examples/_temp/features", it is because the directory examples/_temp/features was created the last time you ran the command. Remove it and run again.
rm -rf examples/_temp/features/
If you'd like to use the Python wrapper for extracting features, check out the [layer visualization notebook](http://nbviewer.ipython.org/github/BVLC/caffe/blob/master/examples/filter_visualization.ipynb).
Clean Up
--------
Let's remove the temporary directory now.
rm -r examples/_temp

@ -0,0 +1,29 @@
---
layout: default
---
# Pre-trained models
[BVLC](http://bvlc.eecs.berkeley.edu) aims to provide a variety of high quality pre-trained models.
Note that unlike Caffe itself, these models are licensed for **academic research / non-commercial use only**.
If you have any questions, please get in touch with us.
This page will be updated as more models become available.
### ImageNet
**Caffe Reference ImageNet Model**: Our reference implementation of an ImageNet model trained on ILSVRC-2012 can be downloaded (232.6MB) by running `examples/imagenet/get_caffe_reference_imagenet_model.sh` from the Caffe root directory.
- The bundled model is the iteration 310,000 snapshot.
- The best validation performance during training was iteration 313,000 with
validation accuracy 57.412% and loss 1.82328.
**AlexNet**: Our training of the Krizhevsky architecture, which differs from the paper's methodology by (1) not training with the relighting data-augmentation and (2) initializing non-zero biases to 0.1 instead of 1. (2) was found necessary for training, as initialization to 1 gave flat loss. Download the model (243.9MB) by running `examples/imagenet/get_caffe_alexnet_model.sh` from the Caffe root directory.
- The bundled model is the iteration 360,000 snapshot.
- The best validation performance during training was iteration 358,000 with
validation accuracy 57.258% and loss 1.83948.
**R-CNN (ILSVRC13)**: The pure Caffe instantiation of the [R-CNN](https://github.com/rbgirshick/rcnn) model for ILSVRC13 detection. Download the model (230.8MB) by running `examples/imagenet/get_caffe_rcnn_imagenet_model.sh` from the Caffe root directory. This model was made by transplanting the R-CNN SVM classifiers into a `fc-rcnn` classification layer, provided here as an off-the-shelf Caffe detector. Try the [detection example](http://nbviewer.ipython.org/github/BVLC/caffe/blob/master/examples/detection.ipynb) to see it in action. For the full details, refer to the R-CNN site. *N.B. For research purposes, make use of the official R-CNN package and not this example.*
Additionally, you will probably eventually need some auxiliary data (mean image, synset list, etc.): run `data/ilsvrc12/get_ilsvrc_aux.sh` from the root directory to obtain it.

@ -0,0 +1,102 @@
---
layout: default
title: Caffe
---
Yangqing's Recipe on Brewing ImageNet
=====================================
"All your braincells are belong to us."
- Caffeine
We are going to describe a reference implementation for the approach first proposed by Krizhevsky, Sutskever, and Hinton in their [NIPS 2012 paper](http://books.nips.cc/papers/files/nips25/NIPS2012_0534.pdf). Since training the whole model takes some time and energy, we provide a model, trained in the same way as we describe here, to help fight global warming. If you would like to simply use the pretrained model, check out the [Pretrained ImageNet](getting_pretrained_models.html) page. *Note that the pretrained model is for academic research / non-commercial use only*.
To clarify, by ImageNet we actually mean the ILSVRC12 challenge, but you can easily train on the whole of ImageNet as well, just with more disk space, and a little longer training time.
(If you don't get the quote, visit [Yann LeCun's fun page](http://yann.lecun.com/ex/fun/).)
Data Preparation
----------------
We assume that you already have downloaded the ImageNet training data and validation data, and they are stored on your disk like:
/path/to/imagenet/train/n01440764/n01440764_10026.JPEG
/path/to/imagenet/val/ILSVRC2012_val_00000001.JPEG
You will first need to prepare some auxiliary data for training. This data can be downloaded by:
cd $CAFFE_ROOT/data/ilsvrc12/
./get_ilsvrc_aux.sh
The training and validation input are described in `train.txt` and `val.txt` as text listing all the files and their labels. Note that we use a different indexing for labels than the ILSVRC devkit: we sort the synset names in their ASCII order, and then label them from 0 to 999. See `synset_words.txt` for the synset/name mapping.
You may want to resize the images to 256x256 in advance. By default, we do not explicitly do this because in a cluster environment, one may benefit from resizing images in a parallel fashion, using mapreduce. For example, Yangqing used his lightweight [mincepie](https://github.com/Yangqing/mincepie) package to do mapreduce on the Berkeley cluster. If you would rather keep things simple and straightforward, you can also use shell commands, something like:
for name in /path/to/imagenet/val/*.JPEG; do
convert -resize 256x256\! $name $name
done
Go to `$CAFFE_ROOT/examples/imagenet/` for the rest of this guide.
Take a look at `create_imagenet.sh`. Set the paths to the train and val dirs as needed, and set "RESIZE=true" to resize all images to 256x256 if you haven't resized the images in advance. Now simply create the leveldbs with `./create_imagenet.sh`. Note that `imagenet_train_leveldb` and `imagenet_val_leveldb` should not exist before this execution. They will be created by the script. `GLOG_logtostderr=1` simply dumps more information for you to inspect, and you can safely ignore it.
Compute Image Mean
------------------
The model requires us to subtract the image mean from each image, so we have to compute the mean. `tools/compute_image_mean.cpp` implements that - it is also a good example to familiarize yourself with how to manipulate the multiple components, such as protocol buffers, leveldbs, and logging, if you are not familiar with them. Anyway, the mean computation can be carried out as:
./make_imagenet_mean.sh
which will make `data/ilsvrc12/imagenet_mean.binaryproto`.
Network Definition
------------------
The network definition follows strictly the one in Krizhevsky et al. You can find the detailed definition at `examples/imagenet/imagenet_train.prototxt`. Note the paths in the data layer - if you have not followed the exact paths in this guide you will need to change the following lines:
source: "ilsvrc12_train_leveldb"
mean_file: "../../data/ilsvrc12/imagenet_mean.binaryproto"
to point to your own leveldb and image mean. Likewise, do the same for `examples/imagenet/imagenet_val.prototxt`.
If you look carefully at `imagenet_train.prototxt` and `imagenet_val.prototxt`, you will notice that they are largely the same, with the only difference being the data layer sources, and the last layer: in training, we will be using a `softmax_loss` layer to compute the loss function and to initialize the backpropagation, while in validation we will be using an `accuracy` layer to inspect how well we do in terms of accuracy.
We will also lay out a protocol buffer for running the solver. Let's make a few plans:
* We will run in batches of 256, and run a total of 450,000 iterations (about 90 epochs).
* For every 1,000 iterations, we test the learned net on the validation data.
* We set the initial learning rate to 0.01, and decrease it every 100,000 iterations (about 20 epochs).
* Information will be displayed every 20 iterations.
* The network will be trained with momentum 0.9 and a weight decay of 0.0005.
* For every 10,000 iterations, we will take a snapshot of the current status.
Sound good? This is implemented in `examples/imagenet/imagenet_solver.prototxt`. Again, you will need to change the first two lines:
train_net: "imagenet_train.prototxt"
test_net: "imagenet_val.prototxt"
to point to the actual path if you have changed them.
Training ImageNet
-----------------
Ready? Let's train.
./train_imagenet.sh
Sit back and enjoy! On my K20 machine, every 20 iterations take about 36 seconds to run, so effectively about 7 ms per image for the full forward-backward pass. About 2.5 ms of this is on forward, and the rest is backward. If you are interested in dissecting the computation time, you can look at `examples/net_speed_benchmark.cpp`, but it was written purely for debugging purpose, so you may need to figure a few things out yourself.
Resume Training?
----------------
We all experience times when the power goes out, or we feel like rewarding ourselves a little by playing Battlefield (does someone still remember Quake?). Since we are snapshotting intermediate results during training, we will be able to resume from snapshots. This can be done as easily as:
./resume_training.sh
where in the script `caffe_imagenet_train_1000.solverstate` is the solver state snapshot that stores all necessary information to recover the exact solver state (including the parameters, momentum history, etc).
Parting Words
-------------
Hope you liked this recipe! Many researchers have gone further since the ILSVRC 2012 challenge, changing the network architecture and/or finetuning the various parameters in the network. The recent ILSVRC 2013 challenge suggests that there is still quite some room for improvement. **Caffe allows one to explore different network choices more easily, by simply writing different prototxt files** - isn't that exciting?
And since now you have a trained network, check out how to use it: [Running Pretrained ImageNet](getting_pretrained_models.html). This time we will use Python, but if you have wrappers for other languages, please kindly send a pull request!

@ -0,0 +1,79 @@
---
layout: default
---
# Welcome to Caffe
Caffe is a framework for convolutional neural network algorithms, developed with speed in mind.
It was created by [Yangqing Jia](http://daggerfs.com), and is in active development by the [Berkeley Vision and Learning Center](http://bvlc.eecs.berkeley.edu).
Caffe is released under [the BSD 2-Clause license](https://github.com/BVLC/caffe/blob/master/LICENSE).
Check out the [classification demo](http://demo.caffe.berkeleyvision.org/)!
## Why Caffe?
Caffe aims to provide computer vision scientists and practitioners with a **clean and modifiable implementation** of state-of-the-art deep learning algorithms.
For example, network structure is easily specified in separate config files, with no mess of hard-coded parameters in the code.
At the same time, Caffe fits industry needs, with blazing fast C++/CUDA code for GPU computation.
Caffe is currently the fastest GPU CNN implementation publicly available, and is able to process more than **40 million images per day** with a single NVIDIA K40 or Titan GPU (or 20 million images per day on a K20 GPU)\*. That's 192 images per second during training and 500 images per second during test.
Caffe also provides **seamless switching between CPU and GPU**, which allows one to train models with fast GPUs and then deploy them on non-GPU clusters with one line of code: `Caffe::set_mode(Caffe::CPU)`.
Even in CPU mode, computing predictions on an image takes only 20 ms when images are processed in batch mode. In GPU mode, computing predictions on an image takes only 2 ms when images are processed in batch mode.
## Documentation
* [Introductory slides](https://www.dropbox.com/s/10fx16yp5etb8dv/caffe-presentation.pdf): slides about the Caffe architecture, *updated 03/14*.
* [Installation](/installation.html): Instructions on installing Caffe (works on Ubuntu, Red Hat, OS X).
* [Pre-trained models](/getting_pretrained_models.html): BVLC provides some pre-trained models for academic / non-commercial use.
* [Development](/development.html): Guidelines for development and contributing to Caffe.
### Examples
* [Image Classification \[notebook\]][imagenet_classification]: classify images with the pretrained ImageNet model by the Python interface.
* [Detection \[notebook\]][detection]: run a pretrained model as a detector in Python.
* [Visualizing Features and Filters \[notebook\]][visualizing_filters]: extracting features and visualizing trained filters with an example image, viewed layer-by-layer.
* [Editing Model Parameters \[notebook\]][net_surgery]: how to do net surgery and manually change model parameters.
* [LeNet / MNIST Demo](/mnist.html): end-to-end training and testing of LeNet on MNIST.
* [CIFAR-10 Demo](/cifar10.html): training and testing on the CIFAR-10 data.
* [Training ImageNet](/imagenet_training.html): recipe for end-to-end training of an ImageNet classifier.
* [Feature extraction with C++](/feature_extraction.html): feature extraction using pre-trained model.
[imagenet_classification]: http://nbviewer.ipython.org/github/BVLC/caffe/blob/master/examples/imagenet_classification.ipynb
[detection]: http://nbviewer.ipython.org/github/BVLC/caffe/blob/master/examples/detection.ipynb
[visualizing_filters]: http://nbviewer.ipython.org/github/BVLC/caffe/blob/master/examples/filter_visualization.ipynb
[net_surgery]: http://nbviewer.ipython.org/github/BVLC/caffe/blob/master/examples/net_surgery.ipynb
## Citing Caffe
Please kindly cite Caffe in your publications if it helps your research:
@misc{Jia13caffe,
Author = {Yangqing Jia},
Title = { {Caffe}: An Open Source Convolutional Architecture for Fast Feature Embedding},
Year = {2013},
Howpublished = {\url{http://caffe.berkeleyvision.org/}}
}
### Acknowledgements
Yangqing would like to thank the NVIDIA Academic program for providing K20 GPUs, and [Oriol Vinyals](http://www1.icsi.berkeley.edu/~vinyals/) for various discussions along the journey.
A core set of BVLC members have contributed lots of new functionality and fixes since the original release (alphabetical by first name):
- [Eric Tzeng](https://github.com/erictzeng)
- [Evan Shelhamer](http://imaginarynumber.net/)
- [Jeff Donahue](http://jeffdonahue.com/)
- [Jon Long](https://github.com/longjon)
- [Dr. Ross Girshick](http://www.cs.berkeley.edu/~rbg/)
- [Sergey Karayev](http://sergeykarayev.com/)
- [Dr. Sergio Guadarrama](http://www.eecs.berkeley.edu/~sguada/)
Additionally, the open-source community plays a large and growing role in Caffe's development.
Check out the Github [project pulse](https://github.com/BVLC/caffe/pulse) for recent activity, and the [contributors](https://github.com/BVLC/caffe/graphs/contributors) for an ordered list (by commit activity).
We sincerely appreciate your interest and contributions!
If you'd like to contribute, read [this](development.html).
---
\*: When measured with the [SuperVision](http://www.image-net.org/challenges/LSVRC/2012/supervision.pdf) model that won the ImageNet Large Scale Visual Recognition Challenge 2012. See [performance and hardware configuration details](/performance_hardware.html).

@ -0,0 +1,182 @@
---
layout: default
title: Caffe
---
# Installation
Prior to installing, it is best to read through this guide and take note of the details for your platform.
We have successfully compiled and run Caffe on Ubuntu 12.04, OS X 10.8, and OS X 10.9.
- [Prerequisites](#prerequisites)
- [Compilation](#compilation)
- [Hardware questions](#hardware_questions)
## Prerequisites
Caffe depends on several software packages.
* [CUDA](https://developer.nvidia.com/cuda-zone) (5.0, 5.5, or 6.0).
* [BLAS](http://en.wikipedia.org/wiki/Basic_Linear_Algebra_Subprograms) (provided via ATLAS, MKL, or OpenBLAS).
* [OpenCV](http://opencv.org/).
* [Boost](http://www.boost.org/) (we have only tested 1.55)
* `glog`, `gflags`, `protobuf`, `leveldb`, `snappy`, `hdf5`
* For the Python wrapper
* `Python`, `numpy (>= 1.7)`, boost-provided `boost.python`
* For the MATLAB wrapper
* MATLAB with the `mex` compiler.
### CUDA and BLAS
Caffe requires the CUDA `nvcc` compiler to compile its GPU code.
To install CUDA, go to the [NVIDIA CUDA website](https://developer.nvidia.com/cuda-downloads) and follow installation instructions there. **Note:** you can install the CUDA libraries without a CUDA card or driver, in order to build and run Caffe on a CPU-only machine.
Caffe requires BLAS as the backend of its matrix and vector computations.
There are several implementations of this library.
The choice is yours:
* [ATLAS](http://math-atlas.sourceforge.net/): free, open source, and so the default for Caffe.
+ Ubuntu: `sudo apt-get install libatlas-base-dev`
+ CentOS/RHEL: `sudo yum install libatlas-devel`
+ OS X: already installed as the [Accelerate / vecLib Framework](https://developer.apple.com/library/mac/documentation/Darwin/Reference/ManPages/man7/Accelerate.7.html).
* [Intel MKL](http://software.intel.com/en-us/intel-mkl): commercial and optimized for Intel CPUs, with a free trial and [student](http://software.intel.com/en-us/intel-education-offerings) licenses.
1. Install MKL.
2. Set `BLAS := mkl` in `Makefile.config`
* [OpenBLAS](http://www.openblas.net/): free and open source; this optimized and parallel BLAS could require more effort to install, although it might offer a speedup.
1. Install OpenBLAS
2. Set `BLAS := open` in `Makefile.config`
### Python and/or Matlab wrappers (optional)
Python: The main requirements are `numpy` and `boost.python` (provided by boost). `pandas` is useful too and needed for some examples.
For **OS X**, we highly recommend using the [Anaconda](https://store.continuum.io/cshop/anaconda/) Python distribution, which provides most of the necessary packages, as well as the `hdf5` library dependency.
If you don't, please use Homebrew -- but beware of potential linking errors!
Note that if you use the **Ubuntu** default python, you will need to `apt-get install` the `python-dev` package to have the python headers. You can install any remaining dependencies with
pip install -r /path/to/caffe/python/requirements.txt
MATLAB: install MATLAB, and make sure that its `mex` is in your `$PATH`.
### The rest of the dependencies
#### Linux
On **Ubuntu**, the remaining dependencies can be installed with
sudo apt-get install libprotobuf-dev libleveldb-dev libsnappy-dev libopencv-dev libboost-all-dev libhdf5-serial-dev
And on **CentOS or RHEL**, you can install via yum using:
sudo yum install protobuf-devel leveldb-devel snappy-devel opencv-devel boost-devel hdf5-devel
The only exception is the Google logging library, which does not exist in the Ubuntu 12.04 or CentOS/RHEL repositories. To install it, do:
wget https://google-glog.googlecode.com/files/glog-0.3.3.tar.gz
tar zxvf glog-0.3.3.tar.gz
cd glog-0.3.3
./configure
make && make install
#### OS X
On **OS X**, we highly recommend using the [homebrew](http://brew.sh/) package manager, and ideally starting from a clean install of the OS (or from a wiped `/usr/local`) to avoid conflicts.
In the following, we assume that you're using Anaconda Python and Homebrew.
To install the OpenCV dependency, we'll need to provide an additional source for Homebrew:
brew tap homebrew/science
If using Anaconda Python, a modification is required to the OpenCV formula.
Do `brew edit opencv` and change the lines that look like the two lines below to exactly the two lines below.
-DPYTHON_LIBRARY=#{py_prefix}/lib/libpython2.7.dylib
-DPYTHON_INCLUDE_DIR=#{py_prefix}/include/python2.7
**NOTE**: We find that everything compiles successfully if `$LD_LIBRARY_PATH` is not set at all, and `$DYLD_FALLBACK_LIBRARY_PATH` is set to provide CUDA, Python, and other relevant libraries (e.g. `/usr/local/cuda/lib:$HOME/anaconda/lib:/usr/local/lib:/usr/lib`).
In other `ENV` settings, things may not work as expected.
#### 10.8-specific Instructions
Simply run the following:
brew install --build-from-source --with-python boost
for x in snappy leveldb protobuf gflags glog szip homebrew/science/opencv; do brew install $x; done
Building boost from source is needed to link against your local Python (exceptions might be raised during some OS X installs, but **ignore** these and continue). If you do not need the Python wrapper, simply doing `brew install boost` is fine.
**Note** that the HDF5 dependency is provided by Anaconda Python in this case.
If you're not using Anaconda, include `hdf5` in the list above.
#### 10.9-specific Instructions
In OS X 10.9, clang++ is the default C++ compiler and uses `libc++` as the standard library.
However, NVIDIA CUDA (even version 6.0) currently links only with `libstdc++`.
This makes it necessary to change the compilation settings for each of the dependencies.
We do this by modifying the homebrew formulae before installing any packages.
Make sure that homebrew doesn't install any software dependencies in the background; all packages must be linked to `libstdc++`.
The prerequisite homebrew formulae are
boost snappy leveldb protobuf gflags glog szip homebrew/science/opencv
For each of these formulas, `brew edit FORMULA`, and add the ENV definitions as shown:
def install
# ADD THE FOLLOWING:
ENV.append "CXXFLAGS", "-stdlib=libstdc++"
ENV.append "CFLAGS", "-stdlib=libstdc++"
ENV.append "LDFLAGS", "-stdlib=libstdc++ -lstdc++"
# The following is necessary because libtool likes to strip LDFLAGS:
ENV["CXX"] = "/usr/bin/clang++ -stdlib=libstdc++"
...
To edit the formulae in turn, run
for x in snappy leveldb protobuf gflags glog szip boost homebrew/science/opencv; do brew edit $x; done
After this, run
for x in snappy leveldb protobuf gflags glog szip homebrew/science/opencv; do brew uninstall $x; brew install --build-from-source --fresh -vd $x; done
brew install --build-from-source --with-python --fresh -vd boost
**Note** that `brew install --build-from-source --fresh -vd boost` is fine if you do not need the Caffe Python wrapper.
**Note** that the HDF5 dependency is provided by Anaconda Python in this case.
If you're not using Anaconda, include `hdf5` in the list above.
#### Windows
There is an unofficial Windows port of Caffe at [niuzhiheng/caffe:windows](https://github.com/niuzhiheng/caffe). Thanks [@niuzhiheng](https://github.com/niuzhiheng)!
## Compilation
Now that you have the prerequisites, edit your `Makefile.config` to change the paths for your setup.
The defaults should work, but uncomment the relevant lines if using Anaconda Python.
cp Makefile.config.example Makefile.config
# Adjust Makefile.config (for example, if using Anaconda Python)
make all
make test
make runtest
Note that if there is no GPU in your machine, building and running CPU-only works, but GPU tests will naturally fail.
To compile the Python and MATLAB wrappers do `make pycaffe` and `make matcaffe` respectively.
Be sure to set your MATLAB and Python paths in `Makefile.config` first!
For Python support, you must add the compiled module to your `$PYTHONPATH` (as `/path/to/caffe/python` or the like).
*Distribution*: run `make distribute` to create a `distribute` directory with all the Caffe headers, compiled libraries, binaries, etc. needed for distribution to other machines.
*Speed*: for a faster build, compile in parallel by doing `make all -j8` where 8 is the number of parallel threads for compilation (a good choice for the number of threads is the number of cores in your machine).
Now that you have installed Caffe, check out the [MNIST demo](mnist.html) and the pretrained [ImageNet example](imagenet.html).
## Hardware Questions
**Laboratory Tested Hardware**: Berkeley Vision runs Caffe with K40s, K20s, and Titans including models at ImageNet/ILSVRC scale. We also run on GTX series cards and GPU-equipped MacBook Pros. We have not encountered any trouble in-house with devices with CUDA capability >= 3.0. All hardware issues reported thus far have been due to GPU configuration, overheating, and the like.
**CUDA compute capability**: devices with compute capability <= 2.0 may have to reduce CUDA thread numbers and batch sizes due to hardware constraints. Your mileage may vary.
Refer to the project's issue tracker for [hardware/compatibility](https://github.com/BVLC/caffe/issues?labels=hardware%2Fcompatibility&page=1&state=open).

@ -0,0 +1,20 @@
// fixScale(doc): adjusts the last <meta name="viewport"> element of `doc`.
// It first pins minimum-scale and maximum-scale to 1, then widens them to
// 0.25 / 1.6 the first time a 'gesturestart' event is seen.
// Does nothing when `doc` has no viewport meta element or lacks
// addEventListener.
// NOTE(review): `fixScale` is assigned without var/let/const, so no local
// declaration is made here -- confirm it is meant to be a global.
fixScale = function(doc) {
// Method and event names are kept in variables and used via bracket access.
var addEvent = 'addEventListener',
type = 'gesturestart',
qsa = 'querySelectorAll',
// [minimum-scale, maximum-scale] written by fix(); starts locked at 1.
scales = [1, 1],
// All viewport meta elements, or an empty list if querySelectorAll is missing.
meta = qsa in doc ? doc[qsa]('meta[name=viewport]') : [];
// Writes the current `scales` into the meta element and unregisters itself,
// so as an event handler it runs at most once.
function fix() {
meta.content = 'width=device-width,minimum-scale=' + scales[0] + ',maximum-scale=' + scales[1];
doc.removeEventListener(type, fix, true);
}
// `meta` is narrowed from the list to its last element; the branch is skipped
// when the list was empty (last element is undefined).
if ((meta = meta[meta.length - 1]) && addEvent in doc) {
// First call applies the locked 1/1 scale (the removeEventListener inside is
// a no-op because nothing is registered yet).
fix();
// Order matters: the wider range is stored only after the locked values were
// written, so it takes effect on the later event-driven call.
scales = [.25, 1.6];
// Registered with useCapture = true, matching the removal in fix().
doc[addEvent](type, fix, true);
}
};

@ -0,0 +1,91 @@
---
layout: default
title: Caffe
---
Training MNIST with Caffe
================
We will assume that you have caffe successfully compiled. If not, please refer to the [Installation page](installation.html). In this tutorial, we will assume that your caffe installation is located at `CAFFE_ROOT`.
Prepare Datasets
----------------
You will first need to download and convert the data format from the MNIST website. To do this, simply run the following commands:
cd $CAFFE_ROOT/data/mnist
./get_mnist.sh
cd $CAFFE_ROOT/examples/mnist
./create_mnist.sh
If it complains that `wget` or `gunzip` are not installed, you need to install them respectively. After running the script there should be two datasets, `mnist-train-leveldb`, and `mnist-test-leveldb`.
LeNet: the MNIST Classification Model
-------------------------------------
Before we actually run the training program, let's explain what will happen. We will use the [LeNet](http://yann.lecun.com/exdb/publis/pdf/lecun-01a.pdf) network, which is known to work well on digit classification tasks. We will use a slightly different version from the original LeNet implementation, replacing the sigmoid activations with Rectified Linear Unit (ReLU) activations for the neurons.
The design of LeNet contains the essence of CNNs that are still used in larger models such as the ones in ImageNet. In general, it consists of a convolutional layer followed by a pooling layer, another convolution layer followed by a pooling layer, and then two fully connected layers similar to the conventional multilayer perceptrons. We have defined the layers in `CAFFE_ROOT/data/lenet.prototxt`.
If you would like to read step-by-step instructions on how the protobuf definitions are written, see [MNIST: Define the Network](mnist_prototxt.html) and [MNIST: Define the Solver](mnist_solver_prototxt.html).
Training and Testing the Model
------------------------------
Training the model is simple after you have written the network definition protobuf and solver protobuf files. Simply run `train_lenet.sh`, using the following commands:
cd $CAFFE_ROOT/examples/mnist
./train_lenet.sh
`train_lenet.sh` is a simple script, but here are a few explanations: `GLOG_logtostderr=1` is the google logging flag that prints all the logging messages directly to stderr. The main tool for training is `train_net.bin`, with the solver protobuf text file as its argument.
When you run the code, you will see a lot of messages flying by like this:
I1203 net.cpp:66] Creating Layer conv1
I1203 net.cpp:76] conv1 <- data
I1203 net.cpp:101] conv1 -> conv1
I1203 net.cpp:116] Top shape: 20 24 24
I1203 net.cpp:127] conv1 needs backward computation.
These messages tell you the details about each layer, its connections and its output shape, which may be helpful in debugging. After the initialization, the training will start:
I1203 net.cpp:142] Network initialization done.
I1203 solver.cpp:36] Solver scaffolding done.
I1203 solver.cpp:44] Solving LeNet
Based on the solver setting, we will print the training loss function every 100 iterations, and test the network every 500 iterations. You will see messages like this:
I1203 solver.cpp:204] Iteration 100, lr = 0.00992565
I1203 solver.cpp:66] Iteration 100, loss = 0.26044
...
I1203 solver.cpp:84] Testing net
I1203 solver.cpp:111] Test score #0: 0.9785
I1203 solver.cpp:111] Test score #1: 0.0606671
For each training iteration, `lr` is the learning rate of that iteration, and `loss` is the value of the training loss function. For the output of the testing phase, score 0 is the accuracy, and score 1 is the value of the testing loss function.
And after a few minutes, you are done!
I1203 solver.cpp:84] Testing net
I1203 solver.cpp:111] Test score #0: 0.9897
I1203 solver.cpp:111] Test score #1: 0.0324599
I1203 solver.cpp:126] Snapshotting to lenet_iter_10000
I1203 solver.cpp:133] Snapshotting solver state to lenet_iter_10000.solverstate
I1203 solver.cpp:78] Optimization Done.
The final model, a binary protobuf file, is stored at
lenet_iter_10000
which you can deploy as a trained model in your application, if you are training on a real-world application dataset.
Um... How about GPU training?
-----------------------------
You just did! All the training was carried out on the GPU. In fact, if you would like to do training on CPU, you can simply change one line in `lenet_solver.prototxt`:
# solver mode: CPU or GPU
solver_mode: CPU
and you will be using CPU for training. Isn't that easy?
MNIST is a small dataset, so training with GPU does not really introduce too much benefit due to communication overheads. On larger datasets with more complex models, such as ImageNet, the computation speed difference will be more significant.

@ -0,0 +1,153 @@
---
layout: default
title: Caffe
---
Define the MNIST Network
=========================
This page explains the prototxt file `lenet_train.prototxt` used in the MNIST demo. We assume that you are familiar with [Google Protobuf](https://developers.google.com/protocol-buffers/docs/overview), and assume that you have read the protobuf definitions used by Caffe, which can be found at [src/caffe/proto/caffe.proto](https://github.com/Yangqing/caffe/blob/master/src/caffe/proto/caffe.proto).
Specifically, we will write a `caffe::NetParameter` (or in python, `caffe.proto.caffe_pb2.NetParameter`) protobuf. We will start by giving the network a name:
name: "LeNet"
Writing the Data Layer
----------------------
Currently, we will read the MNIST data from the leveldb we created earlier in the demo. This is defined by a data layer:
layers {
name: "mnist"
type: DATA
data_param {
source: "mnist-train-leveldb"
batch_size: 64
scale: 0.00390625
}
top: "data"
top: "label"
}
Specifically, this layer has name `mnist`, type `data`, and it reads the data from the given leveldb source. We will use a batch size of 64, and scale the incoming pixels so that they are in the range \[0,1\). Why 0.00390625? It is 1 divided by 256. And finally, this layer produces two blobs, one is the `data` blob, and one is the `label` blob.
Writing the Convolution Layer
--------------------------------------------
Let's define the first convolution layer:
layers {
name: "conv1"
type: CONVOLUTION
blobs_lr: 1.
blobs_lr: 2.
convolution_param {
num_output: 20
kernel_size: 5
stride: 1
weight_filler {
type: "xavier"
}
bias_filler {
type: "constant"
}
}
bottom: "data"
top: "conv1"
}
This layer takes the `data` blob (it is provided by the data layer), and produces the `conv1` layer. It produces outputs of 20 channels, with the convolutional kernel size 5 and carried out with stride 1.
The fillers allow us to randomly initialize the value of the weights and bias. For the weight filler, we will use the `xavier` algorithm that automatically determines the scale of initialization based on the number of input and output neurons. For the bias filler, we will simply initialize it as constant, with the default filling value 0.
`blobs_lr` are the learning rate adjustments for the layer's learnable parameters. In this case, we will set the weight learning rate to be the same as the learning rate given by the solver during runtime, and the bias learning rate to be twice as large as that - this usually leads to better convergence rates.
Writing the Pooling Layer
-------------------------
Phew. Pooling layers are actually much easier to define:
layers {
name: "pool1"
type: POOLING
pooling_param {
kernel_size: 2
stride: 2
pool: MAX
}
bottom: "conv1"
top: "pool1"
}
This says we will perform max pooling with a pool kernel size 2 and a stride of 2 (so no overlapping between neighboring pooling regions).
Similarly, you can write up the second convolution and pooling layers. Check `data/lenet.prototxt` for details.
Writing the Fully Connected Layer
----------------------------------
Writing a fully connected layer is also simple:
layers {
name: "ip1"
type: INNER_PRODUCT
blobs_lr: 1.
blobs_lr: 2.
inner_product_param {
num_output: 500
weight_filler {
type: "xavier"
}
bias_filler {
type: "constant"
}
}
bottom: "pool2"
top: "ip1"
}
This defines a fully connected layer (for some legacy reason, Caffe calls it an `innerproduct` layer) with 500 outputs. All other lines look familiar, right?
Writing the ReLU Layer
----------------------
A ReLU Layer is also simple:
layers {
name: "relu1"
type: RELU
bottom: "ip1"
top: "ip1"
}
Since ReLU is an element-wise operation, we can do *in-place* operations to save some memory. This is achieved by simply giving the same name to the bottom and top blobs. Of course, do NOT use duplicated blob names for other layer types!
After the ReLU layer, we will write another innerproduct layer:
layers {
name: "ip2"
type: INNER_PRODUCT
blobs_lr: 1.
blobs_lr: 2.
inner_product_param {
num_output: 10
weight_filler {
type: "xavier"
}
bias_filler {
type: "constant"
}
}
bottom: "ip1"
top: "ip2"
}
Writing the Loss Layer
-------------------------
Finally, we will write the loss!
layers {
name: "loss"
type: SOFTMAX_LOSS
bottom: "ip2"
bottom: "label"
}
The `softmax_loss` layer implements both the softmax and the multinomial logistic loss (that saves time and improves numerical stability). It takes two blobs, the first one being the prediction and the second one being the `label` provided by the data layer (remember it?). It does not produce any outputs - all it does is compute the loss function value, report it when backpropagation starts, and initiate the gradient with respect to `ip2`. This is where all the magic starts.
Now that we have demonstrated how to write the MNIST layer definition prototxt, maybe check out [how we write a solver prototxt](mnist_solver_prototxt.html)?

@ -0,0 +1,37 @@
---
layout: default
title: Caffe
---
Define the MNIST Solver
=======================
This page is under construction. For now, check out the comments in the solver prototxt file, which explain each line in the prototxt:
# The training protocol buffer definition
train_net: "lenet_train.prototxt"
# The testing protocol buffer definition
test_net: "lenet_test.prototxt"
# test_iter specifies how many forward passes the test should carry out.
# In the case of MNIST, we have test batch size 100 and 100 test iterations,
# covering the full 10,000 testing images.
test_iter: 100
# Carry out testing every 500 training iterations.
test_interval: 500
# The base learning rate, momentum and the weight decay of the network.
base_lr: 0.01
momentum: 0.9
weight_decay: 0.0005
# The learning rate policy
lr_policy: "inv"
gamma: 0.0001
power: 0.75
# Display every 100 iterations
display: 100
# The maximum number of iterations
max_iter: 10000
# snapshot intermediate results
snapshot: 5000
snapshot_prefix: "lenet"
# solver mode: 0 for CPU and 1 for GPU
solver_mode: 1

@ -0,0 +1,57 @@
---
layout: default
title: Caffe
---
# Performance and Hardware Configuration
To measure performance on different NVIDIA GPUs we use the Caffe reference ImageNet model.
For training, each time point is 20 iterations/minibatches of 256 images for 5,120 images total. For testing, a 50,000 image validation set is classified.
**Acknowledgements**: BVLC members are very grateful to NVIDIA for providing several GPUs to conduct this research.
## NVIDIA K40
Performance is best with ECC off and boost clock enabled. While ECC makes a negligible difference in speed, disabling it frees ~1 GB of GPU memory.
Best settings with ECC off and maximum clock speed:
* Training is 26.5 secs / 20 iterations (5,120 images)
* Testing is 100 secs / validation set (50,000 images)
Other settings:
* ECC on, max speed: training 26.7 secs / 20 iterations, test 101 secs / validation set
* ECC on, default speed: training 31 secs / 20 iterations, test 117 secs / validation set
* ECC off, default speed: training 31 secs / 20 iterations, test 118 secs / validation set
### K40 configuration tips
For maximum K40 performance, turn off ECC and boost the clock speed (at your own risk).
To turn off ECC, do
sudo nvidia-smi -i 0 --ecc-config=0 # repeat with -i x for each GPU ID
then reboot.
Set the "persistence" mode of the GPU settings by
sudo nvidia-smi -pm 1
and then set the clock speed with
sudo nvidia-smi -i 0 -ac 3004,875 # repeat with -i x for each GPU ID
but note that this configuration resets across driver reloading / rebooting. Include these commands in a boot script to initialize these settings. For a simple fix, add these commands to `/etc/rc.local` (on Ubuntu).
## NVIDIA Titan
Training: 26.26 secs / 20 iterations (5,120 images).
Testing: 100 secs / validation set (50,000 images).
## NVIDIA K20
Training: 36.0 secs / 20 iterations (5,120 images).
Testing: 133 secs / validation set (50,000 images)

@ -0,0 +1,69 @@
.highlight { background: #ffffff; }
.highlight .c { color: #999988; font-style: italic } /* Comment */
.highlight .err { color: #a61717; background-color: #e3d2d2 } /* Error */
.highlight .k { font-weight: bold } /* Keyword */
.highlight .o { font-weight: bold } /* Operator */
.highlight .cm { color: #999988; font-style: italic } /* Comment.Multiline */
.highlight .cp { color: #999999; font-weight: bold } /* Comment.Preproc */
.highlight .c1 { color: #999988; font-style: italic } /* Comment.Single */
.highlight .cs { color: #999999; font-weight: bold; font-style: italic } /* Comment.Special */
.highlight .gd { color: #000000; background-color: #ffdddd } /* Generic.Deleted */
.highlight .gd .x { color: #000000; background-color: #ffaaaa } /* Generic.Deleted.Specific */
.highlight .ge { font-style: italic } /* Generic.Emph */
.highlight .gr { color: #aa0000 } /* Generic.Error */
.highlight .gh { color: #999999 } /* Generic.Heading */
.highlight .gi { color: #000000; background-color: #ddffdd } /* Generic.Inserted */
.highlight .gi .x { color: #000000; background-color: #aaffaa } /* Generic.Inserted.Specific */
.highlight .go { color: #888888 } /* Generic.Output */
.highlight .gp { color: #555555 } /* Generic.Prompt */
.highlight .gs { font-weight: bold } /* Generic.Strong */
.highlight .gu { color: #800080; font-weight: bold; } /* Generic.Subheading */
.highlight .gt { color: #aa0000 } /* Generic.Traceback */
.highlight .kc { font-weight: bold } /* Keyword.Constant */
.highlight .kd { font-weight: bold } /* Keyword.Declaration */
.highlight .kn { font-weight: bold } /* Keyword.Namespace */
.highlight .kp { font-weight: bold } /* Keyword.Pseudo */
.highlight .kr { font-weight: bold } /* Keyword.Reserved */
.highlight .kt { color: #445588; font-weight: bold } /* Keyword.Type */
.highlight .m { color: #009999 } /* Literal.Number */
.highlight .s { color: #d14 } /* Literal.String */
.highlight .na { color: #008080 } /* Name.Attribute */
.highlight .nb { color: #0086B3 } /* Name.Builtin */
.highlight .nc { color: #445588; font-weight: bold } /* Name.Class */
.highlight .no { color: #008080 } /* Name.Constant */
.highlight .ni { color: #800080 } /* Name.Entity */
.highlight .ne { color: #990000; font-weight: bold } /* Name.Exception */
.highlight .nf { color: #990000; font-weight: bold } /* Name.Function */
.highlight .nn { color: #555555 } /* Name.Namespace */
.highlight .nt { color: #000080 } /* Name.Tag */
.highlight .nv { color: #008080 } /* Name.Variable */
.highlight .ow { font-weight: bold } /* Operator.Word */
.highlight .w { color: #bbbbbb } /* Text.Whitespace */
.highlight .mf { color: #009999 } /* Literal.Number.Float */
.highlight .mh { color: #009999 } /* Literal.Number.Hex */
.highlight .mi { color: #009999 } /* Literal.Number.Integer */
.highlight .mo { color: #009999 } /* Literal.Number.Oct */
.highlight .sb { color: #d14 } /* Literal.String.Backtick */
.highlight .sc { color: #d14 } /* Literal.String.Char */
.highlight .sd { color: #d14 } /* Literal.String.Doc */
.highlight .s2 { color: #d14 } /* Literal.String.Double */
.highlight .se { color: #d14 } /* Literal.String.Escape */
.highlight .sh { color: #d14 } /* Literal.String.Heredoc */
.highlight .si { color: #d14 } /* Literal.String.Interpol */
.highlight .sx { color: #d14 } /* Literal.String.Other */
.highlight .sr { color: #009926 } /* Literal.String.Regex */
.highlight .s1 { color: #d14 } /* Literal.String.Single */
.highlight .ss { color: #990073 } /* Literal.String.Symbol */
.highlight .bp { color: #999999 } /* Name.Builtin.Pseudo */
.highlight .vc { color: #008080 } /* Name.Variable.Class */
.highlight .vg { color: #008080 } /* Name.Variable.Global */
.highlight .vi { color: #008080 } /* Name.Variable.Instance */
.highlight .il { color: #009999 } /* Literal.Number.Integer.Long */
.type-csharp .highlight .k { color: #0000FF }
.type-csharp .highlight .kt { color: #0000FF }
.type-csharp .highlight .nf { color: #000000; font-weight: normal }
.type-csharp .highlight .nc { color: #2B91AF }
.type-csharp .highlight .nn { color: #000000 }
.type-csharp .highlight .s { color: #A31515 }
.type-csharp .highlight .sc { color: #A31515 }

@ -0,0 +1,21 @@
/* MeyerWeb Reset */
html, body, div, span, applet, object, iframe,
h1, h2, h3, h4, h5, h6, p, blockquote, pre,
a, abbr, acronym, address, big, cite, code,
del, dfn, em, img, ins, kbd, q, s, samp,
small, strike, strong, sub, sup, tt, var,
b, u, i, center,
dl, dt, dd, ol, ul, li,
fieldset, form, label, legend,
table, caption, tbody, tfoot, thead, tr, th, td,
article, aside, canvas, details, embed,
figure, figcaption, footer, header, hgroup,
menu, nav, output, ruby, section, summary,
time, mark, audio, video {
margin: 0;
padding: 0;
border: 0;
font: inherit;
vertical-align: baseline;
}

@ -0,0 +1,393 @@
body {
padding:10px 50px 0 0;
font-family: "Helvetica Neue", Helvetica, Arial, sans-serif;
font-weight: 300;
font-size: 14px;
color: #232323;
background-color: #FBFAF7;
margin: 0;
line-height: 1.8em;
-webkit-font-smoothing: antialiased;
}
h1, h2, h3, h4, h5, h6 {
color:#232323;
margin:36px 0 10px;
}
p, ul, ol, table, dl {
margin:0 0 22px;
}
h1, h2, h3 {
font-family: Times, serif;
font-weight: 300;
line-height:1.3;
font-weight: normal;
display: block;
border-bottom: 1px solid #ccc;
padding-bottom: 5px;
}
h1 {
font-size: 30px;
}
h2 {
font-size: 24px;
}
h3 {
font-size: 18px;
}
h4, h5, h6 {
font-family: Times, serif;
font-weight: 700;
}
a {
color:#C30000;
text-decoration:none;
}
a:hover {
text-decoration: underline;
}
a small {
font-size: 12px;
}
em {
font-style: italic;
}
strong {
font-weight:700;
}
ul {
list-style: inside;
padding-left: 25px;
}
ol {
list-style: decimal inside;
padding-left: 20px;
}
blockquote {
margin: 0;
padding: 0 0 0 20px;
font-style: italic;
}
/* Definition lists use a lighter text colour than the body copy.
   `font-color` is not a CSS property and was ignored; `color` is the
   property that sets the text colour. */
dl, dt, dd, dl p {
  color: #444;
}
dl dt {
font-weight: bold;
}
dl dd {
padding-left: 20px;
font-style: italic;
}
dl p {
padding-left: 20px;
font-style: italic;
}
hr {
border:0;
background:#ccc;
height:1px;
margin:0 0 24px;
}
/* Images */
img {
position: relative;
margin: 0 auto;
max-width: 650px;
padding: 5px;
margin: 10px 0 32px 0;
border: 1px solid #ccc;
}
p img {
display: inline;
margin: 0;
padding: 0;
vertical-align: middle;
text-align: center;
border: none;
}
/* Code blocks */
code, pre {
font-family: monospace;
color:#000;
font-size:12px;
line-height: 14px;
}
pre {
padding: 6px 12px;
background: #FDFEFB;
border-radius:4px;
border:1px solid #D7D8C8;
overflow: auto;
white-space: pre-wrap;
margin-bottom: 16px;
}
/* Tables */
table {
width:100%;
}
table {
border: 1px solid #ccc;
margin-bottom: 32px;
text-align: left;
}
th {
font-family: 'Arvo', Helvetica, Arial, sans-serif;
font-size: 18px;
font-weight: normal;
padding: 10px;
background: #232323;
color: #FDFEFB;
}
td {
padding: 10px;
background: #ccc;
}
/* Wrapper */
.wrapper {
width:960px;
}
/* Header */
header {
background-color: #171717;
color: #FDFDFB;
width:170px;
float:left;
position:fixed;
border: 1px solid #000;
-webkit-border-top-right-radius: 4px;
-webkit-border-bottom-right-radius: 4px;
-moz-border-radius-topright: 4px;
-moz-border-radius-bottomright: 4px;
border-top-right-radius: 4px;
border-bottom-right-radius: 4px;
padding: 12px 25px 22px 50px;
margin: 24px 25px 0 0;
-webkit-font-smoothing: antialiased;
}
p.header {
font-size: 16px;
}
h1.header {
/*font-family: "Helvetica Neue", Helvetica, Arial, sans-serif;*/
font-size: 30px;
font-weight: 300;
line-height: 1.3em;
border-bottom: none;
margin-top: 0;
}
h1.header, a.header, a.name, header a{
color: #fff;
}
a.header {
text-decoration: underline;
}
a.name {
white-space: nowrap;
}
header ul {
list-style:none;
padding:0;
}
/* Sidebar navigation buttons: dark-red vertical gradient with a solid-colour
   fallback. Declarations are ordered so later, more capable syntaxes override
   earlier ones. */
header li {
  list-style-type: none;
  width:132px;
  height:15px;
  margin-bottom: 12px;
  line-height: 1em;
  padding: 6px 6px 6px 7px;
  /* Solid fallback for browsers without gradient support. */
  background: #AF0011;
  background: -moz-linear-gradient(top, #AF0011 0%, #820011 100%);
  /* Legacy WebKit syntax: colour stops now match the other gradient lines
     (they previously held unrelated grey values). */
  background: -webkit-gradient(linear, left top, left bottom, color-stop(0%,#AF0011), color-stop(100%,#820011));
  background: -webkit-linear-gradient(top, #AF0011 0%,#820011 100%);
  background: -o-linear-gradient(top, #AF0011 0%,#820011 100%);
  background: -ms-linear-gradient(top, #AF0011 0%,#820011 100%);
  /* Standard syntax names the destination side ("to bottom"); the bare
     "top" keyword is only valid in the prefixed forms above. */
  background: linear-gradient(to bottom, #AF0011 0%,#820011 100%);
  border-radius:4px;
  border:1px solid #0D0D0D;
  -webkit-box-shadow: inset 0px 1px 1px 0 rgba(233,2,38, 1);
  box-shadow: inset 0px 1px 1px 0 rgba(233,2,38, 1);
}
/* Hover state of the sidebar buttons: a brighter red version of the same
   vertical gradient, with a solid-colour fallback first. */
header li:hover {
  background: #C3001D;
  background: -moz-linear-gradient(top, #C3001D 0%, #950119 100%);
  /* Legacy WebKit syntax: colour stops now match the other gradient lines
     (they previously held unrelated grey values). */
  background: -webkit-gradient(linear, left top, left bottom, color-stop(0%,#C3001D), color-stop(100%,#950119));
  background: -webkit-linear-gradient(top, #C3001D 0%,#950119 100%);
  background: -o-linear-gradient(top, #C3001D 0%,#950119 100%);
  background: -ms-linear-gradient(top, #C3001D 0%,#950119 100%);
  /* Standard syntax names the destination side ("to bottom"). */
  background: linear-gradient(to bottom, #C3001D 0%,#950119 100%);
}
a.buttons {
-webkit-font-smoothing: antialiased;
background: url(../images/arrow-down.png) no-repeat;
font-weight: normal;
text-shadow: rgba(0, 0, 0, 0.4) 0 -1px 0;
padding: 2px 2px 2px 22px;
height: 30px;
}
a.github {
background: url(../images/octocat-small.png) no-repeat 1px;
}
a.buttons:hover {
color: #fff;
text-decoration: none;
}
/* Section - for main page content */
section {
width:650px;
float:right;
padding-bottom:50px;
}
/* Footer */
footer {
width:170px;
float:left;
position:fixed;
bottom:10px;
padding-left: 50px;
}
@media print, screen and (max-width: 960px) {
div.wrapper {
width:auto;
margin:0;
}
header, section, footer {
float:none;
position:static;
width:auto;
}
footer {
border-top: 1px solid #ccc;
margin:0 84px 0 50px;
padding:0;
}
header {
padding-right:320px;
}
section {
padding:20px 84px 20px 50px;
margin:0 0 20px;
}
header a small {
display:inline;
}
header ul {
position:absolute;
right:130px;
top:84px;
}
}
@media print, screen and (max-width: 720px) {
body {
word-wrap:break-word;
}
header {
padding:10px 20px 0;
margin-right: 0;
}
section {
padding:10px 0 10px 20px;
margin:0 0 30px;
}
footer {
margin: 0 0 0 30px;
}
header ul, header p.view {
position:static;
}
}
@media print, screen and (max-width: 480px) {
header ul li.download {
display:none;
}
footer {
margin: 0 0 0 20px;
}
footer a{
display:block;
}
}
@media print {
body {
padding:0.4in;
font-size:12pt;
color:#444;
}
}

@ -0,0 +1,100 @@
// Copyright 2014 BVLC and contributors.
#ifndef CAFFE_BLOB_HPP_
#define CAFFE_BLOB_HPP_
#include "caffe/common.hpp"
#include "caffe/syncedmem.hpp"
#include "caffe/proto/caffe.pb.h"
namespace caffe {
// A 4-dimensional array of Dtype values described by the dimensions
// (num, channels, height, width).
//
// The values live in two SyncedMemory buffers held through shared_ptr: data_
// and diff_. Const and mutable accessors are declared for both the CPU and
// the GPU side of each buffer. Copy construction and assignment are disabled
// via DISABLE_COPY_AND_ASSIGN.
template <typename Dtype>
class Blob {
 public:
  // Creates an empty blob: all dimensions and the element count are zero and
  // both buffer pointers are default-constructed.
  // The initializers are listed in member declaration order (data_, diff_,
  // then the dimensions), which is the order C++ runs them in regardless of
  // how the list is written; this keeps -Wreorder quiet.
  Blob()
      : data_(), diff_(), num_(0), channels_(0), height_(0), width_(0),
        count_(0) {}
  // Constructs a blob from four dimensions (defined out of line).
  explicit Blob(const int num, const int channels, const int height,
      const int width);
  // Reshape takes four new dimensions; ReshapeLike takes them from another
  // blob. Both are defined out of line.
  void Reshape(const int num, const int channels, const int height,
      const int width);
  void ReshapeLike(const Blob& other);

  // Dimension getters.
  inline int num() const { return num_; }
  inline int channels() const { return channels_; }
  inline int height() const { return height_; }
  inline int width() const { return width_; }
  inline int count() const { return count_; }

  // Returns the flat index of element (n, c, h, w):
  //   ((n * channels_ + c) * height_ + h) * width_ + w
  // so w varies fastest and n slowest.
  // The upper-bound checks use CHECK_LE, so an index equal to its dimension
  // (one past the last element) is accepted.
  // NOTE(review): the lower-bound checks for c, h and w test the stored
  // dimensions (channels_, height_, width_) rather than the indices, so only
  // n is verified to be non-negative -- confirm whether that is intended.
  inline int offset(const int n, const int c = 0, const int h = 0,
      const int w = 0) const {
    CHECK_GE(n, 0);
    CHECK_LE(n, num_);
    CHECK_GE(channels_, 0);
    CHECK_LE(c, channels_);
    CHECK_GE(height_, 0);
    CHECK_LE(h, height_);
    CHECK_GE(width_, 0);
    CHECK_LE(w, width_);
    return ((n * channels_ + c) * height_ + h) * width_ + w;
  }

  // Copy from source. If copy_diff is false, we copy the data; if copy_diff
  // is true, we copy the diff.
  void CopyFrom(const Blob<Dtype>& source, bool copy_diff = false,
      bool reshape = false);

  // Element accessors: read one value through the CPU pointer at
  // offset(n, c, h, w), from the data buffer or the diff buffer respectively.
  inline Dtype data_at(const int n, const int c, const int h,
      const int w) const {
    return *(cpu_data() + offset(n, c, h, w));
  }
  inline Dtype diff_at(const int n, const int c, const int h,
      const int w) const {
    return *(cpu_diff() + offset(n, c, h, w));
  }

  // Direct access to the underlying buffers; CHECK-fails when the pointer is
  // empty (as it is after default construction).
  inline const shared_ptr<SyncedMemory>& data() const {
    CHECK(data_);
    return data_;
  }
  inline const shared_ptr<SyncedMemory>& diff() const {
    CHECK(diff_);
    return diff_;
  }

  // Read-only and mutable raw pointers into the data and diff buffers, for
  // the CPU and GPU side. All are defined out of line.
  const Dtype* cpu_data() const;
  void set_cpu_data(Dtype* data);
  const Dtype* gpu_data() const;
  const Dtype* cpu_diff() const;
  const Dtype* gpu_diff() const;
  Dtype* mutable_cpu_data();
  Dtype* mutable_gpu_data();
  Dtype* mutable_cpu_diff();
  Dtype* mutable_gpu_diff();
  // NOTE(review): presumably applies diff_ to data_ as a parameter update --
  // confirm against the definition.
  void Update();
  // Conversion from and to a BlobProto message; write_diff selects whether
  // ToProto also handles the diff (see the definition for details).
  void FromProto(const BlobProto& proto);
  void ToProto(BlobProto* proto, bool write_diff = false) const;

  // Set the data_/diff_ shared_ptr to point to the SyncedMemory holding the
  // data_/diff_ of Blob other -- useful in layers which simply perform a copy
  // in their forward or backward pass.
  // This deallocates the SyncedMemory holding this blob's data/diff, as
  // shared_ptr calls its destructor when reset with the = operator.
  void ShareData(const Blob& other);
  void ShareDiff(const Blob& other);

 protected:
  // Value and gradient storage.
  shared_ptr<SyncedMemory> data_;
  shared_ptr<SyncedMemory> diff_;
  // Dimensions, outermost (num_) to innermost (width_).
  int num_;
  int channels_;
  int height_;
  int width_;
  // Element count reported by count(); presumably the product of the four
  // dimensions -- maintained by the out-of-line Reshape.
  int count_;

  DISABLE_COPY_AND_ASSIGN(Blob);
};  // class Blob
} // namespace caffe
#endif // CAFFE_BLOB_HPP_

@ -0,0 +1,19 @@
// Copyright 2014 BVLC and contributors.
// caffe.hpp is the header file that you need to include in your code. It wraps
// all the internal caffe header files into one for simpler inclusion.
#ifndef CAFFE_CAFFE_HPP_
#define CAFFE_CAFFE_HPP_
#include "caffe/common.hpp"
#include "caffe/blob.hpp"
#include "caffe/filler.hpp"
#include "caffe/layer.hpp"
#include "caffe/net.hpp"
#include "caffe/solver.hpp"
#include "caffe/util/io.hpp"
#include "caffe/vision_layers.hpp"
#include "caffe/proto/caffe.pb.h"
#endif // CAFFE_CAFFE_HPP_

@ -0,0 +1,169 @@
// Copyright 2014 BVLC and contributors.
#ifndef CAFFE_COMMON_HPP_
#define CAFFE_COMMON_HPP_
#include <boost/shared_ptr.hpp>
#include <cublas_v2.h>
#include <cuda.h>
#include <curand.h>
#include <driver_types.h> // cuda driver types
#include <glog/logging.h>
// Disable the copy and assignment operator for a class.
// Expands to a `private:` label followed by declarations (without
// definitions) of the copy constructor and copy-assignment operator.
// Usage notes:
//  - the expansion has no trailing semicolon, so write
//    DISABLE_COPY_AND_ASSIGN(Foo); inside the class body;
//  - everything declared after the macro in the class is private, so place
//    it last or follow it with an explicit access specifier.
#define DISABLE_COPY_AND_ASSIGN(classname) \
private:\
classname(const classname&);\
classname& operator=(const classname&)
// Instantiate a class with float and double specifications.
// Emits explicit template instantiations classname<float> and
// classname<double>; the caller supplies the final semicolon.
#define INSTANTIATE_CLASS(classname) \
template class classname<float>; \
template class classname<double>
// A simple macro to mark codes that are not implemented, so that when the code
// is executed we will see a fatal log.
// Expands to a glog LOG(FATAL) stream expression; use it as a statement:
//   NOT_IMPLEMENTED;
#define NOT_IMPLEMENTED LOG(FATAL) << "Not Implemented Yet"
// CUDA: various checks for different function calls.
// Each *_CHECK macro evaluates `condition` once, stores the returned status in
// a local variable, and compares it against the library's success code with
// glog's CHECK_EQ, streaming the library's error string into the message.
// The do { ... } while (0) wrapper scopes that local and lets the macro be
// used as a single statement followed by a semicolon.
// NOTE(review): the local is named `error` (CUDA) or `status` (cuBLAS/cuRAND);
// an argument expression that itself refers to a variable of that name would
// bind to the macro's local instead -- confirm no caller does this.
#define CUDA_CHECK(condition) \
/* Code block avoids redefinition of cudaError_t error */ \
do { \
cudaError_t error = condition; \
CHECK_EQ(error, cudaSuccess) << " " << cudaGetErrorString(error); \
} while (0)
// Same pattern for cuBLAS calls; the message comes from
// caffe::cublasGetErrorString, which is declared elsewhere.
#define CUBLAS_CHECK(condition) \
do { \
cublasStatus_t status = condition; \
CHECK_EQ(status, CUBLAS_STATUS_SUCCESS) << " " \
<< caffe::cublasGetErrorString(status); \
} while (0)
// Same pattern for cuRAND calls; the message comes from
// caffe::curandGetErrorString, which is declared elsewhere.
#define CURAND_CHECK(condition) \
do { \
curandStatus_t status = condition; \
CHECK_EQ(status, CURAND_STATUS_SUCCESS) << " " \
<< caffe::curandGetErrorString(status); \
} while (0)
// CUDA: grid stride looping
// Expands to a for-loop header declaring `int i`: it starts at
// blockIdx.x * blockDim.x + threadIdx.x, advances by blockDim.x * gridDim.x,
// and runs while i < n. Follow the macro with the loop body.
#define CUDA_KERNEL_LOOP(i, n) \
for (int i = blockIdx.x * blockDim.x + threadIdx.x; \
i < (n); \
i += blockDim.x * gridDim.x)
// CUDA: check for error after kernel execution and exit loudly if there is one.
// Implemented as CUDA_CHECK applied to cudaPeekAtLastError().
#define CUDA_POST_KERNEL_CHECK CUDA_CHECK(cudaPeekAtLastError())
// Define not supported status for pre-6.0 compatibility.
#if CUDA_VERSION < 6000
#define CUBLAS_STATUS_NOT_SUPPORTED 831486
#endif
namespace caffe {
// We will use the boost shared_ptr instead of the new C++11 one mainly
// because cuda does not work (at least now) well with C++11 features.
using boost::shared_ptr;
// Singleton bundling the state shared across caffe: the cuBLAS handle, the
// cuRAND generator, the RNG facade, and the current mode (CPU/GPU) and phase
// (TRAIN/TEST). All public accessors are static and go through Get().
class Caffe {
 public:
  ~Caffe();

  // Returns the singleton instance, constructing it on first use. No locking
  // is performed here.
  inline static Caffe& Get() {
    if (!singleton_) {
      singleton_.reset(new Caffe());
    }
    return *singleton_;
  }

  enum Brew { CPU, GPU };
  enum Phase { TRAIN, TEST };

  // Random number generator facade that keeps the boost and CUDA rng
  // implementations hidden from one another (for cross-platform
  // compatibility). The concrete generator lives behind the opaque
  // Generator class and is exposed only as a void*.
  class RNG {
   public:
    RNG();
    explicit RNG(unsigned int seed);
    explicit RNG(const RNG&);
    RNG& operator=(const RNG&);
    void* generator();
   private:
    class Generator;
    shared_ptr<Generator> generator_;
  };

  // Accessors for the boost rng, curand, and cublas handles.
  // The RNG facade is created lazily on the first request.
  inline static RNG& rng_stream() {
    Caffe& instance = Get();
    if (!instance.random_generator_) {
      instance.random_generator_.reset(new RNG());
    }
    return *(instance.random_generator_);
  }
  inline static cublasHandle_t cublas_handle() {
    return Get().cublas_handle_;
  }
  inline static curandGenerator_t curand_generator() {
    return Get().curand_generator_;
  }

  // Current mode: running on CPU or GPU.
  inline static Brew mode() {
    return Get().mode_;
  }
  // Current phase: TRAIN or TEST.
  inline static Phase phase() {
    return Get().phase_;
  }

  // Setters.
  // Changing the mode halfway through a program is discouraged: memory that
  // was allocated pinned may end up being freed in a non-pinned way, which may
  // cause problems. This has not been verified, but is noted here as a caveat.
  inline static void set_mode(Brew mode) {
    Get().mode_ = mode;
  }
  // Sets the phase.
  inline static void set_phase(Phase phase) {
    Get().phase_ = phase;
  }
  // Sets the random seed of both boost and curand.
  static void set_random_seed(const unsigned int seed);
  // Sets the device. Because of the cublas and curand state, setting the
  // device also requires resetting those values.
  static void SetDevice(const int device_id);
  // Prints the current GPU status.
  static void DeviceQuery();

 protected:
  cublasHandle_t cublas_handle_;
  curandGenerator_t curand_generator_;
  shared_ptr<RNG> random_generator_;
  Brew mode_;
  Phase phase_;
  static shared_ptr<Caffe> singleton_;

 private:
  // Private constructor: instances are only created through Get().
  Caffe();
  DISABLE_COPY_AND_ASSIGN(Caffe);
};
// NVIDIA_CUDA-5.5_Samples/common/inc/helper_cuda.h
// Status-to-string helpers, declared here and defined elsewhere. The
// CUBLAS_CHECK and CURAND_CHECK macros stream their results into the failure
// message.
const char* cublasGetErrorString(cublasStatus_t error);
const char* curandGetErrorString(curandStatus_t error);
// CUDA: threads-per-block configuration.
// 1024 threads per block are used when compiling for sm_2x or above; every
// other compilation pass (including ones where __CUDA_ARCH__ is undefined)
// falls back to 512 as a compatibility attempt.
#if defined(__CUDA_ARCH__) && __CUDA_ARCH__ >= 200
const int CAFFE_CUDA_NUM_THREADS = 1024;
#else
const int CAFFE_CUDA_NUM_THREADS = 512;
#endif

// CUDA: number of blocks needed so that blocks * CAFFE_CUDA_NUM_THREADS
// covers N threads, i.e. N / CAFFE_CUDA_NUM_THREADS rounded up.
inline int CAFFE_GET_BLOCKS(const int N) {
  const int padded = N + CAFFE_CUDA_NUM_THREADS - 1;
  return padded / CAFFE_CUDA_NUM_THREADS;
}
} // namespace caffe
#endif // CAFFE_COMMON_HPP_

@ -0,0 +1,337 @@
// Copyright 2014 BVLC and contributors.
#ifndef CAFFE_DATA_LAYERS_HPP_
#define CAFFE_DATA_LAYERS_HPP_
#include <string>
#include <utility>
#include <vector>
#include <opencv2/opencv.hpp>
#include "leveldb/db.h"
#include "lmdb.h"
#include "pthread.h"
#include "hdf5.h"
#include "boost/scoped_ptr.hpp"
#include "caffe/blob.hpp"
#include "caffe/common.hpp"
#include "caffe/filler.hpp"
#include "caffe/layer.hpp"
#include "caffe/proto/caffe.pb.h"
namespace caffe {
// String constants "data" and "label".
// NOTE(review): presumably the HDF5 dataset names used by the HDF5 layers
// declared below -- their use is not visible in this header.
#define HDF5_DATA_DATASET_NAME "data"
#define HDF5_DATA_LABEL_NAME "label"
// Layer that takes exactly two bottom blobs and produces no top blobs (see the
// ExactNum*Blobs overrides). Only the interface is declared here; the
// constructor, destructor, Forward/Backward and SaveBlobs are defined
// elsewhere.
// NOTE(review): presumably writes the bottom blobs to the HDF5 file named by
// file_name_ -- confirm against the implementation.
template <typename Dtype>
class HDF5OutputLayer : public Layer<Dtype> {
public:
explicit HDF5OutputLayer(const LayerParameter& param);
virtual ~HDF5OutputLayer();
// Empty override: Layer::SetUp is not called from here, so the blob-count
// check it performs does not run through this method.
virtual void SetUp(const vector<Blob<Dtype>*>& bottom,
vector<Blob<Dtype>*>* top) {}
virtual inline LayerParameter_LayerType type() const {
return LayerParameter_LayerType_HDF5_OUTPUT;
}
// TODO: no limit on the number of blobs
virtual inline int ExactNumBottomBlobs() const { return 2; }
virtual inline int ExactNumTopBlobs() const { return 0; }
// Returns a copy of the stored file name.
inline std::string file_name() const { return file_name_; }
protected:
virtual Dtype Forward_cpu(const vector<Blob<Dtype>*>& bottom,
vector<Blob<Dtype>*>* top);
virtual Dtype Forward_gpu(const vector<Blob<Dtype>*>& bottom,
vector<Blob<Dtype>*>* top);
virtual void Backward_cpu(const vector<Blob<Dtype>*>& top,
const bool propagate_down, vector<Blob<Dtype>*>* bottom);
virtual void Backward_gpu(const vector<Blob<Dtype>*>& top,
const bool propagate_down, vector<Blob<Dtype>*>* bottom);
virtual void SaveBlobs();
std::string file_name_;
// HDF5 identifier (hid_t). NOTE(review): presumably the handle of the opened
// output file -- confirm.
hid_t file_id_;
Blob<Dtype> data_blob_;
Blob<Dtype> label_blob_;
};
// Layer with no bottom blobs and exactly two top blobs (see the
// ExactNum*Blobs overrides). SetUp, the destructor, Forward/Backward and
// LoadHDF5FileData are defined elsewhere.
// NOTE(review): presumably reads data/label pairs from the HDF5 files listed
// in hdf_filenames_ -- confirm against the implementation.
template <typename Dtype>
class HDF5DataLayer : public Layer<Dtype> {
public:
explicit HDF5DataLayer(const LayerParameter& param)
: Layer<Dtype>(param) {}
virtual ~HDF5DataLayer();
virtual void SetUp(const vector<Blob<Dtype>*>& bottom,
vector<Blob<Dtype>*>* top);
virtual inline LayerParameter_LayerType type() const {
return LayerParameter_LayerType_HDF5_DATA;
}
virtual inline int ExactNumBottomBlobs() const { return 0; }
virtual inline int ExactNumTopBlobs() const { return 2; }
protected:
virtual Dtype Forward_cpu(const vector<Blob<Dtype>*>& bottom,
vector<Blob<Dtype>*>* top);
virtual Dtype Forward_gpu(const vector<Blob<Dtype>*>& bottom,
vector<Blob<Dtype>*>* top);
virtual void Backward_cpu(const vector<Blob<Dtype>*>& top,
const bool propagate_down, vector<Blob<Dtype>*>* bottom);
virtual void Backward_gpu(const vector<Blob<Dtype>*>& top,
const bool propagate_down, vector<Blob<Dtype>*>* bottom);
virtual void LoadHDF5FileData(const char* filename);
std::vector<std::string> hdf_filenames_;
// NOTE(review): presumably the size of hdf_filenames_ and the index/row of
// the read position -- confirm against the implementation.
unsigned int num_files_;
unsigned int current_file_;
hsize_t current_row_;
Blob<Dtype> data_blob_;
Blob<Dtype> label_blob_;
};
// TODO: DataLayer, ImageDataLayer, and WindowDataLayer all have the
// same basic structure and a lot of duplicated code.
// This function is used to create a pthread that prefetches the data.
// It has the void* (*)(void*) shape of a pthread start routine; DataLayer
// declares it a friend so it can reach the layer's protected members.
template <typename Dtype>
void* DataLayerPrefetch(void* layer_pointer);
// Data-source layer with no bottom blobs and one or two top blobs (see the
// *Blobs overrides). Both Backward implementations are no-ops. The class
// declares handles for both LevelDB and LMDB; SetUp, Forward and the prefetch
// helpers are defined elsewhere.
template <typename Dtype>
class DataLayer : public Layer<Dtype> {
// The function used to perform prefetching.
friend void* DataLayerPrefetch<Dtype>(void* layer_pointer);
public:
explicit DataLayer(const LayerParameter& param)
: Layer<Dtype>(param) {}
virtual ~DataLayer();
virtual void SetUp(const vector<Blob<Dtype>*>& bottom,
vector<Blob<Dtype>*>* top);
virtual inline LayerParameter_LayerType type() const {
return LayerParameter_LayerType_DATA;
}
virtual inline int ExactNumBottomBlobs() const { return 0; }
virtual inline int MinTopBlobs() const { return 1; }
virtual inline int MaxTopBlobs() const { return 2; }
protected:
virtual Dtype Forward_cpu(const vector<Blob<Dtype>*>& bottom,
vector<Blob<Dtype>*>* top);
virtual Dtype Forward_gpu(const vector<Blob<Dtype>*>& bottom,
vector<Blob<Dtype>*>* top);
// Backward passes do nothing for this layer.
virtual void Backward_cpu(const vector<Blob<Dtype>*>& top,
const bool propagate_down, vector<Blob<Dtype>*>* bottom) { return; }
virtual void Backward_gpu(const vector<Blob<Dtype>*>& top,
const bool propagate_down, vector<Blob<Dtype>*>* bottom) { return; }
virtual void CreatePrefetchThread();
virtual void JoinPrefetchThread();
virtual unsigned int PrefetchRand();
shared_ptr<Caffe::RNG> prefetch_rng_;
// LEVELDB
shared_ptr<leveldb::DB> db_;
shared_ptr<leveldb::Iterator> iter_;
// LMDB
MDB_env* mdb_env_;
MDB_dbi mdb_dbi_;
MDB_txn* mdb_txn_;
MDB_cursor* mdb_cursor_;
MDB_val mdb_key_, mdb_value_;
// NOTE(review): presumably the dimensions of one datum, with datum_size_
// their product -- confirm against SetUp.
int datum_channels_;
int datum_height_;
int datum_width_;
int datum_size_;
// Handle of the prefetch thread.
pthread_t thread_;
shared_ptr<Blob<Dtype> > prefetch_data_;
shared_ptr<Blob<Dtype> > prefetch_label_;
Blob<Dtype> data_mean_;
// NOTE(review): presumably true when a second (label) top blob is requested
// -- confirm against SetUp.
bool output_labels_;
Caffe::Phase phase_;
};
// Layer with no bottom blobs and at least one top blob (see the *Blobs
// overrides). Both Backward implementations are no-ops. Only Forward_cpu is
// declared, so GPU mode uses Layer's default Forward_gpu, which calls
// Forward_cpu.
// NOTE(review): presumably fills each top blob with the matching entry of
// fillers_, with refill_ deciding whether to refill on each forward pass --
// confirm against the implementation.
template <typename Dtype>
class DummyDataLayer : public Layer<Dtype> {
public:
explicit DummyDataLayer(const LayerParameter& param)
: Layer<Dtype>(param) {}
virtual void SetUp(const vector<Blob<Dtype>*>& bottom,
vector<Blob<Dtype>*>* top);
virtual inline LayerParameter_LayerType type() const {
return LayerParameter_LayerType_DUMMY_DATA;
}
virtual inline int ExactNumBottomBlobs() const { return 0; }
virtual inline int MinTopBlobs() const { return 1; }
protected:
virtual Dtype Forward_cpu(const vector<Blob<Dtype>*>& bottom,
vector<Blob<Dtype>*>* top);
// Backward passes do nothing for this layer.
virtual void Backward_cpu(const vector<Blob<Dtype>*>& top,
const bool propagate_down, vector<Blob<Dtype>*>* bottom) { return; }
virtual void Backward_gpu(const vector<Blob<Dtype>*>& top,
const bool propagate_down, vector<Blob<Dtype>*>* bottom) { return; }
vector<shared_ptr<Filler<Dtype> > > fillers_;
vector<bool> refill_;
};
// This function is used to create a pthread that prefetches the data.
// It has the void* (*)(void*) shape of a pthread start routine;
// ImageDataLayer declares it a friend so it can reach the layer's protected
// members.
template <typename Dtype>
void* ImageDataLayerPrefetch(void* layer_pointer);
// Data-source layer with no bottom blobs and exactly two top blobs (see the
// ExactNum*Blobs overrides). Both Backward implementations are no-ops.
// Besides the usual SetUp it exposes SetUpWithDatum and AddImagesAndLabels,
// which accepts in-memory cv::Mat images with integer labels. All
// non-trivial members are defined elsewhere.
template <typename Dtype>
class ImageDataLayer : public Layer<Dtype> {
// The function used to perform prefetching.
friend void* ImageDataLayerPrefetch<Dtype>(void* layer_pointer);
public:
explicit ImageDataLayer(const LayerParameter& param)
: Layer<Dtype>(param) {}
virtual ~ImageDataLayer();
virtual void SetUp(const vector<Blob<Dtype>*>& bottom,
vector<Blob<Dtype>*>* top);
// Note: `datum` is taken by value, so each call copies the Datum.
void SetUpWithDatum(const int crop_size, const Datum datum,
vector<Blob<Dtype>*>* top);
virtual void AddImagesAndLabels(const vector<cv::Mat>& images,
const vector<int>& labels);
virtual inline LayerParameter_LayerType type() const {
return LayerParameter_LayerType_IMAGE_DATA;
}
virtual inline int ExactNumBottomBlobs() const { return 0; }
virtual inline int ExactNumTopBlobs() const { return 2; }
protected:
virtual Dtype Forward_cpu(const vector<Blob<Dtype>*>& bottom,
vector<Blob<Dtype>*>* top);
virtual Dtype Forward_gpu(const vector<Blob<Dtype>*>& bottom,
vector<Blob<Dtype>*>* top);
// Backward passes do nothing for this layer.
virtual void Backward_cpu(const vector<Blob<Dtype>*>& top,
const bool propagate_down, vector<Blob<Dtype>*>* bottom) { return; }
virtual void Backward_gpu(const vector<Blob<Dtype>*>& top,
const bool propagate_down, vector<Blob<Dtype>*>* bottom) { return; }
virtual void ShuffleImages();
virtual void CreatePrefetchThread();
virtual void JoinPrefetchThread();
virtual unsigned int PrefetchRand();
shared_ptr<Caffe::RNG> prefetch_rng_;
// (string, int) pairs. NOTE(review): presumably (image path, label) with
// lines_id_ the index of the next entry to read -- confirm.
vector<std::pair<std::string, int> > lines_;
int lines_id_;
int datum_channels_;
int datum_height_;
int datum_width_;
int datum_size_;
// Handle of the prefetch thread.
pthread_t thread_;
shared_ptr<Blob<Dtype> > prefetch_data_;
shared_ptr<Blob<Dtype> > prefetch_label_;
Blob<Dtype> data_mean_;
Caffe::Phase phase_;
// NOTE(review): presumably set once SetUpWithDatum has run -- confirm.
bool is_datum_set_up_;
// Non-owning pointer to a top-blob vector. NOTE(review): presumably the
// vector passed to SetUp, kept for later use -- confirm its lifetime.
vector<Blob<Dtype>*>* top_;
};
/* MemoryDataLayer
   Layer with no bottom blobs and exactly two top blobs. The source data and
   labels are raw caller-provided pointers handed over through Reset().
   Only Forward_cpu is declared, so GPU mode uses Layer's default Forward_gpu,
   which calls Forward_cpu. Both Backward implementations are no-ops.
*/
template <typename Dtype>
class MemoryDataLayer : public Layer<Dtype> {
 public:
  explicit MemoryDataLayer(const LayerParameter& param)
      : Layer<Dtype>(param) {}
  virtual void SetUp(const vector<Blob<Dtype>*>& bottom,
      vector<Blob<Dtype>*>* top);

  virtual inline LayerParameter_LayerType type() const {
    return LayerParameter_LayerType_MEMORY_DATA;
  }
  // These must be const-qualified to override Layer's const virtuals.
  // Without `const` they are unrelated functions that merely hide the base
  // versions, and Layer::CheckBlobCounts (which dispatches through the const
  // virtuals) would see -1 and skip the blob-count checks for this layer.
  virtual inline int ExactNumBottomBlobs() const { return 0; }
  virtual inline int ExactNumTopBlobs() const { return 2; }

  // Reset should accept const pointers, but can't, because the memory
  // will be given to Blob, which is mutable
  void Reset(Dtype* data, Dtype* label, int n);

  int datum_channels() { return datum_channels_; }
  int datum_height() { return datum_height_; }
  int datum_width() { return datum_width_; }
  int batch_size() { return batch_size_; }

 protected:
  virtual Dtype Forward_cpu(const vector<Blob<Dtype>*>& bottom,
      vector<Blob<Dtype>*>* top);
  // Backward passes do nothing for this layer.
  virtual void Backward_cpu(const vector<Blob<Dtype>*>& top,
      const bool propagate_down, vector<Blob<Dtype>*>* bottom) { return; }
  virtual void Backward_gpu(const vector<Blob<Dtype>*>& top,
      const bool propagate_down, vector<Blob<Dtype>*>* bottom) { return; }

  // Raw data/label pointers.
  // NOTE(review): presumably not owned by the layer -- confirm against Reset().
  Dtype* data_;
  Dtype* labels_;
  int datum_channels_;
  int datum_height_;
  int datum_width_;
  int datum_size_;
  int batch_size_;
  // NOTE(review): presumably the number of items passed to Reset() and the
  // current read position within them -- confirm against the implementation.
  int n_;
  int pos_;
};
// This function is used to create a pthread that prefetches the window data.
// It has the void* (*)(void*) shape of a pthread start routine;
// WindowDataLayer declares it a friend so it can reach the layer's protected
// members.
template <typename Dtype>
void* WindowDataLayerPrefetch(void* layer_pointer);
// Data-source layer with no bottom blobs and exactly two top blobs (see the
// ExactNum*Blobs overrides). Both Backward implementations are no-ops. SetUp,
// Forward and the prefetch helpers are defined elsewhere.
template <typename Dtype>
class WindowDataLayer : public Layer<Dtype> {
// The function used to perform prefetching.
friend void* WindowDataLayerPrefetch<Dtype>(void* layer_pointer);
public:
explicit WindowDataLayer(const LayerParameter& param)
: Layer<Dtype>(param) {}
virtual ~WindowDataLayer();
virtual void SetUp(const vector<Blob<Dtype>*>& bottom,
vector<Blob<Dtype>*>* top);
virtual inline LayerParameter_LayerType type() const {
return LayerParameter_LayerType_WINDOW_DATA;
}
virtual inline int ExactNumBottomBlobs() const { return 0; }
virtual inline int ExactNumTopBlobs() const { return 2; }
protected:
virtual Dtype Forward_cpu(const vector<Blob<Dtype>*>& bottom,
vector<Blob<Dtype>*>* top);
virtual Dtype Forward_gpu(const vector<Blob<Dtype>*>& bottom,
vector<Blob<Dtype>*>* top);
// Backward passes do nothing for this layer.
virtual void Backward_cpu(const vector<Blob<Dtype>*>& top,
const bool propagate_down, vector<Blob<Dtype>*>* bottom) { return; }
virtual void Backward_gpu(const vector<Blob<Dtype>*>& top,
const bool propagate_down, vector<Blob<Dtype>*>* bottom) { return; }
virtual void CreatePrefetchThread();
virtual void JoinPrefetchThread();
virtual unsigned int PrefetchRand();
shared_ptr<Caffe::RNG> prefetch_rng_;
// Handle of the prefetch thread.
pthread_t thread_;
shared_ptr<Blob<Dtype> > prefetch_data_;
shared_ptr<Blob<Dtype> > prefetch_label_;
Blob<Dtype> data_mean_;
// (string, vector<int>) pairs. NOTE(review): presumably (image path, image
// dimensions) -- confirm against SetUp.
vector<std::pair<std::string, vector<int> > > image_database_;
// NOTE(review): presumably the field indices of one window record in
// fg_windows_ / bg_windows_, with NUM the number of fields -- confirm.
enum WindowField { IMAGE_INDEX, LABEL, OVERLAP, X1, Y1, X2, Y2, NUM };
vector<vector<float> > fg_windows_;
vector<vector<float> > bg_windows_;
};
} // namespace caffe
#endif // CAFFE_DATA_LAYERS_HPP_

@ -0,0 +1,173 @@
// Copyright 2014 BVLC and contributors.
// Fillers are random number generators that fills a blob using the specified
// algorithm. The expectation is that they are only going to be used during
// initialization time and will not involve any GPUs.
#ifndef CAFFE_FILLER_HPP
#define CAFFE_FILLER_HPP
#include <string>
#include "caffe/common.hpp"
#include "caffe/blob.hpp"
#include "caffe/syncedmem.hpp"
#include "caffe/util/math_functions.hpp"
#include "caffe/proto/caffe.pb.h"
namespace caffe {
// Abstract base class of all fillers. A filler stores a copy of the
// FillerParameter it was constructed with and fills a blob through Fill(),
// which every subclass must implement.
template <typename Dtype>
class Filler {
public:
explicit Filler(const FillerParameter& param) : filler_param_(param) {}
virtual ~Filler() {}
// Fills `blob`; pure virtual.
virtual void Fill(Blob<Dtype>* blob) = 0;
protected:
// Copy of the configuration passed to the constructor.
FillerParameter filler_param_;
}; // class Filler
// Filler that writes filler_param_.value() into every element of the blob's
// CPU data. The blob must be non-empty, and the `sparse` parameter must be
// left at -1 because sparsity is not supported by this filler.
template <typename Dtype>
class ConstantFiller : public Filler<Dtype> {
 public:
  explicit ConstantFiller(const FillerParameter& param)
      : Filler<Dtype>(param) {}

  virtual void Fill(Blob<Dtype>* blob) {
    Dtype* const values = blob->mutable_cpu_data();
    const int num_elements = blob->count();
    const Dtype fill_value = this->filler_param_.value();
    CHECK(num_elements);
    int index = 0;
    while (index < num_elements) {
      values[index] = fill_value;
      ++index;
    }
    CHECK_EQ(this->filler_param_.sparse(), -1)
        << "Sparsity not supported by this Filler.";
  }
};
// Filler that calls caffe_rng_uniform on the blob's CPU data with the bounds
// filler_param_.min() and filler_param_.max(). The blob must be non-empty, and
// the `sparse` parameter must be left at -1 because sparsity is not supported
// by this filler.
template <typename Dtype>
class UniformFiller : public Filler<Dtype> {
 public:
  explicit UniformFiller(const FillerParameter& param)
      : Filler<Dtype>(param) {}

  virtual void Fill(Blob<Dtype>* blob) {
    const int num_elements = blob->count();
    CHECK(num_elements);
    const Dtype lower = Dtype(this->filler_param_.min());
    const Dtype upper = Dtype(this->filler_param_.max());
    caffe_rng_uniform<Dtype>(num_elements, lower, upper,
        blob->mutable_cpu_data());
    CHECK_EQ(this->filler_param_.sparse(), -1)
        << "Sparsity not supported by this Filler.";
  }
};
// Filler that calls caffe_rng_gaussian on the blob's CPU data with
// filler_param_.mean() and filler_param_.std(). When the `sparse` parameter is
// non-negative, every element is additionally multiplied by a Bernoulli mask.
template <typename Dtype>
class GaussianFiller : public Filler<Dtype> {
 public:
  explicit GaussianFiller(const FillerParameter& param)
      : Filler<Dtype>(param) {}

  virtual void Fill(Blob<Dtype>* blob) {
    Dtype* weights = blob->mutable_cpu_data();
    CHECK(blob->count());
    const Dtype mean = Dtype(this->filler_param_.mean());
    const Dtype stddev = Dtype(this->filler_param_.std());
    caffe_rng_gaussian<Dtype>(blob->count(), mean, stddev,
        blob->mutable_cpu_data());

    const int sparse = this->filler_param_.sparse();
    CHECK_GE(sparse, -1);
    if (sparse < 0) {
      // -1 means "dense": nothing more to do.
      return;
    }

    // Sparse initialization is implemented for "weight" blobs; i.e. matrices.
    // These have num == channels == 1; height is the number of inputs and
    // width the number of outputs. `sparse` specifies the mean number of
    // non-zero input weights for a given output.
    CHECK_EQ(blob->num(), 1);
    CHECK_EQ(blob->channels(), 1);
    const int num_inputs = blob->height();
    Dtype non_zero_probability = Dtype(sparse) / Dtype(num_inputs);

    // One int per weight, written by caffe_rng_bernoulli; each weight is
    // multiplied by its mask entry.
    rand_vec_.reset(new SyncedMemory(blob->count() * sizeof(int)));
    int* mask = reinterpret_cast<int*>(rand_vec_->mutable_cpu_data());
    caffe_rng_bernoulli(blob->count(), non_zero_probability, mask);
    for (int idx = 0; idx < blob->count(); ++idx) {
      weights[idx] *= mask[idx];
    }
  }

 protected:
  // Backing storage for the Bernoulli mask used by sparse initialization.
  shared_ptr<SyncedMemory> rand_vec_;
};
// Filler that draws every element from caffe_rng_uniform(0, 1) and then
// rescales each of the blob's num() slices (of count() / num() elements each)
// so that the slice sums to one. The `sparse` parameter must be left at -1
// because sparsity is not supported by this filler.
template <typename Dtype>
class PositiveUnitballFiller : public Filler<Dtype> {
 public:
  explicit PositiveUnitballFiller(const FillerParameter& param)
      : Filler<Dtype>(param) {}

  virtual void Fill(Blob<Dtype>* blob) {
    Dtype* data = blob->mutable_cpu_data();
    // CHECK rather than DCHECK, matching the other fillers: the emptiness
    // check must also hold in builds where DCHECK is compiled out, because
    // the division by blob->num() below is undefined for num() == 0.
    CHECK(blob->count());
    caffe_rng_uniform<Dtype>(blob->count(), 0, 1, blob->mutable_cpu_data());
    // We expect the filler to not be called very frequently, so we will
    // just use a simple implementation
    int dim = blob->count() / blob->num();
    CHECK(dim);
    for (int i = 0; i < blob->num(); ++i) {
      // First pass: sum of the slice; second pass: normalize by that sum.
      Dtype sum = 0;
      for (int j = 0; j < dim; ++j) {
        sum += data[i * dim + j];
      }
      for (int j = 0; j < dim; ++j) {
        data[i * dim + j] /= sum;
      }
    }
    CHECK_EQ(this->filler_param_.sparse(), -1)
        << "Sparsity not supported by this Filler.";
  }
};
// A filler based on the paper [Glorot and Bengio 2010]: Understanding
// the difficulty of training deep feedforward neural networks, but does not
// use the fan_out value.
//
// It fills the incoming matrix by randomly sampling uniform data from
// [-scale, scale] where scale = sqrt(3 / fan_in) where fan_in is the number
// of input nodes. You should make sure the input blob has shape (num, a, b, c)
// where a * b * c = fan_in.
// Filler that samples uniformly from [-scale, scale] with
// scale = sqrt(3 / fan_in), where fan_in = count() / num(). The blob must be
// non-empty, and the `sparse` parameter must be left at -1 because sparsity is
// not supported by this filler.
template <typename Dtype>
class XavierFiller : public Filler<Dtype> {
 public:
  explicit XavierFiller(const FillerParameter& param)
      : Filler<Dtype>(param) {}

  virtual void Fill(Blob<Dtype>* blob) {
    const int num_elements = blob->count();
    CHECK(num_elements);
    const int fan_in = num_elements / blob->num();
    const Dtype scale = sqrt(Dtype(3) / fan_in);
    caffe_rng_uniform<Dtype>(num_elements, -scale, scale,
        blob->mutable_cpu_data());
    CHECK_EQ(this->filler_param_.sparse(), -1)
        << "Sparsity not supported by this Filler.";
  }
};
// Creates the filler selected by param.type(). Recognized names are
// "constant", "gaussian", "positive_unitball", "uniform" and "xavier"; any
// other name fails a CHECK. The filler is allocated with `new` and returned
// as a raw pointer. Ideally this would be replaced by a factory pattern, but
// it is left this way for now.
template <typename Dtype>
Filler<Dtype>* GetFiller(const FillerParameter& param) {
  const std::string& type = param.type();
  if (type == "constant") {
    return new ConstantFiller<Dtype>(param);
  }
  if (type == "gaussian") {
    return new GaussianFiller<Dtype>(param);
  }
  if (type == "positive_unitball") {
    return new PositiveUnitballFiller<Dtype>(param);
  }
  if (type == "uniform") {
    return new UniformFiller<Dtype>(param);
  }
  if (type == "xavier") {
    return new XavierFiller<Dtype>(param);
  }
  CHECK(false) << "Unknown filler name: " << param.type();
  // Keeps every control path returning a value after the failed check.
  return static_cast<Filler<Dtype>*>(NULL);
}
} // namespace caffe
#endif // CAFFE_FILLER_HPP

@ -0,0 +1,206 @@
// Copyright 2014 BVLC and contributors.
#ifndef CAFFE_LAYER_H_
#define CAFFE_LAYER_H_
#include <string>
#include <vector>
#include "caffe/blob.hpp"
#include "caffe/common.hpp"
#include "caffe/proto/caffe.pb.h"
using std::string;
using std::vector;
namespace caffe {
template <typename Dtype>
class Layer {
public:
// You should not implement your own constructor. Any set up code should go
// to SetUp(), where the dimensions of the bottom blobs are provided to the
// layer.
explicit Layer(const LayerParameter& param)
: layer_param_(param) {
// The only thing we do is to copy blobs if there are any.
if (layer_param_.blobs_size() > 0) {
blobs_.resize(layer_param_.blobs_size());
for (int i = 0; i < layer_param_.blobs_size(); ++i) {
blobs_[i].reset(new Blob<Dtype>());
blobs_[i]->FromProto(layer_param_.blobs(i));
}
}
}
virtual ~Layer() {}
// SetUp: your function should implement this, and call Layer::SetUp for
// common SetUp functionality.
virtual void SetUp(const vector<Blob<Dtype>*>& bottom,
vector<Blob<Dtype>*>* top) {
CheckBlobCounts(bottom, *top);
}
// Forward and backward wrappers. You should implement the cpu and
// gpu specific implementations instead, and should not change these
// functions.
inline Dtype Forward(const vector<Blob<Dtype>*>& bottom,
vector<Blob<Dtype>*>* top);
inline void Backward(const vector<Blob<Dtype>*>& top,
const bool propagate_down,
vector<Blob<Dtype>*>* bottom);
// Returns the vector of blobs.
vector<shared_ptr<Blob<Dtype> > >& blobs() {
return blobs_;
}
// Returns the layer parameter
const LayerParameter& layer_param() { return layer_param_; }
// Writes the layer parameter to a protocol buffer
virtual void ToProto(LayerParameter* param, bool write_diff = false);
// Returns the layer type as an enum value.
virtual inline LayerParameter_LayerType type() const {
return LayerParameter_LayerType_NONE;
}
// Returns the layer type name.
virtual inline const string& type_name() const {
return LayerParameter_LayerType_Name(type());
}
// These methods can be overwritten to declare that this layer type expects
// a certain number of blobs as input and output.
//
// ExactNum{Bottom,Top}Blobs return a non-negative number to require an exact
// number of bottom/top blobs; the Min/Max versions return a non-negative
// number to require a minimum and/or maximum number of blobs.
// If Exact is specified, neither Min nor Max should be specified, and vice
// versa. These methods may not rely on SetUp having been called.
virtual inline int ExactNumBottomBlobs() const { return -1; }
virtual inline int MinBottomBlobs() const { return -1; }
virtual inline int MaxBottomBlobs() const { return -1; }
virtual inline int ExactNumTopBlobs() const { return -1; }
virtual inline int MinTopBlobs() const { return -1; }
virtual inline int MaxTopBlobs() const { return -1; }
protected:
// The protobuf that stores the layer parameters
LayerParameter layer_param_;
// The vector that stores the parameters as a set of blobs.
vector<shared_ptr<Blob<Dtype> > > blobs_;
// Forward functions: compute the layer output
// (and loss layers return the loss; other layers return the dummy value 0.)
virtual Dtype Forward_cpu(const vector<Blob<Dtype>*>& bottom,
vector<Blob<Dtype>*>* top) = 0;
// If no gpu code is provided, we will simply use cpu code.
virtual Dtype Forward_gpu(const vector<Blob<Dtype>*>& bottom,
vector<Blob<Dtype>*>* top) {
// LOG(WARNING) << "Using CPU code as backup.";
return Forward_cpu(bottom, top);
}
// Backward functions: compute the gradients for any parameters and
// for the bottom blobs if propagate_down is true.
virtual void Backward_cpu(const vector<Blob<Dtype>*>& top,
const bool propagate_down,
vector<Blob<Dtype>*>* bottom) = 0;
virtual void Backward_gpu(const vector<Blob<Dtype>*>& top,
const bool propagate_down,
vector<Blob<Dtype>*>* bottom) {
// LOG(WARNING) << "Using CPU code as backup.";
Backward_cpu(top, propagate_down, bottom);
}
// CheckBlobCounts: called by the parent Layer's SetUp to check that the
// number of bottom and top Blobs provided as input match the expected
// numbers specified by the {ExactNum,Min,Max}{Bottom,Top}Blobs() functions.
virtual void CheckBlobCounts(const vector<Blob<Dtype>*>& bottom,
const vector<Blob<Dtype>*>& top) {
if (ExactNumBottomBlobs() >= 0) {
CHECK_EQ(ExactNumBottomBlobs(), bottom.size())
<< type_name() << " Layer takes " << ExactNumBottomBlobs()
<< " bottom blob(s) as input.";
}
if (MinBottomBlobs() >= 0) {
CHECK_LE(MinBottomBlobs(), bottom.size())
<< type_name() << " Layer takes at least " << MinBottomBlobs()
<< " bottom blob(s) as input.";
}
if (MaxBottomBlobs() >= 0) {
CHECK_GE(MaxBottomBlobs(), bottom.size())
<< type_name() << " Layer takes at most " << MaxBottomBlobs()
<< " bottom blob(s) as input.";
}
if (ExactNumTopBlobs() >= 0) {
CHECK_EQ(ExactNumTopBlobs(), top.size())
<< type_name() << " Layer produces " << ExactNumTopBlobs()
<< " top blob(s) as output.";
}
if (MinTopBlobs() >= 0) {
CHECK_LE(MinTopBlobs(), top.size())
<< type_name() << " Layer produces at least " << MinTopBlobs()
<< " top blob(s) as output.";
}
if (MaxTopBlobs() >= 0) {
CHECK_GE(MaxTopBlobs(), top.size())
<< type_name() << " Layer produces at most " << MaxTopBlobs()
<< " top blob(s) as output.";
}
}
DISABLE_COPY_AND_ASSIGN(Layer);
}; // class Layer
// Forward wrapper: dispatches to Forward_cpu or Forward_gpu according to
// Caffe::mode() and returns that call's result. Implement the cpu and gpu
// specific versions instead of changing this function. An unrecognized mode
// is logged as fatal.
template <typename Dtype>
inline Dtype Layer<Dtype>::Forward(const vector<Blob<Dtype>*>& bottom,
    vector<Blob<Dtype>*>* top) {
  const Caffe::Brew current_mode = Caffe::mode();
  if (current_mode == Caffe::CPU) {
    return Forward_cpu(bottom, top);
  }
  if (current_mode == Caffe::GPU) {
    return Forward_gpu(bottom, top);
  }
  LOG(FATAL) << "Unknown caffe mode.";
  return Dtype(0);
}
// Backward wrapper: dispatches to Backward_cpu or Backward_gpu according to
// Caffe::mode(). An unrecognized mode is logged as fatal.
template <typename Dtype>
inline void Layer<Dtype>::Backward(const vector<Blob<Dtype>*>& top,
    const bool propagate_down,
    vector<Blob<Dtype>*>* bottom) {
  const Caffe::Brew current_mode = Caffe::mode();
  if (current_mode == Caffe::CPU) {
    Backward_cpu(top, propagate_down, bottom);
  } else if (current_mode == Caffe::GPU) {
    Backward_gpu(top, propagate_down, bottom);
  } else {
    LOG(FATAL) << "Unknown caffe mode.";
  }
}
// Serializes the layer into `param`: the message is cleared, overwritten with
// a copy of layer_param_, and its blobs field is then rebuilt from the layer's
// current blobs_. `write_diff` is forwarded to Blob::ToProto.
template <typename Dtype>
void Layer<Dtype>::ToProto(LayerParameter* param, bool write_diff) {
  param->Clear();
  param->CopyFrom(layer_param_);
  param->clear_blobs();
  const int num_blobs = static_cast<int>(blobs_.size());
  for (int blob_id = 0; blob_id < num_blobs; ++blob_id) {
    blobs_[blob_id]->ToProto(param->add_blobs(), write_diff);
  }
}
// The layer factory function. Declared here and defined elsewhere; returns a
// raw Layer pointer for the given LayerParameter.
// NOTE(review): presumably the result is heap-allocated and owned by the
// caller -- confirm against the definition.
template <typename Dtype>
Layer<Dtype>* GetLayer(const LayerParameter& param);
} // namespace caffe
#endif // CAFFE_LAYER_H_

@ -0,0 +1,198 @@
// Copyright 2014 BVLC and contributors.
#ifndef CAFFE_LOSS_LAYERS_HPP_
#define CAFFE_LOSS_LAYERS_HPP_
#include <string>
#include <utility>
#include <vector>
#include "leveldb/db.h"
#include "pthread.h"
#include "boost/scoped_ptr.hpp"
#include "hdf5.h"
#include "caffe/blob.hpp"
#include "caffe/common.hpp"
#include "caffe/layer.hpp"
#include "caffe/neuron_layers.hpp"
#include "caffe/proto/caffe.pb.h"
namespace caffe {
// NOTE(review): presumably the smallest value passed to log() by the loss
// layers, to avoid log(0) -- its use is not visible in this header.
const float kLOG_THRESHOLD = 1e-20;
/* LossLayer
Takes two inputs of same num (a and b), and has no output.
The gradient is propagated to a.
Base class of the loss layers below: it fixes the blob counts at exactly two
bottoms and zero tops, and adds the FurtherSetUp hook, which is empty by
default.
NOTE(review): presumably SetUp (defined elsewhere) calls FurtherSetUp after
its own checks -- confirm against the implementation.
*/
template <typename Dtype>
class LossLayer : public Layer<Dtype> {
public:
explicit LossLayer(const LayerParameter& param)
: Layer<Dtype>(param) {}
virtual void SetUp(
const vector<Blob<Dtype>*>& bottom, vector<Blob<Dtype>*>* top);
// Hook for subclass-specific set-up; does nothing by default.
virtual void FurtherSetUp(
const vector<Blob<Dtype>*>& bottom, vector<Blob<Dtype>*>* top) {}
virtual inline int ExactNumBottomBlobs() const { return 2; }
virtual inline int ExactNumTopBlobs() const { return 0; }
};
/* SigmoidCrossEntropyLossLayer
Loss layer that owns an internal SigmoidLayer (constructed from the same
LayerParameter) and a blob for that layer's output. CPU and GPU versions of
Forward and Backward are declared here and defined elsewhere.
*/
template <typename Dtype>
class SigmoidCrossEntropyLossLayer : public LossLayer<Dtype> {
public:
explicit SigmoidCrossEntropyLossLayer(const LayerParameter& param)
: LossLayer<Dtype>(param),
sigmoid_layer_(new SigmoidLayer<Dtype>(param)),
sigmoid_output_(new Blob<Dtype>()) {}
virtual void FurtherSetUp(const vector<Blob<Dtype>*>& bottom,
vector<Blob<Dtype>*>* top);
virtual inline LayerParameter_LayerType type() const {
return LayerParameter_LayerType_SIGMOID_CROSS_ENTROPY_LOSS;
}
protected:
virtual Dtype Forward_cpu(const vector<Blob<Dtype>*>& bottom,
vector<Blob<Dtype>*>* top);
virtual Dtype Forward_gpu(const vector<Blob<Dtype>*>& bottom,
vector<Blob<Dtype>*>* top);
virtual void Backward_cpu(const vector<Blob<Dtype>*>& top,
const bool propagate_down, vector<Blob<Dtype>*>* bottom);
virtual void Backward_gpu(const vector<Blob<Dtype>*>& top,
const bool propagate_down, vector<Blob<Dtype>*>* bottom);
// The wrapped sigmoid layer.
shared_ptr<SigmoidLayer<Dtype> > sigmoid_layer_;
// sigmoid_output stores the output of the sigmoid layer.
shared_ptr<Blob<Dtype> > sigmoid_output_;
// Vector holders to call the underlying sigmoid layer forward and backward.
vector<Blob<Dtype>*> sigmoid_bottom_vec_;
vector<Blob<Dtype>*> sigmoid_top_vec_;
};
/* EuclideanLossLayer
Compute the L_2 distance between the two inputs.
loss = (1/2 \sum_i (a_i - b_i)^2)
a' = 1/I (a - b)
Only the CPU versions of Forward and Backward are declared, so GPU mode uses
Layer's default Forward_gpu/Backward_gpu, which call the CPU versions.
*/
template <typename Dtype>
class EuclideanLossLayer : public LossLayer<Dtype> {
public:
explicit EuclideanLossLayer(const LayerParameter& param)
: LossLayer<Dtype>(param), diff_() {}
virtual void FurtherSetUp(const vector<Blob<Dtype>*>& bottom,
vector<Blob<Dtype>*>* top);
virtual inline LayerParameter_LayerType type() const {
return LayerParameter_LayerType_EUCLIDEAN_LOSS;
}
protected:
virtual Dtype Forward_cpu(const vector<Blob<Dtype>*>& bottom,
vector<Blob<Dtype>*>* top);
virtual void Backward_cpu(const vector<Blob<Dtype>*>& top,
const bool propagate_down, vector<Blob<Dtype>*>* bottom);
// NOTE(review): presumably holds the element-wise difference a - b between
// the forward and backward passes -- confirm against the implementation.
Blob<Dtype> diff_;
};
/* InfogainLossLayer
Loss layer holding an extra blob, infogain_. Only the CPU versions of Forward
and Backward are declared, so GPU mode uses Layer's default
Forward_gpu/Backward_gpu, which call the CPU versions.
*/
template <typename Dtype>
class InfogainLossLayer : public LossLayer<Dtype> {
public:
explicit InfogainLossLayer(const LayerParameter& param)
: LossLayer<Dtype>(param), infogain_() {}
virtual void FurtherSetUp(const vector<Blob<Dtype>*>& bottom,
vector<Blob<Dtype>*>* top);
virtual inline LayerParameter_LayerType type() const {
return LayerParameter_LayerType_INFOGAIN_LOSS;
}
protected:
virtual Dtype Forward_cpu(const vector<Blob<Dtype>*>& bottom,
vector<Blob<Dtype>*>* top);
virtual void Backward_cpu(const vector<Blob<Dtype>*>& top,
const bool propagate_down, vector<Blob<Dtype>*>* bottom);
// NOTE(review): presumably the infogain matrix loaded during FurtherSetUp --
// confirm against the implementation.
Blob<Dtype> infogain_;
};
/* HingeLossLayer
Loss layer that does not override FurtherSetUp, so LossLayer's empty default
applies. Only the CPU versions of Forward and Backward are declared, so GPU
mode uses Layer's default Forward_gpu/Backward_gpu, which call the CPU
versions.
*/
template <typename Dtype>
class HingeLossLayer : public LossLayer<Dtype> {
public:
explicit HingeLossLayer(const LayerParameter& param)
: LossLayer<Dtype>(param) {}
virtual inline LayerParameter_LayerType type() const {
return LayerParameter_LayerType_HINGE_LOSS;
}
protected:
virtual Dtype Forward_cpu(const vector<Blob<Dtype>*>& bottom,
vector<Blob<Dtype>*>* top);
virtual void Backward_cpu(const vector<Blob<Dtype>*>& top,
const bool propagate_down, vector<Blob<Dtype>*>* bottom);
};
/* MultinomialLogisticLossLayer
Loss layer with its own FurtherSetUp (defined elsewhere). Only the CPU
versions of Forward and Backward are declared, so GPU mode uses Layer's
default Forward_gpu/Backward_gpu, which call the CPU versions.
*/
template <typename Dtype>
class MultinomialLogisticLossLayer : public LossLayer<Dtype> {
public:
explicit MultinomialLogisticLossLayer(const LayerParameter& param)
: LossLayer<Dtype>(param) {}
virtual void FurtherSetUp(const vector<Blob<Dtype>*>& bottom,
vector<Blob<Dtype>*>* top);
virtual inline LayerParameter_LayerType type() const {
return LayerParameter_LayerType_MULTINOMIAL_LOGISTIC_LOSS;
}
protected:
virtual Dtype Forward_cpu(const vector<Blob<Dtype>*>& bottom,
vector<Blob<Dtype>*>* top);
virtual void Backward_cpu(const vector<Blob<Dtype>*>& top,
const bool propagate_down, vector<Blob<Dtype>*>* bottom);
};
/* AccuracyLayer
Note: not an actual loss layer! Does not implement backwards step.
Computes the accuracy and logprob of a with respect to b.
Derives directly from Layer and overrides none of the blob-count functions,
so Layer's defaults of -1 (no count checks) apply.
*/
template <typename Dtype>
class AccuracyLayer : public Layer<Dtype> {
public:
explicit AccuracyLayer(const LayerParameter& param)
: Layer<Dtype>(param) {}
virtual void SetUp(const vector<Blob<Dtype>*>& bottom,
vector<Blob<Dtype>*>* top);
virtual inline LayerParameter_LayerType type() const {
return LayerParameter_LayerType_ACCURACY;
}
protected:
virtual Dtype Forward_cpu(const vector<Blob<Dtype>*>& bottom,
vector<Blob<Dtype>*>* top);
// Calling the backward pass is a fatal error (see NOT_IMPLEMENTED).
virtual void Backward_cpu(const vector<Blob<Dtype>*>& top,
const bool propagate_down, vector<Blob<Dtype>*>* bottom) {
NOT_IMPLEMENTED;
}
};
/* Also see
- SoftmaxWithLossLayer in vision_layers.hpp
*/
} // namespace caffe
#endif // CAFFE_LOSS_LAYERS_HPP_

@ -0,0 +1,157 @@
// Copyright 2014 BVLC and contributors.
#ifndef CAFFE_NET_HPP_
#define CAFFE_NET_HPP_
#include <map>
#include <set>
#include <string>
#include <vector>
#include "caffe/blob.hpp"
#include "caffe/common.hpp"
#include "caffe/layer.hpp"
#include "caffe/proto/caffe.pb.h"
using std::map;
using std::vector;
using std::set;
using std::string;
namespace caffe {
// Net stores an ordered list of layers together with the blobs they read and
// write, and exposes forward, backward and update entry points plus accessors
// for the stored layers, blobs and parameters.
// NOTE(review): none of the accessors below are const-qualified, so they
// cannot be called through a const Net reference.
template <typename Dtype>
class Net {
public:
// Construct from an in-memory NetParameter, or from a file name.
explicit Net(const NetParameter& param);
explicit Net(const string& param_file);
virtual ~Net() {}
// Initialize a network with the network parameter.
void Init(const NetParameter& param);
// Run forward with the input blobs already fed separately. You can get the
// input blobs using input_blobs().
const vector<Blob<Dtype>*>& ForwardPrefilled(Dtype* loss = NULL);
// Run forward using a set of bottom blobs, and return the result.
const vector<Blob<Dtype>*>& Forward(const vector<Blob<Dtype>* > & bottom,
Dtype* loss = NULL);
// Run forward using a serialized BlobProtoVector and return the result
// as a serialized BlobProtoVector
string Forward(const string& input_blob_protos, Dtype* loss = NULL);
// The network backward should take no input and output, since it solely
// computes the gradient w.r.t the parameters, and the data has already
// been provided during the forward pass.
void Backward();
// Convenience wrapper: calls Forward(bottom, &loss), then Backward(), and
// returns loss.
// NOTE(review): loss is not initialized locally; this relies on Forward
// storing a value through the pointer - confirm in the definition.
Dtype ForwardBackward(const vector<Blob<Dtype>* > & bottom) {
Dtype loss;
Forward(bottom, &loss);
Backward();
return loss;
}
// Updates the network weights based on the diff values computed.
void Update();
// For an already initialized net, ShareTrainedLayersWith() implicitly copies
// (i.e., using no additional memory) the already trained layers from another
// Net.
void ShareTrainedLayersWith(Net* other);
// For an already initialized net, CopyTrainedLayersFrom() copies the already
// trained layers from another net parameter instance.
void CopyTrainedLayersFrom(const NetParameter& param);
// Overload taking a file name; note the string is passed by value.
void CopyTrainedLayersFrom(const string trained_filename);
// Writes the net to a proto.
void ToProto(NetParameter* param, bool write_diff = false);
// returns the network name.
inline const string& name() { return name_; }
// returns the layer names
inline const vector<string>& layer_names() { return layer_names_; }
// returns the blob names
inline const vector<string>& blob_names() { return blob_names_; }
// returns the blobs
inline const vector<shared_ptr<Blob<Dtype> > >& blobs() { return blobs_; }
// returns the layers
inline const vector<shared_ptr<Layer<Dtype> > >& layers() { return layers_; }
// returns the bottom and top vecs for each layer - usually you won't need
// this unless you do per-layer checks such as gradients.
inline vector<vector<Blob<Dtype>*> >& bottom_vecs() { return bottom_vecs_; }
inline vector<vector<Blob<Dtype>*> >& top_vecs() { return top_vecs_; }
// returns the parameters
inline vector<shared_ptr<Blob<Dtype> > >& params() { return params_; }
// returns the parameter learning rate multipliers
inline vector<float>& params_lr() {return params_lr_; }
// returns the parameter weight decay multipliers
inline vector<float>& params_weight_decay() { return params_weight_decay_; }
// Input and output blob numbers
inline int num_inputs() { return net_input_blobs_.size(); }
inline int num_outputs() { return net_output_blobs_.size(); }
// Mutable references to the stored input/output blob pointers and indices.
inline vector<Blob<Dtype>*>& input_blobs() { return net_input_blobs_; }
inline vector<Blob<Dtype>*>& output_blobs() { return net_output_blobs_; }
inline vector<int>& input_blob_indices() { return net_input_blob_indices_; }
inline vector<int>& output_blob_indices() { return net_output_blob_indices_; }
// has_blob and blob_by_name are inspired by
// https://github.com/kencoken/caffe/commit/f36e71569455c9fbb4bf8a63c2d53224e32a4e7b
// Access intermediary computation layers, testing with centre image only
bool has_blob(const string& blob_name);
const shared_ptr<Blob<Dtype> > blob_by_name(const string& blob_name);
// Layer counterparts of has_blob / blob_by_name.
bool has_layer(const string& layer_name);
const shared_ptr<Layer<Dtype> > layer_by_name(const string& layer_name);
protected:
// Helpers for Init.
// Append a new input or top blob to the net.
void AppendTop(const NetParameter& param, const int layer_id,
const int top_id, set<string>* available_blobs,
map<string, int>* blob_name_to_idx);
// Append a new bottom blob to the net.
int AppendBottom(const NetParameter& param, const int layer_id,
const int bottom_id, set<string>* available_blobs,
map<string, int>* blob_name_to_idx);
// Function to get misc parameters, e.g. the learning rate multiplier and
// weight decay.
void GetLearningRateAndWeightDecay();
// Individual layers in the net
vector<shared_ptr<Layer<Dtype> > > layers_;
vector<string> layer_names_;
// Layer name -> int; presumably the index into layers_ - confirm.
map<string, int> layer_names_index_;
vector<bool> layer_need_backward_;
// blobs stores the blobs that store intermediate results between the
// layers.
vector<shared_ptr<Blob<Dtype> > > blobs_;
vector<string> blob_names_;
// Blob name -> int; presumably the index into blobs_ - confirm.
map<string, int> blob_names_index_;
vector<bool> blob_need_backward_;
// bottom_vecs stores the vectors containing the input for each layer.
// They don't actually host the blobs (blobs_ does), so we simply store
// pointers.
vector<vector<Blob<Dtype>*> > bottom_vecs_;
vector<vector<int> > bottom_id_vecs_;
// top_vecs stores the vectors containing the output for each layer
vector<vector<Blob<Dtype>*> > top_vecs_;
vector<vector<int> > top_id_vecs_;
// blob indices for the input and the output of the net
vector<int> net_input_blob_indices_;
vector<int> net_output_blob_indices_;
vector<Blob<Dtype>*> net_input_blobs_;
vector<Blob<Dtype>*> net_output_blobs_;
string name_;
// The parameters in the network.
vector<shared_ptr<Blob<Dtype> > > params_;
// the learning rate multipliers
vector<float> params_lr_;
// the weight decay multipliers
vector<float> params_weight_decay_;
// The bytes of memory used by this net
size_t memory_used_;
// Macro defined elsewhere; by its name it makes Net non-copyable - confirm.
DISABLE_COPY_AND_ASSIGN(Net);
};
} // namespace caffe
#endif // CAFFE_NET_HPP_

@ -0,0 +1,272 @@
// Copyright 2014 BVLC and contributors.
#ifndef CAFFE_NEURON_LAYERS_HPP_
#define CAFFE_NEURON_LAYERS_HPP_
#include <string>
#include <utility>
#include <vector>
#include "leveldb/db.h"
#include "pthread.h"
#include "boost/scoped_ptr.hpp"
#include "hdf5.h"
#include "caffe/blob.hpp"
#include "caffe/common.hpp"
#include "caffe/layer.hpp"
#include "caffe/proto/caffe.pb.h"
#define HDF5_DATA_DATASET_NAME "data"
#define HDF5_DATA_LABEL_NAME "label"
namespace caffe {
/* NeuronLayer
An interface for layers that take one blob as input (x),
and produce one blob as output (y).
*/
// Common base for the single-input, single-output layers declared below in
// this header. It pins the blob counts at exactly one bottom and one top blob
// and reports the placeholder type LayerParameter_LayerType_NONE; every
// subclass in this header overrides type() with its own enum value.
template <typename Dtype>
class NeuronLayer : public Layer<Dtype> {
public:
explicit NeuronLayer(const LayerParameter& param)
: Layer<Dtype>(param) {}
// Declared here; the definition is out of line.
virtual void SetUp(const vector<Blob<Dtype>*>& bottom,
vector<Blob<Dtype>*>* top);
virtual inline LayerParameter_LayerType type() const {
return LayerParameter_LayerType_NONE;
}
// Exactly one input blob and exactly one output blob.
virtual inline int ExactNumBottomBlobs() const { return 1; }
virtual inline int ExactNumTopBlobs() const { return 1; }
};
/* BNLLLayer
y = x + log(1 + exp(-x)) if x > 0
y = log(1 + exp(x)) if x <= 0
y' = exp(x) / (exp(x) + 1)
*/
// Both branches of y above are algebraically log(1 + exp(x)); the split form
// keeps the argument of exp() non-positive.
// The class adds no state of its own: it only declares CPU and GPU versions
// of the forward and backward passes, all defined out of line.
template <typename Dtype>
class BNLLLayer : public NeuronLayer<Dtype> {
public:
explicit BNLLLayer(const LayerParameter& param)
: NeuronLayer<Dtype>(param) {}
// Identifies this layer as LayerParameter_LayerType_BNLL.
virtual inline LayerParameter_LayerType type() const {
return LayerParameter_LayerType_BNLL;
}
protected:
virtual Dtype Forward_cpu(const vector<Blob<Dtype>*>& bottom,
vector<Blob<Dtype>*>* top);
virtual Dtype Forward_gpu(const vector<Blob<Dtype>*>& bottom,
vector<Blob<Dtype>*>* top);
virtual void Backward_cpu(const vector<Blob<Dtype>*>& top,
const bool propagate_down, vector<Blob<Dtype>*>* bottom);
virtual void Backward_gpu(const vector<Blob<Dtype>*>& top,
const bool propagate_down, vector<Blob<Dtype>*>* bottom);
};
/* DropoutLayer
During training only, sets some portion of x to 0, adjusting the
vector magnitude accordingly.
mask = bernoulli(1 - threshold)
scale = 1 / (1 - threshold)
y = x * mask * scale
y' = mask * scale
*/
// Overrides SetUp and declares CPU and GPU versions of both passes; all are
// defined out of line.
template <typename Dtype>
class DropoutLayer : public NeuronLayer<Dtype> {
public:
explicit DropoutLayer(const LayerParameter& param)
: NeuronLayer<Dtype>(param) {}
virtual void SetUp(const vector<Blob<Dtype>*>& bottom,
vector<Blob<Dtype>*>* top);
// Identifies this layer as LayerParameter_LayerType_DROPOUT.
virtual inline LayerParameter_LayerType type() const {
return LayerParameter_LayerType_DROPOUT;
}
protected:
virtual Dtype Forward_cpu(const vector<Blob<Dtype>*>& bottom,
vector<Blob<Dtype>*>* top);
virtual Dtype Forward_gpu(const vector<Blob<Dtype>*>& bottom,
vector<Blob<Dtype>*>* top);
virtual void Backward_cpu(const vector<Blob<Dtype>*>& top,
const bool propagate_down, vector<Blob<Dtype>*>* bottom);
virtual void Backward_gpu(const vector<Blob<Dtype>*>& top,
const bool propagate_down, vector<Blob<Dtype>*>* bottom);
// Blob of unsigned ints; presumably holds the random mask - confirm.
shared_ptr<Blob<unsigned int> > rand_vec_;
// threshold and scale from the formula in the header comment.
Dtype threshold_;
Dtype scale_;
// Presumably threshold_ mapped onto the unsigned int range so it can be
// compared against rand_vec_ entries - confirm in the definitions.
unsigned int uint_thres_;
};
/* PowerLayer
y = (shift + scale * x) ^ power
y' = scale * power * (shift + scale * x) ^ (power - 1)
= scale * power * y / (shift + scale * x)
*/
// Overrides SetUp and declares CPU and GPU versions of both passes; all are
// defined out of line.
template <typename Dtype>
class PowerLayer : public NeuronLayer<Dtype> {
public:
explicit PowerLayer(const LayerParameter& param)
: NeuronLayer<Dtype>(param) {}
virtual void SetUp(const vector<Blob<Dtype>*>& bottom,
vector<Blob<Dtype>*>* top);
// Identifies this layer as LayerParameter_LayerType_POWER.
virtual inline LayerParameter_LayerType type() const {
return LayerParameter_LayerType_POWER;
}
protected:
virtual Dtype Forward_cpu(const vector<Blob<Dtype>*>& bottom,
vector<Blob<Dtype>*>* top);
virtual Dtype Forward_gpu(const vector<Blob<Dtype>*>& bottom,
vector<Blob<Dtype>*>* top);
virtual void Backward_cpu(const vector<Blob<Dtype>*>& top,
const bool propagate_down, vector<Blob<Dtype>*>* bottom);
virtual void Backward_gpu(const vector<Blob<Dtype>*>& top,
const bool propagate_down, vector<Blob<Dtype>*>* bottom);
// power, scale and shift from the formula in the header comment.
Dtype power_;
Dtype scale_;
Dtype shift_;
// Presumably the cached product scale * power used by y' - confirm.
Dtype diff_scale_;
};
/* ReLULayer
Rectified Linear Unit non-linearity.
The simple max is fast to compute, and the function does not saturate.
y = max(0, x).
y' = 0 if x < 0
y' = 1 if x > 0
(The value used for y' at exactly x = 0 is not stated here; see the
out-of-line Backward definitions.)
*/
// The class adds no state: it only declares CPU and GPU versions of the
// forward and backward passes, all defined out of line.
template <typename Dtype>
class ReLULayer : public NeuronLayer<Dtype> {
public:
explicit ReLULayer(const LayerParameter& param)
: NeuronLayer<Dtype>(param) {}
// Identifies this layer as LayerParameter_LayerType_RELU.
virtual inline LayerParameter_LayerType type() const {
return LayerParameter_LayerType_RELU;
}
protected:
virtual Dtype Forward_cpu(const vector<Blob<Dtype>*>& bottom,
vector<Blob<Dtype>*>* top);
virtual Dtype Forward_gpu(const vector<Blob<Dtype>*>& bottom,
vector<Blob<Dtype>*>* top);
virtual void Backward_cpu(const vector<Blob<Dtype>*>& top,
const bool propagate_down, vector<Blob<Dtype>*>* bottom);
virtual void Backward_gpu(const vector<Blob<Dtype>*>& top,
const bool propagate_down, vector<Blob<Dtype>*>* bottom);
};
/* SigmoidLayer
Sigmoid function non-linearity, a classic choice in neural networks.
Note that the gradient vanishes as the values move away from 0.
The ReLULayer is often a better choice for this reason.
y = 1. / (1 + exp(-x))
y' = exp(x) / (1 + exp(x))^2
or
y' = y * (1 - y)
*/
// The class adds no state: it only declares CPU and GPU versions of the
// forward and backward passes, all defined out of line.
template <typename Dtype>
class SigmoidLayer : public NeuronLayer<Dtype> {
public:
explicit SigmoidLayer(const LayerParameter& param)
: NeuronLayer<Dtype>(param) {}
// Identifies this layer as LayerParameter_LayerType_SIGMOID.
virtual inline LayerParameter_LayerType type() const {
return LayerParameter_LayerType_SIGMOID;
}
protected:
virtual Dtype Forward_cpu(const vector<Blob<Dtype>*>& bottom,
vector<Blob<Dtype>*>* top);
virtual Dtype Forward_gpu(const vector<Blob<Dtype>*>& bottom,
vector<Blob<Dtype>*>* top);
virtual void Backward_cpu(const vector<Blob<Dtype>*>& top,
const bool propagate_down, vector<Blob<Dtype>*>* bottom);
virtual void Backward_gpu(const vector<Blob<Dtype>*>& top,
const bool propagate_down, vector<Blob<Dtype>*>* bottom);
};
/* TanHLayer
Hyperbolic tangent non-linearity, popular in auto-encoders.
y = 1. * (exp(2x) - 1) / (exp(2x) + 1)
y' = 1 - ( (exp(2x) - 1) / (exp(2x) + 1) ) ^ 2
(i.e. y' = 1 - y^2)
*/
// The class adds no state: it only declares CPU and GPU versions of the
// forward and backward passes, all defined out of line.
template <typename Dtype>
class TanHLayer : public NeuronLayer<Dtype> {
public:
explicit TanHLayer(const LayerParameter& param)
: NeuronLayer<Dtype>(param) {}
// Identifies this layer as LayerParameter_LayerType_TANH.
virtual inline LayerParameter_LayerType type() const {
return LayerParameter_LayerType_TANH;
}
protected:
virtual Dtype Forward_cpu(const vector<Blob<Dtype>*>& bottom,
vector<Blob<Dtype>*>* top);
virtual Dtype Forward_gpu(const vector<Blob<Dtype>*>& bottom,
vector<Blob<Dtype>*>* top);
virtual void Backward_cpu(const vector<Blob<Dtype>*>& top,
const bool propagate_down, vector<Blob<Dtype>*>* bottom);
virtual void Backward_gpu(const vector<Blob<Dtype>*>& top,
const bool propagate_down, vector<Blob<Dtype>*>* bottom);
};
/* ThresholdLayer
Outputs 1 if value in input is above threshold, 0 otherwise.
The default threshold is 0, which means positive values become 1 and
negative values or 0 become 0.
y = 1 if x > threshold
y = 0 if x <= threshold
y' = undefined (the function is not differentiable)
*/
// Overrides SetUp and declares CPU and GPU forward passes (defined out of
// line). Only Backward_cpu is overridden, and its body is NOT_IMPLEMENTED;
// this class declares no Backward_gpu override.
template <typename Dtype>
class ThresholdLayer : public NeuronLayer<Dtype> {
public:
explicit ThresholdLayer(const LayerParameter& param)
: NeuronLayer<Dtype>(param) {}
virtual void SetUp(const vector<Blob<Dtype>*>& bottom,
vector<Blob<Dtype>*>* top);
// Identifies this layer as LayerParameter_LayerType_THRESHOLD.
virtual inline LayerParameter_LayerType type() const {
return LayerParameter_LayerType_THRESHOLD;
}
protected:
virtual Dtype Forward_cpu(const vector<Blob<Dtype>*>& bottom,
vector<Blob<Dtype>*>* top);
virtual Dtype Forward_gpu(const vector<Blob<Dtype>*>& bottom,
vector<Blob<Dtype>*>* top);
// No gradient is provided, consistent with y' being undefined above.
virtual void Backward_cpu(const vector<Blob<Dtype>*>& top,
const bool propagate_down, vector<Blob<Dtype>*>* bottom) {
NOT_IMPLEMENTED;
}
// The threshold from the formula in the header comment.
Dtype threshold_;
};
} // namespace caffe
#endif // CAFFE_NEURON_LAYERS_HPP_

@ -0,0 +1,77 @@
// Copyright 2014 BVLC and contributors.
#ifndef CAFFE_OPTIMIZATION_SOLVER_HPP_
#define CAFFE_OPTIMIZATION_SOLVER_HPP_
#include <string>
#include <vector>
namespace caffe {
// Abstract solver base class: ComputeUpdateValue, SnapshotSolverState and
// RestoreSolverState are pure virtual and must be supplied by subclasses.
// NOTE(review): this header uses SolverParameter, SolverState, Net, shared_ptr
// and an unqualified string, but only includes <string> and <vector>; it
// relies on the including file having brought those names into scope first.
template <typename Dtype>
class Solver {
public:
// Construct from an in-memory SolverParameter, or from a file name.
explicit Solver(const SolverParameter& param);
explicit Solver(const string& param_file);
void Init(const SolverParameter& param);
// The main entry of the solver function. In default, iter will be zero. Pass
// in a non-zero iter number to resume training for a pre-trained net.
// NOTE(review): the comment above speaks of an iter number, but the actual
// parameter is a resume file name that defaults to NULL.
virtual void Solve(const char* resume_file = NULL);
// std::string convenience overload; forwards resume_file.c_str().
inline void Solve(const string resume_file) { Solve(resume_file.c_str()); }
virtual ~Solver() {}
// Returns a copy of the shared pointer to the net held in net_.
inline shared_ptr<Net<Dtype> > net() { return net_; }
protected:
// PreSolve is run before any solving iteration starts, allowing one to
// put up some scaffold.
virtual void PreSolve() {}
// Get the update value for the current iteration.
virtual void ComputeUpdateValue() = 0;
// The Solver::Snapshot function implements the basic snapshotting utility
// that stores the learned net. You should implement the SnapshotSolverState()
// function that produces a SolverState protocol buffer that needs to be
// written to disk together with the learned net.
void Snapshot();
// The test routine
void TestAll();
void Test(const int test_net_id = 0);
virtual void SnapshotSolverState(SolverState* state) = 0;
// The Restore function implements how one should restore the solver to a
// previously snapshotted state. You should implement the RestoreSolverState()
// function that restores the state from a SolverState protocol buffer.
void Restore(const char* resume_file);
virtual void RestoreSolverState(const SolverState& state) = 0;
// Solver configuration and the iteration counter.
SolverParameter param_;
int iter_;
// The net returned by net(), plus a list of further nets; by name these are
// the test nets indexed by Test(test_net_id) - confirm.
shared_ptr<Net<Dtype> > net_;
vector<shared_ptr<Net<Dtype> > > test_nets_;
// Macro defined elsewhere; by its name it makes Solver non-copyable - confirm.
DISABLE_COPY_AND_ASSIGN(Solver);
};
// Concrete Solver subclass. It overrides PreSolve and the three pure virtual
// hooks of Solver (ComputeUpdateValue, SnapshotSolverState,
// RestoreSolverState), adds GetLearningRate, and keeps a per-blob history.
// All member functions are defined out of line.
template <typename Dtype>
class SGDSolver : public Solver<Dtype> {
public:
// Both constructors forward their argument to the matching Solver ctor.
explicit SGDSolver(const SolverParameter& param)
: Solver<Dtype>(param) {}
explicit SGDSolver(const string& param_file)
: Solver<Dtype>(param_file) {}
protected:
virtual void PreSolve();
// Returns a Dtype; by name the learning rate for the current step - the
// schedule itself is in the out-of-line definition.
Dtype GetLearningRate();
virtual void ComputeUpdateValue();
virtual void SnapshotSolverState(SolverState * state);
virtual void RestoreSolverState(const SolverState& state);
// history maintains the historical momentum data.
vector<shared_ptr<Blob<Dtype> > > history_;
// Macro defined elsewhere; by its name it makes SGDSolver non-copyable -
// confirm.
DISABLE_COPY_AND_ASSIGN(SGDSolver);
};
} // namespace caffe
#endif // CAFFE_OPTIMIZATION_SOLVER_HPP_

@ -0,0 +1,67 @@
// Copyright 2014 BVLC and contributors.
#ifndef CAFFE_SYNCEDMEM_HPP_
#define CAFFE_SYNCEDMEM_HPP_
#include <cstdlib>
#include "caffe/common.hpp"
namespace caffe {
// Theoretically, CaffeMallocHost and CaffeFreeHost should simply call the
// cudaMallocHost and cudaFreeHost functions in order to create and release
// pinned memory. However, those calls rely on the existence of a CUDA GPU
// (I don't know why that is a must, since allocating host memory should not
// need to access the GPU resource, but it just creates an error as of
// CUDA 5.0) and will cause problems when running on a machine without a GPU.
// Thus, we simply define these two functions for safety and possible future
// change if the problem of calling CUDA functions disappears in a future
// version.
//
// In practice, although we are creating unpinned memory here, as long as we
// are constantly accessing it the memory pages almost always stay in
// physical memory (assuming we have enough memory installed), and this
// does not seem to create a memory bottleneck.
// Allocates `size` bytes of ordinary (unpinned) host memory with malloc and
// stores the resulting pointer in *ptr. The malloc result is stored as-is,
// so *ptr is NULL when the allocation fails. Release with CaffeFreeHost.
inline void CaffeMallocHost(void** ptr, size_t size) {
  void* const host_block = std::malloc(size);
  *ptr = host_block;
}
// Releases host memory obtained from CaffeMallocHost by handing the pointer
// straight to free(); as with free(), a NULL pointer is accepted.
inline void CaffeFreeHost(void* ptr) {
  std::free(ptr);
}
// SyncedMemory tracks one buffer of size_ bytes through a CPU pointer and a
// GPU pointer, with head_ recording which side is current (see SyncedHead).
// Both constructors start with NULL pointers, head_ == UNINITIALIZED and
// own_cpu_data_ == false; no allocation happens in them. The accessors and
// to_cpu()/to_gpu() are defined out of line.
class SyncedMemory {
public:
SyncedMemory()
: cpu_ptr_(NULL), gpu_ptr_(NULL), size_(0), head_(UNINITIALIZED),
own_cpu_data_(false) {}
explicit SyncedMemory(size_t size)
: cpu_ptr_(NULL), gpu_ptr_(NULL), size_(size), head_(UNINITIALIZED),
own_cpu_data_(false) {}
~SyncedMemory();
// Read-only and writable views of the CPU / GPU copies.
// NOTE(review): presumably these synchronize and update head_ as needed -
// confirm in the definitions.
const void* cpu_data();
// Installs a caller-provided CPU buffer; by the own_cpu_data_ flag this is
// presumably not freed by this object - confirm.
void set_cpu_data(void* data);
const void* gpu_data();
void* mutable_cpu_data();
void* mutable_gpu_data();
// Possible states of head_.
enum SyncedHead { UNINITIALIZED, HEAD_AT_CPU, HEAD_AT_GPU, SYNCED };
SyncedHead head() { return head_; }
size_t size() { return size_; }
private:
void to_cpu();
void to_gpu();
void* cpu_ptr_;
void* gpu_ptr_;
size_t size_;
SyncedHead head_;
bool own_cpu_data_;
// Macro defined elsewhere; by its name it makes SyncedMemory non-copyable -
// confirm.
DISABLE_COPY_AND_ASSIGN(SyncedMemory);
}; // class SyncedMemory
} // namespace caffe
#endif // CAFFE_SYNCEDMEM_HPP_

@ -0,0 +1,39 @@
// Copyright 2014 BVLC and contributors.
#ifndef CAFFE_UTIL_BENCHMARK_H_
#define CAFFE_UTIL_BENCHMARK_H_
#include <boost/date_time/posix_time/posix_time.hpp>
#include <cuda_runtime.h>
namespace caffe {
// Start/Stop timer with elapsed-time queries in milliseconds and seconds.
// It stores both a pair of CUDA events and a pair of boost ptime stamps;
// which pair a given run uses is decided in the out-of-line definitions.
class Timer {
public:
Timer();
virtual ~Timer();
void Start();
void Stop();
// Elapsed time as a float; the exact measurement window is defined out of
// line.
float MilliSeconds();
float Seconds();
// State queries backed directly by the flags below.
inline bool initted() { return initted_; }
inline bool running() { return running_; }
inline bool has_run_at_least_once() { return has_run_at_least_once_; }
protected:
void Init();
bool initted_;
bool running_;
bool has_run_at_least_once_;
// GPU-side start/stop markers.
cudaEvent_t start_gpu_;
cudaEvent_t stop_gpu_;
// CPU-side start/stop time stamps.
boost::posix_time::ptime start_cpu_;
boost::posix_time::ptime stop_cpu_;
float elapsed_milliseconds_;
};
} // namespace caffe
#endif // CAFFE_UTIL_BENCHMARK_H_

@ -0,0 +1,25 @@
// Copyright 2014 BVLC and contributors.
#ifndef CAFFE_UTIL_FORMAT_H_
#define CAFFE_UTIL_FORMAT_H_
#include <opencv2/opencv.hpp>
#include <string>
#include "caffe/proto/caffe.pb.h"
namespace caffe {
// Fills *datum from an OpenCV image and a label; returns a bool status.
// Declaration only. By their names, height/width are the target size and
// is_color selects colour vs. grayscale - confirm against the definition.
bool OpenCVImageToDatum(
const cv::Mat& image, const int label, const int height,
const int width, const bool is_color, Datum* datum);
// Convenience overload without the is_color argument: delegates to the
// six-argument OpenCVImageToDatum with is_color fixed to true and returns
// its result.
inline bool OpenCVImageToDatum(
    const cv::Mat& image, const int label, const int height,
    const int width, Datum* datum) {
  const bool is_color = true;
  return OpenCVImageToDatum(image, label, height, width, is_color, datum);
}
} // namespace caffe
#endif // CAFFE_UTIL_FORMAT_H_

@ -0,0 +1,30 @@
// Copyright 2014 BVLC and contributors.
#ifndef _CAFFE_UTIL_IM2COL_HPP_
#define _CAFFE_UTIL_IM2COL_HPP_
namespace caffe {
// Declarations of the image<->column conversion routines; the definitions
// are not in this header. im2col_* read data_im and write data_col, while
// col2im_* read data_col and write data_im. The *_cpu and *_gpu variants
// have identical signatures.
// NOTE(review): the size parameter is called ksize in im2col_* but psize in
// col2im_*; presumably both are the same kernel size - confirm.
template <typename Dtype>
void im2col_cpu(const Dtype* data_im, const int channels,
const int height, const int width, const int ksize, const int pad,
const int stride, Dtype* data_col);
template <typename Dtype>
void col2im_cpu(const Dtype* data_col, const int channels,
const int height, const int width, const int psize, const int pad,
const int stride, Dtype* data_im);
template <typename Dtype>
void im2col_gpu(const Dtype* data_im, const int channels,
const int height, const int width, const int ksize, const int pad,
const int stride, Dtype* data_col);
template <typename Dtype>
void col2im_gpu(const Dtype* data_col, const int channels,
const int height, const int width, const int psize, const int pad,
const int stride, Dtype* data_im);
} // namespace caffe
#endif // CAFFE_UTIL_IM2COL_HPP_

@ -0,0 +1,31 @@
// Copyright 2014 BVLC and contributors.
#ifndef _CAFFE_UTIL_INSERT_SPLITS_HPP_
#define _CAFFE_UTIL_INSERT_SPLITS_HPP_
#include <string>
#include "caffe/proto/caffe.pb.h"
using std::pair;
using std::string;
namespace caffe {
// Copy NetParameters with SplitLayers added to replace any shared bottom
// blobs with unique bottom blobs provided by the SplitLayer.
// Reads `param` and writes the result into *param_split.
void InsertSplits(const NetParameter& param, NetParameter* param_split);
// Fills *split_layer_param for one split layer; by the parameter names it
// is derived from the producing layer/blob and the number of consumers
// (split_count) - confirm against the definition.
void ConfigureSplitLayer(const string& layer_name, const string& blob_name,
const int blob_idx, const int split_count,
LayerParameter* split_layer_param);
// Return the generated name of a split layer, and of one of its output
// blobs (selected by split_idx). The naming scheme is in the definitions.
string SplitLayerName(const string& layer_name, const string& blob_name,
const int blob_idx);
string SplitBlobName(const string& layer_name, const string& blob_name,
const int blob_idx, const int split_idx);
} // namespace caffe
#endif // CAFFE_UTIL_INSERT_SPLITS_HPP_

@ -0,0 +1,93 @@
// Copyright 2014 BVLC and contributors.
#ifndef CAFFE_UTIL_IO_H_
#define CAFFE_UTIL_IO_H_
#include <string>
#include "google/protobuf/message.h"
#include "hdf5.h"
#include "hdf5_hl.h"
#include "caffe/proto/caffe.pb.h"
#include "caffe/blob.hpp"
using std::string;
using ::google::protobuf::Message;
#define HDF5_NUM_DIMS 4
namespace caffe {
// Reads `filename` into *proto and returns a bool status. Declaration only;
// by its name the file is text-format protobuf - see the definition.
bool ReadProtoFromTextFile(const char* filename, Message* proto);
// std::string convenience overload: forwards to the const char* version
// and returns its result.
inline bool ReadProtoFromTextFile(const string& filename, Message* proto) {
  const char* const path = filename.c_str();
  return ReadProtoFromTextFile(path, proto);
}
// Same as ReadProtoFromTextFile, but a false result trips CHECK instead of
// being returned to the caller.
inline void ReadProtoFromTextFileOrDie(const char* filename, Message* proto) {
  CHECK(ReadProtoFromTextFile(filename, proto));
}
// std::string convenience overload: forwards to the const char* version.
inline void ReadProtoFromTextFileOrDie(const string& filename, Message* proto) {
  const char* const path = filename.c_str();
  ReadProtoFromTextFileOrDie(path, proto);
}
// Writes `proto` to `filename`. Declaration only; by its name the output is
// text-format protobuf - see the definition.
void WriteProtoToTextFile(const Message& proto, const char* filename);
// std::string convenience overload: forwards to the const char* version.
inline void WriteProtoToTextFile(const Message& proto, const string& filename) {
  const char* const path = filename.c_str();
  WriteProtoToTextFile(proto, path);
}
// Reads `filename` into *proto and returns a bool status. Declaration only;
// by its name the file is binary-serialized protobuf - see the definition.
bool ReadProtoFromBinaryFile(const char* filename, Message* proto);
// std::string convenience overload: forwards to the const char* version
// and returns its result.
inline bool ReadProtoFromBinaryFile(const string& filename, Message* proto) {
  const char* const path = filename.c_str();
  return ReadProtoFromBinaryFile(path, proto);
}
// Same as ReadProtoFromBinaryFile, but a false result trips CHECK instead of
// being returned to the caller.
inline void ReadProtoFromBinaryFileOrDie(const char* filename, Message* proto) {
  CHECK(ReadProtoFromBinaryFile(filename, proto));
}
// std::string convenience overload: forwards to the const char* version.
inline void ReadProtoFromBinaryFileOrDie(const string& filename,
                                         Message* proto) {
  const char* const path = filename.c_str();
  ReadProtoFromBinaryFileOrDie(path, proto);
}
// Writes `proto` to `filename`. Declaration only; by its name the output is
// binary-serialized protobuf - see the definition.
void WriteProtoToBinaryFile(const Message& proto, const char* filename);
// std::string convenience overload: forwards to the const char* version.
inline void WriteProtoToBinaryFile(
    const Message& proto, const string& filename) {
  const char* const path = filename.c_str();
  WriteProtoToBinaryFile(proto, path);
}
// Loads the image at `filename` into *datum together with `label` and
// returns a bool status. Declaration only; how height, width and is_color
// are interpreted is determined by the out-of-line definition.
bool ReadImageToDatum(const string& filename, const int label,
    const int height, const int width, const bool is_color, Datum* datum);
// Overload without is_color: delegates with is_color fixed to true.
inline bool ReadImageToDatum(const string& filename, const int label,
    const int height, const int width, Datum* datum) {
  const bool is_color = true;
  return ReadImageToDatum(filename, label, height, width, is_color, datum);
}
// Overload without a size: delegates with height = 0 and width = 0.
inline bool ReadImageToDatum(const string& filename, const int label,
    Datum* datum) {
  const int height = 0;
  const int width = 0;
  return ReadImageToDatum(filename, label, height, width, datum);
}
// HDF5 <-> Blob helpers; declarations only, definitions are out of line.
// The two load functions share one signature: they take an open file id, a
// dataset name, a [min_dim, max_dim] pair and the destination blob.
// NOTE(review): presumably min_dim/max_dim bound the accepted dataset rank
// (see HDF5_NUM_DIMS above) - confirm against the definitions.
template <typename Dtype>
void hdf5_load_nd_dataset_helper(
hid_t file_id, const char* dataset_name_, int min_dim, int max_dim,
Blob<Dtype>* blob);
template <typename Dtype>
void hdf5_load_nd_dataset(
hid_t file_id, const char* dataset_name_, int min_dim, int max_dim,
Blob<Dtype>* blob);
// Writes `blob` under `dataset_name` in the given file. The name is taken
// by value.
template <typename Dtype>
void hdf5_save_nd_dataset(
const hid_t file_id, const string dataset_name, const Blob<Dtype>& blob);
} // namespace caffe
#endif // CAFFE_UTIL_IO_H_

@ -0,0 +1,253 @@
// Copyright 2014 BVLC and contributors.
#ifndef CAFFE_UTIL_MATH_FUNCTIONS_H_
#define CAFFE_UTIL_MATH_FUNCTIONS_H_
#include <cublas_v2.h>
#include <stdint.h>
#include <cmath> // for std::fabs and std::signbit
#include "glog/logging.h"
#include "caffe/util/mkl_alternate.hpp"
namespace caffe {
// BLAS-style wrappers. Only declarations appear here; every CPU routine is
// paired with a caffe_gpu_* declaration that has the same signature.
//
// Decaf gemm provides a simpler interface to the gemm functions, with the
// limitation that the data has to be contiguous in memory.
template <typename Dtype>
void caffe_cpu_gemm(const CBLAS_TRANSPOSE TransA,
const CBLAS_TRANSPOSE TransB, const int M, const int N, const int K,
const Dtype alpha, const Dtype* A, const Dtype* B, const Dtype beta,
Dtype* C);
// Decaf gpu gemm provides an interface that is almost the same as the cpu
// gemm function - following the c convention and calling the fortran-order
// gpu code under the hood.
template <typename Dtype>
void caffe_gpu_gemm(const CBLAS_TRANSPOSE TransA,
const CBLAS_TRANSPOSE TransB, const int M, const int N, const int K,
const Dtype alpha, const Dtype* A, const Dtype* B, const Dtype beta,
Dtype* C);
// Matrix-vector counterparts; the names follow BLAS gemv, so presumably
// y = alpha * op(A) * x + beta * y - confirm against the definitions.
template <typename Dtype>
void caffe_cpu_gemv(const CBLAS_TRANSPOSE TransA, const int M, const int N,
const Dtype alpha, const Dtype* A, const Dtype* x, const Dtype beta,
Dtype* y);
template <typename Dtype>
void caffe_gpu_gemv(const CBLAS_TRANSPOSE TransA, const int M, const int N,
const Dtype alpha, const Dtype* A, const Dtype* x, const Dtype beta,
Dtype* y);
// The names follow BLAS axpy, so presumably Y = alpha * X + Y - confirm.
template <typename Dtype>
void caffe_axpy(const int N, const Dtype alpha, const Dtype* X,
Dtype* Y);
template <typename Dtype>
void caffe_gpu_axpy(const int N, const Dtype alpha, const Dtype* X,
Dtype* Y);
// The names follow the axpby extension, so presumably
// Y = alpha * X + beta * Y - confirm.
template <typename Dtype>
void caffe_cpu_axpby(const int N, const Dtype alpha, const Dtype* X,
const Dtype beta, Dtype* Y);
template <typename Dtype>
void caffe_gpu_axpby(const int N, const Dtype alpha, const Dtype* X,
const Dtype beta, Dtype* Y);
// Vector helpers over N (or n) elements. Declarations only: the semantics
// noted below are inferred from the names and signatures and should be
// confirmed against the definitions.
//
// Copy X into Y / fill X with alpha.
template <typename Dtype>
void caffe_copy(const int N, const Dtype *X, Dtype *Y);
template <typename Dtype>
void caffe_set(const int N, const Dtype alpha, Dtype *X);
template <typename Dtype>
void caffe_gpu_set(const int N, const Dtype alpha, Dtype *X);
template <typename Dtype>
void caffe_gpu_copy(const int N, const Dtype *X, Dtype *Y);
// In-place on X: add the scalar alpha / scale by alpha.
template <typename Dtype>
void caffe_add_scalar(const int N, const Dtype alpha, Dtype *X);
template <typename Dtype>
void caffe_gpu_add_scalar(const int N, const Dtype alpha, Dtype *X);
template <typename Dtype>
void caffe_scal(const int N, const Dtype alpha, Dtype *X);
template <typename Dtype>
void caffe_gpu_scal(const int N, const Dtype alpha, Dtype *X);
// Unary a -> y. No caffe_gpu_sqr is declared in this header.
template <typename Dtype>
void caffe_sqr(const int N, const Dtype* a, Dtype* y);
// Binary (a, b) -> y, each with a GPU counterpart.
template <typename Dtype>
void caffe_add(const int N, const Dtype* a, const Dtype* b, Dtype* y);
template <typename Dtype>
void caffe_gpu_add(const int N, const Dtype* a, const Dtype* b, Dtype* y);
template <typename Dtype>
void caffe_sub(const int N, const Dtype* a, const Dtype* b, Dtype* y);
template <typename Dtype>
void caffe_gpu_sub(const int N, const Dtype* a, const Dtype* b, Dtype* y);
template <typename Dtype>
void caffe_mul(const int N, const Dtype* a, const Dtype* b, Dtype* y);
template <typename Dtype>
void caffe_gpu_mul(const int N, const Dtype* a, const Dtype* b, Dtype* y);
template <typename Dtype>
void caffe_div(const int N, const Dtype* a, const Dtype* b, Dtype* y);
template <typename Dtype>
void caffe_gpu_div(const int N, const Dtype* a, const Dtype* b, Dtype* y);
// Vector a with a scalar exponent b -> y.
template <typename Dtype>
void caffe_powx(const int n, const Dtype* a, const Dtype b, Dtype* y);
template <typename Dtype>
void caffe_gpu_powx(const int n, const Dtype* a, const Dtype b, Dtype* y);
// Random-number helpers; declarations only.
// Returns one unsigned int; the generator behind it is not visible here.
unsigned int caffe_rng_rand();
// Takes and returns a Dtype; by its name it mirrors nextafter - confirm
// which direction it steps in.
template <typename Dtype>
Dtype caffe_nextafter(const Dtype b);
// Fills r[0..n) with uniform samples for limits a and b. Whether the
// endpoints are included is decided by the definition.
template <typename Dtype>
void caffe_rng_uniform(const int n, const Dtype a, const Dtype b, Dtype* r);
// caffe_gpu_rng_uniform with two arguments generates integers in the range
// [0, UINT_MAX].
void caffe_gpu_rng_uniform(const int n, unsigned int* r);
// caffe_gpu_rng_uniform with four arguments generates floats in the range
// (a, b] (strictly greater than a, less than or equal to b) due to the
// specification of curandGenerateUniform. With a = 0, b = 1, just calls
// curandGenerateUniform; with other limits will shift and scale the outputs
// appropriately after calling curandGenerateUniform.
template <typename Dtype>
void caffe_gpu_rng_uniform(const int n, const Dtype a, const Dtype b, Dtype* r);
// Gaussian samples with parameters mu and sigma, CPU and GPU.
template <typename Dtype>
void caffe_rng_gaussian(const int n, const Dtype mu, const Dtype sigma,
Dtype* r);
template <typename Dtype>
void caffe_gpu_rng_gaussian(const int n, const Dtype mu, const Dtype sigma,
Dtype* r);
// Bernoulli samples with parameter p. The CPU version is overloaded for
// int* and unsigned int* outputs; the GPU version is declared for int* only.
template <typename Dtype>
void caffe_rng_bernoulli(const int n, const Dtype p, int* r);
template <typename Dtype>
void caffe_rng_bernoulli(const int n, const Dtype p, unsigned int* r);
template <typename Dtype>
void caffe_gpu_rng_bernoulli(const int n, const Dtype p, int* r);
// Unary a -> y; by its name an element-wise exp - confirm.
template <typename Dtype>
void caffe_exp(const int n, const Dtype* a, Dtype* y);
// Reductions. Note the asymmetric interfaces: the CPU dot and asum return
// their result, while the GPU versions write it through an output pointer.
template <typename Dtype>
Dtype caffe_cpu_dot(const int n, const Dtype* x, const Dtype* y);
template <typename Dtype>
void caffe_gpu_dot(const int n, const Dtype* x, const Dtype* y, Dtype* out);
// The return types also differ: int on the CPU side, uint32_t on the GPU
// side.
template <typename Dtype>
int caffe_cpu_hamming_distance(const int n, const Dtype* x, const Dtype* y);
template <typename Dtype>
uint32_t caffe_gpu_hamming_distance(const int n, const Dtype* x,
const Dtype* y);
// Returns the sum of the absolute values of the elements of vector x
template <typename Dtype>
Dtype caffe_cpu_asum(const int n, const Dtype* x);
template <typename Dtype>
void caffe_gpu_asum(const int n, const Dtype* x, Dtype* y);
// the branchless, type-safe version from
// http://stackoverflow.com/questions/1903954/is-there-a-standard-sign-function-signum-sgn-in-c-c
//
// Returns 1 when val > 0, -1 when val < 0 and 0 otherwise (this includes
// NaN, for which both comparisons are false).
// The result type is int8_t rather than plain char: whether plain char is
// signed is implementation-defined, and on targets where it is unsigned a
// result of -1 would come back as 255 once converted to Dtype.
template<typename Dtype>
inline int8_t caffe_sign(Dtype val) {
  return (Dtype(0) < val) - (val < Dtype(0));
}
// The following two macros are modifications of DEFINE_VSL_UNARY_FUNC
// in include/caffe/util/mkl_alternate.hpp authored by @Rowland Depp.
// Please refer to commit 7e8ef25c7 of the boost-eigen branch.
// Git cherry picking that commit caused a conflict hard to resolve and
// copying that file is inconvenient for code reviewing.
// So they have to be pasted here temporarily.
//
// DEFINE_CAFFE_CPU_UNARY_FUNC(name, operation) defines the function template
// caffe_cpu_<name>(n, x, y). It checks that n > 0 and that x and y are
// non-null, then runs `operation` once for every i in [0, n). `operation` is
// pasted verbatim into the loop body, so it must be written in terms of the
// identifiers i, x and y (and may use n and Dtype).
#define DEFINE_CAFFE_CPU_UNARY_FUNC(name, operation) \
template<typename Dtype> \
void caffe_cpu_##name(const int n, const Dtype* x, Dtype* y) { \
CHECK_GT(n, 0); CHECK(x); CHECK(y); \
for (int i = 0; i < n; ++i) { \
operation; \
} \
}
// INSTANTIATE_CAFFE_CPU_UNARY_FUNC(name) declares explicit specializations
// of caffe_cpu_<name> for float and for double. The last declaration has no
// trailing semicolon, so the use site must supply it.
#define INSTANTIATE_CAFFE_CPU_UNARY_FUNC(name) \
template <> \
void caffe_cpu_##name<float>(const int n, const float* x, float* y); \
template <> \
void caffe_cpu_##name<double>(const int n, const double* x, double* y)
// DEFINE_AND_INSTANTIATE_GPU_UNARY_FUNC(name, operation) defines:
//  - a __global__ kernel <name>_kernel(n, x, y) whose body runs `operation`
//    inside CUDA_KERNEL_LOOP(index, n); and
//  - float and double specializations of caffe_gpu_<name>(n, x, y) that
//    launch that kernel with CAFFE_GET_BLOCKS(n) blocks of
//    CAFFE_CUDA_NUM_THREADS threads.
// `operation` is pasted verbatim into the kernel, so it must be written in
// terms of the identifiers index, x and y. CUDA_KERNEL_LOOP,
// CAFFE_GET_BLOCKS and CAFFE_CUDA_NUM_THREADS are defined elsewhere, and the
// primary template caffe_gpu_<name> must already be declared at the point
// of use.
#define DEFINE_AND_INSTANTIATE_GPU_UNARY_FUNC(name, operation) \
template<typename Dtype> \
__global__ void name##_kernel(const int n, const Dtype* x, Dtype* y) { \
CUDA_KERNEL_LOOP(index, n) { \
operation; \
} \
} \
template <> \
void caffe_gpu_##name<float>(const int n, const float* x, float* y) { \
/* NOLINT_NEXT_LINE(whitespace/operators) */ \
name##_kernel<float><<<CAFFE_GET_BLOCKS(n), CAFFE_CUDA_NUM_THREADS>>>( \
n, x, y); \
} \
template <> \
void caffe_gpu_##name<double>(const int n, const double* x, double* y) { \
/* NOLINT_NEXT_LINE(whitespace/operators) */ \
name##_kernel<double><<<CAFFE_GET_BLOCKS(n), CAFFE_CUDA_NUM_THREADS>>>( \
n, x, y); \
}
// Defines caffe_cpu_sign(n, x, y) with y[i] = caffe_sign<Dtype>(x[i]):
// output is 1 for the positives, 0 for zero, and -1 for the negatives
DEFINE_CAFFE_CPU_UNARY_FUNC(sign, y[i] = caffe_sign<Dtype>(x[i]));
// GPU counterpart; declaration only.
template<typename Dtype>
void caffe_gpu_sign(const int n, const Dtype* x, Dtype* y);
// Defines caffe_cpu_sgnbit(n, x, y) with y[i] = signbit(x[i]).
// This returns a nonzero value if the input has its sign bit set.
// The name sgnbit is meant to avoid conflicts with std::signbit in the macro
using std::signbit;
DEFINE_CAFFE_CPU_UNARY_FUNC(sgnbit, y[i] = signbit(x[i]));
// GPU counterpart; declaration only.
template<typename Dtype>
void caffe_gpu_sgnbit(const int n, const Dtype* x, Dtype* y);
// Defines caffe_cpu_fabs(n, x, y) with y[i] = std::fabs(x[i]).
DEFINE_CAFFE_CPU_UNARY_FUNC(fabs, y[i] = std::fabs(x[i]));
// GPU counterpart; declaration only.
template <typename Dtype>
void caffe_gpu_fabs(const int n, const Dtype* x, Dtype* y);
// Declarations only. By name and signature, presumably y = alpha * x
// without modifying x - confirm against the definitions.
template <typename Dtype>
void caffe_cpu_scale(const int n, const Dtype alpha, const Dtype *x, Dtype* y);
template <typename Dtype>
void caffe_gpu_scale(const int n, const Dtype alpha, const Dtype *x, Dtype* y);
} // namespace caffe
#endif // CAFFE_UTIL_MATH_FUNCTIONS_H_

@ -0,0 +1,97 @@
// Copyright 2014 BVLC and contributors.
#ifndef CAFFE_UTIL_MKL_ALTERNATE_H_
#define CAFFE_UTIL_MKL_ALTERNATE_H_
// Select the BLAS backend. With USE_MKL defined, simply include the MKL
// header; otherwise include CBLAS and <math.h>, and define stand-ins for
// the MKL functions that Caffe uses.
#ifdef USE_MKL
#include <mkl.h>
#else // USE_MKL is not defined: use CBLAS plus the fallbacks defined below
extern "C" {
#include <cblas.h>
}
#include <math.h>
// Functions that caffe uses but are not present if MKL is not linked.
// A simple way to define the vsl unary functions. The operation should
// be in the form e.g. y[i] = sqrt(a[i])
//
// For a given `name` the macro expands to three functions:
//   v<name><Dtype>(n, a, y) - generic loop running `operation` for each
//                             i in [0, n), after checking n > 0 and that
//                             a and y are non-null
//   vs<name>(n, a, y)       - float wrapper
//   vd<name>(n, a, y)       - double wrapper
// `operation` is pasted verbatim into the loop body, so it must be written
// in terms of the identifiers i, a and y. CHECK_GT and CHECK are not defined
// in this header and must be in scope where the macro is expanded.
#define DEFINE_VSL_UNARY_FUNC(name, operation) \
template<typename Dtype> \
void v##name(const int n, const Dtype* a, Dtype* y) { \
CHECK_GT(n, 0); CHECK(a); CHECK(y); \
for (int i = 0; i < n; ++i) { operation; } \
} \
inline void vs##name( \
const int n, const float* a, float* y) { \
v##name<float>(n, a, y); \
} \
inline void vd##name( \
const int n, const double* a, double* y) { \
v##name<double>(n, a, y); \
}
// vSqr / vsSqr / vdSqr: y[i] = a[i] * a[i]
DEFINE_VSL_UNARY_FUNC(Sqr, y[i] = a[i] * a[i]);
// vExp / vsExp / vdExp: y[i] = exp(a[i])
DEFINE_VSL_UNARY_FUNC(Exp, y[i] = exp(a[i]));
// A simple way to define the vsl unary functions with singular parameter b.
// The operation should be in the form e.g. y[i] = pow(a[i], b)
//
// For a given `name` the macro expands to three functions:
//   v<name><Dtype>(n, a, b, y) - generic loop running `operation` for each
//                                i in [0, n), after checking n > 0 and that
//                                a and y are non-null
//   vs<name>(n, a, b, y)       - float wrapper
//   vd<name>(n, a, b, y)       - double wrapper
// `operation` is pasted verbatim into the loop body, so it must be written
// in terms of the identifiers i, a, b and y. CHECK_GT and CHECK are not
// defined in this header and must be in scope where the macro is expanded.
//
// The double wrapper takes b as a double, matching the MKL vd* signature it
// stands in for, so a double-precision scalar is not silently rounded to
// float on its way into the double computation.
#define DEFINE_VSL_UNARY_FUNC_WITH_PARAM(name, operation) \
  template<typename Dtype> \
  void v##name(const int n, const Dtype* a, const Dtype b, Dtype* y) { \
    CHECK_GT(n, 0); CHECK(a); CHECK(y); \
    for (int i = 0; i < n; ++i) { operation; } \
  } \
  inline void vs##name( \
      const int n, const float* a, const float b, float* y) { \
    v##name<float>(n, a, b, y); \
  } \
  inline void vd##name( \
      const int n, const double* a, const double b, double* y) { \
    v##name<double>(n, a, b, y); \
  }
DEFINE_VSL_UNARY_FUNC_WITH_PARAM(Powx, y[i] = pow(a[i], b));
// A simple way to define the vsl binary functions. The operation should
// be in the form e.g. y[i] = a[i] + b[i]
//
// For a given `name` the macro generates three functions:
//   v<name><Dtype>(n, a, b, y) - checks n > 0 and that a, b and y are
//                                non-null, then evaluates `operation` for
//                                i in [0, n);
//   vs<name>(n, a, b, y)       - float wrapper around v<name><float>;
//   vd<name>(n, a, b, y)       - double wrapper around v<name><double>.
#define DEFINE_VSL_BINARY_FUNC(name, operation) \
template<typename Dtype> \
void v##name(const int n, const Dtype* a, const Dtype* b, Dtype* y) { \
CHECK_GT(n, 0); CHECK(a); CHECK(b); CHECK(y); \
for (int i = 0; i < n; ++i) { operation; } \
} \
inline void vs##name( \
const int n, const float* a, const float* b, float* y) { \
v##name<float>(n, a, b, y); \
} \
inline void vd##name( \
const int n, const double* a, const double* b, double* y) { \
v##name<double>(n, a, b, y); \
}
// Element-wise sum, difference, product and quotient of a and b.
DEFINE_VSL_BINARY_FUNC(Add, y[i] = a[i] + b[i]);
DEFINE_VSL_BINARY_FUNC(Sub, y[i] = a[i] - b[i]);
DEFINE_VSL_BINARY_FUNC(Mul, y[i] = a[i] * b[i]);
DEFINE_VSL_BINARY_FUNC(Div, y[i] = a[i] / b[i]);
// In addition, MKL comes with an additional function axpby that is not present
// in standard blas. We will simply use a two-step (inefficient, of course) way
// to mimic that.
// Single-precision version: Y is first scaled by beta (cblas_sscal), then
// alpha * X is accumulated into it (cblas_saxpy). The scaling must come first
// so that only the original Y is multiplied by beta.
inline void cblas_saxpby(const int N, const float alpha, const float* X,
const int incX, const float beta, float* Y,
const int incY) {
cblas_sscal(N, beta, Y, incY);
cblas_saxpy(N, alpha, X, incX, Y, incY);
}
// Double-precision axpby substitute: Y is first scaled by beta (cblas_dscal),
// then alpha * X is accumulated into it (cblas_daxpy).
inline void cblas_daxpby(const int N, const double alpha, const double* X,
const int incX, const double beta, double* Y,
const int incY) {
cblas_dscal(N, beta, Y, incY);
cblas_daxpy(N, alpha, X, incX, Y, incY);
}
#endif // USE_MKL
#endif // CAFFE_UTIL_MKL_ALTERNATE_H_

@ -0,0 +1,19 @@
// Copyright 2014 BVLC and contributors.
#ifndef CAFFE_RNG_CPP_HPP_
#define CAFFE_RNG_CPP_HPP_
#include <boost/random/mersenne_twister.hpp>
#include "caffe/common.hpp"
namespace caffe {
// Random number generator type used by caffe: Boost's mt19937.
typedef boost::mt19937 rng_t;
// Returns the generator held by Caffe::rng_stream(), cast to rng_t*.
inline rng_t* caffe_rng() {
return static_cast<caffe::rng_t*>(Caffe::rng_stream().generator());
}
} // namespace caffe
#endif // CAFFE_RNG_CPP_HPP_

@ -0,0 +1,49 @@
// Copyright 2014 BVLC and contributors.
#ifndef CAFFE_UTIL_UPGRADE_PROTO_H_
#define CAFFE_UTIL_UPGRADE_PROTO_H_
#include <string>
#include "caffe/proto/caffe.pb.h"
#include "caffe/proto/caffe_pretty_print.pb.h"
using std::string;
namespace caffe {
// Return true iff any layer contains parameters specified using
// deprecated V0LayerParameter.
bool NetNeedsUpgrade(const NetParameter& net_param);
// Perform all necessary transformations to upgrade a V0NetParameter into a
// NetParameter (including upgrading padding layers and LayerParameters).
// The upgraded net is written to *net_param. NOTE(review): the bool result
// presumably reports whether the upgrade fully succeeded -- confirm in the
// implementation.
bool UpgradeV0Net(const NetParameter& v0_net_param, NetParameter* net_param);
// Upgrade NetParameter with padding layers to pad-aware conv layers.
// For any padding layer, remove it and put its pad parameter in any layers
// taking its top blob as input.
// It is an error if any of those consuming layers is not a conv layer.
void UpgradeV0PaddingLayers(const NetParameter& param,
NetParameter* param_upgraded_pad);
// Upgrade a single V0LayerConnection to the new LayerParameter format.
// The result is written to *layer_param.
bool UpgradeLayerParameter(const LayerParameter& v0_layer_connection,
LayerParameter* layer_param);
// Map a layer type given as a string to the LayerParameter_LayerType enum.
LayerParameter_LayerType UpgradeV0LayerType(const string& type);
// Convert a NetParameter to NetParameterPrettyPrint used for dumping to
// proto text files.
void NetParameterToPrettyPrint(const NetParameter& param,
NetParameterPrettyPrint* pretty_param);
// Read parameters from a file into a NetParameter proto message.
// One variant per on-disk format (text or binary); as the "OrDie" suffix
// suggests, failures are presumably fatal rather than reported to the caller.
void ReadNetParamsFromTextFileOrDie(const string& param_file,
NetParameter* param);
void ReadNetParamsFromBinaryFileOrDie(const string& param_file,
NetParameter* param);
} // namespace caffe
#endif // CAFFE_UTIL_UPGRADE_PROTO_H_

@ -0,0 +1,479 @@
// Copyright 2014 BVLC and contributors.
#ifndef CAFFE_VISION_LAYERS_HPP_
#define CAFFE_VISION_LAYERS_HPP_
#include <string>
#include <utility>
#include <vector>
#include "caffe/blob.hpp"
#include "caffe/common.hpp"
#include "caffe/layer.hpp"
#include "caffe/neuron_layers.hpp"
#include "caffe/loss_layers.hpp"
#include "caffe/data_layers.hpp"
#include "caffe/proto/caffe.pb.h"
namespace caffe {
/* ArgMaxLayer
Compute the index of the max value across all (channels x height x width).
[In the future, can take specific dimension.]
Intended for use after a classification layer to produce prediction.
If parameter out_max_val is set to true, then output is a vector of pairs
(max_ind, max_val) for each image.
NOTE: does not implement the Backward operation; Backward_cpu is a stub that
invokes NOT_IMPLEMENTED.
*/
template <typename Dtype>
class ArgMaxLayer : public Layer<Dtype> {
public:
explicit ArgMaxLayer(const LayerParameter& param)
: Layer<Dtype>(param) {}
virtual void SetUp(const vector<Blob<Dtype>*>& bottom,
vector<Blob<Dtype>*>* top);
// Identifies this layer as LayerParameter_LayerType_ARGMAX.
virtual inline LayerParameter_LayerType type() const {
return LayerParameter_LayerType_ARGMAX;
}
// Exactly one bottom blob and exactly one top blob.
virtual inline int ExactNumBottomBlobs() const { return 1; }
virtual inline int ExactNumTopBlobs() const { return 1; }
protected:
virtual Dtype Forward_cpu(const vector<Blob<Dtype>*>& bottom,
vector<Blob<Dtype>*>* top);
// No gradient is defined for this layer.
virtual void Backward_cpu(const vector<Blob<Dtype>*>& top,
const bool propagate_down, vector<Blob<Dtype>*>* bottom) {
NOT_IMPLEMENTED;
}
// Presumably caches the out_max_val parameter described above; it is
// assigned outside this header -- confirm in the implementation file.
bool out_max_val_;
};
/* ConcatLayer
Takes at least two blobs and concatenates them along either num or
channel dim, outputting the result.
*/
template <typename Dtype>
class ConcatLayer : public Layer<Dtype> {
public:
explicit ConcatLayer(const LayerParameter& param)
: Layer<Dtype>(param) {}
virtual void SetUp(const vector<Blob<Dtype>*>& bottom,
vector<Blob<Dtype>*>* top);
// Identifies this layer as LayerParameter_LayerType_CONCAT.
virtual inline LayerParameter_LayerType type() const {
return LayerParameter_LayerType_CONCAT;
}
// At least two bottom blobs, exactly one top blob.
virtual inline int MinBottomBlobs() const { return 2; }
virtual inline int ExactNumTopBlobs() const { return 1; }
protected:
virtual Dtype Forward_cpu(const vector<Blob<Dtype>*>& bottom,
vector<Blob<Dtype>*>* top);
virtual Dtype Forward_gpu(const vector<Blob<Dtype>*>& bottom,
vector<Blob<Dtype>*>* top);
virtual void Backward_cpu(const vector<Blob<Dtype>*>& top,
const bool propagate_down, vector<Blob<Dtype>*>* bottom);
virtual void Backward_gpu(const vector<Blob<Dtype>*>& top,
const bool propagate_down, vector<Blob<Dtype>*>* bottom);
// NOTE(review): the name looks like a typo for "col_blob_"; its purpose is
// not visible from this header.
Blob<Dtype> col_bob_;
// Bookkeeping of the output size; presumably filled in by SetUp.
int count_;
int num_;
int channels_;
int height_;
int width_;
// Presumably selects the dimension (num or channel) to concatenate along.
int concat_dim_;
};
/* ConvolutionLayer
Convolution layer taking exactly one bottom blob and producing exactly one
top blob, with CPU and GPU forward/backward passes declared.
*/
template <typename Dtype>
class ConvolutionLayer : public Layer<Dtype> {
public:
explicit ConvolutionLayer(const LayerParameter& param)
: Layer<Dtype>(param) {}
virtual void SetUp(const vector<Blob<Dtype>*>& bottom,
vector<Blob<Dtype>*>* top);
// Identifies this layer as LayerParameter_LayerType_CONVOLUTION.
virtual inline LayerParameter_LayerType type() const {
return LayerParameter_LayerType_CONVOLUTION;
}
virtual inline int ExactNumBottomBlobs() const { return 1; }
virtual inline int ExactNumTopBlobs() const { return 1; }
protected:
virtual Dtype Forward_cpu(const vector<Blob<Dtype>*>& bottom,
vector<Blob<Dtype>*>* top);
virtual Dtype Forward_gpu(const vector<Blob<Dtype>*>& bottom,
vector<Blob<Dtype>*>* top);
virtual void Backward_cpu(const vector<Blob<Dtype>*>& top,
const bool propagate_down, vector<Blob<Dtype>*>* bottom);
virtual void Backward_gpu(const vector<Blob<Dtype>*>& top,
const bool propagate_down, vector<Blob<Dtype>*>* bottom);
// Layer configuration and cached input geometry; presumably filled in by
// SetUp from the layer parameter and the bottom blob.
int kernel_size_;
int stride_;
int num_;
int channels_;
int pad_;
int height_;
int width_;
int num_output_;
int group_;
// Scratch blob; by its name, presumably the im2col column buffer.
Blob<Dtype> col_buffer_;
// Only meaningful when bias_term_ is set -- TODO confirm.
shared_ptr<SyncedMemory> bias_multiplier_;
bool bias_term_;
// NOTE(review): M_, K_ and N_ look like matrix-multiplication dimensions;
// confirm in the implementation.
int M_;
int K_;
int N_;
};
/* EltwiseLayer
Compute elementwise operations like product or sum.
Requires at least two bottom blobs and produces exactly one top blob.
*/
template <typename Dtype>
class EltwiseLayer : public Layer<Dtype> {
public:
explicit EltwiseLayer(const LayerParameter& param)
: Layer<Dtype>(param) {}
virtual void SetUp(const vector<Blob<Dtype>*>& bottom,
vector<Blob<Dtype>*>* top);
// Identifies this layer as LayerParameter_LayerType_ELTWISE.
virtual inline LayerParameter_LayerType type() const {
return LayerParameter_LayerType_ELTWISE;
}
virtual inline int MinBottomBlobs() const { return 2; }
virtual inline int ExactNumTopBlobs() const { return 1; }
protected:
virtual Dtype Forward_cpu(const vector<Blob<Dtype>*>& bottom,
vector<Blob<Dtype>*>* top);
virtual Dtype Forward_gpu(const vector<Blob<Dtype>*>& bottom,
vector<Blob<Dtype>*>* top);
virtual void Backward_cpu(const vector<Blob<Dtype>*>& top,
const bool propagate_down, vector<Blob<Dtype>*>* bottom);
virtual void Backward_gpu(const vector<Blob<Dtype>*>& top,
const bool propagate_down, vector<Blob<Dtype>*>* bottom);
// The elementwise operation to apply.
EltwiseParameter_EltwiseOp op_;
// Presumably one coefficient per bottom blob -- confirm in the
// implementation.
vector<Dtype> coeffs_;
};
/* FlattenLayer
Takes exactly one bottom blob and produces exactly one top blob.
NOTE(review): by its name this presumably reshapes each input item into a
flat vector -- confirm in the implementation.
*/
template <typename Dtype>
class FlattenLayer : public Layer<Dtype> {
public:
explicit FlattenLayer(const LayerParameter& param)
: Layer<Dtype>(param) {}
virtual void SetUp(const vector<Blob<Dtype>*>& bottom,
vector<Blob<Dtype>*>* top);
// Identifies this layer as LayerParameter_LayerType_FLATTEN.
virtual inline LayerParameter_LayerType type() const {
return LayerParameter_LayerType_FLATTEN;
}
virtual inline int ExactNumBottomBlobs() const { return 1; }
virtual inline int ExactNumTopBlobs() const { return 1; }
protected:
virtual Dtype Forward_cpu(const vector<Blob<Dtype>*>& bottom,
vector<Blob<Dtype>*>* top);
virtual Dtype Forward_gpu(const vector<Blob<Dtype>*>& bottom,
vector<Blob<Dtype>*>* top);
virtual void Backward_cpu(const vector<Blob<Dtype>*>& top,
const bool propagate_down, vector<Blob<Dtype>*>* bottom);
virtual void Backward_gpu(const vector<Blob<Dtype>*>& top,
const bool propagate_down, vector<Blob<Dtype>*>* bottom);
// Presumably the number of elements handled per pass; set outside this
// header.
int count_;
};
/* Im2colLayer
Takes exactly one bottom blob and produces exactly one top blob.
NOTE(review): by its name this presumably exposes the im2col rearrangement
used by convolution as a standalone layer -- confirm in the implementation.
*/
template <typename Dtype>
class Im2colLayer : public Layer<Dtype> {
public:
explicit Im2colLayer(const LayerParameter& param)
: Layer<Dtype>(param) {}
virtual void SetUp(const vector<Blob<Dtype>*>& bottom,
vector<Blob<Dtype>*>* top);
// Identifies this layer as LayerParameter_LayerType_IM2COL.
virtual inline LayerParameter_LayerType type() const {
return LayerParameter_LayerType_IM2COL;
}
virtual inline int ExactNumBottomBlobs() const { return 1; }
virtual inline int ExactNumTopBlobs() const { return 1; }
protected:
virtual Dtype Forward_cpu(const vector<Blob<Dtype>*>& bottom,
vector<Blob<Dtype>*>* top);
virtual Dtype Forward_gpu(const vector<Blob<Dtype>*>& bottom,
vector<Blob<Dtype>*>* top);
virtual void Backward_cpu(const vector<Blob<Dtype>*>& top,
const bool propagate_down, vector<Blob<Dtype>*>* bottom);
virtual void Backward_gpu(const vector<Blob<Dtype>*>& top,
const bool propagate_down, vector<Blob<Dtype>*>* bottom);
// Window configuration and cached input geometry; presumably filled in by
// SetUp.
int kernel_size_;
int stride_;
int channels_;
int height_;
int width_;
int pad_;
};
/* InnerProductLayer
Inner product layer taking exactly one bottom blob and producing exactly one
top blob, with CPU and GPU forward/backward passes declared.
*/
template <typename Dtype>
class InnerProductLayer : public Layer<Dtype> {
public:
explicit InnerProductLayer(const LayerParameter& param)
: Layer<Dtype>(param) {}
virtual void SetUp(const vector<Blob<Dtype>*>& bottom,
vector<Blob<Dtype>*>* top);
// Identifies this layer as LayerParameter_LayerType_INNER_PRODUCT.
virtual inline LayerParameter_LayerType type() const {
return LayerParameter_LayerType_INNER_PRODUCT;
}
virtual inline int ExactNumBottomBlobs() const { return 1; }
virtual inline int ExactNumTopBlobs() const { return 1; }
protected:
virtual Dtype Forward_cpu(const vector<Blob<Dtype>*>& bottom,
vector<Blob<Dtype>*>* top);
virtual Dtype Forward_gpu(const vector<Blob<Dtype>*>& bottom,
vector<Blob<Dtype>*>* top);
virtual void Backward_cpu(const vector<Blob<Dtype>*>& top,
const bool propagate_down, vector<Blob<Dtype>*>* bottom);
virtual void Backward_gpu(const vector<Blob<Dtype>*>& top,
const bool propagate_down, vector<Blob<Dtype>*>* bottom);
// NOTE(review): M_, K_ and N_ look like matrix-multiplication dimensions;
// confirm in the implementation.
int M_;
int K_;
int N_;
bool bias_term_;
// Only meaningful when bias_term_ is set -- TODO confirm.
shared_ptr<SyncedMemory> bias_multiplier_;
};
// Forward declare PoolingLayer and SplitLayer for use in LRNLayer.
// (LRNLayer only holds shared_ptrs to them, so the full definitions, which
// appear later in this header, are not needed yet.)
template <typename Dtype> class PoolingLayer;
template <typename Dtype> class SplitLayer;
/* LRNLayer
Local Response Normalization
Takes exactly one bottom blob and produces exactly one top blob. Two
normalization variants are declared: across channels (separate CPU and GPU
routines) and within a channel (built from the helper layers held as
members below).
*/
template <typename Dtype>
class LRNLayer : public Layer<Dtype> {
public:
explicit LRNLayer(const LayerParameter& param)
: Layer<Dtype>(param) {}
virtual void SetUp(const vector<Blob<Dtype>*>& bottom,
vector<Blob<Dtype>*>* top);
// Identifies this layer as LayerParameter_LayerType_LRN.
virtual inline LayerParameter_LayerType type() const {
return LayerParameter_LayerType_LRN;
}
virtual inline int ExactNumBottomBlobs() const { return 1; }
virtual inline int ExactNumTopBlobs() const { return 1; }
protected:
// Entry points called by the framework; presumably these dispatch to the
// CrossChannel* / WithinChannel* variants below -- confirm in the
// implementation.
virtual Dtype Forward_cpu(const vector<Blob<Dtype>*>& bottom,
vector<Blob<Dtype>*>* top);
virtual Dtype Forward_gpu(const vector<Blob<Dtype>*>& bottom,
vector<Blob<Dtype>*>* top);
virtual void Backward_cpu(const vector<Blob<Dtype>*>& top,
const bool propagate_down, vector<Blob<Dtype>*>* bottom);
virtual void Backward_gpu(const vector<Blob<Dtype>*>& top,
const bool propagate_down, vector<Blob<Dtype>*>* bottom);
// Forward passes of the two normalization variants.
virtual Dtype CrossChannelForward_cpu(const vector<Blob<Dtype>*>& bottom,
vector<Blob<Dtype>*>* top);
virtual Dtype CrossChannelForward_gpu(const vector<Blob<Dtype>*>& bottom,
vector<Blob<Dtype>*>* top);
virtual Dtype WithinChannelForward(const vector<Blob<Dtype>*>& bottom,
vector<Blob<Dtype>*>* top);
// Backward passes of the two normalization variants.
virtual void CrossChannelBackward_cpu(const vector<Blob<Dtype>*>& top,
const bool propagate_down, vector<Blob<Dtype>*>* bottom);
virtual void CrossChannelBackward_gpu(const vector<Blob<Dtype>*>& top,
const bool propagate_down, vector<Blob<Dtype>*>* bottom);
virtual void WithinChannelBackward(const vector<Blob<Dtype>*>& top,
const bool propagate_down, vector<Blob<Dtype>*>* bottom);
// Normalization parameters and cached input geometry; presumably filled in
// by SetUp.
int size_;
int pre_pad_;
Dtype alpha_;
Dtype beta_;
int num_;
int channels_;
int height_;
int width_;
// Fields used for normalization ACROSS_CHANNELS
// scale_ stores the intermediate summing results
Blob<Dtype> scale_;
// Fields used for normalization WITHIN_CHANNEL
// Each helper layer is held together with the blobs and blob vectors used
// to call it.
shared_ptr<SplitLayer<Dtype> > split_layer_;
vector<Blob<Dtype>*> split_top_vec_;
shared_ptr<PowerLayer<Dtype> > square_layer_;
Blob<Dtype> square_input_;
Blob<Dtype> square_output_;
vector<Blob<Dtype>*> square_bottom_vec_;
vector<Blob<Dtype>*> square_top_vec_;
shared_ptr<PoolingLayer<Dtype> > pool_layer_;
Blob<Dtype> pool_output_;
vector<Blob<Dtype>*> pool_top_vec_;
shared_ptr<PowerLayer<Dtype> > power_layer_;
Blob<Dtype> power_output_;
vector<Blob<Dtype>*> power_top_vec_;
shared_ptr<EltwiseLayer<Dtype> > product_layer_;
Blob<Dtype> product_data_input_;
vector<Blob<Dtype>*> product_bottom_vec_;
};
/* PoolingLayer
Pooling layer taking exactly one bottom blob and producing at least one and
at most max_top_blobs_ top blobs.
*/
template <typename Dtype>
class PoolingLayer : public Layer<Dtype> {
public:
explicit PoolingLayer(const LayerParameter& param)
: Layer<Dtype>(param) {}
virtual void SetUp(const vector<Blob<Dtype>*>& bottom,
vector<Blob<Dtype>*>* top);
// Identifies this layer as LayerParameter_LayerType_POOLING.
virtual inline LayerParameter_LayerType type() const {
return LayerParameter_LayerType_POOLING;
}
virtual inline int ExactNumBottomBlobs() const { return 1; }
virtual inline int MinTopBlobs() const { return 1; }
// NOTE(review): max_top_blobs_ is not initialized by the constructor, so it
// must be assigned before this accessor is consulted -- confirm that SetUp
// does so.
virtual inline int MaxTopBlobs() const { return max_top_blobs_; }
protected:
virtual Dtype Forward_cpu(const vector<Blob<Dtype>*>& bottom,
vector<Blob<Dtype>*>* top);
virtual Dtype Forward_gpu(const vector<Blob<Dtype>*>& bottom,
vector<Blob<Dtype>*>* top);
virtual void Backward_cpu(const vector<Blob<Dtype>*>& top,
const bool propagate_down, vector<Blob<Dtype>*>* bottom);
virtual void Backward_gpu(const vector<Blob<Dtype>*>& top,
const bool propagate_down, vector<Blob<Dtype>*>* bottom);
// Upper bound on the number of top blobs, reported by MaxTopBlobs().
int max_top_blobs_;
// Pooling window configuration and cached input/output geometry; presumably
// filled in by SetUp.
int kernel_size_;
int stride_;
int pad_;
int channels_;
int height_;
int width_;
int pooled_height_;
int pooled_width_;
// By their names, presumably the indices chosen by stochastic pooling and
// by max pooling respectively -- confirm in the implementation.
Blob<Dtype> rand_idx_;
shared_ptr<Blob<int> > max_idx_;
};
/* SoftmaxLayer
Softmax layer taking exactly one bottom blob and producing exactly one top
blob, with CPU and GPU forward/backward passes declared.
*/
template <typename Dtype>
class SoftmaxLayer : public Layer<Dtype> {
public:
explicit SoftmaxLayer(const LayerParameter& param)
: Layer<Dtype>(param) {}
virtual void SetUp(const vector<Blob<Dtype>*>& bottom,
vector<Blob<Dtype>*>* top);
// Identifies this layer as LayerParameter_LayerType_SOFTMAX.
virtual inline LayerParameter_LayerType type() const {
return LayerParameter_LayerType_SOFTMAX;
}
virtual inline int ExactNumBottomBlobs() const { return 1; }
virtual inline int ExactNumTopBlobs() const { return 1; }
protected:
virtual Dtype Forward_cpu(const vector<Blob<Dtype>*>& bottom,
vector<Blob<Dtype>*>* top);
virtual Dtype Forward_gpu(const vector<Blob<Dtype>*>& bottom,
vector<Blob<Dtype>*>* top);
virtual void Backward_cpu(const vector<Blob<Dtype>*>& top,
const bool propagate_down, vector<Blob<Dtype>*>* bottom);
virtual void Backward_gpu(const vector<Blob<Dtype>*>& top,
const bool propagate_down, vector<Blob<Dtype>*>* bottom);
// sum_multiplier is just used to carry out sum using blas
Blob<Dtype> sum_multiplier_;
// scale is an intermediate blob to hold temporary results.
Blob<Dtype> scale_;
};
/* SoftmaxWithLossLayer
Implements softmax and computes the loss.
It is preferred over separate softmax + multinomiallogisticloss
layers due to more numerically stable gradients.
In test, this layer could be replaced by simple softmax layer.
Takes exactly two bottom blobs and produces no top blob.
*/
template <typename Dtype>
class SoftmaxWithLossLayer : public Layer<Dtype> {
public:
// The internal softmax layer is constructed from the same LayerParameter.
explicit SoftmaxWithLossLayer(const LayerParameter& param)
: Layer<Dtype>(param), softmax_layer_(new SoftmaxLayer<Dtype>(param)) {}
virtual void SetUp(const vector<Blob<Dtype>*>& bottom,
vector<Blob<Dtype>*>* top);
// Identifies this layer as LayerParameter_LayerType_SOFTMAX_LOSS.
virtual inline LayerParameter_LayerType type() const {
return LayerParameter_LayerType_SOFTMAX_LOSS;
}
virtual inline int ExactNumBottomBlobs() const { return 2; }
virtual inline int ExactNumTopBlobs() const { return 0; }
protected:
virtual Dtype Forward_cpu(const vector<Blob<Dtype>*>& bottom,
vector<Blob<Dtype>*>* top);
virtual Dtype Forward_gpu(const vector<Blob<Dtype>*>& bottom,
vector<Blob<Dtype>*>* top);
virtual void Backward_cpu(const vector<Blob<Dtype>*>& top,
const bool propagate_down, vector<Blob<Dtype>*>* bottom);
virtual void Backward_gpu(const vector<Blob<Dtype>*>& top,
const bool propagate_down, vector<Blob<Dtype>*>* bottom);
// The wrapped softmax layer, created in the constructor.
shared_ptr<SoftmaxLayer<Dtype> > softmax_layer_;
// prob stores the output probability of the layer.
Blob<Dtype> prob_;
// Vector holders to call the underlying softmax layer forward and backward.
vector<Blob<Dtype>*> softmax_bottom_vec_;
vector<Blob<Dtype>*> softmax_top_vec_;
};
/* SplitLayer
Takes exactly one bottom blob and produces at least one top blob.
NOTE(review): by its name this presumably fans a single blob out to several
consumers -- confirm in the implementation.
*/
template <typename Dtype>
class SplitLayer : public Layer<Dtype> {
public:
explicit SplitLayer(const LayerParameter& param)
: Layer<Dtype>(param) {}
virtual void SetUp(const vector<Blob<Dtype>*>& bottom,
vector<Blob<Dtype>*>* top);
// Identifies this layer as LayerParameter_LayerType_SPLIT.
virtual inline LayerParameter_LayerType type() const {
return LayerParameter_LayerType_SPLIT;
}
virtual inline int ExactNumBottomBlobs() const { return 1; }
virtual inline int MinTopBlobs() const { return 1; }
protected:
virtual Dtype Forward_cpu(const vector<Blob<Dtype>*>& bottom,
vector<Blob<Dtype>*>* top);
virtual Dtype Forward_gpu(const vector<Blob<Dtype>*>& bottom,
vector<Blob<Dtype>*>* top);
virtual void Backward_cpu(const vector<Blob<Dtype>*>& top,
const bool propagate_down, vector<Blob<Dtype>*>* bottom);
virtual void Backward_gpu(const vector<Blob<Dtype>*>& top,
const bool propagate_down, vector<Blob<Dtype>*>* bottom);
// Presumably the number of elements handled per pass; set outside this
// header.
int count_;
};
} // namespace caffe
#endif // CAFFE_VISION_LAYERS_HPP_

@ -0,0 +1,370 @@
// Copyright 2014 BVLC and contributors.
//
// matcaffe.cpp provides a wrapper of the caffe::Net class as well as some
// caffe::Caffe functions so that one could easily call it from matlab.
// Note that for matlab, we will simply use float as the data type.
#include <string>
#include <vector>
#include "mex.h"
#include "caffe/caffe.hpp"
// Parameter list shared by mexFunction and every command handler in this file.
#define MEX_ARGS int nlhs, mxArray **plhs, int nrhs, const mxArray **prhs
using namespace caffe; // NOLINT(build/namespaces)
// The pointer to the internal caffe::Net instance
static shared_ptr<Net<float> > net_;
// Key reported by get_init_key: -2 while no network is loaded (reset restores
// this value); init replaces it with a value obtained from random().
static int init_key = -2;
// Five things to be aware of:
// caffe uses row-major order
// matlab uses column-major order
// caffe uses BGR color channel order
// matlab uses RGB color channel order
// images need to have the data mean subtracted
//
// Data coming in from matlab needs to be in the order
// [width, height, channels, images]
// where width is the fastest dimension.
// Here is the rough matlab for putting image data into the correct
// format:
// % convert from uint8 to single
// im = single(im);
// % reshape to a fixed size (e.g., 227x227)
// im = imresize(im, [IMAGE_DIM IMAGE_DIM], 'bilinear');
// % permute from RGB to BGR and subtract the data mean (already in BGR)
// im = im(:,:,[3 2 1]) - data_mean;
// % flip width and height to make width the fastest dimension
// im = permute(im, [2 1 3]);
//
// If you have multiple images, cat them with cat(4, ...)
//
// The actual forward function. It takes in a cell array of 4-D arrays as
// input and outputs a cell array.
//
// bottom: cell array whose first dimension equals the number of network
//   input blobs; element i must be a single-precision array holding exactly
//   input_blobs[i]->count() values.
// Returns a new N x 1 cell array (N = number of output blobs); cell i is a
//   single-precision array of size [width, height, channels, num] filled
//   with the data of output blob i.
// Raises a MATLAB error (mexErrMsgTxt) when the network is not initialized
//   or when `bottom` does not satisfy the requirements above. Validating
//   here prevents memcpy/cudaMemcpy from reading past the end of a
//   caller-supplied array.
static mxArray* do_forward(const mxArray* const bottom) {
  if (!net_) {
    mexErrMsgTxt("Network is not initialized; call init first");
  }
  if (!mxIsCell(bottom)) {
    mexErrMsgTxt("forward expects a cell array of single-precision arrays");
  }
  vector<Blob<float>*>& input_blobs = net_->input_blobs();
  if (static_cast<size_t>(mxGetDimensions(bottom)[0]) != input_blobs.size()) {
    mexErrMsgTxt("Number of input cells does not match the network inputs");
  }
  for (unsigned int i = 0; i < input_blobs.size(); ++i) {
    const mxArray* const elem = mxGetCell(bottom, i);
    // An unpopulated cell yields NULL; non-single data would be
    // misinterpreted by the raw byte copy below.
    if (elem == NULL || !mxIsSingle(elem)) {
      mexErrMsgTxt("Each input must be a single-precision array");
    }
    const size_t count = static_cast<size_t>(input_blobs[i]->count());
    if (mxGetNumberOfElements(elem) != count) {
      mexErrMsgTxt("Input size does not match the network input blob");
    }
    const float* const data_ptr = static_cast<const float*>(mxGetData(elem));
    switch (Caffe::mode()) {
    case Caffe::CPU:
      memcpy(input_blobs[i]->mutable_cpu_data(), data_ptr,
          sizeof(float) * count);
      break;
    case Caffe::GPU:
      CUDA_CHECK(cudaMemcpy(input_blobs[i]->mutable_gpu_data(), data_ptr,
          sizeof(float) * count, cudaMemcpyHostToDevice));
      break;
    default:
      LOG(FATAL) << "Unknown Caffe mode.";
    }  // switch (Caffe::mode())
  }
  const vector<Blob<float>*>& output_blobs = net_->ForwardPrefilled();
  mxArray* mx_out = mxCreateCellMatrix(output_blobs.size(), 1);
  for (unsigned int i = 0; i < output_blobs.size(); ++i) {
    // internally data is stored as (width, height, channels, num)
    // where width is the fastest dimension
    mwSize dims[4] = {static_cast<mwSize>(output_blobs[i]->width()),
                      static_cast<mwSize>(output_blobs[i]->height()),
                      static_cast<mwSize>(output_blobs[i]->channels()),
                      static_cast<mwSize>(output_blobs[i]->num())};
    mxArray* mx_blob = mxCreateNumericArray(4, dims, mxSINGLE_CLASS, mxREAL);
    mxSetCell(mx_out, i, mx_blob);
    float* data_ptr = static_cast<float*>(mxGetData(mx_blob));
    switch (Caffe::mode()) {
    case Caffe::CPU:
      memcpy(data_ptr, output_blobs[i]->cpu_data(),
          sizeof(float) * output_blobs[i]->count());
      break;
    case Caffe::GPU:
      CUDA_CHECK(cudaMemcpy(data_ptr, output_blobs[i]->gpu_data(),
          sizeof(float) * output_blobs[i]->count(), cudaMemcpyDeviceToHost));
      break;
    default:
      LOG(FATAL) << "Unknown Caffe mode.";
    }  // switch (Caffe::mode())
  }
  return mx_out;
}
// The backward function. It takes in a cell array with the gradients with
// respect to the network outputs and returns the gradients with respect to
// the network inputs.
//
// top_diff: cell array whose first dimension equals the number of network
//   output blobs; element i must be a single-precision array holding exactly
//   output_blobs[i]->count() values.
// Returns a new N x 1 cell array (N = number of input blobs); cell i is a
//   single-precision array of size [width, height, channels, num] filled
//   with the diff of input blob i after net_->Backward().
// Raises a MATLAB error (mexErrMsgTxt) when the network is not initialized
//   or when `top_diff` does not satisfy the requirements above. Validating
//   here prevents memcpy/cudaMemcpy from reading past the end of a
//   caller-supplied array.
static mxArray* do_backward(const mxArray* const top_diff) {
  if (!net_) {
    mexErrMsgTxt("Network is not initialized; call init first");
  }
  if (!mxIsCell(top_diff)) {
    mexErrMsgTxt("backward expects a cell array of single-precision arrays");
  }
  vector<Blob<float>*>& output_blobs = net_->output_blobs();
  vector<Blob<float>*>& input_blobs = net_->input_blobs();
  if (static_cast<size_t>(mxGetDimensions(top_diff)[0]) !=
      output_blobs.size()) {
    mexErrMsgTxt("Number of diff cells does not match the network outputs");
  }
  // First, copy the output diff
  for (unsigned int i = 0; i < output_blobs.size(); ++i) {
    const mxArray* const elem = mxGetCell(top_diff, i);
    // An unpopulated cell yields NULL; non-single data would be
    // misinterpreted by the raw byte copy below.
    if (elem == NULL || !mxIsSingle(elem)) {
      mexErrMsgTxt("Each output diff must be a single-precision array");
    }
    const size_t count = static_cast<size_t>(output_blobs[i]->count());
    if (mxGetNumberOfElements(elem) != count) {
      mexErrMsgTxt("Diff size does not match the network output blob");
    }
    const float* const data_ptr = static_cast<const float*>(mxGetData(elem));
    switch (Caffe::mode()) {
    case Caffe::CPU:
      memcpy(output_blobs[i]->mutable_cpu_diff(), data_ptr,
          sizeof(float) * count);
      break;
    case Caffe::GPU:
      CUDA_CHECK(cudaMemcpy(output_blobs[i]->mutable_gpu_diff(), data_ptr,
          sizeof(float) * count, cudaMemcpyHostToDevice));
      break;
    default:
      LOG(FATAL) << "Unknown Caffe mode.";
    }  // switch (Caffe::mode())
  }
  // LOG(INFO) << "Start";
  net_->Backward();
  // LOG(INFO) << "End";
  mxArray* mx_out = mxCreateCellMatrix(input_blobs.size(), 1);
  for (unsigned int i = 0; i < input_blobs.size(); ++i) {
    // internally data is stored as (width, height, channels, num)
    // where width is the fastest dimension
    mwSize dims[4] = {static_cast<mwSize>(input_blobs[i]->width()),
                      static_cast<mwSize>(input_blobs[i]->height()),
                      static_cast<mwSize>(input_blobs[i]->channels()),
                      static_cast<mwSize>(input_blobs[i]->num())};
    mxArray* mx_blob = mxCreateNumericArray(4, dims, mxSINGLE_CLASS, mxREAL);
    mxSetCell(mx_out, i, mx_blob);
    float* data_ptr = static_cast<float*>(mxGetData(mx_blob));
    switch (Caffe::mode()) {
    case Caffe::CPU:
      memcpy(data_ptr, input_blobs[i]->cpu_diff(),
          sizeof(float) * input_blobs[i]->count());
      break;
    case Caffe::GPU:
      CUDA_CHECK(cudaMemcpy(data_ptr, input_blobs[i]->gpu_diff(),
          sizeof(float) * input_blobs[i]->count(), cudaMemcpyDeviceToHost));
      break;
    default:
      LOG(FATAL) << "Unknown Caffe mode.";
    }  // switch (Caffe::mode())
  }
  return mx_out;
}
// Collects the parameter blobs of every layer of the current network.
//
// Returns a new N x 1 struct array with the fields
//   "weights"     - cell array with one single-precision array of size
//                   [width, height, channels, num] per blob of the layer;
//   "layer_names" - the layer's name.
// Only layers owning at least one blob are reported, and a layer whose name
// equals that of the previously reported layer does not get an entry.
// Raises a MATLAB error (mexErrMsgTxt) when the network is not initialized.
static mxArray* do_get_weights() {
  if (!net_) {
    mexErrMsgTxt("Network is not initialized; call init first");
  }
  const vector<shared_ptr<Layer<float> > >& layers = net_->layers();
  const vector<string>& layer_names = net_->layer_names();
  // Step 1: count the number of layers with weights
  int num_layers = 0;
  {
    string prev_layer_name = "";
    for (unsigned int i = 0; i < layers.size(); ++i) {
      vector<shared_ptr<Blob<float> > >& layer_blobs = layers[i]->blobs();
      if (layer_blobs.size() == 0) {
        continue;
      }
      if (layer_names[i] != prev_layer_name) {
        prev_layer_name = layer_names[i];
        num_layers++;
      }
    }
  }
  // Step 2: prepare output array of structures
  mxArray* mx_layers;
  {
    const mwSize dims[2] = {static_cast<mwSize>(num_layers), 1};
    const char* fnames[2] = {"weights", "layer_names"};
    mx_layers = mxCreateStructArray(2, dims, 2, fnames);
  }
  // Step 3: copy weights into output
  {
    string prev_layer_name = "";
    int mx_layer_index = 0;
    for (unsigned int i = 0; i < layers.size(); ++i) {
      vector<shared_ptr<Blob<float> > >& layer_blobs = layers[i]->blobs();
      if (layer_blobs.size() == 0) {
        continue;
      }
      if (layer_names[i] == prev_layer_name) {
        // Step 1 reserved no output slot for a repeated name, so there is no
        // cell array to write into; skip instead of dereferencing NULL.
        continue;
      }
      prev_layer_name = layer_names[i];
      const mwSize cell_dims[2] = {static_cast<mwSize>(layer_blobs.size()), 1};
      mxArray* mx_layer_cells = mxCreateCellArray(2, cell_dims);
      mxSetField(mx_layers, mx_layer_index, "weights", mx_layer_cells);
      mxSetField(mx_layers, mx_layer_index, "layer_names",
          mxCreateString(layer_names[i].c_str()));
      mx_layer_index++;
      for (unsigned int j = 0; j < layer_blobs.size(); ++j) {
        // internally data is stored as (width, height, channels, num)
        // where width is the fastest dimension
        mwSize dims[4] = {static_cast<mwSize>(layer_blobs[j]->width()),
                          static_cast<mwSize>(layer_blobs[j]->height()),
                          static_cast<mwSize>(layer_blobs[j]->channels()),
                          static_cast<mwSize>(layer_blobs[j]->num())};
        mxArray* mx_weights =
            mxCreateNumericArray(4, dims, mxSINGLE_CLASS, mxREAL);
        mxSetCell(mx_layer_cells, j, mx_weights);
        float* weights_ptr = static_cast<float*>(mxGetData(mx_weights));
        switch (Caffe::mode()) {
        case Caffe::CPU:
          memcpy(weights_ptr, layer_blobs[j]->cpu_data(),
              sizeof(float) * layer_blobs[j]->count());
          break;
        case Caffe::GPU:
          CUDA_CHECK(cudaMemcpy(weights_ptr, layer_blobs[j]->gpu_data(),
              sizeof(float) * layer_blobs[j]->count(),
              cudaMemcpyDeviceToHost));
          break;
        default:
          LOG(FATAL) << "Unknown caffe mode: " << Caffe::mode();
        }
      }
    }
  }
  return mx_layers;
}
// "get_weights" command: returns the result of do_get_weights() as the first
// output. The input arguments are not inspected.
static void get_weights(MEX_ARGS) {
plhs[0] = do_get_weights();
}
// "set_mode_cpu" command: switches Caffe to CPU mode. Arguments are ignored.
static void set_mode_cpu(MEX_ARGS) {
Caffe::set_mode(Caffe::CPU);
}
// "set_mode_gpu" command: switches Caffe to GPU mode. Arguments are ignored.
static void set_mode_gpu(MEX_ARGS) {
Caffe::set_mode(Caffe::GPU);
}
// "set_phase_train" command: sets the Caffe phase to TRAIN. Arguments are
// ignored.
static void set_phase_train(MEX_ARGS) {
Caffe::set_phase(Caffe::TRAIN);
}
// "set_phase_test" command: sets the Caffe phase to TEST. Arguments are
// ignored.
static void set_phase_test(MEX_ARGS) {
Caffe::set_phase(Caffe::TEST);
}
// "set_device" command: selects the device to use.
// Expects exactly one argument, the device id as a numeric scalar, which is
// truncated to int and handed to Caffe::SetDevice. Any other argument count
// is logged and reported to MATLAB as an error.
static void set_device(MEX_ARGS) {
  const bool arity_ok = (nrhs == 1);
  if (!arity_ok) {
    LOG(ERROR) << "Only given " << nrhs << " arguments";
    mexErrMsgTxt("Wrong number of arguments");
  }
  Caffe::SetDevice(static_cast<int>(mxGetScalar(prhs[0])));
}
// "get_init_key" command: returns the current init_key as a double scalar.
static void get_init_key(MEX_ARGS) {
plhs[0] = mxCreateDoubleScalar(init_key);
}
// "init" command: builds the network and loads the trained weights.
//
// Expects exactly two string arguments: the network definition file and the
// trained model file. On success the global net_ is replaced, init_key is
// set to a fresh random() value and, if an output is requested, that key is
// returned as a double scalar.
// Raises a MATLAB error (mexErrMsgTxt) on a wrong argument count or when
// either argument cannot be converted to a string; mxArrayToString returns
// NULL in that case, which must not reach the std::string constructor.
static void init(MEX_ARGS) {
  if (nrhs != 2) {
    LOG(ERROR) << "Only given " << nrhs << " arguments";
    mexErrMsgTxt("Wrong number of arguments");
  }
  char* param_file = mxArrayToString(prhs[0]);
  char* model_file = mxArrayToString(prhs[1]);
  if (param_file == NULL || model_file == NULL) {
    // mxFree accepts NULL, so both can be released unconditionally.
    mxFree(param_file);
    mxFree(model_file);
    mexErrMsgTxt("init expects two strings: the net definition file and "
                 "the trained model file");
  }
  net_.reset(new Net<float>(string(param_file)));
  net_->CopyTrainedLayersFrom(string(model_file));
  mxFree(param_file);
  mxFree(model_file);
  init_key = random();  // NOLINT(caffe/random_fn)
  if (nlhs == 1) {
    plhs[0] = mxCreateDoubleScalar(init_key);
  }
}
// "reset" command: releases the current network, if any, and restores
// init_key to its "no network" value of -2. Does nothing when no network is
// loaded.
static void reset(MEX_ARGS) {
  if (!net_) {
    return;
  }
  net_.reset();
  init_key = -2;
  LOG(INFO) << "Network reset, call init before use it again";
}
// "forward" command: runs do_forward on the single argument and returns its
// result as the first output. Any other argument count is logged and
// reported to MATLAB as an error.
static void forward(MEX_ARGS) {
  const bool arity_ok = (nrhs == 1);
  if (!arity_ok) {
    LOG(ERROR) << "Only given " << nrhs << " arguments";
    mexErrMsgTxt("Wrong number of arguments");
  }
  mxArray* const outputs = do_forward(prhs[0]);
  plhs[0] = outputs;
}
// "backward" command: runs do_backward on the single argument and returns
// its result as the first output. Any other argument count is logged and
// reported to MATLAB as an error.
static void backward(MEX_ARGS) {
  const bool arity_ok = (nrhs == 1);
  if (!arity_ok) {
    LOG(ERROR) << "Only given " << nrhs << " arguments";
    mexErrMsgTxt("Wrong number of arguments");
  }
  mxArray* const input_diffs = do_backward(prhs[0]);
  plhs[0] = input_diffs;
}
// "is_initialized" command: returns 1 when a network is loaded and 0
// otherwise, as a double scalar.
static void is_initialized(MEX_ARGS) {
  const double loaded = !net_ ? 0 : 1;
  plhs[0] = mxCreateDoubleScalar(loaded);
}
/** -----------------------------------------------------------------
** Available commands.
**/
// One entry of the command dispatch table: the command name as passed from
// matlab and the handler invoked for it.
struct handler_registry {
string cmd;
void (*func)(MEX_ARGS);
};
// Dispatch table scanned linearly by mexFunction. The entry with a NULL
// handler terminates the scan, so it must stay last.
static handler_registry handlers[] = {
// Public API functions
{ "forward", forward },
{ "backward", backward },
{ "init", init },
{ "is_initialized", is_initialized },
{ "set_mode_cpu", set_mode_cpu },
{ "set_mode_gpu", set_mode_gpu },
{ "set_phase_train", set_phase_train },
{ "set_phase_test", set_phase_test },
{ "set_device", set_device },
{ "get_weights", get_weights },
{ "get_init_key", get_init_key },
{ "reset", reset },
// The end.
{ "END", NULL },
};
/** -----------------------------------------------------------------
 ** matlab entry point: caffe(api_command, arg1, arg2, ...)
 **
 ** The first argument names the command; it is looked up in the handlers
 ** table and the matching handler is invoked with the remaining arguments.
 ** A MATLAB error is raised when no command is given, when the command is
 ** not a string (mxArrayToString returns NULL in that case, which must not
 ** reach string::compare or the log stream), or when it is not recognized.
 **/
void mexFunction(MEX_ARGS) {
  if (nrhs == 0) {
    LOG(ERROR) << "No API command given";
    mexErrMsgTxt("An API command is requires");
    return;
  }
  {  // Handle input command
    char *cmd = mxArrayToString(prhs[0]);
    if (cmd == NULL) {
      LOG(ERROR) << "API command is not a string";
      mexErrMsgTxt("API command must be a string");
      return;
    }
    bool dispatched = false;
    // Dispatch to cmd handler
    for (int i = 0; handlers[i].func != NULL; i++) {
      if (handlers[i].cmd.compare(cmd) == 0) {
        handlers[i].func(nlhs, plhs, nrhs-1, prhs+1);
        dispatched = true;
        break;
      }
    }
    if (!dispatched) {
      LOG(ERROR) << "Unknown command `" << cmd << "'";
      mexErrMsgTxt("API command not recognized");
    }
    mxFree(cmd);
  }
}

@ -0,0 +1,76 @@
function [scores,list_im] = matcaffe_batch(list_im, use_gpu)
% [scores, list_im] = matcaffe_batch(list_im, use_gpu)
%
% Demo of the matlab wrapper using the ILSVRC network.
%
% input
%   list_im  list of images files, either a cell array of file names or the
%            name of a text file containing the list
%   use_gpu  1 to use the GPU, 0 to use the CPU
%
% output
%   scores   1000 x num_images ILSVRC output vector
%   list_im  the list of image files that was processed (a cell array, also
%            when a list file name was passed in)
%
% When list_im is the name of a list file, the results are additionally
% saved to [list file name '.probs.mat'].
%
% You may need to do the following before you start matlab:
%  $ export LD_LIBRARY_PATH=/opt/intel/mkl/lib/intel64:/usr/local/cuda/lib64
%  $ export LD_PRELOAD=/usr/lib/x86_64-linux-gnu/libstdc++.so.6
% Or the equivalent based on where things are installed on your system
%
% Usage:
%  scores = matcaffe_batch({'peppers.png','onion.png'});
%  scores = matcaffe_batch('list_images.txt', 1);
if nargin < 1
  % For test purposes: two sample images, matching the usage example above
  % (the second sample image is named 'onion.png', not 'onions.png').
  list_im = {'peppers.png','onion.png'};
end
if ischar(list_im)
  % Assume it is a file containing the list of images
  filename = list_im;
  list_im = read_cell(filename);
end
% Adjust the batch size to match with imagenet_deploy.prototxt
batch_size = 10;
% Adjust dim to the output size of imagenet_deploy.prototxt
dim = 1000;
disp(list_im)
if mod(length(list_im),batch_size)
  warning(['Assuming batches of ' num2str(batch_size) ' images rest will be filled with zeros'])
end
% init caffe network (spews logging info)
if exist('use_gpu', 'var')
  matcaffe_init(use_gpu);
else
  matcaffe_init();
end
d = load('ilsvrc_2012_mean');
IMAGE_MEAN = d.image_mean;
% prepare input
num_images = length(list_im);
scores = zeros(dim,num_images,'single');
num_batches = ceil(length(list_im)/batch_size)
initic=tic;
for bb = 1 : num_batches
  batchtic = tic;
  % indices of the images belonging to this batch (the last batch may be
  % shorter than batch_size)
  range = 1+batch_size*(bb-1):min(num_images,batch_size * bb);
  tic
  input_data = prepare_batch(list_im(range),IMAGE_MEAN,batch_size);
  toc, tic
  fprintf('Batch %d out of %d %.2f%% Complete ETA %.2f seconds\n',...
    bb,num_batches,bb/num_batches*100,toc(initic)/bb*(num_batches-bb));
  output_data = caffe('forward', {input_data});
  toc
  output_data = squeeze(output_data{1});
  % keep only the columns that correspond to real images of this batch
  scores(:,range) = output_data(:,mod(range-1,batch_size)+1);
  toc(batchtic)
end
toc(initic);
if exist('filename', 'var')
  save([filename '.probs.mat'],'list_im','scores','-v7.3');
end

@ -0,0 +1,110 @@
function [scores, maxlabel] = matcaffe_demo(im, use_gpu)
% MATCAFFE_DEMO  Classify a single image with the ILSVRC network.
%
%   [scores, maxlabel] = matcaffe_demo(im, use_gpu)
%
% Inputs
%   im        color image as uint8 HxWx3 (falls back to 'peppers.png')
%   use_gpu   1 to use the GPU, 0 to use the CPU
%
% Outputs
%   scores    1000-dimensional ILSVRC score vector (mean over the crops
%             produced by prepare_image)
%   maxlabel  index of the largest entry in scores
%
% Before starting matlab you may have to set up the library paths, e.g.
%   $ export LD_LIBRARY_PATH=/opt/intel/mkl/lib/intel64:/usr/local/cuda-5.5/lib64
%   $ export LD_PRELOAD=/usr/lib/x86_64-linux-gnu/libstdc++.so.6
% or whatever matches the install locations on your system.
%
% Example
%   im = imread('../../examples/images/cat.jpg');
%   scores = matcaffe_demo(im, 1);
%   [score, class] = max(scores);
%
% Data layout notes
%   * caffe is row-major, matlab is column-major
%   * caffe expects BGR channel order, matlab images are RGB
%   * the data mean has to be subtracted from every image
%   * arrays handed to caffe must be ordered
%     [width, height, channels, images], width being the fastest dimension
%
% Rough recipe for converting one image:
%   im = single(im);                                        % uint8 -> single
%   im = imresize(im, [IMAGE_DIM IMAGE_DIM], 'bilinear');   % fixed size
%   im = im(:,:,[3 2 1]) - data_mean;                       % RGB -> BGR, minus mean (BGR)
%   im = permute(im, [2 1 3]);                              % width becomes fastest
% Several images are stacked with cat(4, ...).
%
% The forward call takes a cell array of 4-D arrays and returns a cell
% array.

% Bring up the network (spews logging info). The device flag is forwarded
% only when the caller supplied it, so matcaffe_init keeps its own default.
if nargin >= 2
    matcaffe_init(use_gpu);
else
    matcaffe_init();
end

% For demo purposes fall back to the peppers image.
if nargin == 0
    im = imread('peppers.png');
end

% Build the oversampled input batch and time it.
tic;
net_input = {prepare_image(im)};
toc;

% Forward pass; the result comes back as a cell array.
tic;
net_output = caffe('forward', net_input);
toc;

scores = net_output{1};
size(scores)
% Collapse singleton dimensions, then average over the crops.
scores = mean(squeeze(scores), 2);
[~, maxlabel] = max(scores);
% ------------------------------------------------------------------------
function images = prepare_image(im)
% ------------------------------------------------------------------------
% Turn one RGB image into the 10-crop oversampled network input: the four
% corners and the center (slots 1-5) followed by their mirrored versions
% (slots 6-10). Every crop is stored with its first two dimensions swapped.
mean_data = load('ilsvrc_2012_mean');
IMAGE_MEAN = mean_data.image_mean;
IMAGE_DIM = 256;
CROPPED_DIM = 227;

% Resize to the fixed input size, then go from RGB to BGR and subtract the
% mean (IMAGE_MEAN is already BGR).
im = imresize(single(im), [IMAGE_DIM IMAGE_DIM], 'bilinear');
im = im(:,:,[3 2 1]) - IMAGE_MEAN;

images = zeros(CROPPED_DIM, CROPPED_DIM, 3, 10, 'single');
span = CROPPED_DIM - 1;
offsets = [1, IMAGE_DIM - CROPPED_DIM + 1];

% Corner crops go to slots 1..4, their mirrors to slots 6..9.
slot = 0;
for row0 = offsets
    for col0 = offsets
        slot = slot + 1;
        images(:, :, :, slot) = ...
            permute(im(row0:row0+span, col0:col0+span, :), [2 1 3]);
        images(:, :, :, slot+5) = images(end:-1:1, :, :, slot);
    end
end

% Center crop in slot 5, its mirror in slot 10.
mid = floor(offsets(2) / 2) + 1;
images(:, :, :, 5) = ...
    permute(im(mid:mid+span, mid:mid+span, :), [2 1 3]);
images(:, :, :, 10) = images(end:-1:1, :, :, 5);

@ -0,0 +1,44 @@
function matcaffe_init(use_gpu, model_def_file, model_file)
% MATCAFFE_INIT  Initialize the matcaffe wrapper.
%
%   matcaffe_init(use_gpu, model_def_file, model_file)
%
%   use_gpu         nonzero selects GPU mode, otherwise CPU mode (default 0)
%   model_def_file  network prototxt (default: the imagenet deploy file)
%   model_file      trained weights (default: the caffe reference model)
%
% The network is only loaded when caffe reports that it is not initialized
% yet; the device mode and the test phase are set on every call.

% Fill in defaults for missing or empty arguments.
if nargin < 1
    use_gpu = 0;
end
if nargin < 2 || isempty(model_def_file)
    model_def_file = '../../examples/imagenet/imagenet_deploy.prototxt';
end
if nargin < 3 || isempty(model_file)
    model_file = '../../examples/imagenet/caffe_reference_imagenet_model';
end

if caffe('is_initialized') == 0
    % The pre-trained ILSVRC weights must be obtained separately.
    if ~exist(model_file, 'file')
        error('You need a network model file');
    end
    % The same goes for the network definition.
    if exist(model_def_file, 'file') == 0
        error('You need the network prototxt definition');
    end
    caffe('init', model_def_file, model_file)
end
fprintf('Done with init\n');

% Select the compute device.
if use_gpu
    fprintf('Using GPU Mode\n');
    caffe('set_mode_gpu');
else
    fprintf('Using CPU Mode\n');
    caffe('set_mode_cpu');
end
fprintf('Done with set_mode\n');

% Put the network into test mode.
caffe('set_phase_test');
fprintf('Done with set_phase_test\n');

@ -0,0 +1,41 @@
% ------------------------------------------------------------------------
function images = prepare_batch(image_files,IMAGE_MEAN,batch_size)
% ------------------------------------------------------------------------
% PREPARE_BATCH  Load image files into one center-cropped network batch.
%
%   images = prepare_batch(image_files, IMAGE_MEAN, batch_size)
%
%   image_files  cell array of image file names
%   IMAGE_MEAN   mean image to subtract, already in BGR order (loaded from
%                'ilsvrc_2012_mean' when omitted)
%   batch_size   size of the 4th dimension of the result (defaults to the
%                number of files)
%
%   images       single array of size 227 x 227 x 3 x batch_size. Slots
%                whose file could not be processed stay zero and a warning
%                naming the file is issued.
if nargin < 2
    d = load('ilsvrc_2012_mean');
    IMAGE_MEAN = d.image_mean;
end
num_images = length(image_files);
if nargin < 3
    batch_size = num_images;
end

IMAGE_DIM = 256;
CROPPED_DIM = 227;
indices = [0 IMAGE_DIM-CROPPED_DIM] + 1;
center = floor(indices(2) / 2)+1;

images = zeros(CROPPED_DIM,CROPPED_DIM,3,batch_size,'single');

parfor i=1:num_images
    % read file
    fprintf('%c Preparing %s\n',13,image_files{i});
    try
        im = imread(image_files{i});
        % resize to fixed input size
        im = single(im);
        im = imresize(im, [IMAGE_DIM IMAGE_DIM], 'bilinear');
        % Transform GRAY to RGB
        if size(im,3) == 1
            im = cat(3,im,im,im);
        end
        % permute from RGB to BGR (IMAGE_MEAN is already BGR)
        im = im(:,:,[3 2 1]) - IMAGE_MEAN;
        % Crop the center of the image
        images(:,:,:,i) = permute(im(center:center+CROPPED_DIM-1,...
            center:center+CROPPED_DIM-1,:),[2 1 3]);
    catch err
        % Best effort: keep going with the remaining files, but say which
        % file failed and why. The file name is passed as a format
        % argument; without a '%s' in the message it was silently dropped.
        warning('Problems with file %s (%s)', image_files{i}, err.message);
    end
end

@ -0,0 +1,42 @@
function res=print_cell(input,file,linesep,cellsep)
% PRINT_CELL  Write a cell array to a text file or format it as a string.
%
%   res = print_cell(input, file, linesep, cellsep)
%
%   input    cell array; each element is either a string or a nested cell
%            array of strings (one output line per element). A 2-D cell
%            table of strings is written one row per line when a file is
%            given.
%   file     output file name; when missing or empty the text is returned
%            in res instead of being written
%   linesep  format string terminating each line (default '\n')
%   cellsep  separator for the entries of a nested cell: either a string
%            appended after each entry (default '\t') or a two-element
%            cell {prefix, suffix} wrapped around each entry
%
%   res      the formatted text in string mode, '' in file mode
assert(iscell(input),'The input should be a cell')
if nargin < 4
    cellsep = '\t';
end
if nargin < 3
    linesep = '\n';
end

% Accept both separator forms everywhere. Brace-indexing the default char
% separator used to raise an error in string mode.
if iscell(cellsep)
    entry_format = [cellsep{1} '%s' cellsep{2}];
else
    entry_format = ['%s' cellsep];
end

res = '';
if exist('file','var') && ~isempty(file)
    fid = fopen(file,'w');
    if fid < 0
        error('print_cell:fopen', 'Could not open %s for writing', file);
    end
    % Close the file even when one of the fprintf calls below fails.
    closer = onCleanup(@() fclose(fid));

    % Only a genuine table is walked row by row; vectors get one element
    % per line. Iterating length(input) rows over a row vector or a wide
    % table indexed past the end.
    is_table = all(size(input) > 1);
    if is_table
        num_rows = size(input,1);
    else
        num_rows = numel(input);
    end
    for l=1:num_rows
        if iscell(input{l})
            for i=1:length(input{l})
                fprintf(fid,entry_format,input{l}{i});
            end
            fprintf(fid,linesep);
        elseif is_table
            for i=1:size(input,2)
                fprintf(fid,'%s ',input{l,i});
            end
            fprintf(fid,linesep);
        else
            fprintf(fid,['%s' linesep],input{l});
        end
    end
else
    for l=1:length(input)
        if iscell(input{l})
            for i=1:length(input{l})
                res = [res sprintf(entry_format,input{l}{i})];
            end
            res = [res sprintf(linesep)];
        else
            res = [res sprintf(['%s' linesep],input{l}(:))];
        end
    end
end

@ -0,0 +1,21 @@
function res=read_cell(filename,linesep,cellsep)
% READ_CELL  Read lines of text into a cell array.
%
%   res = read_cell(filename, linesep, cellsep)
%
%   filename  name of an existing file; anything else (a file id or a
%             string holding the text itself) is handed to textscan as is
%   linesep   line delimiter (default '\n')
%   cellsep   regular expression separating the cells of a line
%             (default '\t')
%
%   res       cell array with one string per line. When the first line
%             contains cellsep, every line is split on it and the result
%             is passed through cell2matcell.
if nargin < 2, linesep='\n'; end
if nargin < 3, cellsep = '\t'; end

% exist() only accepts strings, so guard it for numeric file ids.
opened_here = ischar(filename) && exist(filename,'file') ~= 0;
if opened_here
    fid = fopen(filename);
    if fid < 0
        error('read_cell:fopen', 'Could not open %s for reading', filename);
    end
    % Only close what was opened here; a caller-supplied file id stays
    % open. The cleanup object also runs when textscan errors.
    closer = onCleanup(@() fclose(fid));
else
    % Assume that filename is either a file id or a string
    fid = filename;
end

fileLines = textscan(fid,'%s','delimiter',linesep,'BufSize',100000);
fileLines = fileLines{1};

% Nothing was read: return the empty cell instead of indexing into it.
if isempty(fileLines)
    res = fileLines;
    return;
end

if regexp(fileLines{1},cellsep,'once')
    % Drop leading/trailing separators before splitting each line.
    fileLines = regexprep(fileLines,['^' cellsep '|' cellsep '$'],'');
    res = regexp(fileLines,cellsep,'split');
    res = cell2matcell(res);
else
    res = fileLines;
end

@ -0,0 +1,4 @@
from .pycaffe import Net, SGDSolver
from .classifier import Classifier
from .detector import Detector
import io

@ -0,0 +1,357 @@
// Copyright 2014 BVLC and contributors.
// pycaffe provides a wrapper of the caffe::Net class as well as some
// caffe::Caffe functions so that one could easily call it from Python.
// Note that for Python, we will simply use float as the data type.
#define NPY_NO_DEPRECATED_API NPY_1_7_API_VERSION
#include "boost/python.hpp"
#include "boost/python/suite/indexing/vector_indexing_suite.hpp"
#include "numpy/arrayobject.h"
// these need to be included after boost on OS X
#include <string> // NOLINT(build/include_order)
#include <vector> // NOLINT(build/include_order)
#include <fstream> // NOLINT
#include "caffe/caffe.hpp"
// Temporary solution for numpy < 1.7 versions: old macro, no promises.
// You're strongly advised to upgrade to >= 1.7.
#ifndef NPY_ARRAY_C_CONTIGUOUS
#define NPY_ARRAY_C_CONTIGUOUS NPY_C_CONTIGUOUS
#define PyArray_SetBaseObject(arr, x) (PyArray_BASE(arr) = (x))
#endif
using namespace caffe; // NOLINT(build/namespaces)
using boost::python::extract;
using boost::python::len;
using boost::python::list;
using boost::python::object;
using boost::python::handle;
using boost::python::vector_indexing_suite;
// Convenience pre-flight check: try to open the given input file and throw
// a std::runtime_error (which boost forwards to Python) when that fails.
// This is not a guarantee - caffe could still crash later if the file is
// disturbed before it is actually used - but it saves frustration in most
// cases.
static void CheckFile(const string& filename) {
  std::ifstream probe(filename.c_str());
  const bool readable = probe.good();
  probe.close();
  if (readable) {
    return;
  }
  throw std::runtime_error("Could not open file " + filename);
}
// Wraps a shared_ptr<Blob<float> > together with the blob's name in a class
// that is constructed in C++ and passed to Python. The accessors forward to
// the underlying blob.
class CaffeBlob {
 public:
  CaffeBlob(const shared_ptr<Blob<float> > &blob, const string& name)
      : blob_(blob), name_(name) {}

  string name() const { return name_; }
  int num() const { return blob_->num(); }
  int channels() const { return blob_->channels(); }
  int height() const { return blob_->height(); }
  int width() const { return blob_->width(); }
  int count() const { return blob_->count(); }

  // This is here only to satisfy boost's vector_indexing_suite. Two
  // wrappers compare equal when they share the same underlying blob; the
  // name is not compared. Const-qualified so that const wrappers can be
  // compared as well.
  bool operator == (const CaffeBlob &other) const {
    return this->blob_ == other.blob_;
  }

 protected:
  shared_ptr<Blob<float> > blob_;
  string name_;
};
// Second wrapper, used as boost::python's HeldType. It receives the owning
// Python object (self) so that this object can be installed as ndarray.base
// on every array handed out, which keeps the data/diff memory from being
// freed while it is still being used in Python.
class CaffeBlobWrap : public CaffeBlob {
 public:
  CaffeBlobWrap(PyObject *p, const CaffeBlob &blob)
      : CaffeBlob(blob), self_(p) {}

  // ndarray over the blob's CPU data buffer (no copy is made here).
  object get_data() { return as_ndarray(blob_->mutable_cpu_data()); }

  // ndarray over the blob's CPU diff buffer (no copy is made here).
  object get_diff() { return as_ndarray(blob_->mutable_cpu_diff()); }

 private:
  // Wraps `buffer` in a num x channels x height x width float32 ndarray
  // whose base object is the Python-side owner of this blob.
  object as_ndarray(float *buffer) {
    npy_intp shape[] = {num(), channels(), height(), width()};
    PyObject *array =
        PyArray_SimpleNewFromData(4, shape, NPY_FLOAT32, buffer);
    PyArray_SetBaseObject(reinterpret_cast<PyArrayObject *>(array), self_);
    // The array now refers to self_ as its base, so account for that
    // reference.
    Py_INCREF(self_);
    handle<> owned(array);
    return object(owned);
  }

  PyObject *self_;
};
// Wraps a shared_ptr<Layer<float> > together with the layer's name for
// exposure to Python.
class CaffeLayer {
 public:
  CaffeLayer(const shared_ptr<Layer<float> > &layer, const string &name)
      : layer_(layer), name_(name) {}

  string name() const { return name_; }

  // Returns one CaffeBlob per blob of the layer; each wrapper carries the
  // layer's name.
  vector<CaffeBlob> blobs() {
    const size_t num_blobs = layer_->blobs().size();
    vector<CaffeBlob> result;
    result.reserve(num_blobs);
    for (size_t i = 0; i < num_blobs; ++i) {
      result.push_back(CaffeBlob(layer_->blobs()[i], name_));
    }
    return result;
  }

  // This is here only to satisfy boost's vector_indexing_suite. Two
  // wrappers compare equal when they share the same underlying layer.
  // Const-qualified so that const wrappers can be compared as well.
  bool operator == (const CaffeLayer &other) const {
    return this->layer_ == other.layer_;
  }

 protected:
  shared_ptr<Layer<float> > layer_;
  string name_;
};
// A simple wrapper over CaffeNet that runs the forward process.
struct CaffeNet {
// For cases where parameters will be determined later by the Python user,
// create a Net with unallocated parameters (which will not be zero-filled
// when accessed).
explicit CaffeNet(string param_file) {
Init(param_file);
}
CaffeNet(string param_file, string pretrained_param_file) {
Init(param_file);
CheckFile(pretrained_param_file);
net_->CopyTrainedLayersFrom(pretrained_param_file);
}
explicit CaffeNet(shared_ptr<Net<float> > net)
: net_(net) {}
void Init(string param_file) {
CheckFile(param_file);
net_.reset(new Net<float>(param_file));
}
virtual ~CaffeNet() {}
// Generate Python exceptions for badly shaped or discontiguous arrays.
inline void check_contiguous_array(PyArrayObject* arr, string name,
int channels, int height, int width) {
if (!(PyArray_FLAGS(arr) & NPY_ARRAY_C_CONTIGUOUS)) {
throw std::runtime_error(name + " must be C contiguous");
}
if (PyArray_NDIM(arr) != 4) {
throw std::runtime_error(name + " must be 4-d");
}
if (PyArray_TYPE(arr) != NPY_FLOAT32) {
throw std::runtime_error(name + " must be float32");
}
if (PyArray_DIMS(arr)[1] != channels) {
throw std::runtime_error(name + " has wrong number of channels");
}
if (PyArray_DIMS(arr)[2] != height) {
throw std::runtime_error(name + " has wrong height");
}
if (PyArray_DIMS(arr)[3] != width) {
throw std::runtime_error(name + " has wrong width");
}
}
void Forward() {
net_->ForwardPrefilled();
}
void Backward() {
net_->Backward();
}
void set_input_arrays(object data_obj, object labels_obj) {
// check that this network has an input MemoryDataLayer
shared_ptr<MemoryDataLayer<float> > md_layer =
boost::dynamic_pointer_cast<MemoryDataLayer<float> >(net_->layers()[0]);
if (!md_layer) {
throw std::runtime_error("set_input_arrays may only be called if the"
" first layer is a MemoryDataLayer");
}
// check that we were passed appropriately-sized contiguous memory
PyArrayObject* data_arr =
reinterpret_cast<PyArrayObject*>(data_obj.ptr());
PyArrayObject* labels_arr =
reinterpret_cast<PyArrayObject*>(labels_obj.ptr());
check_contiguous_array(data_arr, "data array", md_layer->datum_channels(),
md_layer->datum_height(), md_layer->datum_width());
check_contiguous_array(labels_arr, "labels array", 1, 1, 1);
if (PyArray_DIMS(data_arr)[0] != PyArray_DIMS(labels_arr)[0]) {
throw std::runtime_error("data and labels must have the same first"
" dimension");
}
if (PyArray_DIMS(data_arr)[0] % md_layer->batch_size() != 0) {
throw std::runtime_error("first dimensions of input arrays must be a"
" multiple of batch size");
}
// hold references
input_data_ = data_obj;
input_labels_ = labels_obj;
md_layer->Reset(static_cast<float*>(PyArray_DATA(data_arr)),
static_cast<float*>(PyArray_DATA(labels_arr)),
PyArray_DIMS(data_arr)[0]);
}
// save the network weights to binary proto for net surgeries.
void save(string filename) {
NetParameter net_param;
net_->ToProto(&net_param, false);
WriteProtoToBinaryFile(net_param, filename.c_str());
}
// The caffe::Caffe utility functions.
void set_mode_cpu() { Caffe::set_mode(Caffe::CPU); }
void set_mode_gpu() { Caffe::set_mode(Caffe::GPU); }
void set_phase_train() { Caffe::set_phase(Caffe::TRAIN); }
void set_phase_test() { Caffe::set_phase(Caffe::TEST); }
void set_device(int device_id) { Caffe::SetDevice(device_id); }
vector<CaffeBlob> blobs() {
vector<CaffeBlob> result;
for (int i = 0; i < net_->blobs().size(); ++i) {
result.push_back(CaffeBlob(net_->blobs()[i], net_->blob_names()[i]));
}
return result;
}
vector<CaffeLayer> layers() {
vector<CaffeLayer> result;
for (int i = 0; i < net_->layers().size(); ++i) {
result.push_back(CaffeLayer(net_->layers()[i], net_->layer_names()[i]));
}
return result;
}
list inputs() {
list input_blob_names;
for (int i = 0; i < net_->input_blob_indices().size(); ++i) {
input_blob_names.append(
net_->blob_names()[net_->input_blob_indices()[i]]);
}
return input_blob_names;
}
list outputs() {
list output_blob_names;
for (int i = 0; i < net_->output_blob_indices().size(); ++i) {
output_blob_names.append(
net_->blob_names()[net_->output_blob_indices()[i]]);
}
return output_blob_names;
}
// The pointer to the internal caffe::Net instant.
shared_ptr<Net<float> > net_;
// if taking input from an ndarray, we need to hold references
object input_data_;
object input_labels_;
};
// Python-facing wrapper around SGDSolver<float> that also owns the CaffeNet
// wrapper for the solver's net.
class CaffeSGDSolver {
 public:
  explicit CaffeSGDSolver(const string& param_file) {
    // Same convenience (not guarantee) as in CaffeNet: turn an unreadable
    // param_file into a Python exception.
    CheckFile(param_file);
    solver_.reset(new SGDSolver<float>(param_file));
    // The net wrapper has to be stored rather than built on demand, so
    // that it can hold references to Python objects.
    net_.reset(new CaffeNet(solver_->net()));
  }

  shared_ptr<CaffeNet> net() { return net_; }

  // Runs the solver.
  void Solve() {
    solver_->Solve();
  }

  // Runs the solver starting from resume_file, which must be readable.
  void SolveResume(const string& resume_file) {
    CheckFile(resume_file);
    solver_->Solve(resume_file);
  }

 protected:
  shared_ptr<CaffeNet> net_;
  shared_ptr<SGDSolver<float> > solver_;
};
// The boost_python module definition: registers the wrapper classes above
// as the Python extension module `_caffe`.
BOOST_PYTHON_MODULE(_caffe) {
// below, we prepend an underscore to methods that will be replaced
// in Python
// Net: constructible from (param_file, pretrained_param_file) or from
// param_file alone; instances are held by shared_ptr<CaffeNet>.
boost::python::class_<CaffeNet, shared_ptr<CaffeNet> >(
"Net", boost::python::init<string, string>())
.def(boost::python::init<string>())
.def("_forward", &CaffeNet::Forward)
.def("_backward", &CaffeNet::Backward)
.def("set_mode_cpu", &CaffeNet::set_mode_cpu)
.def("set_mode_gpu", &CaffeNet::set_mode_gpu)
.def("set_phase_train", &CaffeNet::set_phase_train)
.def("set_phase_test", &CaffeNet::set_phase_test)
.def("set_device", &CaffeNet::set_device)
.add_property("_blobs", &CaffeNet::blobs)
.add_property("layers", &CaffeNet::layers)
.add_property("inputs", &CaffeNet::inputs)
.add_property("outputs", &CaffeNet::outputs)
.def("_set_input_arrays", &CaffeNet::set_input_arrays)
.def("save", &CaffeNet::save);
// Blob: not constructible from Python (no_init). CaffeBlobWrap is the held
// type, which is what provides the ndarray-returning data/diff properties.
boost::python::class_<CaffeBlob, CaffeBlobWrap>(
"Blob", boost::python::no_init)
.add_property("name", &CaffeBlob::name)
.add_property("num", &CaffeBlob::num)
.add_property("channels", &CaffeBlob::channels)
.add_property("height", &CaffeBlob::height)
.add_property("width", &CaffeBlob::width)
.add_property("count", &CaffeBlob::count)
.add_property("data", &CaffeBlobWrap::get_data)
.add_property("diff", &CaffeBlobWrap::get_diff);
// Layer: not constructible from Python; exposes its name and blobs.
boost::python::class_<CaffeLayer>(
"Layer", boost::python::no_init)
.add_property("name", &CaffeLayer::name)
.add_property("blobs", &CaffeLayer::blobs);
// SGDSolver: built from a solver parameter file. "solve" is registered
// twice: once without arguments and once taking a resume file.
boost::python::class_<CaffeSGDSolver, boost::noncopyable>(
"SGDSolver", boost::python::init<string>())
.add_property("net", &CaffeSGDSolver::net)
.def("solve", &CaffeSGDSolver::Solve)
.def("solve", &CaffeSGDSolver::SolveResume);
// Container types returned by the blobs/layers properties above.
boost::python::class_<vector<CaffeBlob> >("BlobVec")
.def(vector_indexing_suite<vector<CaffeBlob>, true>());
boost::python::class_<vector<CaffeLayer> >("LayerVec")
.def(vector_indexing_suite<vector<CaffeLayer>, true>());
// Set up the NumPy C API; the PyArray_* calls in this file need it.
import_array();
}

@ -0,0 +1,86 @@
#!/usr/bin/env python
"""
Classifier is an image classifier specialization of Net.
"""
import numpy as np
import caffe
class Classifier(caffe.Net):
    """
    Classifier extends Net for image class prediction
    by scaling, center cropping, or oversampling.
    """
    def __init__(self, model_file, pretrained_file, image_dims=None,
                 gpu=False, mean_file=None, input_scale=None,
                 channel_swap=None):
        """
        Take
        model_file, pretrained_file: passed on to caffe.Net.
        image_dims: dimensions to scale input for cropping/sampling.
            Default is to scale to net input size for whole-image crop.
        gpu, mean_file, input_scale, channel_swap: convenience params for
            setting mode, mean, input scale, and channel order.
        """
        caffe.Net.__init__(self, model_file, pretrained_file)
        self.set_phase_test()

        if gpu:
            self.set_mode_gpu()
        else:
            self.set_mode_cpu()

        if mean_file:
            self.set_mean(self.inputs[0], mean_file)
        if input_scale:
            self.set_input_scale(self.inputs[0], input_scale)
        if channel_swap:
            self.set_channel_swap(self.inputs[0], channel_swap)

        # Spatial size of the first input blob.
        self.crop_dims = np.array(self.blobs[self.inputs[0]].data.shape[2:])
        # Explicit emptiness test: `not image_dims` raises for ndarrays.
        if image_dims is None or len(image_dims) == 0:
            image_dims = self.crop_dims
        self.image_dims = image_dims

    def predict(self, inputs, oversample=True):
        """
        Predict classification probabilities of inputs.

        Take
        inputs: iterable of (H x W x K) input ndarrays.
        oversample: average predictions across center, corners, and mirrors
                    when True (default). Center-only prediction when False.

        Give
        predictions: (N x C) ndarray of class probabilities
                     for N images and C classes.
        """
        # Scale to standardize input dimensions.
        inputs = np.asarray([caffe.io.resize_image(im, self.image_dims)
                             for im in inputs])

        if oversample:
            # Generate center, corner, and mirrored crops.
            inputs = caffe.io.oversample(inputs, self.crop_dims)
        else:
            # Take center crop. The box is computed in floats, so truncate
            # it to integers before slicing (float indices are rejected by
            # current numpy).
            center = np.array(self.image_dims) / 2.0
            crop = np.tile(center, (1, 2))[0] + np.concatenate([
                -self.crop_dims / 2.0,
                self.crop_dims / 2.0
            ])
            crop = crop.astype(int)
            inputs = inputs[:, crop[0]:crop[2], crop[1]:crop[3], :]

        # Classify
        caffe_in = np.asarray([self.preprocess(self.inputs[0], in_)
                               for in_ in inputs])
        out = self.forward_all(**{self.inputs[0]: caffe_in})
        predictions = out[self.outputs[0]].squeeze(axis=(2, 3))

        # For oversampling, average predictions across crops. Floor
        # division keeps the reshape dimension an integer.
        if oversample:
            predictions = predictions.reshape((len(predictions) // 10, 10, -1))
            predictions = predictions.mean(1)

        return predictions

@ -0,0 +1,191 @@
#!/usr/bin/env python
"""
Do windowed detection by classifying a number of images/crops at once,
optionally using the selective search window proposal method.
This implementation follows ideas in
Ross Girshick, Jeff Donahue, Trevor Darrell, Jitendra Malik.
Rich feature hierarchies for accurate object detection and semantic
segmentation.
http://arxiv.org/abs/1311.2524
The selective_search_ijcv_with_python code required for the selective search
proposal mode is available at
https://github.com/sergeyk/selective_search_ijcv_with_python
"""
import numpy as np
import os
import caffe
class Detector(caffe.Net):
    """
    Detector extends Net for windowed detection by a list of crops or
    selective search proposals.
    """
    def __init__(self, model_file, pretrained_file, gpu=False, mean_file=None,
                 input_scale=None, channel_swap=None, context_pad=None):
        """
        Take
        model_file, pretrained_file: passed on to caffe.Net.
        gpu, mean_file, input_scale, channel_swap: convenience params for
            setting mode, mean, input scale, and channel order.
        context_pad: amount of surrounding context to take s.t. a `context_pad`
            sized border of pixels in the network input image is context, as in
            R-CNN feature extraction.
        """
        caffe.Net.__init__(self, model_file, pretrained_file)
        self.set_phase_test()

        if gpu:
            self.set_mode_gpu()
        else:
            self.set_mode_cpu()

        if mean_file:
            self.set_mean(self.inputs[0], mean_file)
        if input_scale:
            self.set_input_scale(self.inputs[0], input_scale)
        if channel_swap:
            self.set_channel_swap(self.inputs[0], channel_swap)

        self.configure_crop(context_pad)

    def detect_windows(self, images_windows):
        """
        Do windowed detection over given images and windows. Windows are
        extracted then warped to the input dimensions of the net.

        Take
        images_windows: (image filename, window list) iterable.

        Give
        detections: list of {filename: image filename, window: crop coordinates,
            prediction: prediction vector} dicts.
        """
        # The pairs are walked twice below (extraction, then packaging), so
        # materialize them in case a one-shot iterator was passed.
        images_windows = list(images_windows)

        # Extract windows.
        window_inputs = []
        for image_fname, windows in images_windows:
            image = caffe.io.load_image(image_fname).astype(np.float32)
            for window in windows:
                window_inputs.append(self.crop(image, window))

        # Run through the net (warping windows to input dimensions).
        caffe_in = np.asarray([self.preprocess(self.inputs[0], window_in)
                               for window_in in window_inputs])
        out = self.forward_all(**{self.inputs[0]: caffe_in})
        predictions = out[self.outputs[0]].squeeze(axis=(2, 3))

        # Package predictions with images and windows.
        detections = []
        ix = 0
        for image_fname, windows in images_windows:
            for window in windows:
                detections.append({
                    'window': window,
                    'prediction': predictions[ix],
                    'filename': image_fname
                })
                ix += 1
        return detections

    def detect_selective_search(self, image_fnames):
        """
        Do windowed detection over Selective Search proposals by extracting
        the crop and warping to the input dimensions of the net.

        Take
        image_fnames: list

        Give
        detections: list of {filename: image filename, window: crop coordinates,
            prediction: prediction vector} dicts.
        """
        import selective_search_ijcv_with_python as selective_search
        # Make absolute paths so MATLAB can find the files.
        image_fnames = [os.path.abspath(f) for f in image_fnames]
        windows_list = selective_search.get_windows(
            image_fnames,
            cmd='selective_search_rcnn'
        )
        # Run windowed detection on the selective search list.
        return self.detect_windows(zip(image_fnames, windows_list))

    def crop(self, im, window):
        """
        Crop a window from the image for detection. Include surrounding context
        according to the `context_pad` configuration.

        Take
        im: H x W x K image ndarray to crop.
        window: bounding box coordinates as ymin, xmin, ymax, xmax.
            It must provide `.copy()` when context padding is enabled.

        Give
        crop: cropped window.
        """
        # Crop window from the image.
        crop = im[window[0]:window[2], window[1]:window[3]]

        if self.context_pad:
            box = window.copy()
            crop_size = self.blobs[self.inputs[0]].width  # assumes square
            scale = crop_size / (1. * crop_size - self.context_pad * 2)
            # Crop a box + surrounding context.
            half_h = (box[2] - box[0] + 1) / 2.
            half_w = (box[3] - box[1] + 1) / 2.
            center = (box[0] + half_h, box[1] + half_w)
            scaled_dims = scale * np.array((-half_h, -half_w, half_h, half_w))
            box = np.round(np.tile(center, 2) + scaled_dims)
            full_h = box[2] - box[0] + 1
            full_w = box[3] - box[1] + 1
            scale_h = crop_size / full_h
            scale_w = crop_size / full_w
            # Amount out-of-bounds. Cast to int: these values are used as
            # array indices below and round() may return a float.
            pad_y = int(round(max(0, -box[0]) * scale_h))
            pad_x = int(round(max(0, -box[1]) * scale_w))

            # Clip box to image dimensions.
            im_h, im_w = im.shape[:2]
            box = np.clip(box, 0., [im_h, im_w, im_h, im_w])
            clip_h = box[2] - box[0] + 1
            clip_w = box[3] - box[1] + 1
            assert(clip_h > 0 and clip_w > 0)
            crop_h = int(round(clip_h * scale_h))
            crop_w = int(round(clip_w * scale_w))
            if pad_y + crop_h > crop_size:
                crop_h = crop_size - pad_y
            if pad_x + crop_w > crop_size:
                crop_w = crop_size - pad_x

            # collect with context padding and place in input
            # with mean padding; the box holds floats after rounding and
            # clipping, so convert it before slicing
            y0, x0, y1, x1 = box.astype(int)
            context_crop = im[y0:y1, x0:x1]
            context_crop = caffe.io.resize_image(context_crop, (crop_h, crop_w))
            crop = self.crop_mean.copy()
            crop[pad_y:(pad_y + crop_h), pad_x:(pad_x + crop_w)] = context_crop

        return crop

    def configure_crop(self, context_pad):
        """
        Configure amount of context for cropping.
        If context is included, make the special input mean for context padding.

        Take
        context_pad: amount of context for cropping.
        """
        self.context_pad = context_pad
        if self.context_pad:
            # NOTE(review): input_scale, channel_swap and mean are assumed
            # to be per-input mappings maintained by the Net setters -
            # confirm against pycaffe.
            input_scale = self.input_scale.get(self.inputs[0])
            channel_order = self.channel_swap.get(self.inputs[0])
            # Padding context crops needs the mean in unprocessed input space.
            self.crop_mean = self.mean[self.inputs[0]].copy()
            self.crop_mean = self.crop_mean.transpose((1, 2, 0))
            # Undo the channel swap / scaling only when they were actually
            # configured; both lookups yield None otherwise.
            if channel_order is not None:
                channel_order = list(channel_order)
                channel_order_inverse = [
                    channel_order.index(i)
                    for i in range(self.crop_mean.shape[2])]
                self.crop_mean = self.crop_mean[:, :, channel_order_inverse]
            if input_scale is not None:
                self.crop_mean /= input_scale

@ -0,0 +1,76 @@
"""
Caffe network visualization: draw the NetParameter protobuffer.
NOTE: this requires pydot>=1.0.2, which is not included in requirements.txt
since it requires graphviz and other prerequisites outside the scope of the
Caffe.
"""
from caffe.proto import caffe_pb2
from google.protobuf import text_format
import pydot
# Node attributes used when drawing layers, in-place (neuron) layers and
# blobs.
LAYER_STYLE = dict(shape='record', fillcolor='#6495ED', style='filled')
NEURON_LAYER_STYLE = dict(shape='record', fillcolor='#90EE90', style='filled')
BLOB_STYLE = dict(shape='octagon', fillcolor='#F0E68C', style='filled')
def get_enum_name_by_value():
    """Return a dict mapping each LayerType enum number to its name."""
    layer_types = caffe_pb2.LayerParameter.LayerType.DESCRIPTOR
    return dict((value.number, name)
                for name, value in layer_types.values_by_name.items())
def get_pydot_graph(caffe_net):
    """Build a pydot digraph (drawn bottom-to-top) for the given net.

    Every layer becomes a record node labelled "name (type)"; layers with
    one bottom and one top of the same name are styled as in-place neuron
    layers. Every blob becomes its own node, with edges running
    bottom blob -> layer -> top blob.
    """
    graph = pydot.Dot(caffe_net.name, graph_type='digraph', rankdir="BT")
    nodes = {}
    edges = []
    type_names = get_enum_name_by_value()

    for layer in caffe_net.layers:
        type_name = type_names[layer.type]
        layer_key = layer.name + '_' + type_name
        in_place = (len(layer.bottom) == 1 and len(layer.top) == 1 and
                    layer.bottom[0] == layer.top[0])
        layer_style = NEURON_LAYER_STYLE if in_place else LAYER_STYLE
        nodes[layer_key] = pydot.Node(
            '%s (%s)' % (layer.name, type_name), **layer_style)

        for bottom in layer.bottom:
            blob_key = bottom + '_blob'
            nodes[blob_key] = pydot.Node('%s' % (bottom), **BLOB_STYLE)
            edges.append((blob_key, layer_key))
        for top in layer.top:
            blob_key = top + '_blob'
            # Top blob nodes are created without the blob style.
            nodes[blob_key] = pydot.Node('%s' % (top))
            edges.append((layer_key, blob_key))

    # Now, add the nodes and edges to the graph.
    for node in nodes.values():
        graph.add_node(node)
    for source_key, target_key in edges:
        graph.add_edge(pydot.Edge(nodes[source_key], nodes[target_key]))
    return graph
def draw_net(caffe_net, ext='png'):
    """Render a caffe net and return the image data in the requested format.

    Input:
        caffe_net: a caffe.proto.caffe_pb2.NetParameter protocol buffer.
        ext: the image extension. Default 'png'.
    """
    graph = get_pydot_graph(caffe_net)
    return graph.create(format=ext)
def draw_net_to_file(caffe_net, filename):
    """Render a caffe net and save it to `filename`.

    The output format is whatever follows the last '.' in the file name.
    Use '.raw' to output raw text that you can manually feed to graphviz to
    draw graphs.
    """
    last_dot = filename.rfind('.')
    ext = filename[last_dot + 1:]
    with open(filename, 'wb') as fid:
        image_data = draw_net(caffe_net, ext)
        fid.write(image_data)

@ -0,0 +1,159 @@
import numpy as np
import skimage.io
import skimage.transform
from caffe.proto import caffe_pb2
def load_image(filename, color=True):
    """
    Read an image file as float32, handling grayscale and alpha inputs.

    Take
    filename: string
    color: flag for color format. True (default) loads as RGB while False
        loads as intensity (if image is already grayscale).

    Give
    image: an image with type np.float32 of size (H x W x 3) in RGB or
        of size (H x W x 1) in grayscale.
    """
    pixels = skimage.io.imread(filename)
    img = skimage.img_as_float(pixels).astype(np.float32)
    if img.ndim == 2:
        # Grayscale: add the channel axis, and replicate it for color.
        img = img[:, :, np.newaxis]
        if color:
            img = np.tile(img, (1, 1, 3))
        return img
    if img.shape[2] == 4:
        # Keep only the first three channels.
        img = img[:, :, :3]
    return img
def resize_image(im, new_dims, interp_order=1):
    """
    Resize an image array with interpolation (delegates to
    skimage.transform.resize).

    Take
    im: (H x W x K) ndarray
    new_dims: (height, width) tuple of new dimensions.
    interp_order: interpolation order, default is linear.

    Give
    im: resized ndarray with shape (new_dims[0], new_dims[1], K)
    """
    resized = skimage.transform.resize(im, new_dims, order=interp_order)
    return resized
def oversample(images, crop_dims):
    """
    Crop images into the four corners, center, and their mirrored versions.

    Take
    images: indexable collection of (H x W x K) ndarrays; the first entry
        determines the image shape used for all of them.
    crop_dims: (height, width) tuple for the crops.

    Give
    crops: (10*N x H x W x K) float32 ndarray of crops for number of
        inputs N. Per image: four corners, center, then the same five
        crops mirrored along the width axis.
    """
    im_shape = np.array(images[0].shape)
    crop_dims = np.array(crop_dims)
    crop_h, crop_w = crop_dims[0], crop_dims[1]

    # Corner boxes as (ymin, xmin, ymax, xmax).
    boxes = [(top, left, top + crop_h, left + crop_w)
             for top in (0, im_shape[0] - crop_h)
             for left in (0, im_shape[1] - crop_w)]
    # Center box; fractional coordinates are truncated to integers below.
    center = im_shape[:2] / 2.0
    half = crop_dims / 2.0
    boxes.append(np.concatenate([center - half, center + half]))
    boxes = np.array(boxes).astype(int)

    crops = np.empty((10 * len(images), crop_h, crop_w, im_shape[-1]),
                     dtype=np.float32)
    for n, im in enumerate(images):
        first = 10 * n
        for k, (ymin, xmin, ymax, xmax) in enumerate(boxes):
            patch = im[ymin:ymax, xmin:xmax, :]
            crops[first + k] = patch
            crops[first + 5 + k] = patch[:, ::-1, :]  # mirrored copy
    return crops
def blobproto_to_array(blob, return_diff=False):
    """Convert a blob proto to a (num, channels, height, width) array.

    The blob's data is converted by default; pass return_diff=True to
    convert its diff instead.
    """
    values = blob.diff if return_diff else blob.data
    shape = (blob.num, blob.channels, blob.height, blob.width)
    return np.array(values).reshape(*shape)
def array_to_blobproto(arr, diff=None):
    """Convert a 4-dimensional array to a blob proto.

    When diff is given it is stored as the blob's diff. The caller must
    make sure arr and diff have the same shape; no sanity check is done
    here. Raises ValueError when arr is not 4-dimensional.
    """
    if arr.ndim != 4:
        raise ValueError('Incorrect array shape.')
    num, channels, height, width = arr.shape
    blob = caffe_pb2.BlobProto()
    blob.num = num
    blob.channels = channels
    blob.height = height
    blob.width = width
    blob.data.extend(arr.astype(float).flat)
    if diff is not None:
        blob.diff.extend(diff.astype(float).flat)
    return blob
def arraylist_to_blobprotovecor_str(arraylist):
    """Serialize a list of arrays as a blobprotovec string, which could
    then be passed to a network for processing.
    """
    blob_protos = []
    for arr in arraylist:
        blob_protos.append(array_to_blobproto(arr))
    vec = caffe_pb2.BlobProtoVector()
    vec.blobs.extend(blob_protos)
    return vec.SerializeToString()
def blobprotovector_str_to_arraylist(str):
    """Parse a serialized blobprotovec and return its blobs as a list of
    arrays.
    """
    vec = caffe_pb2.BlobProtoVector()
    vec.ParseFromString(str)
    arrays = []
    for blob in vec.blobs:
        arrays.append(blobproto_to_array(blob))
    return arrays
def array_to_datum(arr, label=0):
    """Converts a 3-dimensional array to datum. If the array has dtype uint8,
    the output data will be encoded as a string. Otherwise, the output data
    will be stored in float format.

    Take
    arr: (channels x height x width) ndarray
    label: value stored in the datum's label field.

    Give
    datum: the populated Datum.

    Raises ValueError if ``arr`` is not 3-dimensional.
    """
    if arr.ndim != 3:
        raise ValueError('Incorrect array shape.')
    datum = caffe_pb2.Datum()
    datum.channels, datum.height, datum.width = arr.shape
    if arr.dtype == np.uint8:
        # ndarray.tostring is a deprecated alias of tobytes; prefer tobytes
        # and only fall back on NumPy releases that predate it.
        to_bytes = getattr(arr, 'tobytes', None) or arr.tostring
        datum.data = to_bytes()
    else:
        datum.float_data.extend(arr.flat)
    datum.label = label
    return datum
def datum_to_array(datum):
    """Converts a datum to an array. Note that the label is not returned,
    as one can easily get it by calling datum.label.

    Give
    array: (channels x height x width) ndarray; uint8 when the datum carries
           byte data, float otherwise.
    """
    shape = (datum.channels, datum.height, datum.width)
    if len(datum.data):
        # Binary-mode np.fromstring is deprecated in favour of np.frombuffer.
        # frombuffer returns a read-only view of the bytes, so copy to keep
        # handing callers an independent, writable array.
        return np.frombuffer(datum.data, dtype=np.uint8).reshape(shape).copy()
    return np.array(datum.float_data).astype(float).reshape(shape)

@ -0,0 +1,352 @@
"""
Wrap the internal caffe C++ module (_caffe.so) with a clean, Pythonic
interface.
"""
from collections import OrderedDict
from itertools import izip_longest
import numpy as np
from ._caffe import Net, SGDSolver
import caffe.io
# We directly update methods from Net here (rather than using composition or
# inheritance) so that nets created by caffe (e.g., by SGDSolver) will
# automatically have the improved interface.
@property
def _Net_blobs(self):
"""
An OrderedDict (bottom to top, i.e., input to output) of network
blobs indexed by name
"""
return OrderedDict([(bl.name, bl) for bl in self._blobs])
@property
def _Net_params(self):
"""
An OrderedDict (bottom to top, i.e., input to output) of network
parameters indexed by name; each is a list of multiple blobs (e.g.,
weights and biases)
"""
return OrderedDict([(lr.name, lr.blobs) for lr in self.layers
if len(lr.blobs) > 0])
def _Net_forward(self, blobs=None, **kwargs):
"""
Forward pass: prepare inputs and run the net forward.
Take
blobs: list of blobs to return in addition to output blobs.
kwargs: Keys are input blob names and values are blob ndarrays.
For formatting inputs for Caffe, see Net.preprocess().
If None, input is taken from data layers.
Give
outs: {blob name: blob ndarray} dict.
"""
if blobs is None:
blobs = []
if kwargs:
if set(kwargs.keys()) != set(self.inputs):
raise Exception('Input blob arguments do not match net inputs.')
# Set input according to defined shapes and make arrays single and
# C-contiguous as Caffe expects.
for in_, blob in kwargs.iteritems():
if blob.shape[0] != self.blobs[in_].num:
raise Exception('Input is not batch sized')
if blob.ndim != 4:
raise Exception('{} blob is not 4-d'.format(in_))
self.blobs[in_].data[...] = blob
self._forward()
# Unpack blobs to extract
outs = {out: self.blobs[out].data for out in set(self.outputs + blobs)}
return outs
def _Net_backward(self, diffs=None, **kwargs):
"""
Backward pass: prepare diffs and run the net backward.
Take
diffs: list of diffs to return in addition to bottom diffs.
kwargs: Keys are output blob names and values are diff ndarrays.
If None, top diffs are taken from forward loss.
Give
outs: {blob name: diff ndarray} dict.
"""
if diffs is None:
diffs = []
if kwargs:
if set(kwargs.keys()) != set(self.outputs):
raise Exception('Top diff arguments do not match net outputs.')
# Set top diffs according to defined shapes and make arrays single and
# C-contiguous as Caffe expects.
for top, diff in kwargs.iteritems():
if diff.shape[0] != self.blobs[top].num:
raise Exception('Diff is not batch sized')
if diff.ndim != 4:
raise Exception('{} diff is not 4-d'.format(top))
self.blobs[top].diff[...] = diff
self._backward()
# Unpack diffs to extract
outs = {out: self.blobs[out].diff for out in set(self.inputs + diffs)}
return outs
def _Net_forward_all(self, blobs=None, **kwargs):
"""
Run net forward in batches.
Take
blobs: list of blobs to extract as in forward()
kwargs: Keys are input blob names and values are blob ndarrays.
Refer to forward().
Give
all_outs: {blob name: list of blobs} dict.
"""
# Collect outputs from batches
all_outs = {out: [] for out in set(self.outputs + (blobs or []))}
for batch in self._batch(kwargs):
outs = self.forward(blobs=blobs, **batch)
for out, out_blob in outs.iteritems():
all_outs[out].extend(out_blob.copy())
# Package in ndarray.
for out in all_outs:
all_outs[out] = np.asarray(all_outs[out])
# Discard padding.
pad = len(all_outs.itervalues().next()) - len(kwargs.itervalues().next())
if pad:
for out in all_outs:
all_outs[out] = all_outs[out][:-pad]
return all_outs
def _Net_forward_backward_all(self, blobs=None, diffs=None, **kwargs):
"""
Run net forward + backward in batches.
Take
blobs: list of blobs to extract as in forward()
diffs: list of diffs to extract as in backward()
kwargs: Keys are input (for forward) and output (for backward) blob names
and values are ndarrays. Refer to forward() and backward().
Prefilled variants are called for lack of input or output blobs.
Give
all_blobs: {blob name: blob ndarray} dict.
all_diffs: {blob name: diff ndarray} dict.
"""
# Batch blobs and diffs.
all_outs = {out: [] for out in set(self.outputs + (blobs or []))}
all_diffs = {diff: [] for diff in set(self.inputs + (diffs or []))}
forward_batches = self._batch({in_: kwargs[in_]
for in_ in self.inputs if in_ in kwargs})
backward_batches = self._batch({out: kwargs[out]
for out in self.outputs if out in kwargs})
# Collect outputs from batches (and heed lack of forward/backward batches).
for fb, bb in izip_longest(forward_batches, backward_batches, fillvalue={}):
batch_blobs = self.forward(blobs=blobs, **fb)
batch_diffs = self.backward(diffs=diffs, **bb)
for out, out_blobs in batch_blobs.iteritems():
all_outs[out].extend(out_blobs)
for diff, out_diffs in batch_diffs.iteritems():
all_diffs[diff].extend(out_diffs)
# Package in ndarray.
for out, diff in zip(all_outs, all_diffs):
all_outs[out] = np.asarray(all_outs[out])
all_diffs[diff] = np.asarray(all_diffs[diff])
# Discard padding at the end and package in ndarray.
pad = len(all_outs.itervalues().next()) - len(kwargs.itervalues().next())
if pad:
for out, diff in zip(all_outs, all_diffs):
all_outs[out] = all_outs[out][:-pad]
all_diffs[diff] = all_diffs[diff][:-pad]
return all_outs, all_diffs
def _Net_set_mean(self, input_, mean_f, mode='elementwise'):
"""
Set the mean to subtract for data centering.
Take
input_: which input to assign this mean.
mean_f: path to mean .npy with ndarray (input dimensional or broadcastable)
mode: elementwise = use the whole mean (and check dimensions)
channel = channel constant (e.g. mean pixel instead of mean image)
"""
if not hasattr(self, 'mean'):
self.mean = {}
if input_ not in self.inputs:
raise Exception('Input not in {}'.format(self.inputs))
in_shape = self.blobs[input_].data.shape
mean = np.load(mean_f)
if mode == 'elementwise':
if mean.shape != in_shape[1:]:
# Resize mean (which requires H x W x K input in range [0,1]).
m_min, m_max = mean.min(), mean.max()
normal_mean = (mean - m_min) / (m_max - m_min)
mean = caffe.io.resize_image(normal_mean.transpose((1,2,0)),
in_shape[2:]).transpose((2,0,1)) * (m_max - m_min) + m_min
self.mean[input_] = mean
elif mode == 'channel':
self.mean[input_] = mean.mean(1).mean(1).reshape((in_shape[1], 1, 1))
else:
raise Exception('Mode not in {}'.format(['elementwise', 'channel']))
def _Net_set_input_scale(self, input_, scale):
"""
Set the input feature scaling factor s.t. input blob = input * scale.
Take
input_: which input to assign this scale factor
scale: scale coefficient
"""
if not hasattr(self, 'input_scale'):
self.input_scale = {}
if input_ not in self.inputs:
raise Exception('Input not in {}'.format(self.inputs))
self.input_scale[input_] = scale
def _Net_set_channel_swap(self, input_, order):
"""
Set the input channel order for e.g. RGB to BGR conversion
as needed for the reference ImageNet model.
Take
input_: which input to assign this channel order
order: the order to take the channels.
(2,1,0) maps RGB to BGR for example.
"""
if not hasattr(self, 'channel_swap'):
self.channel_swap = {}
if input_ not in self.inputs:
raise Exception('Input not in {}'.format(self.inputs))
self.channel_swap[input_] = order
def _Net_preprocess(self, input_name, input_):
"""
Format input for Caffe:
- convert to single
- resize to input dimensions (preserving number of channels)
- scale feature
- reorder channels (for instance color to BGR)
- subtract mean
- transpose dimensions to K x H x W
Take
input_name: name of input blob to preprocess for
input_: (H' x W' x K) ndarray
Give
caffe_inputs: (K x H x W) ndarray
"""
caffe_in = input_.astype(np.float32)
input_scale = self.input_scale.get(input_name)
channel_order = self.channel_swap.get(input_name)
mean = self.mean.get(input_name)
in_size = self.blobs[input_name].data.shape[2:]
if caffe_in.shape[:2] != in_size:
caffe_in = caffe.io.resize_image(caffe_in, in_size)
if input_scale:
caffe_in *= input_scale
if channel_order:
caffe_in = caffe_in[:, :, channel_order]
caffe_in = caffe_in.transpose((2, 0, 1))
if mean is not None:
caffe_in -= mean
return caffe_in
def _Net_deprocess(self, input_name, input_):
"""
Invert Caffe formatting; see Net.preprocess().
"""
decaf_in = input_.copy().squeeze()
input_scale = self.input_scale.get(input_name)
channel_order = self.channel_swap.get(input_name)
mean = self.mean.get(input_name)
if mean is not None:
decaf_in += mean
decaf_in = decaf_in.transpose((1,2,0))
if channel_order:
channel_order_inverse = [channel_order.index(i)
for i in range(decaf_in.shape[2])]
decaf_in = decaf_in[:, :, channel_order_inverse]
if input_scale:
decaf_in /= input_scale
return decaf_in
def _Net_set_input_arrays(self, data, labels):
"""
Set input arrays of the in-memory MemoryDataLayer.
(Note: this is only for networks declared with the memory data layer.)
"""
if labels.ndim == 1:
labels = np.ascontiguousarray(labels[:, np.newaxis, np.newaxis,
np.newaxis])
return self._set_input_arrays(data, labels)
def _Net_batch(self, blobs):
"""
Batch blob lists according to net's batch size.
Take
blobs: Keys blob names and values are lists of blobs (of any length).
Naturally, all the lists should have the same length.
Give (yield)
batch: {blob name: list of blobs} dict for a single batch.
"""
num = len(blobs.itervalues().next())
batch_size = self.blobs.itervalues().next().num
remainder = num % batch_size
num_batches = num / batch_size
# Yield full batches.
for b in range(num_batches):
i = b * batch_size
yield {name: blobs[name][i:i + batch_size] for name in blobs}
# Yield last padded batch, if any.
if remainder > 0:
padded_batch = {}
for name in blobs:
padding = np.zeros((batch_size - remainder,)
+ blobs[name].shape[1:])
padded_batch[name] = np.concatenate([blobs[name][-remainder:],
padding])
yield padded_batch
# Attach methods to Net.
# The functions (and the two property objects) defined above are assigned as
# attributes of the imported Net class itself, so they are available on every
# Net instance regardless of who created it.
Net.blobs = _Net_blobs
Net.params = _Net_params
Net.forward = _Net_forward
Net.backward = _Net_backward
Net.forward_all = _Net_forward_all
Net.forward_backward_all = _Net_forward_backward_all
Net.set_mean = _Net_set_mean
Net.set_input_scale = _Net_set_input_scale
Net.set_channel_swap = _Net_set_channel_swap
Net.preprocess = _Net_preprocess
Net.deprocess = _Net_deprocess
Net.set_input_arrays = _Net_set_input_arrays
Net._batch = _Net_batch

@ -0,0 +1,120 @@
#!/usr/bin/env python
"""
classify.py is an out-of-the-box image classifier callable from the command line.
By default it configures and runs the Caffe reference ImageNet model.
"""
import numpy as np
import os
import sys
import argparse
import glob
import time
import caffe
def main(argv):
    """
    Command-line entry point: parse the options, build a caffe.Classifier,
    classify the inputs and save the predictions with np.save.

    Take
    argv: full argument vector with the program name first (e.g. sys.argv).
    """
    pycaffe_dir = os.path.dirname(__file__)

    parser = argparse.ArgumentParser()
    # Required arguments: input and output files.
    parser.add_argument(
        "input_file",
        help="Input image, directory, or npy."
    )
    parser.add_argument(
        "output_file",
        help="Output npy filename."
    )
    # Optional arguments.
    parser.add_argument(
        "--model_def",
        default=os.path.join(pycaffe_dir,
                "../examples/imagenet/imagenet_deploy.prototxt"),
        help="Model definition file."
    )
    parser.add_argument(
        "--pretrained_model",
        default=os.path.join(pycaffe_dir,
                "../examples/imagenet/caffe_reference_imagenet_model"),
        help="Trained model weights file."
    )
    parser.add_argument(
        "--gpu",
        action='store_true',
        help="Switch for gpu computation."
    )
    parser.add_argument(
        "--center_only",
        action='store_true',
        help="Switch for prediction from center crop alone instead of " +
             "averaging predictions across crops (default)."
    )
    parser.add_argument(
        "--images_dim",
        default='256,256',
        help="Canonical 'height,width' dimensions of input images."
    )
    parser.add_argument(
        "--mean_file",
        default=os.path.join(pycaffe_dir,
                             'caffe/imagenet/ilsvrc_2012_mean.npy'),
        help="Data set image mean of H x W x K dimensions (numpy array). " +
             "Set to '' for no mean subtraction."
    )
    parser.add_argument(
        "--input_scale",
        type=float,
        default=255,
        help="Multiply input features by this scale before input to net"
    )
    parser.add_argument(
        "--channel_swap",
        default='2,1,0',
        help="Order to permute input channels. The default converts " +
             "RGB -> BGR since BGR is the Caffe default by way of OpenCV."
    )
    parser.add_argument(
        "--ext",
        default='jpg',
        help="Image file extension to take as input when a directory " +
             "is given as the input file."
    )
    # Parse the vector we were handed (minus the program name) instead of
    # ignoring it; for `main(sys.argv)` this is what argparse did by default.
    args = parser.parse_args(argv[1:])

    image_dims = [int(s) for s in args.images_dim.split(',')]
    channel_swap = [int(s) for s in args.channel_swap.split(',')]

    # Make classifier.
    classifier = caffe.Classifier(args.model_def, args.pretrained_model,
            image_dims=image_dims, gpu=args.gpu, mean_file=args.mean_file,
            input_scale=args.input_scale, channel_swap=channel_swap)

    if args.gpu:
        print('GPU mode')

    # Load numpy array (.npy), directory glob (*.jpg), or image file.
    args.input_file = os.path.expanduser(args.input_file)
    if args.input_file.endswith('npy'):
        inputs = np.load(args.input_file)
    elif os.path.isdir(args.input_file):
        inputs = [caffe.io.load_image(im_f)
                  for im_f in glob.glob(args.input_file + '/*.' + args.ext)]
    else:
        inputs = [caffe.io.load_image(args.input_file)]

    print("Classifying %d inputs." % len(inputs))

    # Classify. The second argument asks for crop averaging unless
    # --center_only was given.
    start = time.time()
    predictions = classifier.predict(inputs, not args.center_only)
    print("Done in %.2f s." % (time.time() - start))

    # Save
    np.save(args.output_file, predictions)
# Script entry point: hand the process argument vector to main().
if __name__ == '__main__':
    main(sys.argv)

@ -0,0 +1,158 @@
#!/usr/bin/env python
"""
detector.py is an out-of-the-box windowed detector
callable from the command line.
By default it configures and runs the Caffe reference ImageNet model.
Note that this model was trained for image classification and not detection,
and finetuning for detection can be expected to improve results.
The selective_search_ijcv_with_python code required for the selective search
proposal mode is available at
https://github.com/sergeyk/selective_search_ijcv_with_python
TODO:
- batch up image filenames as well: don't want to load all of them into memory
- come up with a batching scheme that preserves order / keeps a unique ID
"""
import numpy as np
import pandas as pd
import os
import argparse
import time
import caffe
CROP_MODES = ['list', 'selective_search']
COORD_COLS = ['ymin', 'xmin', 'ymax', 'xmax']
def main(argv):
    """
    Command-line entry point: parse the options, build a caffe.Detector, run
    detection over the listed images/windows and save the result as csv or h5.

    Take
    argv: full argument vector with the program name first (e.g. sys.argv).
    """
    pycaffe_dir = os.path.dirname(__file__)

    parser = argparse.ArgumentParser()
    # Required arguments: input and output.
    parser.add_argument(
        "input_file",
        # Adjacent literals with explicit spaces keep the sentences apart
        # no matter how the source happens to be indented.
        help="Input txt/csv filename. If .txt, must be list of filenames. "
             "If .csv, must be comma-separated file with header "
             "'filename, xmin, ymin, xmax, ymax'"
    )
    parser.add_argument(
        "output_file",
        help="Output h5/csv filename. Format depends on extension."
    )

    # Optional arguments.
    parser.add_argument(
        "--model_def",
        default=os.path.join(pycaffe_dir,
                "../examples/imagenet/imagenet_deploy.prototxt"),
        help="Model definition file."
    )
    parser.add_argument(
        "--pretrained_model",
        default=os.path.join(pycaffe_dir,
                "../examples/imagenet/caffe_reference_imagenet_model"),
        help="Trained model weights file."
    )
    parser.add_argument(
        "--crop_mode",
        default="selective_search",
        choices=CROP_MODES,
        help="How to generate windows for detection."
    )
    parser.add_argument(
        "--gpu",
        action='store_true',
        help="Switch for gpu computation."
    )
    parser.add_argument(
        "--mean_file",
        default=os.path.join(pycaffe_dir,
                             'caffe/imagenet/ilsvrc_2012_mean.npy'),
        help="Data set image mean of H x W x K dimensions (numpy array). " +
             "Set to '' for no mean subtraction."
    )
    parser.add_argument(
        "--input_scale",
        type=float,
        default=255,
        help="Multiply input features by this scale before input to net"
    )
    parser.add_argument(
        "--channel_swap",
        default='2,1,0',
        help="Order to permute input channels. The default converts " +
             "RGB -> BGR since BGR is the Caffe default by way of OpenCV."
    )
    parser.add_argument(
        "--context_pad",
        type=int,
        default=16,
        help="Amount of surrounding context to collect in input window."
    )
    # Parse the vector we were handed (minus the program name) instead of
    # ignoring it; for `main(sys.argv)` this is what argparse did by default.
    args = parser.parse_args(argv[1:])

    channel_swap = [int(s) for s in args.channel_swap.split(',')]

    # Make detector.
    detector = caffe.Detector(args.model_def, args.pretrained_model,
            gpu=args.gpu, mean_file=args.mean_file,
            input_scale=args.input_scale, channel_swap=channel_swap,
            context_pad=args.context_pad)

    if args.gpu:
        print('GPU mode')

    # Load input.
    t = time.time()
    print('Loading input...')
    if args.input_file.lower().endswith('txt'):
        with open(args.input_file) as f:
            inputs = [_.strip() for _ in f.readlines()]
    elif args.input_file.lower().endswith('csv'):
        inputs = pd.read_csv(args.input_file, sep=',', dtype={'filename': str})
        inputs.set_index('filename', inplace=True)
    else:
        raise Exception("Unknown input file type: not in txt or csv.")

    # Detect.
    if args.crop_mode == 'list':
        # Unpack sequence of (image filename, windows).
        images_windows = (
            (ix, inputs.iloc[np.where(inputs.index == ix)][COORD_COLS].values)
            for ix in inputs.index.unique()
        )
        detections = detector.detect_windows(images_windows)
    else:
        detections = detector.detect_selective_search(inputs)

    print("Processed {} windows in {:.3f} s.".format(len(detections),
                                                     time.time() - t))

    # Collect into dataframe with labeled fields.
    df = pd.DataFrame(detections)
    df.set_index('filename', inplace=True)
    df[COORD_COLS] = pd.DataFrame(
        data=np.vstack(df['window']), index=df.index, columns=COORD_COLS)
    del(df['window'])

    # Save results.
    t = time.time()
    if args.output_file.lower().endswith('csv'):
        # csv
        # Enumerate the class probabilities. The number of classes is read
        # off the stacked feature vectors; no constant for it is defined
        # alongside CROP_MODES and COORD_COLS.
        feats = np.vstack(df['feat'])
        class_cols = ['class{}'.format(x) for x in range(feats.shape[1])]
        df[class_cols] = pd.DataFrame(
            data=feats, index=df.index, columns=class_cols)
        # Select the columns by indexing rather than through a to_csv
        # keyword whose name depends on the pandas version.
        df[COORD_COLS + class_cols].to_csv(args.output_file)
    else:
        # h5
        df.to_hdf(args.output_file, 'df', mode='w')
    print("Saved to {} in {:.3f} s.".format(args.output_file,
                                            time.time() - t))
# Script entry point: sys is imported here, only when run as a script, and
# the process argument vector is handed to main().
if __name__ == "__main__":
    import sys
    main(sys.argv)

@ -0,0 +1,25 @@
#!/usr/bin/env python
"""
Draw a graph of the net architecture.
"""
import os
from google.protobuf import text_format
import caffe, caffe.draw
from caffe.proto import caffe_pb2
def main(argv):
    """
    Draw the net described by a text-format NetParameter file to an image.

    Take
    argv: [program name, input_net_proto_file, output_image_file].
          Any other length prints a usage line and does nothing else.
    """
    # Work from the argv parameter only: `sys` is imported solely under the
    # script guard, so touching sys.argv here would fail when main() is
    # called from an importing module.
    if len(argv) != 3:
        print('Usage: %s input_net_proto_file output_image_file' %
              os.path.basename(argv[0]))
    else:
        net = caffe_pb2.NetParameter()
        # Close the definition file as soon as it has been read.
        with open(argv[1]) as proto_file:
            text_format.Merge(proto_file.read(), net)
        print('Drawing net to %s' % argv[2])
        caffe.draw.draw_net_to_file(net, argv[2])
# Script entry point: sys is imported here, only when run as a script, and
# the process argument vector is handed to main().
if __name__ == '__main__':
    import sys
    main(sys.argv)

@ -0,0 +1,14 @@
Cython>=0.19.2
h5py>=2.2.0
ipython>=1.1.0
leveldb>=0.191
matplotlib>=1.3.1
networkx>=1.8.1
nose>=1.3.0
numpy>=1.7.1
pandas>=0.12.0
protobuf>=2.5.0
python-gflags>=2.0
scikit-image>=0.9.3
scikit-learn>=0.14.1
scipy>=0.13.2

@ -0,0 +1,171 @@
import numpy as np
import skimage.io
from scipy.ndimage import zoom
from skimage.transform import resize
from caffe.proto import caffe_pb2
def load_image(filename, color=True):
    """
    Read an image file as float32, converting from grayscale or alpha as
    needed.

    Take
    filename: string
    color: flag for color format. True (default) loads as RGB while False
        loads as intensity (if image is already grayscale).

    Give
    image: an image with type np.float32 in range [0, 1]
        of size (H x W x 3) in RGB or
        of size (H x W x 1) in grayscale.
    """
    pixels = skimage.io.imread(filename)
    img = skimage.img_as_float(pixels).astype(np.float32)
    if img.ndim != 2:
        # Multi-channel input: only a fourth (alpha) channel is dropped.
        if img.shape[2] == 4:
            img = img[:, :, :3]
        return img
    # Single-plane input: add the channel axis, replicated 3x for color.
    img = img[:, :, np.newaxis]
    if color:
        img = np.tile(img, (1, 1, 3))
    return img
def resize_image(im, new_dims, interp_order=1):
    """
    Resize an image array with interpolation.

    Take
    im: (H x W x K) ndarray
    new_dims: (height, width) tuple of new dimensions.
    interp_order: interpolation order, default is linear.

    Give
    im: resized float32 ndarray with shape (new_dims[0], new_dims[1], K)
    """
    if im.shape[-1] == 1 or im.shape[-1] == 3:
        im_min, im_max = im.min(), im.max()
        if im_max > im_min:
            # skimage is fast but only understands {1,3} channel images
            # in [0, 1].
            im_std = (im - im_min) / (im_max - im_min)
            resized_std = resize(im_std, new_dims, order=interp_order)
            resized_im = resized_std * (im_max - im_min) + im_min
        else:
            # A constant image cannot be normalized (0 / 0 gives NaN) and
            # needs no interpolation: every output pixel has that value.
            resized_im = np.empty((new_dims[0], new_dims[1], im.shape[-1]),
                                  dtype=np.float32)
            resized_im.fill(im_min)
    else:
        # ndimage interpolates anything but more slowly.
        # Force a float ratio: dividing two integer arrays floors the zoom
        # factor under Python 2.
        scale = tuple(np.array(new_dims, dtype=float) /
                      np.array(im.shape[:2]))
        resized_im = zoom(im, scale + (1,), order=interp_order)
    return resized_im.astype(np.float32)
def oversample(images, crop_dims):
    """
    Crop images into the four corners, center, and their mirrored versions.

    Take
    images: iterable of (H x W x K) ndarrays
    crop_dims: (height, width) tuple for the crops.

    Give
    crops: (10*N x H x W x K) ndarray of crops for number of inputs N.
    """
    # Dimensions and center, taken from the first image.
    im_shape = np.array(images[0].shape)
    crop_dims = np.array(crop_dims)
    im_center = im_shape[:2] / 2.0
    crop_h, crop_w = crop_dims[0], crop_dims[1]

    # Crop boxes as (top, left, bottom, right): four corners, then center.
    tops = (0, im_shape[0] - crop_h)
    lefts = (0, im_shape[1] - crop_w)
    crops_ix = np.empty((5, 4), dtype=int)
    corners = [(top, left) for top in tops for left in lefts]
    for row, (top, left) in enumerate(corners):
        crops_ix[row] = (top, left, top + crop_h, left + crop_w)
    half = crop_dims / 2.0
    # Stored into an int array, so fractional coordinates are truncated.
    crops_ix[4] = np.concatenate([im_center - half, im_center + half])
    # Each box appears twice: once plain, once for the mirrored copy.
    crops_ix = np.tile(crops_ix, (2, 1))

    # Extract crops
    crops = np.empty((10 * len(images), crop_h, crop_w, im_shape[-1]),
                     dtype=np.float32)
    for n, im in enumerate(images):
        start = 10 * n
        for k, (y0, x0, y1, x1) in enumerate(crops_ix):
            crops[start + k] = im[y0:y1, x0:x1, :]
        # Flip the second five crops of this image along the width axis.
        mirrored = slice(start + 5, start + 10)
        crops[mirrored] = crops[mirrored, :, ::-1, :]
    return crops
def blobproto_to_array(blob, return_diff=False):
    """Turn a blob proto into a 4-d ndarray.

    The proto's ``data`` values are used unless ``return_diff`` is true, in
    which case its ``diff`` values are used instead. The flat values are
    reshaped to (num, channels, height, width) as recorded on the proto.
    """
    values = blob.diff if return_diff else blob.data
    shape = (blob.num, blob.channels, blob.height, blob.width)
    return np.array(values).reshape(shape)
def array_to_blobproto(arr, diff=None):
    """Build a BlobProto from a 4-dimensional array.

    When ``diff`` is supplied its values are stored in the proto's diff
    field as well. No check is made that ``diff`` has the same shape as
    ``arr``; that is the caller's responsibility.

    Raises ValueError if ``arr`` is not 4-dimensional.
    """
    if arr.ndim != 4:
        raise ValueError('Incorrect array shape.')
    proto = caffe_pb2.BlobProto()
    num, channels, height, width = arr.shape
    proto.num = num
    proto.channels = channels
    proto.height = height
    proto.width = width
    proto.data.extend(arr.astype(float).flat)
    if diff is not None:
        proto.diff.extend(diff.astype(float).flat)
    return proto
def arraylist_to_blobprotovecor_str(arraylist):
    """Serialize a list of 4-d arrays as a BlobProtoVector string.

    Each array is converted with array_to_blobproto() and the resulting
    vector is returned in its serialized form.
    """
    protos = [array_to_blobproto(item) for item in arraylist]
    vec = caffe_pb2.BlobProtoVector()
    vec.blobs.extend(protos)
    return vec.SerializeToString()
def blobprotovector_str_to_arraylist(str):
    """Parse a serialized BlobProtoVector and return its blobs as a list of
    arrays (data only, one array per blob, in stored order).
    """
    vec = caffe_pb2.BlobProtoVector()
    vec.ParseFromString(str)
    arrays = []
    for proto in vec.blobs:
        arrays.append(blobproto_to_array(proto))
    return arrays
def array_to_datum(arr, label=0):
    """Converts a 3-dimensional array to datum. If the array has dtype uint8,
    the output data will be encoded as a string. Otherwise, the output data
    will be stored in float format.

    Take
    arr: (channels x height x width) ndarray
    label: value stored in the datum's label field.

    Give
    datum: the populated Datum.

    Raises ValueError if ``arr`` is not 3-dimensional.
    """
    if arr.ndim != 3:
        raise ValueError('Incorrect array shape.')
    datum = caffe_pb2.Datum()
    datum.channels, datum.height, datum.width = arr.shape
    if arr.dtype == np.uint8:
        # ndarray.tostring is a deprecated alias of tobytes; prefer tobytes
        # and only fall back on NumPy releases that predate it.
        to_bytes = getattr(arr, 'tobytes', None) or arr.tostring
        datum.data = to_bytes()
    else:
        datum.float_data.extend(arr.flat)
    datum.label = label
    return datum
def datum_to_array(datum):
    """Converts a datum to an array. Note that the label is not returned,
    as one can easily get it by calling datum.label.

    Give
    array: (channels x height x width) ndarray; uint8 when the datum carries
           byte data, float otherwise.
    """
    shape = (datum.channels, datum.height, datum.width)
    if len(datum.data):
        # Binary-mode np.fromstring is deprecated in favour of np.frombuffer.
        # frombuffer returns a read-only view of the bytes, so copy to keep
        # handing callers an independent, writable array.
        return np.frombuffer(datum.data, dtype=np.uint8).reshape(shape).copy()
    return np.array(datum.float_data).astype(float).reshape(shape)

@ -0,0 +1,11 @@
#!/bin/bash
# Serve the documentation with jekyll from the docs directory.
# Usage: build_docs.sh [port]   (port defaults to 4000)

PORT=${1:-4000}

echo "usage: build_docs.sh [port]"

# Find the docs dir, no matter where the script is called
DIR="$( cd "$(dirname "$0")" ; pwd -P )"
# Quote the path so a checkout under a directory with spaces still works,
# and stop instead of serving from wherever we happen to be if it is missing.
cd "$DIR/../docs" || exit 1

jekyll serve -w -s . -d _site --port="$PORT"

File diff suppressed because it is too large Load Diff

@ -0,0 +1,50 @@
#!/usr/bin/env sh
# Publish/ Pull-request documentation to the gh-pages site.

# The remote for pushing the docs (defaults to origin).
# This is where you will submit the PR to BVLC:gh-pages from.
REMOTE=${1:-origin}

echo "Generating docs and pushing to $REMOTE:gh-pages..."
echo "To build and view docs when not on master, simply do 'jekyll serve -s docs'."
echo

REMOTE_URL=$(git config --get "remote.${REMOTE}.url")
BRANCH=$(git rev-parse --abbrev-ref HEAD)
MSG=$(git log --oneline -1)

# The shebang selects sh, which need not understand the bash-only `[[ ]]`
# test, so the POSIX `[ ]` form with quoted operands is used throughout.
if [ "$BRANCH" = 'master' ]; then
    # Find the docs dir, no matter where the script is called
    DIR="$( cd "$(dirname "$0")" ; pwd -P )"
    DOCS_SITE_DIR=$DIR/../docs/_site

    # Make sure that docs/_site tracks remote:gh-pages.
    # If not, then we make a new repo and check out just that branch.
    mkdir -p "$DOCS_SITE_DIR"
    # Everything below adds, commits and pushes from the current directory,
    # so give up if the site directory cannot be entered.
    cd "$DOCS_SITE_DIR" || exit 1
    SITE_REMOTE_URL=$(git config --get "remote.${REMOTE}.url")
    SITE_BRANCH=$(git rev-parse --abbrev-ref HEAD)

    echo "$SITE_REMOTE_URL"
    echo "$SITE_BRANCH"
    pwd

    if [ "$SITE_REMOTE_URL" = "$REMOTE_URL" ] && [ "$SITE_BRANCH" = 'gh-pages' ]; then
        echo "Confirmed that docs/_site has same remote as main repo, and is on gh-pages."
    else
        echo "Checking out $REMOTE:gh-pages into docs/_site (will take a little time)."
        git init .
        git remote add -t gh-pages -f "$REMOTE" "$REMOTE_URL"
        git checkout gh-pages
    fi

    echo "Building the site into docs/_site, and committing the changes."
    jekyll build -s .. -d .
    git add --all .
    git commit -m "$MSG"
    git push "$REMOTE" gh-pages

    echo "All done!"
    cd ../..
else echo "You must run this deployment script from the 'master' branch."
fi

@ -0,0 +1,214 @@
// Copyright 2014 BVLC and contributors.
#include <cuda_runtime.h>
#include <cublas_v2.h>
#include "caffe/blob.hpp"
#include "caffe/common.hpp"
#include "caffe/syncedmem.hpp"
#include "caffe/util/math_functions.hpp"
namespace caffe {
// Sets the blob's four dimensions and replaces its data and diff storage.
// Negative dimensions are rejected; a blob with zero elements holds no
// storage at all.
template <typename Dtype>
void Blob<Dtype>::Reshape(const int num, const int channels, const int height,
    const int width) {
  CHECK_GE(num, 0);
  CHECK_GE(channels, 0);
  CHECK_GE(height, 0);
  CHECK_GE(width, 0);
  num_ = num;
  channels_ = channels;
  height_ = height;
  width_ = width;
  count_ = num_ * channels_ * height_ * width_;
  if (!count_) {
    // Empty blob: both holders are pointed at NULL.
    data_.reset(reinterpret_cast<SyncedMemory*>(NULL));
    diff_.reset(reinterpret_cast<SyncedMemory*>(NULL));
    return;
  }
  // Fresh storage of count_ elements for each of data and diff.
  data_.reset(new SyncedMemory(count_ * sizeof(Dtype)));
  diff_.reset(new SyncedMemory(count_ * sizeof(Dtype)));
}
// Reshapes this blob to the dimensions of `other`.
template <typename Dtype>
void Blob<Dtype>::ReshapeLike(const Blob<Dtype>& other) {
  const int num = other.num();
  const int channels = other.channels();
  const int height = other.height();
  const int width = other.width();
  Reshape(num, channels, height, width);
}
// Constructs a blob with the given dimensions; all sizing and allocation is
// delegated to Reshape().
template <typename Dtype>
Blob<Dtype>::Blob(const int num, const int channels, const int height,
    const int width) {
  this->Reshape(num, channels, height, width);
}
// Read-only CPU pointer to the blob's data. Fails the CHECK if the blob has
// no data storage.
template <typename Dtype>
const Dtype* Blob<Dtype>::cpu_data() const {
  CHECK(data_);
  return reinterpret_cast<const Dtype*>(data_->cpu_data());
}
// Hands `data` to the blob's data storage as its CPU buffer.
// Fails the CHECK if `data` is null or if the blob has no data storage
// (Reshape() leaves data_ null for a blob with zero elements).
template <typename Dtype>
void Blob<Dtype>::set_cpu_data(Dtype* data) {
  CHECK(data);
  // Same guard as every other accessor: data_ is dereferenced just below.
  CHECK(data_);
  data_->set_cpu_data(data);
}
// Read-only GPU pointer to the blob's data. Fails the CHECK if the blob has
// no data storage.
template <typename Dtype>
const Dtype* Blob<Dtype>::gpu_data() const {
  CHECK(data_);
  return reinterpret_cast<const Dtype*>(data_->gpu_data());
}
// Read-only CPU pointer to the blob's diff. Fails the CHECK if the blob has
// no diff storage.
template <typename Dtype>
const Dtype* Blob<Dtype>::cpu_diff() const {
  CHECK(diff_);
  return reinterpret_cast<const Dtype*>(diff_->cpu_data());
}
// Read-only GPU pointer to the blob's diff. Fails the CHECK if the blob has
// no diff storage.
template <typename Dtype>
const Dtype* Blob<Dtype>::gpu_diff() const {
  CHECK(diff_);
  return reinterpret_cast<const Dtype*>(diff_->gpu_data());
}
// Writable CPU pointer to the blob's data. Fails the CHECK if the blob has
// no data storage.
template <typename Dtype>
Dtype* Blob<Dtype>::mutable_cpu_data() {
  CHECK(data_);
  return reinterpret_cast<Dtype*>(data_->mutable_cpu_data());
}
// Writable GPU pointer to the blob's data. Fails the CHECK if the blob has
// no data storage.
template <typename Dtype>
Dtype* Blob<Dtype>::mutable_gpu_data() {
  CHECK(data_);
  return reinterpret_cast<Dtype*>(data_->mutable_gpu_data());
}
// Writable CPU pointer to the blob's diff. Fails the CHECK if the blob has
// no diff storage.
template <typename Dtype>
Dtype* Blob<Dtype>::mutable_cpu_diff() {
  CHECK(diff_);
  return reinterpret_cast<Dtype*>(diff_->mutable_cpu_data());
}
// Writable GPU pointer to the blob's diff. Fails the CHECK if the blob has
// no diff storage.
template <typename Dtype>
Dtype* Blob<Dtype>::mutable_gpu_diff() {
  CHECK(diff_);
  return reinterpret_cast<Dtype*>(diff_->mutable_gpu_data());
}
// Makes this blob use `other`'s data storage instead of its own. The two
// blobs must have the same element count; their shapes are not compared.
template <typename Dtype>
void Blob<Dtype>::ShareData(const Blob& other) {
  CHECK_EQ(count_, other.count());
  data_ = other.data();
}
// Makes this blob use `other`'s diff storage instead of its own. The two
// blobs must have the same element count; their shapes are not compared.
template <typename Dtype>
void Blob<Dtype>::ShareDiff(const Blob& other) {
  CHECK_EQ(count_, other.count());
  diff_ = other.diff();
}
// The "update" method is used for parameter blobs in a Net, which are stored
// as Blob<float> or Blob<double> -- hence we do not define it for
// Blob<int> or Blob<unsigned int>.
template <> void Blob<unsigned int>::Update() { NOT_IMPLEMENTED; }
template <> void Blob<int>::Update() { NOT_IMPLEMENTED; }

// Applies the diff to the data through an axpy call with coefficient -1
// (presumably data -= diff -- confirm against caffe_axpy). The work is done
// on the CPU or the GPU depending on where the data's head currently is.
template <typename Dtype>
void Blob<Dtype>::Update() {
  // We will perform update based on where the data is located.
  switch (data_->head()) {
  case SyncedMemory::HEAD_AT_CPU:
    // perform computation on CPU
    caffe_axpy<Dtype>(count_, Dtype(-1),
        reinterpret_cast<const Dtype*>(diff_->cpu_data()),
        reinterpret_cast<Dtype*>(data_->mutable_cpu_data()));
    break;
  case SyncedMemory::HEAD_AT_GPU:
  case SyncedMemory::SYNCED:
    // perform computation on GPU
    caffe_gpu_axpy<Dtype>(count_, Dtype(-1),
        reinterpret_cast<const Dtype*>(diff_->gpu_data()),
        reinterpret_cast<Dtype*>(data_->mutable_gpu_data()));
    break;
  default:
    // Any other head state is treated as uninitialized memory.
    LOG(FATAL) << "Syncedmem not initialized.";
  }
}
// Copies either the data or the diff (when copy_diff is set) of `source`
// into this blob, on the device selected by Caffe::mode(). When the shapes
// differ this blob is reshaped to match if `reshape` is set; otherwise the
// mismatch is fatal.
template <typename Dtype>
void Blob<Dtype>::CopyFrom(const Blob& source, bool copy_diff, bool reshape) {
  const bool same_shape =
      num_ == source.num() && channels_ == source.channels() &&
      height_ == source.height() && width_ == source.width();
  if (!same_shape) {
    if (!reshape) {
      LOG(FATAL) << "Trying to copy blobs of different sizes.";
    } else {
      Reshape(source.num(), source.channels(), source.height(), source.width());
    }
  }
  switch (Caffe::mode()) {
  case Caffe::CPU:
    if (!copy_diff) {
      memcpy(data_->mutable_cpu_data(), source.cpu_data(),
          sizeof(Dtype) * count_);
    } else {
      memcpy(diff_->mutable_cpu_data(), source.cpu_diff(),
          sizeof(Dtype) * count_);
    }
    break;
  case Caffe::GPU:
    if (!copy_diff) {
      CUDA_CHECK(cudaMemcpy(data_->mutable_gpu_data(), source.gpu_data(),
          sizeof(Dtype) * count_, cudaMemcpyDeviceToDevice));
    } else {
      CUDA_CHECK(cudaMemcpy(diff_->mutable_gpu_data(), source.gpu_diff(),
          sizeof(Dtype) * count_, cudaMemcpyDeviceToDevice));
    }
    break;
  default:
    LOG(FATAL) << "Unknown caffe mode.";
  }
}
// Reshapes this blob to the dimensions stored in `proto` and copies the
// proto's data values -- and its diff values, when it has any -- into the
// blob's CPU memory.
template <typename Dtype>
void Blob<Dtype>::FromProto(const BlobProto& proto) {
  Reshape(proto.num(), proto.channels(), proto.height(), proto.width());
  // The declared dimensions decide how many values are read below, so make
  // sure the proto really carries that many before indexing into it.
  CHECK_LE(count_, proto.data_size());
  // copy data
  Dtype* data_vec = mutable_cpu_data();
  for (int i = 0; i < count_; ++i) {
    data_vec[i] = proto.data(i);
  }
  if (proto.diff_size() > 0) {
    // A diff is optional, but when present it must cover every element.
    CHECK_LE(count_, proto.diff_size());
    Dtype* diff_vec = mutable_cpu_diff();
    for (int i = 0; i < count_; ++i) {
      diff_vec[i] = proto.diff(i);
    }
  }
}
// Writes this blob's dimensions and data values into `proto`, replacing any
// data/diff values already there. Diff values are written only when
// write_diff is set.
template <typename Dtype>
void Blob<Dtype>::ToProto(BlobProto* proto, bool write_diff) const {
  proto->set_num(num_);
  proto->set_channels(channels_);
  proto->set_height(height_);
  proto->set_width(width_);
  proto->clear_data();
  proto->clear_diff();
  const Dtype* const values = cpu_data();
  for (int idx = 0; idx < count_; ++idx) {
    proto->add_data(values[idx]);
  }
  if (!write_diff) {
    return;
  }
  const Dtype* const grads = cpu_diff();
  for (int idx = 0; idx < count_; ++idx) {
    proto->add_diff(grads[idx]);
  }
}
// Explicit instantiations: whatever INSTANTIATE_CLASS provides, plus the
// int and unsigned int blobs, whose Update() is specialized above as
// NOT_IMPLEMENTED.
INSTANTIATE_CLASS(Blob);
template class Blob<int>;
template class Blob<unsigned int>;
} // namespace caffe

@ -0,0 +1,198 @@
// Copyright 2014 BVLC and contributors.
#include <cstdio>
#include <ctime>
#include "caffe/common.hpp"
#include "caffe/util/rng.hpp"
namespace caffe {
// Definition of the static Caffe::singleton_ member.
shared_ptr<Caffe> Caffe::singleton_;
// curand seeding
// Derives a seed in [0, 104729) from the current time and the process id.
int64_t cluster_seedgen(void) {
  const int64_t kModulus = 104729;
  const int64_t pid = getpid();
  const int64_t now = time(NULL);
  // Work with magnitudes reduced modulo kModulus before multiplying. The
  // result is |now * 181 * (pid - 83) * 359| % kModulus -- exactly what the
  // direct product yields whenever that product fits in int64_t -- but every
  // intermediate stays small, so a large pid can no longer overflow the
  // signed multiplication.
  const int64_t time_mag = now < 0 ? -now : now;
  const int64_t pid_offset = pid - 83;
  const int64_t pid_mag = pid_offset < 0 ? -pid_offset : pid_offset;
  const int64_t time_term = ((time_mag % kModulus) * 181) % kModulus;
  const int64_t pid_term = ((pid_mag % kModulus) * 359) % kModulus;
  return (time_term * pid_term) % kModulus;
}
// Starts in CPU mode and TRAIN phase, then tries to set up cublas and
// curand. A failure of either is only logged, so that CPU-only use keeps
// working.
Caffe::Caffe()
    : mode_(Caffe::CPU), phase_(Caffe::TRAIN), cublas_handle_(NULL),
      curand_generator_(NULL),
      random_generator_() {
  // Try to create a cublas handler, and report an error if failed (but we will
  // keep the program running as one might just want to run CPU code).
  const bool cublas_ok =
      cublasCreate(&cublas_handle_) == CUBLAS_STATUS_SUCCESS;
  if (!cublas_ok) {
    LOG(ERROR) << "Cannot create Cublas handle. Cublas won't be available.";
  }
  // Try to create a curand handler; it is seeded only if creation worked.
  bool curand_ok =
      curandCreateGenerator(&curand_generator_, CURAND_RNG_PSEUDO_DEFAULT)
      == CURAND_STATUS_SUCCESS;
  if (curand_ok) {
    curand_ok =
        curandSetPseudoRandomGeneratorSeed(curand_generator_, cluster_seedgen())
        == CURAND_STATUS_SUCCESS;
  }
  if (!curand_ok) {
    LOG(ERROR) << "Cannot create Curand generator. Curand won't be available.";
  }
}
// Releases the cuBLAS handle and the cuRAND generator, each only if it is
// non-null.
Caffe::~Caffe() {
  if (cublas_handle_ != NULL) {
    CUBLAS_CHECK(cublasDestroy(cublas_handle_));
  }
  if (curand_generator_ != NULL) {
    CURAND_CHECK(curandDestroyGenerator(curand_generator_));
  }
}
// Reseeds both random sources with `seed`: the cuRAND generator (when one
// exists) and the RNG object held by the singleton.
void Caffe::set_random_seed(const unsigned int seed) {
  if (!Get().curand_generator_) {
    LOG(ERROR) << "Curand not available. Skipping setting the curand seed.";
  } else {
    // Yangqing's note: simply setting the generator seed does not seem to
    // work on the tesla K20s, so the generator is destroyed and rebuilt
    // before the seed is applied.
    CURAND_CHECK(curandDestroyGenerator(curand_generator()));
    CURAND_CHECK(curandCreateGenerator(&Get().curand_generator_,
                                       CURAND_RNG_PSEUDO_DEFAULT));
    CURAND_CHECK(curandSetPseudoRandomGeneratorSeed(curand_generator(),
                                                    seed));
  }

  // The RNG object is replaced regardless of cuRAND availability.
  Get().random_generator_.reset(new RNG(seed));
}
// Switches the active CUDA device to `device_id`. When the device actually
// changes, the cuBLAS handle and cuRAND generator are destroyed (if present)
// and recreated, and the new generator is seeded from cluster_seedgen().
void Caffe::SetDevice(const int device_id) {
  int active_device;
  CUDA_CHECK(cudaGetDevice(&active_device));
  if (active_device != device_id) {
    // The call to cudaSetDevice must come before any calls to Get, which
    // may perform initialization using the GPU.
    CUDA_CHECK(cudaSetDevice(device_id));

    // Tear down the existing handles.
    if (Get().cublas_handle_) {
      CUBLAS_CHECK(cublasDestroy(Get().cublas_handle_));
    }
    if (Get().curand_generator_) {
      CURAND_CHECK(curandDestroyGenerator(Get().curand_generator_));
    }

    // Build fresh ones now that the new device is active.
    CUBLAS_CHECK(cublasCreate(&Get().cublas_handle_));
    CURAND_CHECK(curandCreateGenerator(&Get().curand_generator_,
                                       CURAND_RNG_PSEUDO_DEFAULT));
    CURAND_CHECK(curandSetPseudoRandomGeneratorSeed(Get().curand_generator_,
                                                    cluster_seedgen()));
  }
}
// Prints the properties of the currently selected CUDA device to stdout, or a
// short notice when no device can be queried.
//
// The size_t members of cudaDeviceProp are cast to unsigned long long and
// printed with %llu: passing a size_t to %lu is a format/argument mismatch on
// platforms where size_t is not unsigned long.
void Caffe::DeviceQuery() {
  cudaDeviceProp prop;
  int device;
  if (cudaSuccess != cudaGetDevice(&device)) {
    printf("No cuda device present.\n");
    return;
  }
  CUDA_CHECK(cudaGetDeviceProperties(&prop, device));
  printf("Device id: %d\n", device);
  printf("Major revision number: %d\n", prop.major);
  printf("Minor revision number: %d\n", prop.minor);
  printf("Name: %s\n", prop.name);
  printf("Total global memory: %llu\n",
         static_cast<unsigned long long>(prop.totalGlobalMem));  // NOLINT
  printf("Total shared memory per block: %llu\n",
         static_cast<unsigned long long>(prop.sharedMemPerBlock));  // NOLINT
  printf("Total registers per block: %d\n", prop.regsPerBlock);
  printf("Warp size: %d\n", prop.warpSize);
  printf("Maximum memory pitch: %llu\n",
         static_cast<unsigned long long>(prop.memPitch));  // NOLINT
  printf("Maximum threads per block: %d\n", prop.maxThreadsPerBlock);
  printf("Maximum dimension of block: %d, %d, %d\n",
         prop.maxThreadsDim[0], prop.maxThreadsDim[1], prop.maxThreadsDim[2]);
  printf("Maximum dimension of grid: %d, %d, %d\n",
         prop.maxGridSize[0], prop.maxGridSize[1], prop.maxGridSize[2]);
  printf("Clock rate: %d\n", prop.clockRate);
  printf("Total constant memory: %llu\n",
         static_cast<unsigned long long>(prop.totalConstMem));  // NOLINT
  printf("Texture alignment: %llu\n",
         static_cast<unsigned long long>(prop.textureAlignment));  // NOLINT
  printf("Concurrent copy and execution: %s\n",
         (prop.deviceOverlap ? "Yes" : "No"));
  printf("Number of multiprocessors: %d\n", prop.multiProcessorCount);
  printf("Kernel execution timeout: %s\n",
         (prop.kernelExecTimeoutEnabled ? "Yes" : "No"));
}
// Private implementation behind Caffe::RNG: owns one caffe::rng_t engine.
class Caffe::RNG::Generator {
 public:
  // Seeds the engine from cluster_seedgen().
  Generator()
      : rng_(new caffe::rng_t(cluster_seedgen())) {
  }

  // Seeds the engine with the caller-supplied value.
  explicit Generator(unsigned int seed)
      : rng_(new caffe::rng_t(seed)) {
  }

  // Non-owning access to the engine.
  caffe::rng_t* rng() {
    return rng_.get();
  }

 private:
  shared_ptr<caffe::rng_t> rng_;
};
// Default construction: the underlying Generator picks its own seed.
Caffe::RNG::RNG()
    : generator_(new Generator()) {
}

// Construction from an explicit seed, forwarded to the Generator.
Caffe::RNG::RNG(unsigned int seed)
    : generator_(new Generator(seed)) {
}
// Makes this RNG share `other`'s Generator.
//
// The smart pointer itself is assigned so both RNG objects share ownership of
// the same Generator. Calling reset() with other's raw pointer would instead
// create a second, independent owner of the same object, which would be
// deleted twice once both owners are destroyed.
Caffe::RNG& Caffe::RNG::operator=(const RNG& other) {
  generator_ = other.generator_;
  return *this;
}
void* Caffe::RNG::generator() {
return static_cast<void*>(generator_->rng());
}
// Maps a cuBLAS status code to the name of its enumerator. Codes that are not
// listed yield "Unknown cublas status".
const char* cublasGetErrorString(cublasStatus_t error) {
  const char* text = "Unknown cublas status";
  switch (error) {
  case CUBLAS_STATUS_SUCCESS:
    text = "CUBLAS_STATUS_SUCCESS";
    break;
  case CUBLAS_STATUS_NOT_INITIALIZED:
    text = "CUBLAS_STATUS_NOT_INITIALIZED";
    break;
  case CUBLAS_STATUS_ALLOC_FAILED:
    text = "CUBLAS_STATUS_ALLOC_FAILED";
    break;
  case CUBLAS_STATUS_INVALID_VALUE:
    text = "CUBLAS_STATUS_INVALID_VALUE";
    break;
  case CUBLAS_STATUS_ARCH_MISMATCH:
    text = "CUBLAS_STATUS_ARCH_MISMATCH";
    break;
  case CUBLAS_STATUS_MAPPING_ERROR:
    text = "CUBLAS_STATUS_MAPPING_ERROR";
    break;
  case CUBLAS_STATUS_EXECUTION_FAILED:
    text = "CUBLAS_STATUS_EXECUTION_FAILED";
    break;
  case CUBLAS_STATUS_INTERNAL_ERROR:
    text = "CUBLAS_STATUS_INTERNAL_ERROR";
    break;
  case CUBLAS_STATUS_NOT_SUPPORTED:
    text = "CUBLAS_STATUS_NOT_SUPPORTED";
    break;
  }
  return text;
}
// Maps a cuRAND status code to the name of its enumerator. Codes that are not
// listed yield "Unknown curand status".
const char* curandGetErrorString(curandStatus_t error) {
  const char* text = "Unknown curand status";
  switch (error) {
  case CURAND_STATUS_SUCCESS:
    text = "CURAND_STATUS_SUCCESS";
    break;
  case CURAND_STATUS_VERSION_MISMATCH:
    text = "CURAND_STATUS_VERSION_MISMATCH";
    break;
  case CURAND_STATUS_NOT_INITIALIZED:
    text = "CURAND_STATUS_NOT_INITIALIZED";
    break;
  case CURAND_STATUS_ALLOCATION_FAILED:
    text = "CURAND_STATUS_ALLOCATION_FAILED";
    break;
  case CURAND_STATUS_TYPE_ERROR:
    text = "CURAND_STATUS_TYPE_ERROR";
    break;
  case CURAND_STATUS_OUT_OF_RANGE:
    text = "CURAND_STATUS_OUT_OF_RANGE";
    break;
  case CURAND_STATUS_LENGTH_NOT_MULTIPLE:
    text = "CURAND_STATUS_LENGTH_NOT_MULTIPLE";
    break;
  case CURAND_STATUS_DOUBLE_PRECISION_REQUIRED:
    text = "CURAND_STATUS_DOUBLE_PRECISION_REQUIRED";
    break;
  case CURAND_STATUS_LAUNCH_FAILURE:
    text = "CURAND_STATUS_LAUNCH_FAILURE";
    break;
  case CURAND_STATUS_PREEXISTING_FAILURE:
    text = "CURAND_STATUS_PREEXISTING_FAILURE";
    break;
  case CURAND_STATUS_INITIALIZATION_FAILED:
    text = "CURAND_STATUS_INITIALIZATION_FAILED";
    break;
  case CURAND_STATUS_ARCH_MISMATCH:
    text = "CURAND_STATUS_ARCH_MISMATCH";
    break;
  case CURAND_STATUS_INTERNAL_ERROR:
    text = "CURAND_STATUS_INTERNAL_ERROR";
    break;
  }
  return text;
}
} // namespace caffe

@ -0,0 +1,101 @@
// Copyright 2014 BVLC and contributors.
#ifndef CAFFE_LAYER_FACTORY_HPP_
#define CAFFE_LAYER_FACTORY_HPP_
#include <string>
#include "caffe/layer.hpp"
#include "caffe/vision_layers.hpp"
#include "caffe/proto/caffe.pb.h"
using std::string;
namespace caffe {
// A function to get a specific layer from the specification given in
// LayerParameter. Ideally this would be replaced by a factory pattern,
// but we will leave it this way for now.
// Creates the layer object selected by param.type() and hands the raw,
// newly allocated pointer to the caller. An unspecified (NONE) or
// unrecognised type is reported through LOG(FATAL).
template <typename Dtype>
Layer<Dtype>* GetLayer(const LayerParameter& param) {
  const string& layer_name = param.name();
  const LayerParameter_LayerType& layer_type = param.type();
  switch (layer_type) {
  case LayerParameter_LayerType_ACCURACY: return new AccuracyLayer<Dtype>(param);
  case LayerParameter_LayerType_ARGMAX: return new ArgMaxLayer<Dtype>(param);
  case LayerParameter_LayerType_BNLL: return new BNLLLayer<Dtype>(param);
  case LayerParameter_LayerType_CONCAT: return new ConcatLayer<Dtype>(param);
  case LayerParameter_LayerType_CONVOLUTION: return new ConvolutionLayer<Dtype>(param);
  case LayerParameter_LayerType_DATA: return new DataLayer<Dtype>(param);
  case LayerParameter_LayerType_DROPOUT: return new DropoutLayer<Dtype>(param);
  case LayerParameter_LayerType_DUMMY_DATA: return new DummyDataLayer<Dtype>(param);
  case LayerParameter_LayerType_EUCLIDEAN_LOSS: return new EuclideanLossLayer<Dtype>(param);
  case LayerParameter_LayerType_ELTWISE: return new EltwiseLayer<Dtype>(param);
  case LayerParameter_LayerType_FLATTEN: return new FlattenLayer<Dtype>(param);
  case LayerParameter_LayerType_HDF5_DATA: return new HDF5DataLayer<Dtype>(param);
  case LayerParameter_LayerType_HDF5_OUTPUT: return new HDF5OutputLayer<Dtype>(param);
  case LayerParameter_LayerType_HINGE_LOSS: return new HingeLossLayer<Dtype>(param);
  case LayerParameter_LayerType_IMAGE_DATA: return new ImageDataLayer<Dtype>(param);
  case LayerParameter_LayerType_IM2COL: return new Im2colLayer<Dtype>(param);
  case LayerParameter_LayerType_INFOGAIN_LOSS: return new InfogainLossLayer<Dtype>(param);
  case LayerParameter_LayerType_INNER_PRODUCT: return new InnerProductLayer<Dtype>(param);
  case LayerParameter_LayerType_LRN: return new LRNLayer<Dtype>(param);
  case LayerParameter_LayerType_MEMORY_DATA: return new MemoryDataLayer<Dtype>(param);
  case LayerParameter_LayerType_MULTINOMIAL_LOGISTIC_LOSS: return new MultinomialLogisticLossLayer<Dtype>(param);
  case LayerParameter_LayerType_POOLING: return new PoolingLayer<Dtype>(param);
  case LayerParameter_LayerType_POWER: return new PowerLayer<Dtype>(param);
  case LayerParameter_LayerType_RELU: return new ReLULayer<Dtype>(param);
  case LayerParameter_LayerType_SIGMOID: return new SigmoidLayer<Dtype>(param);
  case LayerParameter_LayerType_SIGMOID_CROSS_ENTROPY_LOSS: return new SigmoidCrossEntropyLossLayer<Dtype>(param);
  case LayerParameter_LayerType_SOFTMAX: return new SoftmaxLayer<Dtype>(param);
  case LayerParameter_LayerType_SOFTMAX_LOSS: return new SoftmaxWithLossLayer<Dtype>(param);
  case LayerParameter_LayerType_SPLIT: return new SplitLayer<Dtype>(param);
  case LayerParameter_LayerType_TANH: return new TanHLayer<Dtype>(param);
  case LayerParameter_LayerType_WINDOW_DATA: return new WindowDataLayer<Dtype>(param);
  case LayerParameter_LayerType_NONE:
    // No break here, exactly as before: control would continue into the
    // default label if LOG(FATAL) ever returned.
    LOG(FATAL) << "Layer " << layer_name << " has unspecified type.";
  default:
    LOG(FATAL) << "Layer " << layer_name << " has unknown type " << layer_type;
  }
  // Only here to keep older compilers from warning about a missing return.
  return static_cast<Layer<Dtype>*>(NULL);
}
// Explicit instantiations of GetLayer for the float and double element types.
template Layer<float>* GetLayer(const LayerParameter& param);
template Layer<double>* GetLayer(const LayerParameter& param);
} // namespace caffe
#endif // CAFFE_LAYER_FACTORY_HPP_

@ -0,0 +1,64 @@
// Copyright 2014 BVLC and contributors.
#include <algorithm>
#include <cmath>
#include <cfloat>
#include <vector>
#include "caffe/layer.hpp"
#include "caffe/vision_layers.hpp"
#include "caffe/util/math_functions.hpp"
#include "caffe/util/io.hpp"
using std::max;
namespace caffe {
// Validates the two inputs (bottom[0]: predictions, bottom[1]: labels) and
// shapes the single output as 1 x 2 x 1 x 1. The labels blob must hold
// exactly one value per item.
template <typename Dtype>
void AccuracyLayer<Dtype>::SetUp(
    const vector<Blob<Dtype>*>& bottom, vector<Blob<Dtype>*>* top) {
  Layer<Dtype>::SetUp(bottom, top);

  Blob<Dtype>* const predictions = bottom[0];
  Blob<Dtype>* const labels = bottom[1];
  CHECK_EQ(predictions->num(), labels->num())
      << "The data and label should have the same number.";
  CHECK_EQ(labels->channels(), 1);
  CHECK_EQ(labels->height(), 1);
  CHECK_EQ(labels->width(), 1);

  // Two output slots; Forward_cpu fills them with accuracy and log-prob.
  (*top)[0]->Reshape(1, 2, 1, 1);
}
// Computes two batch averages and stores them in the output blob:
//   [0] fraction of items whose highest-scoring index equals the label,
//   [1] mean of -log(max(score at the label index, kLOG_THRESHOLD)).
// Always returns 0 because this layer is not meant to act as a loss.
template <typename Dtype>
Dtype AccuracyLayer<Dtype>::Forward_cpu(const vector<Blob<Dtype>*>& bottom,
    vector<Blob<Dtype>*>* top) {
  Dtype num_correct = 0;
  Dtype neg_log_sum = 0;
  const Dtype* scores = bottom[0]->cpu_data();
  const Dtype* labels = bottom[1]->cpu_data();
  const int num = bottom[0]->num();
  const int dim = bottom[0]->count() / bottom[0]->num();

  for (int item = 0; item < num; ++item) {
    const Dtype* row = scores + item * dim;
    const int label = static_cast<int>(labels[item]);

    // Index of the largest score in this row (first one wins on ties).
    Dtype best_score = -FLT_MAX;
    int best_index = 0;
    for (int k = 0; k < dim; ++k) {
      if (row[k] > best_score) {
        best_score = row[k];
        best_index = k;
      }
    }
    if (best_index == label) {
      ++num_correct;
    }

    // Clamp from below so the logarithm stays finite.
    Dtype prob = max(row[label], Dtype(kLOG_THRESHOLD));
    neg_log_sum -= log(prob);
  }

  // LOG(INFO) << "Accuracy: " << accuracy;
  (*top)[0]->mutable_cpu_data()[0] = num_correct / num;
  (*top)[0]->mutable_cpu_data()[1] = neg_log_sum / num;
  // Accuracy layer should not be used as a loss function.
  return Dtype(0);
}
// Instantiate AccuracyLayer via the project's INSTANTIATE_CLASS macro
// (defined outside this view).
INSTANTIATE_CLASS(AccuracyLayer);
} // namespace caffe

@ -0,0 +1,55 @@
// Copyright 2014 BVLC and contributors.
#include <vector>
#include <cfloat>
#include "caffe/layer.hpp"
#include "caffe/vision_layers.hpp"
namespace caffe {
// Reads the out_max_val option and shapes the output with one row per input
// item: two channels (max index, max value) when the option is set, otherwise
// a single channel (max index only).
template <typename Dtype>
void ArgMaxLayer<Dtype>::SetUp(const vector<Blob<Dtype>*>& bottom,
    vector<Blob<Dtype>*>* top) {
  Layer<Dtype>::SetUp(bottom, top);
  out_max_val_ = this->layer_param_.argmax_param().out_max_val();
  const int out_channels = out_max_val_ ? 2 : 1;
  (*top)[0]->Reshape(bottom[0]->num(), out_channels, 1, 1);
}
// For every item, finds the index of the largest input value (first one wins
// on ties) and writes it to the output; when out_max_val_ is set the value
// itself is written right after the index. Returns 0.
template <typename Dtype>
Dtype ArgMaxLayer<Dtype>::Forward_cpu(const vector<Blob<Dtype>*>& bottom,
    vector<Blob<Dtype>*>* top) {
  const Dtype* input = bottom[0]->cpu_data();
  Dtype* output = (*top)[0]->mutable_cpu_data();
  const int num = bottom[0]->num();
  const int dim = bottom[0]->count() / bottom[0]->num();

  for (int item = 0; item < num; ++item) {
    const Dtype* row = input + item * dim;
    Dtype best_value = -FLT_MAX;
    int best_index = 0;
    for (int k = 0; k < dim; ++k) {
      if (row[k] > best_value) {
        best_value = row[k];
        best_index = k;
      }
    }

    if (!out_max_val_) {
      output[item] = best_index;
      continue;
    }
    // Two slots per item: index first, then value.
    output[item * 2] = best_index;
    output[item * 2 + 1] = best_value;
  }
  return Dtype(0);
}
// Instantiate ArgMaxLayer via the project's INSTANTIATE_CLASS macro
// (defined outside this view).
INSTANTIATE_CLASS(ArgMaxLayer);
} // namespace caffe

@ -0,0 +1,50 @@
// Copyright 2014 BVLC and contributors.
#include <algorithm>
#include <vector>
#include "caffe/layer.hpp"
#include "caffe/vision_layers.hpp"
using std::min;
namespace caffe {
// Upper clamp applied to the input before it is exponentiated in
// BNLLLayer::Backward_cpu.
const float kBNLL_THRESHOLD = 50.;
// Element-wise log(1 + exp(x)). For positive x the equivalent form
// x + log(1 + exp(-x)) is used, so exp is only ever evaluated on a
// non-positive argument. Returns 0.
template <typename Dtype>
Dtype BNLLLayer<Dtype>::Forward_cpu(const vector<Blob<Dtype>*>& bottom,
    vector<Blob<Dtype>*>* top) {
  const Dtype* input = bottom[0]->cpu_data();
  Dtype* output = (*top)[0]->mutable_cpu_data();
  const int count = bottom[0]->count();
  for (int idx = 0; idx < count; ++idx) {
    const Dtype x = input[idx];
    if (x > 0) {
      output[idx] = x + log(1. + exp(-x));
    } else {
      output[idx] = log(1. + exp(x));
    }
  }
  return Dtype(0);
}
// Back-propagates through BNLL: bottom_diff = top_diff * e / (e + 1), where
// e = exp(min(x, kBNLL_THRESHOLD)). Does nothing unless propagate_down is set.
template <typename Dtype>
void BNLLLayer<Dtype>::Backward_cpu(const vector<Blob<Dtype>*>& top,
    const bool propagate_down,
    vector<Blob<Dtype>*>* bottom) {
  if (!propagate_down) {
    return;
  }
  const Dtype* input = (*bottom)[0]->cpu_data();
  const Dtype* grad_out = top[0]->cpu_diff();
  Dtype* grad_in = (*bottom)[0]->mutable_cpu_diff();
  const int count = (*bottom)[0]->count();
  for (int idx = 0; idx < count; ++idx) {
    // The input is clamped before exponentiation.
    const Dtype expval = exp(min(input[idx], Dtype(kBNLL_THRESHOLD)));
    grad_in[idx] = grad_out[idx] * expval / (expval + 1.);
  }
}
// Instantiate BNLLLayer via the project's INSTANTIATE_CLASS macro
// (defined outside this view).
INSTANTIATE_CLASS(BNLLLayer);
} // namespace caffe

@ -0,0 +1,65 @@
// Copyright 2014 BVLC and contributors.
#include <algorithm>
#include <vector>
#include "caffe/layer.hpp"
#include "caffe/vision_layers.hpp"
using std::max;
namespace caffe {
// Upper clamp applied to the input before it is exponentiated in the
// BNLLBackward kernel.
const float kBNLL_THRESHOLD = 50.;
// Kernel: out[i] = log(1 + exp(in[i])) for i in [0, n). For positive inputs
// the equivalent form in[i] + log(1 + exp(-in[i])) is used, so exp is only
// evaluated on a non-positive argument.
template <typename Dtype>
__global__ void BNLLForward(const int n, const Dtype* in, Dtype* out) {
  CUDA_KERNEL_LOOP(index, n) {
    const Dtype x = in[index];
    if (x > 0) {
      out[index] = x + log(1. + exp(-x));
    } else {
      out[index] = log(1. + exp(x));
    }
  }
}
// Launches BNLLForward over every element of bottom[0], writing into top[0],
// then checks the launch for errors. Returns 0.
template <typename Dtype>
Dtype BNLLLayer<Dtype>::Forward_gpu(const vector<Blob<Dtype>*>& bottom,
    vector<Blob<Dtype>*>* top) {
  const Dtype* input = bottom[0]->gpu_data();
  Dtype* output = (*top)[0]->mutable_gpu_data();
  const int num_elements = bottom[0]->count();
  // NOLINT_NEXT_LINE(whitespace/operators)
  BNLLForward<Dtype><<<CAFFE_GET_BLOCKS(num_elements), CAFFE_CUDA_NUM_THREADS>>>(
      num_elements, input, output);
  CUDA_POST_KERNEL_CHECK;
  return Dtype(0);
}
// Kernel: out_diff[i] = in_diff[i] * e / (e + 1) for i in [0, n), where
// e = exp(min(in_data[i], kBNLL_THRESHOLD)).
template <typename Dtype>
__global__ void BNLLBackward(const int n, const Dtype* in_diff,
    const Dtype* in_data, Dtype* out_diff) {
  CUDA_KERNEL_LOOP(index, n) {
    // The input is clamped before exponentiation.
    const Dtype clamped = min(in_data[index], Dtype(kBNLL_THRESHOLD));
    const Dtype expval = exp(clamped);
    out_diff[index] = in_diff[index] * expval / (expval + 1.);
  }
}
// Launches BNLLBackward to turn top[0]'s diff into bottom[0]'s diff, then
// checks the launch for errors. Does nothing unless propagate_down is set.
template <typename Dtype>
void BNLLLayer<Dtype>::Backward_gpu(const vector<Blob<Dtype>*>& top,
    const bool propagate_down,
    vector<Blob<Dtype>*>* bottom) {
  if (!propagate_down) {
    return;
  }
  const Dtype* input = (*bottom)[0]->gpu_data();
  const Dtype* grad_out = top[0]->gpu_diff();
  Dtype* grad_in = (*bottom)[0]->mutable_gpu_diff();
  const int num_elements = (*bottom)[0]->count();
  // NOLINT_NEXT_LINE(whitespace/operators)
  BNLLBackward<Dtype><<<CAFFE_GET_BLOCKS(num_elements), CAFFE_CUDA_NUM_THREADS>>>(
      num_elements, grad_out, input, grad_in);
  CUDA_POST_KERNEL_CHECK;
}
// Instantiate BNLLLayer via the project's INSTANTIATE_CLASS macro
// (defined outside this view).
INSTANTIATE_CLASS(BNLLLayer);
} // namespace caffe

@ -0,0 +1,101 @@
// Copyright 2014 BVLC and contributors.
#include <vector>
#include "caffe/layer.hpp"
#include "caffe/vision_layers.hpp"
#include "caffe/util/math_functions.hpp"
namespace caffe {
// Reads concat_dim (only 0 = num and 1 = channels pass the checks), sums the
// extent of every input along that axis, and reshapes the output to match.
// The closing check requires the output's element count to equal the sum of
// the inputs' element counts.
template <typename Dtype>
void ConcatLayer<Dtype>::SetUp(const vector<Blob<Dtype>*>& bottom,
    vector<Blob<Dtype>*>* top) {
  Layer<Dtype>::SetUp(bottom, top);
  concat_dim_ = this->layer_param_.concat_param().concat_dim();
  CHECK_GE(concat_dim_, 0) <<
      "concat_dim should be >= 0";
  CHECK_LE(concat_dim_, 1) <<
      "For now concat_dim <=1, it can only concat num and channels";

  // Seed the running shape with the first input.
  Blob<Dtype>* const first = bottom[0];
  count_ = first->count();
  num_ = first->num();
  channels_ = first->channels();
  height_ = first->height();
  width_ = first->width();

  // Grow the shape along the concatenation axis for every further input.
  for (int i = 1; i < bottom.size(); ++i) {
    Blob<Dtype>* const blob = bottom[i];
    count_ += blob->count();
    switch (concat_dim_) {
    case 0:
      num_ += blob->num();
      break;
    case 1:
      channels_ += blob->channels();
      break;
    case 2:
      height_ += blob->height();
      break;
    case 3:
      width_ += blob->width();
      break;
    default:
      break;
    }
  }

  (*top)[0]->Reshape(num_, channels_, height_, width_);
  CHECK_EQ(count_, (*top)[0]->count());
}
// Copies every input into its slot of the output blob.
//   concat_dim_ == 0: each input is copied as one contiguous run, placed at a
//                     running offset along num.
//   concat_dim_ == 1: for each of the num_ items, the input's per-item slab
//                     is copied at a running channel offset.
// Returns 0.
template <typename Dtype>
Dtype ConcatLayer<Dtype>::Forward_cpu(const vector<Blob<Dtype>*>& bottom,
    vector<Blob<Dtype>*>* top) {
  Dtype* top_data = (*top)[0]->mutable_cpu_data();
  if (concat_dim_ == 0) {
    int num_cursor = 0;
    for (int i = 0; i < bottom.size(); ++i) {
      Blob<Dtype>* const blob = bottom[i];
      const Dtype* src = blob->cpu_data();
      const int num_elem = blob->count();
      caffe_copy(num_elem, src, top_data + (*top)[0]->offset(num_cursor));
      num_cursor += blob->num();
    }
  } else if (concat_dim_ == 1) {
    int channel_cursor = 0;
    for (int i = 0; i < bottom.size(); ++i) {
      Blob<Dtype>* const blob = bottom[i];
      const Dtype* src = blob->cpu_data();
      // Elements in one item of this input.
      const int num_elem =
          blob->channels() * blob->height() * blob->width();
      for (int n = 0; n < num_; ++n) {
        caffe_copy(num_elem, src + blob->offset(n),
                   top_data + (*top)[0]->offset(n, channel_cursor));
      }
      channel_cursor += blob->channels();
    }
  }
  // concat_dim_ is guaranteed to be 0 or 1 by SetUp.
  return Dtype(0.);
}
// Splits the output gradient back into the inputs' diff buffers, mirroring
// the layout used by the forward pass. propagate_down is not consulted.
template <typename Dtype>
void ConcatLayer<Dtype>::Backward_cpu(const vector<Blob<Dtype>*>& top,
    const bool propagate_down, vector<Blob<Dtype>*>* bottom) {
  const Dtype* top_diff = top[0]->cpu_diff();
  if (concat_dim_ == 0) {
    int num_cursor = 0;
    for (int i = 0; i < bottom->size(); ++i) {
      Blob<Dtype>* input = (*bottom)[i];
      Dtype* dst = input->mutable_cpu_diff();
      caffe_copy(input->count(),
                 top_diff + top[0]->offset(num_cursor), dst);
      num_cursor += input->num();
    }
  } else if (concat_dim_ == 1) {
    int channel_cursor = 0;
    for (int i = 0; i < bottom->size(); ++i) {
      Blob<Dtype>* input = (*bottom)[i];
      Dtype* dst = input->mutable_cpu_diff();
      // Elements in one item of this input.
      const int num_elem =
          input->channels() * input->height() * input->width();
      for (int n = 0; n < num_; ++n) {
        caffe_copy(num_elem, top_diff + top[0]->offset(n, channel_cursor),
                   dst + input->offset(n));
      }
      channel_cursor += input->channels();
    }
  }
  // concat_dim_ is guaranteed to be 0 or 1 by SetUp.
}
// Instantiate ConcatLayer via the project's INSTANTIATE_CLASS macro
// (defined outside this view).
INSTANTIATE_CLASS(ConcatLayer);
} // namespace caffe

@ -0,0 +1,75 @@
// Copyright 2014 BVLC and contributors.
#include <vector>
#include "caffe/layer.hpp"
#include "caffe/vision_layers.hpp"
#include "caffe/util/math_functions.hpp"
namespace caffe {
// GPU counterpart of the forward concatenation: device-to-device copies of
// each input into its slot of the output, along num (concat_dim_ == 0) or
// channels (concat_dim_ == 1). Any other axis is fatal. Returns 0.
template <typename Dtype>
Dtype ConcatLayer<Dtype>::Forward_gpu(const vector<Blob<Dtype>*>& bottom,
    vector<Blob<Dtype>*>* top) {
  Dtype* top_data = (*top)[0]->mutable_gpu_data();
  switch (concat_dim_) {
  case 0: {
    int num_cursor = 0;
    for (int i = 0; i < bottom.size(); ++i) {
      Blob<Dtype>* const blob = bottom[i];
      const Dtype* src = blob->gpu_data();
      caffe_gpu_copy(blob->count(), src,
                     top_data + (*top)[0]->offset(num_cursor));
      num_cursor += blob->num();
    }
    break;
  }
  case 1: {
    int channel_cursor = 0;
    for (int i = 0; i < bottom.size(); ++i) {
      Blob<Dtype>* const blob = bottom[i];
      const Dtype* src = blob->gpu_data();
      // Elements in one item of this input.
      const int num_elem =
          blob->channels() * blob->height() * blob->width();
      for (int n = 0; n < num_; ++n) {
        caffe_gpu_copy(num_elem, src + blob->offset(n),
                       top_data + (*top)[0]->offset(n, channel_cursor));
      }
      channel_cursor += blob->channels();
    }
    break;
  }
  default:
    LOG(FATAL) << "concat_dim along dim" << concat_dim_ <<
        " not implemented yet";
  }
  return Dtype(0.);
}
// GPU counterpart of the backward pass: slices the output gradient back into
// every input's diff buffer, along num (concat_dim_ == 0) or channels
// (concat_dim_ == 1). Any other axis is fatal. propagate_down is not
// consulted.
template <typename Dtype>
void ConcatLayer<Dtype>::Backward_gpu(const vector<Blob<Dtype>*>& top,
    const bool propagate_down, vector<Blob<Dtype>*>* bottom) {
  const Dtype* top_diff = top[0]->gpu_diff();
  switch (concat_dim_) {
  case 0: {
    int num_cursor = 0;
    for (int i = 0; i < bottom->size(); ++i) {
      Blob<Dtype>* input = (*bottom)[i];
      Dtype* dst = input->mutable_gpu_diff();
      caffe_gpu_copy(input->count(),
                     top_diff + top[0]->offset(num_cursor), dst);
      num_cursor += input->num();
    }
    break;
  }
  case 1: {
    int channel_cursor = 0;
    for (int i = 0; i < bottom->size(); ++i) {
      Blob<Dtype>* input = (*bottom)[i];
      Dtype* dst = input->mutable_gpu_diff();
      // Elements in one item of this input.
      const int num_elem =
          input->channels() * input->height() * input->width();
      for (int n = 0; n < num_; ++n) {
        caffe_gpu_copy(num_elem,
                       top_diff + top[0]->offset(n, channel_cursor),
                       dst + input->offset(n));
      }
      channel_cursor += input->channels();
    }
    break;
  }
  default:
    LOG(FATAL) << "concat_dim along dim" << concat_dim_ <<
        " not implemented yet";
  }
}
// Instantiate ConcatLayer via the project's INSTANTIATE_CLASS macro
// (defined outside this view).
INSTANTIATE_CLASS(ConcatLayer);
} // namespace caffe

@ -0,0 +1,167 @@
// Copyright 2014 BVLC and contributors.
#include <vector>
#include "caffe/layer.hpp"
#include "caffe/vision_layers.hpp"
#include "caffe/util/im2col.hpp"
#include "caffe/filler.hpp"
#include "caffe/util/math_functions.hpp"
namespace caffe {
// Configures the convolution from convolution_param and the input shape:
//   * caches kernel size, stride, group, pad and the input geometry;
//   * sizes the one-image im2col buffer and the output blob;
//   * derives the per-group GEMM dimensions M_, K_ and N_;
//   * creates and fills the weight (and optional bias) blobs unless the
//     layer already has parameter blobs;
//   * builds the all-ones vector of length N_ used for the bias term.
template <typename Dtype>
void ConvolutionLayer<Dtype>::SetUp(const vector<Blob<Dtype>*>& bottom,
    vector<Blob<Dtype>*>* top) {
  Layer<Dtype>::SetUp(bottom, top);

  // Input geometry.
  num_ = bottom[0]->num();
  channels_ = bottom[0]->channels();
  height_ = bottom[0]->height();
  width_ = bottom[0]->width();

  // Hyper-parameters.
  kernel_size_ = this->layer_param_.convolution_param().kernel_size();
  stride_ = this->layer_param_.convolution_param().stride();
  group_ = this->layer_param_.convolution_param().group();
  pad_ = this->layer_param_.convolution_param().pad();
  num_output_ = this->layer_param_.convolution_param().num_output();
  CHECK_GT(num_output_, 0);
  CHECK_EQ(channels_ % group_, 0);

  // The im2col result buffer would only hold one image at a time to avoid
  // overly large memory usage.
  const int height_out = (height_ + 2 * pad_ - kernel_size_) / stride_ + 1;
  const int width_out = (width_ + 2 * pad_ - kernel_size_) / stride_ + 1;
  col_buffer_.Reshape(
      1, channels_ * kernel_size_ * kernel_size_, height_out, width_out);

  CHECK_EQ(num_output_ % group_, 0)
      << "Number of output should be multiples of group.";
  bias_term_ = this->layer_param_.convolution_param().bias_term();

  // Dimensions of the individual per-group gemms.
  M_ = num_output_ / group_;
  K_ = channels_ * kernel_size_ * kernel_size_ / group_;
  N_ = height_out * width_out;
  (*top)[0]->Reshape(bottom[0]->num(), num_output_, height_out, width_out);

  // Parameter blobs: create and fill them only when none exist yet.
  if (this->blobs_.size() > 0) {
    LOG(INFO) << "Skipping parameter initialization";
  } else {
    this->blobs_.resize(bias_term_ ? 2 : 1);

    // Weights: num_output_ x (channels_ / group_) x kernel x kernel.
    this->blobs_[0].reset(new Blob<Dtype>(
        num_output_, channels_ / group_, kernel_size_, kernel_size_));
    shared_ptr<Filler<Dtype> > weight_filler(GetFiller<Dtype>(
        this->layer_param_.convolution_param().weight_filler()));
    weight_filler->Fill(this->blobs_[0].get());

    // Bias: one value per output channel, only when requested.
    if (bias_term_) {
      this->blobs_[1].reset(new Blob<Dtype>(1, 1, 1, num_output_));
      shared_ptr<Filler<Dtype> > bias_filler(GetFiller<Dtype>(
          this->layer_param_.convolution_param().bias_filler()));
      bias_filler->Fill(this->blobs_[1].get());
    }
  }

  // All-ones vector with one entry per output position.
  if (bias_term_) {
    bias_multiplier_.reset(new SyncedMemory(N_ * sizeof(Dtype)));
    Dtype* ones =
        reinterpret_cast<Dtype*>(bias_multiplier_->mutable_cpu_data());
    for (int pos = 0; pos < N_; ++pos) {
      ones[pos] = 1.;
    }
  }
}
// CPU forward pass. For every image in the batch:
//   1. im2col expands the image into the column buffer;
//   2. one GEMM per group multiplies that group's weights with its slice of
//      the column buffer, overwriting the group's slice of the output;
//   3. if enabled, a further GEMM adds bias * bias_multiplier to the output.
// Returns 0.
template <typename Dtype>
Dtype ConvolutionLayer<Dtype>::Forward_cpu(const vector<Blob<Dtype>*>& bottom,
    vector<Blob<Dtype>*>* top) {
  const Dtype* bottom_data = bottom[0]->cpu_data();
  Dtype* top_data = (*top)[0]->mutable_cpu_data();
  Dtype* col_data = col_buffer_.mutable_cpu_data();
  const Dtype* weight = this->blobs_[0]->cpu_data();

  // Per-group strides into the weight, column and output buffers.
  const int weight_stride = M_ * K_;
  const int col_stride = K_ * N_;
  const int top_stride = M_ * N_;

  for (int n = 0; n < num_; ++n) {
    // Step 1: im2col for this image.
    im2col_cpu(bottom_data + bottom[0]->offset(n), channels_, height_,
               width_, kernel_size_, pad_, stride_, col_data);

    // Step 2: weights x columns, one group at a time.
    for (int g = 0; g < group_; ++g) {
      const Dtype* group_weight = weight + weight_stride * g;
      const Dtype* group_col = col_data + col_stride * g;
      Dtype* group_top = top_data + (*top)[0]->offset(n) + top_stride * g;
      caffe_cpu_gemm<Dtype>(CblasNoTrans, CblasNoTrans, M_, N_, K_,
          (Dtype)1., group_weight, group_col,
          (Dtype)0., group_top);
    }

    // Step 3: accumulate the bias onto the result.
    if (bias_term_) {
      caffe_cpu_gemm<Dtype>(CblasNoTrans, CblasNoTrans, num_output_,
          N_, 1, (Dtype)1., this->blobs_[1]->cpu_data(),
          reinterpret_cast<const Dtype*>(bias_multiplier_->cpu_data()),
          (Dtype)1., top_data + (*top)[0]->offset(n));
    }
  }
  return Dtype(0.);
}
// CPU backward pass of the convolution.
//
// Always computes the gradients of the parameters: the bias diff (when the
// layer has a bias) and the weight diff, both zeroed first and then
// accumulated over every image in the batch. The gradient with respect to the
// input is computed only when propagate_down is true.
template <typename Dtype>
void ConvolutionLayer<Dtype>::Backward_cpu(const vector<Blob<Dtype>*>& top,
const bool propagate_down, vector<Blob<Dtype>*>* bottom) {
const Dtype* top_diff = top[0]->cpu_diff();
const Dtype* weight = this->blobs_[0]->cpu_data();
Dtype* weight_diff = this->blobs_[0]->mutable_cpu_diff();
const Dtype* bottom_data = (*bottom)[0]->cpu_data();
Dtype* bottom_diff = (*bottom)[0]->mutable_cpu_diff();
// The column buffer's data part holds the recomputed im2col output; its diff
// part receives the column-space gradient before col2im.
Dtype* col_data = col_buffer_.mutable_cpu_data();
Dtype* col_diff = col_buffer_.mutable_cpu_diff();
// bias gradient if necessary
Dtype* bias_diff = NULL;
if (bias_term_) {
bias_diff = this->blobs_[1]->mutable_cpu_diff();
// Start from zero; each image's contribution is added on top below
// (assuming caffe_cpu_gemv follows the BLAS alpha/beta convention -- the
// trailing 1. is then beta, i.e. accumulate).
memset(bias_diff, 0, sizeof(Dtype) * this->blobs_[1]->count());
for (int n = 0; n < num_; ++n) {
caffe_cpu_gemv<Dtype>(CblasNoTrans, num_output_, N_,
1., top_diff + top[0]->offset(n),
reinterpret_cast<const Dtype*>(bias_multiplier_->cpu_data()), 1.,
bias_diff);
}
}
// Per-group strides into the weight, column and top buffers.
int weight_offset = M_ * K_;
int col_offset = K_ * N_;
int top_offset = M_ * N_;
// Weight diffs are accumulated across the batch, so clear them first.
memset(weight_diff, 0, sizeof(Dtype) * this->blobs_[0]->count());
for (int n = 0; n < num_; ++n) {
// since we saved memory in the forward pass by not storing all col data,
// we will need to recompute them.
im2col_cpu(bottom_data + (*bottom)[0]->offset(n), channels_, height_,
width_, kernel_size_, pad_, stride_, col_data);
// gradient w.r.t. weight. Note that we will accumulate diffs.
for (int g = 0; g < group_; ++g) {
caffe_cpu_gemm<Dtype>(CblasNoTrans, CblasTrans, M_, K_, N_,
(Dtype)1., top_diff + top[0]->offset(n) + top_offset * g,
col_data + col_offset * g, (Dtype)1.,
weight_diff + weight_offset * g);
}
// gradient w.r.t. bottom data, if necessary
if (propagate_down) {
// Column-space gradient: transposed weights times the top diff, written
// (not accumulated) into col_diff for each group.
for (int g = 0; g < group_; ++g) {
caffe_cpu_gemm<Dtype>(CblasTrans, CblasNoTrans, K_, N_, M_,
(Dtype)1., weight + weight_offset * g,
top_diff + top[0]->offset(n) + top_offset * g,
(Dtype)0., col_diff + col_offset * g);
}
// col2im back to the data
col2im_cpu(col_diff, channels_, height_, width_, kernel_size_, pad_,
stride_, bottom_diff + (*bottom)[0]->offset(n));
}
}
}
// Instantiate ConvolutionLayer via the project's INSTANTIATE_CLASS macro
// (defined outside this view).
INSTANTIATE_CLASS(ConvolutionLayer);
} // namespace caffe

@ -0,0 +1,104 @@
// Copyright 2014 BVLC and contributors.
#include <vector>
#include "caffe/layer.hpp"
#include "caffe/vision_layers.hpp"
#include "caffe/util/im2col.hpp"
#include "caffe/filler.hpp"
#include "caffe/util/math_functions.hpp"
namespace caffe {
// GPU forward pass. For every image in the batch:
//   1. im2col expands the image into the column buffer;
//   2. one GEMM per group multiplies that group's weights with its slice of
//      the column buffer, overwriting the group's slice of the output;
//   3. if enabled, a further GEMM adds bias * bias_multiplier to the output.
// Returns 0.
template <typename Dtype>
Dtype ConvolutionLayer<Dtype>::Forward_gpu(const vector<Blob<Dtype>*>& bottom,
    vector<Blob<Dtype>*>* top) {
  const Dtype* bottom_data = bottom[0]->gpu_data();
  Dtype* top_data = (*top)[0]->mutable_gpu_data();
  Dtype* col_data = col_buffer_.mutable_gpu_data();
  const Dtype* weight = this->blobs_[0]->gpu_data();

  // Per-group strides into the weight, column and output buffers.
  const int weight_stride = M_ * K_;
  const int col_stride = K_ * N_;
  const int top_stride = M_ * N_;

  for (int n = 0; n < num_; ++n) {
    // Step 1: im2col for this image.
    im2col_gpu(bottom_data + bottom[0]->offset(n), channels_, height_,
               width_, kernel_size_, pad_, stride_, col_data);

    // Step 2: weights x columns, one group at a time.
    for (int g = 0; g < group_; ++g) {
      const Dtype* group_weight = weight + weight_stride * g;
      const Dtype* group_col = col_data + col_stride * g;
      Dtype* group_top = top_data + (*top)[0]->offset(n) + top_stride * g;
      caffe_gpu_gemm<Dtype>(CblasNoTrans, CblasNoTrans, M_, N_, K_,
          (Dtype)1., group_weight, group_col,
          (Dtype)0., group_top);
    }

    // Step 3: accumulate the bias onto the result.
    if (bias_term_) {
      caffe_gpu_gemm<Dtype>(CblasNoTrans, CblasNoTrans, num_output_,
          N_, 1, (Dtype)1., this->blobs_[1]->gpu_data(),
          reinterpret_cast<const Dtype*>(bias_multiplier_->gpu_data()),
          (Dtype)1., top_data + (*top)[0]->offset(n));
    }
  }
  return Dtype(0.);
}
// GPU backward pass of the convolution.
//
// Always computes the gradients of the parameters: the bias diff (when the
// layer has a bias) and the weight diff, both zeroed first and then
// accumulated over every image in the batch. The gradient with respect to the
// input is computed only when propagate_down is true.
template <typename Dtype>
void ConvolutionLayer<Dtype>::Backward_gpu(const vector<Blob<Dtype>*>& top,
const bool propagate_down, vector<Blob<Dtype>*>* bottom) {
const Dtype* top_diff = top[0]->gpu_diff();
const Dtype* weight = this->blobs_[0]->gpu_data();
Dtype* weight_diff = this->blobs_[0]->mutable_gpu_diff();
const Dtype* bottom_data = (*bottom)[0]->gpu_data();
Dtype* bottom_diff = (*bottom)[0]->mutable_gpu_diff();
// The column buffer's data part holds the recomputed im2col output; its diff
// part receives the column-space gradient before col2im.
Dtype* col_data = col_buffer_.mutable_gpu_data();
Dtype* col_diff = col_buffer_.mutable_gpu_diff();
// bias gradient if necessary
Dtype* bias_diff = NULL;
if (bias_term_) {
bias_diff = this->blobs_[1]->mutable_gpu_diff();
// Start from zero; each image's contribution is added on top below
// (assuming caffe_gpu_gemv follows the BLAS alpha/beta convention -- the
// trailing 1. is then beta, i.e. accumulate).
CUDA_CHECK(cudaMemset(bias_diff, 0,
sizeof(Dtype) * this->blobs_[1]->count()));
for (int n = 0; n < num_; ++n) {
caffe_gpu_gemv<Dtype>(CblasNoTrans, num_output_, N_,
1., top_diff + top[0]->offset(n),
reinterpret_cast<const Dtype*>(bias_multiplier_->gpu_data()),
1., bias_diff);
}
}
// Per-group strides into the weight, column and top buffers.
int weight_offset = M_ * K_;
int col_offset = K_ * N_;
int top_offset = M_ * N_;
// Weight diffs are accumulated across the batch, so clear them first.
CUDA_CHECK(cudaMemset(weight_diff, 0,
sizeof(Dtype) * this->blobs_[0]->count()));
for (int n = 0; n < num_; ++n) {
// since we saved memory in the forward pass by not storing all col data,
// we will need to recompute them.
im2col_gpu(bottom_data + (*bottom)[0]->offset(n), channels_, height_,
width_, kernel_size_, pad_, stride_, col_data);
// gradient w.r.t. weight. Note that we will accumulate diffs.
for (int g = 0; g < group_; ++g) {
caffe_gpu_gemm<Dtype>(CblasNoTrans, CblasTrans, M_, K_, N_,
(Dtype)1., top_diff + top[0]->offset(n) + top_offset * g,
col_data + col_offset * g, (Dtype)1.,
weight_diff + weight_offset * g);
}
// gradient w.r.t. bottom data, if necessary
if (propagate_down) {
// Column-space gradient: transposed weights times the top diff, written
// (not accumulated) into col_diff for each group.
for (int g = 0; g < group_; ++g) {
caffe_gpu_gemm<Dtype>(CblasTrans, CblasNoTrans, K_, N_, M_,
(Dtype)1., weight + weight_offset * g,
top_diff + top[0]->offset(n) + top_offset * g,
(Dtype)0., col_diff + col_offset * g);
}
// col2im back to the data
col2im_gpu(col_diff, channels_, height_, width_, kernel_size_, pad_,
stride_, bottom_diff + (*bottom)[0]->offset(n));
}
}
}
// Instantiate ConvolutionLayer via the project's INSTANTIATE_CLASS macro
// (defined outside this view).
INSTANTIATE_CLASS(ConvolutionLayer);
} // namespace caffe

@ -0,0 +1,367 @@
// Copyright 2014 BVLC and contributors.
#include <stdint.h>
#include <leveldb/db.h>
#include <pthread.h>
#include <string>
#include <vector>
#include "caffe/layer.hpp"
#include "caffe/util/io.hpp"
#include "caffe/util/math_functions.hpp"
#include "caffe/util/rng.hpp"
#include "caffe/vision_layers.hpp"
#include "caffe/proto/caffe.pb.h"
using std::string;
namespace caffe {
template <typename Dtype>
void* DataLayerPrefetch(void* layer_pointer) {
CHECK(layer_pointer);
DataLayer<Dtype>* layer = static_cast<DataLayer<Dtype>*>(layer_pointer);
CHECK(layer);
Datum datum;
CHECK(layer->prefetch_data_);
Dtype* top_data = layer->prefetch_data_->mutable_cpu_data();
Dtype* top_label;
if (layer->output_labels_) {
top_label = layer->prefetch_label_->mutable_cpu_data();
}
const Dtype scale = layer->layer_param_.data_param().scale();
const int batch_size = layer->layer_param_.data_param().batch_size();
const int crop_size = layer->layer_param_.data_param().crop_size();
const bool mirror = layer->layer_param_.data_param().mirror();
if (mirror && crop_size == 0) {
LOG(FATAL) << "Current implementation requires mirror and crop_size to be "
<< "set at the same time.";
}
// datum scales
const int channels = layer->datum_channels_;
const int height = layer->datum_height_;
const int width = layer->datum_width_;
const int size = layer->datum_size_;
const Dtype* mean = layer->data_mean_.cpu_data();
for (int item_id = 0; item_id < batch_size; ++item_id) {
// get a blob
switch (layer->layer_param_.data_param().backend()) {
case DataParameter_DB_LEVELDB:
CHECK(layer->iter_);
CHECK(layer->iter_->Valid());
datum.ParseFromString(layer->iter_->value().ToString());
break;
case DataParameter_DB_LMDB:
CHECK_EQ(mdb_cursor_get(layer->mdb_cursor_, &layer->mdb_key_,
&layer->mdb_value_, MDB_GET_CURRENT), MDB_SUCCESS);
datum.ParseFromArray(layer->mdb_value_.mv_data,
layer->mdb_value_.mv_size);
break;
default:
LOG(FATAL) << "Unknown database backend";
}
const string& data = datum.data();
if (crop_size) {
CHECK(data.size()) << "Image cropping only support uint8 data";
int h_off, w_off;
// We only do random crop when we do training.
if (layer->phase_ == Caffe::TRAIN) {
h_off = layer->PrefetchRand() % (height - crop_size);
w_off = layer->PrefetchRand() % (width - crop_size);
} else {
h_off = (height - crop_size) / 2;
w_off = (width - crop_size) / 2;
}
if (mirror && layer->PrefetchRand() % 2) {
// Copy mirrored version
for (int c = 0; c < channels; ++c) {
for (int h = 0; h < crop_size; ++h) {
for (int w = 0; w < crop_size; ++w) {
int top_index = ((item_id * channels + c) * crop_size + h)
* crop_size + (crop_size - 1 - w);
int data_index = (c * height + h + h_off) * width + w + w_off;
Dtype datum_element =
static_cast<Dtype>(static_cast<uint8_t>(data[data_index]));
top_data[top_index] = (datum_element - mean[data_index]) * scale;
}
}
}
} else {
// Normal copy
for (int c = 0; c < channels; ++c) {
for (int h = 0; h < crop_size; ++h) {
for (int w = 0; w < crop_size; ++w) {
int top_index = ((item_id * channels + c) * crop_size + h)
* crop_size + w;
int data_index = (c * height + h + h_off) * width + w + w_off;
Dtype datum_element =
static_cast<Dtype>(static_cast<uint8_t>(data[data_index]));
top_data[top_index] = (datum_element - mean[data_index]) * scale;
}
}
}
}
} else {
// we will prefer to use data() first, and then try float_data()
if (data.size()) {
for (int j = 0; j < size; ++j) {
Dtype datum_element =
static_cast<Dtype>(static_cast<uint8_t>(data[j]));
top_data[item_id * size + j] = (datum_element - mean[j]) * scale;
}
} else {
for (int j = 0; j < size; ++j) {
top_data[item_id * size + j] =
(datum.float_data(j) - mean[j]) * scale;
}
}
}
if (layer->output_labels_) {
top_label[item_id] = datum.label();
}
// go to the next iter
switch (layer->layer_param_.data_param().backend()) {
case DataParameter_DB_LEVELDB:
layer->iter_->Next();
if (!layer->iter_->Valid()) {
// We have reached the end. Restart from the first.
DLOG(INFO) << "Restarting data prefetching from start.";
layer->iter_->SeekToFirst();
}
break;
case DataParameter_DB_LMDB:
if (mdb_cursor_get(layer->mdb_cursor_, &layer->mdb_key_,
&layer->mdb_value_, MDB_NEXT) != MDB_SUCCESS) {
// We have reached the end. Restart from the first.
DLOG(INFO) << "Restarting data prefetching from start.";
CHECK_EQ(mdb_cursor_get(layer->mdb_cursor_, &layer->mdb_key_,
&layer->mdb_value_, MDB_FIRST), MDB_SUCCESS);
}
break;
default:
LOG(FATAL) << "Unknown database backend";
}
}
return static_cast<void*>(NULL);
}
// Waits for the outstanding prefetch thread, then releases the LMDB handles
// (cursor, database handle, transaction, environment) when the LMDB backend
// is in use. Nothing is released explicitly for the LevelDB backend.
template <typename Dtype>
DataLayer<Dtype>::~DataLayer<Dtype>() {
  // The prefetch thread reads from the database, so it must finish before
  // any handle is closed.
  JoinPrefetchThread();
  if (this->layer_param_.data_param().backend() == DataParameter_DB_LEVELDB) {
    // do nothing
  } else if (this->layer_param_.data_param().backend() ==
             DataParameter_DB_LMDB) {
    mdb_cursor_close(mdb_cursor_);
    mdb_close(mdb_env_, mdb_dbi_);
    mdb_txn_abort(mdb_txn_);
    mdb_env_close(mdb_env_);
  } else {
    LOG(FATAL) << "Unknown database backend";
  }
}
// Prepares the data layer for use.
//
// Opens the source database (LevelDB or LMDB), optionally skips a random
// number of leading records (rand_skip), reads one datum to determine the
// output shape, reshapes the top blob(s) and the matching prefetch buffers,
// loads the mean file (or allocates a mean blob of the datum shape when no
// mean file is configured), and finally starts the first prefetch thread.
//
// bottom: forwarded unchanged to Layer<Dtype>::SetUp.
// top:    (*top)[0] receives the data; when a second top is present it
//         receives the labels.
//
// Any database, shape or mean-file problem is fatal (CHECK / LOG(FATAL)).
template <typename Dtype>
void DataLayer<Dtype>::SetUp(const vector<Blob<Dtype>*>& bottom,
      vector<Blob<Dtype>*>* top) {
  Layer<Dtype>::SetUp(bottom, top);
  // A single top means data only; any other count enables the label output.
  output_labels_ = (top->size() != 1);
  const string& source = this->layer_param_.data_param().source();
  const int batch_size = this->layer_param_.data_param().batch_size();

  // Initialize DB
  switch (this->layer_param_.data_param().backend()) {
  case DataParameter_DB_LEVELDB:
    {
    leveldb::DB* db_temp;
    leveldb::Options options;
    options.create_if_missing = false;
    options.max_open_files = 100;
    LOG(INFO) << "Opening leveldb " << source;
    leveldb::Status status = leveldb::DB::Open(options, source, &db_temp);
    CHECK(status.ok()) << "Failed to open leveldb "
                       << source << std::endl
                       << status.ToString();
    db_.reset(db_temp);
    iter_.reset(db_->NewIterator(leveldb::ReadOptions()));
    iter_->SeekToFirst();
    // An empty database leaves the iterator invalid; reading value() from it
    // below would be an error. Fail loudly, as the LMDB path does when
    // MDB_FIRST cannot be fetched.
    CHECK(iter_->Valid()) << "leveldb " << source << " contains no records";
    }
    break;
  case DataParameter_DB_LMDB:
    CHECK_EQ(mdb_env_create(&mdb_env_), MDB_SUCCESS) << "mdb_env_create failed";
    CHECK_EQ(mdb_env_set_mapsize(mdb_env_, 1099511627776), MDB_SUCCESS);  // 1TB
    CHECK_EQ(mdb_env_open(mdb_env_, source.c_str(),
             MDB_RDONLY|MDB_NOTLS, 0664), MDB_SUCCESS) << "mdb_env_open failed";
    CHECK_EQ(mdb_txn_begin(mdb_env_, NULL, MDB_RDONLY, &mdb_txn_), MDB_SUCCESS)
        << "mdb_txn_begin failed";
    CHECK_EQ(mdb_open(mdb_txn_, NULL, 0, &mdb_dbi_), MDB_SUCCESS)
        << "mdb_open failed";
    CHECK_EQ(mdb_cursor_open(mdb_txn_, mdb_dbi_, &mdb_cursor_), MDB_SUCCESS)
        << "mdb_cursor_open failed";
    LOG(INFO) << "Opening lmdb " << source;
    CHECK_EQ(mdb_cursor_get(mdb_cursor_, &mdb_key_, &mdb_value_, MDB_FIRST),
        MDB_SUCCESS) << "mdb_cursor_get failed";
    break;
  default:
    LOG(FATAL) << "Unknown database backend";
  }

  // Check if we would need to randomly skip a few data points
  if (this->layer_param_.data_param().rand_skip()) {
    unsigned int skip = caffe_rng_rand() %
                        this->layer_param_.data_param().rand_skip();
    LOG(INFO) << "Skipping first " << skip << " data points.";
    while (skip-- > 0) {
      // Advance one record, wrapping to the first record at the end.
      switch (this->layer_param_.data_param().backend()) {
      case DataParameter_DB_LEVELDB:
        iter_->Next();
        if (!iter_->Valid()) {
          iter_->SeekToFirst();
        }
        break;
      case DataParameter_DB_LMDB:
        if (mdb_cursor_get(mdb_cursor_, &mdb_key_, &mdb_value_, MDB_NEXT)
            != MDB_SUCCESS) {
          CHECK_EQ(mdb_cursor_get(mdb_cursor_, &mdb_key_, &mdb_value_,
                   MDB_FIRST), MDB_SUCCESS);
        }
        break;
      default:
        LOG(FATAL) << "Unknown database backend";
      }
    }
  }

  // Read a data point, and use it to initialize the top blob.
  Datum datum;
  switch (this->layer_param_.data_param().backend()) {
  case DataParameter_DB_LEVELDB:
    datum.ParseFromString(iter_->value().ToString());
    break;
  case DataParameter_DB_LMDB:
    datum.ParseFromArray(mdb_value_.mv_data, mdb_value_.mv_size);
    break;
  default:
    LOG(FATAL) << "Unknown database backend";
  }

  // image: the output is crop_size x crop_size when cropping is enabled,
  // otherwise it has the datum's own height and width.
  int crop_size = this->layer_param_.data_param().crop_size();
  if (crop_size > 0) {
    (*top)[0]->Reshape(batch_size, datum.channels(), crop_size, crop_size);
    prefetch_data_.reset(new Blob<Dtype>(
        batch_size, datum.channels(), crop_size, crop_size));
  } else {
    (*top)[0]->Reshape(
        batch_size, datum.channels(), datum.height(), datum.width());
    prefetch_data_.reset(new Blob<Dtype>(
        batch_size, datum.channels(), datum.height(), datum.width()));
  }
  LOG(INFO) << "output data size: " << (*top)[0]->num() << ","
      << (*top)[0]->channels() << "," << (*top)[0]->height() << ","
      << (*top)[0]->width();
  // label: one scalar per item
  if (output_labels_) {
    (*top)[1]->Reshape(batch_size, 1, 1, 1);
    prefetch_label_.reset(new Blob<Dtype>(batch_size, 1, 1, 1));
  }
  // datum size
  datum_channels_ = datum.channels();
  datum_height_ = datum.height();
  datum_width_ = datum.width();
  datum_size_ = datum.channels() * datum.height() * datum.width();
  // The crop must be strictly smaller than the datum in both dimensions.
  CHECK_GT(datum_height_, crop_size);
  CHECK_GT(datum_width_, crop_size);
  // check if we want to have mean
  if (this->layer_param_.data_param().has_mean_file()) {
    const string& mean_file = this->layer_param_.data_param().mean_file();
    LOG(INFO) << "Loading mean file from " << mean_file;
    BlobProto blob_proto;
    ReadProtoFromBinaryFileOrDie(mean_file.c_str(), &blob_proto);
    data_mean_.FromProto(blob_proto);
    // The mean must have exactly the shape of a single datum.
    CHECK_EQ(data_mean_.num(), 1);
    CHECK_EQ(data_mean_.channels(), datum_channels_);
    CHECK_EQ(data_mean_.height(), datum_height_);
    CHECK_EQ(data_mean_.width(), datum_width_);
  } else {
    // Simply initialize an all-empty mean.
    data_mean_.Reshape(1, datum_channels_, datum_height_, datum_width_);
  }
  // Now, start the prefetch thread. Before calling prefetch, we make two
  // cpu_data calls so that the prefetch thread does not accidentally make
  // simultaneous cudaMalloc calls when the main thread is running. In some
  // GPUs this seems to cause failures if we do not so.
  prefetch_data_->mutable_cpu_data();
  if (output_labels_) {
    prefetch_label_->mutable_cpu_data();
  }
  data_mean_.cpu_data();
  DLOG(INFO) << "Initializing prefetch";
  CreatePrefetchThread();
  DLOG(INFO) << "Prefetch initialized.";
}
// Records the current phase, (re)creates the prefetch RNG when the prefetch
// thread will need random numbers, and launches the prefetch thread running
// DataLayerPrefetch<Dtype> on this layer. Failure to create the thread is
// fatal.
template <typename Dtype>
void DataLayer<Dtype>::CreatePrefetchThread() {
  phase_ = Caffe::phase();
  // The prefetch routine draws a random number for mirroring in every phase,
  // but only draws random crop offsets in the TRAIN phase (other phases use
  // the center crop). The RNG must therefore exist whenever mirror is set,
  // not only while training; otherwise PrefetchRand() fails its
  // CHECK(prefetch_rng_) as soon as a mirrored layer runs outside TRAIN.
  const bool mirror = this->layer_param_.data_param().mirror();
  const bool random_crop = (phase_ == Caffe::TRAIN) &&
      this->layer_param_.data_param().crop_size();
  const bool prefetch_needs_rand = mirror || random_crop;
  if (prefetch_needs_rand) {
    // Seed the private generator from the global Caffe RNG.
    const unsigned int prefetch_rng_seed = caffe_rng_rand();
    prefetch_rng_.reset(new Caffe::RNG(prefetch_rng_seed));
  } else {
    prefetch_rng_.reset();
  }
  // Create the thread.
  CHECK(!pthread_create(&thread_, NULL, DataLayerPrefetch<Dtype>,
        static_cast<void*>(this))) << "Pthread execution failed.";
}
// Blocks until the prefetch thread stored in thread_ has finished.
// A non-zero return from pthread_join is treated as fatal.
template <typename Dtype>
void DataLayer<Dtype>::JoinPrefetchThread() {
CHECK(!pthread_join(thread_, NULL)) << "Pthread joining failed.";
}
// Returns the next value produced by the prefetch thread's private random
// generator. The generator must have been created beforehand; a missing
// generator is fatal.
template <typename Dtype>
unsigned int DataLayer<Dtype>::PrefetchRand() {
  CHECK(prefetch_rng_);
  // generator() hands back an untyped pointer to the underlying rng_t.
  return (*static_cast<caffe::rng_t*>(prefetch_rng_->generator()))();
}
// CPU forward pass: waits for the batch being prefetched, copies it (and the
// labels, when the layer outputs them) into the top blobs, then starts
// prefetching the next batch. Always returns 0.
template <typename Dtype>
Dtype DataLayer<Dtype>::Forward_cpu(const vector<Blob<Dtype>*>& bottom,
      vector<Blob<Dtype>*>* top) {
  // The prefetch buffers may only be read once the thread is done with them.
  JoinPrefetchThread();

  Blob<Dtype>* data_top = (*top)[0];
  caffe_copy(prefetch_data_->count(), prefetch_data_->cpu_data(),
             data_top->mutable_cpu_data());

  if (output_labels_) {
    Blob<Dtype>* label_top = (*top)[1];
    caffe_copy(prefetch_label_->count(), prefetch_label_->cpu_data(),
               label_top->mutable_cpu_data());
  }

  // The buffers are free again: kick off the next prefetch.
  CreatePrefetchThread();
  return Dtype(0.);
}
INSTANTIATE_CLASS(DataLayer);
} // namespace caffe

@ -0,0 +1,39 @@
// Copyright 2014 BVLC and contributors.
#include <stdint.h>
#include <leveldb/db.h>
#include <pthread.h>
#include <string>
#include <vector>
#include "caffe/layer.hpp"
#include "caffe/util/io.hpp"
#include "caffe/vision_layers.hpp"
using std::string;
namespace caffe {
// GPU forward pass: waits for the batch being prefetched, copies it (and the
// labels, when the layer outputs them) from the host-side prefetch buffers
// into the top blobs' GPU memory, then starts prefetching the next batch.
// Always returns 0.
template <typename Dtype>
Dtype DataLayer<Dtype>::Forward_gpu(const vector<Blob<Dtype>*>& bottom,
      vector<Blob<Dtype>*>* top) {
  // The prefetch buffers may only be read once the thread is done with them.
  JoinPrefetchThread();

  Blob<Dtype>* data_top = (*top)[0];
  CUDA_CHECK(cudaMemcpy(data_top->mutable_gpu_data(),
      prefetch_data_->cpu_data(), sizeof(Dtype) * prefetch_data_->count(),
      cudaMemcpyHostToDevice));

  if (output_labels_) {
    Blob<Dtype>* label_top = (*top)[1];
    CUDA_CHECK(cudaMemcpy(label_top->mutable_gpu_data(),
        prefetch_label_->cpu_data(), sizeof(Dtype) * prefetch_label_->count(),
        cudaMemcpyHostToDevice));
  }

  // The buffers are free again: kick off the next prefetch.
  CreatePrefetchThread();
  return Dtype(0.);
}
INSTANTIATE_CLASS(DataLayer);
} // namespace caffe

@ -0,0 +1,68 @@
// Copyright 2014 BVLC and contributors.
// TODO (sergeyk): effect should not be dependent on phase. wasted memcpy.
#include <vector>
#include "caffe/common.hpp"
#include "caffe/util/math_functions.hpp"
#include "caffe/layer.hpp"
#include "caffe/syncedmem.hpp"
#include "caffe/vision_layers.hpp"
namespace caffe {
// Sets up the dropout layer: allocates the random-mask blob with the same
// shape as the bottom blob and derives the dropout constants from the
// configured dropout_ratio.
//
//   threshold_  - the dropout ratio itself (debug-checked to lie in (0, 1))
//   scale_      - 1 / (1 - threshold_), applied to surviving elements
//   uint_thres_ - the ratio mapped onto the unsigned int range
template <typename Dtype>
void DropoutLayer<Dtype>::SetUp(const vector<Blob<Dtype>*>& bottom,
      vector<Blob<Dtype>*>* top) {
  NeuronLayer<Dtype>::SetUp(bottom, top);

  // One mask entry per input element.
  const Blob<Dtype>* input = bottom[0];
  rand_vec_.reset(new Blob<unsigned int>(input->num(), input->channels(),
      input->height(), input->width()));

  threshold_ = this->layer_param_.dropout_param().dropout_ratio();
  DCHECK(threshold_ > 0.);
  DCHECK(threshold_ < 1.);
  scale_ = 1. / (1. - threshold_);
  uint_thres_ = static_cast<unsigned int>(UINT_MAX * threshold_);
}
// CPU forward pass. In the TRAIN phase a fresh Bernoulli mask (parameter
// 1 - threshold_) is drawn and each input is multiplied by its mask value and
// by scale_. In every other phase the input is copied through unchanged.
// Always returns 0.
template <typename Dtype>
Dtype DropoutLayer<Dtype>::Forward_cpu(const vector<Blob<Dtype>*>& bottom,
    vector<Blob<Dtype>*>* top) {
  const Dtype* bottom_data = bottom[0]->cpu_data();
  Dtype* top_data = (*top)[0]->mutable_cpu_data();
  unsigned int* mask = rand_vec_->mutable_cpu_data();
  const int count = bottom[0]->count();

  if (Caffe::phase() != Caffe::TRAIN) {
    // Outside training dropout is the identity.
    caffe_copy(bottom[0]->count(), bottom_data, top_data);
    return Dtype(0);
  }

  // Create random numbers
  caffe_rng_bernoulli(count, 1. - threshold_, mask);
  for (int idx = 0; idx < count; ++idx) {
    top_data[idx] = bottom_data[idx] * mask[idx] * scale_;
  }
  return Dtype(0);
}
// CPU backward pass. Only valid in the TRAIN phase (fatal otherwise). When
// propagate_down is set, each top gradient is multiplied by the mask value
// stored during the forward pass and by scale_, and written to the bottom
// gradient.
template <typename Dtype>
void DropoutLayer<Dtype>::Backward_cpu(const vector<Blob<Dtype>*>& top,
    const bool propagate_down,
    vector<Blob<Dtype>*>* bottom) {
  CHECK(Caffe::phase() == Caffe::TRAIN);
  if (!propagate_down) {
    return;
  }

  const Dtype* top_diff = top[0]->cpu_diff();
  Dtype* bottom_diff = (*bottom)[0]->mutable_cpu_diff();
  // Reuse the mask stored in rand_vec_ so the gradient is scaled with the
  // same per-element values as the activations.
  const unsigned int* mask = rand_vec_->cpu_data();
  const int count = (*bottom)[0]->count();
  for (int idx = 0; idx < count; ++idx) {
    bottom_diff[idx] = top_diff[idx] * mask[idx] * scale_;
  }
}
INSTANTIATE_CLASS(DropoutLayer);
} // namespace caffe

@ -0,0 +1,78 @@
// Copyright 2014 BVLC and contributors.
#include <algorithm>
#include <limits>
#include <vector>
#include "caffe/common.hpp"
#include "caffe/layer.hpp"
#include "caffe/syncedmem.hpp"
#include "caffe/vision_layers.hpp"
#include "caffe/util/math_functions.hpp"
using std::max;
namespace caffe {
// Dropout forward kernel. For every index visited by CUDA_KERNEL_LOOP over
// [0, n): the input is kept and multiplied by scale when its mask value is
// strictly greater than threshold, and set to zero otherwise.
template <typename Dtype>
__global__ void DropoutForward(const int n, const Dtype* in,
    const unsigned int* mask, const unsigned int threshold, const float scale,
    Dtype* out) {
  CUDA_KERNEL_LOOP(index, n) {
    // true (1) when the element survives, false (0) when it is dropped.
    const bool keep = mask[index] > threshold;
    out[index] = in[index] * keep * scale;
  }
}
// GPU forward pass. In the TRAIN phase the mask blob is refilled through
// caffe_gpu_rng_uniform and the DropoutForward kernel applies it using
// uint_thres_ and scale_. In every other phase the input is copied through
// unchanged. Always returns 0.
template <typename Dtype>
Dtype DropoutLayer<Dtype>::Forward_gpu(const vector<Blob<Dtype>*>& bottom,
    vector<Blob<Dtype>*>* top) {
  const Dtype* bottom_data = bottom[0]->gpu_data();
  Dtype* top_data = (*top)[0]->mutable_gpu_data();
  const int count = bottom[0]->count();

  if (Caffe::phase() != Caffe::TRAIN) {
    // Outside training dropout is the identity.
    caffe_gpu_copy(count, bottom_data, top_data);
    return Dtype(0);
  }

  unsigned int* mask =
      static_cast<unsigned int*>(rand_vec_->mutable_gpu_data());
  caffe_gpu_rng_uniform(count, mask);
  // set thresholds
  // NOLINT_NEXT_LINE(whitespace/operators)
  DropoutForward<Dtype><<<CAFFE_GET_BLOCKS(count), CAFFE_CUDA_NUM_THREADS>>>(
      count, bottom_data, mask, uint_thres_, scale_, top_data);
  CUDA_POST_KERNEL_CHECK;
  return Dtype(0);
}
// Dropout backward kernel. For every index visited by CUDA_KERNEL_LOOP over
// [0, n): the incoming gradient is multiplied by scale when its mask value is
// strictly greater than threshold, and set to zero otherwise.
template <typename Dtype>
__global__ void DropoutBackward(const int n, const Dtype* in_diff,
    const unsigned int* mask, const unsigned int threshold, const float scale,
    Dtype* out_diff) {
  CUDA_KERNEL_LOOP(index, n) {
    // true (1) when the element survived the forward pass, false (0) if not.
    const bool keep = mask[index] > threshold;
    out_diff[index] = in_diff[index] * scale * keep;
  }
}
// GPU backward pass. Only valid in the TRAIN phase (fatal otherwise). When
// propagate_down is set, the DropoutBackward kernel applies the mask stored
// in rand_vec_, together with uint_thres_ and scale_, to the top gradient
// and writes the result to the bottom gradient.
template <typename Dtype>
void DropoutLayer<Dtype>::Backward_gpu(const vector<Blob<Dtype>*>& top,
    const bool propagate_down,
    vector<Blob<Dtype>*>* bottom) {
  CHECK(Caffe::phase() == Caffe::TRAIN);
  if (!propagate_down) {
    return;
  }

  const Dtype* top_diff = top[0]->gpu_diff();
  Dtype* bottom_diff = (*bottom)[0]->mutable_gpu_diff();
  const unsigned int* mask =
      static_cast<const unsigned int*>(rand_vec_->gpu_data());
  const int count = (*bottom)[0]->count();
  // NOLINT_NEXT_LINE(whitespace/operators)
  DropoutBackward<Dtype><<<CAFFE_GET_BLOCKS(count), CAFFE_CUDA_NUM_THREADS>>>(
      count, top_diff, mask, uint_thres_, scale_, bottom_diff);
  CUDA_POST_KERNEL_CHECK;
}
INSTANTIATE_CLASS(DropoutLayer);
} // namespace caffe

@ -0,0 +1,100 @@
// Copyright 2014 BVLC and contributors.
#include <vector>
#include "caffe/filler.hpp"
#include "caffe/layer.hpp"
#include "caffe/vision_layers.hpp"
namespace caffe {
// Sets up the dummy data layer: validates the parameter counts, creates the
// filler(s), reshapes every top blob, and fills the "constant" blobs once.
//
// The layer accepts 0, 1 or one-per-top data fillers, and either a single
// value or one value per top for each of num/channels/height/width. With no
// filler given, a constant filler with value 0 is used. Any violation of
// these count rules is fatal.
template <typename Dtype>
void DummyDataLayer<Dtype>::SetUp(const vector<Blob<Dtype>*>& bottom,
      vector<Blob<Dtype>*>* top) {
  const int num_top = top->size();
  const DummyDataParameter& param = this->layer_param_.dummy_data_param();
  const int num_data_filler = param.data_filler_size();
  CHECK(num_data_filler == 0 || num_data_filler == 1 ||
        num_data_filler == num_top)
      << "Number of data fillers must be 0, 1 or equal to the number of tops: "
      << num_top << "; you specified " << num_data_filler << " data fillers.";
  CHECK(param.num_size() == 1 || param.num_size() == num_top)
      << "Must specify either a single (1) 'num' or one for each top blob "
      << "(" << num_top << "); you specified " << param.num_size() << ".";
  CHECK(param.channels_size() == 1 || param.channels_size() == num_top)
      << "Must specify either a single (1) 'channels' or one for each top blob "
      << "(" << num_top << "); you specified " << param.channels_size() << ".";
  CHECK(param.height_size() == 1 || param.height_size() == num_top)
      << "Must specify either a single (1) 'height' or one for each top blob "
      << "(" << num_top << "); you specified " << param.height_size() << ".";
  CHECK(param.width_size() == 1 || param.width_size() == num_top)
      << "Must specify either a single (1) 'width' or one for each top blob "
      << "(" << num_top << "); you specified " << param.width_size() << ".";

  // refill_[i] tells Forward whether to (re)fill the blobs served by filler
  // i. During SetUp it temporarily holds the INVERSE of its final value:
  // "true" for constant fillers, so that the single Forward call below fills
  // exactly the constant blobs. Afterwards the flags are flipped, so regular
  // forward passes refill only the non-constant blobs and the constant ones
  // are never touched again.
  refill_.clear();
  fillers_.clear();
  if (num_data_filler <= 1) {
    // One filler (explicit, or the implicit constant-zero one) shared by
    // every top blob.
    FillerParameter filler_param;
    if (num_data_filler == 0) {
      filler_param.set_type("constant");
      filler_param.set_value(0);
    } else {
      filler_param.CopyFrom(param.data_filler(0));
    }
    refill_.resize(1);
    refill_[0] = (filler_param.type() == "constant");
    fillers_.resize(1);
    fillers_[0].reset(GetFiller<Dtype>(filler_param));
  } else {
    // One filler per top blob.
    refill_.resize(num_top);
    fillers_.resize(num_top);
    for (int i = 0; i < num_top; ++i) {
      fillers_[i].reset(GetFiller<Dtype>(param.data_filler(i)));
      refill_[i] = (param.data_filler(i).type() == "constant");
    }
  }

  // Shape every top blob; a dimension given only once applies to all tops.
  for (int i = 0; i < num_top; ++i) {
    const int num = param.num((param.num_size() == 1) ? 0 : i);
    const int channels = param.channels((param.channels_size() == 1) ? 0 : i);
    const int height = param.height((param.height_size() == 1) ? 0 : i);
    const int width = param.width((param.width_size() == 1) ? 0 : i);
    (*top)[i]->Reshape(num, channels, height, width);
  }

  // Run Forward once, with refill_ inverted, to fill the constant Blobs.
  this->Forward(bottom, top);
  // Flip the flags to their final values so that only the non-constant
  // Blobs are refilled in every usual forward pass.
  for (size_t i = 0; i < refill_.size(); ++i) {
    refill_[i] = !refill_[i];
  }
}
// CPU forward pass: fills each top blob with its filler when the matching
// refill_ flag is set. When only one filler exists it serves every top blob.
// Always returns 0.
template <typename Dtype>
Dtype DummyDataLayer<Dtype>::Forward_cpu(const vector<Blob<Dtype>*>& bottom,
      vector<Blob<Dtype>*>* top) {
  // With more than one filler there is one per top; otherwise index 0 is
  // shared by all tops.
  const bool per_top_fillers = fillers_.size() > 1;
  for (size_t i = 0; i < top->size(); ++i) {
    const size_t filler_id = per_top_fillers ? i : 0;
    if (!refill_[filler_id]) {
      continue;
    }
    fillers_[filler_id]->Fill((*top)[i]);
  }
  return Dtype(0.);
}
INSTANTIATE_CLASS(DummyDataLayer);
} // namespace caffe

Some files were not shown because too many files have changed in this diff Show More

Loading…
Cancel
Save