parent 2d1fc7a6cd, commit a9c92b6391
395 changed files with 90568 additions and 0 deletions
@@ -0,0 +1,29 @@
# Compiled Object files
*.slo
*.lo
*.o
*.obj

# Precompiled Headers
*.gch
*.pch

# Compiled Dynamic libraries
*.so
*.dylib
*.dll
*.pyc

# Fortran module files
*.mod

# Compiled Static libraries
*.lai
*.la
*.a
*.lib

# Executables
*.exe
*.out
*.app
Binary file not shown.
@@ -0,0 +1,52 @@
# Fooling Code

This is the code base used to reproduce the "fooling" images in the paper:

[Nguyen A](http://anhnguyen.me), [Yosinski J](http://yosinski.com/), [Clune J](http://jeffclune.com). ["Deep Neural Networks are Easily Fooled: High Confidence Predictions for Unrecognizable Images"](http://arxiv.org/abs/1412.1897). In Computer Vision and Pattern Recognition (CVPR '15), IEEE, 2015.

**If you use this software in an academic article, please cite:**

    @inproceedings{nguyen2015deep,
      title={Deep Neural Networks are Easily Fooled: High Confidence Predictions for Unrecognizable Images},
      author={Nguyen, Anh and Yosinski, Jason and Clune, Jeff},
      booktitle={Computer Vision and Pattern Recognition (CVPR), 2015 IEEE Conference on},
      year={2015},
      organization={IEEE}
    }

For more information regarding the paper, please visit www.evolvingai.org/fooling

## Requirements

Installation requires two main software packages (both included in this repository):

1. Caffe: http://caffe.berkeleyvision.org
  * Our libraries installed to work with Caffe:
    * CUDA 6.0
    * Boost 1.52
    * g++ 4.6
2. Sferes: https://github.com/jbmouret/sferes2
  * Our libraries installed to work with Sferes:
    * OpenCV 2.4.10
    * Boost 1.52
    * g++ 4.9 (a C++ compiler compatible with the C++11 standard)

Note: These are specific versions of the two frameworks, extended with the additional code needed to produce the images in the paper. They are not the same as their master branches.

## Installation

Please see the [Installation Guide](https://github.com/Evolving-AI-Lab/fooling/wiki/Installation-Guide) for more details.

## Usage

* An MNIST experiment (Figs. 4 and 5 in the paper) can be run directly on a local 4-core machine in a reasonable amount of time (roughly 5 minutes or less for 200 generations).
* An ImageNet experiment needs to be run in a cluster environment. It took us about 4 days on 128 cores to run 5000 generations and produce 1000 images (Fig. 8 in the paper).
* [How to configure an experiment to test the evolutionary framework quickly](https://github.com/Evolving-AI-Lab/fooling/wiki/How-to-test-the-evolutionary-framework-quickly)
* To reproduce the gradient ascent fooling images (Figures 13, S3, S4, S5, S6, and S7 in the paper), see the [documentation in the caffe/ascent directory](https://github.com/Evolving-AI-Lab/fooling/tree/ascent/caffe/ascent). You'll need to use the `ascent` branch instead of `master`, because the two required versions of Caffe differ.

## Updates

* Our fork project [here](https://github.com/Evolving-AI-Lab/innovation-engine) supports the **latest Caffe** and adds experiments that create *recognizable* images instead of unrecognizable ones.

## License

Please refer to the licenses of the Sferes and Caffe projects.
@@ -0,0 +1,65 @@
## General

# Compiled Object files
*.slo
*.lo
*.o
*.cuo
*.png
*.jpg
*.jpeg
# Compiled Dynamic libraries
*.so
*.dylib

# Compiled Static libraries
*.lai
*.la
*.a

# Compiled protocol buffers
*.pb.h
*.pb.cc
*_pb2.py

# Compiled python
*.pyc

# Compiled MATLAB
*.mex*

# build, distribute, and bins
build
.build_debug/*
.build_release/*
distribute/*
*.testbin
*.bin
python/caffe/proto/

# Editor temporaries
*.swp
*~

# IPython notebook checkpoints
.ipynb_checkpoints

## Caffe

# User's build configuration
#Makefile.config

# Data and examples are either
# 1. reference, and not casually committed
# 2. custom, and live on their own unless they're deliberately contributed
data/*
examples/*

# Generated documentation
docs/_site
_site

# Sublime Text settings
*.sublime-workspace
*.sublime-project

@@ -0,0 +1,17 @@
# Contributors

Caffe is developed by a core set of BVLC members and the open-source community.

We thank all of our [contributors](https://github.com/BVLC/caffe/graphs/contributors)!

**For the detailed history of contributions** of a given file, try

    git blame file

to see line-by-line credits and

    git log --follow file

to see the change log even across renames and rewrites.

Please refer to the [acknowledgements](http://caffe.berkeleyvision.org/#acknowledgements) on the Caffe site for further details.
@@ -0,0 +1,7 @@
# Installation

See http://caffe.berkeleyvision.org/installation.html for the latest
installation instructions.

Check the issue tracker in case you need help:
https://github.com/BVLC/caffe/issues
@@ -0,0 +1,22 @@
Copyright (c) 2014, The Regents of the University of California (Regents)
All rights reserved.

Redistribution and use in source and binary forms, with or without
modification, are permitted provided that the following conditions are met:

1. Redistributions of source code must retain the above copyright notice, this
   list of conditions and the following disclaimer.
2. Redistributions in binary form must reproduce the above copyright notice,
   this list of conditions and the following disclaimer in the documentation
   and/or other materials provided with the distribution.

THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR
ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
(INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
@ -0,0 +1,439 @@ |
||||
# The makefile for caffe. Pretty hacky.
|
||||
PROJECT := caffe
|
||||
|
||||
CONFIG_FILE := Makefile.config
|
||||
include $(CONFIG_FILE) |
||||
|
||||
BUILD_DIR_LINK := $(BUILD_DIR)
|
||||
RELEASE_BUILD_DIR := .$(BUILD_DIR)_release
|
||||
DEBUG_BUILD_DIR := .$(BUILD_DIR)_debug
|
||||
|
||||
DEBUG ?= 0
|
||||
ifeq ($(DEBUG), 1) |
||||
BUILD_DIR := $(DEBUG_BUILD_DIR)
|
||||
OTHER_BUILD_DIR := $(RELEASE_BUILD_DIR)
|
||||
else |
||||
BUILD_DIR := $(RELEASE_BUILD_DIR)
|
||||
OTHER_BUILD_DIR := $(DEBUG_BUILD_DIR)
|
||||
endif |
||||
|
||||
# The target static library and shared library name
|
||||
LIB_BUILD_DIR := $(BUILD_DIR)/lib
|
||||
NAME := $(LIB_BUILD_DIR)/lib$(PROJECT).so
|
||||
STATIC_NAME := $(LIB_BUILD_DIR)/lib$(PROJECT).a
|
||||
|
||||
##############################
|
||||
# Get all source files
|
||||
##############################
|
||||
# CXX_SRCS are the source files excluding the test ones.
|
||||
CXX_SRCS := $(shell find src/$(PROJECT) ! -name "test_*.cpp" -name "*.cpp")
|
||||
# HXX_SRCS are the header files
|
||||
HXX_SRCS := $(shell find include/$(PROJECT) -name "*.hpp")
|
||||
# CU_SRCS are the cuda source files
|
||||
CU_SRCS := $(shell find src/$(PROJECT) -name "*.cu")
|
||||
# TEST_SRCS are the test source files
|
||||
TEST_MAIN_SRC := src/$(PROJECT)/test/test_caffe_main.cpp
|
||||
TEST_SRCS := $(shell find src/$(PROJECT) -name "test_*.cpp")
|
||||
TEST_SRCS := $(filter-out $(TEST_MAIN_SRC), $(TEST_SRCS))
|
||||
GTEST_SRC := src/gtest/gtest-all.cpp
|
||||
# TEST_HDRS are the test header files
|
||||
TEST_HDRS := $(shell find src/$(PROJECT) -name "test_*.hpp")
|
||||
# TOOL_SRCS are the source files for the tool binaries
|
||||
TOOL_SRCS := $(shell find tools -name "*.cpp")
|
||||
# EXAMPLE_SRCS are the source files for the example binaries
|
||||
EXAMPLE_SRCS := $(shell find examples -name "*.cpp")
|
||||
# BUILD_INCLUDE_DIR contains any generated header files we want to include.
|
||||
BUILD_INCLUDE_DIR := $(BUILD_DIR)/src
|
||||
# PROTO_SRCS are the protocol buffer definitions
|
||||
PROTO_SRC_DIR := src/$(PROJECT)/proto
|
||||
PROTO_SRCS := $(wildcard $(PROTO_SRC_DIR)/*.proto)
|
||||
# PROTO_BUILD_DIR will contain the .cc and obj files generated from
|
||||
# PROTO_SRCS; PROTO_BUILD_INCLUDE_DIR will contain the .h header files
|
||||
PROTO_BUILD_DIR := $(BUILD_DIR)/$(PROTO_SRC_DIR)
|
||||
PROTO_BUILD_INCLUDE_DIR := $(BUILD_INCLUDE_DIR)/$(PROJECT)/proto
|
||||
# NONGEN_CXX_SRCS includes all source/header files except those generated
|
||||
# automatically (e.g., by proto).
|
||||
NONGEN_CXX_SRCS := $(shell find \
|
||||
src/$(PROJECT) \
|
||||
include/$(PROJECT) \
|
||||
python/$(PROJECT) \
|
||||
matlab/$(PROJECT) \
|
||||
examples \
|
||||
tools \
|
||||
-name "*.cpp" -or -name "*.hpp" -or -name "*.cu" -or -name "*.cuh")
|
||||
LINT_REPORT := $(BUILD_DIR)/cpp_lint.log
|
||||
FAILED_LINT_REPORT := $(BUILD_DIR)/cpp_lint.error_log
|
||||
# PY$(PROJECT)_SRC is the python wrapper for $(PROJECT)
|
||||
PY$(PROJECT)_SRC := python/$(PROJECT)/_$(PROJECT).cpp
|
||||
PY$(PROJECT)_SO := python/$(PROJECT)/_$(PROJECT).so
|
||||
# MAT$(PROJECT)_SRC is the matlab wrapper for $(PROJECT)
|
||||
MAT$(PROJECT)_SRC := matlab/$(PROJECT)/mat$(PROJECT).cpp
|
||||
ifneq ($(MATLAB_DIR),) |
||||
MAT_SO_EXT := $(shell $(MATLAB_DIR)/bin/mexext)
|
||||
endif |
||||
MAT$(PROJECT)_SO := matlab/$(PROJECT)/$(PROJECT).$(MAT_SO_EXT)
|
||||
|
||||
##############################
|
||||
# Derive generated files
|
||||
##############################
|
||||
# The generated files for protocol buffers
|
||||
PROTO_GEN_HEADER_SRCS := $(addprefix $(PROTO_BUILD_DIR)/, \
|
||||
$(notdir ${PROTO_SRCS:.proto=.pb.h}))
|
||||
PROTO_GEN_HEADER := $(addprefix $(PROTO_BUILD_INCLUDE_DIR)/, \
|
||||
$(notdir ${PROTO_SRCS:.proto=.pb.h}))
|
||||
HXX_SRCS += $(PROTO_GEN_HEADER)
|
||||
PROTO_GEN_CC := $(addprefix $(BUILD_DIR)/, ${PROTO_SRCS:.proto=.pb.cc})
|
||||
PY_PROTO_BUILD_DIR := python/$(PROJECT)/proto
|
||||
PY_PROTO_INIT := python/$(PROJECT)/proto/__init__.py
|
||||
PROTO_GEN_PY := $(foreach file,${PROTO_SRCS:.proto=_pb2.py}, \
|
||||
$(PY_PROTO_BUILD_DIR)/$(notdir $(file)))
|
||||
# The objects corresponding to the source files
|
||||
# These objects will be linked into the final shared library, so we
|
||||
# exclude the tool, example, and test objects.
|
||||
CXX_OBJS := $(addprefix $(BUILD_DIR)/, ${CXX_SRCS:.cpp=.o})
|
||||
CU_OBJS := $(addprefix $(BUILD_DIR)/, ${CU_SRCS:.cu=.cuo})
|
||||
PROTO_OBJS := ${PROTO_GEN_CC:.cc=.o}
|
||||
OBJ_BUILD_DIR := $(BUILD_DIR)/src/$(PROJECT)
|
||||
LAYER_BUILD_DIR := $(OBJ_BUILD_DIR)/layers
|
||||
UTIL_BUILD_DIR := $(OBJ_BUILD_DIR)/util
|
||||
OBJS := $(PROTO_OBJS) $(CXX_OBJS) $(CU_OBJS)
|
||||
# tool, example, and test objects
|
||||
TOOL_OBJS := $(addprefix $(BUILD_DIR)/, ${TOOL_SRCS:.cpp=.o})
|
||||
TOOL_BUILD_DIR := $(BUILD_DIR)/tools
|
||||
TEST_BUILD_DIR := $(BUILD_DIR)/src/$(PROJECT)/test
|
||||
TEST_OBJS := $(addprefix $(BUILD_DIR)/, ${TEST_SRCS:.cpp=.o})
|
||||
GTEST_OBJ := $(addprefix $(BUILD_DIR)/, ${GTEST_SRC:.cpp=.o})
|
||||
GTEST_BUILD_DIR := $(dir $(GTEST_OBJ))
|
||||
EXAMPLE_OBJS := $(addprefix $(BUILD_DIR)/, ${EXAMPLE_SRCS:.cpp=.o})
|
||||
EXAMPLE_BUILD_DIR := $(BUILD_DIR)/examples
|
||||
EXAMPLE_BUILD_DIRS := $(EXAMPLE_BUILD_DIR)
|
||||
EXAMPLE_BUILD_DIRS += $(foreach obj,$(EXAMPLE_OBJS),$(dir $(obj)))
|
||||
# tool, example, and test bins
|
||||
TOOL_BINS := ${TOOL_OBJS:.o=.bin}
|
||||
EXAMPLE_BINS := ${EXAMPLE_OBJS:.o=.bin}
|
||||
# Put the test binaries in build/test for convenience.
|
||||
TEST_BIN_DIR := $(BUILD_DIR)/test
|
||||
TEST_BINS := $(addsuffix .testbin,$(addprefix $(TEST_BIN_DIR)/, \
|
||||
$(foreach obj,$(TEST_OBJS),$(basename $(notdir $(obj))))))
|
||||
TEST_ALL_BIN := $(TEST_BIN_DIR)/test_all.testbin
|
||||
|
||||
##############################
|
||||
# Derive include and lib directories
|
||||
##############################
|
||||
CUDA_INCLUDE_DIR := $(CUDA_DIR)/include
|
||||
CUDA_LIB_DIR := $(CUDA_DIR)/lib64 $(CUDA_DIR)/lib
|
||||
|
||||
INCLUDE_DIRS += $(BUILD_INCLUDE_DIR)
|
||||
INCLUDE_DIRS += ./src ./include $(CUDA_INCLUDE_DIR)
|
||||
LIBRARY_DIRS += $(CUDA_LIB_DIR)
|
||||
LIBRARIES := cudart cublas curand \
|
||||
pthread \
|
||||
glog protobuf leveldb snappy \
|
||||
lmdb \
|
||||
boost_system \
|
||||
hdf5_hl hdf5 \
|
||||
opencv_core opencv_highgui opencv_imgproc
|
||||
PYTHON_LIBRARIES := boost_python python2.7
|
||||
WARNINGS := -Wall
|
||||
|
||||
##############################
|
||||
# Set build directories
|
||||
##############################
|
||||
|
||||
DISTRIBUTE_SUBDIRS := $(DISTRIBUTE_DIR)/bin $(DISTRIBUTE_DIR)/lib
|
||||
DIST_ALIASES := dist
|
||||
ifneq ($(strip $(DISTRIBUTE_DIR)),distribute) |
||||
DIST_ALIASES += distribute
|
||||
endif |
||||
|
||||
ALL_BUILD_DIRS := $(sort \
|
||||
$(BUILD_DIR) $(LIB_BUILD_DIR) $(OBJ_BUILD_DIR) \
|
||||
$(LAYER_BUILD_DIR) $(UTIL_BUILD_DIR) $(TOOL_BUILD_DIR) \
|
||||
$(TEST_BUILD_DIR) $(TEST_BIN_DIR) $(GTEST_BUILD_DIR) \
|
||||
$(EXAMPLE_BUILD_DIRS) \
|
||||
$(PROTO_BUILD_DIR) $(PROTO_BUILD_INCLUDE_DIR) $(PY_PROTO_BUILD_DIR) \
|
||||
$(DISTRIBUTE_SUBDIRS))
|
||||
|
||||
##############################
|
||||
# Configure build
|
||||
##############################
|
||||
|
||||
# Determine platform
|
||||
UNAME := $(shell uname -s)
|
||||
ifeq ($(UNAME), Linux) |
||||
LINUX := 1
|
||||
else ifeq ($(UNAME), Darwin) |
||||
OSX := 1
|
||||
endif |
||||
|
||||
ifeq ($(LINUX), 1) |
||||
CXX := /usr/bin/g++
|
||||
endif |
||||
|
||||
# OS X:
|
||||
# clang++ instead of g++
|
||||
# libstdc++ instead of libc++ for CUDA compatibility on 10.9
|
||||
ifeq ($(OSX), 1) |
||||
CXX := /usr/bin/clang++
|
||||
ifneq ($(findstring 10.9, $(shell sw_vers -productVersion)),)
|
||||
CXXFLAGS += -stdlib=libstdc++
|
||||
endif
|
||||
endif |
||||
|
||||
# Debugging
|
||||
ifeq ($(DEBUG), 1) |
||||
COMMON_FLAGS := -DDEBUG -g -O0
|
||||
else |
||||
COMMON_FLAGS := -DNDEBUG -O2
|
||||
endif |
||||
|
||||
# BLAS configuration (default = ATLAS)
|
||||
BLAS ?= atlas
|
||||
ifeq ($(BLAS), mkl) |
||||
# MKL
|
||||
LIBRARIES += mkl_rt
|
||||
COMMON_FLAGS += -DUSE_MKL
|
||||
MKL_DIR = /opt/intel/mkl
|
||||
BLAS_INCLUDE ?= $(MKL_DIR)/include
|
||||
BLAS_LIB ?= $(MKL_DIR)/lib $(MKL_DIR)/lib/intel64
|
||||
else ifeq ($(BLAS), open) |
||||
# OpenBLAS
|
||||
LIBRARIES += openblas
|
||||
else |
||||
# ATLAS
|
||||
ifeq ($(LINUX), 1)
|
||||
ifeq ($(BLAS), atlas)
|
||||
# Linux simply has cblas and atlas
|
||||
LIBRARIES += cblas atlas
|
||||
endif
|
||||
else ifeq ($(OSX), 1)
|
||||
# OS X packages atlas as the vecLib framework
|
||||
BLAS_INCLUDE ?= /System/Library/Frameworks/vecLib.framework/Versions/Current/Headers/
|
||||
LIBRARIES += cblas
|
||||
LDFLAGS += -framework vecLib
|
||||
endif
|
||||
endif |
||||
INCLUDE_DIRS += $(BLAS_INCLUDE)
|
||||
LIBRARY_DIRS += $(BLAS_LIB)
|
||||
|
||||
# Complete build flags.
|
||||
COMMON_FLAGS += $(foreach includedir,$(INCLUDE_DIRS),-I$(includedir))
|
||||
CXXFLAGS += -pthread -fPIC $(COMMON_FLAGS)
|
||||
NVCCFLAGS := -ccbin=$(CXX) -Xcompiler -fPIC $(COMMON_FLAGS)
|
||||
LDFLAGS += $(foreach librarydir,$(LIBRARY_DIRS),-L$(librarydir)) \
|
||||
$(foreach library,$(LIBRARIES),-l$(library))
|
||||
PYTHON_LDFLAGS := $(LDFLAGS) $(foreach library,$(PYTHON_LIBRARIES),-l$(library))
|
||||
|
||||
# 'superclean' target recursively* deletes all files ending with an extension
|
||||
# in $(SUPERCLEAN_EXTS) below. This may be useful if you've built older
|
||||
# versions of Caffe that do not place all generated files in a location known
|
||||
# to the 'clean' target.
|
||||
#
|
||||
# 'supercleanlist' will list the files to be deleted by make superclean.
|
||||
#
|
||||
# * Recursive with the exception that symbolic links are never followed, per the
|
||||
# default behavior of 'find'.
|
||||
SUPERCLEAN_EXTS := .so .a .o .bin .testbin .pb.cc .pb.h _pb2.py .cuo
|
||||
|
||||
##############################
|
||||
# Define build targets
|
||||
##############################
|
||||
.PHONY: all test clean linecount lint tools examples $(DIST_ALIASES) \
|
||||
py mat py$(PROJECT) mat$(PROJECT) proto runtest \
|
||||
superclean supercleanlist supercleanfiles
|
||||
|
||||
all: $(NAME) $(STATIC_NAME) tools examples |
||||
|
||||
linecount: clean |
||||
cloc --read-lang-def=$(PROJECT).cloc src/$(PROJECT)/
|
||||
|
||||
lint: $(LINT_REPORT) |
||||
|
||||
$(LINT_REPORT): $(NONGEN_CXX_SRCS) | $(BUILD_DIR) |
||||
@ (python ./scripts/cpp_lint.py $(NONGEN_CXX_SRCS) > $(LINT_REPORT) 2>&1 \
|
||||
&& ($(RM) $(FAILED_LINT_REPORT); echo "No lint errors!")) || ( \
|
||||
mv $(LINT_REPORT) $(FAILED_LINT_REPORT); \
|
||||
grep -v "^Done processing " $(FAILED_LINT_REPORT); \
|
||||
echo "Found 1 or more lint errors; see log at $(FAILED_LINT_REPORT)"; \
|
||||
exit 1)
|
||||
|
||||
test: $(TEST_ALL_BIN) $(TEST_BINS) |
||||
|
||||
tools: $(TOOL_BINS) |
||||
|
||||
examples: $(EXAMPLE_BINS) |
||||
|
||||
py$(PROJECT): py |
||||
|
||||
py: $(PY$(PROJECT)_SO) $(PROTO_GEN_PY) |
||||
|
||||
$(PY$(PROJECT)_SO): $(STATIC_NAME) $(PY$(PROJECT)_SRC) |
||||
$(CXX) -shared -o $@ $(PY$(PROJECT)_SRC) \
|
||||
$(STATIC_NAME) $(CXXFLAGS) $(PYTHON_LDFLAGS)
|
||||
@ echo
|
||||
|
||||
mat$(PROJECT): mat |
||||
|
||||
mat: $(MAT$(PROJECT)_SO) |
||||
|
||||
$(MAT$(PROJECT)_SO): $(MAT$(PROJECT)_SRC) $(STATIC_NAME) |
||||
@ if [ -z "$(MATLAB_DIR)" ]; then \
|
||||
echo "MATLAB_DIR must be specified in $(CONFIG_FILE)" \
|
||||
"to build mat$(PROJECT)."; \
|
||||
exit 1; \
|
||||
fi
|
||||
$(MATLAB_DIR)/bin/mex $(MAT$(PROJECT)_SRC) $(STATIC_NAME) \
|
||||
CXXFLAGS="\$$CXXFLAGS $(CXXFLAGS) $(WARNINGS)" \
|
||||
CXXLIBS="\$$CXXLIBS $(LDFLAGS)" -o $@
|
||||
@ echo
|
||||
|
||||
runtest: $(TEST_ALL_BIN) |
||||
$(TEST_ALL_BIN) $(TEST_GPUID) --gtest_shuffle
|
||||
|
||||
$(BUILD_DIR_LINK): $(BUILD_DIR)/.linked |
||||
|
||||
# Create a target ".linked" in this BUILD_DIR to tell Make that the "build" link
|
||||
# is currently correct, then delete the one in the OTHER_BUILD_DIR in case it
|
||||
# exists and $(DEBUG) is toggled later.
|
||||
$(BUILD_DIR)/.linked: |
||||
@ mkdir -p $(BUILD_DIR)
|
||||
@ $(RM) $(OTHER_BUILD_DIR)/.linked
|
||||
@ $(RM) -r $(BUILD_DIR_LINK)
|
||||
@ ln -s $(BUILD_DIR) $(BUILD_DIR_LINK)
|
||||
@ touch $@
|
||||
|
||||
$(ALL_BUILD_DIRS): | $(BUILD_DIR_LINK) |
||||
@ mkdir -p $@
|
||||
|
||||
$(NAME): $(PROTO_OBJS) $(OBJS) | $(LIB_BUILD_DIR) |
||||
$(CXX) -shared -o $@ $(OBJS) $(CXXFLAGS) $(LDFLAGS) $(WARNINGS)
|
||||
@ echo
|
||||
|
||||
$(STATIC_NAME): $(PROTO_OBJS) $(OBJS) | $(LIB_BUILD_DIR) |
||||
ar rcs $@ $(PROTO_OBJS) $(OBJS)
|
||||
@ echo
|
||||
|
||||
$(TEST_BUILD_DIR)/%.o: src/$(PROJECT)/test/%.cpp $(HXX_SRCS) $(TEST_HDRS) \
|
||||
| $(TEST_BUILD_DIR)
|
||||
$(CXX) $< $(CXXFLAGS) -c -o $@
|
||||
@ echo
|
||||
|
||||
$(TEST_ALL_BIN): $(TEST_MAIN_SRC) $(TEST_OBJS) $(GTEST_OBJ) $(STATIC_NAME) \
|
||||
| $(TEST_BIN_DIR)
|
||||
$(CXX) $(TEST_MAIN_SRC) $(TEST_OBJS) $(GTEST_OBJ) $(STATIC_NAME) \
|
||||
-o $@ $(CXXFLAGS) $(LDFLAGS) $(WARNINGS)
|
||||
@ echo
|
||||
|
||||
$(TEST_BIN_DIR)/%.testbin: $(TEST_BUILD_DIR)/%.o $(GTEST_OBJ) $(STATIC_NAME) \
|
||||
| $(TEST_BIN_DIR)
|
||||
$(CXX) $(TEST_MAIN_SRC) $< $(GTEST_OBJ) $(STATIC_NAME) \
|
||||
-o $@ $(CXXFLAGS) $(LDFLAGS) $(WARNINGS)
|
||||
@ echo
|
||||
|
||||
$(TOOL_BINS): %.bin : %.o $(STATIC_NAME) |
||||
$(CXX) $< $(STATIC_NAME) -o $@ $(CXXFLAGS) $(LDFLAGS) $(WARNINGS)
|
||||
@ echo
|
||||
|
||||
$(EXAMPLE_BINS): %.bin : %.o $(STATIC_NAME) |
||||
$(CXX) $< $(STATIC_NAME) -o $@ $(CXXFLAGS) $(LDFLAGS) $(WARNINGS)
|
||||
@ echo
|
||||
|
||||
$(LAYER_BUILD_DIR)/%.o: src/$(PROJECT)/layers/%.cpp $(HXX_SRCS) \
|
||||
| $(LAYER_BUILD_DIR)
|
||||
$(CXX) $< $(CXXFLAGS) -c -o $@
|
||||
@ echo
|
||||
|
||||
$(PROTO_BUILD_DIR)/%.pb.o: $(PROTO_BUILD_DIR)/%.pb.cc $(PROTO_GEN_HEADER) \
|
||||
| $(PROTO_BUILD_DIR)
|
||||
$(CXX) $< $(CXXFLAGS) -c -o $@
|
||||
@ echo
|
||||
|
||||
$(UTIL_BUILD_DIR)/%.o: src/$(PROJECT)/util/%.cpp $(HXX_SRCS) | $(UTIL_BUILD_DIR) |
||||
$(CXX) $< $(CXXFLAGS) -c -o $@
|
||||
@ echo
|
||||
|
||||
$(GTEST_OBJ): $(GTEST_SRC) | $(GTEST_BUILD_DIR) |
||||
$(CXX) $< $(CXXFLAGS) -c -o $@
|
||||
@ echo
|
||||
|
||||
$(LAYER_BUILD_DIR)/%.cuo: src/$(PROJECT)/layers/%.cu $(HXX_SRCS) \
|
||||
| $(LAYER_BUILD_DIR)
|
||||
$(CUDA_DIR)/bin/nvcc $(NVCCFLAGS) $(CUDA_ARCH) -c $< -o $@
|
||||
@ echo
|
||||
|
||||
$(UTIL_BUILD_DIR)/%.cuo: src/$(PROJECT)/util/%.cu | $(UTIL_BUILD_DIR) |
||||
$(CUDA_DIR)/bin/nvcc $(NVCCFLAGS) $(CUDA_ARCH) -c $< -o $@
|
||||
@ echo
|
||||
|
||||
$(TOOL_BUILD_DIR)/%.o: tools/%.cpp $(PROTO_GEN_HEADER) | $(TOOL_BUILD_DIR) |
||||
$(CXX) $< $(CXXFLAGS) -c -o $@
|
||||
@ echo
|
||||
|
||||
$(EXAMPLE_BUILD_DIR)/%.o: examples/%.cpp $(PROTO_GEN_HEADER) \
|
||||
| $(EXAMPLE_BUILD_DIRS)
|
||||
$(CXX) $< $(CXXFLAGS) -c -o $@
|
||||
@ echo
|
||||
|
||||
$(BUILD_DIR)/src/$(PROJECT)/%.o: src/$(PROJECT)/%.cpp $(HXX_SRCS) |
||||
$(CXX) $< $(CXXFLAGS) -c -o $@
|
||||
@ echo
|
||||
|
||||
proto: $(PROTO_GEN_CC) $(PROTO_GEN_HEADER) |
||||
|
||||
$(PROTO_BUILD_DIR)/%.pb.cc $(PROTO_BUILD_DIR)/%.pb.h : \
|
||||
$(PROTO_SRC_DIR)/%.proto | $(PROTO_BUILD_DIR)
|
||||
protoc --proto_path=src --cpp_out=$(BUILD_DIR)/src $<
|
||||
@ echo
|
||||
|
||||
$(PY_PROTO_BUILD_DIR)/%_pb2.py : $(PROTO_SRC_DIR)/%.proto \
|
||||
$(PY_PROTO_INIT) | $(PY_PROTO_BUILD_DIR)
|
||||
protoc --proto_path=src --python_out=python $<
|
||||
@ echo
|
||||
|
||||
$(PY_PROTO_INIT): | $(PY_PROTO_BUILD_DIR) |
||||
touch $(PY_PROTO_INIT)
|
||||
|
||||
clean: |
||||
@- $(RM) -rf $(ALL_BUILD_DIRS)
|
||||
@- $(RM) -rf $(OTHER_BUILD_DIR)
|
||||
@- $(RM) -rf $(BUILD_DIR_LINK)
|
||||
@- $(RM) -rf $(DISTRIBUTE_DIR)
|
||||
@- $(RM) $(PY$(PROJECT)_SO)
|
||||
@- $(RM) $(MAT$(PROJECT)_SO)
|
||||
|
||||
supercleanfiles: |
||||
$(eval SUPERCLEAN_FILES := $(strip \
|
||||
$(foreach ext,$(SUPERCLEAN_EXTS), $(shell find . -name '*$(ext)' \
|
||||
-not -path './data/*'))))
|
||||
|
||||
supercleanlist: supercleanfiles |
||||
@ \
|
||||
if [ -z "$(SUPERCLEAN_FILES)" ]; then \
|
||||
echo "No generated files found."; \
|
||||
else \
|
||||
echo $(SUPERCLEAN_FILES) | tr ' ' '\n'; \
|
||||
fi
|
||||
|
||||
superclean: clean supercleanfiles |
||||
@ \
|
||||
if [ -z "$(SUPERCLEAN_FILES)" ]; then \
|
||||
echo "No generated files found."; \
|
||||
else \
|
||||
echo "Deleting the following generated files:"; \
|
||||
echo $(SUPERCLEAN_FILES) | tr ' ' '\n'; \
|
||||
$(RM) $(SUPERCLEAN_FILES); \
|
||||
fi
|
||||
|
||||
$(DIST_ALIASES): $(DISTRIBUTE_DIR) |
||||
|
||||
$(DISTRIBUTE_DIR): all py $(HXX_SRCS) | $(DISTRIBUTE_SUBDIRS) |
||||
# add include
|
||||
cp -r include $(DISTRIBUTE_DIR)/
|
||||
# add tool and example binaries
|
||||
cp $(TOOL_BINS) $(DISTRIBUTE_DIR)/bin
|
||||
cp $(EXAMPLE_BINS) $(DISTRIBUTE_DIR)/bin
|
||||
# add libraries
|
||||
cp $(NAME) $(DISTRIBUTE_DIR)/lib
|
||||
cp $(STATIC_NAME) $(DISTRIBUTE_DIR)/lib
|
||||
# add python - it's not the standard way, indeed...
|
||||
cp -r python $(DISTRIBUTE_DIR)/python
|
@ -0,0 +1,56 @@ |
||||
## Refer to http://caffe.berkeleyvision.org/installation.html
|
||||
# Contributions simplifying and improving our build system are welcome!
|
||||
|
||||
# CUDA directory contains bin/ and lib/ directories that we need.
|
||||
CUDA_DIR := /usr/local/cuda
|
||||
|
||||
# CUDA architecture setting: going with all of them.
|
||||
CUDA_ARCH := -gencode arch=compute_20,code=sm_20 \
|
||||
-gencode arch=compute_20,code=sm_21 \
|
||||
-gencode arch=compute_30,code=sm_30 \
|
||||
-gencode arch=compute_35,code=sm_35
|
||||
|
||||
# BLAS choice:
|
||||
# atlas for ATLAS (default)
|
||||
# mkl for MKL
|
||||
# open for OpenBlas
|
||||
BLAS := atlas
|
||||
# Custom (MKL/ATLAS/OpenBLAS) include and lib directories.
|
||||
# Leave commented to accept the defaults for your choice of BLAS
|
||||
# (which should work)!
|
||||
# BLAS_INCLUDE := /path/to/your/blas
|
||||
BLAS_INCLUDE := /usr/include/atlas
|
||||
# BLAS_LIB := /path/to/your/blas
|
||||
BLAS_LIB := /usr/lib/atlas-base
|
||||
|
||||
# This is required only if you will compile the matlab interface.
|
||||
# MATLAB directory should contain the mex binary in /bin.
|
||||
# MATLAB_DIR := /usr/local
|
||||
# MATLAB_DIR := /Applications/MATLAB_R2012b.app
|
||||
|
||||
# NOTE: this is required only if you will compile the python interface.
|
||||
# We need to be able to find Python.h and numpy/arrayobject.h.
|
||||
PYTHON_INCLUDE := /usr/local/include/python2.7 \
|
||||
/usr/include/python2.7 \
|
||||
/usr/local/lib/python2.7/dist-packages/numpy/core/include
|
||||
# Anaconda Python distribution is quite popular. Include path:
|
||||
# PYTHON_INCLUDE := $(HOME)/anaconda/include \
|
||||
# $(HOME)/anaconda/include/python2.7 \
|
||||
# $(HOME)/anaconda/lib/python2.7/site-packages/numpy/core/include
|
||||
|
||||
# We need to be able to find libpythonX.X.so or .dylib.
|
||||
PYTHON_LIB := /usr/local/lib
|
||||
# PYTHON_LIB := $(HOME)/anaconda/lib
|
||||
|
||||
# Whatever else you find you need goes here.
|
||||
INCLUDE_DIRS := $(PYTHON_INCLUDE) /usr/local/include
|
||||
LIBRARY_DIRS := $(PYTHON_LIB) /usr/local/lib /usr/lib
|
||||
|
||||
BUILD_DIR := build
|
||||
DISTRIBUTE_DIR := distribute
|
||||
|
||||
# Uncomment for debugging.
|
||||
# DEBUG := 1
|
||||
|
||||
# The ID of the GPU that 'make runtest' will use to run unit tests.
|
||||
TEST_GPUID := 0
|
@ -0,0 +1,56 @@ |
||||
## Refer to http://caffe.berkeleyvision.org/installation.html
|
||||
# Contributions simplifying and improving our build system are welcome!
|
||||
|
||||
# CUDA directory contains bin/ and lib/ directories that we need.
|
||||
CUDA_DIR := /usr/local/cuda
|
||||
# On Ubuntu 14.04, if cuda tools are installed via
|
||||
# "sudo apt-get install nvidia-cuda-toolkit" then use this instead:
|
||||
# CUDA_DIR := /usr
|
||||
|
||||
# CUDA architecture setting: going with all of them.
|
||||
CUDA_ARCH := -gencode arch=compute_20,code=sm_20 \
|
||||
-gencode arch=compute_20,code=sm_21 \
|
||||
-gencode arch=compute_30,code=sm_30 \
|
||||
-gencode arch=compute_35,code=sm_35
|
||||
|
||||
# BLAS choice:
|
||||
# atlas for ATLAS (default)
|
||||
# mkl for MKL
|
||||
# open for OpenBlas
|
||||
BLAS := atlas
|
||||
# Custom (MKL/ATLAS/OpenBLAS) include and lib directories.
|
||||
# Leave commented to accept the defaults for your choice of BLAS
|
||||
# (which should work)!
|
||||
# BLAS_INCLUDE := /path/to/your/blas
|
||||
# BLAS_LIB := /path/to/your/blas
|
||||
|
||||
# This is required only if you will compile the matlab interface.
|
||||
# MATLAB directory should contain the mex binary in /bin.
|
||||
# MATLAB_DIR := /usr/local
|
||||
# MATLAB_DIR := /Applications/MATLAB_R2012b.app
|
||||
|
||||
# NOTE: this is required only if you will compile the python interface.
|
||||
# We need to be able to find Python.h and numpy/arrayobject.h.
|
||||
PYTHON_INCLUDE := /usr/local/include/python2.7 \
|
||||
/usr/local/lib/python2.7/dist-packages/numpy/core/include
|
||||
# Anaconda Python distribution is quite popular. Include path:
|
||||
# PYTHON_INCLUDE := $(HOME)/anaconda/include \
|
||||
# $(HOME)/anaconda/include/python2.7 \
|
||||
# $(HOME)/anaconda/lib/python2.7/site-packages/numpy/core/include
|
||||
|
||||
# We need to be able to find libpythonX.X.so or .dylib.
|
||||
PYTHON_LIB := /usr/local/lib
|
||||
# PYTHON_LIB := $(HOME)/anaconda/lib
|
||||
|
||||
# Whatever else you find you need goes here.
|
||||
INCLUDE_DIRS := $(PYTHON_INCLUDE) /usr/local/include
|
||||
LIBRARY_DIRS := $(PYTHON_LIB) /usr/local/lib /usr/lib
|
||||
|
||||
BUILD_DIR := build
|
||||
DISTRIBUTE_DIR := distribute
|
||||
|
||||
# Uncomment for debugging.
|
||||
# DEBUG := 1
|
||||
|
||||
# The ID of the GPU that 'make runtest' will use to run unit tests.
|
||||
TEST_GPUID := 0
|
@@ -0,0 +1,115 @@
[Caffe: Convolutional Architecture for Fast Feature Extraction](http://caffe.berkeleyvision.org)

Created by [Yangqing Jia](http://daggerfs.com), UC Berkeley EECS department.
In active development by the Berkeley Vision and Learning Center ([BVLC](http://bvlc.eecs.berkeley.edu/)).

## Introduction

Caffe aims to provide computer vision scientists with a **clean, modifiable
implementation** of state-of-the-art deep learning algorithms. Network structure
is easily specified in separate config files, with no mess of hard-coded
parameters in the code. Python and MATLAB wrappers are provided.

At the same time, Caffe fits industry needs, with blazing fast C++/CUDA code for
GPU computation. Caffe is currently the fastest GPU CNN implementation publicly
available, and is able to process more than **40 million images per day** on a
single NVIDIA K40 GPU (or 20 million per day on a K20)\*.

Caffe also provides **seamless switching between CPU and GPU**, which allows one
to train models with fast GPUs and then deploy them on non-GPU clusters with one
line of code: `Caffe::set_mode(Caffe::CPU)`.

Even in CPU mode, computing predictions on an image takes only 20 ms when images
are processed in batch mode.
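
The same mode switch is exposed by the Python wrapper shipped in this repository's
`python/` directory. Below is a minimal sketch of batched CPU prediction, assuming
`pycaffe` has been built; the deploy prototxt and weights file names are placeholders
for your own model:

```
import numpy as np
import caffe  # add this repo's python/ directory to PYTHONPATH first

# Placeholder model files; substitute your own deploy prototxt and trained weights.
net = caffe.Classifier('deploy.prototxt', 'weights.caffemodel',
                       channel_swap=(2, 1, 0))
net.set_phase_test()
net.set_mode_cpu()   # or net.set_mode_gpu() to run the same net on a GPU

# One preprocessed batch of N x C x H x W inputs (random data here).
batch = np.random.rand(10, 3, 227, 227).astype(np.float32)
out = net.forward_all(data=batch)
print out['prob'].shape   # per-image class probabilities
```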

* [Caffe introductory presentation](https://www.dropbox.com/s/10fx16yp5etb8dv/caffe-presentation.pdf)
* [Installation instructions](http://caffe.berkeleyvision.org/installation.html)

\* When measured with the [SuperVision](http://www.image-net.org/challenges/LSVRC/2012/supervision.pdf) model that won the ImageNet Large Scale Visual Recognition Challenge 2012.

## License

Caffe is BSD 2-Clause licensed (refer to the
[LICENSE](http://caffe.berkeleyvision.org/license.html) for details).

The pretrained models published by the BVLC, such as the
[Caffe reference ImageNet model](https://www.dropbox.com/s/n3jups0gr7uj0dv/caffe_reference_imagenet_model),
are licensed for academic research / non-commercial use only. However, Caffe is
a full toolkit for model training, so start brewing your own Caffe model today!

## Citing Caffe

Please kindly cite Caffe in your publications if it helps your research:

    @misc{Jia13caffe,
      Author = {Yangqing Jia},
      Title = { {Caffe}: An Open Source Convolutional Architecture for Fast Feature Embedding},
      Year = {2013},
      Howpublished = {\url{http://caffe.berkeleyvision.org/}}
    }

## Documentation

Tutorials and general documentation are written in Markdown format in the `docs/` folder.
While the format is quite easy to read directly, you may prefer to view the whole thing as a website.
To do so, simply run `jekyll serve -s docs` and view the documentation website at `http://0.0.0.0:4000` (to get [jekyll](http://jekyllrb.com/), you must have Ruby and do `gem install jekyll`).

We strive to provide lots of usage examples, and to document all code in docstrings.
We'd appreciate your contribution to this effort!

## Development

Caffe is developed with active participation of the community by the [Berkeley Vision and Learning Center](http://bvlc.eecs.berkeley.edu/).
We welcome all contributions!

### The release cycle

- The `dev` branch is for new development, including community contributions. We aim to keep it in a functional state, but large changes may occur and things may get broken every now and then. Use this if you want the "bleeding edge".
- The `master` branch is handled by BVLC, which will integrate changes from `dev` on a roughly monthly schedule, giving it a release tag. Use this if you want more stability.

### Setting priorities

- Make GitHub Issues for bugs, features you'd like to see, questions, etc.
- Development work is guided by [milestones](https://github.com/BVLC/caffe/issues?milestone=1), which are sets of issues selected for concurrent release (integration from `dev` to `master`).
- Please note that since the core developers are largely researchers, we may work on a feature in isolation from the open-source community for some time before releasing it, so as to claim honest academic contribution. We do release it as soon as a reasonable technical report may be written about the work, and we still aim to inform the community of ongoing development through Issues.

### Contributing

- Do new development in [feature branches](https://www.atlassian.com/git/workflows#!workflow-feature-branch) with descriptive names.
- Bring your work up-to-date by [rebasing](http://git-scm.com/book/en/Git-Branching-Rebasing) onto the latest `dev`. (Polish your changes by [interactive rebase](https://help.github.com/articles/interactive-rebase), if you'd like.)
- [Pull request](https://help.github.com/articles/using-pull-requests) your contribution to BVLC/caffe's `dev` branch for discussion and review.
  * PRs should live fast, die young, and leave a beautiful merge. Pull request sooner rather than later so that discussion can guide development.
  * Code must be accompanied by documentation and tests at all times.
  * Only fast-forward merges will be accepted.

See our [development guidelines](http://caffe.berkeleyvision.org/development.html) for further details; the more closely these are followed, the sooner your work will be merged.

#### [Shelhamer's](https://github.com/shelhamer) “life of a branch in four acts”

Make the `feature` branch off of the latest `bvlc/dev`
```
git checkout dev
git pull upstream dev
git checkout -b feature
# do your work, make commits
```

Prepare to merge by rebasing your branch on the latest `bvlc/dev`
```
# make sure dev is fresh
git checkout dev
git pull upstream dev
# rebase your branch on the tip of dev
git checkout feature
git rebase dev
```

Push your branch to pull request it into `dev`
```
git push origin feature
# ...make pull request to dev...
```

Now make a pull request! You can do this from the command line (`git pull-request -b dev`) if you install [hub](https://github.com/github/hub).

The pull request of `feature` into `dev` will be a clean merge. Applause.
@@ -0,0 +1,66 @@
### General

This directory contains the necessary code to reproduce the gradient
ascent images in the paper: Figures 13, S3, S4, S5, S6, and S7. This
is research code, so it may contain paths and settings that are
particular to our setup and will need to be changed for your own.

**Important note: this code requires the slightly modified version of caffe in this repository's [ascent](https://github.com/Evolving-AI-Lab/fooling/tree/ascent) branch. If you try running on master, you'll get an error about `backward_from_layer`.** See the steps below for using the correct branch.

If you find any bugs, please submit a PR!

If you have any trouble getting the code to work, please get in touch, and we'll help where we can.


### Notes on running the gradient ascent code

* The gist of the gradient ascent code (along with a lot of
  experimental bookkeeping) is in the
  [find_image function in find_fooling_image.py](https://github.com/Evolving-AI-Lab/fooling/blob/master/caffe/ascent/find_fooling_image.py#L68-L274); a condensed sketch of its update loop appears just after this list.
* If you happen to be working in a
  cluster environment that uses `qsub`, you may find the shell scripts
  useful; otherwise they probably won't help you much.
* If you don't have a trained net around, you can download the trained model we used here: http://yosinski.cs.cornell.edu/yos_140311__caffenet_iter_450000
* A file containing class labels is also used by the script and can be downloaded here: http://s.yosinski.com/synset_words.txt
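
The sketch below compresses that loop to its essentials, using the same pycaffe calls
as `find_image`. It is a sketch rather than a drop-in function: it assumes the
ascent-branch `caffe` module (for `backward_from_layer`) is importable, and that
`net`, the mean-subtracted image `X`, the target `push_idx` tuple, and the settings
`N`, `decay`, and `desired_prog` are already set up as in `find_fooling_image.py`.

```
# Condensed gradient ascent loop (see find_image for the full bookkeeping).
for ii in range(N):
    out = net.forward_all(data=X)            # forward pass on the current image
    acts = net.blobs['prob'].data             # activations of the layer being pushed

    diffs = net.blobs['prob'].diff * 0
    diffs[0][push_idx] = 1.0                  # request the gradient of just that unit
    grad = net.backward_from_layer('prob', diffs)['data'].copy()  # ascent branch only

    lr = desired_prog / (grad ** 2).sum()     # "progress"-based step size (capped in the real code)
    X += lr * grad                            # ascend the pushed unit's activation
    X *= (1 - decay)                          # L2 decay regularizer
```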


### Simple steps to generate one fooling image

We'll walk through the exact steps to generate a fooling image of a lion (class 291) using gradient ascent on the output unit for that class.

First, clone the repo and check out the ascent branch:

    [~] $ git clone git@github.com:Evolving-AI-Lab/fooling.git
    [~] $ cd fooling
    [~/fooling] $ git checkout ascent
    [~/fooling] $ cd caffe

Configure and compile caffe. See the [installation instructions](http://caffe.berkeleyvision.org/installation.html). Make sure to compile the python bindings too:

    [~/fooling/caffe] $ make -j && make -j pycaffe

Once Caffe is built, continue by fetching some auxiliary data (synsets.txt and a pre-trained model):

    [~/fooling/caffe] $ cd data/ilsvrc12
    [~/fooling/caffe/data/ilsvrc12] $ ./get_ilsvrc_aux.sh
    [~/fooling/caffe/data/ilsvrc12] $ cd ../../ascent
    [~/fooling/caffe/ascent] $ wget 'http://yosinski.cs.cornell.edu/yos_140311__caffenet_iter_450000'

Now we're ready to run the optimization. To find a quick fooling image for the Lion class (idx 291) using only 3 gradient steps, run the following:

    [~/fooling/caffe/ascent] $ ./find_fooling_image.py --push_idx 291 --N 3
    ...
    0    Push idx: 291, val: 0.00209935 (n02129165 lion, king of beasts, Panthera leo)
         Max idx: 815, val: 0.0114864 (n04275548 spider web, spider's web)
    ...
    1    Push idx: 291, val: 0.00962483 (n02129165 lion, king of beasts, Panthera leo)
         Max idx: 330, val: 0.0224016 (n02325366 wood rabbit, cottontail, cottontail rabbit)
    ...
    2    Push idx: 291, val: 0.0518007 (n02129165 lion, king of beasts, Panthera leo)
         Max idx: 291, val: 0.0518007 (n02129165 lion, king of beasts, Panthera leo)
    ...
    Result: majority success

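The same optimization can also be driven from Python, which is how the hyperparameter
search script in this directory uses it. A minimal sketch, run from `caffe/ascent`
with `pycaffe` on the path and the model file above already downloaded (the output
prefix here is just an example name):

```
#! /usr/bin/env python
from find_fooling_image import load_net_mean, find_image

net, mnirgb, mn4d, labels = load_net_mean()

# Same quick lion example as above: class 291, only 3 gradient steps.
# Result images and a pickle of statistics are written using the given prefix.
best = find_image(net, mnirgb, mn4d, labels,
                  push_idx=291, N=3,
                  prefix='lion_quick')
```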
@ -0,0 +1,213 @@ |
||||
name: "CaffeNet" |
||||
input: "data" |
||||
input_dim: 1 |
||||
input_dim: 3 |
||||
input_dim: 227 |
||||
input_dim: 227 |
||||
force_backward: true |
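# force_backward makes Caffe run the backward pass all the way down to the "data"
# blob even though no layer here needs those gradients for learning; the ascent
# script reads them via net.backward_from_layer(...)['data'].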
||||
layers { |
||||
name: "conv1" |
||||
type: CONVOLUTION |
||||
bottom: "data" |
||||
top: "conv1" |
||||
convolution_param { |
||||
num_output: 96 |
||||
kernel_size: 11 |
||||
stride: 4 |
||||
} |
||||
} |
||||
layers { |
||||
name: "relu1" |
||||
type: RELU |
||||
bottom: "conv1" |
||||
top: "conv1" |
||||
} |
||||
layers { |
||||
name: "pool1" |
||||
type: POOLING |
||||
bottom: "conv1" |
||||
top: "pool1" |
||||
pooling_param { |
||||
pool: MAX |
||||
kernel_size: 3 |
||||
stride: 2 |
||||
} |
||||
} |
||||
layers { |
||||
name: "norm1" |
||||
type: LRN |
||||
bottom: "pool1" |
||||
top: "norm1" |
||||
lrn_param { |
||||
local_size: 5 |
||||
alpha: 0.0001 |
||||
beta: 0.75 |
||||
} |
||||
} |
||||
layers { |
||||
name: "conv2" |
||||
type: CONVOLUTION |
||||
bottom: "norm1" |
||||
top: "conv2" |
||||
convolution_param { |
||||
num_output: 256 |
||||
pad: 2 |
||||
kernel_size: 5 |
||||
group: 2 |
||||
} |
||||
} |
||||
layers { |
||||
name: "relu2" |
||||
type: RELU |
||||
bottom: "conv2" |
||||
top: "conv2" |
||||
} |
||||
layers { |
||||
name: "pool2" |
||||
type: POOLING |
||||
bottom: "conv2" |
||||
top: "pool2" |
||||
pooling_param { |
||||
pool: MAX |
||||
kernel_size: 3 |
||||
stride: 2 |
||||
} |
||||
} |
||||
layers { |
||||
name: "norm2" |
||||
type: LRN |
||||
bottom: "pool2" |
||||
top: "norm2" |
||||
lrn_param { |
||||
local_size: 5 |
||||
alpha: 0.0001 |
||||
beta: 0.75 |
||||
} |
||||
} |
||||
layers { |
||||
name: "conv3" |
||||
type: CONVOLUTION |
||||
bottom: "norm2" |
||||
top: "conv3" |
||||
convolution_param { |
||||
num_output: 384 |
||||
pad: 1 |
||||
kernel_size: 3 |
||||
} |
||||
} |
||||
layers { |
||||
name: "relu3" |
||||
type: RELU |
||||
bottom: "conv3" |
||||
top: "conv3" |
||||
} |
||||
layers { |
||||
name: "conv4" |
||||
type: CONVOLUTION |
||||
bottom: "conv3" |
||||
top: "conv4" |
||||
convolution_param { |
||||
num_output: 384 |
||||
pad: 1 |
||||
kernel_size: 3 |
||||
group: 2 |
||||
} |
||||
} |
||||
layers { |
||||
name: "relu4" |
||||
type: RELU |
||||
bottom: "conv4" |
||||
top: "conv4" |
||||
} |
||||
layers { |
||||
name: "conv5" |
||||
type: CONVOLUTION |
||||
bottom: "conv4" |
||||
top: "conv5" |
||||
convolution_param { |
||||
num_output: 256 |
||||
pad: 1 |
||||
kernel_size: 3 |
||||
group: 2 |
||||
} |
||||
} |
||||
layers { |
||||
name: "relu5" |
||||
type: RELU |
||||
bottom: "conv5" |
||||
top: "conv5" |
||||
} |
||||
layers { |
||||
name: "pool5" |
||||
type: POOLING |
||||
bottom: "conv5" |
||||
top: "pool5" |
||||
pooling_param { |
||||
pool: MAX |
||||
kernel_size: 3 |
||||
stride: 2 |
||||
} |
||||
} |
||||
layers { |
||||
name: "fc6" |
||||
type: INNER_PRODUCT |
||||
bottom: "pool5" |
||||
top: "fc6" |
||||
inner_product_param { |
||||
num_output: 4096 |
||||
} |
||||
} |
||||
layers { |
||||
name: "relu6" |
||||
type: RELU |
||||
bottom: "fc6" |
||||
top: "fc6" |
||||
} |
||||
layers { |
||||
name: "drop6" |
||||
type: DROPOUT |
||||
bottom: "fc6" |
||||
top: "fc6" |
||||
dropout_param { |
||||
dropout_ratio: 0.5 |
||||
} |
||||
} |
||||
layers { |
||||
name: "fc7" |
||||
type: INNER_PRODUCT |
||||
bottom: "fc6" |
||||
top: "fc7" |
||||
inner_product_param { |
||||
num_output: 4096 |
||||
} |
||||
} |
||||
layers { |
||||
name: "relu7" |
||||
type: RELU |
||||
bottom: "fc7" |
||||
top: "fc7" |
||||
} |
||||
layers { |
||||
name: "drop7" |
||||
type: DROPOUT |
||||
bottom: "fc7" |
||||
top: "fc7" |
||||
dropout_param { |
||||
dropout_ratio: 0.5 |
||||
} |
||||
} |
||||
layers { |
||||
name: "fc8" |
||||
type: INNER_PRODUCT |
||||
bottom: "fc7" |
||||
top: "fc8" |
||||
inner_product_param { |
||||
num_output: 1000 |
||||
} |
||||
} |
||||
layers { |
||||
name: "prob" |
||||
type: SOFTMAX |
||||
bottom: "fc8" |
||||
top: "prob" |
||||
} |
@ -0,0 +1,315 @@ |
||||
#! /usr/bin/env python |
||||
|
||||
import argparse |
||||
import pickle |
||||
import pylab |
||||
from pylab import * |
||||
from scipy.ndimage.filters import gaussian_filter |
||||
from collections import OrderedDict |
||||
import ipdb as pdb |
||||
plt.rcParams['image.interpolation'] = 'nearest' |
||||
plt.rcParams['image.cmap'] = 'gray' |
||||
|
||||
# Make sure that caffe is on the python path: |
||||
caffe_root = '../../' # this file is normally in {caffe_root}/ascent. If it's elsewhere, change this path. |
||||
import sys |
||||
sys.path.insert(0, caffe_root + 'python') |
||||
# If this next line fails, check the relevant paths. |
||||
import caffe |
||||
|
||||
from misc_helper import * |
||||
|
||||
|
||||
|
||||
def load_net_mean(): |
||||
# Pick which model to load, which image, etc. |
||||
|
||||
model_def_file = 'deploy_1_forcebackward.prototxt' |
||||
|
||||
# Can be downloaded from http://yosinski.cs.cornell.edu/yos_140311__caffenet_iter_450000 |
||||
pretrained_model = 'yos_140311__caffenet_iter_450000' |
||||
|
||||
# Can be downloaded from http://s.yosinski.com/synset_words.txt |
||||
with open('%s/data/ilsvrc12/synset_words.txt' % caffe_root) as ff: |
||||
labels = [line.strip() for line in ff.readlines()] |
||||
|
||||
# Load mean |
||||
inmean = np.load(caffe_root + 'python/caffe/imagenet/ilsvrc_2012_mean.npy') |
||||
|
||||
offset = (256-227)/2 |
||||
mn = inmean[:, offset:offset+227, offset:offset+227] |
||||
mni = mn.transpose((1,2,0)) |
||||
mnirgb = mni[:,:,::-1] # convert to rgb order |
||||
mn4d = mn[newaxis] |
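# The lines above center-crop the 256x256 ImageNet mean to the 227x227 network
# input, keep an H x W x C RGB copy (mnirgb) for visualizing/saving images, and a
# 1 x 3 x 227 x 227 copy (mn4d) for adding to and subtracting from network-space batches.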
||||
|
||||
net = caffe.Classifier(model_def_file, pretrained_model, |
||||
#mean=inmean, |
||||
channel_swap=(2,1,0), |
||||
#raw_scale=255.0, |
||||
#image_dims=(256, 256), |
||||
) |
||||
|
||||
net.set_phase_test() |
||||
net.set_mode_cpu() |
||||
|
||||
return net, mnirgb, mn4d, labels |
||||
|
||||
|
||||
|
||||
def update_result(result, suffix, ii, X, X0): |
||||
result['iter_'+suffix] = ii |
||||
result['norm_'+suffix] = norm(X) |
||||
result['dist_'+suffix] = norm(X-X0) |
||||
result['std_'+suffix] = X.flatten().std() |
||||
result['X_'+suffix] = X.copy() |
||||
|
||||
|
||||
|
||||
def find_image(net, mnirgb, mn4d, labels, decay = .01, N = 300, rseed = 0, |
||||
push_layer = 'prob', push_idx = 278, start_at = 'mean_plus', prefix = 'junk', |
||||
lr_policy = 'progress', |
||||
lr_params = {'max_lr': 1e12, 'early_prog': .03, 'late_prog_mult': .1}, |
||||
blur_radius = 0, # 0 or at least .3 |
||||
blur_every = 1, |
||||
small_val_percentile = 0, |
||||
small_norm_percentile = 0, |
||||
px_benefit_percentile = 0, |
||||
px_abs_benefit_percentile = 0): |
||||
'''Find image for the given net using the specified start position, learning policies, etc.''' |
||||
|
||||
np.random.seed(rseed) |
||||
|
||||
#start_im = mnirgb[:] * 0 |
||||
if start_at == 'mean_plus': |
||||
start_im = np.random.normal(0, 1, mnirgb.shape) |
||||
elif start_at == 'randu': |
||||
start_im = uniform(0, 255, mnirgb.shape) - mnirgb |
||||
elif start_at == 'zero': |
||||
start_im = zeros(mnirgb.shape) |
||||
else: |
||||
raise Exception('Unknown start conditions: %s' % start_at) |
||||
|
||||
if lr_policy == 'progress': |
||||
assert 'max_lr' in lr_params |
||||
assert 'early_prog' in lr_params |
||||
assert 'late_prog_mult' in lr_params |
||||
elif lr_policy == 'constant': |
||||
assert 'lr' in lr_params |
||||
else: |
||||
raise Exception('Unknown lr_policy: %s' % lr_policy) |
||||
|
||||
try: |
||||
push_idx = tuple(push_idx) # tuple or list given |
||||
except TypeError: |
||||
push_idx = (push_idx, 0, 0) # int given |
||||
assert len(push_idx) == 3, 'provide push_idx in the form: int or (channel, x, y) tuple' |
||||
|
||||
#X0 = mn[newaxis,:] |
||||
#im255 = im01 * 255 - |
||||
|
||||
tmp = net.preprocess('data', start_im) # converts rgb -> bgr |
||||
X0 = tmp[newaxis,:] |
||||
|
||||
# What to change |
||||
#push_idx = 278 # kit fox |
||||
push_dir = 1.0 |
||||
class_unit = push_layer in ('fc8', 'prob') # Whether or not the unit being optimized corresponds to one of the 1000 classes |
||||
push_label = labels[push_idx[0]] if class_unit else 'None' |
||||
|
||||
X = X0.copy() |
||||
#figsize(20,8) |
||||
result = dict( |
||||
iter_maj = -1, |
||||
iter_99 = -1, |
||||
iter_999 = -1, |
||||
iter_9999 = -1, |
||||
iter_best = -1, |
||||
norm_maj = -1, |
||||
norm_99 = -1, |
||||
norm_999 = -1, |
||||
norm_9999 = -1, |
||||
norm_best = -1, |
||||
dist_maj = -1, |
||||
dist_99 = -1, |
||||
dist_999 = -1, |
||||
dist_9999 = -1, |
||||
dist_best = -1, |
||||
std_maj = -1, |
||||
std_99 = -1, |
||||
std_999 = -1, |
||||
std_9999 = -1, |
||||
std_best = -1, |
||||
act_best = -1, |
||||
X_maj = None, |
||||
X_99 = None, |
||||
X_999 = None, |
||||
X_9999 = None, |
||||
X_best = None, |
||||
decay = decay, N = N, push_idx = push_idx, push_dir = push_dir, push_layer = push_layer, |
||||
push_label = push_label, |
||||
lr_policy = lr_policy, lr_params = lr_params, |
||||
blur_radius = blur_radius, blur_every = blur_every, |
||||
small_val_percentile = small_val_percentile, small_norm_percentile = small_norm_percentile, |
||||
px_benefit_percentile = px_benefit_percentile, px_abs_benefit_percentile = px_abs_benefit_percentile, |
||||
) |
||||
|
||||
print '\nParameters:' |
||||
for key in sorted(result.keys()): |
||||
print '%25s: %s' % (key, result[key]) |
||||
print |
||||
|
||||
for ii in range(N): |
||||
X = minimum(255.0, maximum(0.0, X + mn4d)) - mn4d # Crop all values to [0,255] |
||||
out = net.forward_all(data = X) |
||||
|
||||
acts = net.blobs[push_layer].data |
||||
|
||||
iimax = unravel_index(acts.argmax(), acts.shape)[1:] # chop off batch idx of 0 |
||||
obj = acts[0][push_idx] |
||||
if ii > 0 and lr_policy == 'progress': |
||||
print ' pred_prog: ', pred_prog, 'actual:', obj - old_obj |
||||
if class_unit: |
||||
print '%-4d' % ii, 'Push idx: %d, val: %g (%s)\n Max idx: %d, val: %g (%s)' % (push_idx[0], acts[0][push_idx], push_label, iimax[0], acts.max(), labels[iimax[0]]) |
||||
else: |
||||
print '%-4d' % ii, 'Push idx: %s, val: %g\n Max idx: %s, val: %g' % (push_idx, acts[0][push_idx], iimax, acts.max()) |
||||
print ' X: ', X.min(), X.max(), norm(X) |
||||
|
||||
if acts[0][push_idx] > result['act_best']: |
||||
update_result(result, 'best', ii, X, X0) |
||||
result['act_best'] = acts[0][push_idx]
||||
if iimax == push_idx and result['iter_maj'] == -1: |
||||
update_result(result, 'maj', ii, X, X0) |
||||
if acts[0][push_idx] > .99 and result['iter_99'] == -1: |
||||
update_result(result, '99', ii, X, X0) |
||||
if acts[0][push_idx] > .999 and result['iter_999'] == -1: |
||||
update_result(result, '999', ii, X, X0) |
||||
if acts[0][push_idx] > .9999 and result['iter_9999'] == -1: |
||||
update_result(result, '9999', ii, X, X0) |
||||
#break # Quit once confidence > .9999 |
||||
|
||||
diffs = net.blobs[push_layer].diff * 0 |
||||
diffs[0][push_idx] = push_dir |
||||
backout = net.backward_from_layer(push_layer, diffs) |
||||
|
||||
grad = backout['data'].copy() |
||||
print ' grad:', grad.min(), grad.max(), norm(grad) |
||||
if norm(grad) == 0: |
||||
print 'Grad 0, failed' |
||||
break |
||||
|
||||
# progress-based lr |
||||
if lr_policy == 'progress': |
||||
late_prog = lr_params['late_prog_mult'] * (1-obj) |
||||
desired_prog = min(lr_params['early_prog'], late_prog) |
||||
prog_lr = desired_prog / norm(grad)**2 |
||||
lr = min(lr_params['max_lr'], prog_lr) |
||||
print ' desired_prog:', desired_prog, 'prog_lr:', prog_lr, 'lr:', lr |
||||
pred_prog = lr * dot(grad.flatten(), grad.flatten()) |
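# To first order, a step of size lr along grad changes the objective by about
# lr * ||grad||^2 (the pred_prog printed at the top of the next iteration), so
# prog_lr = desired_prog / ||grad||^2 targets a fixed amount of progress per step;
# early_prog caps that target early on, and late_prog_mult shrinks it as the
# pushed unit's value (obj) approaches 1.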
||||
elif lr_policy == 'constant': |
||||
lr = lr_params['lr'] |
||||
else: |
||||
raise Exception('Unimplemented lr_policy')
||||
|
||||
print ' change size:', abs(lr * grad).max() |
||||
old_obj = obj |
||||
|
||||
|
||||
if ii < N-1: |
||||
X += lr * grad |
||||
X *= (1 - decay) |
||||
|
||||
if blur_radius > 0: |
||||
if blur_radius < .3: |
||||
raise Exception('blur-radius of .3 or less works very poorly') |
||||
oldX = X.copy() |
||||
if ii % blur_every == 0: |
||||
for channel in range(3): |
||||
cimg = gaussian_filter(X[0,channel], blur_radius) |
||||
X[0,channel] = cimg |
||||
if small_val_percentile > 0: |
||||
small_entries = (abs(X) < percentile(abs(X), small_val_percentile)) |
||||
X = X - X*small_entries # set smallest 50% of X to zero |
||||
|
||||
if small_norm_percentile > 0: |
||||
pxnorms = norm(X, axis=1) |
||||
smallpx = pxnorms < percentile(pxnorms, small_norm_percentile) |
||||
smallpx3 = tile(smallpx[:,newaxis,:,:], (1,3,1,1)) |
||||
X = X - X*smallpx3 |
||||
|
||||
if px_benefit_percentile > 0: |
||||
pred_0_benefit = grad * -X |
||||
px_benefit = pred_0_benefit.sum(1) |
||||
smallben = px_benefit < percentile(px_benefit, px_benefit_percentile) |
||||
smallben3 = tile(smallben[:,newaxis,:,:], (1,3,1,1)) |
||||
X = X - X*smallben3 |
||||
|
||||
if px_abs_benefit_percentile > 0: |
||||
pred_0_benefit = grad * -X |
||||
px_benefit = pred_0_benefit.sum(1) |
||||
smallaben = abs(px_benefit) < percentile(abs(px_benefit), px_abs_benefit_percentile) |
||||
smallaben3 = tile(smallaben[:,newaxis,:,:], (1,3,1,1)) |
||||
X = X - X*smallaben3 |
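# The blocks above apply the optional regularizers: periodic Gaussian blur
# (blur_radius / blur_every), zeroing the smallest-magnitude entries
# (small_val_percentile), zeroing pixels with the smallest norm across channels
# (small_norm_percentile), and zeroing pixels whose first-order estimate of the
# objective change from setting them to zero (grad * -X, summed over channels)
# falls below the given percentile, signed (px_benefit_percentile) or in
# absolute value (px_abs_benefit_percentile).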
||||
|
||||
|
||||
if class_unit: |
||||
if result['iter_maj'] != -1: |
||||
print 'Result: majority success' |
||||
else: |
||||
print 'Result: no convergence' |
||||
|
||||
for suffix in ('maj', '99', '999', '9999', 'best'): |
||||
if result['X_'+suffix] is not None: |
||||
asimg = net.deprocess('data', result['X_'+suffix]) |
||||
if suffix == 'best': |
||||
best_X = asimg.copy() |
||||
saveimagescc('%s_%s_X.jpg' % (prefix, suffix), asimg, 0) |
||||
saveimagesc('%s_%s_Xpm.jpg' % (prefix, suffix), asimg + mnirgb) |
||||
del result['X_'+suffix] |
||||
with open('%s_info.pkl' % prefix, 'w') as ff: |
||||
pickle.dump(result, ff) |
||||
with open('%s_info.txt' % prefix, 'w') as ff: |
||||
for key in sorted(result.keys()): |
||||
print >>ff, key, result[key] |
||||
|
||||
return best_X |
||||
|
||||
|
||||
def main(): |
||||
parser = argparse.ArgumentParser(description='Finds images that activate a network in various ways.') |
||||
parser.add_argument('--lr', type = float, default = .01) |
||||
parser.add_argument('--decay', type = float, default = .01) |
||||
parser.add_argument('--N', type = int, default = 300) |
||||
parser.add_argument('--rseed', type = int, default = 0) |
||||
parser.add_argument('--push_idx', type = int, default = -1) |
||||
parser.add_argument('--start_at', type = str, default = 'mean_plus') |
||||
parser.add_argument('--prefix', type = str, default = '%(push_idx)03d') |
||||
parser.add_argument('--multi_idx_start', type = int, default = -1) |
||||
parser.add_argument('--multi_idx_end', type = int, default = -1) |
||||
args = parser.parse_args() |
||||
|
||||
assert (args.push_idx == -1) != (args.multi_idx_start == -1 and args.multi_idx_end == -1), 'Use push_idx xor multi*' |
||||
assert (args.multi_idx_start == -1) == (args.multi_idx_end == -1), 'Use all multi* or none' |
||||
|
||||
net, mnirgb, mn4d, labels = load_net_mean() |
||||
|
||||
if args.push_idx != -1: |
||||
range_start = args.push_idx |
||||
range_end = args.push_idx + 1 |
||||
else: |
||||
range_start = args.multi_idx_start |
||||
range_end = args.multi_idx_end |
||||
for push_idx in range(range_start, range_end): |
||||
prefix_dict = vars(args) |
||||
prefix_dict['push_idx'] = push_idx |
||||
prefix_str = args.prefix % prefix_dict |
||||
print '\n\nFinding image' |
||||
print 'prefix_str', prefix_str |
||||
find_image(net, mnirgb, mn4d, labels,
           decay = args.decay, N = args.N, rseed = args.rseed,
           push_idx = push_idx,   # loop variable, so --multi_idx_* ranges visit each class
           start_at = args.start_at, prefix = prefix_str)
           # step sizes are chosen by find_image's lr_policy / lr_params defaults
||||
|
||||
|
||||
|
||||
if __name__ == '__main__': |
||||
main() |
@ -0,0 +1,97 @@ |
||||
#! /usr/bin/env python |
||||
|
||||
from pylab import * |
||||
import os |
||||
import argparse |
||||
import ipdb as pdb |
||||
|
||||
from find_fooling_image import load_net_mean, find_image |
||||
|
||||
|
||||
|
||||
def rchoose(choices, prob=None): |
||||
if prob is None: |
||||
prob = ones(len(choices)) |
||||
prob = array(prob, dtype='float') |
||||
return np.random.choice(choices, p=prob/prob.sum()) |
||||
|
||||
|
||||
|
||||
def main(): |
||||
parser = argparse.ArgumentParser(description='Hyperparam search') |
||||
parser.add_argument('--result_prefix', type = str, default = './junk') |
||||
parser.add_argument('--hp_seed', type = int, default = 0) |
||||
parser.add_argument('--start_seed', type = int, default = 0) |
||||
parser.add_argument('--push_idx', type = int, default = 278) |
||||
parser.add_argument('--layer', type = str, default = 'prob', choices = ('fc8', 'prob')) |
||||
parser.add_argument('--startat', type = int, default = 0, choices = (0, 1)) |
||||
args = parser.parse_args() |
||||
|
||||
push_idx = args.push_idx |
||||
small_val_percentile = 0 |
||||
start_at = 'mean_plus' if args.startat == 0 else 'randu' |
||||
|
||||
if args.hp_seed == -1: |
||||
# Special hp_seed of -1 to do gradient descent without any regularization |
||||
decay = 0 |
||||
N = 500 |
||||
early_prog = .02 |
||||
late_prog_mult = .1 |
||||
blur_radius = 0 |
||||
blur_every = 1 |
||||
small_norm_percentile = 0 |
||||
px_benefit_percentile = 0 |
||||
px_abs_benefit_percentile = 0 |
||||
else: |
||||
np.random.seed(args.hp_seed) |
||||
|
||||
# Choose hyperparameter values given this seed |
||||
decay = rchoose((0, .0001, .001, .01, .1, .2, .3), |
||||
(4, 1, 1, 2, 1, 1, 1)) |
||||
N = rchoose((250, 500, 750, 1000, 1500)) |
||||
early_prog = rchoose( |
||||
(.02, .03, .04), |
||||
(1, 2, 1)) |
||||
late_prog_mult = rchoose((.02, .05, .1, .2)) |
||||
blur_radius = rchoose( |
||||
(0, .3, .4, .5, 1.0), |
||||
(10, 2, 1, 1, 1)) |
||||
blur_every = rchoose((1, 2, 3, 4)) |
||||
small_norm_percentile = rchoose( |
||||
(0, 10, 20, 30, 50, 80, 90), |
||||
(10, 10, 5, 2, 2, 2, 2)) |
||||
px_benefit_percentile = rchoose( |
||||
(0, 10, 20, 30, 50, 80, 90), |
||||
(20, 10, 5, 2, 2, 2, 2)) |
||||
px_abs_benefit_percentile = rchoose( |
||||
(0, 10, 20, 30, 50, 80, 90), |
||||
(10, 10, 5, 2, 2, 2, 2)) |
||||
|
||||
prefix = args.result_prefix |
||||
print 'prefix is', prefix |
||||
|
||||
net, mnirgb, mn4d, labels = load_net_mean() |
||||
|
||||
find_image(net, mnirgb, mn4d, labels, |
||||
decay = decay, |
||||
N = N, |
||||
rseed = args.start_seed, |
||||
push_idx = push_idx, |
||||
start_at = start_at, |
||||
prefix = prefix, |
||||
lr_policy = 'progress', |
||||
lr_params = {'max_lr': 1e7, |
||||
'early_prog': early_prog, |
||||
'late_prog_mult': late_prog_mult}, |
||||
blur_radius = blur_radius, |
||||
blur_every = blur_every, |
||||
small_val_percentile = small_val_percentile, |
||||
small_norm_percentile = small_norm_percentile, |
||||
px_benefit_percentile = px_benefit_percentile, |
||||
px_abs_benefit_percentile = px_abs_benefit_percentile, |
||||
) |
||||
|
||||
|
||||
|
||||
if __name__ == '__main__': |
||||
main() |
@ -0,0 +1,112 @@ |
||||
#! /usr/bin/env python |
||||
|
||||
from pylab import *
import numpy as np
import matplotlib
import matplotlib.pyplot as plt
||||
|
||||
|
||||
|
||||
def figsize(width,height): |
||||
rcParams['figure.figsize'] = (width,height) |
||||
|
||||
|
||||
|
||||
def norm01(arr): |
||||
arr = arr.copy() |
||||
arr -= arr.min() |
||||
arr /= arr.max() |
||||
return arr |
||||
|
||||
|
||||
|
||||
def norm01c(arr, center): |
||||
'''Maps the center value to .5''' |
||||
arr = arr.copy() |
||||
arr -= center |
||||
arr /= max(2 * arr.max(), -2 * arr.min()) |
||||
arr += .5 |
||||
assert arr.min() >= 0 |
||||
assert arr.max() <= 1 |
||||
return arr |
||||
|
||||
|
||||
|
||||
def showimage(im, c01=False, bgr=False): |
||||
if c01: |
||||
# switch order from c,0,1 -> 0,1,c |
||||
im = im.transpose((1,2,0)) |
||||
if im.ndim == 3 and bgr: |
||||
# Change from BGR -> RGB |
||||
im = im[:, :, ::-1] |
||||
plt.imshow(im) |
||||
#axis('tight') |
||||
|
||||
def showimagesc(im, c01=False, bgr=False): |
||||
showimage(norm01(im), c01=c01, bgr=bgr) |
||||
|
||||
|
||||
|
||||
def saveimage(filename, im): |
||||
matplotlib.image.imsave(filename, im) |
||||
|
||||
def saveimagesc(filename, im): |
||||
saveimage(filename, norm01(im)) |
||||
|
||||
def saveimagescc(filename, im, center): |
||||
saveimage(filename, norm01c(im, center)) |
||||
|
||||
|
||||
|
||||
def tile_images(data, padsize=1, padval=0, c01=False, width=None): |
||||
'''take an array of shape (n, height, width) or (n, height, width, channels) |
||||
and visualize each (height, width) thing in a grid. If width = None, produce |
||||
a square image of size approx. sqrt(n) by sqrt(n), else calculate height.''' |
||||
data = data.copy() |
||||
if c01: |
||||
# Convert c01 -> 01c |
||||
data = data.transpose(0, 2, 3, 1) |
||||
data -= data.min() |
||||
data /= data.max() |
||||
|
||||
# force the number of filters to be square |
||||
    if width is None:
||||
width = int(np.ceil(np.sqrt(data.shape[0]))) |
||||
height = width |
||||
else: |
||||
assert isinstance(width, int) |
||||
height = int(np.ceil(float(data.shape[0]) / width)) |
||||
padding = ((0, width*height - data.shape[0]), (0, padsize), (0, padsize)) + ((0, 0),) * (data.ndim - 3) |
||||
data = np.pad(data, padding, mode='constant', constant_values=(padval, padval)) |
||||
|
||||
# tile the filters into an image |
||||
data = data.reshape((height, width) + data.shape[1:]).transpose((0, 2, 1, 3) + tuple(range(4, data.ndim + 1))) |
||||
data = data.reshape((height * data.shape[1], width * data.shape[3]) + data.shape[4:]) |
||||
    if padsize > 0:
        data = data[0:-padsize, 0:-padsize] # remove excess padding (skip when padsize == 0 to avoid empty slices)
||||
|
||||
return data |
||||
|
||||
|
||||
|
||||
def vis_square(data, padsize=1, padval=0, c01=False): |
||||
data = tile_images(data, padsize, padval, c01) |
||||
showimage(data, c01=False) |
||||
|
||||
|
||||
|
||||
def shownet(net): |
||||
'''Print some stats about a net and its activations''' |
||||
|
||||
print '%-41s%-31s%s' % ('', 'acts', 'act diffs') |
||||
print '%-45s%-31s%s' % ('', 'params', 'param diffs') |
||||
for k, v in net.blobs.items(): |
||||
if k in net.params: |
||||
params = net.params[k] |
||||
for pp, blob in enumerate(params): |
||||
if pp == 0: |
||||
print ' ', 'P: %-5s'%k, |
||||
else: |
||||
print ' ' * 11, |
||||
print '%-32s' % repr(blob.data.shape), |
||||
print '%-30s' % ('(%g, %g)' % (blob.data.min(), blob.data.max())), |
||||
print '(%g, %g)' % (blob.diff.min(), blob.diff.max()) |
||||
print '%-5s'%k, '%-34s' % repr(v.data.shape), |
||||
print '%-30s' % ('(%g, %g)' % (v.data.min(), v.data.max())), |
||||
print '(%g, %g)' % (v.diff.min(), v.diff.max()) |
@ -0,0 +1,7 @@ |
||||
#! /bin/bash |
||||
|
||||
echo "just for reference" |
||||
exit 0 |
||||
|
||||
for idx in 0 1 2 3 4; do ./find_fooling_image.py --push_idx $idx --N 1500 --decay .03 --lr .001 --prefix 'result_idx3/idx_%(push_idx)03d_decay_%(decay).03f_lr_%(lr).03f_'; done |
||||
for idx in 0 1 2 3 4; do ./find_fooling_image.py --push_idx $idx --N 1500 --decay .00 --lr .001 --prefix 'result_idx3/idx_%(push_idx)03d_decay_%(decay).03f_lr_%(lr).03f_'; done |
@ -0,0 +1,31 @@ |
||||
#! /bin/bash -x |
||||
|
||||
thisscript=$(readlink -f $0) |
||||
scriptdir=`dirname $thisscript` |
||||
|
||||
|
||||
for hp_seed in -1 169 188 360; do |
||||
#for push_idx in 278 543 251 99 906 805; do |
||||
for push_idx in 200 207 215 279 366 367 390 414 445 500 509 580 643 657 704 713 782 805 826 906; do |
||||
for start_seed in `seq 0 4`; do |
||||
startat=0 |
||||
|
||||
seed_dir=`printf "seed_%04d" $hp_seed` |
||||
result_dir="$scriptdir/results/supplementary_imgs/$seed_dir" |
||||
mkdir -p $result_dir |
||||
run_str=`printf 's%04d_idx%03d_sa%d_ss%02d' $hp_seed $push_idx $startat $start_seed` |
||||
jobname="job_${run_str}" |
||||
|
||||
script="$result_dir/run_${run_str}.sh" |
||||
result_prefix="$result_dir/$run_str" |
||||
|
||||
echo "#! /bin/bash" > $script |
||||
echo "cd $scriptdir" >> $script |
||||
echo "./hyperparam_search.py --result_prefix $result_prefix --hp_seed $hp_seed --push_idx $push_idx --start_seed $start_seed --startat $startat 2>&1" >> $script |
||||
chmod +x $script |
||||
|
||||
qsub -N "$jobname" -A ACCOUNT_NAME -l nodes=1:ppn=2 -l walltime="1:00:00" -d "$result_dir" $script |
||||
done |
||||
done |
||||
done |
||||
|
@ -0,0 +1,32 @@ |
||||
#! /bin/bash -x |
||||
|
||||
thisscript=$(readlink -f $0) |
||||
scriptdir=`dirname $thisscript` |
||||
|
||||
for hp_seed in `seq 101 399`; do |
||||
#for hp_seed in 0; do |
||||
for push_idx in 278 543 251 99 906 805; do |
||||
#for push_idx in 278; do |
||||
startat=0 |
||||
start_seed=0 |
||||
|
||||
seed_dir=`printf "seed_%04d" $hp_seed` |
||||
result_dir="$scriptdir/results/$seed_dir" |
||||
mkdir -p $result_dir |
||||
run_str=`printf 's%04d_idx%03d_sa%d_ss%02d' $hp_seed $push_idx $startat $start_seed`
||||
jobname="job_${run_str}" |
||||
|
||||
script="$result_dir/run_${run_str}.sh" |
||||
result_prefix="$result_dir/$run_str" |
||||
|
||||
echo "#! /bin/bash" > $script |
||||
echo "cd $scriptdir" >> $script |
||||
echo "./hyperparam_search.py --result_prefix $result_prefix --hp_seed $hp_seed --push_idx $push_idx --start_seed $start_seed --startat $startat 2>&1" >> $script |
||||
chmod +x $script |
||||
|
||||
qsub -N "$jobname" -A ACCOUNT_NAME -l nodes=1:ppn=2 -l walltime="1:00:00" -d "$result_dir" $script |
||||
|
||||
#sleep 1 |
||||
done |
||||
done |
||||
|
@ -0,0 +1,53 @@ |
||||
Bourne Shell |
||||
filter remove_matches ^\s*# |
||||
filter remove_inline #.*$ |
||||
extension sh |
||||
script_exe sh |
||||
C |
||||
filter remove_matches ^\s*// |
||||
filter call_regexp_common C |
||||
filter remove_inline //.*$ |
||||
extension c |
||||
extension ec |
||||
extension pgc |
||||
C++ |
||||
filter remove_matches ^\s*// |
||||
filter remove_inline //.*$ |
||||
filter call_regexp_common C |
||||
extension C |
||||
extension cc |
||||
extension cpp |
||||
extension cxx |
||||
extension pcc |
||||
C/C++ Header |
||||
filter remove_matches ^\s*// |
||||
filter call_regexp_common C |
||||
filter remove_inline //.*$ |
||||
extension H |
||||
extension h |
||||
extension hh |
||||
extension hpp |
||||
Cuda |
||||
filter remove_matches ^\s*// |
||||
filter remove_inline //.*$ |
||||
filter call_regexp_common C |
||||
extension cu |
||||
Python |
||||
filter remove_matches ^\s*# |
||||
filter docstring_to_C |
||||
filter call_regexp_common C |
||||
filter remove_inline #.*$ |
||||
extension py |
||||
make |
||||
filter remove_matches ^\s*# |
||||
filter remove_inline #.*$ |
||||
extension Gnumakefile |
||||
extension Makefile |
||||
extension am |
||||
extension gnumakefile |
||||
extension makefile |
||||
filename Gnumakefile |
||||
filename Makefile |
||||
filename gnumakefile |
||||
filename makefile |
||||
script_exe make |
@ -0,0 +1 @@ |
||||
caffe.berkeleyvision.org |
@ -0,0 +1,3 @@ |
||||
To generate Markdown that you can paste into an .md page from an IPython notebook, run
||||
|
||||
ipython nbconvert --to markdown <notebook_file> |
@ -0,0 +1,52 @@ |
||||
<!doctype html> |
||||
<html> |
||||
<head> |
||||
<meta charset="utf-8"> |
||||
<meta http-equiv="X-UA-Compatible" content="chrome=1"> |
||||
<title>Caffe</title> |
||||
|
||||
<link rel="stylesheet" href="stylesheets/reset.css"> |
||||
<link rel="stylesheet" href="stylesheets/styles.css"> |
||||
<link rel="stylesheet" href="stylesheets/pygment_trac.css"> |
||||
<script src="javascripts/scale.fix.js"></script> |
||||
<meta name="viewport" content="width=device-width, initial-scale=1, user-scalable=no"> |
||||
<!--[if lt IE 9]> |
||||
<script src="//html5shiv.googlecode.com/svn/trunk/html5.js"></script> |
||||
<![endif]--> |
||||
</head> |
||||
<body> |
||||
<script> |
||||
(function(i,s,o,g,r,a,m){i['GoogleAnalyticsObject']=r;i[r]=i[r]||function(){ |
||||
(i[r].q=i[r].q||[]).push(arguments)},i[r].l=1*new Date();a=s.createElement(o), |
||||
m=s.getElementsByTagName(o)[0];a.async=1;a.src=g;m.parentNode.insertBefore(a,m) |
||||
})(window,document,'script','//www.google-analytics.com/analytics.js','ga'); |
||||
|
||||
ga('create', 'UA-46255508-1', 'daggerfs.com'); |
||||
ga('send', 'pageview'); |
||||
</script> |
||||
<div class="wrapper"> |
||||
<header> |
||||
<h1 class="header"><a href="index.html">Caffe</a></h1> |
||||
<p class="header">Convolutional Architecture for Fast Feature Embedding</p> |
||||
|
||||
<ul> |
||||
<!--<li class="download"><a class="buttons" href="https://github.com/BVLC/caffe/zipball/master">Download ZIP</a></li> |
||||
<li class="download"><a class="buttons" href="https://github.com/BVLC/caffe/tarball/master">Download TAR</a></li>--> |
||||
<li><a class="buttons github" href="https://github.com/BVLC/caffe">View On GitHub</a></li> |
||||
</ul> |
||||
<p class="header">Maintained by<br><a class="header name" href="http://bvlc.eecs.berkeley.edu/">BVLC</a></p> |
||||
<p class="header">Created by<br><a class="header name" href="http://daggerfs.com/">Yangqing Jia</a></p> |
||||
|
||||
</header> |
||||
<section> |
||||
|
||||
{{ content }} |
||||
|
||||
</section> |
||||
<footer> |
||||
<p><small>Hosted on <a href="http://pages.github.com">GitHub Pages</a>.</small></p> |
||||
</footer> |
||||
</div> |
||||
<!--[if !IE]><script>fixScale(document);</script><![endif]--> |
||||
</body> |
||||
</html> |
@ -0,0 +1,95 @@ |
||||
--- |
||||
layout: default |
||||
title: Caffe |
||||
--- |
||||
|
||||
Alex's CIFAR-10 tutorial, Caffe style |
||||
===================================== |
||||
|
||||
Alex Krizhevsky's [cuda-convnet](https://code.google.com/p/cuda-convnet/) details the model definitions, parameters, and training procedure for good performance on CIFAR-10. This example reproduces his results in Caffe. |
||||
|
||||
We will assume that you have Caffe successfully compiled. If not, please refer to the [Installation page](installation.html). In this tutorial, we will assume that your caffe installation is located at `CAFFE_ROOT`. |
||||
|
||||
We thank @chyojn for the pull request that defined the model schemas and solver configurations. |
||||
|
||||
*This example is a work-in-progress. It would be nice to further explain details of the network and training choices and benchmark the full training.* |
||||
|
||||
Prepare the Dataset |
||||
------------------- |
||||
|
||||
You will first need to download and convert the data format from the [CIFAR-10 website](http://www.cs.toronto.edu/~kriz/cifar.html). To do this, simply run the following commands: |
||||
|
||||
cd $CAFFE_ROOT/data/cifar10 |
||||
./get_cifar10.sh |
||||
cd $CAFFE_ROOT/examples/cifar10 |
||||
./create_cifar10.sh |
||||
|
||||
If it complains that `wget` or `gunzip` are not installed, you need to install them respectively. After running the script there should be the dataset, `./cifar10-leveldb`, and the data set image mean `./mean.binaryproto`. |
||||
|
||||
The Model |
||||
--------- |
||||
|
||||
The CIFAR-10 model is a CNN that composes layers of convolution, pooling, rectified linear unit (ReLU) nonlinearities, and local contrast normalization with a linear classifier on top of it all. We have defined the model in the `CAFFE_ROOT/examples/cifar10` directory's `cifar10_quick_train.prototxt`. |
||||
|
||||
Training and Testing the "Quick" Model |
||||
-------------------------------------- |
||||
|
||||
Training the model is simple after you have written the network definition protobuf and solver protobuf files. Simply run `train_quick.sh`, or the following command directly: |
||||
|
||||
cd $CAFFE_ROOT/examples/cifar10 |
||||
./train_quick.sh |
||||
|
||||
`train_quick.sh` is a simple script, so have a look inside. `GLOG_logtostderr=1` is the google logging flag that prints all the logging messages directly to stderr. The main tool for training is `train_net.bin`, with the solver protobuf text file as its argument. |
||||
|
||||
When you run the code, you will see a lot of messages flying by like this: |
||||
|
||||
I0317 21:52:48.945710 2008298256 net.cpp:74] Creating Layer conv1 |
||||
I0317 21:52:48.945716 2008298256 net.cpp:84] conv1 <- data |
||||
I0317 21:52:48.945725 2008298256 net.cpp:110] conv1 -> conv1 |
||||
I0317 21:52:49.298691 2008298256 net.cpp:125] Top shape: 100 32 32 32 (3276800) |
||||
I0317 21:52:49.298719 2008298256 net.cpp:151] conv1 needs backward computation. |
||||
|
||||
These messages tell you the details about each layer, its connections and its output shape, which may be helpful in debugging. After the initialization, the training will start: |
||||
|
||||
I0317 21:52:49.309370 2008298256 net.cpp:166] Network initialization done. |
||||
I0317 21:52:49.309376 2008298256 net.cpp:167] Memory required for Data 23790808 |
||||
I0317 21:52:49.309422 2008298256 solver.cpp:36] Solver scaffolding done. |
||||
I0317 21:52:49.309447 2008298256 solver.cpp:47] Solving CIFAR10_quick_train |
||||
|
||||
Based on the solver setting, we will print the training loss function every 100 iterations, and test the network every 500 iterations. You will see messages like this: |
||||
|
||||
I0317 21:53:12.179772 2008298256 solver.cpp:208] Iteration 100, lr = 0.001 |
||||
I0317 21:53:12.185698 2008298256 solver.cpp:65] Iteration 100, loss = 1.73643 |
||||
... |
||||
I0317 21:54:41.150030 2008298256 solver.cpp:87] Iteration 500, Testing net |
||||
I0317 21:54:47.129461 2008298256 solver.cpp:114] Test score #0: 0.5504 |
||||
I0317 21:54:47.129500 2008298256 solver.cpp:114] Test score #1: 1.27805 |
||||
|
||||
For each training iteration, `lr` is the learning rate of that iteration, and `loss` is the training function. For the output of the testing phase, **score 0 is the accuracy**, and **score 1 is the testing loss function**. |
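
If you would like to track the loss curve, a minimal sketch (not part of the Caffe tools) that pulls the iteration/loss pairs out of a saved log might look like the following; the `train.log` filename is hypothetical (e.g. the output captured by redirecting stderr to a file).

    import re

    # Collect (iteration, loss) pairs from a saved training log.
    pattern = re.compile(r'Iteration (\d+), loss = ([\d.]+)')
    points = []
    with open('train.log') as f:
        for line in f:
            m = pattern.search(line)
            if m:
                points.append((int(m.group(1)), float(m.group(2))))
    for it, loss in points:
        print('iter %d: loss %.4f' % (it, loss))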
||||
|
||||
And after making yourself a cup of coffee, you are done! |
||||
|
||||
I0317 22:12:19.666914 2008298256 solver.cpp:87] Iteration 5000, Testing net |
||||
I0317 22:12:25.580330 2008298256 solver.cpp:114] Test score #0: 0.7533 |
||||
I0317 22:12:25.580379 2008298256 solver.cpp:114] Test score #1: 0.739837 |
||||
I0317 22:12:25.587262 2008298256 solver.cpp:130] Snapshotting to cifar10_quick_iter_5000 |
||||
I0317 22:12:25.590215 2008298256 solver.cpp:137] Snapshotting solver state to cifar10_quick_iter_5000.solverstate |
||||
I0317 22:12:25.592813 2008298256 solver.cpp:81] Optimization Done. |
||||
|
||||
Our model achieved ~75% test accuracy. The model parameters are stored in binary protobuf format in |
||||
|
||||
cifar10_quick_iter_5000 |
||||
|
||||
which is ready-to-deploy in CPU or GPU mode! Refer to the `CAFFE_ROOT/examples/cifar10/cifar10_quick.prototxt` for the deployment model definition that can be called on new data. |
||||
|
||||
Why train on a GPU? |
||||
------------------- |
||||
|
||||
CIFAR-10, while still small, has enough data to make GPU training attractive. |
||||
|
||||
To compare CPU vs. GPU training speed, simply change one line in all the `cifar*solver.prototxt`: |
||||
|
||||
# solver mode: CPU or GPU |
||||
solver_mode: CPU |
||||
|
||||
and you will be using CPU for training. |
@ -0,0 +1,63 @@ |
||||
--- |
||||
layout: default |
||||
title: Caffe |
||||
--- |
||||
|
||||
Developing & Contributing |
||||
========================= |
||||
|
||||
Caffe is developed with active participation of the community by the [Berkeley Vision and Learning Center](http://bvlc.eecs.berkeley.edu/). |
||||
We welcome all contributions! |
||||
|
||||
The [contributing workflow](https://github.com/BVLC/caffe#development) is explained in the README. These guidelines cover development practices in Caffe. This is a work-in-progress. |
||||
|
||||
**Development Flow** |
||||
|
||||
- `master` is golden. |
||||
- `dev` is for new development: it is the branching point for features and the base of pull requests. |
||||
* The history of `dev` is not rewritten. |
||||
* Contributions are shepherded from `dev` to `master` by BVLC by merge. |
||||
- To err is human. Accidents are fixed by reverts. |
||||
- Releases are marked with tags on merge from `dev` to `master`. |
||||
|
||||
**Issues & Pull Request Protocol** |
||||
|
||||
0. Make issues for [bugs](https://github.com/BVLC/caffe/issues?labels=bug&page=1&state=open), tentative proposals, and [questions](https://github.com/BVLC/caffe/issues?labels=question&page=1&state=open). |
||||
1. Make PRs to signal development: |
||||
a. Make PRs *as soon as development begins*. Create a feature branch, make your initial commit, push, and PR to let everyone know you are working on it and let discussion guide development instead of review development after-the-fact. |
||||
b. When a proposal from the first step earns enough interest to warrant development, make a PR, and reference and close the old issue to direct the conversation to the PR. |
||||
2. When a PR is ready, comment to request a maintainer be assigned to review and merge to `dev`. |
||||
|
||||
A PR is only ready for review when the code is committed, documented, linted, and tested! |
||||
|
||||
**Documentation**: the documentation is bundled with Caffe in `docs/`. This includes the site you are reading now. Contributions should be documented both inline in code and through usage examples. New documentation is published by BVLC with each release and between releases as-needed. |
||||
|
||||
We'd appreciate your contribution to the documentation effort! |
||||
|
||||
**Testing**: run `make runtest` to check the project tests. New code requires new tests. Pull requests that fail tests will not be accepted. |
||||
|
||||
The `googletest` framework we use provides many additional options, which you can access by running the test binaries directly. One of the more useful options is `--gtest_filter`, which allows you to filter tests by name: |
||||
|
||||
# run all tests with CPU in the name |
||||
build/test/test_all.testbin --gtest_filter='*CPU*' |
||||
|
||||
# run all tests without GPU in the name (note the leading minus sign) |
||||
build/test/test_all.testbin --gtest_filter=-'*GPU*' |
||||
|
||||
To get a list of all options `googletest` provides, simply pass the `--help` flag: |
||||
|
||||
build/test/test_all.testbin --help |
||||
|
||||
**Style** |
||||
|
||||
- Follow [Google C++ style](http://google-styleguide.googlecode.com/svn/trunk/cppguide.xml) and [Google python style](http://google-styleguide.googlecode.com/svn/trunk/pyguide.html) + [PEP 8](http://legacy.python.org/dev/peps/pep-0008/). |
||||
- Wrap lines at 80 chars. |
||||
- Remember that “a foolish consistency is the hobgoblin of little minds,” so use your best judgement to write the clearest code for your particular case. |
||||
|
||||
**Lint**: run `make lint` to check C++ code. |
||||
|
||||
**Copyright**: assign copyright jointly to BVLC and contributors like so: |
||||
|
||||
// Copyright 2014 BVLC and contributors. |
||||
|
||||
The exact details of contributions are recorded by versioning and cited in our [acknowledgements](http://caffe.berkeleyvision.org/#acknowledgements). This method is impartial and always up-to-date. |
@ -0,0 +1,71 @@ |
||||
--- |
||||
layout: default |
||||
title: Caffe |
||||
--- |
||||
|
||||
Extracting Features |
||||
=================== |
||||
|
||||
In this tutorial, we will extract features using a pre-trained model. |
||||
Follow instructions for [setting up caffe](installation.html) and for [getting](getting_pretrained_models.html) the pre-trained ImageNet model. |
||||
If you need detailed information about the tools below, please consult their source code, in which additional documentation is usually provided. |
||||
|
||||
Select data to run on |
||||
--------------------- |
||||
|
||||
We'll make a temporary folder to store things into. |
||||
|
||||
mkdir examples/_temp |
||||
|
||||
Generate a list of the files to process. |
||||
We're going to use the images that ship with caffe. |
||||
|
||||
find `pwd`/examples/images -type f -exec echo {} \; > examples/_temp/temp.txt |
||||
|
||||
The `ImageDataLayer` we'll use expects a label after each filename, so let's add a 0 to the end of each line
||||
|
||||
sed "s/$/ 0/" examples/_temp/temp.txt > examples/_temp/file_list.txt |
||||
|
||||
Define the Feature Extraction Network Architecture |
||||
-------------------------------------------------- |
||||
|
||||
In practice, subtracting the mean image from a dataset significantly improves classification accuracies. |
||||
Download the mean image of the ILSVRC dataset. |
||||
|
||||
data/ilsvrc12/get_ilsvrc_aux.sh |
||||
|
||||
We will use `data/ilsvrc12/imagenet_mean.binaryproto` in the network definition prototxt.
||||
|
||||
Let's copy and modify the network definition. |
||||
We'll be using the `ImageDataLayer`, which will load and resize images for us. |
||||
|
||||
cp examples/feature_extraction/imagenet_val.prototxt examples/_temp |
||||
|
||||
Edit `examples/_temp/imagenet_val.prototxt` to use the correct paths for your setup (replace `$CAFFE_DIR`).
||||
|
||||
Extract Features |
||||
---------------- |
||||
|
||||
Now everything necessary is in place. |
||||
|
||||
build/tools/extract_features.bin examples/imagenet/caffe_reference_imagenet_model examples/_temp/imagenet_val.prototxt fc7 examples/_temp/features 10 |
||||
|
||||
The name of feature blob that you extract is `fc7`, which represents the highest level feature of the reference model. |
||||
We can use any other layer, as well, such as `conv5` or `pool3`. |
||||
|
||||
The last parameter above is the number of data mini-batches. |
||||
|
||||
The features are stored to LevelDB `examples/_temp/features`, ready for access by some other code. |
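
One possible way to peek at the stored features from Python is sketched below. This is not part of the tutorial; it assumes the py-leveldb bindings are installed, that the compiled `caffe.proto.caffe_pb2` module is on your `PYTHONPATH`, and that each stored value is a serialized `Datum` whose features live in `float_data`.

    import numpy as np
    import leveldb                      # py-leveldb bindings (assumed installed)
    from caffe.proto import caffe_pb2   # compiled Caffe protobuf definitions

    db = leveldb.LevelDB('examples/_temp/features')
    datum = caffe_pb2.Datum()
    for key, value in db.RangeIter():
        datum.ParseFromString(value)
        feat = np.array(datum.float_data, dtype=np.float32)
        print('%s -> feature vector of length %d' % (key, feat.shape[0]))
        break  # just look at the first entry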
||||
|
||||
If you encounter the error "Check failed: status.ok() Failed to open leveldb examples/_temp/features", it is because the directory `examples/_temp/features` was already created the last time you ran the command. Remove it and run the command again:
||||
|
||||
rm -rf examples/_temp/features/ |
||||
|
||||
If you'd like to use the Python wrapper for extracting features, check out the [layer visualization notebook](http://nbviewer.ipython.org/github/BVLC/caffe/blob/master/examples/filter_visualization.ipynb). |
||||
|
||||
Clean Up |
||||
-------- |
||||
|
||||
Let's remove the temporary directory now. |
||||
|
||||
rm -r examples/_temp |
@ -0,0 +1,29 @@ |
||||
--- |
||||
layout: default |
||||
--- |
||||
|
||||
# Pre-trained models |
||||
|
||||
[BVLC](http://bvlc.eecs.berkeley.edu) aims to provide a variety of high quality pre-trained models. |
||||
Note that unlike Caffe itself, these models are licensed for **academic research / non-commercial use only**. |
||||
If you have any questions, please get in touch with us. |
||||
|
||||
This page will be updated as more models become available. |
||||
|
||||
### ImageNet |
||||
|
||||
**Caffe Reference ImageNet Model**: Our reference implementation of an ImageNet model trained on ILSVRC-2012 can be downloaded (232.6MB) by running `examples/imagenet/get_caffe_reference_imagenet_model.sh` from the Caffe root directory. |
||||
|
||||
- The bundled model is the iteration 310,000 snapshot. |
||||
- The best validation performance during training was iteration 313,000 with |
||||
validation accuracy 57.412% and loss 1.82328. |
||||
|
||||
**AlexNet**: Our training of the Krizhevsky architecture, which differs from the paper's methodology by (1) not training with the relighting data-augmentation and (2) initializing non-zero biases to 0.1 instead of 1. (2) was found necessary for training, as initialization to 1 gave flat loss. Download the model (243.9MB) by running `examples/imagenet/get_caffe_alexnet_model.sh` from the Caffe root directory. |
||||
|
||||
- The bundled model is the iteration 360,000 snapshot. |
||||
- The best validation performance during training was iteration 358,000 with |
||||
validation accuracy 57.258% and loss 1.83948. |
||||
|
||||
**R-CNN (ILSVRC13)**: The pure Caffe instantiation of the [R-CNN](https://github.com/rbgirshick/rcnn) model for ILSVRC13 detection. Download the model (230.8MB) by running `examples/imagenet/get_caffe_rcnn_imagenet_model.sh` from the Caffe root directory. This model was made by transplanting the R-CNN SVM classifiers into a `fc-rcnn` classification layer, provided here as an off-the-shelf Caffe detector. Try the [detection example](http://nbviewer.ipython.org/github/BVLC/caffe/blob/master/examples/detection.ipynb) to see it in action. For the full details, refer to the R-CNN site. *N.B. For research purposes, make use of the official R-CNN package and not this example.* |
||||
|
||||
Additionally, you will probably eventually need some auxiliary data (mean image, synset list, etc.): run `data/ilsvrc12/get_ilsvrc_aux.sh` from the root directory to obtain it. |
@ -0,0 +1,102 @@ |
||||
--- |
||||
layout: default |
||||
title: Caffe |
||||
--- |
||||
|
||||
Yangqing's Recipe on Brewing ImageNet |
||||
===================================== |
||||
|
||||
"All your braincells are belong to us." |
||||
- Caffeine |
||||
|
||||
We are going to describe a reference implementation for the approach first proposed by Krizhevsky, Sutskever, and Hinton in their [NIPS 2012 paper](http://books.nips.cc/papers/files/nips25/NIPS2012_0534.pdf). Since training the whole model takes some time and energy, we provide a model, trained in the same way as we describe here, to help fight global warming. If you would like to simply use the pretrained model, check out the [Pretrained ImageNet](getting_pretrained_models.html) page. *Note that the pretrained model is for academic research / non-commercial use only*. |
||||
|
||||
To clarify, by ImageNet we actually mean the ILSVRC12 challenge, but you can easily train on the whole of ImageNet as well, just with more disk space, and a little longer training time. |
||||
|
||||
(If you don't get the quote, visit [Yann LeCun's fun page](http://yann.lecun.com/ex/fun/).)
||||
|
||||
Data Preparation |
||||
---------------- |
||||
|
||||
We assume that you already have downloaded the ImageNet training data and validation data, and they are stored on your disk like: |
||||
|
||||
/path/to/imagenet/train/n01440764/n01440764_10026.JPEG |
||||
/path/to/imagenet/val/ILSVRC2012_val_00000001.JPEG |
||||
|
||||
You will first need to prepare some auxiliary data for training. This data can be downloaded by: |
||||
|
||||
cd $CAFFE_ROOT/data/ilsvrc12/ |
||||
./get_ilsvrc_aux.sh |
||||
|
||||
The training and validation input are described in `train.txt` and `val.txt` as text listing all the files and their labels. Note that we use a different indexing for labels than the ILSVRC devkit: we sort the synset names in their ASCII order, and then label them from 0 to 999. See `synset_words.txt` for the synset/name mapping. |
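
Here is a small sketch (not part of the Caffe tools) of how the same 0-999 indexing could be reconstructed from `synset_words.txt`, assuming each line starts with a synset ID followed by its human-readable names.

    # Sort the synset IDs in ASCII order and number them 0..999.
    with open('synset_words.txt') as f:
        synsets = [line.split()[0] for line in f if line.strip()]
    label_of = dict((wnid, i) for i, wnid in enumerate(sorted(synsets)))
    print(label_of['n01440764'])   # 0, if this is the ASCII-smallest synset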
||||
|
||||
You may want to resize the images to 256x256 in advance. By default, we do not explicitly do this because in a cluster environment, one may benefit from resizing images in a parallel fashion, using mapreduce. For example, Yangqing used his lightweight [mincepie](https://github.com/Yangqing/mincepie) package to do mapreduce on the Berkeley cluster. If you prefer something simple and straightforward, you can also use shell commands, something like:
||||
|
||||
for name in /path/to/imagenet/val/*.JPEG; do |
||||
convert -resize 256x256\! $name $name |
||||
done |
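
If you prefer Python over shell, a rough equivalent (a sketch, assuming PIL/Pillow is installed) would be:

    import glob
    from PIL import Image

    # Resize every validation image to 256x256 in place, mirroring the
    # `convert` loop above.
    for name in glob.glob('/path/to/imagenet/val/*.JPEG'):
        Image.open(name).convert('RGB').resize((256, 256), Image.ANTIALIAS).save(name)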
||||
|
||||
Go to `$CAFFE_ROOT/examples/imagenet/` for the rest of this guide. |
||||
|
||||
Take a look at `create_imagenet.sh`. Set the paths to the train and val dirs as needed, and set "RESIZE=true" to resize all images to 256x256 if you haven't resized the images in advance. Now simply create the leveldbs with `./create_imagenet.sh`. Note that `imagenet_train_leveldb` and `imagenet_val_leveldb` should not exist before this execution; they will be created by the script. `GLOG_logtostderr=1` simply dumps more information for you to inspect, and you can safely ignore it.
||||
|
||||
Compute Image Mean |
||||
------------------ |
||||
|
||||
The model requires us to subtract the image mean from each image, so we have to compute the mean. `tools/compute_image_mean.cpp` implements that - it is also a good example to familiarize yourself with how to manipulate the various components, such as protocol buffers, leveldbs, and logging, if you are not familiar with them. Anyway, the mean computation can be carried out as:
||||
|
||||
./make_imagenet_mean.sh |
||||
|
||||
which will make `data/ilsvrc12/imagenet_mean.binaryproto`. |
||||
|
||||
Network Definition |
||||
------------------ |
||||
|
||||
The network definition follows strictly the one in Krizhevsky et al. You can find the detailed definition at `examples/imagenet/imagenet_train.prototxt`. Note the paths in the data layer - if you have not followed the exact paths in this guide you will need to change the following lines: |
||||
|
||||
source: "ilvsrc12_train_leveldb" |
||||
mean_file: "../../data/ilsvrc12/imagenet_mean.binaryproto" |
||||
|
||||
to point to your own leveldb and image mean. Likewise, do the same for `examples/imagenet/imagenet_val.prototxt`. |
||||
|
||||
If you look carefully at `imagenet_train.prototxt` and `imagenet_val.prototxt`, you will notice that they are largely the same, with the only difference being the data layer sources, and the last layer: in training, we will be using a `softmax_loss` layer to compute the loss function and to initialize the backpropagation, while in validation we will be using an `accuracy` layer to inspect how well we do in terms of accuracy. |
||||
|
||||
We will also lay out a protocol buffer for running the solver. Let's make a few plans: |
||||
* We will run in batches of 256, and run a total of 450,000 iterations (about 90 epochs); a quick arithmetic check of these numbers is sketched after this list.
||||
* For every 1,000 iterations, we test the learned net on the validation data. |
||||
* We set the initial learning rate to 0.01, and decrease it every 100,000 iterations (about 20 epochs). |
||||
* Information will be displayed every 20 iterations.
||||
* The network will be trained with momentum 0.9 and a weight decay of 0.0005. |
||||
* For every 10,000 iterations, we will take a snapshot of the current status. |
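
To see where these numbers come from, here is a back-of-the-envelope check (a sketch, assuming the ILSVRC12 training set of roughly 1.28 million images):

    train_images = 1281167                           # approximate ILSVRC12 training set size
    batch_size = 256
    iters_per_epoch = train_images // batch_size     # ~5,000 iterations per epoch
    print(90 * iters_per_epoch)                      # ~450,000 iterations for 90 epochs
    print(100000 / float(iters_per_epoch))           # a 100,000-iteration lr step is ~20 epochs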
||||
|
||||
Sound good? This is implemented in `examples/imagenet/imagenet_solver.prototxt`. Again, you will need to change the first two lines: |
||||
|
||||
train_net: "imagenet_train.prototxt" |
||||
test_net: "imagenet_val.prototxt" |
||||
|
||||
to point to the actual path if you have changed them. |
||||
|
||||
Training ImageNet |
||||
----------------- |
||||
|
||||
Ready? Let's train. |
||||
|
||||
./train_imagenet.sh |
||||
|
||||
Sit back and enjoy! On my K20 machine, every 20 iterations take about 36 seconds to run, so effectively about 7 ms per image for the full forward-backward pass. About 2.5 ms of this is on forward, and the rest is backward. If you are interested in dissecting the computation time, you can look at `examples/net_speed_benchmark.cpp`, but it was written purely for debugging purpose, so you may need to figure a few things out yourself. |
||||
|
||||
Resume Training? |
||||
---------------- |
||||
|
||||
We all experience times when the power goes out, or we feel like rewarding ourselves a little by playing Battlefield (does someone still remember Quake?). Since we are snapshotting intermediate results during training, we will be able to resume from snapshots. This can be done as easily as:
||||
|
||||
./resume_training.sh |
||||
|
||||
where in the script `caffe_imagenet_train_1000.solverstate` is the solver state snapshot that stores all necessary information to recover the exact solver state (including the parameters, momentum history, etc). |
||||
|
||||
Parting Words |
||||
------------- |
||||
|
||||
Hope you liked this recipe! Many researchers have gone further since the ILSVRC 2012 challenge, changing the network architecture and/or finetuning the various parameters in the network. The recent ILSVRC 2013 challenge suggests that there is quite some room for improvement. **Caffe allows one to explore different network choices more easily, by simply writing different prototxt files** - isn't that exciting?
||||
|
||||
And since now you have a trained network, check out how to use it: [Running Pretrained ImageNet](getting_pretrained_models.html). This time we will use Python, but if you have wrappers for other languages, please kindly send a pull request! |
@ -0,0 +1,79 @@ |
||||
--- |
||||
layout: default |
||||
--- |
||||
# Welcome to Caffe |
||||
|
||||
Caffe is a framework for convolutional neural network algorithms, developed with speed in mind. |
||||
It was created by [Yangqing Jia](http://daggerfs.com), and is in active development by the [Berkeley Vision and Learning Center](http://bvlc.eecs.berkeley.edu). |
||||
|
||||
Caffe is released under [the BSD 2-Clause license](https://github.com/BVLC/caffe/blob/master/LICENSE). |
||||
|
||||
Check out the [classification demo](http://demo.caffe.berkeleyvision.org/)! |
||||
|
||||
## Why Caffe? |
||||
|
||||
Caffe aims to provide computer vision scientists and practitioners with a **clean and modifiable implementation** of state-of-the-art deep learning algorithms. |
||||
For example, network structure is easily specified in separate config files, with no mess of hard-coded parameters in the code. |
||||
|
||||
At the same time, Caffe fits industry needs, with blazing fast C++/CUDA code for GPU computation. |
||||
Caffe is currently the fastest GPU CNN implementation publicly available, and is able to process more than **40 million images per day** with a single NVIDIA K40 or Titan GPU (or 20 million images per day on a K20 GPU)\*. That's 192 images per second during training and 500 images per second during test. |
||||
|
||||
Caffe also provides **seamless switching between CPU and GPU**, which allows one to train models with fast GPUs and then deploy them on non-GPU clusters with one line of code: `Caffe::set_mode(Caffe::CPU)`. |
||||
Even in CPU mode, computing predictions on an image takes only 20 ms when images are processed in batch mode. While in GPU mode, computing predictions on an image takes only 2 ms when images are processed in batch mode. |
||||
|
||||
## Documentation |
||||
|
||||
* [Introductory slides](https://www.dropbox.com/s/10fx16yp5etb8dv/caffe-presentation.pdf): slides about the Caffe architecture, *updated 03/14*. |
||||
* [Installation](/installation.html): Instructions on installing Caffe (works on Ubuntu, Red Hat, OS X). |
||||
* [Pre-trained models](/getting_pretrained_models.html): BVLC provides some pre-trained models for academic / non-commercial use. |
||||
* [Development](/development.html): Guidelines for development and contributing to Caffe. |
||||
|
||||
### Examples |
||||
|
||||
* [Image Classification \[notebook\]][imagenet_classification]: classify images with the pretrained ImageNet model by the Python interface. |
||||
* [Detection \[notebook\]][detection]: run a pretrained model as a detector in Python. |
||||
* [Visualizing Features and Filters \[notebook\]][visualizing_filters]: extracting features and visualizing trained filters with an example image, viewed layer-by-layer. |
||||
* [Editing Model Parameters \[notebook\]][net_surgery]: how to do net surgery and manually change model parameters. |
||||
* [LeNet / MNIST Demo](/mnist.html): end-to-end training and testing of LeNet on MNIST. |
||||
* [CIFAR-10 Demo](/cifar10.html): training and testing on the CIFAR-10 data. |
||||
* [Training ImageNet](/imagenet_training.html): recipe for end-to-end training of an ImageNet classifier. |
||||
* [Feature extraction with C++](/feature_extraction.html): feature extraction using pre-trained model. |
||||
|
||||
[imagenet_classification]: http://nbviewer.ipython.org/github/BVLC/caffe/blob/master/examples/imagenet_classification.ipynb |
||||
[detection]: http://nbviewer.ipython.org/github/BVLC/caffe/blob/master/examples/detection.ipynb |
||||
[visualizing_filters]: http://nbviewer.ipython.org/github/BVLC/caffe/blob/master/examples/filter_visualization.ipynb |
||||
[net_surgery]: http://nbviewer.ipython.org/github/BVLC/caffe/blob/master/examples/net_surgery.ipynb |
||||
|
||||
## Citing Caffe |
||||
|
||||
Please kindly cite Caffe in your publications if it helps your research: |
||||
|
||||
@misc{Jia13caffe, |
||||
Author = {Yangqing Jia}, |
||||
Title = { {Caffe}: An Open Source Convolutional Architecture for Fast Feature Embedding}, |
||||
Year = {2013}, |
||||
Howpublished = {\url{http://caffe.berkeleyvision.org/}}
||||
} |
||||
|
||||
### Acknowledgements |
||||
|
||||
Yangqing would like to thank the NVIDIA Academic program for providing K20 GPUs, and [Oriol Vinyals](http://www1.icsi.berkeley.edu/~vinyals/) for various discussions along the journey. |
||||
|
||||
A core set of BVLC members have contributed lots of new functionality and fixes since the original release (alphabetical by first name): |
||||
|
||||
- [Eric Tzeng](https://github.com/erictzeng) |
||||
- [Evan Shelhamer](http://imaginarynumber.net/) |
||||
- [Jeff Donahue](http://jeffdonahue.com/) |
||||
- [Jon Long](https://github.com/longjon) |
||||
- [Dr. Ross Girshick](http://www.cs.berkeley.edu/~rbg/) |
||||
- [Sergey Karayev](http://sergeykarayev.com/) |
||||
- [Dr. Sergio Guadarrama](http://www.eecs.berkeley.edu/~sguada/) |
||||
|
||||
Additionally, the open-source community plays a large and growing role in Caffe's development. |
||||
Check out the Github [project pulse](https://github.com/BVLC/caffe/pulse) for recent activity, and the [contributors](https://github.com/BVLC/caffe/graphs/contributors) for an ordered list (by commit activity). |
||||
We sincerely appreciate your interest and contributions! |
||||
If you'd like to contribute, read [this](development.html). |
||||
|
||||
--- |
||||
|
||||
\*: When measured with the [SuperVision](http://www.image-net.org/challenges/LSVRC/2012/supervision.pdf) model that won the ImageNet Large Scale Visual Recognition Challenge 2012. See [performance and hardware configuration details](/performance_hardware.html). |
@ -0,0 +1,182 @@ |
||||
--- |
||||
layout: default |
||||
title: Caffe |
||||
--- |
||||
|
||||
# Installation |
||||
|
||||
Prior to installing, it is best to read through this guide and take note of the details for your platform. |
||||
We have successfully compiled and run Caffe on Ubuntu 12.04, OS X 10.8, and OS X 10.9. |
||||
|
||||
- [Prerequisites](#prerequisites) |
||||
- [Compilation](#compilation) |
||||
- [Hardware questions](#hardware_questions) |
||||
|
||||
## Prerequisites |
||||
|
||||
Caffe depends on several software packages. |
||||
|
||||
* [CUDA](https://developer.nvidia.com/cuda-zone) (5.0, 5.5, or 6.0). |
||||
* [BLAS](http://en.wikipedia.org/wiki/Basic_Linear_Algebra_Subprograms) (provided via ATLAS, MKL, or OpenBLAS). |
||||
* [OpenCV](http://opencv.org/). |
||||
* [Boost](http://www.boost.org/) (we have only tested 1.55) |
||||
* `glog`, `gflags`, `protobuf`, `leveldb`, `snappy`, `hdf5` |
||||
* For the Python wrapper |
||||
* `Python`, `numpy (>= 1.7)`, boost-provided `boost.python` |
||||
* For the MATLAB wrapper |
||||
* MATLAB with the `mex` compiler. |
||||
|
||||
### CUDA and BLAS |
||||
|
||||
Caffe requires the CUDA `nvcc` compiler to compile its GPU code. |
||||
To install CUDA, go to the [NVIDIA CUDA website](https://developer.nvidia.com/cuda-downloads) and follow installation instructions there. **Note:** you can install the CUDA libraries without a CUDA card or driver, in order to build and run Caffe on a CPU-only machine. |
||||
|
||||
Caffe requires BLAS as the backend of its matrix and vector computations. |
||||
There are several implementations of this library. |
||||
The choice is yours: |
||||
|
||||
* [ATLAS](http://math-atlas.sourceforge.net/): free, open source, and so the default for Caffe. |
||||
+ Ubuntu: `sudo apt-get install libatlas-base-dev` |
||||
+ CentOS/RHEL: `sudo yum install libatlas-devel` |
||||
+ OS X: already installed as the [Accelerate / vecLib Framework](https://developer.apple.com/library/mac/documentation/Darwin/Reference/ManPages/man7/Accelerate.7.html). |
||||
* [Intel MKL](http://software.intel.com/en-us/intel-mkl): commercial and optimized for Intel CPUs, with a free trial and [student](http://software.intel.com/en-us/intel-education-offerings) licenses. |
||||
1. Install MKL. |
||||
2. Set `BLAS := mkl` in `Makefile.config` |
||||
* [OpenBLAS](http://www.openblas.net/): free and open source; this optimized and parallel BLAS could require more effort to install, although it might offer a speedup. |
||||
1. Install OpenBLAS |
||||
2. Set `BLAS := open` in `Makefile.config` |
||||
|
||||
### Python and/or Matlab wrappers (optional) |
||||
|
||||
Python: The main requirements are `numpy` and `boost.python` (provided by boost). `pandas` is useful too and needed for some examples. |
||||
|
||||
For **OS X**, we highly recommend using the [Anaconda](https://store.continuum.io/cshop/anaconda/) Python distribution, which provides most of the necessary packages, as well as the `hdf5` library dependency. |
||||
If you don't, please use Homebrew -- but beware of potential linking errors! |
||||
|
||||
Note that if you use the **Ubuntu** default python, you will need to `apt-get install` the `python-dev` package to have the python headers. You can install any remaining dependencies with |
||||
|
||||
pip install -r /path/to/caffe/python/requirements.txt |
||||
|
||||
MATLAB: install MATLAB, and make sure that its `mex` is in your `$PATH`. |
||||
|
||||
### The rest of the dependencies |
||||
|
||||
#### Linux |
||||
|
||||
On **Ubuntu**, the remaining dependencies can be installed with |
||||
|
||||
sudo apt-get install libprotobuf-dev libleveldb-dev libsnappy-dev libopencv-dev libboost-all-dev libhdf5-serial-dev |
||||
|
||||
And on **CentOS or RHEL**, you can install via yum using: |
||||
|
||||
sudo yum install protobuf-devel leveldb-devel snappy-devel opencv-devel boost-devel hdf5-devel |
||||
|
||||
The only exception is the Google logging library (glog), which does not exist in the Ubuntu 12.04 or CentOS/RHEL repositories. To install it, do:
||||
|
||||
wget https://google-glog.googlecode.com/files/glog-0.3.3.tar.gz |
||||
tar zxvf glog-0.3.3.tar.gz |
||||
    cd glog-0.3.3
    ./configure
||||
make && make install |
||||
|
||||
#### OS X |
||||
|
||||
On **OS X**, we highly recommend using the [homebrew](http://brew.sh/) package manager, and ideally starting from a clean install of the OS (or from a wiped `/usr/local`) to avoid conflicts. |
||||
In the following, we assume that you're using Anaconda Python and Homebrew. |
||||
|
||||
To install the OpenCV dependency, we'll need to provide an additional source for Homebrew: |
||||
|
||||
brew tap homebrew/science |
||||
|
||||
If using Anaconda Python, a modification is required to the OpenCV formula. |
||||
Do `brew edit opencv` and change the lines that look like the two lines below to exactly the two lines below. |
||||
|
||||
-DPYTHON_LIBRARY=#{py_prefix}/lib/libpython2.7.dylib |
||||
-DPYTHON_INCLUDE_DIR=#{py_prefix}/include/python2.7 |
||||
|
||||
**NOTE**: We find that everything compiles successfully if `$LD_LIBRARY_PATH` is not set at all, and `$DYLD_FALLBACK_LIBRARY_PATH` is set to provide CUDA, Python, and other relevant libraries (e.g. `/usr/local/cuda/lib:$HOME/anaconda/lib:/usr/local/lib:/usr/lib`).
||||
In other `ENV` settings, things may not work as expected. |
||||
|
||||
#### 10.8-specific Instructions |
||||
|
||||
Simply run the following: |
||||
|
||||
brew install --build-from-source --with-python boost |
||||
for x in snappy leveldb protobuf gflags glog szip homebrew/science/opencv; do brew install $x; done |
||||
|
||||
Building boost from source is needed to link against your local Python (exceptions might be raised during some OS X installs, but **ignore** these and continue). If you do not need the Python wrapper, simply doing `brew install boost` is fine. |
||||
|
||||
**Note** that the HDF5 dependency is provided by Anaconda Python in this case. |
||||
If you're not using Anaconda, include `hdf5` in the list above. |
||||
|
||||
#### 10.9-specific Instructions |
||||
|
||||
In OS X 10.9, clang++ is the default C++ compiler and uses `libc++` as the standard library. |
||||
However, NVIDIA CUDA (even version 6.0) currently links only with `libstdc++`. |
||||
This makes it necessary to change the compilation settings for each of the dependencies. |
||||
|
||||
We do this by modifying the homebrew formulae before installing any packages. |
||||
Make sure that homebrew doesn't install any software dependencies in the background; all packages must be linked to `libstdc++`. |
||||
|
||||
The prerequisite homebrew formulae are |
||||
|
||||
boost snappy leveldb protobuf gflags glog szip homebrew/science/opencv |
||||
|
||||
For each of these formulas, `brew edit FORMULA`, and add the ENV definitions as shown: |
||||
|
||||
def install |
||||
# ADD THE FOLLOWING: |
||||
ENV.append "CXXFLAGS", "-stdlib=libstdc++" |
||||
ENV.append "CFLAGS", "-stdlib=libstdc++" |
||||
ENV.append "LDFLAGS", "-stdlib=libstdc++ -lstdc++" |
||||
# The following is necessary because libtool likes to strip LDFLAGS: |
||||
ENV["CXX"] = "/usr/bin/clang++ -stdlib=libstdc++" |
||||
... |
||||
|
||||
To edit the formulae in turn, run |
||||
|
||||
for x in snappy leveldb protobuf gflags glog szip boost homebrew/science/opencv; do brew edit $x; done |
||||
|
||||
After this, run |
||||
|
||||
for x in snappy leveldb protobuf gflags glog szip homebrew/science/opencv; do brew uninstall $x; brew install --build-from-source --fresh -vd $x; done |
||||
brew install --build-from-source --with-python --fresh -vd boost |
||||
|
||||
**Note** that `brew install --build-from-source --fresh -vd boost` is fine if you do not need the Caffe Python wrapper. |
||||
|
||||
**Note** that the HDF5 dependency is provided by Anaconda Python in this case. |
||||
If you're not using Anaconda, include `hdf5` in the list above. |
||||
|
||||
#### Windows |
||||
|
||||
There is an unofficial Windows port of Caffe at [niuzhiheng/caffe:windows](https://github.com/niuzhiheng/caffe). Thanks [@niuzhiheng](https://github.com/niuzhiheng)! |
||||
|
||||
## Compilation |
||||
|
||||
Now that you have the prerequisites, edit your `Makefile.config` to change the paths for your setup. |
||||
The defaults should work, but uncomment the relevant lines if using Anaconda Python. |
||||
|
||||
cp Makefile.config.example Makefile.config |
||||
# Adjust Makefile.config (for example, if using Anaconda Python) |
||||
make all |
||||
make test |
||||
make runtest |
||||
|
||||
Note that if there is no GPU in your machine, building and running CPU-only works, but GPU tests will naturally fail. |
||||
|
||||
To compile the Python and MATLAB wrappers do `make pycaffe` and `make matcaffe` respectively. |
||||
Be sure to set your MATLAB and Python paths in `Makefile.config` first! |
||||
For Python support, you must add the compiled module to your `$PYTHONPATH` (as `/path/to/caffe/python` or the like). |
||||
|
||||
*Distribution*: run `make distribute` to create a `distribute` directory with all the Caffe headers, compiled libraries, binaries, etc. needed for distribution to other machines. |
||||
|
||||
*Speed*: for a faster build, compile in parallel by doing `make all -j8` where 8 is the number of parallel threads for compilation (a good choice for the number of threads is the number of cores in your machine). |
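
A quick way to check that core count from Python:

    import multiprocessing
    print(multiprocessing.cpu_count())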
||||
|
||||
Now that you have installed Caffe, check out the [MNIST demo](mnist.html) and the pretrained [ImageNet example](imagenet.html). |
||||
|
||||
## Hardware Questions |
||||
|
||||
**Laboratory Tested Hardware**: Berkeley Vision runs Caffe with K40s, K20s, and Titans including models at ImageNet/ILSVRC scale. We also run on GTX series cards and GPU-equipped MacBook Pros. We have not encountered any trouble in-house with devices with CUDA capability >= 3.0. All reported hardware issues thus far have been due to GPU configuration, overheating, and the like.
||||
|
||||
**CUDA compute capability**: devices with compute capability <= 2.0 may have to reduce CUDA thread numbers and batch sizes due to hardware constraints. Your mileage may vary. |
||||
|
||||
Refer to the project's issue tracker for [hardware/compatibility](https://github.com/BVLC/caffe/issues?labels=hardware%2Fcompatibility&page=1&state=open). |
@ -0,0 +1,20 @@ |
||||
fixScale = function(doc) { |
||||
|
||||
var addEvent = 'addEventListener', |
||||
type = 'gesturestart', |
||||
qsa = 'querySelectorAll', |
||||
scales = [1, 1], |
||||
meta = qsa in doc ? doc[qsa]('meta[name=viewport]') : []; |
||||
|
||||
function fix() { |
||||
meta.content = 'width=device-width,minimum-scale=' + scales[0] + ',maximum-scale=' + scales[1]; |
||||
doc.removeEventListener(type, fix, true); |
||||
} |
||||
|
||||
if ((meta = meta[meta.length - 1]) && addEvent in doc) { |
||||
fix(); |
||||
scales = [.25, 1.6]; |
||||
doc[addEvent](type, fix, true); |
||||
} |
||||
|
||||
}; |
@ -0,0 +1,91 @@ |
||||
--- |
||||
layout: default |
||||
title: Caffe |
||||
--- |
||||
|
||||
Training MNIST with Caffe |
||||
================ |
||||
|
||||
We will assume that you have caffe successfully compiled. If not, please refer to the [Installation page](installation.html). In this tutorial, we will assume that your caffe installation is located at `CAFFE_ROOT`. |
||||
|
||||
Prepare Datasets |
||||
---------------- |
||||
|
||||
You will first need to download and convert the data format from the MNIST website. To do this, simply run the following commands: |
||||
|
||||
cd $CAFFE_ROOT/data/mnist |
||||
./get_mnist.sh |
||||
cd $CAFFE_ROOT/examples/mnist |
||||
./create_mnist.sh |
||||
|
||||
If it complains that `wget` or `gunzip` are not installed, you need to install them respectively. After running the script there should be two datasets, `mnist-train-leveldb`, and `mnist-test-leveldb`. |
||||
|
||||
LeNet: the MNIST Classification Model |
||||
------------------------------------- |
||||
Before we actually run the training program, let's explain what will happen. We will use the [LeNet](http://yann.lecun.com/exdb/publis/pdf/lecun-01a.pdf) network, which is known to work well on digit classification tasks. We will use a slightly different version from the original LeNet implementation, replacing the sigmoid activations with Rectified Linear Unit (ReLU) activations for the neurons. |
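
For reference, the two activations differ only in their elementwise nonlinearity; a tiny NumPy sketch:

    import numpy as np

    def sigmoid(x):
        return 1.0 / (1.0 + np.exp(-x))   # saturates toward 0 and 1

    def relu(x):
        return np.maximum(x, 0)           # zero for negative inputs, identity otherwise

    x = np.linspace(-3, 3, 7)
    print(sigmoid(x))
    print(relu(x))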
||||
|
||||
The design of LeNet contains the essence of CNNs that are still used in larger models such as the ones in ImageNet. In general, it consists of a convolutional layer followed by a pooling layer, another convolution layer followed by a pooling layer, and then two fully connected layers similar to the conventional multilayer perceptrons. We have defined the layers in `CAFFE_ROOT/data/lenet.prototxt`. |
||||
|
||||
If you would like step-by-step instructions on how the protobuf definitions are written, see [MNIST: Define the Network](mnist_prototxt.html) and [MNIST: Define the Solver](mnist_solver_prototxt.html).
||||
|
||||
Training and Testing the Model |
||||
------------------------------ |
||||
|
||||
Training the model is simple after you have written the network definition protobuf and solver protobuf files. Simply run `train_lenet.sh`, or the following command directly:
||||
|
||||
cd $CAFFE_ROOT/examples/mnist |
||||
./train_lenet.sh |
||||
|
||||
`train_lenet.sh` is a simple script, but here are a few explanations: `GLOG_logtostderr=1` is the google logging flag that prints all the logging messages directly to stderr. The main tool for training is `train_net.bin`, with the solver protobuf text file as its argument. |
||||
|
||||
When you run the code, you will see a lot of messages flying by like this: |
||||
|
||||
I1203 net.cpp:66] Creating Layer conv1 |
||||
I1203 net.cpp:76] conv1 <- data |
||||
I1203 net.cpp:101] conv1 -> conv1 |
||||
I1203 net.cpp:116] Top shape: 20 24 24 |
||||
I1203 net.cpp:127] conv1 needs backward computation. |
||||
|
||||
These messages tell you the details about each layer, its connections and its output shape, which may be helpful in debugging. After the initialization, the training will start: |
||||
|
||||
I1203 net.cpp:142] Network initialization done. |
||||
I1203 solver.cpp:36] Solver scaffolding done. |
||||
I1203 solver.cpp:44] Solving LeNet |
||||
|
||||
Based on the solver setting, we will print the training loss function every 100 iterations, and test the network every 1000 iterations. You will see messages like this: |
||||
|
||||
I1203 solver.cpp:204] Iteration 100, lr = 0.00992565 |
||||
I1203 solver.cpp:66] Iteration 100, loss = 0.26044 |
||||
... |
||||
I1203 solver.cpp:84] Testing net |
||||
I1203 solver.cpp:111] Test score #0: 0.9785 |
||||
I1203 solver.cpp:111] Test score #1: 0.0606671 |
||||
|
||||
For each training iteration, `lr` is the learning rate of that iteration, and `loss` is the training function. For the output of the testing phase, score 0 is the accuracy, and score 1 is the testing loss function. |
||||
|
||||
And after a few minutes, you are done! |
||||
|
||||
I1203 solver.cpp:84] Testing net |
||||
I1203 solver.cpp:111] Test score #0: 0.9897 |
||||
I1203 solver.cpp:111] Test score #1: 0.0324599 |
||||
I1203 solver.cpp:126] Snapshotting to lenet_iter_10000 |
||||
I1203 solver.cpp:133] Snapshotting solver state to lenet_iter_10000.solverstate |
||||
I1203 solver.cpp:78] Optimization Done. |
||||
|
||||
The final model, stored as a binary protobuf file, is stored at |
||||
|
||||
lenet_iter_10000 |
||||
|
||||
which you can deploy as a trained model in your application, if you are training on a real-world application dataset. |
||||
|
||||
Um... How about GPU training? |
||||
----------------------------- |
||||
|
||||
You just did! All the training was carried out on the GPU. In fact, if you would like to do training on CPU, you can simply change one line in `lenet_solver.prototxt`: |
||||
|
||||
# solver mode: CPU or GPU |
||||
solver_mode: CPU |
||||
|
||||
and you will be using CPU for training. Isn't that easy? |
||||
|
||||
MNIST is a small dataset, so training with GPU does not really introduce too much benefit due to communication overheads. On larger datasets with more complex models, such as ImageNet, the computation speed difference will be more significant. |
@ -0,0 +1,153 @@ |
||||
--- |
||||
layout: default |
||||
title: Caffe |
||||
--- |
||||
|
||||
Define the MNIST Network |
||||
========================= |
||||
|
||||
This page explains the prototxt file `lenet_train.prototxt` used in the MNIST demo. We assume that you are familiar with [Google Protobuf](https://developers.google.com/protocol-buffers/docs/overview), and assume that you have read the protobuf definitions used by Caffe, which can be found at [src/caffe/proto/caffe.proto](https://github.com/Yangqing/caffe/blob/master/src/caffe/proto/caffe.proto). |
||||
|
||||
Specifically, we will write a `caffe::NetParameter` (or in Python, a `caffe.proto.caffe_pb2.NetParameter`) protobuf. We will start by giving the network a name: |
||||
|
||||
name: "LeNet" |
||||
|
||||
Writing the Data Layer |
||||
---------------------- |
||||
Currently, we will read the MNIST data from the leveldb we created earlier in the demo. This is defined by a data layer: |
||||
|
||||
layers { |
||||
name: "mnist" |
||||
type: DATA |
||||
data_param { |
||||
source: "mnist-train-leveldb" |
||||
batch_size: 64 |
||||
scale: 0.00390625 |
||||
} |
||||
top: "data" |
||||
top: "label" |
||||
} |
||||
|
||||
Specifically, this layer has the name `mnist` and type `DATA`, and it reads the data from the given leveldb source. We use a batch size of 64 and scale the incoming pixels so that they fall in the range \[0,1\). Why 0.00390625? It is 1 divided by 256. Finally, this layer produces two blobs: the `data` blob and the `label` blob. |
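
As a quick sanity check of that scaling factor:

    # 1/256 maps raw pixel values in [0, 255] into [0, 1)
    scale = 1 / 256.0
    print(scale)        # 0.00390625
    print(255 * scale)  # 0.99609375, i.e. strictly below 1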
||||
|
||||
Writing the Convolution Layer |
||||
-------------------------------------------- |
||||
Let's define the first convolution layer: |
||||
|
||||
layers { |
||||
name: "conv1" |
||||
type: CONVOLUTION |
||||
blobs_lr: 1. |
||||
blobs_lr: 2. |
||||
convolution_param { |
||||
num_output: 20 |
||||
kernelsize: 5 |
||||
stride: 1 |
||||
weight_filler { |
||||
type: "xavier" |
||||
} |
||||
bias_filler { |
||||
type: "constant" |
||||
} |
||||
} |
||||
bottom: "data" |
||||
top: "conv1" |
||||
} |
||||
|
||||
This layer takes the `data` blob (provided by the data layer) and produces the `conv1` blob. It outputs 20 channels, using a convolution kernel of size 5 applied with stride 1. |
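
A quick sketch, assuming the 28x28 MNIST input and no padding, matches the `Top shape: 20 24 24` line seen in the training log:

    # Output spatial size of a convolution without padding
    num_output, kernel_size, stride = 20, 5, 1
    in_size = 28  # MNIST images are 28x28
    out_size = (in_size - kernel_size) // stride + 1
    print(num_output, out_size, out_size)  # 20 24 24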
||||
|
||||
The fillers allow us to randomly initialize the value of the weights and bias. For the weight filler, we will use the `xavier` algorithm that automatically determines the scale of initialization based on the number of input and output neurons. For the bias filler, we will simply initialize it as constant, with the default filling value 0. |
||||
|
||||
`blobs_lr` are the learning rate adjustments for the layer's learnable parameters. In this case, we will set the weight learning rate to be the same as the learning rate given by the solver during runtime, and the bias learning rate to be twice as large as that - this usually leads to better convergence rates. |
||||
|
||||
Writing the Pooling Layer |
||||
------------------------- |
||||
Phew. Pooling layers are actually much easier to define: |
||||
|
||||
layers { |
||||
name: "pool1" |
||||
type: POOLING |
||||
pooling_param { |
||||
kernel_size: 2 |
||||
stride: 2 |
||||
pool: MAX |
||||
} |
||||
bottom: "conv1" |
||||
top: "pool1" |
||||
} |
||||
|
||||
This says we will perform max pooling with a pool kernel size 2 and a stride of 2 (so no overlapping between neighboring pooling regions). |
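
Continuing the shape bookkeeping, a quick sketch (assuming conv1's 24x24 output from the log above) shows that 2x2 pooling with stride 2 halves each spatial dimension:

    # pool1 output size, given conv1's 24x24 output
    conv1_size, kernel_size, stride = 24, 2, 2
    pool1_size = (conv1_size - kernel_size) // stride + 1
    print(pool1_size)  # 12, so pool1 produces 20 x 12 x 12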
||||
|
||||
Similarly, you can write up the second convolution and pooling layers. Check `data/lenet.prototxt` for details. |
||||
|
||||
Writing the Fully Connected Layer |
||||
---------------------------------- |
||||
Writing a fully connected layer is also simple: |
||||
|
||||
layers { |
||||
name: "ip1" |
||||
type: INNER_PRODUCT |
||||
blobs_lr: 1. |
||||
blobs_lr: 2. |
||||
inner_product_param { |
||||
num_output: 500 |
||||
weight_filler { |
||||
type: "xavier" |
||||
} |
||||
bias_filler { |
||||
type: "constant" |
||||
} |
||||
} |
||||
bottom: "pool2" |
||||
top: "ip1" |
||||
} |
||||
|
||||
This defines a fully connected layer (for some legacy reason, Caffe calls it an `innerproduct` layer) with 500 outputs. All other lines look familiar, right? |
||||
|
||||
Writing the ReLU Layer |
||||
---------------------- |
||||
A ReLU Layer is also simple: |
||||
|
||||
layers { |
||||
name: "relu1" |
||||
type: RELU |
||||
bottom: "ip1" |
||||
top: "ip1" |
||||
} |
||||
|
||||
Since ReLU is an element-wise operation, we can do *in-place* operations to save some memory. This is achieved by simply giving the same name to the bottom and top blobs. Of course, do NOT use duplicated blob names for other layer types! |
||||
|
||||
After the ReLU layer, we will write another innerproduct layer: |
||||
|
||||
layers { |
||||
name: "ip2" |
||||
type: INNER_PRODUCT |
||||
blobs_lr: 1. |
||||
blobs_lr: 2. |
||||
inner_product_param { |
||||
num_output: 10 |
||||
weight_filler { |
||||
type: "xavier" |
||||
} |
||||
bias_filler { |
||||
type: "constant" |
||||
} |
||||
} |
||||
bottom: "ip1" |
||||
top: "ip2" |
||||
} |
||||
|
||||
Writing the Loss Layer |
||||
------------------------- |
||||
Finally, we will write the loss! |
||||
|
||||
layers { |
||||
name: "loss" |
||||
type: SOFTMAX_LOSS |
||||
bottom: "ip2" |
||||
bottom: "label" |
||||
} |
||||
|
||||
The `softmax_loss` layer implements both the softmax and the multinomial logistic loss (which saves time and improves numerical stability). It takes two blobs, the first being the prediction and the second being the `label` provided by the data layer (remember it?). It does not produce any outputs - all it does is compute the loss function value, report it when backpropagation starts, and initiate the gradient with respect to `ip2`. This is where all the magic starts. |
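
Since the finished prototxt is just the text form of a `NetParameter`, you can read it back programmatically; a minimal sketch, assuming `lenet_train.prototxt` is in the current directory and the generated `caffe_pb2` module is importable:

    from caffe.proto import caffe_pb2
    from google.protobuf import text_format

    # Parse the text prototxt back into a NetParameter and list its layers
    net_param = caffe_pb2.NetParameter()
    with open('lenet_train.prototxt') as f:
        text_format.Merge(f.read(), net_param)
    for layer in net_param.layers:
        print(layer.name, layer.type)  # type prints as the enum value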
||||
|
||||
Now that we have demonstrated how to write the MNIST layer definition prototxt, maybe check out [how we write a solver prototxt](mnist_solver_prototxt.html)? |
@ -0,0 +1,37 @@ |
||||
--- |
||||
layout: default |
||||
title: Caffe |
||||
--- |
||||
|
||||
Define the MNIST Solver |
||||
======================= |
||||
|
||||
This page is under construction. For now, check out the comments in the solver prototxt file, which explain each line: |
||||
|
||||
# The training protocol buffer definition |
||||
train_net: "lenet_train.prototxt" |
||||
# The testing protocol buffer definition |
||||
test_net: "lenet_test.prototxt" |
||||
# test_iter specifies how many forward passes the test should carry out. |
||||
# In the case of MNIST, we have test batch size 100 and 100 test iterations, |
||||
# covering the full 10,000 testing images. |
||||
test_iter: 100 |
||||
# Carry out testing every 500 training iterations. |
||||
test_interval: 500 |
||||
# The base learning rate, momentum and the weight decay of the network. |
||||
base_lr: 0.01 |
||||
momentum: 0.9 |
||||
weight_decay: 0.0005 |
||||
# The learning rate policy |
||||
lr_policy: "inv" |
||||
gamma: 0.0001 |
||||
power: 0.75 |
||||
# Display every 100 iterations |
||||
display: 100 |
||||
# The maximum number of iterations |
||||
max_iter: 10000 |
||||
# snapshot intermediate results |
||||
snapshot: 5000 |
||||
snapshot_prefix: "lenet" |
||||
# solver mode: 0 for CPU and 1 for GPU |
||||
solver_mode: 1 |
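
The `inv` policy above decays the learning rate as `base_lr * (1 + gamma * iter)^(-power)`; a quick sketch, assuming that formula, reproduces the `lr = 0.00992565` reported at iteration 100 in the training log:

    # Learning rate under the "inv" policy with the settings above
    base_lr, gamma, power = 0.01, 0.0001, 0.75

    def inv_lr(iteration):
        return base_lr * (1 + gamma * iteration) ** (-power)

    print(inv_lr(100))    # ~0.00992565, matching the training log
    print(inv_lr(10000))  # learning rate at the final iteration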
@ -0,0 +1,57 @@ |
||||
--- |
||||
layout: default |
||||
title: Caffe |
||||
--- |
||||
|
||||
# Performance and Hardware Configuration |
||||
|
||||
To measure performance on different NVIDIA GPUs we use the Caffe reference ImageNet model. |
||||
|
||||
For training, each time point is 20 iterations/minibatches of 256 images for 5,120 images total. For testing, a 50,000 image validation set is classified. |
||||
|
||||
**Acknowledgements**: BVLC members are very grateful to NVIDIA for providing several GPUs to conduct this research. |
||||
|
||||
## NVIDIA K40 |
||||
|
||||
Performance is best with ECC off and boost clock enabled. While ECC makes a negligible difference in speed, disabling it frees ~1 GB of GPU memory. |
||||
|
||||
Best settings with ECC off and maximum clock speed: |
||||
|
||||
* Training is 26.5 secs / 20 iterations (5,120 images) |
||||
* Testing is 100 secs / validation set (50,000 images) |
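
Dividing the image counts by these wall-clock times gives rough throughput figures; a quick sketch for this best-case setting:

    # Rough throughput from the timings above (ECC off, max clock)
    train_images, train_secs = 5120, 26.5
    test_images, test_secs = 50000, 100.0
    print(train_images / train_secs)  # ~193 images/s during training
    print(test_images / test_secs)    # 500 images/s during testing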
||||
|
||||
Other settings: |
||||
|
||||
* ECC on, max speed: training 26.7 secs / 20 iterations, test 101 secs / validation set |
||||
* ECC on, default speed: training 31 secs / 20 iterations, test 117 secs / validation set |
||||
* ECC off, default speed: training 31 secs / 20 iterations, test 118 secs / validation set |
||||
|
||||
### K40 configuration tips |
||||
|
||||
For maximum K40 performance, turn off ECC and boost the clock speed (at your own risk). |
||||
|
||||
To turn off ECC, do |
||||
|
||||
sudo nvidia-smi -i 0 --ecc-config=0 # repeat with -i x for each GPU ID |
||||
|
||||
then reboot. |
||||
|
||||
Set the "persistence" mode of the GPU by |
||||
|
||||
sudo nvidia-smi -pm 1 |
||||
|
||||
and then set the clock speed with |
||||
|
||||
sudo nvidia-smi -i 0 -ac 3004,875 # repeat with -i x for each GPU ID |
||||
|
||||
but note that this configuration resets across driver reloading / rebooting. Include these commands in a boot script to initialize these settings at startup. For a simple fix, add these commands to `/etc/rc.local` (on Ubuntu). |
||||
|
||||
## NVIDIA Titan |
||||
|
||||
Training: 26.26 secs / 20 iterations (5,120 images). |
||||
Testing: 100 secs / validation set (50,000 images). |
||||
|
||||
## NVIDIA K20 |
||||
|
||||
Training: 36.0 secs / 20 iterations (5,120 images). |
||||
Testing: 133 secs / validation set (50,000 images) |
@ -0,0 +1,69 @@ |
||||
.highlight { background: #ffffff; } |
||||
.highlight .c { color: #999988; font-style: italic } /* Comment */ |
||||
.highlight .err { color: #a61717; background-color: #e3d2d2 } /* Error */ |
||||
.highlight .k { font-weight: bold } /* Keyword */ |
||||
.highlight .o { font-weight: bold } /* Operator */ |
||||
.highlight .cm { color: #999988; font-style: italic } /* Comment.Multiline */ |
||||
.highlight .cp { color: #999999; font-weight: bold } /* Comment.Preproc */ |
||||
.highlight .c1 { color: #999988; font-style: italic } /* Comment.Single */ |
||||
.highlight .cs { color: #999999; font-weight: bold; font-style: italic } /* Comment.Special */ |
||||
.highlight .gd { color: #000000; background-color: #ffdddd } /* Generic.Deleted */ |
||||
.highlight .gd .x { color: #000000; background-color: #ffaaaa } /* Generic.Deleted.Specific */ |
||||
.highlight .ge { font-style: italic } /* Generic.Emph */ |
||||
.highlight .gr { color: #aa0000 } /* Generic.Error */ |
||||
.highlight .gh { color: #999999 } /* Generic.Heading */ |
||||
.highlight .gi { color: #000000; background-color: #ddffdd } /* Generic.Inserted */ |
||||
.highlight .gi .x { color: #000000; background-color: #aaffaa } /* Generic.Inserted.Specific */ |
||||
.highlight .go { color: #888888 } /* Generic.Output */ |
||||
.highlight .gp { color: #555555 } /* Generic.Prompt */ |
||||
.highlight .gs { font-weight: bold } /* Generic.Strong */ |
||||
.highlight .gu { color: #800080; font-weight: bold; } /* Generic.Subheading */ |
||||
.highlight .gt { color: #aa0000 } /* Generic.Traceback */ |
||||
.highlight .kc { font-weight: bold } /* Keyword.Constant */ |
||||
.highlight .kd { font-weight: bold } /* Keyword.Declaration */ |
||||
.highlight .kn { font-weight: bold } /* Keyword.Namespace */ |
||||
.highlight .kp { font-weight: bold } /* Keyword.Pseudo */ |
||||
.highlight .kr { font-weight: bold } /* Keyword.Reserved */ |
||||
.highlight .kt { color: #445588; font-weight: bold } /* Keyword.Type */ |
||||
.highlight .m { color: #009999 } /* Literal.Number */ |
||||
.highlight .s { color: #d14 } /* Literal.String */ |
||||
.highlight .na { color: #008080 } /* Name.Attribute */ |
||||
.highlight .nb { color: #0086B3 } /* Name.Builtin */ |
||||
.highlight .nc { color: #445588; font-weight: bold } /* Name.Class */ |
||||
.highlight .no { color: #008080 } /* Name.Constant */ |
||||
.highlight .ni { color: #800080 } /* Name.Entity */ |
||||
.highlight .ne { color: #990000; font-weight: bold } /* Name.Exception */ |
||||
.highlight .nf { color: #990000; font-weight: bold } /* Name.Function */ |
||||
.highlight .nn { color: #555555 } /* Name.Namespace */ |
||||
.highlight .nt { color: #000080 } /* Name.Tag */ |
||||
.highlight .nv { color: #008080 } /* Name.Variable */ |
||||
.highlight .ow { font-weight: bold } /* Operator.Word */ |
||||
.highlight .w { color: #bbbbbb } /* Text.Whitespace */ |
||||
.highlight .mf { color: #009999 } /* Literal.Number.Float */ |
||||
.highlight .mh { color: #009999 } /* Literal.Number.Hex */ |
||||
.highlight .mi { color: #009999 } /* Literal.Number.Integer */ |
||||
.highlight .mo { color: #009999 } /* Literal.Number.Oct */ |
||||
.highlight .sb { color: #d14 } /* Literal.String.Backtick */ |
||||
.highlight .sc { color: #d14 } /* Literal.String.Char */ |
||||
.highlight .sd { color: #d14 } /* Literal.String.Doc */ |
||||
.highlight .s2 { color: #d14 } /* Literal.String.Double */ |
||||
.highlight .se { color: #d14 } /* Literal.String.Escape */ |
||||
.highlight .sh { color: #d14 } /* Literal.String.Heredoc */ |
||||
.highlight .si { color: #d14 } /* Literal.String.Interpol */ |
||||
.highlight .sx { color: #d14 } /* Literal.String.Other */ |
||||
.highlight .sr { color: #009926 } /* Literal.String.Regex */ |
||||
.highlight .s1 { color: #d14 } /* Literal.String.Single */ |
||||
.highlight .ss { color: #990073 } /* Literal.String.Symbol */ |
||||
.highlight .bp { color: #999999 } /* Name.Builtin.Pseudo */ |
||||
.highlight .vc { color: #008080 } /* Name.Variable.Class */ |
||||
.highlight .vg { color: #008080 } /* Name.Variable.Global */ |
||||
.highlight .vi { color: #008080 } /* Name.Variable.Instance */ |
||||
.highlight .il { color: #009999 } /* Literal.Number.Integer.Long */ |
||||
|
||||
.type-csharp .highlight .k { color: #0000FF } |
||||
.type-csharp .highlight .kt { color: #0000FF } |
||||
.type-csharp .highlight .nf { color: #000000; font-weight: normal } |
||||
.type-csharp .highlight .nc { color: #2B91AF } |
||||
.type-csharp .highlight .nn { color: #000000 } |
||||
.type-csharp .highlight .s { color: #A31515 } |
||||
.type-csharp .highlight .sc { color: #A31515 } |
@ -0,0 +1,21 @@ |
||||
/* MeyerWeb Reset */ |
||||
|
||||
html, body, div, span, applet, object, iframe, |
||||
h1, h2, h3, h4, h5, h6, p, blockquote, pre, |
||||
a, abbr, acronym, address, big, cite, code, |
||||
del, dfn, em, img, ins, kbd, q, s, samp, |
||||
small, strike, strong, sub, sup, tt, var, |
||||
b, u, i, center, |
||||
dl, dt, dd, ol, ul, li, |
||||
fieldset, form, label, legend, |
||||
table, caption, tbody, tfoot, thead, tr, th, td, |
||||
article, aside, canvas, details, embed, |
||||
figure, figcaption, footer, header, hgroup, |
||||
menu, nav, output, ruby, section, summary, |
||||
time, mark, audio, video { |
||||
margin: 0; |
||||
padding: 0; |
||||
border: 0; |
||||
font: inherit; |
||||
vertical-align: baseline; |
||||
} |
@ -0,0 +1,393 @@ |
||||
body { |
||||
padding:10px 50px 0 0; |
||||
font-family: "Helvetica Neue", Helvetica, Arial, sans-serif; |
||||
font-weight: 300; |
||||
font-size: 14px; |
||||
color: #232323; |
||||
background-color: #FBFAF7; |
||||
margin: 0; |
||||
line-height: 1.8em; |
||||
-webkit-font-smoothing: antialiased; |
||||
|
||||
} |
||||
|
||||
h1, h2, h3, h4, h5, h6 { |
||||
color:#232323; |
||||
margin:36px 0 10px; |
||||
} |
||||
|
||||
p, ul, ol, table, dl { |
||||
margin:0 0 22px; |
||||
} |
||||
|
||||
h1, h2, h3 { |
||||
font-family: Times, serif; |
||||
font-weight: 300; |
||||
line-height:1.3; |
||||
font-weight: normal; |
||||
display: block; |
||||
border-bottom: 1px solid #ccc; |
||||
padding-bottom: 5px; |
||||
} |
||||
|
||||
h1 { |
||||
font-size: 30px; |
||||
} |
||||
|
||||
h2 { |
||||
font-size: 24px; |
||||
} |
||||
|
||||
h3 { |
||||
font-size: 18px; |
||||
} |
||||
|
||||
h4, h5, h6 { |
||||
font-family: Times, serif; |
||||
font-weight: 700; |
||||
} |
||||
|
||||
a { |
||||
color:#C30000; |
||||
text-decoration:none; |
||||
} |
||||
|
||||
a:hover { |
||||
text-decoration: underline; |
||||
} |
||||
|
||||
a small { |
||||
font-size: 12px; |
||||
} |
||||
|
||||
em { |
||||
font-style: italic; |
||||
} |
||||
|
||||
strong { |
||||
font-weight:700; |
||||
} |
||||
|
||||
ul { |
||||
list-style: inside; |
||||
padding-left: 25px; |
||||
} |
||||
|
||||
ol { |
||||
list-style: decimal inside; |
||||
padding-left: 20px; |
||||
} |
||||
|
||||
blockquote { |
||||
margin: 0; |
||||
padding: 0 0 0 20px; |
||||
font-style: italic; |
||||
} |
||||
|
||||
dl, dt, dd, dl p { |
||||
color: #444; |
||||
} |
||||
|
||||
dl dt { |
||||
font-weight: bold; |
||||
} |
||||
|
||||
dl dd { |
||||
padding-left: 20px; |
||||
font-style: italic; |
||||
} |
||||
|
||||
dl p { |
||||
padding-left: 20px; |
||||
font-style: italic; |
||||
} |
||||
|
||||
hr { |
||||
border:0; |
||||
background:#ccc; |
||||
height:1px; |
||||
margin:0 0 24px; |
||||
} |
||||
|
||||
/* Images */ |
||||
|
||||
img { |
||||
position: relative; |
||||
margin: 0 auto; |
||||
max-width: 650px; |
||||
padding: 5px; |
||||
margin: 10px 0 32px 0; |
||||
border: 1px solid #ccc; |
||||
} |
||||
|
||||
p img { |
||||
display: inline; |
||||
margin: 0; |
||||
padding: 0; |
||||
vertical-align: middle; |
||||
text-align: center; |
||||
border: none; |
||||
} |
||||
|
||||
/* Code blocks */ |
||||
|
||||
code, pre { |
||||
font-family: monospace; |
||||
color:#000; |
||||
font-size:12px; |
||||
line-height: 14px; |
||||
} |
||||
|
||||
pre { |
||||
padding: 6px 12px; |
||||
background: #FDFEFB; |
||||
border-radius:4px; |
||||
border:1px solid #D7D8C8; |
||||
overflow: auto; |
||||
white-space: pre-wrap; |
||||
margin-bottom: 16px; |
||||
} |
||||
|
||||
|
||||
/* Tables */ |
||||
|
||||
table { |
||||
width:100%; |
||||
} |
||||
|
||||
table { |
||||
border: 1px solid #ccc; |
||||
margin-bottom: 32px; |
||||
text-align: left; |
||||
} |
||||
|
||||
th { |
||||
font-family: 'Arvo', Helvetica, Arial, sans-serif; |
||||
font-size: 18px; |
||||
font-weight: normal; |
||||
padding: 10px; |
||||
background: #232323; |
||||
color: #FDFEFB; |
||||
} |
||||
|
||||
td { |
||||
padding: 10px; |
||||
background: #ccc; |
||||
} |
||||
|
||||
|
||||
/* Wrapper */ |
||||
.wrapper { |
||||
width:960px; |
||||
} |
||||
|
||||
|
||||
/* Header */ |
||||
|
||||
header { |
||||
background-color: #171717; |
||||
color: #FDFDFB; |
||||
width:170px; |
||||
float:left; |
||||
position:fixed; |
||||
border: 1px solid #000; |
||||
-webkit-border-top-right-radius: 4px; |
||||
-webkit-border-bottom-right-radius: 4px; |
||||
-moz-border-radius-topright: 4px; |
||||
-moz-border-radius-bottomright: 4px; |
||||
border-top-right-radius: 4px; |
||||
border-bottom-right-radius: 4px; |
||||
padding: 12px 25px 22px 50px; |
||||
margin: 24px 25px 0 0; |
||||
-webkit-font-smoothing: antialiased; |
||||
} |
||||
|
||||
p.header { |
||||
font-size: 16px; |
||||
} |
||||
|
||||
h1.header { |
||||
/*font-family: "Helvetica Neue", Helvetica, Arial, sans-serif;*/ |
||||
font-size: 30px; |
||||
font-weight: 300; |
||||
line-height: 1.3em; |
||||
border-bottom: none; |
||||
margin-top: 0; |
||||
} |
||||
|
||||
|
||||
h1.header, a.header, a.name, header a{ |
||||
color: #fff; |
||||
} |
||||
|
||||
a.header { |
||||
text-decoration: underline; |
||||
} |
||||
|
||||
a.name { |
||||
white-space: nowrap; |
||||
} |
||||
|
||||
header ul { |
||||
list-style:none; |
||||
padding:0; |
||||
} |
||||
|
||||
header li { |
||||
list-style-type: none; |
||||
width:132px; |
||||
height:15px; |
||||
margin-bottom: 12px; |
||||
line-height: 1em; |
||||
padding: 6px 6px 6px 7px; |
||||
|
||||
background: #AF0011; |
||||
background: -moz-linear-gradient(top, #AF0011 0%, #820011 100%); |
||||
background: -webkit-gradient(linear, left top, left bottom, color-stop(0%,#f8f8f8), color-stop(100%,#dddddd)); |
||||
background: -webkit-linear-gradient(top, #AF0011 0%,#820011 100%); |
||||
background: -o-linear-gradient(top, #AF0011 0%,#820011 100%); |
||||
background: -ms-linear-gradient(top, #AF0011 0%,#820011 100%); |
||||
background: linear-gradient(top, #AF0011 0%,#820011 100%); |
||||
|
||||
border-radius:4px; |
||||
border:1px solid #0D0D0D; |
||||
|
||||
-webkit-box-shadow: inset 0px 1px 1px 0 rgba(233,2,38, 1); |
||||
box-shadow: inset 0px 1px 1px 0 rgba(233,2,38, 1); |
||||
|
||||
} |
||||
|
||||
header li:hover { |
||||
background: #C3001D; |
||||
background: -moz-linear-gradient(top, #C3001D 0%, #950119 100%); |
||||
background: -webkit-gradient(linear, left top, left bottom, color-stop(0%,#f8f8f8), color-stop(100%,#dddddd)); |
||||
background: -webkit-linear-gradient(top, #C3001D 0%,#950119 100%); |
||||
background: -o-linear-gradient(top, #C3001D 0%,#950119 100%); |
||||
background: -ms-linear-gradient(top, #C3001D 0%,#950119 100%); |
||||
background: linear-gradient(top, #C3001D 0%,#950119 100%); |
||||
} |
||||
|
||||
a.buttons { |
||||
-webkit-font-smoothing: antialiased; |
||||
background: url(../images/arrow-down.png) no-repeat; |
||||
font-weight: normal; |
||||
text-shadow: rgba(0, 0, 0, 0.4) 0 -1px 0; |
||||
padding: 2px 2px 2px 22px; |
||||
height: 30px; |
||||
} |
||||
|
||||
a.github { |
||||
background: url(../images/octocat-small.png) no-repeat 1px; |
||||
} |
||||
|
||||
a.buttons:hover { |
||||
color: #fff; |
||||
text-decoration: none; |
||||
} |
||||
|
||||
|
||||
/* Section - for main page content */ |
||||
|
||||
section { |
||||
width:650px; |
||||
float:right; |
||||
padding-bottom:50px; |
||||
} |
||||
|
||||
|
||||
/* Footer */ |
||||
|
||||
footer { |
||||
width:170px; |
||||
float:left; |
||||
position:fixed; |
||||
bottom:10px; |
||||
padding-left: 50px; |
||||
} |
||||
|
||||
@media print, screen and (max-width: 960px) { |
||||
|
||||
div.wrapper { |
||||
width:auto; |
||||
margin:0; |
||||
} |
||||
|
||||
header, section, footer { |
||||
float:none; |
||||
position:static; |
||||
width:auto; |
||||
} |
||||
|
||||
footer { |
||||
border-top: 1px solid #ccc; |
||||
margin:0 84px 0 50px; |
||||
padding:0; |
||||
} |
||||
|
||||
header { |
||||
padding-right:320px; |
||||
} |
||||
|
||||
section { |
||||
padding:20px 84px 20px 50px; |
||||
margin:0 0 20px; |
||||
} |
||||
|
||||
header a small { |
||||
display:inline; |
||||
} |
||||
|
||||
header ul { |
||||
position:absolute; |
||||
right:130px; |
||||
top:84px; |
||||
} |
||||
} |
||||
|
||||
@media print, screen and (max-width: 720px) { |
||||
body { |
||||
word-wrap:break-word; |
||||
} |
||||
|
||||
header { |
||||
padding:10px 20px 0; |
||||
margin-right: 0; |
||||
} |
||||
|
||||
section { |
||||
padding:10px 0 10px 20px; |
||||
margin:0 0 30px; |
||||
} |
||||
|
||||
footer { |
||||
margin: 0 0 0 30px; |
||||
} |
||||
|
||||
header ul, header p.view { |
||||
position:static; |
||||
} |
||||
} |
||||
|
||||
@media print, screen and (max-width: 480px) { |
||||
|
||||
header ul li.download { |
||||
display:none; |
||||
} |
||||
|
||||
footer { |
||||
margin: 0 0 0 20px; |
||||
} |
||||
|
||||
footer a{ |
||||
display:block; |
||||
} |
||||
|
||||
} |
||||
|
||||
@media print { |
||||
body { |
||||
padding:0.4in; |
||||
font-size:12pt; |
||||
color:#444; |
||||
} |
||||
} |
@ -0,0 +1,100 @@ |
||||
// Copyright 2014 BVLC and contributors.
|
||||
|
||||
#ifndef CAFFE_BLOB_HPP_ |
||||
#define CAFFE_BLOB_HPP_ |
||||
|
||||
#include "caffe/common.hpp" |
||||
#include "caffe/syncedmem.hpp" |
||||
#include "caffe/proto/caffe.pb.h" |
||||
|
||||
namespace caffe { |
||||
|
||||
template <typename Dtype> |
||||
class Blob { |
||||
public: |
||||
Blob() |
||||
: num_(0), channels_(0), height_(0), width_(0), count_(0), data_(), |
||||
diff_() {} |
||||
explicit Blob(const int num, const int channels, const int height, |
||||
const int width); |
||||
void Reshape(const int num, const int channels, const int height, |
||||
const int width); |
||||
void ReshapeLike(const Blob& other); |
||||
inline int num() const { return num_; } |
||||
inline int channels() const { return channels_; } |
||||
inline int height() const { return height_; } |
||||
inline int width() const { return width_; } |
||||
inline int count() const {return count_; } |
||||
inline int offset(const int n, const int c = 0, const int h = 0, |
||||
const int w = 0) const { |
||||
CHECK_GE(n, 0); |
||||
CHECK_LE(n, num_); |
||||
CHECK_GE(channels_, 0); |
||||
CHECK_LE(c, channels_); |
||||
CHECK_GE(height_, 0); |
||||
CHECK_LE(h, height_); |
||||
CHECK_GE(width_, 0); |
||||
CHECK_LE(w, width_); |
||||
return ((n * channels_ + c) * height_ + h) * width_ + w; |
||||
} |
||||
// Copy from source. If copy_diff is false, we copy the data; if copy_diff
|
||||
// is true, we copy the diff.
|
||||
void CopyFrom(const Blob<Dtype>& source, bool copy_diff = false, |
||||
bool reshape = false); |
||||
|
||||
inline Dtype data_at(const int n, const int c, const int h, |
||||
const int w) const { |
||||
return *(cpu_data() + offset(n, c, h, w)); |
||||
} |
||||
|
||||
inline Dtype diff_at(const int n, const int c, const int h, |
||||
const int w) const { |
||||
return *(cpu_diff() + offset(n, c, h, w)); |
||||
} |
||||
|
||||
inline const shared_ptr<SyncedMemory>& data() const { |
||||
CHECK(data_); |
||||
return data_; |
||||
} |
||||
|
||||
inline const shared_ptr<SyncedMemory>& diff() const { |
||||
CHECK(diff_); |
||||
return diff_; |
||||
} |
||||
|
||||
const Dtype* cpu_data() const; |
||||
void set_cpu_data(Dtype* data); |
||||
const Dtype* gpu_data() const; |
||||
const Dtype* cpu_diff() const; |
||||
const Dtype* gpu_diff() const; |
||||
Dtype* mutable_cpu_data(); |
||||
Dtype* mutable_gpu_data(); |
||||
Dtype* mutable_cpu_diff(); |
||||
Dtype* mutable_gpu_diff(); |
||||
void Update(); |
||||
void FromProto(const BlobProto& proto); |
||||
void ToProto(BlobProto* proto, bool write_diff = false) const; |
||||
|
||||
// Set the data_/diff_ shared_ptr to point to the SyncedMemory holding the
|
||||
// data_/diff_ of Blob other -- useful in layers which simply perform a copy
|
||||
// in their forward or backward pass.
|
||||
// This deallocates the SyncedMemory holding this blob's data/diff, as
|
||||
// shared_ptr calls its destructor when reset with the = operator.
|
||||
void ShareData(const Blob& other); |
||||
void ShareDiff(const Blob& other); |
||||
|
||||
protected: |
||||
shared_ptr<SyncedMemory> data_; |
||||
shared_ptr<SyncedMemory> diff_; |
||||
int num_; |
||||
int channels_; |
||||
int height_; |
||||
int width_; |
||||
int count_; |
||||
|
||||
DISABLE_COPY_AND_ASSIGN(Blob); |
||||
}; // class Blob
|
||||
|
||||
} // namespace caffe
|
||||
|
||||
#endif // CAFFE_BLOB_HPP_
|
@ -0,0 +1,19 @@ |
||||
// Copyright 2014 BVLC and contributors.
|
||||
// caffe.hpp is the header file that you need to include in your code. It wraps
|
||||
// all the internal caffe header files into one for simpler inclusion.
|
||||
|
||||
#ifndef CAFFE_CAFFE_HPP_ |
||||
#define CAFFE_CAFFE_HPP_ |
||||
|
||||
#include "caffe/common.hpp" |
||||
#include "caffe/blob.hpp" |
||||
#include "caffe/filler.hpp" |
||||
#include "caffe/layer.hpp" |
||||
#include "caffe/net.hpp" |
||||
#include "caffe/solver.hpp" |
||||
#include "caffe/util/io.hpp" |
||||
#include "caffe/vision_layers.hpp" |
||||
|
||||
#include "caffe/proto/caffe.pb.h" |
||||
|
||||
#endif // CAFFE_CAFFE_HPP_
|
@ -0,0 +1,169 @@ |
||||
// Copyright 2014 BVLC and contributors.
|
||||
|
||||
#ifndef CAFFE_COMMON_HPP_ |
||||
#define CAFFE_COMMON_HPP_ |
||||
|
||||
#include <boost/shared_ptr.hpp> |
||||
#include <cublas_v2.h> |
||||
#include <cuda.h> |
||||
#include <curand.h> |
||||
#include <driver_types.h> // cuda driver types |
||||
#include <glog/logging.h> |
||||
|
||||
// Disable the copy and assignment operator for a class.
|
||||
#define DISABLE_COPY_AND_ASSIGN(classname) \ |
||||
private:\
|
||||
classname(const classname&);\
|
||||
classname& operator=(const classname&) |
||||
|
||||
// Instantiate a class with float and double specifications.
|
||||
#define INSTANTIATE_CLASS(classname) \ |
||||
template class classname<float>; \
|
||||
template class classname<double> |
||||
|
||||
// A simple macro to mark codes that are not implemented, so that when the code
|
||||
// is executed we will see a fatal log.
|
||||
#define NOT_IMPLEMENTED LOG(FATAL) << "Not Implemented Yet" |
||||
|
||||
// CUDA: various checks for different function calls.
|
||||
#define CUDA_CHECK(condition) \ |
||||
/* Code block avoids redefinition of cudaError_t error */ \
|
||||
do { \
|
||||
cudaError_t error = condition; \
|
||||
CHECK_EQ(error, cudaSuccess) << " " << cudaGetErrorString(error); \
|
||||
} while (0) |
||||
|
||||
#define CUBLAS_CHECK(condition) \ |
||||
do { \
|
||||
cublasStatus_t status = condition; \
|
||||
CHECK_EQ(status, CUBLAS_STATUS_SUCCESS) << " " \
|
||||
<< caffe::cublasGetErrorString(status); \
|
||||
} while (0) |
||||
|
||||
#define CURAND_CHECK(condition) \ |
||||
do { \
|
||||
curandStatus_t status = condition; \
|
||||
CHECK_EQ(status, CURAND_STATUS_SUCCESS) << " " \
|
||||
<< caffe::curandGetErrorString(status); \
|
||||
} while (0) |
||||
|
||||
// CUDA: grid stride looping
|
||||
#define CUDA_KERNEL_LOOP(i, n) \ |
||||
for (int i = blockIdx.x * blockDim.x + threadIdx.x; \
|
||||
i < (n); \
|
||||
i += blockDim.x * gridDim.x) |
||||
|
||||
// CUDA: check for error after kernel execution and exit loudly if there is one.
|
||||
#define CUDA_POST_KERNEL_CHECK CUDA_CHECK(cudaPeekAtLastError()) |
||||
|
||||
// Define not supported status for pre-6.0 compatibility.
|
||||
#if CUDA_VERSION < 6000 |
||||
#define CUBLAS_STATUS_NOT_SUPPORTED 831486 |
||||
#endif |
||||
|
||||
namespace caffe { |
||||
|
||||
// We will use the boost shared_ptr instead of the new C++11 one mainly
|
||||
// because cuda does not work (at least now) well with C++11 features.
|
||||
using boost::shared_ptr; |
||||
|
||||
|
||||
// A singleton class to hold common caffe stuff, such as the handler that
|
||||
// caffe is going to use for cublas, curand, etc.
|
||||
class Caffe { |
||||
public: |
||||
~Caffe(); |
||||
inline static Caffe& Get() { |
||||
if (!singleton_.get()) { |
||||
singleton_.reset(new Caffe()); |
||||
} |
||||
return *singleton_; |
||||
} |
||||
enum Brew { CPU, GPU }; |
||||
enum Phase { TRAIN, TEST }; |
||||
|
||||
|
||||
// This random number generator facade hides boost and CUDA rng
|
||||
// implementation from one another (for cross-platform compatibility).
|
||||
class RNG { |
||||
public: |
||||
RNG(); |
||||
explicit RNG(unsigned int seed); |
||||
explicit RNG(const RNG&); |
||||
RNG& operator=(const RNG&); |
||||
void* generator(); |
||||
private: |
||||
class Generator; |
||||
shared_ptr<Generator> generator_; |
||||
}; |
||||
|
||||
// Getters for boost rng, curand, and cublas handles
|
||||
inline static RNG& rng_stream() { |
||||
if (!Get().random_generator_) { |
||||
Get().random_generator_.reset(new RNG()); |
||||
} |
||||
return *(Get().random_generator_); |
||||
} |
||||
inline static cublasHandle_t cublas_handle() { return Get().cublas_handle_; } |
||||
inline static curandGenerator_t curand_generator() { |
||||
return Get().curand_generator_; |
||||
} |
||||
|
||||
// Returns the mode: running on CPU or GPU.
|
||||
inline static Brew mode() { return Get().mode_; } |
||||
// Returns the phase: TRAIN or TEST.
|
||||
inline static Phase phase() { return Get().phase_; } |
||||
// The setters for the variables
|
||||
// Sets the mode. It is recommended that you don't change the mode halfway
|
||||
// into the program since that may cause allocation of pinned memory being
|
||||
// freed in a non-pinned way, which may cause problems - I haven't verified
|
||||
// it personally but better to note it here in the header file.
|
||||
inline static void set_mode(Brew mode) { Get().mode_ = mode; } |
||||
// Sets the phase.
|
||||
inline static void set_phase(Phase phase) { Get().phase_ = phase; } |
||||
// Sets the random seed of both boost and curand
|
||||
static void set_random_seed(const unsigned int seed); |
||||
// Sets the device. Since we have cublas and curand stuff, set device also
|
||||
// requires us to reset those values.
|
||||
static void SetDevice(const int device_id); |
||||
// Prints the current GPU status.
|
||||
static void DeviceQuery(); |
||||
|
||||
protected: |
||||
cublasHandle_t cublas_handle_; |
||||
curandGenerator_t curand_generator_; |
||||
shared_ptr<RNG> random_generator_; |
||||
|
||||
Brew mode_; |
||||
Phase phase_; |
||||
static shared_ptr<Caffe> singleton_; |
||||
|
||||
private: |
||||
// The private constructor to avoid duplicate instantiation.
|
||||
Caffe(); |
||||
|
||||
DISABLE_COPY_AND_ASSIGN(Caffe); |
||||
}; |
||||
|
||||
// NVIDIA_CUDA-5.5_Samples/common/inc/helper_cuda.h
|
||||
const char* cublasGetErrorString(cublasStatus_t error); |
||||
const char* curandGetErrorString(curandStatus_t error); |
||||
|
||||
// CUDA: thread number configuration.
|
||||
// Use 1024 threads per block, which requires cuda sm_2x or above,
|
||||
// or fall back to attempt compatibility (best of luck to you).
|
||||
#if __CUDA_ARCH__ >= 200 |
||||
const int CAFFE_CUDA_NUM_THREADS = 1024; |
||||
#else |
||||
const int CAFFE_CUDA_NUM_THREADS = 512; |
||||
#endif |
||||
|
||||
// CUDA: number of blocks for threads.
|
||||
inline int CAFFE_GET_BLOCKS(const int N) { |
||||
return (N + CAFFE_CUDA_NUM_THREADS - 1) / CAFFE_CUDA_NUM_THREADS; |
||||
} |
||||
|
||||
|
||||
} // namespace caffe
|
||||
|
||||
#endif // CAFFE_COMMON_HPP_
|
@ -0,0 +1,337 @@ |
||||
// Copyright 2014 BVLC and contributors.
|
||||
|
||||
#ifndef CAFFE_DATA_LAYERS_HPP_ |
||||
#define CAFFE_DATA_LAYERS_HPP_ |
||||
|
||||
#include <string> |
||||
#include <utility> |
||||
#include <vector> |
||||
#include <opencv2/opencv.hpp> |
||||
|
||||
#include "leveldb/db.h" |
||||
#include "lmdb.h" |
||||
#include "pthread.h" |
||||
#include "hdf5.h" |
||||
#include "boost/scoped_ptr.hpp" |
||||
|
||||
#include "caffe/blob.hpp" |
||||
#include "caffe/common.hpp" |
||||
#include "caffe/filler.hpp" |
||||
#include "caffe/layer.hpp" |
||||
#include "caffe/proto/caffe.pb.h" |
||||
|
||||
namespace caffe { |
||||
|
||||
#define HDF5_DATA_DATASET_NAME "data" |
||||
#define HDF5_DATA_LABEL_NAME "label" |
||||
|
||||
template <typename Dtype> |
||||
class HDF5OutputLayer : public Layer<Dtype> { |
||||
public: |
||||
explicit HDF5OutputLayer(const LayerParameter& param); |
||||
virtual ~HDF5OutputLayer(); |
||||
virtual void SetUp(const vector<Blob<Dtype>*>& bottom, |
||||
vector<Blob<Dtype>*>* top) {} |
||||
|
||||
virtual inline LayerParameter_LayerType type() const { |
||||
return LayerParameter_LayerType_HDF5_OUTPUT; |
||||
} |
||||
// TODO: no limit on the number of blobs
|
||||
virtual inline int ExactNumBottomBlobs() const { return 2; } |
||||
virtual inline int ExactNumTopBlobs() const { return 0; } |
||||
|
||||
inline std::string file_name() const { return file_name_; } |
||||
|
||||
protected: |
||||
virtual Dtype Forward_cpu(const vector<Blob<Dtype>*>& bottom, |
||||
vector<Blob<Dtype>*>* top); |
||||
virtual Dtype Forward_gpu(const vector<Blob<Dtype>*>& bottom, |
||||
vector<Blob<Dtype>*>* top); |
||||
virtual void Backward_cpu(const vector<Blob<Dtype>*>& top, |
||||
const bool propagate_down, vector<Blob<Dtype>*>* bottom); |
||||
virtual void Backward_gpu(const vector<Blob<Dtype>*>& top, |
||||
const bool propagate_down, vector<Blob<Dtype>*>* bottom); |
||||
virtual void SaveBlobs(); |
||||
|
||||
std::string file_name_; |
||||
hid_t file_id_; |
||||
Blob<Dtype> data_blob_; |
||||
Blob<Dtype> label_blob_; |
||||
}; |
||||
|
||||
|
||||
template <typename Dtype> |
||||
class HDF5DataLayer : public Layer<Dtype> { |
||||
public: |
||||
explicit HDF5DataLayer(const LayerParameter& param) |
||||
: Layer<Dtype>(param) {} |
||||
virtual ~HDF5DataLayer(); |
||||
virtual void SetUp(const vector<Blob<Dtype>*>& bottom, |
||||
vector<Blob<Dtype>*>* top); |
||||
|
||||
virtual inline LayerParameter_LayerType type() const { |
||||
return LayerParameter_LayerType_HDF5_DATA; |
||||
} |
||||
virtual inline int ExactNumBottomBlobs() const { return 0; } |
||||
virtual inline int ExactNumTopBlobs() const { return 2; } |
||||
|
||||
protected: |
||||
virtual Dtype Forward_cpu(const vector<Blob<Dtype>*>& bottom, |
||||
vector<Blob<Dtype>*>* top); |
||||
virtual Dtype Forward_gpu(const vector<Blob<Dtype>*>& bottom, |
||||
vector<Blob<Dtype>*>* top); |
||||
virtual void Backward_cpu(const vector<Blob<Dtype>*>& top, |
||||
const bool propagate_down, vector<Blob<Dtype>*>* bottom); |
||||
virtual void Backward_gpu(const vector<Blob<Dtype>*>& top, |
||||
const bool propagate_down, vector<Blob<Dtype>*>* bottom); |
||||
virtual void LoadHDF5FileData(const char* filename); |
||||
|
||||
std::vector<std::string> hdf_filenames_; |
||||
unsigned int num_files_; |
||||
unsigned int current_file_; |
||||
hsize_t current_row_; |
||||
Blob<Dtype> data_blob_; |
||||
Blob<Dtype> label_blob_; |
||||
}; |
||||
|
||||
// TODO: DataLayer, ImageDataLayer, and WindowDataLayer all have the
|
||||
// same basic structure and a lot of duplicated code.
|
||||
|
||||
// This function is used to create a pthread that prefetches the data.
|
||||
template <typename Dtype> |
||||
void* DataLayerPrefetch(void* layer_pointer); |
||||
|
||||
template <typename Dtype> |
||||
class DataLayer : public Layer<Dtype> { |
||||
// The function used to perform prefetching.
|
||||
friend void* DataLayerPrefetch<Dtype>(void* layer_pointer); |
||||
|
||||
public: |
||||
explicit DataLayer(const LayerParameter& param) |
||||
: Layer<Dtype>(param) {} |
||||
virtual ~DataLayer(); |
||||
virtual void SetUp(const vector<Blob<Dtype>*>& bottom, |
||||
vector<Blob<Dtype>*>* top); |
||||
|
||||
virtual inline LayerParameter_LayerType type() const { |
||||
return LayerParameter_LayerType_DATA; |
||||
} |
||||
virtual inline int ExactNumBottomBlobs() const { return 0; } |
||||
virtual inline int MinTopBlobs() const { return 1; } |
||||
virtual inline int MaxTopBlobs() const { return 2; } |
||||
|
||||
protected: |
||||
virtual Dtype Forward_cpu(const vector<Blob<Dtype>*>& bottom, |
||||
vector<Blob<Dtype>*>* top); |
||||
virtual Dtype Forward_gpu(const vector<Blob<Dtype>*>& bottom, |
||||
vector<Blob<Dtype>*>* top); |
||||
virtual void Backward_cpu(const vector<Blob<Dtype>*>& top, |
||||
const bool propagate_down, vector<Blob<Dtype>*>* bottom) { return; } |
||||
virtual void Backward_gpu(const vector<Blob<Dtype>*>& top, |
||||
const bool propagate_down, vector<Blob<Dtype>*>* bottom) { return; } |
||||
|
||||
virtual void CreatePrefetchThread(); |
||||
virtual void JoinPrefetchThread(); |
||||
virtual unsigned int PrefetchRand(); |
||||
|
||||
shared_ptr<Caffe::RNG> prefetch_rng_; |
||||
|
||||
// LEVELDB
|
||||
shared_ptr<leveldb::DB> db_; |
||||
shared_ptr<leveldb::Iterator> iter_; |
||||
// LMDB
|
||||
MDB_env* mdb_env_; |
||||
MDB_dbi mdb_dbi_; |
||||
MDB_txn* mdb_txn_; |
||||
MDB_cursor* mdb_cursor_; |
||||
MDB_val mdb_key_, mdb_value_; |
||||
|
||||
int datum_channels_; |
||||
int datum_height_; |
||||
int datum_width_; |
||||
int datum_size_; |
||||
pthread_t thread_; |
||||
shared_ptr<Blob<Dtype> > prefetch_data_; |
||||
shared_ptr<Blob<Dtype> > prefetch_label_; |
||||
Blob<Dtype> data_mean_; |
||||
bool output_labels_; |
||||
Caffe::Phase phase_; |
||||
}; |
||||
|
||||
template <typename Dtype> |
||||
class DummyDataLayer : public Layer<Dtype> { |
||||
public: |
||||
explicit DummyDataLayer(const LayerParameter& param) |
||||
: Layer<Dtype>(param) {} |
||||
virtual void SetUp(const vector<Blob<Dtype>*>& bottom, |
||||
vector<Blob<Dtype>*>* top); |
||||
|
||||
virtual inline LayerParameter_LayerType type() const { |
||||
return LayerParameter_LayerType_DUMMY_DATA; |
||||
} |
||||
virtual inline int ExactNumBottomBlobs() const { return 0; } |
||||
virtual inline int MinTopBlobs() const { return 1; } |
||||
|
||||
protected: |
||||
virtual Dtype Forward_cpu(const vector<Blob<Dtype>*>& bottom, |
||||
vector<Blob<Dtype>*>* top); |
||||
virtual void Backward_cpu(const vector<Blob<Dtype>*>& top, |
||||
const bool propagate_down, vector<Blob<Dtype>*>* bottom) { return; } |
||||
virtual void Backward_gpu(const vector<Blob<Dtype>*>& top, |
||||
const bool propagate_down, vector<Blob<Dtype>*>* bottom) { return; } |
||||
|
||||
vector<shared_ptr<Filler<Dtype> > > fillers_; |
||||
vector<bool> refill_; |
||||
}; |
||||
|
||||
// This function is used to create a pthread that prefetches the data.
|
||||
template <typename Dtype> |
||||
void* ImageDataLayerPrefetch(void* layer_pointer); |
||||
|
||||
template <typename Dtype> |
||||
class ImageDataLayer : public Layer<Dtype> { |
||||
// The function used to perform prefetching.
|
||||
friend void* ImageDataLayerPrefetch<Dtype>(void* layer_pointer); |
||||
|
||||
public: |
||||
explicit ImageDataLayer(const LayerParameter& param) |
||||
: Layer<Dtype>(param) {} |
||||
virtual ~ImageDataLayer(); |
||||
virtual void SetUp(const vector<Blob<Dtype>*>& bottom, |
||||
vector<Blob<Dtype>*>* top); |
||||
void SetUpWithDatum(const int crop_size, const Datum datum, |
||||
vector<Blob<Dtype>*>* top); |
||||
virtual void AddImagesAndLabels(const vector<cv::Mat>& images, |
||||
const vector<int>& labels); |
||||
|
||||
virtual inline LayerParameter_LayerType type() const { |
||||
return LayerParameter_LayerType_IMAGE_DATA; |
||||
} |
||||
virtual inline int ExactNumBottomBlobs() const { return 0; } |
||||
virtual inline int ExactNumTopBlobs() const { return 2; } |
||||
|
||||
protected: |
||||
virtual Dtype Forward_cpu(const vector<Blob<Dtype>*>& bottom, |
||||
vector<Blob<Dtype>*>* top); |
||||
virtual Dtype Forward_gpu(const vector<Blob<Dtype>*>& bottom, |
||||
vector<Blob<Dtype>*>* top); |
||||
virtual void Backward_cpu(const vector<Blob<Dtype>*>& top, |
||||
const bool propagate_down, vector<Blob<Dtype>*>* bottom) { return; } |
||||
virtual void Backward_gpu(const vector<Blob<Dtype>*>& top, |
||||
const bool propagate_down, vector<Blob<Dtype>*>* bottom) { return; } |
||||
|
||||
virtual void ShuffleImages(); |
||||
|
||||
virtual void CreatePrefetchThread(); |
||||
virtual void JoinPrefetchThread(); |
||||
virtual unsigned int PrefetchRand(); |
||||
|
||||
shared_ptr<Caffe::RNG> prefetch_rng_; |
||||
vector<std::pair<std::string, int> > lines_; |
||||
int lines_id_; |
||||
int datum_channels_; |
||||
int datum_height_; |
||||
int datum_width_; |
||||
int datum_size_; |
||||
pthread_t thread_; |
||||
shared_ptr<Blob<Dtype> > prefetch_data_; |
||||
shared_ptr<Blob<Dtype> > prefetch_label_; |
||||
Blob<Dtype> data_mean_; |
||||
Caffe::Phase phase_; |
||||
bool is_datum_set_up_; |
||||
vector<Blob<Dtype>*>* top_; |
||||
}; |
||||
|
||||
/* MemoryDataLayer
|
||||
*/ |
||||
template <typename Dtype> |
||||
class MemoryDataLayer : public Layer<Dtype> { |
||||
public: |
||||
explicit MemoryDataLayer(const LayerParameter& param) |
||||
: Layer<Dtype>(param) {} |
||||
virtual void SetUp(const vector<Blob<Dtype>*>& bottom, |
||||
vector<Blob<Dtype>*>* top); |
||||
|
||||
virtual inline LayerParameter_LayerType type() const { |
||||
return LayerParameter_LayerType_MEMORY_DATA; |
||||
} |
||||
virtual inline int ExactNumBottomBlobs() { return 0; } |
||||
virtual inline int ExactNumTopBlobs() { return 2; } |
||||
|
||||
// Reset should accept const pointers, but can't, because the memory
|
||||
// will be given to Blob, which is mutable
|
||||
void Reset(Dtype* data, Dtype* label, int n); |
||||
int datum_channels() { return datum_channels_; } |
||||
int datum_height() { return datum_height_; } |
||||
int datum_width() { return datum_width_; } |
||||
int batch_size() { return batch_size_; } |
||||
|
||||
protected: |
||||
virtual Dtype Forward_cpu(const vector<Blob<Dtype>*>& bottom, |
||||
vector<Blob<Dtype>*>* top); |
||||
virtual void Backward_cpu(const vector<Blob<Dtype>*>& top, |
||||
const bool propagate_down, vector<Blob<Dtype>*>* bottom) { return; } |
||||
virtual void Backward_gpu(const vector<Blob<Dtype>*>& top, |
||||
const bool propagate_down, vector<Blob<Dtype>*>* bottom) { return; } |
||||
|
||||
Dtype* data_; |
||||
Dtype* labels_; |
||||
int datum_channels_; |
||||
int datum_height_; |
||||
int datum_width_; |
||||
int datum_size_; |
||||
int batch_size_; |
||||
int n_; |
||||
int pos_; |
||||
}; |
||||
|
||||
// This function is used to create a pthread that prefetches the window data.
|
||||
template <typename Dtype> |
||||
void* WindowDataLayerPrefetch(void* layer_pointer); |
||||
|
||||
template <typename Dtype> |
||||
class WindowDataLayer : public Layer<Dtype> { |
||||
// The function used to perform prefetching.
|
||||
friend void* WindowDataLayerPrefetch<Dtype>(void* layer_pointer); |
||||
|
||||
public: |
||||
explicit WindowDataLayer(const LayerParameter& param) |
||||
: Layer<Dtype>(param) {} |
||||
virtual ~WindowDataLayer(); |
||||
virtual void SetUp(const vector<Blob<Dtype>*>& bottom, |
||||
vector<Blob<Dtype>*>* top); |
||||
|
||||
virtual inline LayerParameter_LayerType type() const { |
||||
return LayerParameter_LayerType_WINDOW_DATA; |
||||
} |
||||
virtual inline int ExactNumBottomBlobs() const { return 0; } |
||||
virtual inline int ExactNumTopBlobs() const { return 2; } |
||||
|
||||
protected: |
||||
virtual Dtype Forward_cpu(const vector<Blob<Dtype>*>& bottom, |
||||
vector<Blob<Dtype>*>* top); |
||||
virtual Dtype Forward_gpu(const vector<Blob<Dtype>*>& bottom, |
||||
vector<Blob<Dtype>*>* top); |
||||
virtual void Backward_cpu(const vector<Blob<Dtype>*>& top, |
||||
const bool propagate_down, vector<Blob<Dtype>*>* bottom) { return; } |
||||
virtual void Backward_gpu(const vector<Blob<Dtype>*>& top, |
||||
const bool propagate_down, vector<Blob<Dtype>*>* bottom) { return; } |
||||
|
||||
virtual void CreatePrefetchThread(); |
||||
virtual void JoinPrefetchThread(); |
||||
virtual unsigned int PrefetchRand(); |
||||
|
||||
shared_ptr<Caffe::RNG> prefetch_rng_; |
||||
pthread_t thread_; |
||||
shared_ptr<Blob<Dtype> > prefetch_data_; |
||||
shared_ptr<Blob<Dtype> > prefetch_label_; |
||||
Blob<Dtype> data_mean_; |
||||
vector<std::pair<std::string, vector<int> > > image_database_; |
||||
enum WindowField { IMAGE_INDEX, LABEL, OVERLAP, X1, Y1, X2, Y2, NUM }; |
||||
vector<vector<float> > fg_windows_; |
||||
vector<vector<float> > bg_windows_; |
||||
}; |
||||
|
||||
} // namespace caffe
|
||||
|
||||
#endif // CAFFE_DATA_LAYERS_HPP_
|
@ -0,0 +1,173 @@ |
||||
// Copyright 2014 BVLC and contributors.
|
||||
|
||||
// Fillers are random number generators that fills a blob using the specified
|
||||
// algorithm. The expectation is that they are only going to be used during
|
||||
// initialization time and will not involve any GPUs.
|
||||
|
||||
#ifndef CAFFE_FILLER_HPP |
||||
#define CAFFE_FILLER_HPP |
||||
|
||||
#include <string> |
||||
|
||||
#include "caffe/common.hpp" |
||||
#include "caffe/blob.hpp" |
||||
#include "caffe/syncedmem.hpp" |
||||
#include "caffe/util/math_functions.hpp" |
||||
#include "caffe/proto/caffe.pb.h" |
||||
|
||||
namespace caffe { |
||||
|
||||
template <typename Dtype> |
||||
class Filler { |
||||
public: |
||||
explicit Filler(const FillerParameter& param) : filler_param_(param) {} |
||||
virtual ~Filler() {} |
||||
virtual void Fill(Blob<Dtype>* blob) = 0; |
||||
protected: |
||||
FillerParameter filler_param_; |
||||
}; // class Filler
|
||||
|
||||
|
||||
template <typename Dtype> |
||||
class ConstantFiller : public Filler<Dtype> { |
||||
public: |
||||
explicit ConstantFiller(const FillerParameter& param) |
||||
: Filler<Dtype>(param) {} |
||||
virtual void Fill(Blob<Dtype>* blob) { |
||||
Dtype* data = blob->mutable_cpu_data(); |
||||
const int count = blob->count(); |
||||
const Dtype value = this->filler_param_.value(); |
||||
CHECK(count); |
||||
for (int i = 0; i < count; ++i) { |
||||
data[i] = value; |
||||
} |
||||
CHECK_EQ(this->filler_param_.sparse(), -1) |
||||
<< "Sparsity not supported by this Filler."; |
||||
} |
||||
}; |
||||
|
||||
template <typename Dtype> |
||||
class UniformFiller : public Filler<Dtype> { |
||||
public: |
||||
explicit UniformFiller(const FillerParameter& param) |
||||
: Filler<Dtype>(param) {} |
||||
virtual void Fill(Blob<Dtype>* blob) { |
||||
CHECK(blob->count()); |
||||
caffe_rng_uniform<Dtype>(blob->count(), Dtype(this->filler_param_.min()), |
||||
Dtype(this->filler_param_.max()), blob->mutable_cpu_data()); |
||||
CHECK_EQ(this->filler_param_.sparse(), -1) |
||||
<< "Sparsity not supported by this Filler."; |
||||
} |
||||
}; |
||||
|
||||
template <typename Dtype> |
||||
class GaussianFiller : public Filler<Dtype> { |
||||
public: |
||||
explicit GaussianFiller(const FillerParameter& param) |
||||
: Filler<Dtype>(param) {} |
||||
virtual void Fill(Blob<Dtype>* blob) { |
||||
Dtype* data = blob->mutable_cpu_data(); |
||||
CHECK(blob->count()); |
||||
caffe_rng_gaussian<Dtype>(blob->count(), Dtype(this->filler_param_.mean()), |
||||
Dtype(this->filler_param_.std()), blob->mutable_cpu_data()); |
||||
int sparse = this->filler_param_.sparse(); |
||||
CHECK_GE(sparse, -1); |
||||
if (sparse >= 0) { |
||||
// Sparse initialization is implemented for "weight" blobs; i.e. matrices.
|
||||
// These have num == channels == 1; height is number of inputs; width is
|
||||
// number of outputs. The 'sparse' variable specifies the mean number
|
||||
// of non-zero input weights for a given output.
|
||||
CHECK_EQ(blob->num(), 1); |
||||
CHECK_EQ(blob->channels(), 1); |
||||
int num_inputs = blob->height(); |
||||
Dtype non_zero_probability = Dtype(sparse) / Dtype(num_inputs); |
||||
rand_vec_.reset(new SyncedMemory(blob->count() * sizeof(int))); |
||||
int* mask = reinterpret_cast<int*>(rand_vec_->mutable_cpu_data()); |
||||
caffe_rng_bernoulli(blob->count(), non_zero_probability, mask); |
||||
for (int i = 0; i < blob->count(); ++i) { |
||||
data[i] *= mask[i]; |
||||
} |
||||
} |
||||
} |
||||
|
||||
protected: |
||||
shared_ptr<SyncedMemory> rand_vec_; |
||||
}; |
||||
|
||||
template <typename Dtype> |
||||
class PositiveUnitballFiller : public Filler<Dtype> { |
||||
public: |
||||
explicit PositiveUnitballFiller(const FillerParameter& param) |
||||
: Filler<Dtype>(param) {} |
||||
virtual void Fill(Blob<Dtype>* blob) { |
||||
Dtype* data = blob->mutable_cpu_data(); |
||||
DCHECK(blob->count()); |
||||
caffe_rng_uniform<Dtype>(blob->count(), 0, 1, blob->mutable_cpu_data()); |
||||
// We expect the filler to not be called very frequently, so we will
|
||||
// just use a simple implementation
|
||||
int dim = blob->count() / blob->num(); |
||||
CHECK(dim); |
||||
for (int i = 0; i < blob->num(); ++i) { |
||||
Dtype sum = 0; |
||||
for (int j = 0; j < dim; ++j) { |
||||
sum += data[i * dim + j]; |
||||
} |
||||
for (int j = 0; j < dim; ++j) { |
||||
data[i * dim + j] /= sum; |
||||
} |
||||
} |
||||
CHECK_EQ(this->filler_param_.sparse(), -1) |
||||
<< "Sparsity not supported by this Filler."; |
||||
} |
||||
}; |
||||
|
||||
// A filler based on the paper [Bengio and Glorot 2010]: Understanding
|
||||
// the difficulty of training deep feedforward neuralnetworks, but does not
|
||||
// use the fan_out value.
|
||||
//
|
||||
// It fills the incoming matrix by randomly sampling uniform data from
|
||||
// [-scale, scale] where scale = sqrt(3 / fan_in) where fan_in is the number
|
||||
// of input nodes. You should make sure the input blob has shape (num, a, b, c)
|
||||
// where a * b * c = fan_in.
|
||||
template <typename Dtype> |
||||
class XavierFiller : public Filler<Dtype> { |
||||
public: |
||||
explicit XavierFiller(const FillerParameter& param) |
||||
: Filler<Dtype>(param) {} |
||||
virtual void Fill(Blob<Dtype>* blob) { |
||||
CHECK(blob->count()); |
||||
int fan_in = blob->count() / blob->num(); |
||||
Dtype scale = sqrt(Dtype(3) / fan_in); |
||||
caffe_rng_uniform<Dtype>(blob->count(), -scale, scale, |
||||
blob->mutable_cpu_data()); |
||||
CHECK_EQ(this->filler_param_.sparse(), -1) |
||||
<< "Sparsity not supported by this Filler."; |
||||
} |
||||
}; |
||||
|
||||
|
||||
// A function to get a specific filler from the specification given in
|
||||
// FillerParameter. Ideally this would be replaced by a factory pattern,
|
||||
// but we will leave it this way for now.
|
||||
template <typename Dtype> |
||||
Filler<Dtype>* GetFiller(const FillerParameter& param) { |
||||
const std::string& type = param.type(); |
||||
if (type == "constant") { |
||||
return new ConstantFiller<Dtype>(param); |
||||
} else if (type == "gaussian") { |
||||
return new GaussianFiller<Dtype>(param); |
||||
} else if (type == "positive_unitball") { |
||||
return new PositiveUnitballFiller<Dtype>(param); |
||||
} else if (type == "uniform") { |
||||
return new UniformFiller<Dtype>(param); |
||||
} else if (type == "xavier") { |
||||
return new XavierFiller<Dtype>(param); |
||||
} else { |
||||
CHECK(false) << "Unknown filler name: " << param.type(); |
||||
} |
||||
return (Filler<Dtype>*)(NULL); |
||||
} |
||||
|
||||
} // namespace caffe
|
||||
|
||||
#endif // CAFFE_FILLER_HPP_
|
@ -0,0 +1,206 @@ |
||||
// Copyright 2014 BVLC and contributors.
|
||||
|
||||
#ifndef CAFFE_LAYER_H_ |
||||
#define CAFFE_LAYER_H_ |
||||
|
||||
#include <string> |
||||
#include <vector> |
||||
|
||||
#include "caffe/blob.hpp" |
||||
#include "caffe/common.hpp" |
||||
#include "caffe/proto/caffe.pb.h" |
||||
|
||||
using std::string; |
||||
using std::vector; |
||||
|
||||
namespace caffe { |
||||
|
||||
template <typename Dtype> |
||||
class Layer { |
||||
public: |
||||
// You should not implement your own constructor. Any set up code should go
|
||||
// to SetUp(), where the dimensions of the bottom blobs are provided to the
|
||||
// layer.
|
||||
explicit Layer(const LayerParameter& param) |
||||
: layer_param_(param) { |
||||
// The only thing we do is to copy blobs if there are any.
|
||||
if (layer_param_.blobs_size() > 0) { |
||||
blobs_.resize(layer_param_.blobs_size()); |
||||
for (int i = 0; i < layer_param_.blobs_size(); ++i) { |
||||
blobs_[i].reset(new Blob<Dtype>()); |
||||
blobs_[i]->FromProto(layer_param_.blobs(i)); |
||||
} |
||||
} |
||||
} |
||||
virtual ~Layer() {} |
||||
// SetUp: your function should implement this, and call Layer::SetUp for
|
||||
// common SetUp functionality.
|
||||
virtual void SetUp(const vector<Blob<Dtype>*>& bottom, |
||||
vector<Blob<Dtype>*>* top) { |
||||
CheckBlobCounts(bottom, *top); |
||||
} |
||||
|
||||
// Forward and backward wrappers. You should implement the cpu and
|
||||
// gpu specific implementations instead, and should not change these
|
||||
// functions.
|
||||
inline Dtype Forward(const vector<Blob<Dtype>*>& bottom, |
||||
vector<Blob<Dtype>*>* top); |
||||
inline void Backward(const vector<Blob<Dtype>*>& top, |
||||
const bool propagate_down, |
||||
vector<Blob<Dtype>*>* bottom); |
||||
|
||||
// Returns the vector of blobs.
|
||||
vector<shared_ptr<Blob<Dtype> > >& blobs() { |
||||
return blobs_; |
||||
} |
||||
|
||||
// Returns the layer parameter
|
||||
const LayerParameter& layer_param() { return layer_param_; } |
||||
// Writes the layer parameter to a protocol buffer
|
||||
virtual void ToProto(LayerParameter* param, bool write_diff = false); |
||||
|
||||
// Returns the layer type as an enum value.
|
||||
virtual inline LayerParameter_LayerType type() const { |
||||
return LayerParameter_LayerType_NONE; |
||||
} |
||||
|
||||
// Returns the layer type name.
|
||||
virtual inline const string& type_name() const { |
||||
return LayerParameter_LayerType_Name(type()); |
||||
} |
||||
|
||||
// These methods can be overwritten to declare that this layer type expects
|
||||
// a certain number of blobs as input and output.
|
||||
//
|
||||
// ExactNum{Bottom,Top}Blobs return a non-negative number to require an exact
|
||||
// number of bottom/top blobs; the Min/Max versions return a non-negative
|
||||
// number to require a minimum and/or maximum number of blobs.
|
||||
// If Exact is specified, neither Min nor Max should be specified, and vice
|
||||
// versa. These methods may not rely on SetUp having been called.
|
||||
virtual inline int ExactNumBottomBlobs() const { return -1; } |
||||
virtual inline int MinBottomBlobs() const { return -1; } |
||||
virtual inline int MaxBottomBlobs() const { return -1; } |
||||
virtual inline int ExactNumTopBlobs() const { return -1; } |
||||
virtual inline int MinTopBlobs() const { return -1; } |
||||
virtual inline int MaxTopBlobs() const { return -1; } |
||||
|
||||
protected: |
||||
// The protobuf that stores the layer parameters
|
||||
LayerParameter layer_param_; |
||||
// The vector that stores the parameters as a set of blobs.
|
||||
vector<shared_ptr<Blob<Dtype> > > blobs_; |
||||
|
||||
// Forward functions: compute the layer output
|
||||
// (and loss layers return the loss; other layers return the dummy value 0.)
|
||||
virtual Dtype Forward_cpu(const vector<Blob<Dtype>*>& bottom, |
||||
vector<Blob<Dtype>*>* top) = 0; |
||||
// If no gpu code is provided, we will simply use cpu code.
|
||||
virtual Dtype Forward_gpu(const vector<Blob<Dtype>*>& bottom, |
||||
vector<Blob<Dtype>*>* top) { |
||||
// LOG(WARNING) << "Using CPU code as backup.";
|
||||
return Forward_cpu(bottom, top); |
||||
} |
||||
|
||||
// Backward functions: compute the gradients for any parameters and
|
||||
// for the bottom blobs if propagate_down is true.
|
||||
virtual void Backward_cpu(const vector<Blob<Dtype>*>& top, |
||||
const bool propagate_down, |
||||
vector<Blob<Dtype>*>* bottom) = 0; |
||||
virtual void Backward_gpu(const vector<Blob<Dtype>*>& top, |
||||
const bool propagate_down, |
||||
vector<Blob<Dtype>*>* bottom) { |
||||
// LOG(WARNING) << "Using CPU code as backup.";
|
||||
Backward_cpu(top, propagate_down, bottom); |
||||
} |
||||
|
||||
// CheckBlobCounts: called by the parent Layer's SetUp to check that the
|
||||
// number of bottom and top Blobs provided as input match the expected
|
||||
// numbers specified by the {ExactNum,Min,Max}{Bottom,Top}Blobs() functions.
|
||||
virtual void CheckBlobCounts(const vector<Blob<Dtype>*>& bottom, |
||||
const vector<Blob<Dtype>*>& top) { |
||||
if (ExactNumBottomBlobs() >= 0) { |
||||
CHECK_EQ(ExactNumBottomBlobs(), bottom.size()) |
||||
<< type_name() << " Layer takes " << ExactNumBottomBlobs() |
||||
<< " bottom blob(s) as input."; |
||||
} |
||||
if (MinBottomBlobs() >= 0) { |
||||
CHECK_LE(MinBottomBlobs(), bottom.size()) |
||||
<< type_name() << " Layer takes at least " << MinBottomBlobs() |
||||
<< " bottom blob(s) as input."; |
||||
} |
||||
if (MaxBottomBlobs() >= 0) { |
||||
CHECK_GE(MaxBottomBlobs(), bottom.size()) |
||||
<< type_name() << " Layer takes at most " << MaxBottomBlobs() |
||||
<< " bottom blob(s) as input."; |
||||
} |
||||
if (ExactNumTopBlobs() >= 0) { |
||||
CHECK_EQ(ExactNumTopBlobs(), top.size()) |
||||
<< type_name() << " Layer produces " << ExactNumTopBlobs() |
||||
<< " top blob(s) as output."; |
||||
} |
||||
if (MinTopBlobs() >= 0) { |
||||
CHECK_LE(MinTopBlobs(), top.size()) |
||||
<< type_name() << " Layer produces at least " << MinTopBlobs() |
||||
<< " top blob(s) as output."; |
||||
} |
||||
if (MaxTopBlobs() >= 0) { |
||||
CHECK_GE(MaxTopBlobs(), top.size()) |
||||
<< type_name() << " Layer produces at most " << MaxTopBlobs() |
||||
<< " top blob(s) as output."; |
||||
} |
||||
} |
||||
|
||||
DISABLE_COPY_AND_ASSIGN(Layer); |
||||
}; // class Layer
|
||||
|
||||
// Forward and backward wrappers. You should implement the cpu and
|
||||
// gpu specific implementations instead, and should not change these
|
||||
// functions.
|
||||
template <typename Dtype> |
||||
inline Dtype Layer<Dtype>::Forward(const vector<Blob<Dtype>*>& bottom, |
||||
vector<Blob<Dtype>*>* top) { |
||||
switch (Caffe::mode()) { |
||||
case Caffe::CPU: |
||||
return Forward_cpu(bottom, top); |
||||
case Caffe::GPU: |
||||
return Forward_gpu(bottom, top); |
||||
default: |
||||
LOG(FATAL) << "Unknown caffe mode."; |
||||
return Dtype(0); |
||||
} |
||||
} |
||||
|
||||
template <typename Dtype> |
||||
inline void Layer<Dtype>::Backward(const vector<Blob<Dtype>*>& top, |
||||
const bool propagate_down, |
||||
vector<Blob<Dtype>*>* bottom) { |
||||
switch (Caffe::mode()) { |
||||
case Caffe::CPU: |
||||
Backward_cpu(top, propagate_down, bottom); |
||||
break; |
||||
case Caffe::GPU: |
||||
Backward_gpu(top, propagate_down, bottom); |
||||
break; |
||||
default: |
||||
LOG(FATAL) << "Unknown caffe mode."; |
||||
} |
||||
} |
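// A minimal sketch (hypothetical IdentityLayer, not part of Caffe) of how a
// concrete layer plugs into the dispatch above: it only overrides the
// *_cpu hooks, inherits the CPU fallback when running in GPU mode, and
// declares its blob counts so Layer::SetUp / CheckBlobCounts can validate
// the net definition.
template <typename Dtype>
class IdentityLayer : public Layer<Dtype> {
 public:
  explicit IdentityLayer(const LayerParameter& param) : Layer<Dtype>(param) {}
  virtual void SetUp(const vector<Blob<Dtype>*>& bottom,
      vector<Blob<Dtype>*>* top) {
    Layer<Dtype>::SetUp(bottom, top);  // runs the common blob-count checks
    (*top)[0]->Reshape(bottom[0]->num(), bottom[0]->channels(),
                       bottom[0]->height(), bottom[0]->width());
  }
  virtual inline int ExactNumBottomBlobs() const { return 1; }
  virtual inline int ExactNumTopBlobs() const { return 1; }

 protected:
  virtual Dtype Forward_cpu(const vector<Blob<Dtype>*>& bottom,
      vector<Blob<Dtype>*>* top) {
    const Dtype* src = bottom[0]->cpu_data();
    Dtype* dst = (*top)[0]->mutable_cpu_data();
    for (int i = 0; i < bottom[0]->count(); ++i) { dst[i] = src[i]; }
    return Dtype(0);  // non-loss layers return a dummy 0
  }
  virtual void Backward_cpu(const vector<Blob<Dtype>*>& top,
      const bool propagate_down, vector<Blob<Dtype>*>* bottom) {
    if (!propagate_down) { return; }
    const Dtype* src = top[0]->cpu_diff();
    Dtype* dst = (*bottom)[0]->mutable_cpu_diff();
    for (int i = 0; i < top[0]->count(); ++i) { dst[i] = src[i]; }
  }
};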
||||
|
||||
// Serialize LayerParameter to protocol buffer
|
||||
template <typename Dtype> |
||||
void Layer<Dtype>::ToProto(LayerParameter* param, bool write_diff) { |
||||
param->Clear(); |
||||
param->CopyFrom(layer_param_); |
||||
param->clear_blobs(); |
||||
for (int i = 0; i < blobs_.size(); ++i) { |
||||
blobs_[i]->ToProto(param->add_blobs(), write_diff); |
||||
} |
||||
} |
||||
|
||||
// The layer factory function
|
||||
template <typename Dtype> |
||||
Layer<Dtype>* GetLayer(const LayerParameter& param); |
||||
|
||||
} // namespace caffe
|
||||
|
||||
#endif // CAFFE_LAYER_H_
|
@ -0,0 +1,198 @@ |
||||
// Copyright 2014 BVLC and contributors.
|
||||
|
||||
#ifndef CAFFE_LOSS_LAYERS_HPP_ |
||||
#define CAFFE_LOSS_LAYERS_HPP_ |
||||
|
||||
#include <string> |
||||
#include <utility> |
||||
#include <vector> |
||||
|
||||
#include "leveldb/db.h" |
||||
#include "pthread.h" |
||||
#include "boost/scoped_ptr.hpp" |
||||
#include "hdf5.h" |
||||
|
||||
#include "caffe/blob.hpp" |
||||
#include "caffe/common.hpp" |
||||
#include "caffe/layer.hpp" |
||||
#include "caffe/neuron_layers.hpp" |
||||
#include "caffe/proto/caffe.pb.h" |
||||
|
||||
namespace caffe { |
||||
|
||||
const float kLOG_THRESHOLD = 1e-20; |
||||
|
||||
/* LossLayer
|
||||
Takes two inputs of the same num (a and b), and has no output. |
||||
The gradient is propagated to a. |
||||
*/ |
||||
template <typename Dtype> |
||||
class LossLayer : public Layer<Dtype> { |
||||
public: |
||||
explicit LossLayer(const LayerParameter& param) |
||||
: Layer<Dtype>(param) {} |
||||
virtual void SetUp( |
||||
const vector<Blob<Dtype>*>& bottom, vector<Blob<Dtype>*>* top); |
||||
virtual void FurtherSetUp( |
||||
const vector<Blob<Dtype>*>& bottom, vector<Blob<Dtype>*>* top) {} |
||||
|
||||
virtual inline int ExactNumBottomBlobs() const { return 2; } |
||||
virtual inline int ExactNumTopBlobs() const { return 0; } |
||||
}; |
||||
|
||||
/* SigmoidCrossEntropyLossLayer
|
||||
*/ |
||||
template <typename Dtype> |
||||
class SigmoidCrossEntropyLossLayer : public LossLayer<Dtype> { |
||||
public: |
||||
explicit SigmoidCrossEntropyLossLayer(const LayerParameter& param) |
||||
: LossLayer<Dtype>(param), |
||||
sigmoid_layer_(new SigmoidLayer<Dtype>(param)), |
||||
sigmoid_output_(new Blob<Dtype>()) {} |
||||
virtual void FurtherSetUp(const vector<Blob<Dtype>*>& bottom, |
||||
vector<Blob<Dtype>*>* top); |
||||
|
||||
virtual inline LayerParameter_LayerType type() const { |
||||
return LayerParameter_LayerType_SIGMOID_CROSS_ENTROPY_LOSS; |
||||
} |
||||
|
||||
protected: |
||||
virtual Dtype Forward_cpu(const vector<Blob<Dtype>*>& bottom, |
||||
vector<Blob<Dtype>*>* top); |
||||
virtual Dtype Forward_gpu(const vector<Blob<Dtype>*>& bottom, |
||||
vector<Blob<Dtype>*>* top); |
||||
virtual void Backward_cpu(const vector<Blob<Dtype>*>& top, |
||||
const bool propagate_down, vector<Blob<Dtype>*>* bottom); |
||||
virtual void Backward_gpu(const vector<Blob<Dtype>*>& top, |
||||
const bool propagate_down, vector<Blob<Dtype>*>* bottom); |
||||
|
||||
shared_ptr<SigmoidLayer<Dtype> > sigmoid_layer_; |
||||
// sigmoid_output stores the output of the sigmoid layer.
|
||||
shared_ptr<Blob<Dtype> > sigmoid_output_; |
||||
// Vector holders to call the underlying sigmoid layer forward and backward.
|
||||
vector<Blob<Dtype>*> sigmoid_bottom_vec_; |
||||
vector<Blob<Dtype>*> sigmoid_top_vec_; |
||||
}; |
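// A minimal sketch (hypothetical helper, not the layer's real code) of how
// the holder vectors above are typically wired, so the wrapped SigmoidLayer
// can be driven with the loss layer's own logit blob during SetUp/Forward.
template <typename Dtype>
void WireSigmoidHolders(Blob<Dtype>* logits, Blob<Dtype>* sigmoid_output,
    SigmoidLayer<Dtype>* sigmoid_layer,
    vector<Blob<Dtype>*>* sigmoid_bottom_vec,
    vector<Blob<Dtype>*>* sigmoid_top_vec) {
  sigmoid_bottom_vec->clear();
  sigmoid_bottom_vec->push_back(logits);
  sigmoid_top_vec->clear();
  sigmoid_top_vec->push_back(sigmoid_output);
  sigmoid_layer->SetUp(*sigmoid_bottom_vec, sigmoid_top_vec);
}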
||||
|
||||
/* EuclideanLossLayer
|
||||
Compute the L_2 distance between the two inputs. |
||||
|
||||
loss = (1/2 \sum_i (a_i - b_i)^2) |
||||
a' = 1/I (a - b) |
||||
*/ |
||||
template <typename Dtype> |
||||
class EuclideanLossLayer : public LossLayer<Dtype> { |
||||
public: |
||||
explicit EuclideanLossLayer(const LayerParameter& param) |
||||
: LossLayer<Dtype>(param), diff_() {} |
||||
virtual void FurtherSetUp(const vector<Blob<Dtype>*>& bottom, |
||||
vector<Blob<Dtype>*>* top); |
||||
|
||||
virtual inline LayerParameter_LayerType type() const { |
||||
return LayerParameter_LayerType_EUCLIDEAN_LOSS; |
||||
} |
||||
|
||||
protected: |
||||
virtual Dtype Forward_cpu(const vector<Blob<Dtype>*>& bottom, |
||||
vector<Blob<Dtype>*>* top); |
||||
virtual void Backward_cpu(const vector<Blob<Dtype>*>& top, |
||||
const bool propagate_down, vector<Blob<Dtype>*>* bottom); |
||||
|
||||
Blob<Dtype> diff_; |
||||
}; |
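// A minimal sketch (hypothetical euclidean_loss_sketch helper, not the
// layer's real code) of the arithmetic in the comment above, with the loss
// averaged over the I = num instances so that it matches the gradient line
// a' = 1/I (a - b).
template <typename Dtype>
Dtype euclidean_loss_sketch(const Blob<Dtype>& a, const Blob<Dtype>& b,
    Blob<Dtype>* a_diff) {
  const int count = a.count();
  const Dtype num = Dtype(a.num());
  Dtype loss = 0;
  for (int i = 0; i < count; ++i) {
    const Dtype d = a.cpu_data()[i] - b.cpu_data()[i];
    loss += d * d / (Dtype(2) * num);          // 1/2 sum_i (a_i - b_i)^2, averaged
    a_diff->mutable_cpu_data()[i] = d / num;   // a' = 1/I (a - b)
  }
  return loss;
}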
||||
|
||||
/* InfogainLossLayer
|
||||
*/ |
||||
template <typename Dtype> |
||||
class InfogainLossLayer : public LossLayer<Dtype> { |
||||
public: |
||||
explicit InfogainLossLayer(const LayerParameter& param) |
||||
: LossLayer<Dtype>(param), infogain_() {} |
||||
virtual void FurtherSetUp(const vector<Blob<Dtype>*>& bottom, |
||||
vector<Blob<Dtype>*>* top); |
||||
|
||||
virtual inline LayerParameter_LayerType type() const { |
||||
return LayerParameter_LayerType_INFOGAIN_LOSS; |
||||
} |
||||
|
||||
protected: |
||||
virtual Dtype Forward_cpu(const vector<Blob<Dtype>*>& bottom, |
||||
vector<Blob<Dtype>*>* top); |
||||
virtual void Backward_cpu(const vector<Blob<Dtype>*>& top, |
||||
const bool propagate_down, vector<Blob<Dtype>*>* bottom); |
||||
|
||||
Blob<Dtype> infogain_; |
||||
}; |
||||
|
||||
/* HingeLossLayer
|
||||
*/ |
||||
template <typename Dtype> |
||||
class HingeLossLayer : public LossLayer<Dtype> { |
||||
public: |
||||
explicit HingeLossLayer(const LayerParameter& param) |
||||
: LossLayer<Dtype>(param) {} |
||||
|
||||
virtual inline LayerParameter_LayerType type() const { |
||||
return LayerParameter_LayerType_HINGE_LOSS; |
||||
} |
||||
|
||||
protected: |
||||
virtual Dtype Forward_cpu(const vector<Blob<Dtype>*>& bottom, |
||||
vector<Blob<Dtype>*>* top); |
||||
virtual void Backward_cpu(const vector<Blob<Dtype>*>& top, |
||||
const bool propagate_down, vector<Blob<Dtype>*>* bottom); |
||||
}; |
||||
|
||||
/* MultinomialLogisticLossLayer
|
||||
*/ |
||||
template <typename Dtype> |
||||
class MultinomialLogisticLossLayer : public LossLayer<Dtype> { |
||||
public: |
||||
explicit MultinomialLogisticLossLayer(const LayerParameter& param) |
||||
: LossLayer<Dtype>(param) {} |
||||
virtual void FurtherSetUp(const vector<Blob<Dtype>*>& bottom, |
||||
vector<Blob<Dtype>*>* top); |
||||
|
||||
virtual inline LayerParameter_LayerType type() const { |
||||
return LayerParameter_LayerType_MULTINOMIAL_LOGISTIC_LOSS; |
||||
} |
||||
|
||||
protected: |
||||
virtual Dtype Forward_cpu(const vector<Blob<Dtype>*>& bottom, |
||||
vector<Blob<Dtype>*>* top); |
||||
virtual void Backward_cpu(const vector<Blob<Dtype>*>& top, |
||||
const bool propagate_down, vector<Blob<Dtype>*>* bottom); |
||||
}; |
||||
|
||||
/* AccuracyLayer
|
||||
Note: not an actual loss layer! Does not implement backwards step. |
||||
Computes the accuracy and logprob of a with respect to b. |
||||
*/ |
||||
template <typename Dtype> |
||||
class AccuracyLayer : public Layer<Dtype> { |
||||
public: |
||||
explicit AccuracyLayer(const LayerParameter& param) |
||||
: Layer<Dtype>(param) {} |
||||
virtual void SetUp(const vector<Blob<Dtype>*>& bottom, |
||||
vector<Blob<Dtype>*>* top); |
||||
|
||||
virtual inline LayerParameter_LayerType type() const { |
||||
return LayerParameter_LayerType_ACCURACY; |
||||
} |
||||
|
||||
protected: |
||||
virtual Dtype Forward_cpu(const vector<Blob<Dtype>*>& bottom, |
||||
vector<Blob<Dtype>*>* top); |
||||
virtual void Backward_cpu(const vector<Blob<Dtype>*>& top, |
||||
const bool propagate_down, vector<Blob<Dtype>*>* bottom) { |
||||
NOT_IMPLEMENTED; |
||||
} |
||||
}; |
||||
|
||||
/* Also see
|
||||
- SoftmaxWithLossLayer in vision_layers.hpp |
||||
*/ |
||||
|
||||
} // namespace caffe
|
||||
|
||||
#endif // CAFFE_LOSS_LAYERS_HPP_
|
@ -0,0 +1,157 @@ |
||||
// Copyright 2014 BVLC and contributors.
|
||||
|
||||
#ifndef CAFFE_NET_HPP_ |
||||
#define CAFFE_NET_HPP_ |
||||
|
||||
#include <map> |
||||
#include <set> |
||||
#include <string> |
||||
#include <vector> |
||||
|
||||
#include "caffe/blob.hpp" |
||||
#include "caffe/common.hpp" |
||||
#include "caffe/layer.hpp" |
||||
#include "caffe/proto/caffe.pb.h" |
||||
|
||||
using std::map; |
||||
using std::vector; |
||||
using std::set; |
||||
using std::string; |
||||
|
||||
namespace caffe { |
||||
|
||||
|
||||
template <typename Dtype> |
||||
class Net { |
||||
public: |
||||
explicit Net(const NetParameter& param); |
||||
explicit Net(const string& param_file); |
||||
virtual ~Net() {} |
||||
|
||||
// Initialize a network with the network parameter.
|
||||
void Init(const NetParameter& param); |
||||
|
||||
// Run forward with the input blobs already fed separately. You can get the
|
||||
// input blobs using input_blobs().
|
||||
const vector<Blob<Dtype>*>& ForwardPrefilled(Dtype* loss = NULL); |
||||
// Run forward using a set of bottom blobs, and return the result.
|
||||
const vector<Blob<Dtype>*>& Forward(const vector<Blob<Dtype>* > & bottom, |
||||
Dtype* loss = NULL); |
||||
// Run forward using a serialized BlobProtoVector and return the result
|
||||
// as a serialized BlobProtoVector
|
||||
string Forward(const string& input_blob_protos, Dtype* loss = NULL); |
||||
|
||||
// The network backward should take no input and output, since it solely
|
||||
// computes the gradient w.r.t the parameters, and the data has already
|
||||
// been provided during the forward pass.
|
||||
void Backward(); |
||||
|
||||
Dtype ForwardBackward(const vector<Blob<Dtype>* > & bottom) { |
||||
Dtype loss; |
||||
Forward(bottom, &loss); |
||||
Backward(); |
||||
return loss; |
||||
} |
||||
|
||||
// Updates the network weights based on the diff values computed.
|
||||
void Update(); |
||||
|
||||
// For an already initialized net, ShareTrainedLayersWith() implicitly copies
|
||||
// (i.e., using no additional memory) the already trained layers from another
|
||||
// Net.
|
||||
void ShareTrainedLayersWith(Net* other); |
||||
// For an already initialized net, CopyTrainedLayersFrom() copies the already
|
||||
// trained layers from another net parameter instance.
|
||||
void CopyTrainedLayersFrom(const NetParameter& param); |
||||
void CopyTrainedLayersFrom(const string trained_filename); |
||||
// Writes the net to a proto.
|
||||
void ToProto(NetParameter* param, bool write_diff = false); |
||||
|
||||
// returns the network name.
|
||||
inline const string& name() { return name_; } |
||||
// returns the layer names
|
||||
inline const vector<string>& layer_names() { return layer_names_; } |
||||
// returns the blob names
|
||||
inline const vector<string>& blob_names() { return blob_names_; } |
||||
// returns the blobs
|
||||
inline const vector<shared_ptr<Blob<Dtype> > >& blobs() { return blobs_; } |
||||
// returns the layers
|
||||
inline const vector<shared_ptr<Layer<Dtype> > >& layers() { return layers_; } |
||||
// returns the bottom and top vecs for each layer - usually you won't need
|
||||
// this unless you do per-layer checks such as gradients.
|
||||
inline vector<vector<Blob<Dtype>*> >& bottom_vecs() { return bottom_vecs_; } |
||||
inline vector<vector<Blob<Dtype>*> >& top_vecs() { return top_vecs_; } |
||||
// returns the parameters
|
||||
inline vector<shared_ptr<Blob<Dtype> > >& params() { return params_; } |
||||
// returns the parameter learning rate multipliers
|
||||
inline vector<float>& params_lr() {return params_lr_; } |
||||
inline vector<float>& params_weight_decay() { return params_weight_decay_; } |
||||
// Input and output blob numbers
|
||||
inline int num_inputs() { return net_input_blobs_.size(); } |
||||
inline int num_outputs() { return net_output_blobs_.size(); } |
||||
inline vector<Blob<Dtype>*>& input_blobs() { return net_input_blobs_; } |
||||
inline vector<Blob<Dtype>*>& output_blobs() { return net_output_blobs_; } |
||||
inline vector<int>& input_blob_indices() { return net_input_blob_indices_; } |
||||
inline vector<int>& output_blob_indices() { return net_output_blob_indices_; } |
||||
// has_blob and blob_by_name are inspired by
|
||||
// https://github.com/kencoken/caffe/commit/f36e71569455c9fbb4bf8a63c2d53224e32a4e7b
|
||||
// Access intermediary computation layers, testing with centre image only
|
||||
bool has_blob(const string& blob_name); |
||||
const shared_ptr<Blob<Dtype> > blob_by_name(const string& blob_name); |
||||
bool has_layer(const string& layer_name); |
||||
const shared_ptr<Layer<Dtype> > layer_by_name(const string& layer_name); |
||||
|
||||
protected: |
||||
// Helpers for Init.
|
||||
// Append a new input or top blob to the net.
|
||||
void AppendTop(const NetParameter& param, const int layer_id, |
||||
const int top_id, set<string>* available_blobs, |
||||
map<string, int>* blob_name_to_idx); |
||||
// Append a new bottom blob to the net.
|
||||
int AppendBottom(const NetParameter& param, const int layer_id, |
||||
const int bottom_id, set<string>* available_blobs, |
||||
map<string, int>* blob_name_to_idx); |
||||
// Function to get misc parameters, e.g. the learning rate multiplier and
|
||||
// weight decay.
|
||||
void GetLearningRateAndWeightDecay(); |
||||
|
||||
// Individual layers in the net
|
||||
vector<shared_ptr<Layer<Dtype> > > layers_; |
||||
vector<string> layer_names_; |
||||
map<string, int> layer_names_index_; |
||||
vector<bool> layer_need_backward_; |
||||
// blobs stores the blobs that store intermediate results between the
|
||||
// layers.
|
||||
vector<shared_ptr<Blob<Dtype> > > blobs_; |
||||
vector<string> blob_names_; |
||||
map<string, int> blob_names_index_; |
||||
vector<bool> blob_need_backward_; |
||||
// bottom_vecs stores the vectors containing the input for each layer.
|
||||
// They don't actually host the blobs (blobs_ does), so we simply store
|
||||
// pointers.
|
||||
vector<vector<Blob<Dtype>*> > bottom_vecs_; |
||||
vector<vector<int> > bottom_id_vecs_; |
||||
// top_vecs stores the vectors containing the output for each layer
|
||||
vector<vector<Blob<Dtype>*> > top_vecs_; |
||||
vector<vector<int> > top_id_vecs_; |
||||
// blob indices for the input and the output of the net
|
||||
vector<int> net_input_blob_indices_; |
||||
vector<int> net_output_blob_indices_; |
||||
vector<Blob<Dtype>*> net_input_blobs_; |
||||
vector<Blob<Dtype>*> net_output_blobs_; |
||||
string name_; |
||||
// The parameters in the network.
|
||||
vector<shared_ptr<Blob<Dtype> > > params_; |
||||
// the learning rate multipliers
|
||||
vector<float> params_lr_; |
||||
// the weight decay multipliers
|
||||
vector<float> params_weight_decay_; |
||||
// The bytes of memory used by this net
|
||||
size_t memory_used_; |
||||
DISABLE_COPY_AND_ASSIGN(Net); |
||||
}; |
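// An illustrative usage sketch of the interface above; the file names are
// placeholders and the "prob" blob name is an assumption about the net
// definition being loaded.
inline void RunNetOnce() {
  Net<float> net("deploy.prototxt");
  net.CopyTrainedLayersFrom("weights.caffemodel");
  // ... fill net.input_blobs()[0]->mutable_cpu_data() with the input ...
  float loss = 0;
  const vector<Blob<float>*>& out = net.ForwardPrefilled(&loss);
  if (net.has_blob("prob")) {
    const float* probs = net.blob_by_name("prob")->cpu_data();
    (void) probs;  // class scores for the batch
  }
  (void) out;
}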
||||
|
||||
|
||||
} // namespace caffe
|
||||
|
||||
#endif // CAFFE_NET_HPP_
|
@ -0,0 +1,272 @@ |
||||
// Copyright 2014 BVLC and contributors.
|
||||
|
||||
#ifndef CAFFE_NEURON_LAYERS_HPP_ |
||||
#define CAFFE_NEURON_LAYERS_HPP_ |
||||
|
||||
#include <string> |
||||
#include <utility> |
||||
#include <vector> |
||||
|
||||
#include "leveldb/db.h" |
||||
#include "pthread.h" |
||||
#include "boost/scoped_ptr.hpp" |
||||
#include "hdf5.h" |
||||
|
||||
#include "caffe/blob.hpp" |
||||
#include "caffe/common.hpp" |
||||
#include "caffe/layer.hpp" |
||||
#include "caffe/proto/caffe.pb.h" |
||||
|
||||
#define HDF5_DATA_DATASET_NAME "data" |
||||
#define HDF5_DATA_LABEL_NAME "label" |
||||
|
||||
namespace caffe { |
||||
|
||||
/* NeuronLayer
|
||||
An interface for layers that take one blob as input (x), |
||||
and produce one blob as output (y). |
||||
*/ |
||||
template <typename Dtype> |
||||
class NeuronLayer : public Layer<Dtype> { |
||||
public: |
||||
explicit NeuronLayer(const LayerParameter& param) |
||||
: Layer<Dtype>(param) {} |
||||
virtual void SetUp(const vector<Blob<Dtype>*>& bottom, |
||||
vector<Blob<Dtype>*>* top); |
||||
|
||||
virtual inline LayerParameter_LayerType type() const { |
||||
return LayerParameter_LayerType_NONE; |
||||
} |
||||
virtual inline int ExactNumBottomBlobs() const { return 1; } |
||||
virtual inline int ExactNumTopBlobs() const { return 1; } |
||||
}; |
||||
|
||||
/* BNLLLayer
|
||||
|
||||
y = x + log(1 + exp(-x)) if x > 0 |
||||
y = log(1 + exp(x)) if x <= 0 |
||||
|
||||
y' = exp(x) / (exp(x) + 1) |
||||
*/ |
||||
template <typename Dtype> |
||||
class BNLLLayer : public NeuronLayer<Dtype> { |
||||
public: |
||||
explicit BNLLLayer(const LayerParameter& param) |
||||
: NeuronLayer<Dtype>(param) {} |
||||
|
||||
virtual inline LayerParameter_LayerType type() const { |
||||
return LayerParameter_LayerType_BNLL; |
||||
} |
||||
|
||||
protected: |
||||
virtual Dtype Forward_cpu(const vector<Blob<Dtype>*>& bottom, |
||||
vector<Blob<Dtype>*>* top); |
||||
virtual Dtype Forward_gpu(const vector<Blob<Dtype>*>& bottom, |
||||
vector<Blob<Dtype>*>* top); |
||||
virtual void Backward_cpu(const vector<Blob<Dtype>*>& top, |
||||
const bool propagate_down, vector<Blob<Dtype>*>* bottom); |
||||
virtual void Backward_gpu(const vector<Blob<Dtype>*>& top, |
||||
const bool propagate_down, vector<Blob<Dtype>*>* bottom); |
||||
}; |
||||
|
||||
/* DropoutLayer
|
||||
During training only, sets some portion of x to 0, adjusting the |
||||
vector magnitude accordingly. |
||||
|
||||
mask = bernoulli(1 - threshold) |
||||
scale = 1 / (1 - threshold) |
||||
y = x * mask * scale |
||||
|
||||
y' = mask * scale |
||||
*/ |
||||
template <typename Dtype> |
||||
class DropoutLayer : public NeuronLayer<Dtype> { |
||||
public: |
||||
explicit DropoutLayer(const LayerParameter& param) |
||||
: NeuronLayer<Dtype>(param) {} |
||||
virtual void SetUp(const vector<Blob<Dtype>*>& bottom, |
||||
vector<Blob<Dtype>*>* top); |
||||
|
||||
virtual inline LayerParameter_LayerType type() const { |
||||
return LayerParameter_LayerType_DROPOUT; |
||||
} |
||||
|
||||
protected: |
||||
virtual Dtype Forward_cpu(const vector<Blob<Dtype>*>& bottom, |
||||
vector<Blob<Dtype>*>* top); |
||||
virtual Dtype Forward_gpu(const vector<Blob<Dtype>*>& bottom, |
||||
vector<Blob<Dtype>*>* top); |
||||
virtual void Backward_cpu(const vector<Blob<Dtype>*>& top, |
||||
const bool propagate_down, vector<Blob<Dtype>*>* bottom); |
||||
virtual void Backward_gpu(const vector<Blob<Dtype>*>& top, |
||||
const bool propagate_down, vector<Blob<Dtype>*>* bottom); |
||||
|
||||
shared_ptr<Blob<unsigned int> > rand_vec_; |
||||
Dtype threshold_; |
||||
Dtype scale_; |
||||
unsigned int uint_thres_; |
||||
}; |
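// A minimal sketch (hypothetical helper) of the train-time rule above:
// mask ~ Bernoulli(1 - threshold), scale = 1 / (1 - threshold), y = x * mask * scale.
template <typename Dtype>
void dropout_forward_sketch(const int count, const Dtype* x,
    const unsigned int* mask, const Dtype threshold, Dtype* y) {
  const Dtype scale = Dtype(1) / (Dtype(1) - threshold);
  for (int i = 0; i < count; ++i) {
    y[i] = x[i] * mask[i] * scale;  // mask[i] is 0 or 1
  }
}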
||||
|
||||
/* PowerLayer
|
||||
y = (shift + scale * x) ^ power |
||||
|
||||
y' = scale * power * (shift + scale * x) ^ (power - 1) |
||||
= scale * power * y / (shift + scale * x) |
||||
*/ |
||||
template <typename Dtype> |
||||
class PowerLayer : public NeuronLayer<Dtype> { |
||||
public: |
||||
explicit PowerLayer(const LayerParameter& param) |
||||
: NeuronLayer<Dtype>(param) {} |
||||
virtual void SetUp(const vector<Blob<Dtype>*>& bottom, |
||||
vector<Blob<Dtype>*>* top); |
||||
|
||||
virtual inline LayerParameter_LayerType type() const { |
||||
return LayerParameter_LayerType_POWER; |
||||
} |
||||
|
||||
protected: |
||||
virtual Dtype Forward_cpu(const vector<Blob<Dtype>*>& bottom, |
||||
vector<Blob<Dtype>*>* top); |
||||
virtual Dtype Forward_gpu(const vector<Blob<Dtype>*>& bottom, |
||||
vector<Blob<Dtype>*>* top); |
||||
virtual void Backward_cpu(const vector<Blob<Dtype>*>& top, |
||||
const bool propagate_down, vector<Blob<Dtype>*>* bottom); |
||||
virtual void Backward_gpu(const vector<Blob<Dtype>*>& top, |
||||
const bool propagate_down, vector<Blob<Dtype>*>* bottom); |
||||
|
||||
Dtype power_; |
||||
Dtype scale_; |
||||
Dtype shift_; |
||||
Dtype diff_scale_; |
||||
}; |
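// A minimal sketch (hypothetical helper; assumes <cmath> is available via
// the includes above) of the forward rule y = (shift + scale * x) ^ power.
template <typename Dtype>
void power_forward_sketch(const int count, const Dtype* x, const Dtype power,
    const Dtype scale, const Dtype shift, Dtype* y) {
  for (int i = 0; i < count; ++i) {
    y[i] = std::pow(shift + scale * x[i], power);
  }
}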
||||
|
||||
/* ReLULayer
|
||||
Rectified Linear Unit non-linearity. |
||||
The simple max is fast to compute, and the function does not saturate. |
||||
|
||||
y = max(0, x). |
||||
|
||||
y' = 0 if x < 0 |
||||
y' = 1 if x > 0 |
||||
*/ |
||||
template <typename Dtype> |
||||
class ReLULayer : public NeuronLayer<Dtype> { |
||||
public: |
||||
explicit ReLULayer(const LayerParameter& param) |
||||
: NeuronLayer<Dtype>(param) {} |
||||
|
||||
virtual inline LayerParameter_LayerType type() const { |
||||
return LayerParameter_LayerType_RELU; |
||||
} |
||||
|
||||
protected: |
||||
virtual Dtype Forward_cpu(const vector<Blob<Dtype>*>& bottom, |
||||
vector<Blob<Dtype>*>* top); |
||||
virtual Dtype Forward_gpu(const vector<Blob<Dtype>*>& bottom, |
||||
vector<Blob<Dtype>*>* top); |
||||
|
||||
virtual void Backward_cpu(const vector<Blob<Dtype>*>& top, |
||||
const bool propagate_down, vector<Blob<Dtype>*>* bottom); |
||||
virtual void Backward_gpu(const vector<Blob<Dtype>*>& top, |
||||
const bool propagate_down, vector<Blob<Dtype>*>* bottom); |
||||
}; |
||||
|
||||
/* SigmoidLayer
|
||||
Sigmoid function non-linearity, a classic choice in neural networks. |
||||
Note that the gradient vanishes as the values move away from 0. |
||||
The ReLULayer is often a better choice for this reason. |
||||
|
||||
y = 1. / (1 + exp(-x)) |
||||
|
||||
y ' = exp(x) / (1 + exp(x))^2 |
||||
or |
||||
y' = y * (1 - y) |
||||
*/ |
||||
template <typename Dtype> |
||||
class SigmoidLayer : public NeuronLayer<Dtype> { |
||||
public: |
||||
explicit SigmoidLayer(const LayerParameter& param) |
||||
: NeuronLayer<Dtype>(param) {} |
||||
|
||||
virtual inline LayerParameter_LayerType type() const { |
||||
return LayerParameter_LayerType_SIGMOID; |
||||
} |
||||
|
||||
protected: |
||||
virtual Dtype Forward_cpu(const vector<Blob<Dtype>*>& bottom, |
||||
vector<Blob<Dtype>*>* top); |
||||
virtual Dtype Forward_gpu(const vector<Blob<Dtype>*>& bottom, |
||||
vector<Blob<Dtype>*>* top); |
||||
virtual void Backward_cpu(const vector<Blob<Dtype>*>& top, |
||||
const bool propagate_down, vector<Blob<Dtype>*>* bottom); |
||||
virtual void Backward_gpu(const vector<Blob<Dtype>*>& top, |
||||
const bool propagate_down, vector<Blob<Dtype>*>* bottom); |
||||
}; |
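// A minimal sketch (hypothetical helper) of the backward rule using the
// identity y' = y * (1 - y) above: only the forward output y is needed,
// not the original input x.
template <typename Dtype>
void sigmoid_backward_sketch(const int count, const Dtype* top_data,
    const Dtype* top_diff, Dtype* bottom_diff) {
  for (int i = 0; i < count; ++i) {
    bottom_diff[i] = top_diff[i] * top_data[i] * (Dtype(1) - top_data[i]);
  }
}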
||||
|
||||
/* TanHLayer
|
||||
Hyperbolic tangent non-linearity, popular in auto-encoders. |
||||
|
||||
y = 1. * (exp(2x) - 1) / (exp(2x) + 1) |
||||
|
||||
y' = 1 - ( (exp(2x) - 1) / (exp(2x) + 1) ) ^ 2 |
||||
*/ |
||||
template <typename Dtype> |
||||
class TanHLayer : public NeuronLayer<Dtype> { |
||||
public: |
||||
explicit TanHLayer(const LayerParameter& param) |
||||
: NeuronLayer<Dtype>(param) {} |
||||
|
||||
virtual inline LayerParameter_LayerType type() const { |
||||
return LayerParameter_LayerType_TANH; |
||||
} |
||||
|
||||
protected: |
||||
virtual Dtype Forward_cpu(const vector<Blob<Dtype>*>& bottom, |
||||
vector<Blob<Dtype>*>* top); |
||||
virtual Dtype Forward_gpu(const vector<Blob<Dtype>*>& bottom, |
||||
vector<Blob<Dtype>*>* top); |
||||
virtual void Backward_cpu(const vector<Blob<Dtype>*>& top, |
||||
const bool propagate_down, vector<Blob<Dtype>*>* bottom); |
||||
virtual void Backward_gpu(const vector<Blob<Dtype>*>& top, |
||||
const bool propagate_down, vector<Blob<Dtype>*>* bottom); |
||||
}; |
||||
|
||||
/* ThresholdLayer
|
||||
Outputs 1 if value in input is above threshold, 0 otherwise. |
||||
The default threshold = 0, which means positive values would become 1 and
|
||||
negative or 0 would become 0. |
||||
|
||||
y = 1 if x > threshold |
||||
y = 0 if x <= threshold |
||||
|
||||
y' is not differentiable |
||||
*/ |
||||
template <typename Dtype> |
||||
class ThresholdLayer : public NeuronLayer<Dtype> { |
||||
public: |
||||
explicit ThresholdLayer(const LayerParameter& param) |
||||
: NeuronLayer<Dtype>(param) {} |
||||
virtual void SetUp(const vector<Blob<Dtype>*>& bottom, |
||||
vector<Blob<Dtype>*>* top); |
||||
|
||||
virtual inline LayerParameter_LayerType type() const { |
||||
return LayerParameter_LayerType_THRESHOLD; |
||||
} |
||||
|
||||
protected: |
||||
virtual Dtype Forward_cpu(const vector<Blob<Dtype>*>& bottom, |
||||
vector<Blob<Dtype>*>* top); |
||||
virtual Dtype Forward_gpu(const vector<Blob<Dtype>*>& bottom, |
||||
vector<Blob<Dtype>*>* top); |
||||
virtual void Backward_cpu(const vector<Blob<Dtype>*>& top, |
||||
const bool propagate_down, vector<Blob<Dtype>*>* bottom) { |
||||
NOT_IMPLEMENTED; |
||||
} |
||||
|
||||
Dtype threshold_; |
||||
}; |
||||
|
||||
} // namespace caffe
|
||||
|
||||
#endif // CAFFE_NEURON_LAYERS_HPP_
|
@ -0,0 +1,77 @@ |
||||
// Copyright 2014 BVLC and contributors.
|
||||
|
||||
#ifndef CAFFE_OPTIMIZATION_SOLVER_HPP_ |
||||
#define CAFFE_OPTIMIZATION_SOLVER_HPP_ |
||||
|
||||
#include <string> |
||||
#include <vector> |
||||
|
||||
namespace caffe { |
||||
|
||||
template <typename Dtype> |
||||
class Solver { |
||||
public: |
||||
explicit Solver(const SolverParameter& param); |
||||
explicit Solver(const string& param_file); |
||||
void Init(const SolverParameter& param); |
||||
// The main entry of the solver function. By default, iter will be zero. Pass
|
||||
// in a non-zero iter number to resume training for a pre-trained net.
|
||||
virtual void Solve(const char* resume_file = NULL); |
||||
inline void Solve(const string resume_file) { Solve(resume_file.c_str()); } |
||||
virtual ~Solver() {} |
||||
inline shared_ptr<Net<Dtype> > net() { return net_; } |
||||
|
||||
protected: |
||||
// PreSolve is run before any solving iteration starts, allowing one to
|
||||
// put up some scaffold.
|
||||
virtual void PreSolve() {} |
||||
// Get the update value for the current iteration.
|
||||
virtual void ComputeUpdateValue() = 0; |
||||
// The Solver::Snapshot function implements the basic snapshotting utility
|
||||
// that stores the learned net. You should implement the SnapshotSolverState()
|
||||
// function that produces a SolverState protocol buffer that needs to be
|
||||
// written to disk together with the learned net.
|
||||
void Snapshot(); |
||||
// The test routine
|
||||
void TestAll(); |
||||
void Test(const int test_net_id = 0); |
||||
virtual void SnapshotSolverState(SolverState* state) = 0; |
||||
// The Restore function implements how one should restore the solver to a
|
||||
// previously snapshotted state. You should implement the RestoreSolverState()
|
||||
// function that restores the state from a SolverState protocol buffer.
|
||||
void Restore(const char* resume_file); |
||||
virtual void RestoreSolverState(const SolverState& state) = 0; |
||||
|
||||
SolverParameter param_; |
||||
int iter_; |
||||
shared_ptr<Net<Dtype> > net_; |
||||
vector<shared_ptr<Net<Dtype> > > test_nets_; |
||||
|
||||
DISABLE_COPY_AND_ASSIGN(Solver); |
||||
}; |
||||
|
||||
|
||||
template <typename Dtype> |
||||
class SGDSolver : public Solver<Dtype> { |
||||
public: |
||||
explicit SGDSolver(const SolverParameter& param) |
||||
: Solver<Dtype>(param) {} |
||||
explicit SGDSolver(const string& param_file) |
||||
: Solver<Dtype>(param_file) {} |
||||
|
||||
protected: |
||||
virtual void PreSolve(); |
||||
Dtype GetLearningRate(); |
||||
virtual void ComputeUpdateValue(); |
||||
virtual void SnapshotSolverState(SolverState * state); |
||||
virtual void RestoreSolverState(const SolverState& state); |
||||
// history maintains the historical momentum data.
|
||||
vector<shared_ptr<Blob<Dtype> > > history_; |
||||
|
||||
DISABLE_COPY_AND_ASSIGN(SGDSolver); |
||||
}; |
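// An illustrative usage sketch; the prototxt path is a placeholder and
// ReadProtoFromTextFileOrDie (from caffe/util/io.hpp) plus the generated
// SolverParameter message are assumed to be available at this point.
inline void TrainWithSGD() {
  SolverParameter solver_param;
  ReadProtoFromTextFileOrDie("solver.prototxt", &solver_param);
  SGDSolver<float> solver(solver_param);
  solver.Solve();                          // train from scratch
  // solver.Solve("snapshot.solverstate"); // or resume from a snapshot
}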
||||
|
||||
|
||||
} // namespace caffe
|
||||
|
||||
#endif // CAFFE_OPTIMIZATION_SOLVER_HPP_
|
@ -0,0 +1,67 @@ |
||||
// Copyright 2014 BVLC and contributors.
|
||||
|
||||
#ifndef CAFFE_SYNCEDMEM_HPP_ |
||||
#define CAFFE_SYNCEDMEM_HPP_ |
||||
|
||||
#include <cstdlib> |
||||
|
||||
#include "caffe/common.hpp" |
||||
|
||||
namespace caffe { |
||||
|
||||
// Theoretically, CaffeMallocHost and CaffeFreeHost should simply call the
|
||||
// cudaMallocHost and cudaFree functions in order to create pinned memory.
|
||||
// However, those codes rely on the existence of a cuda GPU (I don't know
|
||||
// why that is a must since allocating memory should not be accessing the
|
||||
// GPU resource, but it just creates an error as of Cuda 5.0) and will cause
|
||||
// problems when running on a machine without a GPU. Thus, we simply define
|
||||
// these two functions for safety and possible future change if the problem
|
||||
// of calling cuda functions disappears in a future version.
|
||||
//
|
||||
// In practice, although we are creating unpinned memory here, as long as we
|
||||
// are constantly accessing them, the memory pages almost always stay in
|
||||
// the physical memory (assuming we have large enough memory installed), and
|
||||
// does not seem to create a memory bottleneck here.
|
||||
|
||||
inline void CaffeMallocHost(void** ptr, size_t size) { |
||||
*ptr = malloc(size); |
||||
} |
||||
|
||||
inline void CaffeFreeHost(void* ptr) { |
||||
free(ptr); |
||||
} |
||||
|
||||
|
||||
class SyncedMemory { |
||||
public: |
||||
SyncedMemory() |
||||
: cpu_ptr_(NULL), gpu_ptr_(NULL), size_(0), head_(UNINITIALIZED), |
||||
own_cpu_data_(false) {} |
||||
explicit SyncedMemory(size_t size) |
||||
: cpu_ptr_(NULL), gpu_ptr_(NULL), size_(size), head_(UNINITIALIZED), |
||||
own_cpu_data_(false) {} |
||||
~SyncedMemory(); |
||||
const void* cpu_data(); |
||||
void set_cpu_data(void* data); |
||||
const void* gpu_data(); |
||||
void* mutable_cpu_data(); |
||||
void* mutable_gpu_data(); |
||||
enum SyncedHead { UNINITIALIZED, HEAD_AT_CPU, HEAD_AT_GPU, SYNCED }; |
||||
SyncedHead head() { return head_; } |
||||
size_t size() { return size_; } |
||||
|
||||
private: |
||||
void to_cpu(); |
||||
void to_gpu(); |
||||
void* cpu_ptr_; |
||||
void* gpu_ptr_; |
||||
size_t size_; |
||||
SyncedHead head_; |
||||
bool own_cpu_data_; |
||||
|
||||
DISABLE_COPY_AND_ASSIGN(SyncedMemory); |
||||
}; // class SyncedMemory
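// An illustrative usage sketch: head() tracks where the freshest copy lives.
// Writing through mutable_cpu_data() moves it to HEAD_AT_CPU, and a later
// gpu_data() call performs the copy and leaves the memory SYNCED (a GPU must
// be present for the gpu_data() step).
inline void SyncedMemoryExample() {
  SyncedMemory mem(16 * sizeof(float));
  float* cpu = static_cast<float*>(mem.mutable_cpu_data());
  cpu[0] = 1.0f;                     // mem.head() == SyncedMemory::HEAD_AT_CPU
  const void* gpu = mem.gpu_data();  // copies to the device; now SYNCED
  (void) gpu;
}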
|
||||
|
||||
} // namespace caffe
|
||||
|
||||
#endif // CAFFE_SYNCEDMEM_HPP_
|
@ -0,0 +1,39 @@ |
||||
// Copyright 2014 BVLC and contributors.
|
||||
|
||||
#ifndef CAFFE_UTIL_BENCHMARK_H_ |
||||
#define CAFFE_UTIL_BENCHMARK_H_ |
||||
|
||||
#include <boost/date_time/posix_time/posix_time.hpp> |
||||
#include <cuda_runtime.h> |
||||
|
||||
namespace caffe { |
||||
|
||||
class Timer { |
||||
public: |
||||
Timer(); |
||||
virtual ~Timer(); |
||||
void Start(); |
||||
void Stop(); |
||||
float MilliSeconds(); |
||||
float Seconds(); |
||||
|
||||
inline bool initted() { return initted_; } |
||||
inline bool running() { return running_; } |
||||
inline bool has_run_at_least_once() { return has_run_at_least_once_; } |
||||
|
||||
protected: |
||||
void Init(); |
||||
|
||||
bool initted_; |
||||
bool running_; |
||||
bool has_run_at_least_once_; |
||||
cudaEvent_t start_gpu_; |
||||
cudaEvent_t stop_gpu_; |
||||
boost::posix_time::ptime start_cpu_; |
||||
boost::posix_time::ptime stop_cpu_; |
||||
float elapsed_milliseconds_; |
||||
}; |
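// An illustrative usage sketch: time a block of work and read back the
// elapsed wall time.
inline float TimeSomething() {
  Timer timer;
  timer.Start();
  // ... run the code being measured ...
  timer.Stop();
  return timer.MilliSeconds();
}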
||||
|
||||
} // namespace caffe
|
||||
|
||||
#endif // CAFFE_UTIL_BENCHMARK_H_
|
@ -0,0 +1,25 @@ |
||||
// Copyright 2014 BVLC and contributors.
|
||||
|
||||
#ifndef CAFFE_UTIL_FORMAT_H_ |
||||
#define CAFFE_UTIL_FORMAT_H_ |
||||
|
||||
#include <opencv2/opencv.hpp> |
||||
#include <string> |
||||
|
||||
#include "caffe/proto/caffe.pb.h" |
||||
|
||||
namespace caffe { |
||||
|
||||
bool OpenCVImageToDatum( |
||||
const cv::Mat& image, const int label, const int height, |
||||
const int width, const bool is_color, Datum* datum); |
||||
|
||||
inline bool OpenCVImageToDatum( |
||||
const cv::Mat& image, const int label, const int height, |
||||
const int width, Datum* datum) { |
||||
return OpenCVImageToDatum(image, label, height, width, true, datum); |
||||
} |
||||
|
||||
} // namespace caffe
|
||||
|
||||
#endif // CAFFE_UTIL_FORMAT_H_
|
@ -0,0 +1,30 @@ |
||||
// Copyright 2014 BVLC and contributors.
|
||||
|
||||
#ifndef _CAFFE_UTIL_IM2COL_HPP_ |
||||
#define _CAFFE_UTIL_IM2COL_HPP_ |
||||
|
||||
namespace caffe { |
||||
|
||||
template <typename Dtype> |
||||
void im2col_cpu(const Dtype* data_im, const int channels, |
||||
const int height, const int width, const int ksize, const int pad, |
||||
const int stride, Dtype* data_col); |
||||
|
||||
template <typename Dtype> |
||||
void col2im_cpu(const Dtype* data_col, const int channels, |
||||
const int height, const int width, const int psize, const int pad, |
||||
const int stride, Dtype* data_im); |
||||
|
||||
template <typename Dtype> |
||||
void im2col_gpu(const Dtype* data_im, const int channels, |
||||
const int height, const int width, const int ksize, const int pad, |
||||
const int stride, Dtype* data_col); |
||||
|
||||
template <typename Dtype> |
||||
void col2im_gpu(const Dtype* data_col, const int channels, |
||||
const int height, const int width, const int psize, const int pad, |
||||
const int stride, Dtype* data_im); |
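// An illustrative note: for the square ksize kernels above, the column
// buffer written by im2col has one row per (channel, kernel offset) pair and
// one column per output location. A hypothetical helper for sizing it:
inline int im2col_buffer_size(const int channels, const int height,
    const int width, const int ksize, const int pad, const int stride) {
  const int height_col = (height + 2 * pad - ksize) / stride + 1;
  const int width_col = (width + 2 * pad - ksize) / stride + 1;
  return channels * ksize * ksize * height_col * width_col;
}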
||||
|
||||
} // namespace caffe
|
||||
|
||||
#endif // CAFFE_UTIL_IM2COL_HPP_
|
@ -0,0 +1,31 @@ |
||||
// Copyright 2014 BVLC and contributors.
|
||||
|
||||
#ifndef _CAFFE_UTIL_INSERT_SPLITS_HPP_ |
||||
#define _CAFFE_UTIL_INSERT_SPLITS_HPP_ |
||||
|
||||
#include <string> |
||||
|
||||
#include "caffe/proto/caffe.pb.h" |
||||
|
||||
using std::pair; |
||||
using std::string; |
||||
|
||||
namespace caffe { |
||||
|
||||
// Copy NetParameters with SplitLayers added to replace any shared bottom
|
||||
// blobs with unique bottom blobs provided by the SplitLayer.
|
||||
void InsertSplits(const NetParameter& param, NetParameter* param_split); |
||||
|
||||
void ConfigureSplitLayer(const string& layer_name, const string& blob_name, |
||||
const int blob_idx, const int split_count, |
||||
LayerParameter* split_layer_param); |
||||
|
||||
string SplitLayerName(const string& layer_name, const string& blob_name, |
||||
const int blob_idx); |
||||
|
||||
string SplitBlobName(const string& layer_name, const string& blob_name, |
||||
const int blob_idx, const int split_idx); |
||||
|
||||
} // namespace caffe
|
||||
|
||||
#endif // CAFFE_UTIL_INSERT_SPLITS_HPP_
|
@ -0,0 +1,93 @@ |
||||
// Copyright 2014 BVLC and contributors.
|
||||
|
||||
#ifndef CAFFE_UTIL_IO_H_ |
||||
#define CAFFE_UTIL_IO_H_ |
||||
|
||||
#include <string> |
||||
|
||||
#include "google/protobuf/message.h" |
||||
#include "hdf5.h" |
||||
#include "hdf5_hl.h" |
||||
#include "caffe/proto/caffe.pb.h" |
||||
|
||||
#include "caffe/blob.hpp" |
||||
|
||||
using std::string; |
||||
using ::google::protobuf::Message; |
||||
|
||||
#define HDF5_NUM_DIMS 4 |
||||
|
||||
namespace caffe { |
||||
|
||||
bool ReadProtoFromTextFile(const char* filename, Message* proto); |
||||
|
||||
inline bool ReadProtoFromTextFile(const string& filename, Message* proto) { |
||||
return ReadProtoFromTextFile(filename.c_str(), proto); |
||||
} |
||||
|
||||
inline void ReadProtoFromTextFileOrDie(const char* filename, Message* proto) { |
||||
CHECK(ReadProtoFromTextFile(filename, proto)); |
||||
} |
||||
|
||||
inline void ReadProtoFromTextFileOrDie(const string& filename, Message* proto) { |
||||
ReadProtoFromTextFileOrDie(filename.c_str(), proto); |
||||
} |
||||
|
||||
void WriteProtoToTextFile(const Message& proto, const char* filename); |
||||
inline void WriteProtoToTextFile(const Message& proto, const string& filename) { |
||||
WriteProtoToTextFile(proto, filename.c_str()); |
||||
} |
||||
|
||||
bool ReadProtoFromBinaryFile(const char* filename, Message* proto); |
||||
|
||||
inline bool ReadProtoFromBinaryFile(const string& filename, Message* proto) { |
||||
return ReadProtoFromBinaryFile(filename.c_str(), proto); |
||||
} |
||||
|
||||
inline void ReadProtoFromBinaryFileOrDie(const char* filename, Message* proto) { |
||||
CHECK(ReadProtoFromBinaryFile(filename, proto)); |
||||
} |
||||
|
||||
inline void ReadProtoFromBinaryFileOrDie(const string& filename, |
||||
Message* proto) { |
||||
ReadProtoFromBinaryFileOrDie(filename.c_str(), proto); |
||||
} |
||||
|
||||
|
||||
void WriteProtoToBinaryFile(const Message& proto, const char* filename); |
||||
inline void WriteProtoToBinaryFile( |
||||
const Message& proto, const string& filename) { |
||||
WriteProtoToBinaryFile(proto, filename.c_str()); |
||||
} |
||||
|
||||
bool ReadImageToDatum(const string& filename, const int label, |
||||
const int height, const int width, const bool is_color, Datum* datum); |
||||
|
||||
inline bool ReadImageToDatum(const string& filename, const int label, |
||||
const int height, const int width, Datum* datum) { |
||||
return ReadImageToDatum(filename, label, height, width, true, datum); |
||||
} |
||||
|
||||
inline bool ReadImageToDatum(const string& filename, const int label, |
||||
Datum* datum) { |
||||
return ReadImageToDatum(filename, label, 0, 0, datum); |
||||
} |
||||
|
||||
|
||||
template <typename Dtype> |
||||
void hdf5_load_nd_dataset_helper( |
||||
hid_t file_id, const char* dataset_name_, int min_dim, int max_dim, |
||||
Blob<Dtype>* blob); |
||||
|
||||
template <typename Dtype> |
||||
void hdf5_load_nd_dataset( |
||||
hid_t file_id, const char* dataset_name_, int min_dim, int max_dim, |
||||
Blob<Dtype>* blob); |
||||
|
||||
template <typename Dtype> |
||||
void hdf5_save_nd_dataset( |
||||
const hid_t file_id, const string dataset_name, const Blob<Dtype>& blob); |
||||
|
||||
} // namespace caffe
|
||||
|
||||
#endif // CAFFE_UTIL_IO_H_
|
@ -0,0 +1,253 @@ |
||||
// Copyright 2014 BVLC and contributors.
|
||||
|
||||
#ifndef CAFFE_UTIL_MATH_FUNCTIONS_H_ |
||||
#define CAFFE_UTIL_MATH_FUNCTIONS_H_ |
||||
|
||||
#include <cublas_v2.h> |
||||
#include <stdint.h> |
||||
#include <cmath> // for std::fabs and std::signbit |
||||
|
||||
#include "glog/logging.h" |
||||
|
||||
#include "caffe/util/mkl_alternate.hpp" |
||||
|
||||
namespace caffe { |
||||
|
||||
// Decaf gemm provides a simpler interface to the gemm functions, with the
|
||||
// limitation that the data has to be contiguous in memory.
|
||||
template <typename Dtype> |
||||
void caffe_cpu_gemm(const CBLAS_TRANSPOSE TransA, |
||||
const CBLAS_TRANSPOSE TransB, const int M, const int N, const int K, |
||||
const Dtype alpha, const Dtype* A, const Dtype* B, const Dtype beta, |
||||
Dtype* C); |
||||
|
||||
// Decaf gpu gemm provides an interface that is almost the same as the cpu
|
||||
// gemm function - following the c convention and calling the fortran-order
|
||||
// gpu code under the hood.
|
||||
template <typename Dtype> |
||||
void caffe_gpu_gemm(const CBLAS_TRANSPOSE TransA, |
||||
const CBLAS_TRANSPOSE TransB, const int M, const int N, const int K, |
||||
const Dtype alpha, const Dtype* A, const Dtype* B, const Dtype beta, |
||||
Dtype* C); |
||||
|
||||
template <typename Dtype> |
||||
void caffe_cpu_gemv(const CBLAS_TRANSPOSE TransA, const int M, const int N, |
||||
const Dtype alpha, const Dtype* A, const Dtype* x, const Dtype beta, |
||||
Dtype* y); |
||||
|
||||
template <typename Dtype> |
||||
void caffe_gpu_gemv(const CBLAS_TRANSPOSE TransA, const int M, const int N, |
||||
const Dtype alpha, const Dtype* A, const Dtype* x, const Dtype beta, |
||||
Dtype* y); |
||||
|
||||
template <typename Dtype> |
||||
void caffe_axpy(const int N, const Dtype alpha, const Dtype* X, |
||||
Dtype* Y); |
||||
|
||||
template <typename Dtype> |
||||
void caffe_gpu_axpy(const int N, const Dtype alpha, const Dtype* X, |
||||
Dtype* Y); |
||||
|
||||
template <typename Dtype> |
||||
void caffe_cpu_axpby(const int N, const Dtype alpha, const Dtype* X, |
||||
const Dtype beta, Dtype* Y); |
||||
|
||||
template <typename Dtype> |
||||
void caffe_gpu_axpby(const int N, const Dtype alpha, const Dtype* X, |
||||
const Dtype beta, Dtype* Y); |
||||
|
||||
template <typename Dtype> |
||||
void caffe_copy(const int N, const Dtype *X, Dtype *Y); |
||||
|
||||
template <typename Dtype> |
||||
void caffe_set(const int N, const Dtype alpha, Dtype *X); |
||||
|
||||
template <typename Dtype> |
||||
void caffe_gpu_set(const int N, const Dtype alpha, Dtype *X); |
||||
|
||||
template <typename Dtype> |
||||
void caffe_gpu_copy(const int N, const Dtype *X, Dtype *Y); |
||||
|
||||
template <typename Dtype> |
||||
void caffe_add_scalar(const int N, const Dtype alpha, Dtype *X); |
||||
|
||||
template <typename Dtype> |
||||
void caffe_gpu_add_scalar(const int N, const Dtype alpha, Dtype *X); |
||||
|
||||
template <typename Dtype> |
||||
void caffe_scal(const int N, const Dtype alpha, Dtype *X); |
||||
|
||||
template <typename Dtype> |
||||
void caffe_gpu_scal(const int N, const Dtype alpha, Dtype *X); |
||||
|
||||
template <typename Dtype> |
||||
void caffe_sqr(const int N, const Dtype* a, Dtype* y); |
||||
|
||||
template <typename Dtype> |
||||
void caffe_add(const int N, const Dtype* a, const Dtype* b, Dtype* y); |
||||
|
||||
template <typename Dtype> |
||||
void caffe_gpu_add(const int N, const Dtype* a, const Dtype* b, Dtype* y); |
||||
|
||||
template <typename Dtype> |
||||
void caffe_sub(const int N, const Dtype* a, const Dtype* b, Dtype* y); |
||||
|
||||
template <typename Dtype> |
||||
void caffe_gpu_sub(const int N, const Dtype* a, const Dtype* b, Dtype* y); |
||||
|
||||
template <typename Dtype> |
||||
void caffe_mul(const int N, const Dtype* a, const Dtype* b, Dtype* y); |
||||
|
||||
template <typename Dtype> |
||||
void caffe_gpu_mul(const int N, const Dtype* a, const Dtype* b, Dtype* y); |
||||
|
||||
template <typename Dtype> |
||||
void caffe_div(const int N, const Dtype* a, const Dtype* b, Dtype* y); |
||||
|
||||
template <typename Dtype> |
||||
void caffe_gpu_div(const int N, const Dtype* a, const Dtype* b, Dtype* y); |
||||
|
||||
template <typename Dtype> |
||||
void caffe_powx(const int n, const Dtype* a, const Dtype b, Dtype* y); |
||||
|
||||
template <typename Dtype> |
||||
void caffe_gpu_powx(const int n, const Dtype* a, const Dtype b, Dtype* y); |
||||
|
||||
unsigned int caffe_rng_rand(); |
||||
|
||||
template <typename Dtype> |
||||
Dtype caffe_nextafter(const Dtype b); |
||||
|
||||
template <typename Dtype> |
||||
void caffe_rng_uniform(const int n, const Dtype a, const Dtype b, Dtype* r); |
||||
|
||||
// caffe_gpu_rng_uniform with two arguments generates integers in the range
|
||||
// [0, UINT_MAX].
|
||||
void caffe_gpu_rng_uniform(const int n, unsigned int* r); |
||||
|
||||
// caffe_gpu_rng_uniform with four arguments generates floats in the range
|
||||
// (a, b] (strictly greater than a, less than or equal to b) due to the
|
||||
// specification of curandGenerateUniform. With a = 0, b = 1, just calls
|
||||
// curandGenerateUniform; with other limits will shift and scale the outputs
|
||||
// appropriately after calling curandGenerateUniform.
|
||||
template <typename Dtype> |
||||
void caffe_gpu_rng_uniform(const int n, const Dtype a, const Dtype b, Dtype* r); |
||||
|
||||
template <typename Dtype> |
||||
void caffe_rng_gaussian(const int n, const Dtype mu, const Dtype sigma, |
||||
Dtype* r); |
||||
|
||||
template <typename Dtype> |
||||
void caffe_gpu_rng_gaussian(const int n, const Dtype mu, const Dtype sigma, |
||||
Dtype* r); |
||||
|
||||
template <typename Dtype> |
||||
void caffe_rng_bernoulli(const int n, const Dtype p, int* r); |
||||
|
||||
template <typename Dtype> |
||||
void caffe_rng_bernoulli(const int n, const Dtype p, unsigned int* r); |
||||
|
||||
template <typename Dtype> |
||||
void caffe_gpu_rng_bernoulli(const int n, const Dtype p, int* r); |
||||
|
||||
template <typename Dtype> |
||||
void caffe_exp(const int n, const Dtype* a, Dtype* y); |
||||
|
||||
template <typename Dtype> |
||||
Dtype caffe_cpu_dot(const int n, const Dtype* x, const Dtype* y); |
||||
|
||||
template <typename Dtype> |
||||
void caffe_gpu_dot(const int n, const Dtype* x, const Dtype* y, Dtype* out); |
||||
|
||||
template <typename Dtype> |
||||
int caffe_cpu_hamming_distance(const int n, const Dtype* x, const Dtype* y); |
||||
|
||||
template <typename Dtype> |
||||
uint32_t caffe_gpu_hamming_distance(const int n, const Dtype* x, |
||||
const Dtype* y); |
||||
|
||||
// Returns the sum of the absolute values of the elements of vector x
|
||||
template <typename Dtype> |
||||
Dtype caffe_cpu_asum(const int n, const Dtype* x); |
||||
|
||||
template <typename Dtype> |
||||
void caffe_gpu_asum(const int n, const Dtype* x, Dtype* y); |
||||
|
||||
// the branchless, type-safe version from
|
||||
// http://stackoverflow.com/questions/1903954/is-there-a-standard-sign-function-signum-sgn-in-c-c
|
||||
template<typename Dtype> |
||||
inline char caffe_sign(Dtype val) { |
||||
return (Dtype(0) < val) - (val < Dtype(0)); |
||||
} |
||||
|
||||
// The following two macros are modifications of DEFINE_VSL_UNARY_FUNC
|
||||
// in include/caffe/util/mkl_alternate.hpp authored by @Rowland Depp.
|
||||
// Please refer to commit 7e8ef25c7 of the boost-eigen branch.
|
||||
// Git cherry picking that commit caused a conflict hard to resolve and
|
||||
// copying that file is inconvenient for code review.
|
||||
// So they have to be pasted here temporarily.
|
||||
#define DEFINE_CAFFE_CPU_UNARY_FUNC(name, operation) \ |
||||
template<typename Dtype> \
|
||||
void caffe_cpu_##name(const int n, const Dtype* x, Dtype* y) { \
|
||||
CHECK_GT(n, 0); CHECK(x); CHECK(y); \
|
||||
for (int i = 0; i < n; ++i) { \
|
||||
operation; \
|
||||
} \
|
||||
} |
||||
|
||||
#define INSTANTIATE_CAFFE_CPU_UNARY_FUNC(name) \ |
||||
template <> \
|
||||
void caffe_cpu_##name<float>(const int n, const float* x, float* y); \
|
||||
template <> \
|
||||
void caffe_cpu_##name<double>(const int n, const double* x, double* y) |
||||
|
||||
|
||||
#define DEFINE_AND_INSTANTIATE_GPU_UNARY_FUNC(name, operation) \ |
||||
template<typename Dtype> \
|
||||
__global__ void name##_kernel(const int n, const Dtype* x, Dtype* y) { \
|
||||
CUDA_KERNEL_LOOP(index, n) { \
|
||||
operation; \
|
||||
} \
|
||||
} \
|
||||
template <> \
|
||||
void caffe_gpu_##name<float>(const int n, const float* x, float* y) { \
|
||||
/* NOLINT_NEXT_LINE(whitespace/operators) */ \
|
||||
name##_kernel<float><<<CAFFE_GET_BLOCKS(n), CAFFE_CUDA_NUM_THREADS>>>( \
|
||||
n, x, y); \
|
||||
} \
|
||||
template <> \
|
||||
void caffe_gpu_##name<double>(const int n, const double* x, double* y) { \
|
||||
/* NOLINT_NEXT_LINE(whitespace/operators) */ \
|
||||
name##_kernel<double><<<CAFFE_GET_BLOCKS(n), CAFFE_CUDA_NUM_THREADS>>>( \
|
||||
n, x, y); \
|
||||
} |
||||
|
||||
// output is 1 for the positives, 0 for zero, and -1 for the negatives
|
||||
DEFINE_CAFFE_CPU_UNARY_FUNC(sign, y[i] = caffe_sign<Dtype>(x[i])); |
||||
|
||||
template<typename Dtype> |
||||
void caffe_gpu_sign(const int n, const Dtype* x, Dtype* y); |
||||
|
||||
// This returns a nonzero value if the input has its sign bit set.
|
||||
// The name sgnbit is meant to avoid conflicts with std::signbit in the macro
|
||||
using std::signbit; |
||||
DEFINE_CAFFE_CPU_UNARY_FUNC(sgnbit, y[i] = signbit(x[i])); |
||||
|
||||
template<typename Dtype> |
||||
void caffe_gpu_sgnbit(const int n, const Dtype* x, Dtype* y); |
||||
|
||||
DEFINE_CAFFE_CPU_UNARY_FUNC(fabs, y[i] = std::fabs(x[i])); |
||||
|
||||
template <typename Dtype> |
||||
void caffe_gpu_fabs(const int n, const Dtype* x, Dtype* y); |
||||
|
||||
template <typename Dtype> |
||||
void caffe_cpu_scale(const int n, const Dtype alpha, const Dtype *x, Dtype* y); |
||||
|
||||
template <typename Dtype> |
||||
void caffe_gpu_scale(const int n, const Dtype alpha, const Dtype *x, Dtype* y); |
||||
|
||||
} // namespace caffe
|
||||
|
||||
|
||||
#endif // CAFFE_UTIL_MATH_FUNCTIONS_H_
|
@ -0,0 +1,97 @@ |
||||
// Copyright 2014 BVLC and contributors.
|
||||
|
||||
#ifndef CAFFE_UTIL_MKL_ALTERNATE_H_ |
||||
#define CAFFE_UTIL_MKL_ALTERNATE_H_ |
||||
|
||||
#ifdef USE_MKL |
||||
|
||||
#include <mkl.h> |
||||
|
||||
#else // Without MKL, fall back to plain CBLAS and define the missing pieces.
|
||||
|
||||
extern "C" { |
||||
#include <cblas.h> |
||||
} |
||||
#include <math.h> |
||||
|
||||
// Functions that caffe uses but are not present if MKL is not linked.
|
||||
|
||||
// A simple way to define the vsl unary functions. The operation should
|
||||
// be in the form e.g. y[i] = sqrt(a[i])
|
||||
#define DEFINE_VSL_UNARY_FUNC(name, operation) \ |
||||
template<typename Dtype> \
|
||||
void v##name(const int n, const Dtype* a, Dtype* y) { \
|
||||
CHECK_GT(n, 0); CHECK(a); CHECK(y); \
|
||||
for (int i = 0; i < n; ++i) { operation; } \
|
||||
} \
|
||||
inline void vs##name( \
|
||||
const int n, const float* a, float* y) { \
|
||||
v##name<float>(n, a, y); \
|
||||
} \
|
||||
inline void vd##name( \
|
||||
const int n, const double* a, double* y) { \
|
||||
v##name<double>(n, a, y); \
|
||||
} |
||||
|
||||
DEFINE_VSL_UNARY_FUNC(Sqr, y[i] = a[i] * a[i]); |
||||
DEFINE_VSL_UNARY_FUNC(Exp, y[i] = exp(a[i])); |
||||
|
||||
// A simple way to define the vsl unary functions with singular parameter b.
|
||||
// The operation should be in the form e.g. y[i] = pow(a[i], b)
|
||||
#define DEFINE_VSL_UNARY_FUNC_WITH_PARAM(name, operation) \ |
||||
template<typename Dtype> \
|
||||
void v##name(const int n, const Dtype* a, const Dtype b, Dtype* y) { \
|
||||
CHECK_GT(n, 0); CHECK(a); CHECK(y); \
|
||||
for (int i = 0; i < n; ++i) { operation; } \
|
||||
} \
|
||||
inline void vs##name( \
|
||||
const int n, const float* a, const float b, float* y) { \
|
||||
v##name<float>(n, a, b, y); \
|
||||
} \
|
||||
inline void vd##name( \
|
||||
const int n, const double* a, const double b, double* y) { \
|
||||
v##name<double>(n, a, b, y); \
|
||||
} |
||||
|
||||
DEFINE_VSL_UNARY_FUNC_WITH_PARAM(Powx, y[i] = pow(a[i], b)); |
||||
|
||||
// A simple way to define the vsl binary functions. The operation should
|
||||
// be in the form e.g. y[i] = a[i] + b[i]
|
||||
#define DEFINE_VSL_BINARY_FUNC(name, operation) \ |
||||
template<typename Dtype> \
|
||||
void v##name(const int n, const Dtype* a, const Dtype* b, Dtype* y) { \
|
||||
CHECK_GT(n, 0); CHECK(a); CHECK(b); CHECK(y); \
|
||||
for (int i = 0; i < n; ++i) { operation; } \
|
||||
} \
|
||||
inline void vs##name( \
|
||||
const int n, const float* a, const float* b, float* y) { \
|
||||
v##name<float>(n, a, b, y); \
|
||||
} \
|
||||
inline void vd##name( \
|
||||
const int n, const double* a, const double* b, double* y) { \
|
||||
v##name<double>(n, a, b, y); \
|
||||
} |
||||
|
||||
DEFINE_VSL_BINARY_FUNC(Add, y[i] = a[i] + b[i]); |
||||
DEFINE_VSL_BINARY_FUNC(Sub, y[i] = a[i] - b[i]); |
||||
DEFINE_VSL_BINARY_FUNC(Mul, y[i] = a[i] * b[i]); |
||||
DEFINE_VSL_BINARY_FUNC(Div, y[i] = a[i] / b[i]); |
||||
|
||||
// In addition, MKL comes with an additional function axpby that is not present
|
||||
// in standard blas. We will simply use a two-step (inefficient, of course) way
|
||||
// to mimic that.
|
||||
inline void cblas_saxpby(const int N, const float alpha, const float* X, |
||||
const int incX, const float beta, float* Y, |
||||
const int incY) { |
||||
cblas_sscal(N, beta, Y, incY); |
||||
cblas_saxpy(N, alpha, X, incX, Y, incY); |
||||
} |
||||
inline void cblas_daxpby(const int N, const double alpha, const double* X, |
||||
const int incX, const double beta, double* Y, |
||||
const int incY) { |
||||
cblas_dscal(N, beta, Y, incY); |
||||
cblas_daxpy(N, alpha, X, incX, Y, incY); |
||||
} |
||||
|
||||
#endif // USE_MKL
|
||||
#endif // CAFFE_UTIL_MKL_ALTERNATE_H_
|
@ -0,0 +1,19 @@ |
||||
// Copyright 2014 BVLC and contributors.
|
||||
|
||||
#ifndef CAFFE_RNG_CPP_HPP_ |
||||
#define CAFFE_RNG_CPP_HPP_ |
||||
|
||||
#include <boost/random/mersenne_twister.hpp> |
||||
#include "caffe/common.hpp" |
||||
|
||||
namespace caffe { |
||||
|
||||
typedef boost::mt19937 rng_t; |
||||
|
||||
inline rng_t* caffe_rng() { |
||||
return static_cast<caffe::rng_t*>(Caffe::rng_stream().generator()); |
||||
} |
||||
|
||||
} // namespace caffe
|
||||
|
||||
#endif // CAFFE_RNG_HPP_
|
@ -0,0 +1,49 @@ |
||||
// Copyright 2014 BVLC and contributors.
|
||||
|
||||
#ifndef CAFFE_UTIL_UPGRADE_PROTO_H_ |
||||
#define CAFFE_UTIL_UPGRADE_PROTO_H_ |
||||
|
||||
#include <string> |
||||
|
||||
#include "caffe/proto/caffe.pb.h" |
||||
#include "caffe/proto/caffe_pretty_print.pb.h" |
||||
|
||||
using std::string; |
||||
|
||||
namespace caffe { |
||||
|
||||
// Return true iff any layer contains parameters specified using
|
||||
// deprecated V0LayerParameter.
|
||||
bool NetNeedsUpgrade(const NetParameter& net_param); |
||||
|
||||
// Perform all necessary transformations to upgrade a V0NetParameter into a
|
||||
// NetParameter (including upgrading padding layers and LayerParameters).
|
||||
bool UpgradeV0Net(const NetParameter& v0_net_param, NetParameter* net_param); |
||||
|
||||
// Upgrade NetParameter with padding layers to pad-aware conv layers.
|
||||
// For any padding layer, remove it and put its pad parameter in any layers
|
||||
// taking its top blob as input.
|
||||
// It is an error if any of these layers is not a convolution layer.
|
||||
void UpgradeV0PaddingLayers(const NetParameter& param, |
||||
NetParameter* param_upgraded_pad); |
||||
|
||||
// Upgrade a single V0LayerConnection to the new LayerParameter format.
|
||||
bool UpgradeLayerParameter(const LayerParameter& v0_layer_connection, |
||||
LayerParameter* layer_param); |
||||
|
||||
LayerParameter_LayerType UpgradeV0LayerType(const string& type); |
||||
|
||||
// Convert a NetParameter to NetParameterPrettyPrint used for dumping to
|
||||
// proto text files.
|
||||
void NetParameterToPrettyPrint(const NetParameter& param, |
||||
NetParameterPrettyPrint* pretty_param); |
||||
|
||||
// Read parameters from a file into a NetParameter proto message.
|
||||
void ReadNetParamsFromTextFileOrDie(const string& param_file, |
||||
NetParameter* param); |
||||
void ReadNetParamsFromBinaryFileOrDie(const string& param_file, |
||||
NetParameter* param); |
||||
|
||||
} // namespace caffe
|
||||
|
||||
#endif // CAFFE_UTIL_UPGRADE_PROTO_H_
|
@ -0,0 +1,479 @@ |
||||
// Copyright 2014 BVLC and contributors.
|
||||
|
||||
#ifndef CAFFE_VISION_LAYERS_HPP_ |
||||
#define CAFFE_VISION_LAYERS_HPP_ |
||||
|
||||
#include <string> |
||||
#include <utility> |
||||
#include <vector> |
||||
|
||||
#include "caffe/blob.hpp" |
||||
#include "caffe/common.hpp" |
||||
#include "caffe/layer.hpp" |
||||
#include "caffe/neuron_layers.hpp" |
||||
#include "caffe/loss_layers.hpp" |
||||
#include "caffe/data_layers.hpp" |
||||
#include "caffe/proto/caffe.pb.h" |
||||
|
||||
namespace caffe { |
||||
|
||||
/* ArgMaxLayer
  Compute the index of the max value across all (channels x height x width).
  [In the future, can take a specific dimension.]
  Intended for use after a classification layer to produce a prediction.
  If the parameter out_max_val is set to true, the output is a vector of
  (max_ind, max_val) pairs for each image.

  NOTE: does not implement the Backward operation.
*/
||||
template <typename Dtype> |
||||
class ArgMaxLayer : public Layer<Dtype> { |
||||
public: |
||||
explicit ArgMaxLayer(const LayerParameter& param) |
||||
: Layer<Dtype>(param) {} |
||||
virtual void SetUp(const vector<Blob<Dtype>*>& bottom, |
||||
vector<Blob<Dtype>*>* top); |
||||
|
||||
virtual inline LayerParameter_LayerType type() const { |
||||
return LayerParameter_LayerType_ARGMAX; |
||||
} |
||||
virtual inline int ExactNumBottomBlobs() const { return 1; } |
||||
virtual inline int ExactNumTopBlobs() const { return 1; } |
||||
|
||||
protected: |
||||
virtual Dtype Forward_cpu(const vector<Blob<Dtype>*>& bottom, |
||||
vector<Blob<Dtype>*>* top); |
||||
virtual void Backward_cpu(const vector<Blob<Dtype>*>& top, |
||||
const bool propagate_down, vector<Blob<Dtype>*>* bottom) { |
||||
NOT_IMPLEMENTED; |
||||
} |
||||
bool out_max_val_; |
||||
}; |
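
// For a single image, the out_max_val case described above amounts to
// reporting the (index, value) pair of the largest score.  The helper below is
// an illustrative reference only and is not used by the layer; <utility> and
// <vector> are already included at the top of this header.
inline std::pair<int, float> arg_max_reference(const std::vector<float>& scores) {
  // Assumes scores is non-empty.
  int best = 0;
  for (int i = 1; i < static_cast<int>(scores.size()); ++i) {
    if (scores[i] > scores[best]) best = i;
  }
  return std::make_pair(best, scores[best]);
}
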
||||
|
||||
/* ConcatLayer
|
||||
Takes at least two blobs and concatenates them along either num or |
||||
channel dim, outputting the result. |
||||
*/ |
||||
template <typename Dtype> |
||||
class ConcatLayer : public Layer<Dtype> { |
||||
public: |
||||
explicit ConcatLayer(const LayerParameter& param) |
||||
: Layer<Dtype>(param) {} |
||||
virtual void SetUp(const vector<Blob<Dtype>*>& bottom, |
||||
vector<Blob<Dtype>*>* top); |
||||
|
||||
virtual inline LayerParameter_LayerType type() const { |
||||
return LayerParameter_LayerType_CONCAT; |
||||
} |
||||
virtual inline int MinBottomBlobs() const { return 2; } |
||||
virtual inline int ExactNumTopBlobs() const { return 1; } |
||||
|
||||
protected: |
||||
virtual Dtype Forward_cpu(const vector<Blob<Dtype>*>& bottom, |
||||
vector<Blob<Dtype>*>* top); |
||||
virtual Dtype Forward_gpu(const vector<Blob<Dtype>*>& bottom, |
||||
vector<Blob<Dtype>*>* top); |
||||
virtual void Backward_cpu(const vector<Blob<Dtype>*>& top, |
||||
const bool propagate_down, vector<Blob<Dtype>*>* bottom); |
||||
virtual void Backward_gpu(const vector<Blob<Dtype>*>& top, |
||||
const bool propagate_down, vector<Blob<Dtype>*>* bottom); |
||||
|
||||
Blob<Dtype> col_bob_; |
||||
int count_; |
||||
int num_; |
||||
int channels_; |
||||
int height_; |
||||
int width_; |
||||
int concat_dim_; |
||||
}; |
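
// Shape bookkeeping for the rule described above, shown for concatenation
// along the channel dimension (concat_dim == 1): every other dimension must
// agree and the channel counts add up.  Illustrative only; the struct is a
// stand-in for Blob shape metadata and is not used by the layer.
struct ConcatShape { int num, channels, height, width; };
inline ConcatShape concat_channels(const ConcatShape& a, const ConcatShape& b) {
  // Precondition: a.num == b.num, a.height == b.height, a.width == b.width.
  ConcatShape out = a;
  out.channels = a.channels + b.channels;  // only the concat dimension grows
  return out;
}
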
||||
|
||||
/* ConvolutionLayer
|
||||
*/ |
||||
template <typename Dtype> |
||||
class ConvolutionLayer : public Layer<Dtype> { |
||||
public: |
||||
explicit ConvolutionLayer(const LayerParameter& param) |
||||
: Layer<Dtype>(param) {} |
||||
virtual void SetUp(const vector<Blob<Dtype>*>& bottom, |
||||
vector<Blob<Dtype>*>* top); |
||||
|
||||
virtual inline LayerParameter_LayerType type() const { |
||||
return LayerParameter_LayerType_CONVOLUTION; |
||||
} |
||||
virtual inline int ExactNumBottomBlobs() const { return 1; } |
||||
virtual inline int ExactNumTopBlobs() const { return 1; } |
||||
|
||||
protected: |
||||
virtual Dtype Forward_cpu(const vector<Blob<Dtype>*>& bottom, |
||||
vector<Blob<Dtype>*>* top); |
||||
virtual Dtype Forward_gpu(const vector<Blob<Dtype>*>& bottom, |
||||
vector<Blob<Dtype>*>* top); |
||||
virtual void Backward_cpu(const vector<Blob<Dtype>*>& top, |
||||
const bool propagate_down, vector<Blob<Dtype>*>* bottom); |
||||
virtual void Backward_gpu(const vector<Blob<Dtype>*>& top, |
||||
const bool propagate_down, vector<Blob<Dtype>*>* bottom); |
||||
|
||||
int kernel_size_; |
||||
int stride_; |
||||
int num_; |
||||
int channels_; |
||||
int pad_; |
||||
int height_; |
||||
int width_; |
||||
int num_output_; |
||||
int group_; |
||||
Blob<Dtype> col_buffer_; |
||||
shared_ptr<SyncedMemory> bias_multiplier_; |
||||
bool bias_term_; |
||||
int M_; |
||||
int K_; |
||||
int N_; |
||||
}; |
||||
|
||||
/* EltwiseLayer
|
||||
Compute elementwise operations like product or sum. |
||||
*/ |
||||
template <typename Dtype> |
||||
class EltwiseLayer : public Layer<Dtype> { |
||||
public: |
||||
explicit EltwiseLayer(const LayerParameter& param) |
||||
: Layer<Dtype>(param) {} |
||||
virtual void SetUp(const vector<Blob<Dtype>*>& bottom, |
||||
vector<Blob<Dtype>*>* top); |
||||
|
||||
virtual inline LayerParameter_LayerType type() const { |
||||
return LayerParameter_LayerType_ELTWISE; |
||||
} |
||||
virtual inline int MinBottomBlobs() const { return 2; } |
||||
virtual inline int ExactNumTopBlobs() const { return 1; } |
||||
|
||||
protected: |
||||
virtual Dtype Forward_cpu(const vector<Blob<Dtype>*>& bottom, |
||||
vector<Blob<Dtype>*>* top); |
||||
virtual Dtype Forward_gpu(const vector<Blob<Dtype>*>& bottom, |
||||
vector<Blob<Dtype>*>* top); |
||||
virtual void Backward_cpu(const vector<Blob<Dtype>*>& top, |
||||
const bool propagate_down, vector<Blob<Dtype>*>* bottom); |
||||
virtual void Backward_gpu(const vector<Blob<Dtype>*>& top, |
||||
const bool propagate_down, vector<Blob<Dtype>*>* bottom); |
||||
|
||||
EltwiseParameter_EltwiseOp op_; |
||||
vector<Dtype> coeffs_; |
||||
}; |
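
// For the SUM operation, the coeffs_ field suggests a weighted elementwise
// sum: out[i] = sum_k coeffs[k] * bottoms[k][i].  Reference loop over equally
// sized inputs (illustrative only, not used by the layer; whether missing
// coefficients default to 1 is an assumption).
inline std::vector<float> eltwise_sum_reference(
    const std::vector<std::vector<float> >& bottoms,
    const std::vector<float>& coeffs) {
  // Precondition: bottoms is non-empty, coeffs.size() == bottoms.size(),
  // and every bottom has the same length.
  std::vector<float> out(bottoms[0].size(), 0.0f);
  for (size_t k = 0; k < bottoms.size(); ++k) {
    for (size_t i = 0; i < out.size(); ++i) {
      out[i] += coeffs[k] * bottoms[k][i];
    }
  }
  return out;
}
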
||||
|
||||
/* FlattenLayer
|
||||
*/ |
||||
template <typename Dtype> |
||||
class FlattenLayer : public Layer<Dtype> { |
||||
public: |
||||
explicit FlattenLayer(const LayerParameter& param) |
||||
: Layer<Dtype>(param) {} |
||||
virtual void SetUp(const vector<Blob<Dtype>*>& bottom, |
||||
vector<Blob<Dtype>*>* top); |
||||
|
||||
virtual inline LayerParameter_LayerType type() const { |
||||
return LayerParameter_LayerType_FLATTEN; |
||||
} |
||||
virtual inline int ExactNumBottomBlobs() const { return 1; } |
||||
virtual inline int ExactNumTopBlobs() const { return 1; } |
||||
|
||||
protected: |
||||
virtual Dtype Forward_cpu(const vector<Blob<Dtype>*>& bottom, |
||||
vector<Blob<Dtype>*>* top); |
||||
virtual Dtype Forward_gpu(const vector<Blob<Dtype>*>& bottom, |
||||
vector<Blob<Dtype>*>* top); |
||||
virtual void Backward_cpu(const vector<Blob<Dtype>*>& top, |
||||
const bool propagate_down, vector<Blob<Dtype>*>* bottom); |
||||
virtual void Backward_gpu(const vector<Blob<Dtype>*>& top, |
||||
const bool propagate_down, vector<Blob<Dtype>*>* bottom); |
||||
|
||||
int count_; |
||||
}; |
||||
|
||||
/* Im2colLayer
|
||||
*/ |
||||
template <typename Dtype> |
||||
class Im2colLayer : public Layer<Dtype> { |
||||
public: |
||||
explicit Im2colLayer(const LayerParameter& param) |
||||
: Layer<Dtype>(param) {} |
||||
virtual void SetUp(const vector<Blob<Dtype>*>& bottom, |
||||
vector<Blob<Dtype>*>* top); |
||||
|
||||
virtual inline LayerParameter_LayerType type() const { |
||||
return LayerParameter_LayerType_IM2COL; |
||||
} |
||||
virtual inline int ExactNumBottomBlobs() const { return 1; } |
||||
virtual inline int ExactNumTopBlobs() const { return 1; } |
||||
|
||||
protected: |
||||
virtual Dtype Forward_cpu(const vector<Blob<Dtype>*>& bottom, |
||||
vector<Blob<Dtype>*>* top); |
||||
virtual Dtype Forward_gpu(const vector<Blob<Dtype>*>& bottom, |
||||
vector<Blob<Dtype>*>* top); |
||||
virtual void Backward_cpu(const vector<Blob<Dtype>*>& top, |
||||
const bool propagate_down, vector<Blob<Dtype>*>* bottom); |
||||
virtual void Backward_gpu(const vector<Blob<Dtype>*>& top, |
||||
const bool propagate_down, vector<Blob<Dtype>*>* bottom); |
||||
|
||||
int kernel_size_; |
||||
int stride_; |
||||
int channels_; |
||||
int height_; |
||||
int width_; |
||||
int pad_; |
||||
}; |
||||
|
||||
/* InnerProductLayer
|
||||
*/ |
||||
template <typename Dtype> |
||||
class InnerProductLayer : public Layer<Dtype> { |
||||
public: |
||||
explicit InnerProductLayer(const LayerParameter& param) |
||||
: Layer<Dtype>(param) {} |
||||
virtual void SetUp(const vector<Blob<Dtype>*>& bottom, |
||||
vector<Blob<Dtype>*>* top); |
||||
|
||||
virtual inline LayerParameter_LayerType type() const { |
||||
return LayerParameter_LayerType_INNER_PRODUCT; |
||||
} |
||||
virtual inline int ExactNumBottomBlobs() const { return 1; } |
||||
virtual inline int ExactNumTopBlobs() const { return 1; } |
||||
|
||||
protected: |
||||
virtual Dtype Forward_cpu(const vector<Blob<Dtype>*>& bottom, |
||||
vector<Blob<Dtype>*>* top); |
||||
virtual Dtype Forward_gpu(const vector<Blob<Dtype>*>& bottom, |
||||
vector<Blob<Dtype>*>* top); |
||||
virtual void Backward_cpu(const vector<Blob<Dtype>*>& top, |
||||
const bool propagate_down, vector<Blob<Dtype>*>* bottom); |
||||
virtual void Backward_gpu(const vector<Blob<Dtype>*>& top, |
||||
const bool propagate_down, vector<Blob<Dtype>*>* bottom); |
||||
|
||||
int M_; |
||||
int K_; |
||||
int N_; |
||||
bool bias_term_; |
||||
shared_ptr<SyncedMemory> bias_multiplier_; |
||||
}; |
||||
|
||||
// Forward declare PoolingLayer and SplitLayer for use in LRNLayer.
|
||||
template <typename Dtype> class PoolingLayer; |
||||
template <typename Dtype> class SplitLayer; |
||||
|
||||
/* LRNLayer
|
||||
Local Response Normalization |
||||
*/ |
||||
template <typename Dtype> |
||||
class LRNLayer : public Layer<Dtype> { |
||||
public: |
||||
explicit LRNLayer(const LayerParameter& param) |
||||
: Layer<Dtype>(param) {} |
||||
virtual void SetUp(const vector<Blob<Dtype>*>& bottom, |
||||
vector<Blob<Dtype>*>* top); |
||||
|
||||
virtual inline LayerParameter_LayerType type() const { |
||||
return LayerParameter_LayerType_LRN; |
||||
} |
||||
virtual inline int ExactNumBottomBlobs() const { return 1; } |
||||
virtual inline int ExactNumTopBlobs() const { return 1; } |
||||
|
||||
protected: |
||||
virtual Dtype Forward_cpu(const vector<Blob<Dtype>*>& bottom, |
||||
vector<Blob<Dtype>*>* top); |
||||
virtual Dtype Forward_gpu(const vector<Blob<Dtype>*>& bottom, |
||||
vector<Blob<Dtype>*>* top); |
||||
virtual void Backward_cpu(const vector<Blob<Dtype>*>& top, |
||||
const bool propagate_down, vector<Blob<Dtype>*>* bottom); |
||||
virtual void Backward_gpu(const vector<Blob<Dtype>*>& top, |
||||
const bool propagate_down, vector<Blob<Dtype>*>* bottom); |
||||
|
||||
virtual Dtype CrossChannelForward_cpu(const vector<Blob<Dtype>*>& bottom, |
||||
vector<Blob<Dtype>*>* top); |
||||
virtual Dtype CrossChannelForward_gpu(const vector<Blob<Dtype>*>& bottom, |
||||
vector<Blob<Dtype>*>* top); |
||||
virtual Dtype WithinChannelForward(const vector<Blob<Dtype>*>& bottom, |
||||
vector<Blob<Dtype>*>* top); |
||||
virtual void CrossChannelBackward_cpu(const vector<Blob<Dtype>*>& top, |
||||
const bool propagate_down, vector<Blob<Dtype>*>* bottom); |
||||
virtual void CrossChannelBackward_gpu(const vector<Blob<Dtype>*>& top, |
||||
const bool propagate_down, vector<Blob<Dtype>*>* bottom); |
||||
virtual void WithinChannelBackward(const vector<Blob<Dtype>*>& top, |
||||
const bool propagate_down, vector<Blob<Dtype>*>* bottom); |
||||
|
||||
int size_; |
||||
int pre_pad_; |
||||
Dtype alpha_; |
||||
Dtype beta_; |
||||
int num_; |
||||
int channels_; |
||||
int height_; |
||||
int width_; |
||||
|
||||
// Fields used for normalization ACROSS_CHANNELS
|
||||
// scale_ stores the intermediate summing results
|
||||
Blob<Dtype> scale_; |
||||
|
||||
// Fields used for normalization WITHIN_CHANNEL
|
||||
shared_ptr<SplitLayer<Dtype> > split_layer_; |
||||
vector<Blob<Dtype>*> split_top_vec_; |
||||
shared_ptr<PowerLayer<Dtype> > square_layer_; |
||||
Blob<Dtype> square_input_; |
||||
Blob<Dtype> square_output_; |
||||
vector<Blob<Dtype>*> square_bottom_vec_; |
||||
vector<Blob<Dtype>*> square_top_vec_; |
||||
shared_ptr<PoolingLayer<Dtype> > pool_layer_; |
||||
Blob<Dtype> pool_output_; |
||||
vector<Blob<Dtype>*> pool_top_vec_; |
||||
shared_ptr<PowerLayer<Dtype> > power_layer_; |
||||
Blob<Dtype> power_output_; |
||||
vector<Blob<Dtype>*> power_top_vec_; |
||||
shared_ptr<EltwiseLayer<Dtype> > product_layer_; |
||||
Blob<Dtype> product_data_input_; |
||||
vector<Blob<Dtype>*> product_bottom_vec_; |
||||
}; |
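
// The WITHIN_CHANNEL members above chain split, squaring (power), average
// pooling, another power, and an elementwise product.  Below is a hedged 1-D
// illustration of the arithmetic that pipeline appears to implement; window
// handling and the normalization constants of the real layer come from
// LRNParameter, so treat it as illustrative only (not used by the layer;
// needs <cmath> and <algorithm>).
inline std::vector<float> lrn_1d_sketch(const std::vector<float>& x, int size,
    float alpha, float beta) {
  const int n = static_cast<int>(x.size());
  const int half = size / 2;
  std::vector<float> y(n);
  for (int i = 0; i < n; ++i) {
    float sum_sq = 0.0f;
    int count = 0;
    for (int j = std::max(0, i - half); j <= std::min(n - 1, i + half); ++j) {
      sum_sq += x[j] * x[j];  // local sum of squared activations
      ++count;
    }
    const float scale = 1.0f + alpha * sum_sq / count;  // k = 1 assumed
    y[i] = x[i] * std::pow(scale, -beta);
  }
  return y;
}
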
||||
|
||||
/* PoolingLayer
|
||||
*/ |
||||
template <typename Dtype> |
||||
class PoolingLayer : public Layer<Dtype> { |
||||
public: |
||||
explicit PoolingLayer(const LayerParameter& param) |
||||
: Layer<Dtype>(param) {} |
||||
virtual void SetUp(const vector<Blob<Dtype>*>& bottom, |
||||
vector<Blob<Dtype>*>* top); |
||||
|
||||
virtual inline LayerParameter_LayerType type() const { |
||||
return LayerParameter_LayerType_POOLING; |
||||
} |
||||
virtual inline int ExactNumBottomBlobs() const { return 1; } |
||||
virtual inline int MinTopBlobs() const { return 1; } |
||||
virtual inline int MaxTopBlobs() const { return max_top_blobs_; } |
||||
|
||||
protected: |
||||
virtual Dtype Forward_cpu(const vector<Blob<Dtype>*>& bottom, |
||||
vector<Blob<Dtype>*>* top); |
||||
virtual Dtype Forward_gpu(const vector<Blob<Dtype>*>& bottom, |
||||
vector<Blob<Dtype>*>* top); |
||||
virtual void Backward_cpu(const vector<Blob<Dtype>*>& top, |
||||
const bool propagate_down, vector<Blob<Dtype>*>* bottom); |
||||
virtual void Backward_gpu(const vector<Blob<Dtype>*>& top, |
||||
const bool propagate_down, vector<Blob<Dtype>*>* bottom); |
||||
|
||||
int max_top_blobs_; |
||||
int kernel_size_; |
||||
int stride_; |
||||
int pad_; |
||||
int channels_; |
||||
int height_; |
||||
int width_; |
||||
int pooled_height_; |
||||
int pooled_width_; |
||||
Blob<Dtype> rand_idx_; |
||||
shared_ptr<Blob<int> > max_idx_; |
||||
}; |
||||
|
||||
/* SoftmaxLayer
|
||||
*/ |
||||
template <typename Dtype> |
||||
class SoftmaxLayer : public Layer<Dtype> { |
||||
public: |
||||
explicit SoftmaxLayer(const LayerParameter& param) |
||||
: Layer<Dtype>(param) {} |
||||
virtual void SetUp(const vector<Blob<Dtype>*>& bottom, |
||||
vector<Blob<Dtype>*>* top); |
||||
|
||||
virtual inline LayerParameter_LayerType type() const { |
||||
return LayerParameter_LayerType_SOFTMAX; |
||||
} |
||||
virtual inline int ExactNumBottomBlobs() const { return 1; } |
||||
virtual inline int ExactNumTopBlobs() const { return 1; } |
||||
|
||||
protected: |
||||
virtual Dtype Forward_cpu(const vector<Blob<Dtype>*>& bottom, |
||||
vector<Blob<Dtype>*>* top); |
||||
virtual Dtype Forward_gpu(const vector<Blob<Dtype>*>& bottom, |
||||
vector<Blob<Dtype>*>* top); |
||||
virtual void Backward_cpu(const vector<Blob<Dtype>*>& top, |
||||
const bool propagate_down, vector<Blob<Dtype>*>* bottom); |
||||
virtual void Backward_gpu(const vector<Blob<Dtype>*>& top, |
||||
const bool propagate_down, vector<Blob<Dtype>*>* bottom); |
||||
|
||||
// sum_multiplier is just used to carry out sum using blas
|
||||
Blob<Dtype> sum_multiplier_; |
||||
// scale is an intermediate blob to hold temporary results.
|
||||
Blob<Dtype> scale_; |
||||
}; |
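
// The sum_multiplier_ comment refers to a standard BLAS trick: multiplying a
// matrix by a vector of ones sums each row, so per-image reductions can be
// delegated to GEMV/GEMM instead of hand-written loops.  A standalone sketch
// of the same idea (illustrative only; assumes the CBLAS declarations, e.g.
// cblas_sgemv, are visible in this translation unit).
inline void row_sums_sketch(const float* A, const int M, const int N,
    float* out) {
  // out[m] = sum_n A[m * N + n], computed as A * ones via single-precision GEMV.
  std::vector<float> ones(N, 1.0f);
  cblas_sgemv(CblasRowMajor, CblasNoTrans, M, N, 1.0f, A, N,
              &ones[0], 1, 0.0f, out, 1);
}
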
||||
|
||||
/* SoftmaxWithLossLayer
|
||||
Implements softmax and computes the loss. |
||||
|
||||
It is preferred over separate softmax + multinomial logistic loss
layers due to its more numerically stable gradients.

At test time, this layer can be replaced by a plain softmax layer.
||||
*/ |
||||
template <typename Dtype> |
||||
class SoftmaxWithLossLayer : public Layer<Dtype> { |
||||
public: |
||||
explicit SoftmaxWithLossLayer(const LayerParameter& param) |
||||
: Layer<Dtype>(param), softmax_layer_(new SoftmaxLayer<Dtype>(param)) {} |
||||
virtual void SetUp(const vector<Blob<Dtype>*>& bottom, |
||||
vector<Blob<Dtype>*>* top); |
||||
|
||||
virtual inline LayerParameter_LayerType type() const { |
||||
return LayerParameter_LayerType_SOFTMAX_LOSS; |
||||
} |
||||
virtual inline int ExactNumBottomBlobs() const { return 2; } |
||||
virtual inline int ExactNumTopBlobs() const { return 0; } |
||||
|
||||
protected: |
||||
virtual Dtype Forward_cpu(const vector<Blob<Dtype>*>& bottom, |
||||
vector<Blob<Dtype>*>* top); |
||||
virtual Dtype Forward_gpu(const vector<Blob<Dtype>*>& bottom, |
||||
vector<Blob<Dtype>*>* top); |
||||
virtual void Backward_cpu(const vector<Blob<Dtype>*>& top, |
||||
const bool propagate_down, vector<Blob<Dtype>*>* bottom); |
||||
virtual void Backward_gpu(const vector<Blob<Dtype>*>& top, |
||||
const bool propagate_down, vector<Blob<Dtype>*>* bottom); |
||||
|
||||
shared_ptr<SoftmaxLayer<Dtype> > softmax_layer_; |
||||
// prob stores the output probability of the layer.
|
||||
Blob<Dtype> prob_; |
||||
// Vector holders to call the underlying softmax layer forward and backward.
|
||||
vector<Blob<Dtype>*> softmax_bottom_vec_; |
||||
vector<Blob<Dtype>*> softmax_top_vec_; |
||||
}; |
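
// A hedged illustration of the stability point above: when softmax and the
// log loss are fused, the gradient with respect to the logits collapses to
// p_i - [i == label], so no small probability is ever divided by.  Reference
// computation with the usual max subtraction (illustrative only, not used by
// the layer; needs <cmath> and <algorithm>).
inline std::vector<double> fused_softmax_loss_grad_sketch(
    const std::vector<double>& z, int label) {
  // Assumes z is non-empty and 0 <= label < z.size().
  double m = z[0];
  for (size_t i = 1; i < z.size(); ++i) m = std::max(m, z[i]);
  double sum = 0.0;
  std::vector<double> p(z.size());
  for (size_t i = 0; i < z.size(); ++i) { p[i] = std::exp(z[i] - m); sum += p[i]; }
  std::vector<double> grad(z.size());
  for (size_t i = 0; i < z.size(); ++i) {
    grad[i] = p[i] / sum - (static_cast<int>(i) == label ? 1.0 : 0.0);
  }
  return grad;
}
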
||||
|
||||
/* SplitLayer
|
||||
*/ |
||||
template <typename Dtype> |
||||
class SplitLayer : public Layer<Dtype> { |
||||
public: |
||||
explicit SplitLayer(const LayerParameter& param) |
||||
: Layer<Dtype>(param) {} |
||||
virtual void SetUp(const vector<Blob<Dtype>*>& bottom, |
||||
vector<Blob<Dtype>*>* top); |
||||
|
||||
virtual inline LayerParameter_LayerType type() const { |
||||
return LayerParameter_LayerType_SPLIT; |
||||
} |
||||
virtual inline int ExactNumBottomBlobs() const { return 1; } |
||||
virtual inline int MinTopBlobs() const { return 1; } |
||||
|
||||
protected: |
||||
virtual Dtype Forward_cpu(const vector<Blob<Dtype>*>& bottom, |
||||
vector<Blob<Dtype>*>* top); |
||||
virtual Dtype Forward_gpu(const vector<Blob<Dtype>*>& bottom, |
||||
vector<Blob<Dtype>*>* top); |
||||
virtual void Backward_cpu(const vector<Blob<Dtype>*>& top, |
||||
const bool propagate_down, vector<Blob<Dtype>*>* bottom); |
||||
virtual void Backward_gpu(const vector<Blob<Dtype>*>& top, |
||||
const bool propagate_down, vector<Blob<Dtype>*>* bottom); |
||||
|
||||
int count_; |
||||
}; |
||||
|
||||
} // namespace caffe
|
||||
|
||||
#endif // CAFFE_VISION_LAYERS_HPP_
|
Binary file not shown.
@ -0,0 +1,370 @@ |
||||
// Copyright 2014 BVLC and contributors.
|
||||
//
|
||||
// matcaffe.cpp provides a wrapper of the caffe::Net class as well as some
|
||||
// caffe::Caffe functions so that one could easily call it from matlab.
|
||||
// Note that for matlab, we will simply use float as the data type.
|
||||
|
||||
#include <string> |
||||
#include <vector> |
||||
|
||||
#include "mex.h" |
||||
#include "caffe/caffe.hpp" |
||||
|
||||
#define MEX_ARGS int nlhs, mxArray **plhs, int nrhs, const mxArray **prhs |
||||
|
||||
using namespace caffe; // NOLINT(build/namespaces)
|
||||
|
||||
// The pointer to the internal caffe::Net instance
|
||||
static shared_ptr<Net<float> > net_; |
||||
static int init_key = -2; |
||||
|
||||
// Five things to be aware of:
|
||||
// caffe uses row-major order
|
||||
// matlab uses column-major order
|
||||
// caffe uses BGR color channel order
|
||||
// matlab uses RGB color channel order
|
||||
// images need to have the data mean subtracted
|
||||
//
|
||||
// Data coming in from matlab needs to be in the order
|
||||
// [width, height, channels, images]
|
||||
// where width is the fastest dimension.
|
||||
// Here is the rough matlab for putting image data into the correct
|
||||
// format:
|
||||
// % convert from uint8 to single
|
||||
// im = single(im);
|
||||
// % reshape to a fixed size (e.g., 227x227)
|
||||
// im = imresize(im, [IMAGE_DIM IMAGE_DIM], 'bilinear');
|
||||
// % permute from RGB to BGR and subtract the data mean (already in BGR)
|
||||
// im = im(:,:,[3 2 1]) - data_mean;
|
||||
// % flip width and height to make width the fastest dimension
|
||||
// im = permute(im, [2 1 3]);
|
||||
//
|
||||
// If you have multiple images, cat them with cat(4, ...)
|
||||
//
|
||||
// The actual forward function. It takes in a cell array of 4-D arrays as
|
||||
// input and outputs a cell array.
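
// The layout rules above boil down to one index identity: element (n, c, h, w)
// of a row-major (num, channels, height, width) Caffe blob sits at the same
// linear offset as element (w, h, c, n) of a column-major
// [width, height, channels, images] MATLAB array, which is why do_forward can
// memcpy the permuted MATLAB data straight into a blob.  The two helpers below
// are illustrative only and are not called by the wrapper.
inline int caffe_offset(int n, int c, int h, int w, int C, int H, int W) {
  return ((n * C + c) * H + h) * W + w;  // row-major: w is the fastest index
}
inline int matlab_offset(int w, int h, int c, int n, int W, int H, int C) {
  return w + W * (h + H * (c + C * n));  // column-major: w is the fastest index
}
// For all valid indices:
//   caffe_offset(n, c, h, w, C, H, W) == matlab_offset(w, h, c, n, W, H, C).
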
|
||||
|
||||
static mxArray* do_forward(const mxArray* const bottom) { |
||||
vector<Blob<float>*>& input_blobs = net_->input_blobs(); |
||||
CHECK_EQ(static_cast<unsigned int>(mxGetDimensions(bottom)[0]), |
||||
input_blobs.size()); |
||||
for (unsigned int i = 0; i < input_blobs.size(); ++i) { |
||||
const mxArray* const elem = mxGetCell(bottom, i); |
||||
const float* const data_ptr = |
||||
reinterpret_cast<const float* const>(mxGetPr(elem)); |
||||
switch (Caffe::mode()) { |
||||
case Caffe::CPU: |
||||
memcpy(input_blobs[i]->mutable_cpu_data(), data_ptr, |
||||
sizeof(float) * input_blobs[i]->count()); |
||||
break; |
||||
case Caffe::GPU: |
||||
cudaMemcpy(input_blobs[i]->mutable_gpu_data(), data_ptr, |
||||
sizeof(float) * input_blobs[i]->count(), cudaMemcpyHostToDevice); |
||||
break; |
||||
default: |
||||
LOG(FATAL) << "Unknown Caffe mode."; |
||||
} // switch (Caffe::mode())
|
||||
} |
||||
const vector<Blob<float>*>& output_blobs = net_->ForwardPrefilled(); |
||||
mxArray* mx_out = mxCreateCellMatrix(output_blobs.size(), 1); |
||||
for (unsigned int i = 0; i < output_blobs.size(); ++i) { |
||||
// internally data is stored as (width, height, channels, num)
|
||||
// where width is the fastest dimension
|
||||
mwSize dims[4] = {output_blobs[i]->width(), output_blobs[i]->height(), |
||||
output_blobs[i]->channels(), output_blobs[i]->num()}; |
||||
mxArray* mx_blob = mxCreateNumericArray(4, dims, mxSINGLE_CLASS, mxREAL); |
||||
mxSetCell(mx_out, i, mx_blob); |
||||
float* data_ptr = reinterpret_cast<float*>(mxGetPr(mx_blob)); |
||||
switch (Caffe::mode()) { |
||||
case Caffe::CPU: |
||||
memcpy(data_ptr, output_blobs[i]->cpu_data(), |
||||
sizeof(float) * output_blobs[i]->count()); |
||||
break; |
||||
case Caffe::GPU: |
||||
cudaMemcpy(data_ptr, output_blobs[i]->gpu_data(), |
||||
sizeof(float) * output_blobs[i]->count(), cudaMemcpyDeviceToHost); |
||||
break; |
||||
default: |
||||
LOG(FATAL) << "Unknown Caffe mode."; |
||||
} // switch (Caffe::mode())
|
||||
} |
||||
|
||||
return mx_out; |
||||
} |
||||
|
||||
static mxArray* do_backward(const mxArray* const top_diff) { |
||||
vector<Blob<float>*>& output_blobs = net_->output_blobs(); |
||||
vector<Blob<float>*>& input_blobs = net_->input_blobs(); |
||||
CHECK_EQ(static_cast<unsigned int>(mxGetDimensions(top_diff)[0]), |
||||
output_blobs.size()); |
||||
// First, copy the output diff
|
||||
for (unsigned int i = 0; i < output_blobs.size(); ++i) { |
||||
const mxArray* const elem = mxGetCell(top_diff, i); |
||||
const float* const data_ptr = |
||||
reinterpret_cast<const float* const>(mxGetPr(elem)); |
||||
switch (Caffe::mode()) { |
||||
case Caffe::CPU: |
||||
memcpy(output_blobs[i]->mutable_cpu_diff(), data_ptr, |
||||
sizeof(float) * output_blobs[i]->count()); |
||||
break; |
||||
case Caffe::GPU: |
||||
cudaMemcpy(output_blobs[i]->mutable_gpu_diff(), data_ptr, |
||||
sizeof(float) * output_blobs[i]->count(), cudaMemcpyHostToDevice); |
||||
break; |
||||
default: |
||||
LOG(FATAL) << "Unknown Caffe mode."; |
||||
} // switch (Caffe::mode())
|
||||
} |
||||
// LOG(INFO) << "Start";
|
||||
net_->Backward(); |
||||
// LOG(INFO) << "End";
|
||||
mxArray* mx_out = mxCreateCellMatrix(input_blobs.size(), 1); |
||||
for (unsigned int i = 0; i < input_blobs.size(); ++i) { |
||||
// internally data is stored as (width, height, channels, num)
|
||||
// where width is the fastest dimension
|
||||
mwSize dims[4] = {input_blobs[i]->width(), input_blobs[i]->height(), |
||||
input_blobs[i]->channels(), input_blobs[i]->num()}; |
||||
mxArray* mx_blob = mxCreateNumericArray(4, dims, mxSINGLE_CLASS, mxREAL); |
||||
mxSetCell(mx_out, i, mx_blob); |
||||
float* data_ptr = reinterpret_cast<float*>(mxGetPr(mx_blob)); |
||||
switch (Caffe::mode()) { |
||||
case Caffe::CPU: |
||||
memcpy(data_ptr, input_blobs[i]->cpu_diff(), |
||||
sizeof(float) * input_blobs[i]->count()); |
||||
break; |
||||
case Caffe::GPU: |
||||
cudaMemcpy(data_ptr, input_blobs[i]->gpu_diff(), |
||||
sizeof(float) * input_blobs[i]->count(), cudaMemcpyDeviceToHost); |
||||
break; |
||||
default: |
||||
LOG(FATAL) << "Unknown Caffe mode."; |
||||
} // switch (Caffe::mode())
|
||||
} |
||||
|
||||
return mx_out; |
||||
} |
||||
|
||||
static mxArray* do_get_weights() { |
||||
const vector<shared_ptr<Layer<float> > >& layers = net_->layers(); |
||||
const vector<string>& layer_names = net_->layer_names(); |
||||
|
||||
// Step 1: count the number of layers with weights
|
||||
int num_layers = 0; |
||||
{ |
||||
string prev_layer_name = ""; |
||||
for (unsigned int i = 0; i < layers.size(); ++i) { |
||||
vector<shared_ptr<Blob<float> > >& layer_blobs = layers[i]->blobs(); |
||||
if (layer_blobs.size() == 0) { |
||||
continue; |
||||
} |
||||
if (layer_names[i] != prev_layer_name) { |
||||
prev_layer_name = layer_names[i]; |
||||
num_layers++; |
||||
} |
||||
} |
||||
} |
||||
|
||||
// Step 2: prepare output array of structures
|
||||
mxArray* mx_layers; |
||||
{ |
||||
const mwSize dims[2] = {num_layers, 1}; |
||||
const char* fnames[2] = {"weights", "layer_names"}; |
||||
mx_layers = mxCreateStructArray(2, dims, 2, fnames); |
||||
} |
||||
|
||||
// Step 3: copy weights into output
|
||||
{ |
||||
string prev_layer_name = ""; |
||||
int mx_layer_index = 0; |
||||
for (unsigned int i = 0; i < layers.size(); ++i) { |
||||
vector<shared_ptr<Blob<float> > >& layer_blobs = layers[i]->blobs(); |
||||
if (layer_blobs.size() == 0) { |
||||
continue; |
||||
} |
||||
|
||||
mxArray* mx_layer_cells = NULL; |
||||
if (layer_names[i] != prev_layer_name) { |
||||
prev_layer_name = layer_names[i]; |
||||
const mwSize dims[2] = {layer_blobs.size(), 1}; |
||||
mx_layer_cells = mxCreateCellArray(2, dims); |
||||
mxSetField(mx_layers, mx_layer_index, "weights", mx_layer_cells); |
||||
mxSetField(mx_layers, mx_layer_index, "layer_names", |
||||
mxCreateString(layer_names[i].c_str())); |
||||
mx_layer_index++; |
||||
} |
||||
|
||||
for (unsigned int j = 0; j < layer_blobs.size(); ++j) { |
||||
// internally data is stored as (width, height, channels, num)
|
||||
// where width is the fastest dimension
|
||||
mwSize dims[4] = {layer_blobs[j]->width(), layer_blobs[j]->height(), |
||||
layer_blobs[j]->channels(), layer_blobs[j]->num()}; |
||||
|
||||
mxArray* mx_weights = |
||||
mxCreateNumericArray(4, dims, mxSINGLE_CLASS, mxREAL); |
||||
mxSetCell(mx_layer_cells, j, mx_weights); |
||||
float* weights_ptr = reinterpret_cast<float*>(mxGetPr(mx_weights)); |
||||
|
||||
switch (Caffe::mode()) { |
||||
case Caffe::CPU: |
||||
memcpy(weights_ptr, layer_blobs[j]->cpu_data(), |
||||
sizeof(float) * layer_blobs[j]->count()); |
||||
break; |
||||
case Caffe::GPU: |
||||
CUDA_CHECK(cudaMemcpy(weights_ptr, layer_blobs[j]->gpu_data(), |
||||
sizeof(float) * layer_blobs[j]->count(), cudaMemcpyDeviceToHost)); |
||||
break; |
||||
default: |
||||
LOG(FATAL) << "Unknown caffe mode: " << Caffe::mode(); |
||||
} |
||||
} |
||||
} |
||||
} |
||||
|
||||
return mx_layers; |
||||
} |
||||
|
||||
static void get_weights(MEX_ARGS) { |
||||
plhs[0] = do_get_weights(); |
||||
} |
||||
|
||||
static void set_mode_cpu(MEX_ARGS) { |
||||
Caffe::set_mode(Caffe::CPU); |
||||
} |
||||
|
||||
static void set_mode_gpu(MEX_ARGS) { |
||||
Caffe::set_mode(Caffe::GPU); |
||||
} |
||||
|
||||
static void set_phase_train(MEX_ARGS) { |
||||
Caffe::set_phase(Caffe::TRAIN); |
||||
} |
||||
|
||||
static void set_phase_test(MEX_ARGS) { |
||||
Caffe::set_phase(Caffe::TEST); |
||||
} |
||||
|
||||
static void set_device(MEX_ARGS) { |
||||
if (nrhs != 1) { |
||||
LOG(ERROR) << "Only given " << nrhs << " arguments"; |
||||
mexErrMsgTxt("Wrong number of arguments"); |
||||
} |
||||
|
||||
int device_id = static_cast<int>(mxGetScalar(prhs[0])); |
||||
Caffe::SetDevice(device_id); |
||||
} |
||||
|
||||
static void get_init_key(MEX_ARGS) { |
||||
plhs[0] = mxCreateDoubleScalar(init_key); |
||||
} |
||||
|
||||
static void init(MEX_ARGS) { |
||||
if (nrhs != 2) { |
||||
LOG(ERROR) << "Only given " << nrhs << " arguments"; |
||||
mexErrMsgTxt("Wrong number of arguments"); |
||||
} |
||||
|
||||
char* param_file = mxArrayToString(prhs[0]); |
||||
char* model_file = mxArrayToString(prhs[1]); |
||||
|
||||
net_.reset(new Net<float>(string(param_file))); |
||||
net_->CopyTrainedLayersFrom(string(model_file)); |
||||
|
||||
mxFree(param_file); |
||||
mxFree(model_file); |
||||
|
||||
init_key = random(); // NOLINT(caffe/random_fn)
|
||||
|
||||
if (nlhs == 1) { |
||||
plhs[0] = mxCreateDoubleScalar(init_key); |
||||
} |
||||
} |
||||
|
||||
static void reset(MEX_ARGS) { |
||||
if (net_) { |
||||
net_.reset(); |
||||
init_key = -2; |
||||
LOG(INFO) << "Network reset, call init before use it again"; |
||||
} |
||||
} |
||||
|
||||
static void forward(MEX_ARGS) { |
||||
if (nrhs != 1) { |
||||
LOG(ERROR) << "Only given " << nrhs << " arguments"; |
||||
mexErrMsgTxt("Wrong number of arguments"); |
||||
} |
||||
|
||||
plhs[0] = do_forward(prhs[0]); |
||||
} |
||||
|
||||
static void backward(MEX_ARGS) { |
||||
if (nrhs != 1) { |
||||
LOG(ERROR) << "Only given " << nrhs << " arguments"; |
||||
mexErrMsgTxt("Wrong number of arguments"); |
||||
} |
||||
|
||||
plhs[0] = do_backward(prhs[0]); |
||||
} |
||||
|
||||
static void is_initialized(MEX_ARGS) { |
||||
if (!net_) { |
||||
plhs[0] = mxCreateDoubleScalar(0); |
||||
} else { |
||||
plhs[0] = mxCreateDoubleScalar(1); |
||||
} |
||||
} |
||||
|
||||
/** -----------------------------------------------------------------
|
||||
** Available commands. |
||||
**/ |
||||
struct handler_registry { |
||||
string cmd; |
||||
void (*func)(MEX_ARGS); |
||||
}; |
||||
|
||||
static handler_registry handlers[] = { |
||||
// Public API functions
|
||||
{ "forward", forward }, |
||||
{ "backward", backward }, |
||||
{ "init", init }, |
||||
{ "is_initialized", is_initialized }, |
||||
{ "set_mode_cpu", set_mode_cpu }, |
||||
{ "set_mode_gpu", set_mode_gpu }, |
||||
{ "set_phase_train", set_phase_train }, |
||||
{ "set_phase_test", set_phase_test }, |
||||
{ "set_device", set_device }, |
||||
{ "get_weights", get_weights }, |
||||
{ "get_init_key", get_init_key }, |
||||
{ "reset", reset }, |
||||
// The end.
|
||||
{ "END", NULL }, |
||||
}; |
||||
|
||||
|
||||
/** -----------------------------------------------------------------
|
||||
** matlab entry point: caffe(api_command, arg1, arg2, ...) |
||||
**/ |
||||
void mexFunction(MEX_ARGS) { |
||||
if (nrhs == 0) { |
||||
LOG(ERROR) << "No API command given"; |
||||
mexErrMsgTxt("An API command is requires"); |
||||
return; |
||||
} |
||||
|
||||
{ // Handle input command
|
||||
char *cmd = mxArrayToString(prhs[0]); |
||||
bool dispatched = false; |
||||
// Dispatch to cmd handler
|
||||
for (int i = 0; handlers[i].func != NULL; i++) { |
||||
if (handlers[i].cmd.compare(cmd) == 0) { |
||||
handlers[i].func(nlhs, plhs, nrhs-1, prhs+1); |
||||
dispatched = true; |
||||
break; |
||||
} |
||||
} |
||||
if (!dispatched) { |
||||
LOG(ERROR) << "Unknown command `" << cmd << "'"; |
||||
mexErrMsgTxt("API command not recognized"); |
||||
} |
||||
mxFree(cmd); |
||||
} |
||||
} |
@ -0,0 +1,76 @@ |
||||
function [scores,list_im] = matcaffe_batch(list_im, use_gpu) |
||||
% scores = matcaffe_batch(list_im, use_gpu) |
||||
% |
||||
% Demo of the matlab wrapper using the ILSVRC network. |
||||
% |
||||
% input |
||||
% list_im list of images files |
||||
% use_gpu 1 to use the GPU, 0 to use the CPU |
||||
% |
||||
% output |
||||
% scores 1000 x num_images ILSVRC output vector |
||||
% |
||||
% You may need to do the following before you start matlab: |
||||
% $ export LD_LIBRARY_PATH=/opt/intel/mkl/lib/intel64:/usr/local/cuda/lib64 |
||||
% $ export LD_PRELOAD=/usr/lib/x86_64-linux-gnu/libstdc++.so.6 |
||||
% Or the equivalent based on where things are installed on your system |
||||
% |
||||
% Usage: |
||||
% scores = matcaffe_batch({'peppers.png','onion.png'}); |
||||
% scores = matcaffe_batch('list_images.txt', 1); |
||||
if nargin < 1 |
||||
% For test purposes |
||||
list_im = {'peppers.png','onion.png'};
||||
end |
||||
if ischar(list_im) |
||||
% Assume it is a file containing the list of images
||||
filename = list_im; |
||||
list_im = read_cell(filename); |
||||
end |
||||
% Adjust the batch size to match with imagenet_deploy.prototxt |
||||
batch_size = 10; |
||||
% Adjust dim to the output size of imagenet_deploy.prototxt |
||||
dim = 1000; |
||||
disp(list_im) |
||||
if mod(length(list_im),batch_size) |
||||
warning(['Assuming batches of ' num2str(batch_size) ' images; the rest will be filled with zeros'])
||||
end |
||||
|
||||
% init caffe network (spews logging info) |
||||
if exist('use_gpu', 'var') |
||||
matcaffe_init(use_gpu); |
||||
else |
||||
matcaffe_init(); |
||||
end |
||||
|
||||
d = load('ilsvrc_2012_mean'); |
||||
IMAGE_MEAN = d.image_mean; |
||||
|
||||
% prepare input |
||||
|
||||
num_images = length(list_im); |
||||
scores = zeros(dim,num_images,'single'); |
||||
num_batches = ceil(length(list_im)/batch_size) |
||||
initic=tic; |
||||
for bb = 1 : num_batches |
||||
batchtic = tic; |
||||
range = 1+batch_size*(bb-1):min(num_images,batch_size * bb); |
||||
tic |
||||
input_data = prepare_batch(list_im(range),IMAGE_MEAN,batch_size); |
||||
toc, tic |
||||
fprintf('Batch %d out of %d %.2f%% Complete ETA %.2f seconds\n',... |
||||
bb,num_batches,bb/num_batches*100,toc(initic)/bb*(num_batches-bb)); |
||||
output_data = caffe('forward', {input_data}); |
||||
toc |
||||
output_data = squeeze(output_data{1}); |
||||
scores(:,range) = output_data(:,mod(range-1,batch_size)+1); |
||||
toc(batchtic) |
||||
end |
||||
toc(initic); |
||||
|
||||
if exist('filename', 'var') |
||||
save([filename '.probs.mat'],'list_im','scores','-v7.3'); |
||||
end |
||||
|
||||
|
||||
|
@ -0,0 +1,110 @@ |
||||
function [scores, maxlabel] = matcaffe_demo(im, use_gpu) |
||||
% scores = matcaffe_demo(im, use_gpu) |
||||
% |
||||
% Demo of the matlab wrapper using the ILSVRC network. |
||||
% |
||||
% input |
||||
% im color image as uint8 HxWx3 |
||||
% use_gpu 1 to use the GPU, 0 to use the CPU |
||||
% |
||||
% output |
||||
% scores 1000-dimensional ILSVRC score vector |
||||
% |
||||
% You may need to do the following before you start matlab: |
||||
% $ export LD_LIBRARY_PATH=/opt/intel/mkl/lib/intel64:/usr/local/cuda-5.5/lib64 |
||||
% $ export LD_PRELOAD=/usr/lib/x86_64-linux-gnu/libstdc++.so.6 |
||||
% Or the equivalent based on where things are installed on your system |
||||
% |
||||
% Usage: |
||||
% im = imread('../../examples/images/cat.jpg'); |
||||
% scores = matcaffe_demo(im, 1); |
||||
% [score, class] = max(scores); |
||||
% Five things to be aware of: |
||||
% caffe uses row-major order |
||||
% matlab uses column-major order |
||||
% caffe uses BGR color channel order |
||||
% matlab uses RGB color channel order |
||||
% images need to have the data mean subtracted |
||||
|
||||
% Data coming in from matlab needs to be in the order |
||||
% [width, height, channels, images] |
||||
% where width is the fastest dimension. |
||||
% Here is the rough matlab for putting image data into the correct |
||||
% format: |
||||
% % convert from uint8 to single |
||||
% im = single(im); |
||||
% % reshape to a fixed size (e.g., 227x227) |
||||
% im = imresize(im, [IMAGE_DIM IMAGE_DIM], 'bilinear'); |
||||
% % permute from RGB to BGR and subtract the data mean (already in BGR) |
||||
% im = im(:,:,[3 2 1]) - data_mean; |
||||
% % flip width and height to make width the fastest dimension |
||||
% im = permute(im, [2 1 3]); |
||||
|
||||
% If you have multiple images, cat them with cat(4, ...) |
||||
|
||||
% The actual forward function. It takes in a cell array of 4-D arrays as |
||||
% input and outputs a cell array. |
||||
|
||||
|
||||
% init caffe network (spews logging info) |
||||
if exist('use_gpu', 'var') |
||||
matcaffe_init(use_gpu); |
||||
else |
||||
matcaffe_init(); |
||||
end |
||||
|
||||
if nargin < 1 |
||||
% For demo purposes we will use the peppers image |
||||
im = imread('peppers.png'); |
||||
end |
||||
|
||||
% prepare oversampled input |
||||
% input_data is Height x Width x Channel x Num |
||||
tic; |
||||
input_data = {prepare_image(im)}; |
||||
toc; |
||||
|
||||
% do forward pass to get scores |
||||
% scores are now Width x Height x Channels x Num |
||||
tic; |
||||
scores = caffe('forward', input_data); |
||||
toc; |
||||
|
||||
scores = scores{1}; |
||||
size(scores) |
||||
scores = squeeze(scores); |
||||
scores = mean(scores,2); |
||||
|
||||
[~,maxlabel] = max(scores); |
||||
|
||||
% ------------------------------------------------------------------------ |
||||
function images = prepare_image(im) |
||||
% ------------------------------------------------------------------------ |
||||
d = load('ilsvrc_2012_mean'); |
||||
IMAGE_MEAN = d.image_mean; |
||||
IMAGE_DIM = 256; |
||||
CROPPED_DIM = 227; |
||||
|
||||
% resize to fixed input size |
||||
im = single(im); |
||||
im = imresize(im, [IMAGE_DIM IMAGE_DIM], 'bilinear'); |
||||
% permute from RGB to BGR (IMAGE_MEAN is already BGR) |
||||
im = im(:,:,[3 2 1]) - IMAGE_MEAN; |
||||
|
||||
% oversample (4 corners, center, and their x-axis flips) |
||||
images = zeros(CROPPED_DIM, CROPPED_DIM, 3, 10, 'single'); |
||||
indices = [0 IMAGE_DIM-CROPPED_DIM] + 1; |
||||
curr = 1; |
||||
for i = indices |
||||
for j = indices |
||||
images(:, :, :, curr) = ... |
||||
permute(im(i:i+CROPPED_DIM-1, j:j+CROPPED_DIM-1, :), [2 1 3]); |
||||
images(:, :, :, curr+5) = images(end:-1:1, :, :, curr); |
||||
curr = curr + 1; |
||||
end |
||||
end |
||||
center = floor(indices(2) / 2)+1; |
||||
images(:,:,:,5) = ... |
||||
permute(im(center:center+CROPPED_DIM-1,center:center+CROPPED_DIM-1,:), ... |
||||
[2 1 3]); |
||||
images(:,:,:,10) = images(end:-1:1, :, :, curr); |
@ -0,0 +1,44 @@ |
||||
function matcaffe_init(use_gpu, model_def_file, model_file) |
||||
% matcaffe_init(use_gpu, model_def_file, model_file)
% Initialize the matcaffe wrapper
||||
|
||||
if nargin < 1 |
||||
% By default use CPU |
||||
use_gpu = 0; |
||||
end |
||||
if nargin < 2 || isempty(model_def_file) |
||||
% By default use imagenet_deploy |
||||
model_def_file = '../../examples/imagenet/imagenet_deploy.prototxt'; |
||||
end |
||||
if nargin < 3 || isempty(model_file) |
||||
% By default use caffe reference model |
||||
model_file = '../../examples/imagenet/caffe_reference_imagenet_model'; |
||||
end |
||||
|
||||
|
||||
if caffe('is_initialized') == 0 |
||||
if exist(model_file, 'file') == 0 |
||||
% NOTE: you'll have to get the pre-trained ILSVRC network |
||||
error('You need a network model file'); |
||||
end |
||||
if ~exist(model_def_file,'file') |
||||
% NOTE: you'll have to get network definition |
||||
error('You need the network prototxt definition'); |
||||
end |
||||
caffe('init', model_def_file, model_file) |
||||
end |
||||
fprintf('Done with init\n'); |
||||
|
||||
% set to use GPU or CPU |
||||
if use_gpu |
||||
fprintf('Using GPU Mode\n'); |
||||
caffe('set_mode_gpu'); |
||||
else |
||||
fprintf('Using CPU Mode\n'); |
||||
caffe('set_mode_cpu'); |
||||
end |
||||
fprintf('Done with set_mode\n'); |
||||
|
||||
% put into test mode |
||||
caffe('set_phase_test'); |
||||
fprintf('Done with set_phase_test\n'); |
@ -0,0 +1,41 @@ |
||||
% ------------------------------------------------------------------------ |
||||
function images = prepare_batch(image_files,IMAGE_MEAN,batch_size) |
||||
% ------------------------------------------------------------------------ |
||||
if nargin < 2 |
||||
d = load('ilsvrc_2012_mean'); |
||||
IMAGE_MEAN = d.image_mean; |
||||
end |
||||
num_images = length(image_files); |
||||
if nargin < 3 |
||||
batch_size = num_images; |
||||
end |
||||
|
||||
IMAGE_DIM = 256; |
||||
CROPPED_DIM = 227; |
||||
indices = [0 IMAGE_DIM-CROPPED_DIM] + 1; |
||||
center = floor(indices(2) / 2)+1; |
||||
|
||||
num_images = length(image_files); |
||||
images = zeros(CROPPED_DIM,CROPPED_DIM,3,batch_size,'single'); |
||||
|
||||
parfor i=1:num_images |
||||
% read file |
||||
fprintf('%c Preparing %s\n',13,image_files{i}); |
||||
try |
||||
im = imread(image_files{i}); |
||||
% resize to fixed input size |
||||
im = single(im); |
||||
im = imresize(im, [IMAGE_DIM IMAGE_DIM], 'bilinear'); |
||||
% Transform GRAY to RGB |
||||
if size(im,3) == 1 |
||||
im = cat(3,im,im,im); |
||||
end |
||||
% permute from RGB to BGR (IMAGE_MEAN is already BGR) |
||||
im = im(:,:,[3 2 1]) - IMAGE_MEAN; |
||||
% Crop the center of the image |
||||
images(:,:,:,i) = permute(im(center:center+CROPPED_DIM-1,... |
||||
center:center+CROPPED_DIM-1,:),[2 1 3]); |
||||
catch |
||||
warning('Problems with file %s',image_files{i});
||||
end |
||||
end |
@ -0,0 +1,42 @@ |
||||
function res=print_cell(input,file,linesep,cellsep) |
||||
assert(iscell(input),'The input should be a cell') |
||||
if nargin < 4 |
||||
cellsep = '\t'; |
||||
end |
||||
if nargin < 3 |
||||
linesep = '\n'; |
||||
end |
||||
if exist('file','var') && ~isempty(file) |
||||
%% |
||||
fid = fopen(file,'w'); |
||||
for l=1:length(input) |
||||
if iscell(input{l}) |
||||
for i=1:length(input{l}) |
||||
fprintf(fid,['%s' cellsep],input{l}{i}); |
||||
end |
||||
fprintf(fid,linesep); |
||||
else |
||||
if size(input,2) > 1 |
||||
for i=1:size(input,2) |
||||
fprintf(fid,'%s ',input{l,i}); |
||||
end |
||||
fprintf(fid,linesep); |
||||
else |
||||
fprintf(fid,['%s' linesep],input{l}); |
||||
end |
||||
end |
||||
end |
||||
fclose(fid); |
||||
else |
||||
res = ''; |
||||
for l=1:length(input) |
||||
if iscell(input{l}) |
||||
for i=1:length(input{l}) |
||||
res = [res sprintf([cellsep{1} '%s' cellsep{2}],input{l}{i})]; |
||||
end |
||||
res = [res sprintf(linesep)]; |
||||
else |
||||
res = [res sprintf(['%s' linesep],input{l}(:))]; |
||||
end |
||||
end |
||||
end |
@ -0,0 +1,21 @@ |
||||
function res=read_cell(filename,linesep,cellsep) |
||||
if nargin < 2, linesep='\n'; end |
||||
if nargin < 3, cellsep = '\t'; end |
||||
if exist(filename,'file') |
||||
fid = fopen(filename); |
||||
else |
||||
% Assume that filename is either a file identifier or a string
||||
fid = filename; |
||||
end |
||||
|
||||
fileLines = textscan(fid,'%s','delimiter',linesep,'BufSize',100000); |
||||
|
||||
fileLines = fileLines{1}; |
||||
|
||||
if regexp(fileLines{1},cellsep,'once') |
||||
fileLines = regexprep(fileLines,['^' cellsep '|' cellsep '$'],''); |
||||
res = regexp(fileLines,cellsep,'split'); |
||||
res = cell2matcell(res); |
||||
else |
||||
res = fileLines; |
||||
end |
@ -0,0 +1,4 @@ |
||||
from .pycaffe import Net, SGDSolver |
||||
from .classifier import Classifier |
||||
from .detector import Detector |
||||
import io |
@ -0,0 +1,357 @@ |
||||
// Copyright 2014 BVLC and contributors.
|
||||
// pycaffe provides a wrapper of the caffe::Net class as well as some
|
||||
// caffe::Caffe functions so that one could easily call it from Python.
|
||||
// Note that for Python, we will simply use float as the data type.
|
||||
|
||||
#define NPY_NO_DEPRECATED_API NPY_1_7_API_VERSION |
||||
|
||||
#include "boost/python.hpp" |
||||
#include "boost/python/suite/indexing/vector_indexing_suite.hpp" |
||||
#include "numpy/arrayobject.h" |
||||
|
||||
// these need to be included after boost on OS X
|
||||
#include <string> // NOLINT(build/include_order) |
||||
#include <vector> // NOLINT(build/include_order) |
||||
#include <fstream> // NOLINT |
||||
|
||||
#include "caffe/caffe.hpp" |
||||
|
||||
// Temporary solution for numpy < 1.7 versions: old macro, no promises.
|
||||
// You're strongly advised to upgrade to >= 1.7.
|
||||
#ifndef NPY_ARRAY_C_CONTIGUOUS |
||||
#define NPY_ARRAY_C_CONTIGUOUS NPY_C_CONTIGUOUS |
||||
#define PyArray_SetBaseObject(arr, x) (PyArray_BASE(arr) = (x)) |
||||
#endif |
||||
|
||||
|
||||
using namespace caffe; // NOLINT(build/namespaces)
|
||||
using boost::python::extract; |
||||
using boost::python::len; |
||||
using boost::python::list; |
||||
using boost::python::object; |
||||
using boost::python::handle; |
||||
using boost::python::vector_indexing_suite; |
||||
|
||||
// for convenience, check that input files can be opened, and raise an
|
||||
// exception that boost will send to Python if not (caffe could still crash
|
||||
// later if the input files are disturbed before they are actually used, but
|
||||
// this saves frustration in most cases)
|
||||
static void CheckFile(const string& filename) { |
||||
std::ifstream f(filename.c_str()); |
||||
if (!f.good()) { |
||||
f.close(); |
||||
throw std::runtime_error("Could not open file " + filename); |
||||
} |
||||
f.close(); |
||||
} |
||||
|
||||
// wrap shared_ptr<Blob<float> > in a class that we construct in C++ and pass
|
||||
// to Python
|
||||
class CaffeBlob { |
||||
public: |
||||
CaffeBlob(const shared_ptr<Blob<float> > &blob, const string& name) |
||||
: blob_(blob), name_(name) {} |
||||
|
||||
string name() const { return name_; } |
||||
int num() const { return blob_->num(); } |
||||
int channels() const { return blob_->channels(); } |
||||
int height() const { return blob_->height(); } |
||||
int width() const { return blob_->width(); } |
||||
int count() const { return blob_->count(); } |
||||
|
||||
// this is here only to satisfy boost's vector_indexing_suite
|
||||
bool operator == (const CaffeBlob &other) { |
||||
return this->blob_ == other.blob_; |
||||
} |
||||
|
||||
protected: |
||||
shared_ptr<Blob<float> > blob_; |
||||
string name_; |
||||
}; |
||||
|
||||
|
||||
// We need another wrapper (used as boost::python's HeldType) that receives a
|
||||
// self PyObject * which we can use as ndarray.base, so that data/diff memory
|
||||
// is not freed while still being used in Python.
|
||||
class CaffeBlobWrap : public CaffeBlob { |
||||
public: |
||||
CaffeBlobWrap(PyObject *p, const CaffeBlob &blob) |
||||
: CaffeBlob(blob), self_(p) {} |
||||
|
||||
object get_data() { |
||||
npy_intp dims[] = {num(), channels(), height(), width()}; |
||||
|
||||
PyObject *obj = PyArray_SimpleNewFromData(4, dims, NPY_FLOAT32, |
||||
blob_->mutable_cpu_data()); |
||||
PyArray_SetBaseObject(reinterpret_cast<PyArrayObject *>(obj), self_); |
||||
Py_INCREF(self_); |
||||
handle<> h(obj); |
||||
|
||||
return object(h); |
||||
} |
||||
|
||||
object get_diff() { |
||||
npy_intp dims[] = {num(), channels(), height(), width()}; |
||||
|
||||
PyObject *obj = PyArray_SimpleNewFromData(4, dims, NPY_FLOAT32, |
||||
blob_->mutable_cpu_diff()); |
||||
PyArray_SetBaseObject(reinterpret_cast<PyArrayObject *>(obj), self_); |
||||
Py_INCREF(self_); |
||||
handle<> h(obj); |
||||
|
||||
return object(h); |
||||
} |
||||
|
||||
private: |
||||
PyObject *self_; |
||||
}; |
||||
|
||||
|
||||
class CaffeLayer { |
||||
public: |
||||
CaffeLayer(const shared_ptr<Layer<float> > &layer, const string &name) |
||||
: layer_(layer), name_(name) {} |
||||
|
||||
string name() const { return name_; } |
||||
vector<CaffeBlob> blobs() { |
||||
vector<CaffeBlob> result; |
||||
for (int i = 0; i < layer_->blobs().size(); ++i) { |
||||
result.push_back(CaffeBlob(layer_->blobs()[i], name_)); |
||||
} |
||||
return result; |
||||
} |
||||
|
||||
// this is here only to satisfy boost's vector_indexing_suite
|
||||
bool operator == (const CaffeLayer &other) { |
||||
return this->layer_ == other.layer_; |
||||
} |
||||
|
||||
protected: |
||||
shared_ptr<Layer<float> > layer_; |
||||
string name_; |
||||
}; |
||||
|
||||
|
||||
// A simple wrapper over CaffeNet that runs the forward process.
|
||||
struct CaffeNet { |
||||
// For cases where parameters will be determined later by the Python user,
|
||||
// create a Net with unallocated parameters (which will not be zero-filled
|
||||
// when accessed).
|
||||
explicit CaffeNet(string param_file) { |
||||
Init(param_file); |
||||
} |
||||
|
||||
CaffeNet(string param_file, string pretrained_param_file) { |
||||
Init(param_file); |
||||
CheckFile(pretrained_param_file); |
||||
net_->CopyTrainedLayersFrom(pretrained_param_file); |
||||
} |
||||
|
||||
explicit CaffeNet(shared_ptr<Net<float> > net) |
||||
: net_(net) {} |
||||
|
||||
void Init(string param_file) { |
||||
CheckFile(param_file); |
||||
net_.reset(new Net<float>(param_file)); |
||||
} |
||||
|
||||
|
||||
virtual ~CaffeNet() {} |
||||
|
||||
// Generate Python exceptions for badly shaped or discontiguous arrays.
|
||||
inline void check_contiguous_array(PyArrayObject* arr, string name, |
||||
int channels, int height, int width) { |
||||
if (!(PyArray_FLAGS(arr) & NPY_ARRAY_C_CONTIGUOUS)) { |
||||
throw std::runtime_error(name + " must be C contiguous"); |
||||
} |
||||
if (PyArray_NDIM(arr) != 4) { |
||||
throw std::runtime_error(name + " must be 4-d"); |
||||
} |
||||
if (PyArray_TYPE(arr) != NPY_FLOAT32) { |
||||
throw std::runtime_error(name + " must be float32"); |
||||
} |
||||
if (PyArray_DIMS(arr)[1] != channels) { |
||||
throw std::runtime_error(name + " has wrong number of channels"); |
||||
} |
||||
if (PyArray_DIMS(arr)[2] != height) { |
||||
throw std::runtime_error(name + " has wrong height"); |
||||
} |
||||
if (PyArray_DIMS(arr)[3] != width) { |
||||
throw std::runtime_error(name + " has wrong width"); |
||||
} |
||||
} |
||||
|
||||
void Forward() { |
||||
net_->ForwardPrefilled(); |
||||
} |
||||
|
||||
void Backward() { |
||||
net_->Backward(); |
||||
} |
||||
|
||||
void set_input_arrays(object data_obj, object labels_obj) { |
||||
// check that this network has an input MemoryDataLayer
|
||||
shared_ptr<MemoryDataLayer<float> > md_layer = |
||||
boost::dynamic_pointer_cast<MemoryDataLayer<float> >(net_->layers()[0]); |
||||
if (!md_layer) { |
||||
throw std::runtime_error("set_input_arrays may only be called if the" |
||||
" first layer is a MemoryDataLayer"); |
||||
} |
||||
|
||||
// check that we were passed appropriately-sized contiguous memory
|
||||
PyArrayObject* data_arr = |
||||
reinterpret_cast<PyArrayObject*>(data_obj.ptr()); |
||||
PyArrayObject* labels_arr = |
||||
reinterpret_cast<PyArrayObject*>(labels_obj.ptr()); |
||||
check_contiguous_array(data_arr, "data array", md_layer->datum_channels(), |
||||
md_layer->datum_height(), md_layer->datum_width()); |
||||
check_contiguous_array(labels_arr, "labels array", 1, 1, 1); |
||||
if (PyArray_DIMS(data_arr)[0] != PyArray_DIMS(labels_arr)[0]) { |
||||
throw std::runtime_error("data and labels must have the same first" |
||||
" dimension"); |
||||
} |
||||
if (PyArray_DIMS(data_arr)[0] % md_layer->batch_size() != 0) { |
||||
throw std::runtime_error("first dimensions of input arrays must be a" |
||||
" multiple of batch size"); |
||||
} |
||||
|
||||
// hold references
|
||||
input_data_ = data_obj; |
||||
input_labels_ = labels_obj; |
||||
|
||||
md_layer->Reset(static_cast<float*>(PyArray_DATA(data_arr)), |
||||
static_cast<float*>(PyArray_DATA(labels_arr)), |
||||
PyArray_DIMS(data_arr)[0]); |
||||
} |
||||
|
||||
// save the network weights to binary proto for net surgeries.
|
||||
void save(string filename) { |
||||
NetParameter net_param; |
||||
net_->ToProto(&net_param, false); |
||||
WriteProtoToBinaryFile(net_param, filename.c_str()); |
||||
} |
||||
|
||||
// The caffe::Caffe utility functions.
|
||||
void set_mode_cpu() { Caffe::set_mode(Caffe::CPU); } |
||||
void set_mode_gpu() { Caffe::set_mode(Caffe::GPU); } |
||||
void set_phase_train() { Caffe::set_phase(Caffe::TRAIN); } |
||||
void set_phase_test() { Caffe::set_phase(Caffe::TEST); } |
||||
void set_device(int device_id) { Caffe::SetDevice(device_id); } |
||||
|
||||
vector<CaffeBlob> blobs() { |
||||
vector<CaffeBlob> result; |
||||
for (int i = 0; i < net_->blobs().size(); ++i) { |
||||
result.push_back(CaffeBlob(net_->blobs()[i], net_->blob_names()[i])); |
||||
} |
||||
return result; |
||||
} |
||||
|
||||
vector<CaffeLayer> layers() { |
||||
vector<CaffeLayer> result; |
||||
for (int i = 0; i < net_->layers().size(); ++i) { |
||||
result.push_back(CaffeLayer(net_->layers()[i], net_->layer_names()[i])); |
||||
} |
||||
return result; |
||||
} |
||||
|
||||
list inputs() { |
||||
list input_blob_names; |
||||
for (int i = 0; i < net_->input_blob_indices().size(); ++i) { |
||||
input_blob_names.append( |
||||
net_->blob_names()[net_->input_blob_indices()[i]]); |
||||
} |
||||
return input_blob_names; |
||||
} |
||||
|
||||
list outputs() { |
||||
list output_blob_names; |
||||
for (int i = 0; i < net_->output_blob_indices().size(); ++i) { |
||||
output_blob_names.append( |
||||
net_->blob_names()[net_->output_blob_indices()[i]]); |
||||
} |
||||
return output_blob_names; |
||||
} |
||||
|
||||
// The pointer to the internal caffe::Net instance.
|
||||
shared_ptr<Net<float> > net_; |
||||
// if taking input from an ndarray, we need to hold references
|
||||
object input_data_; |
||||
object input_labels_; |
||||
}; |
||||
|
||||
class CaffeSGDSolver { |
||||
public: |
||||
explicit CaffeSGDSolver(const string& param_file) { |
||||
// as in CaffeNet, (as a convenience, not a guarantee), create a Python
|
||||
// exception if param_file can't be opened
|
||||
CheckFile(param_file); |
||||
solver_.reset(new SGDSolver<float>(param_file)); |
||||
// we need to explicitly store the net wrapper, rather than constructing
|
||||
// it on the fly, so that it can hold references to Python objects
|
||||
net_.reset(new CaffeNet(solver_->net())); |
||||
} |
||||
|
||||
shared_ptr<CaffeNet> net() { return net_; } |
||||
void Solve() { return solver_->Solve(); } |
||||
void SolveResume(const string& resume_file) { |
||||
CheckFile(resume_file); |
||||
return solver_->Solve(resume_file); |
||||
} |
||||
|
||||
protected: |
||||
shared_ptr<CaffeNet> net_; |
||||
shared_ptr<SGDSolver<float> > solver_; |
||||
}; |
||||
|
||||
|
||||
// The boost_python module definition.
|
||||
BOOST_PYTHON_MODULE(_caffe) { |
||||
// below, we prepend an underscore to methods that will be replaced
|
||||
// in Python
|
||||
boost::python::class_<CaffeNet, shared_ptr<CaffeNet> >( |
||||
"Net", boost::python::init<string, string>()) |
||||
.def(boost::python::init<string>()) |
||||
.def("_forward", &CaffeNet::Forward) |
||||
.def("_backward", &CaffeNet::Backward) |
||||
.def("set_mode_cpu", &CaffeNet::set_mode_cpu) |
||||
.def("set_mode_gpu", &CaffeNet::set_mode_gpu) |
||||
.def("set_phase_train", &CaffeNet::set_phase_train) |
||||
.def("set_phase_test", &CaffeNet::set_phase_test) |
||||
.def("set_device", &CaffeNet::set_device) |
||||
.add_property("_blobs", &CaffeNet::blobs) |
||||
.add_property("layers", &CaffeNet::layers) |
||||
.add_property("inputs", &CaffeNet::inputs) |
||||
.add_property("outputs", &CaffeNet::outputs) |
||||
.def("_set_input_arrays", &CaffeNet::set_input_arrays) |
||||
.def("save", &CaffeNet::save); |
||||
|
||||
boost::python::class_<CaffeBlob, CaffeBlobWrap>( |
||||
"Blob", boost::python::no_init) |
||||
.add_property("name", &CaffeBlob::name) |
||||
.add_property("num", &CaffeBlob::num) |
||||
.add_property("channels", &CaffeBlob::channels) |
||||
.add_property("height", &CaffeBlob::height) |
||||
.add_property("width", &CaffeBlob::width) |
||||
.add_property("count", &CaffeBlob::count) |
||||
.add_property("data", &CaffeBlobWrap::get_data) |
||||
.add_property("diff", &CaffeBlobWrap::get_diff); |
||||
|
||||
boost::python::class_<CaffeLayer>( |
||||
"Layer", boost::python::no_init) |
||||
.add_property("name", &CaffeLayer::name) |
||||
.add_property("blobs", &CaffeLayer::blobs); |
||||
|
||||
boost::python::class_<CaffeSGDSolver, boost::noncopyable>( |
||||
"SGDSolver", boost::python::init<string>()) |
||||
.add_property("net", &CaffeSGDSolver::net) |
||||
.def("solve", &CaffeSGDSolver::Solve) |
||||
.def("solve", &CaffeSGDSolver::SolveResume); |
||||
|
||||
boost::python::class_<vector<CaffeBlob> >("BlobVec") |
||||
.def(vector_indexing_suite<vector<CaffeBlob>, true>()); |
||||
|
||||
boost::python::class_<vector<CaffeLayer> >("LayerVec") |
||||
.def(vector_indexing_suite<vector<CaffeLayer>, true>()); |
||||
|
||||
import_array(); |
||||
} |
@ -0,0 +1,86 @@ |
||||
#!/usr/bin/env python |
||||
""" |
||||
Classifier is an image classifier specialization of Net. |
||||
""" |
||||
|
||||
import numpy as np |
||||
|
||||
import caffe |
||||
|
||||
|
||||
class Classifier(caffe.Net): |
||||
""" |
||||
Classifier extends Net for image class prediction |
||||
by scaling, center cropping, or oversampling. |
||||
""" |
||||
def __init__(self, model_file, pretrained_file, image_dims=None, |
||||
gpu=False, mean_file=None, input_scale=None, channel_swap=None): |
||||
""" |
||||
Take |
||||
image_dims: dimensions to scale input for cropping/sampling. |
||||
Default is to scale to net input size for whole-image crop. |
||||
gpu, mean_file, input_scale, channel_swap: convenience params for |
||||
setting mode, mean, input scale, and channel order. |
||||
""" |
||||
caffe.Net.__init__(self, model_file, pretrained_file) |
||||
self.set_phase_test() |
||||
|
||||
if gpu: |
||||
self.set_mode_gpu() |
||||
else: |
||||
self.set_mode_cpu() |
||||
|
||||
if mean_file: |
||||
self.set_mean(self.inputs[0], mean_file) |
||||
if input_scale: |
||||
self.set_input_scale(self.inputs[0], input_scale) |
||||
if channel_swap: |
||||
self.set_channel_swap(self.inputs[0], channel_swap) |
||||
|
||||
self.crop_dims = np.array(self.blobs[self.inputs[0]].data.shape[2:]) |
||||
if not image_dims: |
||||
image_dims = self.crop_dims |
||||
self.image_dims = image_dims |
||||
|
||||
|
||||
def predict(self, inputs, oversample=True): |
||||
""" |
||||
Predict classification probabilities of inputs. |
||||
|
||||
Take |
||||
inputs: iterable of (H x W x K) input ndarrays. |
||||
oversample: average predictions across center, corners, and mirrors |
||||
when True (default). Center-only prediction when False. |
||||
|
||||
Give |
||||
predictions: (N x C) ndarray of class probabilities |
||||
for N images and C classes. |
||||
""" |
||||
# Scale to standardize input dimensions. |
||||
inputs = np.asarray([caffe.io.resize_image(im, self.image_dims) |
||||
for im in inputs]) |
||||
|
||||
if oversample: |
||||
# Generate center, corner, and mirrored crops. |
||||
inputs = caffe.io.oversample(inputs, self.crop_dims) |
||||
else: |
||||
# Take center crop. |
||||
            center = np.array(self.image_dims) / 2.0
            crop = np.tile(center, (1, 2))[0] + np.concatenate([
                -self.crop_dims / 2.0,
                self.crop_dims / 2.0
            ])
            crop = crop.astype(int)  # integer pixel indices for slicing
            inputs = inputs[:, crop[0]:crop[2], crop[1]:crop[3], :]
||||
|
||||
# Classify |
||||
caffe_in = np.asarray([self.preprocess(self.inputs[0], in_) |
||||
for in_ in inputs]) |
||||
out = self.forward_all(**{self.inputs[0]: caffe_in}) |
||||
predictions = out[self.outputs[0]].squeeze(axis=(2,3)) |
||||
|
||||
# For oversampling, average predictions across crops. |
||||
if oversample: |
||||
predictions = predictions.reshape((len(predictions) / 10, 10, -1)) |
||||
predictions = predictions.mean(1) |
||||
|
||||
return predictions |
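A minimal usage sketch for the Classifier above; the model, weights, mean, and image paths are placeholders rather than files shipped at these locations.

    import caffe

    net = caffe.Classifier('imagenet_deploy.prototxt',            # placeholder
                           'caffe_reference_imagenet_model',      # placeholder
                           image_dims=(256, 256),
                           mean_file='ilsvrc_2012_mean.npy',      # placeholder
                           input_scale=255, channel_swap=(2, 1, 0))
    im = caffe.io.load_image('cat.jpg')     # H x W x 3 RGB in [0, 1]
    probs = net.predict([im])               # oversampled: 10 crops, averaged
    print probs.shape, probs[0].argmax()    # (1, C) and the top class index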
@ -0,0 +1,191 @@ |
||||
#!/usr/bin/env python |
||||
""" |
||||
Do windowed detection by classifying a number of images/crops at once, |
||||
optionally using the selective search window proposal method. |
||||
|
||||
This implementation follows ideas in |
||||
Ross Girshick, Jeff Donahue, Trevor Darrell, Jitendra Malik. |
||||
Rich feature hierarchies for accurate object detection and semantic |
||||
segmentation. |
||||
http://arxiv.org/abs/1311.2524 |
||||
|
||||
The selective_search_ijcv_with_python code required for the selective search |
||||
proposal mode is available at |
||||
https://github.com/sergeyk/selective_search_ijcv_with_python |
||||
""" |
||||
import numpy as np |
||||
import os |
||||
|
||||
import caffe |
||||
|
||||
|
||||
class Detector(caffe.Net): |
||||
""" |
||||
Detector extends Net for windowed detection by a list of crops or |
||||
selective search proposals. |
||||
""" |
||||
def __init__(self, model_file, pretrained_file, gpu=False, mean_file=None, |
||||
input_scale=None, channel_swap=None, context_pad=None): |
||||
""" |
||||
Take |
||||
gpu, mean_file, input_scale, channel_swap: convenience params for |
||||
setting mode, mean, input scale, and channel order. |
||||
context_pad: amount of surrounding context to take s.t. a `context_pad` |
||||
sized border of pixels in the network input image is context, as in |
||||
R-CNN feature extraction. |
||||
""" |
||||
caffe.Net.__init__(self, model_file, pretrained_file) |
||||
self.set_phase_test() |
||||
|
||||
if gpu: |
||||
self.set_mode_gpu() |
||||
else: |
||||
self.set_mode_cpu() |
||||
|
||||
if mean_file: |
||||
self.set_mean(self.inputs[0], mean_file) |
||||
if input_scale: |
||||
self.set_input_scale(self.inputs[0], input_scale) |
||||
if channel_swap: |
||||
self.set_channel_swap(self.inputs[0], channel_swap) |
||||
|
||||
self.configure_crop(context_pad) |
||||
|
||||
|
||||
def detect_windows(self, images_windows): |
||||
""" |
||||
Do windowed detection over given images and windows. Windows are |
||||
extracted then warped to the input dimensions of the net. |
||||
|
||||
Take |
||||
        images_windows: (image filename, window list) iterable.

        Give
        detections: list of {filename: image filename, window: crop coordinates,
            prediction: prediction vector} dicts.
||||
""" |
||||
# Extract windows. |
||||
window_inputs = [] |
||||
for image_fname, windows in images_windows: |
||||
image = caffe.io.load_image(image_fname).astype(np.float32) |
||||
for window in windows: |
||||
window_inputs.append(self.crop(image, window)) |
||||
|
||||
# Run through the net (warping windows to input dimensions). |
||||
caffe_in = np.asarray([self.preprocess(self.inputs[0], window_in) |
||||
for window_in in window_inputs]) |
||||
out = self.forward_all(**{self.inputs[0]: caffe_in}) |
||||
predictions = out[self.outputs[0]].squeeze(axis=(2,3)) |
||||
|
||||
# Package predictions with images and windows. |
||||
detections = [] |
||||
ix = 0 |
||||
for image_fname, windows in images_windows: |
||||
for window in windows: |
||||
detections.append({ |
||||
'window': window, |
||||
'prediction': predictions[ix], |
||||
'filename': image_fname |
||||
}) |
||||
ix += 1 |
||||
return detections |
||||
|
||||
|
||||
def detect_selective_search(self, image_fnames): |
||||
""" |
||||
Do windowed detection over Selective Search proposals by extracting |
||||
the crop and warping to the input dimensions of the net. |
||||
|
||||
Take |
||||
image_fnames: list |
||||
|
||||
Give |
||||
        detections: list of {filename: image filename, window: crop coordinates,
            prediction: prediction vector} dicts.
||||
""" |
||||
import selective_search_ijcv_with_python as selective_search |
||||
# Make absolute paths so MATLAB can find the files. |
||||
image_fnames = [os.path.abspath(f) for f in image_fnames] |
||||
windows_list = selective_search.get_windows( |
||||
image_fnames, |
||||
cmd='selective_search_rcnn' |
||||
) |
||||
# Run windowed detection on the selective search list. |
||||
return self.detect_windows(zip(image_fnames, windows_list)) |
||||
|
||||
|
||||
def crop(self, im, window): |
||||
""" |
||||
Crop a window from the image for detection. Include surrounding context |
||||
according to the `context_pad` configuration. |
||||
|
||||
Take |
||||
im: H x W x K image ndarray to crop. |
||||
window: bounding box coordinates as ymin, xmin, ymax, xmax. |
||||
|
||||
Give |
||||
crop: cropped window. |
||||
""" |
||||
# Crop window from the image. |
||||
crop = im[window[0]:window[2], window[1]:window[3]] |
||||
|
||||
if self.context_pad: |
||||
box = window.copy() |
||||
crop_size = self.blobs[self.inputs[0]].width # assumes square |
||||
scale = crop_size / (1. * crop_size - self.context_pad * 2) |
||||
# Crop a box + surrounding context. |
||||
half_h = (box[2] - box[0] + 1) / 2. |
||||
half_w = (box[3] - box[1] + 1) / 2. |
||||
center = (box[0] + half_h, box[1] + half_w) |
||||
scaled_dims = scale * np.array((-half_h, -half_w, half_h, half_w)) |
||||
box = np.round(np.tile(center, 2) + scaled_dims) |
||||
full_h = box[2] - box[0] + 1 |
||||
full_w = box[3] - box[1] + 1 |
||||
scale_h = crop_size / full_h |
||||
scale_w = crop_size / full_w |
||||
pad_y = round(max(0, -box[0]) * scale_h) # amount out-of-bounds |
||||
pad_x = round(max(0, -box[1]) * scale_w) |
||||
|
||||
# Clip box to image dimensions. |
||||
im_h, im_w = im.shape[:2] |
||||
box = np.clip(box, 0., [im_h, im_w, im_h, im_w]) |
||||
clip_h = box[2] - box[0] + 1 |
||||
clip_w = box[3] - box[1] + 1 |
||||
assert(clip_h > 0 and clip_w > 0) |
||||
crop_h = round(clip_h * scale_h) |
||||
crop_w = round(clip_w * scale_w) |
||||
if pad_y + crop_h > crop_size: |
||||
crop_h = crop_size - pad_y |
||||
if pad_x + crop_w > crop_size: |
||||
crop_w = crop_size - pad_x |
||||
|
||||
# collect with context padding and place in input |
||||
# with mean padding |
||||
context_crop = im[box[0]:box[2], box[1]:box[3]] |
||||
context_crop = caffe.io.resize_image(context_crop, (crop_h, crop_w)) |
||||
crop = self.crop_mean.copy() |
||||
crop[pad_y:(pad_y + crop_h), pad_x:(pad_x + crop_w)] = context_crop |
||||
|
||||
return crop |
||||
|
||||
|
||||
def configure_crop(self, context_pad): |
||||
""" |
||||
Configure amount of context for cropping. |
||||
If context is included, make the special input mean for context padding. |
||||
|
||||
Take |
||||
context_pad: amount of context for cropping. |
||||
""" |
||||
self.context_pad = context_pad |
||||
if self.context_pad: |
||||
input_scale = self.input_scale.get(self.inputs[0]) |
||||
channel_order = self.channel_swap.get(self.inputs[0]) |
||||
# Padding context crops needs the mean in unprocessed input space. |
||||
self.crop_mean = self.mean[self.inputs[0]].copy() |
||||
self.crop_mean = self.crop_mean.transpose((1,2,0)) |
||||
channel_order_inverse = [channel_order.index(i) |
||||
for i in range(self.crop_mean.shape[2])] |
||||
self.crop_mean = self.crop_mean[:,:, channel_order_inverse] |
||||
self.crop_mean /= input_scale |
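A minimal sketch of windowed detection with explicit crops (list mode), which avoids the MATLAB selective search dependency; all paths and window coordinates below are placeholders.

    import numpy as np
    import caffe

    det = caffe.Detector('imagenet_deploy.prototxt',               # placeholder
                         'caffe_reference_imagenet_model',         # placeholder
                         mean_file='ilsvrc_2012_mean.npy',         # placeholder
                         input_scale=255, channel_swap=(2, 1, 0),
                         context_pad=16)
    windows = np.array([[0, 0, 227, 227], [50, 50, 277, 277]])  # ymin, xmin, ymax, xmax
    detections = det.detect_windows([('image.jpg', windows)])   # placeholder image
    for d in detections:
        print d['filename'], d['window'], d['prediction'].argmax()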
@ -0,0 +1,76 @@ |
||||
""" |
||||
Caffe network visualization: draw the NetParameter protobuffer. |
||||
|
||||
NOTE: this requires pydot>=1.0.2, which is not included in requirements.txt
since it requires graphviz and other prerequisites outside the scope of
Caffe.
||||
""" |
||||
|
||||
from caffe.proto import caffe_pb2 |
||||
from google.protobuf import text_format |
||||
import pydot |
||||
|
||||
# Internal layer and blob styles. |
||||
LAYER_STYLE = {'shape': 'record', 'fillcolor': '#6495ED', |
||||
'style': 'filled'} |
||||
NEURON_LAYER_STYLE = {'shape': 'record', 'fillcolor': '#90EE90', |
||||
'style': 'filled'} |
||||
BLOB_STYLE = {'shape': 'octagon', 'fillcolor': '#F0E68C', |
||||
'style': 'filled'} |
||||
def get_enum_name_by_value(): |
||||
desc = caffe_pb2.LayerParameter.LayerType.DESCRIPTOR |
||||
d = {} |
||||
for k,v in desc.values_by_name.items(): |
||||
d[v.number] = k |
||||
return d |
||||
|
||||
def get_pydot_graph(caffe_net): |
||||
pydot_graph = pydot.Dot(caffe_net.name, graph_type='digraph', rankdir="BT") |
||||
pydot_nodes = {} |
||||
pydot_edges = [] |
||||
d = get_enum_name_by_value() |
||||
for layer in caffe_net.layers: |
||||
name = layer.name |
||||
layertype = d[layer.type] |
||||
if (len(layer.bottom) == 1 and len(layer.top) == 1 and |
||||
layer.bottom[0] == layer.top[0]): |
||||
# We have an in-place neuron layer. |
||||
pydot_nodes[name + '_' + layertype] = pydot.Node( |
||||
'%s (%s)' % (name, layertype), **NEURON_LAYER_STYLE) |
||||
else: |
||||
pydot_nodes[name + '_' + layertype] = pydot.Node( |
||||
'%s (%s)' % (name, layertype), **LAYER_STYLE) |
||||
for bottom_blob in layer.bottom: |
||||
pydot_nodes[bottom_blob + '_blob'] = pydot.Node( |
||||
'%s' % (bottom_blob), **BLOB_STYLE) |
||||
pydot_edges.append((bottom_blob + '_blob', name + '_' + layertype)) |
||||
for top_blob in layer.top: |
||||
pydot_nodes[top_blob + '_blob'] = pydot.Node( |
||||
'%s' % (top_blob)) |
||||
pydot_edges.append((name + '_' + layertype, top_blob + '_blob')) |
||||
# Now, add the nodes and edges to the graph. |
||||
for node in pydot_nodes.values(): |
||||
pydot_graph.add_node(node) |
||||
for edge in pydot_edges: |
||||
pydot_graph.add_edge( |
||||
pydot.Edge(pydot_nodes[edge[0]], pydot_nodes[edge[1]])) |
||||
return pydot_graph |
||||
|
||||
def draw_net(caffe_net, ext='png'): |
||||
"""Draws a caffe net and returns the image string encoded using the given |
||||
extension. |
||||
|
||||
Input: |
||||
caffe_net: a caffe.proto.caffe_pb2.NetParameter protocol buffer. |
||||
ext: the image extension. Default 'png'. |
||||
""" |
||||
return get_pydot_graph(caffe_net).create(format=ext) |
||||
|
||||
def draw_net_to_file(caffe_net, filename): |
||||
"""Draws a caffe net, and saves it to file using the format given as the |
||||
file extension. Use '.raw' to output raw text that you can manually feed |
||||
to graphviz to draw graphs. |
||||
""" |
||||
ext = filename[filename.rfind('.')+1:] |
||||
with open(filename, 'wb') as fid: |
||||
fid.write(draw_net(caffe_net, ext)) |
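A minimal sketch of rendering a network definition to an image with the helper above; it needs pydot and graphviz as noted, and the prototxt path is a placeholder.

    from google.protobuf import text_format
    import caffe.draw
    from caffe.proto import caffe_pb2

    net = caffe_pb2.NetParameter()
    text_format.Merge(open('train_val.prototxt').read(), net)   # placeholder path
    caffe.draw.draw_net_to_file(net, 'net.png')                 # format from extension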
Binary file not shown.
@ -0,0 +1,159 @@ |
||||
import numpy as np |
||||
import skimage.io |
||||
import skimage.transform |
||||
|
||||
from caffe.proto import caffe_pb2 |
||||
|
||||
|
||||
def load_image(filename, color=True): |
||||
""" |
||||
Load an image converting from grayscale or alpha as needed. |
||||
|
||||
Take |
||||
filename: string |
||||
color: flag for color format. True (default) loads as RGB while False |
||||
loads as intensity (if image is already grayscale). |
||||
|
||||
Give |
||||
image: an image with type np.float32 of size (H x W x 3) in RGB or |
||||
of size (H x W x 1) in grayscale. |
||||
""" |
||||
img = skimage.img_as_float(skimage.io.imread(filename)).astype(np.float32) |
||||
if img.ndim == 2: |
||||
img = img[:, :, np.newaxis] |
||||
if color: |
||||
img = np.tile(img, (1, 1, 3)) |
||||
elif img.shape[2] == 4: |
||||
img = img[:, :, :3] |
||||
return img |
||||
|
||||
|
||||
def resize_image(im, new_dims, interp_order=1): |
||||
""" |
||||
Resize an image array with interpolation. |
||||
|
||||
Take |
||||
im: (H x W x K) ndarray |
||||
new_dims: (height, width) tuple of new dimensions. |
||||
interp_order: interpolation order, default is linear. |
||||
|
||||
Give |
||||
im: resized ndarray with shape (new_dims[0], new_dims[1], K) |
||||
""" |
||||
return skimage.transform.resize(im, new_dims, order=interp_order) |
||||
|
||||
|
||||
def oversample(images, crop_dims): |
||||
""" |
||||
Crop images into the four corners, center, and their mirrored versions. |
||||
|
||||
Take |
||||
image: iterable of (H x W x K) ndarrays |
||||
crop_dims: (height, width) tuple for the crops. |
||||
|
||||
Give |
||||
crops: (10*N x H x W x K) ndarray of crops for number of inputs N. |
||||
""" |
||||
# Dimensions and center. |
||||
im_shape = np.array(images[0].shape) |
||||
crop_dims = np.array(crop_dims) |
||||
im_center = im_shape[:2] / 2.0 |
||||
|
||||
# Make crop coordinates |
||||
h_indices = (0, im_shape[0] - crop_dims[0]) |
||||
w_indices = (0, im_shape[1] - crop_dims[1]) |
||||
crops_ix = np.empty((5, 4), dtype=int) |
||||
curr = 0 |
||||
for i in h_indices: |
||||
for j in w_indices: |
||||
crops_ix[curr] = (i, j, i + crop_dims[0], j + crop_dims[1]) |
||||
curr += 1 |
||||
crops_ix[4] = np.tile(im_center, (1, 2)) + np.concatenate([ |
||||
-crop_dims / 2.0, |
||||
crop_dims / 2.0 |
||||
]) |
||||
crops_ix = np.tile(crops_ix, (2, 1)) |
||||
|
||||
# Extract crops |
||||
crops = np.empty((10 * len(images), crop_dims[0], crop_dims[1], |
||||
im_shape[-1]), dtype=np.float32) |
||||
ix = 0 |
||||
for im in images: |
||||
for crop in crops_ix: |
||||
crops[ix] = im[crop[0]:crop[2], crop[1]:crop[3], :] |
||||
ix += 1 |
||||
crops[ix-5:ix] = crops[ix-5:ix, :, ::-1, :] # flip for mirrors |
||||
return crops |
||||
|
||||
|
||||
def blobproto_to_array(blob, return_diff=False): |
||||
"""Convert a blob proto to an array. In default, we will just return the data, |
||||
unless return_diff is True, in which case we will return the diff. |
||||
""" |
||||
if return_diff: |
||||
return np.array(blob.diff).reshape( |
||||
blob.num, blob.channels, blob.height, blob.width) |
||||
else: |
||||
return np.array(blob.data).reshape( |
||||
blob.num, blob.channels, blob.height, blob.width) |
||||
|
||||
|
||||
def array_to_blobproto(arr, diff=None): |
||||
"""Converts a 4-dimensional array to blob proto. If diff is given, also |
||||
convert the diff. You need to make sure that arr and diff have the same |
||||
shape, and this function does not do sanity check. |
||||
""" |
||||
if arr.ndim != 4: |
||||
raise ValueError('Incorrect array shape.') |
||||
blob = caffe_pb2.BlobProto() |
||||
    blob.num, blob.channels, blob.height, blob.width = arr.shape
||||
blob.data.extend(arr.astype(float).flat) |
||||
if diff is not None: |
||||
blob.diff.extend(diff.astype(float).flat) |
||||
return blob |
||||
|
||||
|
||||
def arraylist_to_blobprotovecor_str(arraylist): |
||||
"""Converts a list of arrays to a serialized blobprotovec, which could be |
||||
then passed to a network for processing. |
||||
""" |
||||
vec = caffe_pb2.BlobProtoVector() |
||||
vec.blobs.extend([array_to_blobproto(arr) for arr in arraylist]) |
||||
return vec.SerializeToString() |
||||
|
||||
|
||||
def blobprotovector_str_to_arraylist(str): |
||||
"""Converts a serialized blobprotovec to a list of arrays. |
||||
""" |
||||
vec = caffe_pb2.BlobProtoVector() |
||||
vec.ParseFromString(str) |
||||
return [blobproto_to_array(blob) for blob in vec.blobs] |
||||
|
||||
|
||||
def array_to_datum(arr, label=0): |
||||
"""Converts a 3-dimensional array to datum. If the array has dtype uint8, |
||||
the output data will be encoded as a string. Otherwise, the output data |
||||
will be stored in float format. |
||||
""" |
||||
if arr.ndim != 3: |
||||
raise ValueError('Incorrect array shape.') |
||||
datum = caffe_pb2.Datum() |
||||
datum.channels, datum.height, datum.width = arr.shape |
||||
if arr.dtype == np.uint8: |
||||
datum.data = arr.tostring() |
||||
else: |
||||
datum.float_data.extend(arr.flat) |
||||
datum.label = label |
||||
return datum |
||||
|
||||
|
||||
def datum_to_array(datum): |
||||
"""Converts a datum to an array. Note that the label is not returned, |
||||
as one can easily get it by calling datum.label. |
||||
""" |
||||
if len(datum.data): |
||||
return np.fromstring(datum.data, dtype = np.uint8).reshape( |
||||
datum.channels, datum.height, datum.width) |
||||
else: |
||||
return np.array(datum.float_data).astype(float).reshape( |
||||
datum.channels, datum.height, datum.width) |
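A short round trip through the Datum helpers above, assuming this module is importable as caffe.io.

    import numpy as np
    from caffe.io import array_to_datum, datum_to_array

    arr = np.random.randint(0, 256, (3, 32, 32)).astype(np.uint8)  # K x H x W
    datum = array_to_datum(arr, label=7)   # uint8 data is packed as a string
    back = datum_to_array(datum)
    assert back.shape == (3, 32, 32) and datum.label == 7
    assert (back == arr).all()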
@ -0,0 +1,352 @@ |
||||
""" |
||||
Wrap the internal caffe C++ module (_caffe.so) with a clean, Pythonic |
||||
interface. |
||||
""" |
||||
|
||||
from collections import OrderedDict |
||||
from itertools import izip_longest |
||||
import numpy as np |
||||
|
||||
from ._caffe import Net, SGDSolver |
||||
import caffe.io |
||||
|
||||
# We directly update methods from Net here (rather than using composition or |
||||
# inheritance) so that nets created by caffe (e.g., by SGDSolver) will |
||||
# automatically have the improved interface. |
||||
|
||||
|
||||
@property |
||||
def _Net_blobs(self): |
||||
""" |
||||
An OrderedDict (bottom to top, i.e., input to output) of network |
||||
blobs indexed by name |
||||
""" |
||||
return OrderedDict([(bl.name, bl) for bl in self._blobs]) |
||||
|
||||
|
||||
@property |
||||
def _Net_params(self): |
||||
""" |
||||
An OrderedDict (bottom to top, i.e., input to output) of network |
||||
parameters indexed by name; each is a list of multiple blobs (e.g., |
||||
weights and biases) |
||||
""" |
||||
return OrderedDict([(lr.name, lr.blobs) for lr in self.layers |
||||
if len(lr.blobs) > 0]) |
||||
|
||||
|
||||
def _Net_forward(self, blobs=None, **kwargs): |
||||
""" |
||||
Forward pass: prepare inputs and run the net forward. |
||||
|
||||
Take |
||||
blobs: list of blobs to return in addition to output blobs. |
||||
kwargs: Keys are input blob names and values are blob ndarrays. |
||||
For formatting inputs for Caffe, see Net.preprocess(). |
||||
If None, input is taken from data layers. |
||||
|
||||
Give |
||||
outs: {blob name: blob ndarray} dict. |
||||
""" |
||||
if blobs is None: |
||||
blobs = [] |
||||
|
||||
if kwargs: |
||||
if set(kwargs.keys()) != set(self.inputs): |
||||
raise Exception('Input blob arguments do not match net inputs.') |
||||
# Set input according to defined shapes and make arrays single and |
||||
# C-contiguous as Caffe expects. |
||||
for in_, blob in kwargs.iteritems(): |
||||
if blob.shape[0] != self.blobs[in_].num: |
||||
raise Exception('Input is not batch sized') |
||||
if blob.ndim != 4: |
||||
raise Exception('{} blob is not 4-d'.format(in_)) |
||||
self.blobs[in_].data[...] = blob |
||||
|
||||
self._forward() |
||||
|
||||
# Unpack blobs to extract |
||||
outs = {out: self.blobs[out].data for out in set(self.outputs + blobs)} |
||||
return outs |
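A minimal sketch of the wrapped forward() with an explicit input blob, assuming a single-input deploy net; the paths are placeholders and the input must match the net's batch size, as checked above.

    import numpy as np
    import caffe

    net = caffe.Net('deploy.prototxt', 'weights.caffemodel')   # placeholders
    net.set_phase_test()
    net.set_mode_cpu()
    in_ = net.inputs[0]
    batch = np.random.rand(*net.blobs[in_].data.shape).astype(np.float32)
    out = net.forward(**{in_: batch})       # {output blob name: data ndarray}
    print out[net.outputs[0]].shape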
||||
|
||||
|
||||
def _Net_backward(self, diffs=None, **kwargs): |
||||
""" |
||||
Backward pass: prepare diffs and run the net backward. |
||||
|
||||
Take |
||||
diffs: list of diffs to return in addition to bottom diffs. |
||||
kwargs: Keys are output blob names and values are diff ndarrays. |
||||
If None, top diffs are taken from forward loss. |
||||
|
||||
Give |
||||
outs: {blob name: diff ndarray} dict. |
||||
""" |
||||
if diffs is None: |
||||
diffs = [] |
||||
|
||||
if kwargs: |
||||
if set(kwargs.keys()) != set(self.outputs): |
||||
raise Exception('Top diff arguments do not match net outputs.') |
||||
# Set top diffs according to defined shapes and make arrays single and |
||||
# C-contiguous as Caffe expects. |
||||
for top, diff in kwargs.iteritems(): |
||||
if diff.shape[0] != self.blobs[top].num: |
||||
raise Exception('Diff is not batch sized') |
||||
if diff.ndim != 4: |
||||
raise Exception('{} diff is not 4-d'.format(top)) |
||||
self.blobs[top].diff[...] = diff |
||||
|
||||
self._backward() |
||||
|
||||
# Unpack diffs to extract |
||||
outs = {out: self.blobs[out].diff for out in set(self.inputs + diffs)} |
||||
return outs |
||||
|
||||
|
||||
def _Net_forward_all(self, blobs=None, **kwargs): |
||||
""" |
||||
Run net forward in batches. |
||||
|
||||
Take |
||||
blobs: list of blobs to extract as in forward() |
||||
kwargs: Keys are input blob names and values are blob ndarrays. |
||||
Refer to forward(). |
||||
|
||||
Give |
||||
all_outs: {blob name: list of blobs} dict. |
||||
""" |
||||
# Collect outputs from batches |
||||
all_outs = {out: [] for out in set(self.outputs + (blobs or []))} |
||||
for batch in self._batch(kwargs): |
||||
outs = self.forward(blobs=blobs, **batch) |
||||
for out, out_blob in outs.iteritems(): |
||||
all_outs[out].extend(out_blob.copy()) |
||||
# Package in ndarray. |
||||
for out in all_outs: |
||||
all_outs[out] = np.asarray(all_outs[out]) |
||||
# Discard padding. |
||||
pad = len(all_outs.itervalues().next()) - len(kwargs.itervalues().next()) |
||||
if pad: |
||||
for out in all_outs: |
||||
all_outs[out] = all_outs[out][:-pad] |
||||
return all_outs |
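A sketch of forward_all() batching an arbitrary number of inputs for a single-input net; the last partial batch is zero-padded and the padding stripped again, as above. Paths are placeholders.

    import numpy as np
    import caffe

    net = caffe.Net('deploy.prototxt', 'weights.caffemodel')   # placeholders
    net.set_phase_test()
    in_ = net.inputs[0]
    n = 25                                    # need not divide the batch size
    data = np.random.rand(n, *net.blobs[in_].data.shape[1:]).astype(np.float32)
    out = net.forward_all(**{in_: data})
    print out[net.outputs[0]].shape           # leading axis is n again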
||||
|
||||
|
||||
def _Net_forward_backward_all(self, blobs=None, diffs=None, **kwargs): |
||||
""" |
||||
Run net forward + backward in batches. |
||||
|
||||
Take |
||||
blobs: list of blobs to extract as in forward() |
||||
diffs: list of diffs to extract as in backward() |
||||
kwargs: Keys are input (for forward) and output (for backward) blob names |
||||
and values are ndarrays. Refer to forward() and backward(). |
||||
        If no input or output blobs are given, the prefilled forward() and
        backward() variants are used for those batches.
||||
|
||||
Give |
||||
all_blobs: {blob name: blob ndarray} dict. |
||||
all_diffs: {blob name: diff ndarray} dict. |
||||
""" |
||||
# Batch blobs and diffs. |
||||
all_outs = {out: [] for out in set(self.outputs + (blobs or []))} |
||||
all_diffs = {diff: [] for diff in set(self.inputs + (diffs or []))} |
||||
forward_batches = self._batch({in_: kwargs[in_] |
||||
for in_ in self.inputs if in_ in kwargs}) |
||||
backward_batches = self._batch({out: kwargs[out] |
||||
for out in self.outputs if out in kwargs}) |
||||
# Collect outputs from batches (and heed lack of forward/backward batches). |
||||
for fb, bb in izip_longest(forward_batches, backward_batches, fillvalue={}): |
||||
batch_blobs = self.forward(blobs=blobs, **fb) |
||||
batch_diffs = self.backward(diffs=diffs, **bb) |
||||
for out, out_blobs in batch_blobs.iteritems(): |
||||
all_outs[out].extend(out_blobs) |
||||
for diff, out_diffs in batch_diffs.iteritems(): |
||||
all_diffs[diff].extend(out_diffs) |
||||
# Package in ndarray. |
||||
for out, diff in zip(all_outs, all_diffs): |
||||
all_outs[out] = np.asarray(all_outs[out]) |
||||
all_diffs[diff] = np.asarray(all_diffs[diff]) |
||||
# Discard padding at the end and package in ndarray. |
||||
pad = len(all_outs.itervalues().next()) - len(kwargs.itervalues().next()) |
||||
if pad: |
||||
for out, diff in zip(all_outs, all_diffs): |
||||
all_outs[out] = all_outs[out][:-pad] |
||||
all_diffs[diff] = all_diffs[diff][:-pad] |
||||
return all_outs, all_diffs |
||||
|
||||
|
||||
def _Net_set_mean(self, input_, mean_f, mode='elementwise'): |
||||
""" |
||||
Set the mean to subtract for data centering. |
||||
|
||||
Take |
||||
input_: which input to assign this mean. |
||||
mean_f: path to mean .npy with ndarray (input dimensional or broadcastable) |
||||
mode: elementwise = use the whole mean (and check dimensions) |
||||
channel = channel constant (e.g. mean pixel instead of mean image) |
||||
""" |
||||
if not hasattr(self, 'mean'): |
||||
self.mean = {} |
||||
if input_ not in self.inputs: |
||||
raise Exception('Input not in {}'.format(self.inputs)) |
||||
in_shape = self.blobs[input_].data.shape |
||||
mean = np.load(mean_f) |
||||
if mode == 'elementwise': |
||||
if mean.shape != in_shape[1:]: |
||||
# Resize mean (which requires H x W x K input in range [0,1]). |
||||
m_min, m_max = mean.min(), mean.max() |
||||
normal_mean = (mean - m_min) / (m_max - m_min) |
||||
mean = caffe.io.resize_image(normal_mean.transpose((1,2,0)), |
||||
in_shape[2:]).transpose((2,0,1)) * (m_max - m_min) + m_min |
||||
self.mean[input_] = mean |
||||
elif mode == 'channel': |
||||
self.mean[input_] = mean.mean(1).mean(1).reshape((in_shape[1], 1, 1)) |
||||
else: |
||||
raise Exception('Mode not in {}'.format(['elementwise', 'channel'])) |
||||
|
||||
|
||||
|
||||
def _Net_set_input_scale(self, input_, scale): |
||||
""" |
||||
Set the input feature scaling factor s.t. input blob = input * scale. |
||||
|
||||
Take |
||||
input_: which input to assign this scale factor |
||||
scale: scale coefficient |
||||
""" |
||||
if not hasattr(self, 'input_scale'): |
||||
self.input_scale = {} |
||||
if input_ not in self.inputs: |
||||
raise Exception('Input not in {}'.format(self.inputs)) |
||||
self.input_scale[input_] = scale |
||||
|
||||
|
||||
def _Net_set_channel_swap(self, input_, order): |
||||
""" |
||||
Set the input channel order for e.g. RGB to BGR conversion |
||||
as needed for the reference ImageNet model. |
||||
|
||||
Take |
||||
input_: which input to assign this channel order |
||||
order: the order to take the channels. |
||||
(2,1,0) maps RGB to BGR for example. |
||||
""" |
||||
if not hasattr(self, 'channel_swap'): |
||||
self.channel_swap = {} |
||||
if input_ not in self.inputs: |
||||
raise Exception('Input not in {}'.format(self.inputs)) |
||||
self.channel_swap[input_] = order |
||||
|
||||
|
||||
def _Net_preprocess(self, input_name, input_): |
||||
""" |
||||
Format input for Caffe: |
||||
- convert to single |
||||
- resize to input dimensions (preserving number of channels) |
||||
- scale feature |
||||
- reorder channels (for instance color to BGR) |
||||
- subtract mean |
||||
- transpose dimensions to K x H x W |
||||
|
||||
Take |
||||
input_name: name of input blob to preprocess for |
||||
input_: (H' x W' x K) ndarray |
||||
|
||||
Give |
||||
caffe_inputs: (K x H x W) ndarray |
||||
""" |
||||
caffe_in = input_.astype(np.float32) |
||||
input_scale = self.input_scale.get(input_name) |
||||
channel_order = self.channel_swap.get(input_name) |
||||
mean = self.mean.get(input_name) |
||||
in_size = self.blobs[input_name].data.shape[2:] |
||||
if caffe_in.shape[:2] != in_size: |
||||
caffe_in = caffe.io.resize_image(caffe_in, in_size) |
||||
if input_scale: |
||||
caffe_in *= input_scale |
||||
if channel_order: |
||||
caffe_in = caffe_in[:, :, channel_order] |
||||
caffe_in = caffe_in.transpose((2, 0, 1)) |
||||
if mean is not None: |
||||
caffe_in -= mean |
||||
return caffe_in |
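A sketch of the preprocess()/deprocess() round trip after configuring the input transformations; the net and mean paths are placeholders.

    import caffe

    net = caffe.Net('deploy.prototxt', 'weights.caffemodel')   # placeholders
    in_ = net.inputs[0]
    net.set_mean(in_, 'ilsvrc_2012_mean.npy')                  # placeholder
    net.set_input_scale(in_, 255)
    net.set_channel_swap(in_, (2, 1, 0))

    im = caffe.io.load_image('cat.jpg')          # H x W x 3, RGB, [0, 1]
    caffe_in = net.preprocess(in_, im)           # K x H x W, BGR, mean-subtracted
    im_back = net.deprocess(in_, caffe_in)       # roughly the resized original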
||||
|
||||
|
||||
def _Net_deprocess(self, input_name, input_): |
||||
""" |
||||
Invert Caffe formatting; see Net.preprocess(). |
||||
""" |
||||
decaf_in = input_.copy().squeeze() |
||||
input_scale = self.input_scale.get(input_name) |
||||
channel_order = self.channel_swap.get(input_name) |
||||
mean = self.mean.get(input_name) |
||||
if mean is not None: |
||||
decaf_in += mean |
||||
decaf_in = decaf_in.transpose((1,2,0)) |
||||
if channel_order: |
||||
channel_order_inverse = [channel_order.index(i) |
||||
for i in range(decaf_in.shape[2])] |
||||
decaf_in = decaf_in[:, :, channel_order_inverse] |
||||
if input_scale: |
||||
decaf_in /= input_scale |
||||
return decaf_in |
||||
|
||||
|
||||
def _Net_set_input_arrays(self, data, labels): |
||||
""" |
||||
Set input arrays of the in-memory MemoryDataLayer. |
||||
(Note: this is only for networks declared with the memory data layer.) |
||||
""" |
||||
if labels.ndim == 1: |
||||
labels = np.ascontiguousarray(labels[:, np.newaxis, np.newaxis, |
||||
np.newaxis]) |
||||
return self._set_input_arrays(data, labels) |
||||
|
||||
|
||||
def _Net_batch(self, blobs): |
||||
""" |
||||
Batch blob lists according to net's batch size. |
||||
|
||||
Take |
||||
    blobs: Keys are blob names and values are lists of blobs (of any length).
||||
Naturally, all the lists should have the same length. |
||||
|
||||
Give (yield) |
||||
batch: {blob name: list of blobs} dict for a single batch. |
||||
""" |
||||
num = len(blobs.itervalues().next()) |
||||
batch_size = self.blobs.itervalues().next().num |
||||
remainder = num % batch_size |
||||
num_batches = num / batch_size |
||||
|
||||
# Yield full batches. |
||||
for b in range(num_batches): |
||||
i = b * batch_size |
||||
yield {name: blobs[name][i:i + batch_size] for name in blobs} |
||||
|
||||
# Yield last padded batch, if any. |
||||
if remainder > 0: |
||||
padded_batch = {} |
||||
for name in blobs: |
||||
padding = np.zeros((batch_size - remainder,) |
||||
+ blobs[name].shape[1:]) |
||||
padded_batch[name] = np.concatenate([blobs[name][-remainder:], |
||||
padding]) |
||||
yield padded_batch |
||||
|
||||
|
||||
# Attach methods to Net. |
||||
Net.blobs = _Net_blobs |
||||
Net.params = _Net_params |
||||
Net.forward = _Net_forward |
||||
Net.backward = _Net_backward |
||||
Net.forward_all = _Net_forward_all |
||||
Net.forward_backward_all = _Net_forward_backward_all |
||||
Net.set_mean = _Net_set_mean |
||||
Net.set_input_scale = _Net_set_input_scale |
||||
Net.set_channel_swap = _Net_set_channel_swap |
||||
Net.preprocess = _Net_preprocess |
||||
Net.deprocess = _Net_deprocess |
||||
Net.set_input_arrays = _Net_set_input_arrays |
||||
Net._batch = _Net_batch |
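Because these methods are attached to the Net class itself, a net obtained from SGDSolver gets the same interface; a minimal sketch with a placeholder solver prototxt.

    import caffe

    solver = caffe.SGDSolver('lenet_solver.prototxt')   # placeholder
    print solver.net.blobs.keys()    # OrderedDict from the blobs property
    print solver.net.params.keys()   # only layers with learnable blobs
    solver.solve()                   # run the optimization it was configured for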
@ -0,0 +1,120 @@ |
||||
#!/usr/bin/env python |
||||
""" |
||||
classify.py is an out-of-the-box image classifier callable from the command line.
||||
|
||||
By default it configures and runs the Caffe reference ImageNet model. |
||||
""" |
||||
import numpy as np |
||||
import os |
||||
import sys |
||||
import argparse |
||||
import glob |
||||
import time |
||||
|
||||
import caffe |
||||
|
||||
|
||||
def main(argv): |
||||
pycaffe_dir = os.path.dirname(__file__) |
||||
|
||||
parser = argparse.ArgumentParser() |
||||
# Required arguments: input and output files. |
||||
parser.add_argument( |
||||
"input_file", |
||||
help="Input image, directory, or npy." |
||||
) |
||||
parser.add_argument( |
||||
"output_file", |
||||
help="Output npy filename." |
||||
) |
||||
# Optional arguments. |
||||
parser.add_argument( |
||||
"--model_def", |
||||
default=os.path.join(pycaffe_dir, |
||||
"../examples/imagenet/imagenet_deploy.prototxt"), |
||||
help="Model definition file." |
||||
) |
||||
parser.add_argument( |
||||
"--pretrained_model", |
||||
default=os.path.join(pycaffe_dir, |
||||
"../examples/imagenet/caffe_reference_imagenet_model"), |
||||
help="Trained model weights file." |
||||
) |
||||
parser.add_argument( |
||||
"--gpu", |
||||
action='store_true', |
||||
help="Switch for gpu computation." |
||||
) |
||||
parser.add_argument( |
||||
"--center_only", |
||||
action='store_true', |
||||
help="Switch for prediction from center crop alone instead of " + |
||||
"averaging predictions across crops (default)." |
||||
) |
||||
parser.add_argument( |
||||
"--images_dim", |
||||
default='256,256', |
||||
help="Canonical 'height,width' dimensions of input images." |
||||
) |
||||
parser.add_argument( |
||||
"--mean_file", |
||||
default=os.path.join(pycaffe_dir, |
||||
'caffe/imagenet/ilsvrc_2012_mean.npy'), |
||||
help="Data set image mean of H x W x K dimensions (numpy array). " + |
||||
"Set to '' for no mean subtraction." |
||||
) |
||||
parser.add_argument( |
||||
"--input_scale", |
||||
type=float, |
||||
default=255, |
||||
help="Multiply input features by this scale before input to net" |
||||
) |
||||
parser.add_argument( |
||||
"--channel_swap", |
||||
default='2,1,0', |
||||
help="Order to permute input channels. The default converts " + |
||||
"RGB -> BGR since BGR is the Caffe default by way of OpenCV." |
||||
|
||||
) |
||||
parser.add_argument( |
||||
"--ext", |
||||
default='jpg', |
||||
help="Image file extension to take as input when a directory " + |
||||
"is given as the input file." |
||||
) |
||||
args = parser.parse_args() |
||||
|
||||
image_dims = [int(s) for s in args.images_dim.split(',')] |
||||
channel_swap = [int(s) for s in args.channel_swap.split(',')] |
||||
|
||||
# Make classifier. |
||||
classifier = caffe.Classifier(args.model_def, args.pretrained_model, |
||||
image_dims=image_dims, gpu=args.gpu, mean_file=args.mean_file, |
||||
input_scale=args.input_scale, channel_swap=channel_swap) |
||||
|
||||
if args.gpu: |
||||
print 'GPU mode' |
||||
|
||||
# Load numpy array (.npy), directory glob (*.jpg), or image file. |
||||
args.input_file = os.path.expanduser(args.input_file) |
||||
if args.input_file.endswith('npy'): |
||||
inputs = np.load(args.input_file) |
||||
elif os.path.isdir(args.input_file): |
||||
inputs =[caffe.io.load_image(im_f) |
||||
for im_f in glob.glob(args.input_file + '/*.' + args.ext)] |
||||
else: |
||||
inputs = [caffe.io.load_image(args.input_file)] |
||||
|
||||
print "Classifying %d inputs." % len(inputs) |
||||
|
||||
# Classify. |
||||
start = time.time() |
||||
predictions = classifier.predict(inputs, not args.center_only) |
||||
print "Done in %.2f s." % (time.time() - start) |
||||
|
||||
# Save |
||||
np.save(args.output_file, predictions) |
||||
|
||||
|
||||
if __name__ == '__main__': |
||||
main(sys.argv) |
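A short sketch of inspecting the predictions written by this script; the output path below stands for whatever was passed as output_file.

    import numpy as np

    predictions = np.load('out.npy')            # (N x C) class probabilities
    top5 = predictions[0].argsort()[::-1][:5]   # five highest-scoring classes
    print top5
    print predictions[0][top5]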
@ -0,0 +1,158 @@ |
||||
#!/usr/bin/env python |
||||
""" |
||||
detector.py is an out-of-the-box windowed detector |
||||
callable from the command line. |
||||
|
||||
By default it configures and runs the Caffe reference ImageNet model. |
||||
Note that this model was trained for image classification and not detection, |
||||
and finetuning for detection can be expected to improve results. |
||||
|
||||
The selective_search_ijcv_with_python code required for the selective search |
||||
proposal mode is available at |
||||
https://github.com/sergeyk/selective_search_ijcv_with_python |
||||
|
||||
TODO: |
||||
- batch up image filenames as well: don't want to load all of them into memory |
||||
- come up with a batching scheme that preserves order / keeps a unique ID
||||
""" |
||||
import numpy as np |
||||
import pandas as pd |
||||
import os |
||||
import argparse |
||||
import time |
||||
|
||||
import caffe |
||||
|
||||
CROP_MODES = ['list', 'selective_search'] |
||||
COORD_COLS = ['ymin', 'xmin', 'ymax', 'xmax'] |
||||
|
||||
|
||||
def main(argv): |
||||
pycaffe_dir = os.path.dirname(__file__) |
||||
|
||||
parser = argparse.ArgumentParser() |
||||
# Required arguments: input and output. |
||||
parser.add_argument( |
||||
"input_file", |
||||
help="Input txt/csv filename. If .txt, must be list of filenames.\ |
||||
If .csv, must be comma-separated file with header\ |
||||
'filename, xmin, ymin, xmax, ymax'" |
||||
) |
||||
parser.add_argument( |
||||
"output_file", |
||||
help="Output h5/csv filename. Format depends on extension." |
||||
) |
||||
# Optional arguments. |
||||
parser.add_argument( |
||||
"--model_def", |
||||
default=os.path.join(pycaffe_dir, |
||||
"../examples/imagenet/imagenet_deploy.prototxt"), |
||||
help="Model definition file." |
||||
) |
||||
parser.add_argument( |
||||
"--pretrained_model", |
||||
default=os.path.join(pycaffe_dir, |
||||
"../examples/imagenet/caffe_reference_imagenet_model"), |
||||
help="Trained model weights file." |
||||
) |
||||
parser.add_argument( |
||||
"--crop_mode", |
||||
default="selective_search", |
||||
choices=CROP_MODES, |
||||
help="How to generate windows for detection." |
||||
) |
||||
parser.add_argument( |
||||
"--gpu", |
||||
action='store_true', |
||||
help="Switch for gpu computation." |
||||
) |
||||
parser.add_argument( |
||||
"--mean_file", |
||||
default=os.path.join(pycaffe_dir, |
||||
'caffe/imagenet/ilsvrc_2012_mean.npy'), |
||||
help="Data set image mean of H x W x K dimensions (numpy array). " + |
||||
"Set to '' for no mean subtraction." |
||||
) |
||||
parser.add_argument( |
||||
"--input_scale", |
||||
type=float, |
||||
default=255, |
||||
help="Multiply input features by this scale before input to net" |
||||
) |
||||
parser.add_argument( |
||||
"--channel_swap", |
||||
default='2,1,0', |
||||
help="Order to permute input channels. The default converts " + |
||||
"RGB -> BGR since BGR is the Caffe default by way of OpenCV." |
||||
|
||||
) |
||||
parser.add_argument( |
||||
"--context_pad", |
||||
type=int, |
||||
default='16', |
||||
help="Amount of surrounding context to collect in input window." |
||||
) |
||||
args = parser.parse_args() |
||||
|
||||
channel_swap = [int(s) for s in args.channel_swap.split(',')] |
||||
|
||||
# Make detector. |
||||
detector = caffe.Detector(args.model_def, args.pretrained_model, |
||||
gpu=args.gpu, mean_file=args.mean_file, |
||||
input_scale=args.input_scale, channel_swap=channel_swap, |
||||
context_pad=args.context_pad) |
||||
|
||||
if args.gpu: |
||||
print 'GPU mode' |
||||
|
||||
# Load input. |
||||
t = time.time() |
||||
print('Loading input...') |
||||
if args.input_file.lower().endswith('txt'): |
||||
with open(args.input_file) as f: |
||||
inputs = [_.strip() for _ in f.readlines()] |
||||
elif args.input_file.lower().endswith('csv'): |
||||
inputs = pd.read_csv(args.input_file, sep=',', dtype={'filename': str}) |
||||
inputs.set_index('filename', inplace=True) |
||||
else: |
||||
raise Exception("Unknown input file type: not in txt or csv.") |
||||
|
||||
# Detect. |
||||
if args.crop_mode == 'list': |
||||
# Unpack sequence of (image filename, windows). |
||||
images_windows = ( |
||||
(ix, inputs.iloc[np.where(inputs.index == ix)][COORD_COLS].values) |
||||
for ix in inputs.index.unique() |
||||
) |
||||
detections = detector.detect_windows(images_windows) |
||||
else: |
||||
detections = detector.detect_selective_search(inputs) |
||||
print("Processed {} windows in {:.3f} s.".format(len(detections), |
||||
time.time() - t)) |
||||
|
||||
# Collect into dataframe with labeled fields. |
||||
df = pd.DataFrame(detections) |
||||
df.set_index('filename', inplace=True) |
||||
df[COORD_COLS] = pd.DataFrame( |
||||
data=np.vstack(df['window']), index=df.index, columns=COORD_COLS) |
||||
del(df['window']) |
||||
|
||||
# Save results. |
||||
t = time.time() |
||||
if args.output_file.lower().endswith('csv'): |
||||
# csv |
||||
# Enumerate the class probabilities. |
||||
        num_output = len(df['prediction'].iloc[0])
        class_cols = ['class{}'.format(x) for x in range(num_output)]
        df[class_cols] = pd.DataFrame(
            data=np.vstack(df['prediction']), index=df.index,
            columns=class_cols)
||||
df.to_csv(args.output_file, cols=COORD_COLS + class_cols) |
||||
else: |
||||
# h5 |
||||
df.to_hdf(args.output_file, 'df', mode='w') |
||||
print("Saved to {} in {:.3f} s.".format(args.output_file, |
||||
time.time() - t)) |
||||
|
||||
|
||||
if __name__ == "__main__": |
||||
import sys |
||||
main(sys.argv) |
@ -0,0 +1,25 @@ |
||||
#!/usr/bin/env python |
||||
""" |
||||
Draw a graph of the net architecture. |
||||
""" |
||||
import os
import sys
||||
from google.protobuf import text_format |
||||
|
||||
import caffe, caffe.draw |
||||
from caffe.proto import caffe_pb2 |
||||
|
||||
|
||||
def main(argv): |
||||
if len(argv) != 3: |
||||
print 'Usage: %s input_net_proto_file output_image_file' % \ |
||||
os.path.basename(sys.argv[0]) |
||||
else: |
||||
net = caffe_pb2.NetParameter() |
||||
text_format.Merge(open(sys.argv[1]).read(), net) |
||||
print 'Drawing net to %s' % sys.argv[2] |
||||
caffe.draw.draw_net_to_file(net, sys.argv[2]) |
||||
|
||||
|
||||
if __name__ == '__main__': |
||||
import sys |
||||
main(sys.argv) |
@ -0,0 +1,14 @@ |
||||
Cython>=0.19.2 |
||||
h5py>=2.2.0 |
||||
ipython>=1.1.0 |
||||
leveldb>=0.191 |
||||
matplotlib>=1.3.1 |
||||
networkx>=1.8.1 |
||||
nose>=1.3.0 |
||||
numpy>=1.7.1 |
||||
pandas>=0.12.0 |
||||
protobuf>=2.5.0 |
||||
python-gflags>=2.0 |
||||
scikit-image>=0.9.3 |
||||
scikit-learn>=0.14.1 |
||||
scipy>=0.13.2 |
@ -0,0 +1,171 @@ |
||||
import numpy as np |
||||
import skimage.io |
||||
from scipy.ndimage import zoom |
||||
from skimage.transform import resize |
||||
|
||||
from caffe.proto import caffe_pb2 |
||||
|
||||
|
||||
def load_image(filename, color=True): |
||||
""" |
||||
Load an image converting from grayscale or alpha as needed. |
||||
|
||||
Take |
||||
filename: string |
||||
color: flag for color format. True (default) loads as RGB while False |
||||
loads as intensity (if image is already grayscale). |
||||
|
||||
Give |
||||
image: an image with type np.float32 in range [0, 1] |
||||
of size (H x W x 3) in RGB or |
||||
of size (H x W x 1) in grayscale. |
||||
""" |
||||
img = skimage.img_as_float(skimage.io.imread(filename)).astype(np.float32) |
||||
if img.ndim == 2: |
||||
img = img[:, :, np.newaxis] |
||||
if color: |
||||
img = np.tile(img, (1, 1, 3)) |
||||
elif img.shape[2] == 4: |
||||
img = img[:, :, :3] |
||||
return img |
||||
|
||||
|
||||
def resize_image(im, new_dims, interp_order=1): |
||||
""" |
||||
Resize an image array with interpolation. |
||||
|
||||
Take |
||||
im: (H x W x K) ndarray |
||||
new_dims: (height, width) tuple of new dimensions. |
||||
interp_order: interpolation order, default is linear. |
||||
|
||||
Give |
||||
im: resized ndarray with shape (new_dims[0], new_dims[1], K) |
||||
""" |
||||
if im.shape[-1] == 1 or im.shape[-1] == 3: |
||||
# skimage is fast but only understands {1,3} channel images in [0, 1]. |
||||
im_min, im_max = im.min(), im.max() |
||||
im_std = (im - im_min) / (im_max - im_min) |
||||
resized_std = resize(im_std, new_dims, order=interp_order) |
||||
resized_im = resized_std * (im_max - im_min) + im_min |
||||
else: |
||||
# ndimage interpolates anything but more slowly. |
||||
scale = tuple(np.array(new_dims) / np.array(im.shape[:2])) |
||||
resized_im = zoom(im, scale + (1,), order=interp_order) |
||||
return resized_im.astype(np.float32) |
||||
|
||||
|
||||
def oversample(images, crop_dims): |
||||
""" |
||||
Crop images into the four corners, center, and their mirrored versions. |
||||
|
||||
Take |
||||
image: iterable of (H x W x K) ndarrays |
||||
crop_dims: (height, width) tuple for the crops. |
||||
|
||||
Give |
||||
crops: (10*N x H x W x K) ndarray of crops for number of inputs N. |
||||
""" |
||||
# Dimensions and center. |
||||
im_shape = np.array(images[0].shape) |
||||
crop_dims = np.array(crop_dims) |
||||
im_center = im_shape[:2] / 2.0 |
||||
|
||||
# Make crop coordinates |
||||
h_indices = (0, im_shape[0] - crop_dims[0]) |
||||
w_indices = (0, im_shape[1] - crop_dims[1]) |
||||
crops_ix = np.empty((5, 4), dtype=int) |
||||
curr = 0 |
||||
for i in h_indices: |
||||
for j in w_indices: |
||||
crops_ix[curr] = (i, j, i + crop_dims[0], j + crop_dims[1]) |
||||
curr += 1 |
||||
crops_ix[4] = np.tile(im_center, (1, 2)) + np.concatenate([ |
||||
-crop_dims / 2.0, |
||||
crop_dims / 2.0 |
||||
]) |
||||
crops_ix = np.tile(crops_ix, (2, 1)) |
||||
|
||||
# Extract crops |
||||
crops = np.empty((10 * len(images), crop_dims[0], crop_dims[1], |
||||
im_shape[-1]), dtype=np.float32) |
||||
ix = 0 |
||||
for im in images: |
||||
for crop in crops_ix: |
||||
crops[ix] = im[crop[0]:crop[2], crop[1]:crop[3], :] |
||||
ix += 1 |
||||
crops[ix-5:ix] = crops[ix-5:ix, :, ::-1, :] # flip for mirrors |
||||
return crops |
||||
|
||||
|
||||
def blobproto_to_array(blob, return_diff=False): |
||||
"""Convert a blob proto to an array. In default, we will just return the data, |
||||
unless return_diff is True, in which case we will return the diff. |
||||
""" |
||||
if return_diff: |
||||
return np.array(blob.diff).reshape( |
||||
blob.num, blob.channels, blob.height, blob.width) |
||||
else: |
||||
return np.array(blob.data).reshape( |
||||
blob.num, blob.channels, blob.height, blob.width) |
||||
|
||||
|
||||
def array_to_blobproto(arr, diff=None): |
||||
"""Converts a 4-dimensional array to blob proto. If diff is given, also |
||||
convert the diff. You need to make sure that arr and diff have the same |
||||
shape, and this function does not do sanity check. |
||||
""" |
||||
if arr.ndim != 4: |
||||
raise ValueError('Incorrect array shape.') |
||||
blob = caffe_pb2.BlobProto() |
||||
    blob.num, blob.channels, blob.height, blob.width = arr.shape
||||
blob.data.extend(arr.astype(float).flat) |
||||
if diff is not None: |
||||
blob.diff.extend(diff.astype(float).flat) |
||||
return blob |
||||
|
||||
|
||||
def arraylist_to_blobprotovecor_str(arraylist): |
||||
"""Converts a list of arrays to a serialized blobprotovec, which could be |
||||
then passed to a network for processing. |
||||
""" |
||||
vec = caffe_pb2.BlobProtoVector() |
||||
vec.blobs.extend([array_to_blobproto(arr) for arr in arraylist]) |
||||
return vec.SerializeToString() |
||||
|
||||
|
||||
def blobprotovector_str_to_arraylist(str): |
||||
"""Converts a serialized blobprotovec to a list of arrays. |
||||
""" |
||||
vec = caffe_pb2.BlobProtoVector() |
||||
vec.ParseFromString(str) |
||||
return [blobproto_to_array(blob) for blob in vec.blobs] |
||||
|
||||
|
||||
def array_to_datum(arr, label=0): |
||||
"""Converts a 3-dimensional array to datum. If the array has dtype uint8, |
||||
the output data will be encoded as a string. Otherwise, the output data |
||||
will be stored in float format. |
||||
""" |
||||
if arr.ndim != 3: |
||||
raise ValueError('Incorrect array shape.') |
||||
datum = caffe_pb2.Datum() |
||||
datum.channels, datum.height, datum.width = arr.shape |
||||
if arr.dtype == np.uint8: |
||||
datum.data = arr.tostring() |
||||
else: |
||||
datum.float_data.extend(arr.flat) |
||||
datum.label = label |
||||
return datum |
||||
|
||||
|
||||
def datum_to_array(datum): |
||||
"""Converts a datum to an array. Note that the label is not returned, |
||||
as one can easily get it by calling datum.label. |
||||
""" |
||||
if len(datum.data): |
||||
return np.fromstring(datum.data, dtype = np.uint8).reshape( |
||||
datum.channels, datum.height, datum.width) |
||||
else: |
||||
return np.array(datum.float_data).astype(float).reshape( |
||||
datum.channels, datum.height, datum.width) |
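A small check of oversample() above, which produces the 10 standard crops (four corners, center, and their mirrors) consumed by Classifier.predict; assumes this module is importable as caffe.io.

    import numpy as np
    from caffe.io import oversample

    im = np.random.rand(256, 256, 3).astype(np.float32)
    crops = oversample([im], (227, 227))
    print crops.shape                  # (10, 227, 227, 3)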
@ -0,0 +1,11 @@ |
||||
#!/bin/bash |
||||
|
||||
PORT=${1:-4000} |
||||
|
||||
echo "usage: build_docs.sh [port]" |
||||
|
||||
# Find the docs dir, no matter where the script is called |
||||
DIR="$( cd "$(dirname "$0")" ; pwd -P )" |
||||
cd $DIR/../docs |
||||
|
||||
jekyll serve -w -s . -d _site --port=$PORT |
File diff suppressed because it is too large
@ -0,0 +1,50 @@ |
||||
#!/usr/bin/env sh |
||||
# Publish or pull-request documentation to the gh-pages site.
||||
|
||||
# The remote for pushing the docs (defaults to origin). |
||||
# This is where you will submit the PR to BVLC:gh-pages from. |
||||
REMOTE=${1:-origin} |
||||
|
||||
echo "Generating docs and pushing to $REMOTE:gh-pages..." |
||||
echo "To build and view docs when not on master, simply do 'jekyll serve -s docs'." |
||||
echo |
||||
|
||||
REMOTE_URL=`git config --get remote.${REMOTE}.url` |
||||
BRANCH=`git rev-parse --abbrev-ref HEAD` |
||||
MSG=`git log --oneline -1` |
||||
|
||||
if [[ $BRANCH = 'master' ]]; then |
||||
# Find the docs dir, no matter where the script is called |
||||
DIR="$( cd "$(dirname "$0")" ; pwd -P )" |
||||
DOCS_SITE_DIR=$DIR/../docs/_site |
||||
|
||||
# Make sure that docs/_site tracks remote:gh-pages. |
||||
# If not, then we make a new repo and check out just that branch. |
||||
mkdir -p $DOCS_SITE_DIR |
||||
cd $DOCS_SITE_DIR |
||||
SITE_REMOTE_URL=`git config --get remote.${REMOTE}.url` |
||||
SITE_BRANCH=`git rev-parse --abbrev-ref HEAD` |
||||
|
||||
echo $SITE_REMOTE_URL |
||||
echo $SITE_BRANCH |
||||
echo `pwd` |
||||
|
||||
if [[ ( $SITE_REMOTE_URL = $REMOTE_URL ) && ( $SITE_BRANCH = 'gh-pages' ) ]]; then |
||||
echo "Confirmed that docs/_site has same remote as main repo, and is on gh-pages." |
||||
else |
||||
echo "Checking out $REMOTE:gh-pages into docs/_site (will take a little time)." |
||||
git init . |
||||
git remote add -t gh-pages -f $REMOTE $REMOTE_URL |
||||
git checkout gh-pages |
||||
fi |
||||
|
||||
echo "Building the site into docs/_site, and committing the changes." |
||||
jekyll build -s .. -d . |
||||
git add --all . |
||||
git commit -m "$MSG" |
||||
git push $REMOTE gh-pages |
||||
|
||||
echo "All done!" |
||||
cd ../.. |
||||
else echo "You must run this deployment script from the 'master' branch." |
||||
fi |
@ -0,0 +1,214 @@ |
||||
// Copyright 2014 BVLC and contributors.
|
||||
|
||||
#include <cuda_runtime.h> |
||||
#include <cublas_v2.h> |
||||
|
||||
#include "caffe/blob.hpp" |
||||
#include "caffe/common.hpp" |
||||
#include "caffe/syncedmem.hpp" |
||||
#include "caffe/util/math_functions.hpp" |
||||
|
||||
namespace caffe { |
||||
|
||||
template <typename Dtype> |
||||
void Blob<Dtype>::Reshape(const int num, const int channels, const int height, |
||||
const int width) { |
||||
CHECK_GE(num, 0); |
||||
CHECK_GE(channels, 0); |
||||
CHECK_GE(height, 0); |
||||
CHECK_GE(width, 0); |
||||
num_ = num; |
||||
channels_ = channels; |
||||
height_ = height; |
||||
width_ = width; |
||||
count_ = num_ * channels_ * height_ * width_; |
||||
if (count_) { |
||||
data_.reset(new SyncedMemory(count_ * sizeof(Dtype))); |
||||
diff_.reset(new SyncedMemory(count_ * sizeof(Dtype))); |
||||
} else { |
||||
data_.reset(reinterpret_cast<SyncedMemory*>(NULL)); |
||||
diff_.reset(reinterpret_cast<SyncedMemory*>(NULL)); |
||||
} |
||||
} |
||||
|
||||
template <typename Dtype> |
||||
void Blob<Dtype>::ReshapeLike(const Blob<Dtype>& other) { |
||||
Reshape(other.num(), other.channels(), other.height(), other.width()); |
||||
} |
||||
|
||||
template <typename Dtype> |
||||
Blob<Dtype>::Blob(const int num, const int channels, const int height, |
||||
const int width) { |
||||
Reshape(num, channels, height, width); |
||||
} |
||||
|
||||
template <typename Dtype> |
||||
const Dtype* Blob<Dtype>::cpu_data() const { |
||||
CHECK(data_); |
||||
return (const Dtype*)data_->cpu_data(); |
||||
} |
||||
|
||||
template <typename Dtype> |
||||
void Blob<Dtype>::set_cpu_data(Dtype* data) { |
||||
CHECK(data); |
||||
data_->set_cpu_data(data); |
||||
} |
||||
|
||||
template <typename Dtype> |
||||
const Dtype* Blob<Dtype>::gpu_data() const { |
||||
CHECK(data_); |
||||
return (const Dtype*)data_->gpu_data(); |
||||
} |
||||
|
||||
template <typename Dtype> |
||||
const Dtype* Blob<Dtype>::cpu_diff() const { |
||||
CHECK(diff_); |
||||
return (const Dtype*)diff_->cpu_data(); |
||||
} |
||||
|
||||
template <typename Dtype> |
||||
const Dtype* Blob<Dtype>::gpu_diff() const { |
||||
CHECK(diff_); |
||||
return (const Dtype*)diff_->gpu_data(); |
||||
} |
||||
|
||||
template <typename Dtype> |
||||
Dtype* Blob<Dtype>::mutable_cpu_data() { |
||||
CHECK(data_); |
||||
return reinterpret_cast<Dtype*>(data_->mutable_cpu_data()); |
||||
} |
||||
|
||||
template <typename Dtype> |
||||
Dtype* Blob<Dtype>::mutable_gpu_data() { |
||||
CHECK(data_); |
||||
return reinterpret_cast<Dtype*>(data_->mutable_gpu_data()); |
||||
} |
||||
|
||||
template <typename Dtype> |
||||
Dtype* Blob<Dtype>::mutable_cpu_diff() { |
||||
CHECK(diff_); |
||||
return reinterpret_cast<Dtype*>(diff_->mutable_cpu_data()); |
||||
} |
||||
|
||||
template <typename Dtype> |
||||
Dtype* Blob<Dtype>::mutable_gpu_diff() { |
||||
CHECK(diff_); |
||||
return reinterpret_cast<Dtype*>(diff_->mutable_gpu_data()); |
||||
} |
||||
|
||||
template <typename Dtype> |
||||
void Blob<Dtype>::ShareData(const Blob& other) { |
||||
CHECK_EQ(count_, other.count()); |
||||
data_ = other.data(); |
||||
} |
||||
|
||||
template <typename Dtype> |
||||
void Blob<Dtype>::ShareDiff(const Blob& other) { |
||||
CHECK_EQ(count_, other.count()); |
||||
diff_ = other.diff(); |
||||
} |
||||
|
||||
// The "update" method is used for parameter blobs in a Net, which are stored
|
||||
// as Blob<float> or Blob<double> -- hence we do not define it for
|
||||
// Blob<int> or Blob<unsigned int>.
|
||||
template <> void Blob<unsigned int>::Update() { NOT_IMPLEMENTED; } |
||||
template <> void Blob<int>::Update() { NOT_IMPLEMENTED; } |
||||
|
||||
template <typename Dtype> |
||||
void Blob<Dtype>::Update() { |
||||
// We will perform update based on where the data is located.
|
||||
switch (data_->head()) { |
||||
case SyncedMemory::HEAD_AT_CPU: |
||||
// perform computation on CPU
|
||||
caffe_axpy<Dtype>(count_, Dtype(-1), |
||||
reinterpret_cast<const Dtype*>(diff_->cpu_data()), |
||||
reinterpret_cast<Dtype*>(data_->mutable_cpu_data())); |
||||
break; |
||||
case SyncedMemory::HEAD_AT_GPU: |
||||
case SyncedMemory::SYNCED: |
||||
// perform computation on GPU
|
||||
caffe_gpu_axpy<Dtype>(count_, Dtype(-1), |
||||
reinterpret_cast<const Dtype*>(diff_->gpu_data()), |
||||
reinterpret_cast<Dtype*>(data_->mutable_gpu_data())); |
||||
break; |
||||
default: |
||||
LOG(FATAL) << "Syncedmem not initialized."; |
||||
} |
||||
} |
||||
|
||||
template <typename Dtype> |
||||
void Blob<Dtype>::CopyFrom(const Blob& source, bool copy_diff, bool reshape) { |
||||
if (num_ != source.num() || channels_ != source.channels() || |
||||
height_ != source.height() || width_ != source.width()) { |
||||
if (reshape) { |
||||
Reshape(source.num(), source.channels(), source.height(), source.width()); |
||||
} else { |
||||
LOG(FATAL) << "Trying to copy blobs of different sizes."; |
||||
} |
||||
} |
||||
switch (Caffe::mode()) { |
||||
case Caffe::GPU: |
||||
if (copy_diff) { |
||||
CUDA_CHECK(cudaMemcpy(diff_->mutable_gpu_data(), source.gpu_diff(), |
||||
sizeof(Dtype) * count_, cudaMemcpyDeviceToDevice)); |
||||
} else { |
||||
CUDA_CHECK(cudaMemcpy(data_->mutable_gpu_data(), source.gpu_data(), |
||||
sizeof(Dtype) * count_, cudaMemcpyDeviceToDevice)); |
||||
} |
||||
break; |
||||
case Caffe::CPU: |
||||
if (copy_diff) { |
||||
memcpy(diff_->mutable_cpu_data(), source.cpu_diff(), |
||||
sizeof(Dtype) * count_); |
||||
} else { |
||||
memcpy(data_->mutable_cpu_data(), source.cpu_data(), |
||||
sizeof(Dtype) * count_); |
||||
} |
||||
break; |
||||
default: |
||||
LOG(FATAL) << "Unknown caffe mode."; |
||||
} |
||||
} |
||||
|
||||
template <typename Dtype> |
||||
void Blob<Dtype>::FromProto(const BlobProto& proto) { |
||||
Reshape(proto.num(), proto.channels(), proto.height(), proto.width()); |
||||
// copy data
|
||||
Dtype* data_vec = mutable_cpu_data(); |
||||
for (int i = 0; i < count_; ++i) { |
||||
data_vec[i] = proto.data(i); |
||||
} |
||||
if (proto.diff_size() > 0) { |
||||
Dtype* diff_vec = mutable_cpu_diff(); |
||||
for (int i = 0; i < count_; ++i) { |
||||
diff_vec[i] = proto.diff(i); |
||||
} |
||||
} |
||||
} |
||||
|
||||
template <typename Dtype> |
||||
void Blob<Dtype>::ToProto(BlobProto* proto, bool write_diff) const { |
||||
proto->set_num(num_); |
||||
proto->set_channels(channels_); |
||||
proto->set_height(height_); |
||||
proto->set_width(width_); |
||||
proto->clear_data(); |
||||
proto->clear_diff(); |
||||
const Dtype* data_vec = cpu_data(); |
||||
for (int i = 0; i < count_; ++i) { |
||||
proto->add_data(data_vec[i]); |
||||
} |
||||
if (write_diff) { |
||||
const Dtype* diff_vec = cpu_diff(); |
||||
for (int i = 0; i < count_; ++i) { |
||||
proto->add_diff(diff_vec[i]); |
||||
} |
||||
} |
||||
} |
||||
|
||||
INSTANTIATE_CLASS(Blob); |
||||
template class Blob<int>; |
||||
template class Blob<unsigned int>; |
||||
|
||||
} // namespace caffe
|
||||
|
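The Python-side counterpart of ToProto()/FromProto() is the BlobProto helper pair in caffe.io from this same commit; a minimal round-trip sketch, assuming that module is importable.

    import numpy as np
    from caffe.io import array_to_blobproto, blobproto_to_array

    arr = np.arange(2 * 3 * 4 * 5, dtype=np.float32).reshape(2, 3, 4, 5)
    proto = array_to_blobproto(arr)      # fills num/channels/height/width + data
    back = blobproto_to_array(proto)
    assert back.shape == (2, 3, 4, 5)
    assert np.allclose(back, arr)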
@@ -0,0 +1,198 @@
// Copyright 2014 BVLC and contributors.

#include <cstdio>
#include <ctime>

#include "caffe/common.hpp"
#include "caffe/util/rng.hpp"

namespace caffe {

shared_ptr<Caffe> Caffe::singleton_;


// curand seeding
int64_t cluster_seedgen(void) {
  int64_t s, seed, pid;
  pid = getpid();
  s = time(NULL);
  seed = abs(((s * 181) * ((pid - 83) * 359)) % 104729);
  return seed;
}


Caffe::Caffe()
    : mode_(Caffe::CPU), phase_(Caffe::TRAIN), cublas_handle_(NULL),
      curand_generator_(NULL),
      random_generator_() {
  // Try to create a cublas handler, and report an error if failed (but we will
  // keep the program running as one might just want to run CPU code).
  if (cublasCreate(&cublas_handle_) != CUBLAS_STATUS_SUCCESS) {
    LOG(ERROR) << "Cannot create Cublas handle. Cublas won't be available.";
  }
  // Try to create a curand handler.
  if (curandCreateGenerator(&curand_generator_, CURAND_RNG_PSEUDO_DEFAULT)
      != CURAND_STATUS_SUCCESS ||
      curandSetPseudoRandomGeneratorSeed(curand_generator_, cluster_seedgen())
      != CURAND_STATUS_SUCCESS) {
    LOG(ERROR) << "Cannot create Curand generator. Curand won't be available.";
  }
}

Caffe::~Caffe() {
  if (cublas_handle_) CUBLAS_CHECK(cublasDestroy(cublas_handle_));
  if (curand_generator_) {
    CURAND_CHECK(curandDestroyGenerator(curand_generator_));
  }
}

void Caffe::set_random_seed(const unsigned int seed) {
  // Curand seed
  // Yangqing's note: simply setting the generator seed does not seem to
  // work on the tesla K20s, so I wrote the ugly reset thing below.
  if (Get().curand_generator_) {
    CURAND_CHECK(curandDestroyGenerator(curand_generator()));
    CURAND_CHECK(curandCreateGenerator(&Get().curand_generator_,
        CURAND_RNG_PSEUDO_DEFAULT));
    CURAND_CHECK(curandSetPseudoRandomGeneratorSeed(curand_generator(),
        seed));
  } else {
    LOG(ERROR) << "Curand not available. Skipping setting the curand seed.";
  }
  // RNG seed
  Get().random_generator_.reset(new RNG(seed));
}

void Caffe::SetDevice(const int device_id) {
  int current_device;
  CUDA_CHECK(cudaGetDevice(&current_device));
  if (current_device == device_id) {
    return;
  }
  // The call to cudaSetDevice must come before any calls to Get, which
  // may perform initialization using the GPU.
  CUDA_CHECK(cudaSetDevice(device_id));
  if (Get().cublas_handle_) CUBLAS_CHECK(cublasDestroy(Get().cublas_handle_));
  if (Get().curand_generator_) {
    CURAND_CHECK(curandDestroyGenerator(Get().curand_generator_));
  }
  CUBLAS_CHECK(cublasCreate(&Get().cublas_handle_));
  CURAND_CHECK(curandCreateGenerator(&Get().curand_generator_,
      CURAND_RNG_PSEUDO_DEFAULT));
  CURAND_CHECK(curandSetPseudoRandomGeneratorSeed(Get().curand_generator_,
      cluster_seedgen()));
}

void Caffe::DeviceQuery() {
  cudaDeviceProp prop;
  int device;
  if (cudaSuccess != cudaGetDevice(&device)) {
    printf("No cuda device present.\n");
    return;
  }
  CUDA_CHECK(cudaGetDeviceProperties(&prop, device));
  printf("Device id: %d\n", device);
  printf("Major revision number: %d\n", prop.major);
  printf("Minor revision number: %d\n", prop.minor);
  printf("Name: %s\n", prop.name);
  printf("Total global memory: %lu\n", prop.totalGlobalMem);
  printf("Total shared memory per block: %lu\n", prop.sharedMemPerBlock);
  printf("Total registers per block: %d\n", prop.regsPerBlock);
  printf("Warp size: %d\n", prop.warpSize);
  printf("Maximum memory pitch: %lu\n", prop.memPitch);
  printf("Maximum threads per block: %d\n", prop.maxThreadsPerBlock);
  printf("Maximum dimension of block: %d, %d, %d\n",
      prop.maxThreadsDim[0], prop.maxThreadsDim[1], prop.maxThreadsDim[2]);
  printf("Maximum dimension of grid: %d, %d, %d\n",
      prop.maxGridSize[0], prop.maxGridSize[1], prop.maxGridSize[2]);
  printf("Clock rate: %d\n", prop.clockRate);
  printf("Total constant memory: %lu\n", prop.totalConstMem);
  printf("Texture alignment: %lu\n", prop.textureAlignment);
  printf("Concurrent copy and execution: %s\n",
      (prop.deviceOverlap ? "Yes" : "No"));
  printf("Number of multiprocessors: %d\n", prop.multiProcessorCount);
  printf("Kernel execution timeout: %s\n",
      (prop.kernelExecTimeoutEnabled ? "Yes" : "No"));
  return;
}


class Caffe::RNG::Generator {
 public:
  Generator() : rng_(new caffe::rng_t(cluster_seedgen())) {}
  explicit Generator(unsigned int seed) : rng_(new caffe::rng_t(seed)) {}
  caffe::rng_t* rng() { return rng_.get(); }
 private:
  shared_ptr<caffe::rng_t> rng_;
};

Caffe::RNG::RNG() : generator_(new Generator()) { }

Caffe::RNG::RNG(unsigned int seed) : generator_(new Generator(seed)) { }

Caffe::RNG& Caffe::RNG::operator=(const RNG& other) {
  generator_.reset(other.generator_.get());
  return *this;
}

void* Caffe::RNG::generator() {
  return static_cast<void*>(generator_->rng());
}

const char* cublasGetErrorString(cublasStatus_t error) {
  switch (error) {
  case CUBLAS_STATUS_SUCCESS:
    return "CUBLAS_STATUS_SUCCESS";
  case CUBLAS_STATUS_NOT_INITIALIZED:
    return "CUBLAS_STATUS_NOT_INITIALIZED";
  case CUBLAS_STATUS_ALLOC_FAILED:
    return "CUBLAS_STATUS_ALLOC_FAILED";
  case CUBLAS_STATUS_INVALID_VALUE:
    return "CUBLAS_STATUS_INVALID_VALUE";
  case CUBLAS_STATUS_ARCH_MISMATCH:
    return "CUBLAS_STATUS_ARCH_MISMATCH";
  case CUBLAS_STATUS_MAPPING_ERROR:
    return "CUBLAS_STATUS_MAPPING_ERROR";
  case CUBLAS_STATUS_EXECUTION_FAILED:
    return "CUBLAS_STATUS_EXECUTION_FAILED";
  case CUBLAS_STATUS_INTERNAL_ERROR:
    return "CUBLAS_STATUS_INTERNAL_ERROR";
  case CUBLAS_STATUS_NOT_SUPPORTED:
    return "CUBLAS_STATUS_NOT_SUPPORTED";
  }
  return "Unknown cublas status";
}

const char* curandGetErrorString(curandStatus_t error) {
  switch (error) {
  case CURAND_STATUS_SUCCESS:
    return "CURAND_STATUS_SUCCESS";
  case CURAND_STATUS_VERSION_MISMATCH:
    return "CURAND_STATUS_VERSION_MISMATCH";
  case CURAND_STATUS_NOT_INITIALIZED:
    return "CURAND_STATUS_NOT_INITIALIZED";
  case CURAND_STATUS_ALLOCATION_FAILED:
    return "CURAND_STATUS_ALLOCATION_FAILED";
  case CURAND_STATUS_TYPE_ERROR:
    return "CURAND_STATUS_TYPE_ERROR";
  case CURAND_STATUS_OUT_OF_RANGE:
    return "CURAND_STATUS_OUT_OF_RANGE";
  case CURAND_STATUS_LENGTH_NOT_MULTIPLE:
    return "CURAND_STATUS_LENGTH_NOT_MULTIPLE";
  case CURAND_STATUS_DOUBLE_PRECISION_REQUIRED:
    return "CURAND_STATUS_DOUBLE_PRECISION_REQUIRED";
  case CURAND_STATUS_LAUNCH_FAILURE:
    return "CURAND_STATUS_LAUNCH_FAILURE";
  case CURAND_STATUS_PREEXISTING_FAILURE:
    return "CURAND_STATUS_PREEXISTING_FAILURE";
  case CURAND_STATUS_INITIALIZATION_FAILED:
    return "CURAND_STATUS_INITIALIZATION_FAILED";
  case CURAND_STATUS_ARCH_MISMATCH:
    return "CURAND_STATUS_ARCH_MISMATCH";
  case CURAND_STATUS_INTERNAL_ERROR:
    return "CURAND_STATUS_INTERNAL_ERROR";
  }
  return "Unknown curand status";
}

}  // namespace caffe
@@ -0,0 +1,101 @@
||||
// Copyright 2014 BVLC and contributors.
|
||||
|
||||
#ifndef CAFFE_LAYER_FACTORY_HPP_ |
||||
#define CAFFE_LAYER_FACTORY_HPP_ |
||||
|
||||
#include <string> |
||||
|
||||
#include "caffe/layer.hpp" |
||||
#include "caffe/vision_layers.hpp" |
||||
#include "caffe/proto/caffe.pb.h" |
||||
|
||||
using std::string; |
||||
|
||||
namespace caffe { |
||||
|
||||
|
||||
// A function to get a specific layer from the specification given in
|
||||
// LayerParameter. Ideally this would be replaced by a factory pattern,
|
||||
// but we will leave it this way for now.
|
||||
template <typename Dtype> |
||||
Layer<Dtype>* GetLayer(const LayerParameter& param) { |
||||
const string& name = param.name(); |
||||
const LayerParameter_LayerType& type = param.type(); |
||||
switch (type) { |
||||
case LayerParameter_LayerType_ACCURACY: |
||||
return new AccuracyLayer<Dtype>(param); |
||||
case LayerParameter_LayerType_ARGMAX: |
||||
return new ArgMaxLayer<Dtype>(param); |
||||
case LayerParameter_LayerType_BNLL: |
||||
return new BNLLLayer<Dtype>(param); |
||||
case LayerParameter_LayerType_CONCAT: |
||||
return new ConcatLayer<Dtype>(param); |
||||
case LayerParameter_LayerType_CONVOLUTION: |
||||
return new ConvolutionLayer<Dtype>(param); |
||||
case LayerParameter_LayerType_DATA: |
||||
return new DataLayer<Dtype>(param); |
||||
case LayerParameter_LayerType_DROPOUT: |
||||
return new DropoutLayer<Dtype>(param); |
||||
case LayerParameter_LayerType_DUMMY_DATA: |
||||
return new DummyDataLayer<Dtype>(param); |
||||
case LayerParameter_LayerType_EUCLIDEAN_LOSS: |
||||
return new EuclideanLossLayer<Dtype>(param); |
||||
case LayerParameter_LayerType_ELTWISE: |
||||
return new EltwiseLayer<Dtype>(param); |
||||
case LayerParameter_LayerType_FLATTEN: |
||||
return new FlattenLayer<Dtype>(param); |
||||
case LayerParameter_LayerType_HDF5_DATA: |
||||
return new HDF5DataLayer<Dtype>(param); |
||||
case LayerParameter_LayerType_HDF5_OUTPUT: |
||||
return new HDF5OutputLayer<Dtype>(param); |
||||
case LayerParameter_LayerType_HINGE_LOSS: |
||||
return new HingeLossLayer<Dtype>(param); |
||||
case LayerParameter_LayerType_IMAGE_DATA: |
||||
return new ImageDataLayer<Dtype>(param); |
||||
case LayerParameter_LayerType_IM2COL: |
||||
return new Im2colLayer<Dtype>(param); |
||||
case LayerParameter_LayerType_INFOGAIN_LOSS: |
||||
return new InfogainLossLayer<Dtype>(param); |
||||
case LayerParameter_LayerType_INNER_PRODUCT: |
||||
return new InnerProductLayer<Dtype>(param); |
||||
case LayerParameter_LayerType_LRN: |
||||
return new LRNLayer<Dtype>(param); |
||||
case LayerParameter_LayerType_MEMORY_DATA: |
||||
return new MemoryDataLayer<Dtype>(param); |
||||
case LayerParameter_LayerType_MULTINOMIAL_LOGISTIC_LOSS: |
||||
return new MultinomialLogisticLossLayer<Dtype>(param); |
||||
case LayerParameter_LayerType_POOLING: |
||||
return new PoolingLayer<Dtype>(param); |
||||
case LayerParameter_LayerType_POWER: |
||||
return new PowerLayer<Dtype>(param); |
||||
case LayerParameter_LayerType_RELU: |
||||
return new ReLULayer<Dtype>(param); |
||||
case LayerParameter_LayerType_SIGMOID: |
||||
return new SigmoidLayer<Dtype>(param); |
||||
case LayerParameter_LayerType_SIGMOID_CROSS_ENTROPY_LOSS: |
||||
return new SigmoidCrossEntropyLossLayer<Dtype>(param); |
||||
case LayerParameter_LayerType_SOFTMAX: |
||||
return new SoftmaxLayer<Dtype>(param); |
||||
case LayerParameter_LayerType_SOFTMAX_LOSS: |
||||
return new SoftmaxWithLossLayer<Dtype>(param); |
||||
case LayerParameter_LayerType_SPLIT: |
||||
return new SplitLayer<Dtype>(param); |
||||
case LayerParameter_LayerType_TANH: |
||||
return new TanHLayer<Dtype>(param); |
||||
case LayerParameter_LayerType_WINDOW_DATA: |
||||
return new WindowDataLayer<Dtype>(param); |
||||
case LayerParameter_LayerType_NONE: |
||||
LOG(FATAL) << "Layer " << name << " has unspecified type."; |
||||
default: |
||||
LOG(FATAL) << "Layer " << name << " has unknown type " << type; |
||||
} |
||||
// just to suppress old compiler warnings.
|
||||
return (Layer<Dtype>*)(NULL); |
||||
} |
||||
|
||||
template Layer<float>* GetLayer(const LayerParameter& param); |
||||
template Layer<double>* GetLayer(const LayerParameter& param); |
||||
|
||||
} // namespace caffe
|
||||
|
||||
#endif // CAFFE_LAYER_FACTORY_HPP_
|
@@ -0,0 +1,64 @@
||||
// Copyright 2014 BVLC and contributors.
|
||||
|
||||
#include <algorithm> |
||||
#include <cmath> |
||||
#include <cfloat> |
||||
#include <vector> |
||||
|
||||
#include "caffe/layer.hpp" |
||||
#include "caffe/vision_layers.hpp" |
||||
#include "caffe/util/math_functions.hpp" |
||||
#include "caffe/util/io.hpp" |
||||
|
||||
using std::max; |
||||
|
||||
namespace caffe { |
||||
|
||||
template <typename Dtype> |
||||
void AccuracyLayer<Dtype>::SetUp( |
||||
const vector<Blob<Dtype>*>& bottom, vector<Blob<Dtype>*>* top) { |
||||
Layer<Dtype>::SetUp(bottom, top); |
||||
CHECK_EQ(bottom[0]->num(), bottom[1]->num()) |
||||
<< "The data and label should have the same number."; |
||||
CHECK_EQ(bottom[1]->channels(), 1); |
||||
CHECK_EQ(bottom[1]->height(), 1); |
||||
CHECK_EQ(bottom[1]->width(), 1); |
||||
(*top)[0]->Reshape(1, 2, 1, 1); |
||||
} |
||||
|
||||
template <typename Dtype> |
||||
Dtype AccuracyLayer<Dtype>::Forward_cpu(const vector<Blob<Dtype>*>& bottom, |
||||
vector<Blob<Dtype>*>* top) { |
||||
Dtype accuracy = 0; |
||||
Dtype logprob = 0; |
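// Note: "accuracy" counts top-1 hits over the batch and "logprob" sums
// -log p(true label), so top[0] ends up holding (mean accuracy, mean
// cross-entropy) for the batch.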
||||
const Dtype* bottom_data = bottom[0]->cpu_data(); |
||||
const Dtype* bottom_label = bottom[1]->cpu_data(); |
||||
int num = bottom[0]->num(); |
||||
int dim = bottom[0]->count() / bottom[0]->num(); |
||||
for (int i = 0; i < num; ++i) { |
||||
// Accuracy
|
||||
Dtype maxval = -FLT_MAX; |
||||
int max_id = 0; |
||||
for (int j = 0; j < dim; ++j) { |
||||
if (bottom_data[i * dim + j] > maxval) { |
||||
maxval = bottom_data[i * dim + j]; |
||||
max_id = j; |
||||
} |
||||
} |
||||
if (max_id == static_cast<int>(bottom_label[i])) { |
||||
++accuracy; |
||||
} |
||||
Dtype prob = max(bottom_data[i * dim + static_cast<int>(bottom_label[i])], |
||||
Dtype(kLOG_THRESHOLD)); |
||||
logprob -= log(prob); |
||||
} |
||||
// LOG(INFO) << "Accuracy: " << accuracy;
|
||||
(*top)[0]->mutable_cpu_data()[0] = accuracy / num; |
||||
(*top)[0]->mutable_cpu_data()[1] = logprob / num; |
||||
// Accuracy layer should not be used as a loss function.
|
||||
return Dtype(0); |
||||
} |
||||
|
||||
INSTANTIATE_CLASS(AccuracyLayer); |
||||
|
||||
} // namespace caffe
|
@@ -0,0 +1,55 @@
||||
// Copyright 2014 BVLC and contributors.
|
||||
|
||||
#include <vector> |
||||
#include <cfloat> |
||||
|
||||
#include "caffe/layer.hpp" |
||||
#include "caffe/vision_layers.hpp" |
||||
|
||||
|
||||
namespace caffe { |
||||
|
||||
template <typename Dtype> |
||||
void ArgMaxLayer<Dtype>::SetUp(const vector<Blob<Dtype>*>& bottom, |
||||
vector<Blob<Dtype>*>* top) { |
||||
Layer<Dtype>::SetUp(bottom, top); |
||||
out_max_val_ = this->layer_param_.argmax_param().out_max_val(); |
||||
if (out_max_val_) { |
||||
// Produces max_ind and max_val
|
||||
(*top)[0]->Reshape(bottom[0]->num(), 2, 1, 1); |
||||
} else { |
||||
// Produces only max_ind
|
||||
(*top)[0]->Reshape(bottom[0]->num(), 1, 1, 1); |
||||
} |
||||
} |
||||
|
||||
template <typename Dtype> |
||||
Dtype ArgMaxLayer<Dtype>::Forward_cpu(const vector<Blob<Dtype>*>& bottom, |
||||
vector<Blob<Dtype>*>* top) { |
||||
const Dtype* bottom_data = bottom[0]->cpu_data(); |
||||
Dtype* top_data = (*top)[0]->mutable_cpu_data(); |
||||
int num = bottom[0]->num(); |
||||
int dim = bottom[0]->count() / bottom[0]->num(); |
||||
for (int i = 0; i < num; ++i) { |
||||
Dtype max_val = -FLT_MAX; |
||||
int max_ind = 0; |
||||
for (int j = 0; j < dim; ++j) { |
||||
if (bottom_data[i * dim + j] > max_val) { |
||||
max_val = bottom_data[i * dim + j]; |
||||
max_ind = j; |
||||
} |
||||
} |
||||
if (out_max_val_) { |
||||
top_data[i * 2] = max_ind; |
||||
top_data[i * 2 + 1] = max_val; |
||||
} else { |
||||
top_data[i] = max_ind; |
||||
} |
||||
} |
||||
return Dtype(0); |
||||
} |
||||
|
||||
INSTANTIATE_CLASS(ArgMaxLayer); |
||||
|
||||
|
||||
} // namespace caffe
|
@@ -0,0 +1,50 @@
||||
// Copyright 2014 BVLC and contributors.
|
||||
|
||||
#include <algorithm> |
||||
#include <vector> |
||||
|
||||
#include "caffe/layer.hpp" |
||||
#include "caffe/vision_layers.hpp" |
||||
|
||||
using std::min; |
||||
|
||||
namespace caffe { |
||||
|
||||
const float kBNLL_THRESHOLD = 50.; |
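// BNLL is the softplus f(x) = log(1 + exp(x)). The forward pass below
// evaluates it as x + log(1 + exp(-x)) for positive inputs so that exp()
// never overflows, and kBNLL_THRESHOLD caps the exponent used in backward.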
||||
|
||||
template <typename Dtype> |
||||
Dtype BNLLLayer<Dtype>::Forward_cpu(const vector<Blob<Dtype>*>& bottom, |
||||
vector<Blob<Dtype>*>* top) { |
||||
const Dtype* bottom_data = bottom[0]->cpu_data(); |
||||
Dtype* top_data = (*top)[0]->mutable_cpu_data(); |
||||
const int count = bottom[0]->count(); |
||||
for (int i = 0; i < count; ++i) { |
||||
top_data[i] = bottom_data[i] > 0 ? |
||||
bottom_data[i] + log(1. + exp(-bottom_data[i])) : |
||||
log(1. + exp(bottom_data[i])); |
||||
} |
||||
return Dtype(0); |
||||
} |
||||
|
||||
template <typename Dtype> |
||||
void BNLLLayer<Dtype>::Backward_cpu(const vector<Blob<Dtype>*>& top, |
||||
const bool propagate_down, |
||||
vector<Blob<Dtype>*>* bottom) { |
||||
if (propagate_down) { |
||||
const Dtype* bottom_data = (*bottom)[0]->cpu_data(); |
||||
const Dtype* top_diff = top[0]->cpu_diff(); |
||||
Dtype* bottom_diff = (*bottom)[0]->mutable_cpu_diff(); |
||||
const int count = (*bottom)[0]->count(); |
||||
Dtype expval; |
||||
for (int i = 0; i < count; ++i) { |
||||
expval = exp(min(bottom_data[i], Dtype(kBNLL_THRESHOLD))); |
||||
bottom_diff[i] = top_diff[i] * expval / (expval + 1.); |
||||
} |
||||
} |
||||
} |
||||
|
||||
|
||||
INSTANTIATE_CLASS(BNLLLayer); |
||||
|
||||
|
||||
} // namespace caffe
|
@@ -0,0 +1,65 @@
||||
// Copyright 2014 BVLC and contributors. |
||||
|
||||
#include <algorithm> |
||||
#include <vector> |
||||
|
||||
#include "caffe/layer.hpp" |
||||
#include "caffe/vision_layers.hpp" |
||||
|
||||
using std::max; |
||||
|
||||
namespace caffe { |
||||
|
||||
const float kBNLL_THRESHOLD = 50.; |
||||
|
||||
template <typename Dtype> |
||||
__global__ void BNLLForward(const int n, const Dtype* in, Dtype* out) { |
||||
CUDA_KERNEL_LOOP(index, n) { |
||||
out[index] = in[index] > 0 ? |
||||
in[index] + log(1. + exp(-in[index])) : |
||||
log(1. + exp(in[index])); |
||||
} |
||||
} |
||||
|
||||
template <typename Dtype> |
||||
Dtype BNLLLayer<Dtype>::Forward_gpu(const vector<Blob<Dtype>*>& bottom, |
||||
vector<Blob<Dtype>*>* top) { |
||||
const Dtype* bottom_data = bottom[0]->gpu_data(); |
||||
Dtype* top_data = (*top)[0]->mutable_gpu_data(); |
||||
const int count = bottom[0]->count(); |
||||
// NOLINT_NEXT_LINE(whitespace/operators) |
||||
BNLLForward<Dtype><<<CAFFE_GET_BLOCKS(count), CAFFE_CUDA_NUM_THREADS>>>( |
||||
count, bottom_data, top_data); |
||||
CUDA_POST_KERNEL_CHECK; |
||||
return Dtype(0); |
||||
} |
||||
|
||||
template <typename Dtype> |
||||
__global__ void BNLLBackward(const int n, const Dtype* in_diff, |
||||
const Dtype* in_data, Dtype* out_diff) { |
||||
CUDA_KERNEL_LOOP(index, n) { |
||||
Dtype expval = exp(min(in_data[index], Dtype(kBNLL_THRESHOLD))); |
||||
out_diff[index] = in_diff[index] * expval / (expval + 1.); |
||||
} |
||||
} |
||||
|
||||
template <typename Dtype> |
||||
void BNLLLayer<Dtype>::Backward_gpu(const vector<Blob<Dtype>*>& top, |
||||
const bool propagate_down, |
||||
vector<Blob<Dtype>*>* bottom) { |
||||
if (propagate_down) { |
||||
const Dtype* bottom_data = (*bottom)[0]->gpu_data(); |
||||
const Dtype* top_diff = top[0]->gpu_diff(); |
||||
Dtype* bottom_diff = (*bottom)[0]->mutable_gpu_diff(); |
||||
const int count = (*bottom)[0]->count(); |
||||
// NOLINT_NEXT_LINE(whitespace/operators) |
||||
BNLLBackward<Dtype><<<CAFFE_GET_BLOCKS(count), CAFFE_CUDA_NUM_THREADS>>>( |
||||
count, top_diff, bottom_data, bottom_diff); |
||||
CUDA_POST_KERNEL_CHECK; |
||||
} |
||||
} |
||||
|
||||
INSTANTIATE_CLASS(BNLLLayer); |
||||
|
||||
|
||||
} // namespace caffe |
@@ -0,0 +1,101 @@
||||
// Copyright 2014 BVLC and contributors.
|
||||
|
||||
#include <vector> |
||||
|
||||
#include "caffe/layer.hpp" |
||||
#include "caffe/vision_layers.hpp" |
||||
#include "caffe/util/math_functions.hpp" |
||||
|
||||
namespace caffe { |
||||
|
||||
template <typename Dtype> |
||||
void ConcatLayer<Dtype>::SetUp(const vector<Blob<Dtype>*>& bottom, |
||||
vector<Blob<Dtype>*>* top) { |
||||
Layer<Dtype>::SetUp(bottom, top); |
||||
concat_dim_ = this->layer_param_.concat_param().concat_dim(); |
||||
CHECK_GE(concat_dim_, 0) << |
||||
"concat_dim should be >= 0"; |
||||
CHECK_LE(concat_dim_, 1) << |
||||
"For now concat_dim <=1, it can only concat num and channels"; |
||||
|
||||
// Initialize with the first blob.
|
||||
count_ = bottom[0]->count(); |
||||
num_ = bottom[0]->num(); |
||||
channels_ = bottom[0]->channels(); |
||||
height_ = bottom[0]->height(); |
||||
width_ = bottom[0]->width(); |
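// The size along the concatenation dimension is accumulated below: along
// num (0) whole blobs are stacked back to back, while along channels (1)
// each image's channels from the different bottoms are copied into
// consecutive channel ranges of the top blob.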
||||
for (int i = 1; i < bottom.size(); ++i) { |
||||
count_ += bottom[i]->count(); |
||||
if (concat_dim_== 0) { |
||||
num_ += bottom[i]->num(); |
||||
} else if (concat_dim_ == 1) { |
||||
channels_ += bottom[i]->channels(); |
||||
} else if (concat_dim_ == 2) { |
||||
height_ += bottom[i]->height(); |
||||
} else if (concat_dim_ == 3) { |
||||
width_ += bottom[i]->width(); |
||||
} |
||||
} |
||||
(*top)[0]->Reshape(num_, channels_, height_, width_); |
||||
CHECK_EQ(count_, (*top)[0]->count()); |
||||
} |
||||
|
||||
template <typename Dtype> |
||||
Dtype ConcatLayer<Dtype>::Forward_cpu(const vector<Blob<Dtype>*>& bottom, |
||||
vector<Blob<Dtype>*>* top) { |
||||
Dtype* top_data = (*top)[0]->mutable_cpu_data(); |
||||
if (concat_dim_== 0) { |
||||
int offset_num = 0; |
||||
for (int i = 0; i < bottom.size(); ++i) { |
||||
const Dtype* bottom_data = bottom[i]->cpu_data(); |
||||
int num_elem = bottom[i]->count(); |
||||
caffe_copy(num_elem, bottom_data, top_data+(*top)[0]->offset(offset_num)); |
||||
offset_num += bottom[i]->num(); |
||||
} |
||||
} else if (concat_dim_ == 1) { |
||||
int offset_channel = 0; |
||||
for (int i = 0; i < bottom.size(); ++i) { |
||||
const Dtype* bottom_data = bottom[i]->cpu_data(); |
||||
int num_elem = |
||||
bottom[i]->channels()*bottom[i]->height()*bottom[i]->width(); |
||||
for (int n = 0; n < num_; ++n) { |
||||
caffe_copy(num_elem, bottom_data+bottom[i]->offset(n), |
||||
top_data+(*top)[0]->offset(n, offset_channel)); |
||||
} |
||||
offset_channel += bottom[i]->channels(); |
||||
} // concat_dim_ is guaranteed to be 0 or 1 by SetUp.
|
||||
} |
||||
return Dtype(0.); |
||||
} |
||||
|
||||
template <typename Dtype> |
||||
void ConcatLayer<Dtype>::Backward_cpu(const vector<Blob<Dtype>*>& top, |
||||
const bool propagate_down, vector<Blob<Dtype>*>* bottom) { |
||||
const Dtype* top_diff = top[0]->cpu_diff(); |
||||
if (concat_dim_ == 0) { |
||||
int offset_num = 0; |
||||
for (int i = 0; i < bottom->size(); ++i) { |
||||
Blob<Dtype>* blob = (*bottom)[i]; |
||||
Dtype* bottom_diff = blob->mutable_cpu_diff(); |
||||
caffe_copy(blob->count(), |
||||
top_diff+top[0]->offset(offset_num), bottom_diff); |
||||
offset_num += blob->num(); |
||||
} |
||||
} else if (concat_dim_ == 1) { |
||||
int offset_channel = 0; |
||||
for (int i = 0; i < bottom->size(); ++i) { |
||||
Blob<Dtype>* blob = (*bottom)[i]; |
||||
Dtype* bottom_diff = blob->mutable_cpu_diff(); |
||||
int num_elem = blob->channels()*blob->height()*blob->width(); |
||||
for (int n = 0; n < num_; ++n) { |
||||
caffe_copy(num_elem, top_diff+top[0]->offset(n, offset_channel), |
||||
bottom_diff+blob->offset(n)); |
||||
} |
||||
offset_channel += blob->channels(); |
||||
} |
||||
} // concat_dim_ is guaranteed to be 0 or 1 by SetUp.
|
||||
} |
||||
|
||||
INSTANTIATE_CLASS(ConcatLayer); |
||||
|
||||
} // namespace caffe
|
@@ -0,0 +1,75 @@
||||
// Copyright 2014 BVLC and contributors. |
||||
|
||||
#include <vector> |
||||
|
||||
#include "caffe/layer.hpp" |
||||
#include "caffe/vision_layers.hpp" |
||||
#include "caffe/util/math_functions.hpp" |
||||
|
||||
namespace caffe { |
||||
|
||||
template <typename Dtype> |
||||
Dtype ConcatLayer<Dtype>::Forward_gpu(const vector<Blob<Dtype>*>& bottom, |
||||
vector<Blob<Dtype>*>* top) { |
||||
Dtype* top_data = (*top)[0]->mutable_gpu_data(); |
||||
if (concat_dim_ == 0) { |
||||
int offset_num = 0; |
||||
for (int i = 0; i < bottom.size(); ++i) { |
||||
const Dtype* bottom_data = bottom[i]->gpu_data(); |
||||
caffe_gpu_copy(bottom[i]->count(), bottom_data, |
||||
top_data + (*top)[0]->offset(offset_num)); |
||||
offset_num += bottom[i]->num(); |
||||
} |
||||
} else if (concat_dim_ == 1) { |
||||
int offset_channel = 0; |
||||
for (int i = 0; i < bottom.size(); ++i) { |
||||
const Dtype* bottom_data = bottom[i]->gpu_data(); |
||||
int num_elem = |
||||
bottom[i]->channels() * bottom[i]->height() * bottom[i]->width(); |
||||
for (int n = 0; n < num_; ++n) { |
||||
caffe_gpu_copy(num_elem, bottom_data+bottom[i]->offset(n), |
||||
top_data + (*top)[0]->offset(n, offset_channel)); |
||||
} |
||||
offset_channel += bottom[i]->channels(); |
||||
} |
||||
} else { |
||||
LOG(FATAL) << "concat_dim along dim" << concat_dim_ << |
||||
" not implemented yet"; |
||||
} |
||||
return Dtype(0.); |
||||
} |
||||
|
||||
template <typename Dtype> |
||||
void ConcatLayer<Dtype>::Backward_gpu(const vector<Blob<Dtype>*>& top, |
||||
const bool propagate_down, vector<Blob<Dtype>*>* bottom) { |
||||
const Dtype* top_diff = top[0]->gpu_diff(); |
||||
if (concat_dim_ == 0) { |
||||
int offset_num = 0; |
||||
for (int i = 0; i < bottom->size(); ++i) { |
||||
Blob<Dtype>* blob = (*bottom)[i]; |
||||
Dtype* bottom_diff = blob->mutable_gpu_diff(); |
||||
caffe_gpu_copy(blob->count(), |
||||
top_diff + top[0]->offset(offset_num), bottom_diff); |
||||
offset_num += blob->num(); |
||||
} |
||||
} else if (concat_dim_ == 1) { |
||||
int offset_channel = 0; |
||||
for (int i = 0; i < bottom->size(); ++i) { |
||||
Blob<Dtype>* blob = (*bottom)[i]; |
||||
Dtype* bottom_diff = blob->mutable_gpu_diff(); |
||||
int num_elem = blob->channels()*blob->height()*blob->width(); |
||||
for (int n = 0; n < num_; ++n) { |
||||
caffe_gpu_copy(num_elem, top_diff + top[0]->offset(n, offset_channel), |
||||
bottom_diff + blob->offset(n)); |
||||
} |
||||
offset_channel += blob->channels(); |
||||
} |
||||
} else { |
||||
LOG(FATAL) << "concat_dim along dim" << concat_dim_ << |
||||
" not implemented yet"; |
||||
} |
||||
} |
||||
|
||||
INSTANTIATE_CLASS(ConcatLayer); |
||||
|
||||
} // namespace caffe |
@@ -0,0 +1,167 @@
||||
// Copyright 2014 BVLC and contributors.
|
||||
|
||||
#include <vector> |
||||
|
||||
#include "caffe/layer.hpp" |
||||
#include "caffe/vision_layers.hpp" |
||||
#include "caffe/util/im2col.hpp" |
||||
#include "caffe/filler.hpp" |
||||
#include "caffe/util/math_functions.hpp" |
||||
|
||||
namespace caffe { |
||||
|
||||
template <typename Dtype> |
||||
void ConvolutionLayer<Dtype>::SetUp(const vector<Blob<Dtype>*>& bottom, |
||||
vector<Blob<Dtype>*>* top) { |
||||
Layer<Dtype>::SetUp(bottom, top); |
||||
kernel_size_ = this->layer_param_.convolution_param().kernel_size(); |
||||
stride_ = this->layer_param_.convolution_param().stride(); |
||||
group_ = this->layer_param_.convolution_param().group(); |
||||
pad_ = this->layer_param_.convolution_param().pad(); |
||||
num_ = bottom[0]->num(); |
||||
channels_ = bottom[0]->channels(); |
||||
height_ = bottom[0]->height(); |
||||
width_ = bottom[0]->width(); |
||||
num_output_ = this->layer_param_.convolution_param().num_output(); |
||||
CHECK_GT(num_output_, 0); |
||||
CHECK_EQ(channels_ % group_, 0); |
||||
// The im2col result buffer would only hold one image at a time to avoid
|
||||
// overly large memory usage.
|
||||
int height_out = (height_ + 2 * pad_ - kernel_size_) / stride_ + 1; |
||||
int width_out = (width_ + 2 * pad_ - kernel_size_) / stride_ + 1; |
||||
col_buffer_.Reshape( |
||||
1, channels_ * kernel_size_ * kernel_size_, height_out, width_out); |
||||
// Set the parameters
|
||||
CHECK_EQ(num_output_ % group_, 0) |
||||
<< "Number of output should be multiples of group."; |
||||
bias_term_ = this->layer_param_.convolution_param().bias_term(); |
||||
// Figure out the dimensions for individual gemms.
|
||||
M_ = num_output_ / group_; |
||||
K_ = channels_ * kernel_size_ * kernel_size_ / group_; |
||||
N_ = height_out * width_out; |
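// Each group is handled as one (M_ x K_) * (K_ x N_) GEMM: M_ output maps
// per group, K_ entries per im2col patch per group, and N_ output spatial
// locations.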
||||
(*top)[0]->Reshape(bottom[0]->num(), num_output_, height_out, width_out); |
||||
// Check if we need to set up the weights
|
||||
if (this->blobs_.size() > 0) { |
||||
LOG(INFO) << "Skipping parameter initialization"; |
||||
} else { |
||||
if (bias_term_) { |
||||
this->blobs_.resize(2); |
||||
} else { |
||||
this->blobs_.resize(1); |
||||
} |
||||
// Initialize the weight
|
||||
this->blobs_[0].reset(new Blob<Dtype>( |
||||
num_output_, channels_ / group_, kernel_size_, kernel_size_)); |
||||
// fill the weights
|
||||
shared_ptr<Filler<Dtype> > weight_filler(GetFiller<Dtype>( |
||||
this->layer_param_.convolution_param().weight_filler())); |
||||
weight_filler->Fill(this->blobs_[0].get()); |
||||
// If necessary, initialize and fill the bias term
|
||||
if (bias_term_) { |
||||
this->blobs_[1].reset(new Blob<Dtype>(1, 1, 1, num_output_)); |
||||
shared_ptr<Filler<Dtype> > bias_filler(GetFiller<Dtype>( |
||||
this->layer_param_.convolution_param().bias_filler())); |
||||
bias_filler->Fill(this->blobs_[1].get()); |
||||
} |
||||
} |
||||
// Set up the bias multiplier
|
||||
if (bias_term_) { |
||||
bias_multiplier_.reset(new SyncedMemory(N_ * sizeof(Dtype))); |
||||
Dtype* bias_multiplier_data = |
||||
reinterpret_cast<Dtype*>(bias_multiplier_->mutable_cpu_data()); |
||||
for (int i = 0; i < N_; ++i) { |
||||
bias_multiplier_data[i] = 1.; |
||||
} |
||||
} |
||||
} |
||||
|
||||
|
||||
template <typename Dtype> |
||||
Dtype ConvolutionLayer<Dtype>::Forward_cpu(const vector<Blob<Dtype>*>& bottom, |
||||
vector<Blob<Dtype>*>* top) { |
||||
const Dtype* bottom_data = bottom[0]->cpu_data(); |
||||
Dtype* top_data = (*top)[0]->mutable_cpu_data(); |
||||
Dtype* col_data = col_buffer_.mutable_cpu_data(); |
||||
const Dtype* weight = this->blobs_[0]->cpu_data(); |
||||
int weight_offset = M_ * K_; |
||||
int col_offset = K_ * N_; |
||||
int top_offset = M_ * N_; |
||||
for (int n = 0; n < num_; ++n) { |
||||
// First, im2col
|
||||
im2col_cpu(bottom_data + bottom[0]->offset(n), channels_, height_, |
||||
width_, kernel_size_, pad_, stride_, col_data); |
||||
// Second, innerproduct with groups
|
||||
for (int g = 0; g < group_; ++g) { |
||||
caffe_cpu_gemm<Dtype>(CblasNoTrans, CblasNoTrans, M_, N_, K_, |
||||
(Dtype)1., weight + weight_offset * g, col_data + col_offset * g, |
||||
(Dtype)0., top_data + (*top)[0]->offset(n) + top_offset * g); |
||||
} |
||||
// third, add bias
|
||||
if (bias_term_) { |
||||
caffe_cpu_gemm<Dtype>(CblasNoTrans, CblasNoTrans, num_output_, |
||||
N_, 1, (Dtype)1., this->blobs_[1]->cpu_data(), |
||||
reinterpret_cast<const Dtype*>(bias_multiplier_->cpu_data()), |
||||
(Dtype)1., top_data + (*top)[0]->offset(n)); |
||||
} |
||||
} |
||||
return Dtype(0.); |
||||
} |
||||
|
||||
template <typename Dtype> |
||||
void ConvolutionLayer<Dtype>::Backward_cpu(const vector<Blob<Dtype>*>& top, |
||||
const bool propagate_down, vector<Blob<Dtype>*>* bottom) { |
||||
const Dtype* top_diff = top[0]->cpu_diff(); |
||||
const Dtype* weight = this->blobs_[0]->cpu_data(); |
||||
Dtype* weight_diff = this->blobs_[0]->mutable_cpu_diff(); |
||||
const Dtype* bottom_data = (*bottom)[0]->cpu_data(); |
||||
Dtype* bottom_diff = (*bottom)[0]->mutable_cpu_diff(); |
||||
Dtype* col_data = col_buffer_.mutable_cpu_data(); |
||||
Dtype* col_diff = col_buffer_.mutable_cpu_diff(); |
||||
// bias gradient if necessary
|
||||
Dtype* bias_diff = NULL; |
||||
|
||||
if (bias_term_) { |
||||
bias_diff = this->blobs_[1]->mutable_cpu_diff(); |
||||
memset(bias_diff, 0, sizeof(Dtype) * this->blobs_[1]->count()); |
||||
for (int n = 0; n < num_; ++n) { |
||||
caffe_cpu_gemv<Dtype>(CblasNoTrans, num_output_, N_, |
||||
1., top_diff + top[0]->offset(n), |
||||
reinterpret_cast<const Dtype*>(bias_multiplier_->cpu_data()), 1., |
||||
bias_diff); |
||||
} |
||||
} |
||||
|
||||
int weight_offset = M_ * K_; |
||||
int col_offset = K_ * N_; |
||||
int top_offset = M_ * N_; |
||||
memset(weight_diff, 0, sizeof(Dtype) * this->blobs_[0]->count()); |
||||
for (int n = 0; n < num_; ++n) { |
||||
// since we saved memory in the forward pass by not storing all col data,
|
||||
// we will need to recompute them.
|
||||
im2col_cpu(bottom_data + (*bottom)[0]->offset(n), channels_, height_, |
||||
width_, kernel_size_, pad_, stride_, col_data); |
||||
// gradient w.r.t. weight. Note that we will accumulate diffs.
|
||||
for (int g = 0; g < group_; ++g) { |
||||
caffe_cpu_gemm<Dtype>(CblasNoTrans, CblasTrans, M_, K_, N_, |
||||
(Dtype)1., top_diff + top[0]->offset(n) + top_offset * g, |
||||
col_data + col_offset * g, (Dtype)1., |
||||
weight_diff + weight_offset * g); |
||||
} |
||||
// gradient w.r.t. bottom data, if necessary
|
||||
if (propagate_down) { |
||||
for (int g = 0; g < group_; ++g) { |
||||
caffe_cpu_gemm<Dtype>(CblasTrans, CblasNoTrans, K_, N_, M_, |
||||
(Dtype)1., weight + weight_offset * g, |
||||
top_diff + top[0]->offset(n) + top_offset * g, |
||||
(Dtype)0., col_diff + col_offset * g); |
||||
} |
||||
// col2im back to the data
|
||||
col2im_cpu(col_diff, channels_, height_, width_, kernel_size_, pad_, |
||||
stride_, bottom_diff + (*bottom)[0]->offset(n)); |
||||
} |
||||
} |
||||
} |
||||
|
||||
INSTANTIATE_CLASS(ConvolutionLayer); |
||||
|
||||
} // namespace caffe
|
@@ -0,0 +1,104 @@
||||
// Copyright 2014 BVLC and contributors. |
||||
|
||||
#include <vector> |
||||
|
||||
#include "caffe/layer.hpp" |
||||
#include "caffe/vision_layers.hpp" |
||||
#include "caffe/util/im2col.hpp" |
||||
#include "caffe/filler.hpp" |
||||
#include "caffe/util/math_functions.hpp" |
||||
|
||||
namespace caffe { |
||||
|
||||
template <typename Dtype> |
||||
Dtype ConvolutionLayer<Dtype>::Forward_gpu(const vector<Blob<Dtype>*>& bottom, |
||||
vector<Blob<Dtype>*>* top) { |
||||
const Dtype* bottom_data = bottom[0]->gpu_data(); |
||||
Dtype* top_data = (*top)[0]->mutable_gpu_data(); |
||||
Dtype* col_data = col_buffer_.mutable_gpu_data(); |
||||
const Dtype* weight = this->blobs_[0]->gpu_data(); |
||||
int weight_offset = M_ * K_; |
||||
int col_offset = K_ * N_; |
||||
int top_offset = M_ * N_; |
||||
for (int n = 0; n < num_; ++n) { |
||||
// First, im2col |
||||
im2col_gpu(bottom_data + bottom[0]->offset(n), channels_, height_, |
||||
width_, kernel_size_, pad_, stride_, col_data); |
||||
// Second, innerproduct with groups |
||||
for (int g = 0; g < group_; ++g) { |
||||
caffe_gpu_gemm<Dtype>(CblasNoTrans, CblasNoTrans, M_, N_, K_, |
||||
(Dtype)1., weight + weight_offset * g, col_data + col_offset * g, |
||||
(Dtype)0., top_data + (*top)[0]->offset(n) + top_offset * g); |
||||
} |
||||
// third, add bias |
||||
if (bias_term_) { |
||||
caffe_gpu_gemm<Dtype>(CblasNoTrans, CblasNoTrans, num_output_, |
||||
N_, 1, (Dtype)1., this->blobs_[1]->gpu_data(), |
||||
reinterpret_cast<const Dtype*>(bias_multiplier_->gpu_data()), |
||||
(Dtype)1., top_data + (*top)[0]->offset(n)); |
||||
} |
||||
} |
||||
return Dtype(0.); |
||||
} |
||||
|
||||
template <typename Dtype> |
||||
void ConvolutionLayer<Dtype>::Backward_gpu(const vector<Blob<Dtype>*>& top, |
||||
const bool propagate_down, vector<Blob<Dtype>*>* bottom) { |
||||
const Dtype* top_diff = top[0]->gpu_diff(); |
||||
const Dtype* weight = this->blobs_[0]->gpu_data(); |
||||
Dtype* weight_diff = this->blobs_[0]->mutable_gpu_diff(); |
||||
const Dtype* bottom_data = (*bottom)[0]->gpu_data(); |
||||
Dtype* bottom_diff = (*bottom)[0]->mutable_gpu_diff(); |
||||
Dtype* col_data = col_buffer_.mutable_gpu_data(); |
||||
Dtype* col_diff = col_buffer_.mutable_gpu_diff(); |
||||
// bias gradient if necessary |
||||
Dtype* bias_diff = NULL; |
||||
|
||||
if (bias_term_) { |
||||
bias_diff = this->blobs_[1]->mutable_gpu_diff(); |
||||
CUDA_CHECK(cudaMemset(bias_diff, 0, |
||||
sizeof(Dtype) * this->blobs_[1]->count())); |
||||
for (int n = 0; n < num_; ++n) { |
||||
caffe_gpu_gemv<Dtype>(CblasNoTrans, num_output_, N_, |
||||
1., top_diff + top[0]->offset(n), |
||||
reinterpret_cast<const Dtype*>(bias_multiplier_->gpu_data()), |
||||
1., bias_diff); |
||||
} |
||||
} |
||||
|
||||
int weight_offset = M_ * K_; |
||||
int col_offset = K_ * N_; |
||||
int top_offset = M_ * N_; |
||||
CUDA_CHECK(cudaMemset(weight_diff, 0, |
||||
sizeof(Dtype) * this->blobs_[0]->count())); |
||||
for (int n = 0; n < num_; ++n) { |
||||
// since we saved memory in the forward pass by not storing all col data, |
||||
// we will need to recompute them. |
||||
im2col_gpu(bottom_data + (*bottom)[0]->offset(n), channels_, height_, |
||||
width_, kernel_size_, pad_, stride_, col_data); |
||||
// gradient w.r.t. weight. Note that we will accumulate diffs. |
||||
for (int g = 0; g < group_; ++g) { |
||||
caffe_gpu_gemm<Dtype>(CblasNoTrans, CblasTrans, M_, K_, N_, |
||||
(Dtype)1., top_diff + top[0]->offset(n) + top_offset * g, |
||||
col_data + col_offset * g, (Dtype)1., |
||||
weight_diff + weight_offset * g); |
||||
} |
||||
// gradient w.r.t. bottom data, if necessary |
||||
if (propagate_down) { |
||||
for (int g = 0; g < group_; ++g) { |
||||
caffe_gpu_gemm<Dtype>(CblasTrans, CblasNoTrans, K_, N_, M_, |
||||
(Dtype)1., weight + weight_offset * g, |
||||
top_diff + top[0]->offset(n) + top_offset * g, |
||||
(Dtype)0., col_diff + col_offset * g); |
||||
} |
||||
// col2im back to the data |
||||
col2im_gpu(col_diff, channels_, height_, width_, kernel_size_, pad_, |
||||
stride_, bottom_diff + (*bottom)[0]->offset(n)); |
||||
} |
||||
} |
||||
} |
||||
|
||||
|
||||
INSTANTIATE_CLASS(ConvolutionLayer); |
||||
|
||||
} // namespace caffe |
@@ -0,0 +1,367 @@
||||
// Copyright 2014 BVLC and contributors.
|
||||
|
||||
#include <stdint.h> |
||||
#include <leveldb/db.h> |
||||
#include <pthread.h> |
||||
|
||||
#include <string> |
||||
#include <vector> |
||||
|
||||
#include "caffe/layer.hpp" |
||||
#include "caffe/util/io.hpp" |
||||
#include "caffe/util/math_functions.hpp" |
||||
#include "caffe/util/rng.hpp" |
||||
#include "caffe/vision_layers.hpp" |
||||
#include "caffe/proto/caffe.pb.h" |
||||
|
||||
using std::string; |
||||
|
||||
namespace caffe { |
||||
|
||||
template <typename Dtype> |
||||
void* DataLayerPrefetch(void* layer_pointer) { |
||||
CHECK(layer_pointer); |
||||
DataLayer<Dtype>* layer = static_cast<DataLayer<Dtype>*>(layer_pointer); |
||||
CHECK(layer); |
||||
Datum datum; |
||||
CHECK(layer->prefetch_data_); |
||||
Dtype* top_data = layer->prefetch_data_->mutable_cpu_data(); |
||||
Dtype* top_label; |
||||
if (layer->output_labels_) { |
||||
top_label = layer->prefetch_label_->mutable_cpu_data(); |
||||
} |
||||
const Dtype scale = layer->layer_param_.data_param().scale(); |
||||
const int batch_size = layer->layer_param_.data_param().batch_size(); |
||||
const int crop_size = layer->layer_param_.data_param().crop_size(); |
||||
const bool mirror = layer->layer_param_.data_param().mirror(); |
||||
|
||||
if (mirror && crop_size == 0) { |
||||
LOG(FATAL) << "Current implementation requires mirror and crop_size to be " |
||||
<< "set at the same time."; |
||||
} |
||||
// datum scales
|
||||
const int channels = layer->datum_channels_; |
||||
const int height = layer->datum_height_; |
||||
const int width = layer->datum_width_; |
||||
const int size = layer->datum_size_; |
||||
const Dtype* mean = layer->data_mean_.cpu_data(); |
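// Each element is preprocessed as (datum_value - mean) * scale; crops are
// sampled at random during training and taken from the image center
// otherwise, with optional random mirroring when it is enabled.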
||||
for (int item_id = 0; item_id < batch_size; ++item_id) { |
||||
// get a blob
|
||||
switch (layer->layer_param_.data_param().backend()) { |
||||
case DataParameter_DB_LEVELDB: |
||||
CHECK(layer->iter_); |
||||
CHECK(layer->iter_->Valid()); |
||||
datum.ParseFromString(layer->iter_->value().ToString()); |
||||
break; |
||||
case DataParameter_DB_LMDB: |
||||
CHECK_EQ(mdb_cursor_get(layer->mdb_cursor_, &layer->mdb_key_, |
||||
&layer->mdb_value_, MDB_GET_CURRENT), MDB_SUCCESS); |
||||
datum.ParseFromArray(layer->mdb_value_.mv_data, |
||||
layer->mdb_value_.mv_size); |
||||
break; |
||||
default: |
||||
LOG(FATAL) << "Unknown database backend"; |
||||
} |
||||
|
||||
const string& data = datum.data(); |
||||
if (crop_size) { |
||||
CHECK(data.size()) << "Image cropping only support uint8 data"; |
||||
int h_off, w_off; |
||||
// We only do random crop when we do training.
|
||||
if (layer->phase_ == Caffe::TRAIN) { |
||||
h_off = layer->PrefetchRand() % (height - crop_size); |
||||
w_off = layer->PrefetchRand() % (width - crop_size); |
||||
} else { |
||||
h_off = (height - crop_size) / 2; |
||||
w_off = (width - crop_size) / 2; |
||||
} |
||||
if (mirror && layer->PrefetchRand() % 2) { |
||||
// Copy mirrored version
|
||||
for (int c = 0; c < channels; ++c) { |
||||
for (int h = 0; h < crop_size; ++h) { |
||||
for (int w = 0; w < crop_size; ++w) { |
||||
int top_index = ((item_id * channels + c) * crop_size + h) |
||||
* crop_size + (crop_size - 1 - w); |
||||
int data_index = (c * height + h + h_off) * width + w + w_off; |
||||
Dtype datum_element = |
||||
static_cast<Dtype>(static_cast<uint8_t>(data[data_index])); |
||||
top_data[top_index] = (datum_element - mean[data_index]) * scale; |
||||
} |
||||
} |
||||
} |
||||
} else { |
||||
// Normal copy
|
||||
for (int c = 0; c < channels; ++c) { |
||||
for (int h = 0; h < crop_size; ++h) { |
||||
for (int w = 0; w < crop_size; ++w) { |
||||
int top_index = ((item_id * channels + c) * crop_size + h) |
||||
* crop_size + w; |
||||
int data_index = (c * height + h + h_off) * width + w + w_off; |
||||
Dtype datum_element = |
||||
static_cast<Dtype>(static_cast<uint8_t>(data[data_index])); |
||||
top_data[top_index] = (datum_element - mean[data_index]) * scale; |
||||
} |
||||
} |
||||
} |
||||
} |
||||
} else { |
||||
// we will prefer to use data() first, and then try float_data()
|
||||
if (data.size()) { |
||||
for (int j = 0; j < size; ++j) { |
||||
Dtype datum_element = |
||||
static_cast<Dtype>(static_cast<uint8_t>(data[j])); |
||||
top_data[item_id * size + j] = (datum_element - mean[j]) * scale; |
||||
} |
||||
} else { |
||||
for (int j = 0; j < size; ++j) { |
||||
top_data[item_id * size + j] = |
||||
(datum.float_data(j) - mean[j]) * scale; |
||||
} |
||||
} |
||||
} |
||||
|
||||
if (layer->output_labels_) { |
||||
top_label[item_id] = datum.label(); |
||||
} |
||||
// go to the next iter
|
||||
switch (layer->layer_param_.data_param().backend()) { |
||||
case DataParameter_DB_LEVELDB: |
||||
layer->iter_->Next(); |
||||
if (!layer->iter_->Valid()) { |
||||
// We have reached the end. Restart from the first.
|
||||
DLOG(INFO) << "Restarting data prefetching from start."; |
||||
layer->iter_->SeekToFirst(); |
||||
} |
||||
break; |
||||
case DataParameter_DB_LMDB: |
||||
if (mdb_cursor_get(layer->mdb_cursor_, &layer->mdb_key_, |
||||
&layer->mdb_value_, MDB_NEXT) != MDB_SUCCESS) { |
||||
// We have reached the end. Restart from the first.
|
||||
DLOG(INFO) << "Restarting data prefetching from start."; |
||||
CHECK_EQ(mdb_cursor_get(layer->mdb_cursor_, &layer->mdb_key_, |
||||
&layer->mdb_value_, MDB_FIRST), MDB_SUCCESS); |
||||
} |
||||
break; |
||||
default: |
||||
LOG(FATAL) << "Unknown database backend"; |
||||
} |
||||
} |
||||
|
||||
return static_cast<void*>(NULL); |
||||
} |
||||
|
||||
template <typename Dtype> |
||||
DataLayer<Dtype>::~DataLayer<Dtype>() { |
||||
JoinPrefetchThread(); |
||||
// clean up the database resources
|
||||
switch (this->layer_param_.data_param().backend()) { |
||||
case DataParameter_DB_LEVELDB: |
||||
break; // do nothing
|
||||
case DataParameter_DB_LMDB: |
||||
mdb_cursor_close(mdb_cursor_); |
||||
mdb_close(mdb_env_, mdb_dbi_); |
||||
mdb_txn_abort(mdb_txn_); |
||||
mdb_env_close(mdb_env_); |
||||
break; |
||||
default: |
||||
LOG(FATAL) << "Unknown database backend"; |
||||
} |
||||
} |
||||
|
||||
template <typename Dtype> |
||||
void DataLayer<Dtype>::SetUp(const vector<Blob<Dtype>*>& bottom, |
||||
vector<Blob<Dtype>*>* top) { |
||||
Layer<Dtype>::SetUp(bottom, top); |
||||
if (top->size() == 1) { |
||||
output_labels_ = false; |
||||
} else { |
||||
output_labels_ = true; |
||||
} |
||||
// Initialize DB
|
||||
switch (this->layer_param_.data_param().backend()) { |
||||
case DataParameter_DB_LEVELDB: |
||||
{ |
||||
leveldb::DB* db_temp; |
||||
leveldb::Options options; |
||||
options.create_if_missing = false; |
||||
options.max_open_files = 100; |
||||
LOG(INFO) << "Opening leveldb " << this->layer_param_.data_param().source(); |
||||
leveldb::Status status = leveldb::DB::Open( |
||||
options, this->layer_param_.data_param().source(), &db_temp); |
||||
CHECK(status.ok()) << "Failed to open leveldb " |
||||
<< this->layer_param_.data_param().source() << std::endl |
||||
<< status.ToString(); |
||||
db_.reset(db_temp); |
||||
iter_.reset(db_->NewIterator(leveldb::ReadOptions())); |
||||
iter_->SeekToFirst(); |
||||
} |
||||
break; |
||||
case DataParameter_DB_LMDB: |
||||
CHECK_EQ(mdb_env_create(&mdb_env_), MDB_SUCCESS) << "mdb_env_create failed"; |
||||
CHECK_EQ(mdb_env_set_mapsize(mdb_env_, 1099511627776), MDB_SUCCESS); // 1TB
|
||||
CHECK_EQ(mdb_env_open(mdb_env_, |
||||
this->layer_param_.data_param().source().c_str(), |
||||
MDB_RDONLY|MDB_NOTLS, 0664), MDB_SUCCESS) << "mdb_env_open failed"; |
||||
CHECK_EQ(mdb_txn_begin(mdb_env_, NULL, MDB_RDONLY, &mdb_txn_), MDB_SUCCESS) |
||||
<< "mdb_txn_begin failed"; |
||||
CHECK_EQ(mdb_open(mdb_txn_, NULL, 0, &mdb_dbi_), MDB_SUCCESS) |
||||
<< "mdb_open failed"; |
||||
CHECK_EQ(mdb_cursor_open(mdb_txn_, mdb_dbi_, &mdb_cursor_), MDB_SUCCESS) |
||||
<< "mdb_cursor_open failed"; |
||||
LOG(INFO) << "Opening lmdb " << this->layer_param_.data_param().source(); |
||||
CHECK_EQ(mdb_cursor_get(mdb_cursor_, &mdb_key_, &mdb_value_, MDB_FIRST), |
||||
MDB_SUCCESS) << "mdb_cursor_get failed"; |
||||
break; |
||||
default: |
||||
LOG(FATAL) << "Unknown database backend"; |
||||
} |
||||
|
||||
// Check if we would need to randomly skip a few data points
|
||||
if (this->layer_param_.data_param().rand_skip()) { |
||||
unsigned int skip = caffe_rng_rand() % |
||||
this->layer_param_.data_param().rand_skip(); |
||||
LOG(INFO) << "Skipping first " << skip << " data points."; |
||||
while (skip-- > 0) { |
||||
switch (this->layer_param_.data_param().backend()) { |
||||
case DataParameter_DB_LEVELDB: |
||||
iter_->Next(); |
||||
if (!iter_->Valid()) { |
||||
iter_->SeekToFirst(); |
||||
} |
||||
break; |
||||
case DataParameter_DB_LMDB: |
||||
if (mdb_cursor_get(mdb_cursor_, &mdb_key_, &mdb_value_, MDB_NEXT) |
||||
!= MDB_SUCCESS) { |
||||
CHECK_EQ(mdb_cursor_get(mdb_cursor_, &mdb_key_, &mdb_value_, |
||||
MDB_FIRST), MDB_SUCCESS); |
||||
} |
||||
break; |
||||
default: |
||||
LOG(FATAL) << "Unknown database backend"; |
||||
} |
||||
} |
||||
} |
||||
// Read a data point, and use it to initialize the top blob.
|
||||
Datum datum; |
||||
switch (this->layer_param_.data_param().backend()) { |
||||
case DataParameter_DB_LEVELDB: |
||||
datum.ParseFromString(iter_->value().ToString()); |
||||
break; |
||||
case DataParameter_DB_LMDB: |
||||
datum.ParseFromArray(mdb_value_.mv_data, mdb_value_.mv_size); |
||||
break; |
||||
default: |
||||
LOG(FATAL) << "Unknown database backend"; |
||||
} |
||||
|
||||
// image
|
||||
int crop_size = this->layer_param_.data_param().crop_size(); |
||||
if (crop_size > 0) { |
||||
(*top)[0]->Reshape(this->layer_param_.data_param().batch_size(), |
||||
datum.channels(), crop_size, crop_size); |
||||
prefetch_data_.reset(new Blob<Dtype>( |
||||
this->layer_param_.data_param().batch_size(), datum.channels(), |
||||
crop_size, crop_size)); |
||||
} else { |
||||
(*top)[0]->Reshape( |
||||
this->layer_param_.data_param().batch_size(), datum.channels(), |
||||
datum.height(), datum.width()); |
||||
prefetch_data_.reset(new Blob<Dtype>( |
||||
this->layer_param_.data_param().batch_size(), datum.channels(), |
||||
datum.height(), datum.width())); |
||||
} |
||||
LOG(INFO) << "output data size: " << (*top)[0]->num() << "," |
||||
<< (*top)[0]->channels() << "," << (*top)[0]->height() << "," |
||||
<< (*top)[0]->width(); |
||||
// label
|
||||
if (output_labels_) { |
||||
(*top)[1]->Reshape(this->layer_param_.data_param().batch_size(), 1, 1, 1); |
||||
prefetch_label_.reset( |
||||
new Blob<Dtype>(this->layer_param_.data_param().batch_size(), 1, 1, 1)); |
||||
} |
||||
// datum size
|
||||
datum_channels_ = datum.channels(); |
||||
datum_height_ = datum.height(); |
||||
datum_width_ = datum.width(); |
||||
datum_size_ = datum.channels() * datum.height() * datum.width(); |
||||
CHECK_GT(datum_height_, crop_size); |
||||
CHECK_GT(datum_width_, crop_size); |
||||
// check if we want to have mean
|
||||
if (this->layer_param_.data_param().has_mean_file()) { |
||||
const string& mean_file = this->layer_param_.data_param().mean_file(); |
||||
LOG(INFO) << "Loading mean file from" << mean_file; |
||||
BlobProto blob_proto; |
||||
ReadProtoFromBinaryFileOrDie(mean_file.c_str(), &blob_proto); |
||||
data_mean_.FromProto(blob_proto); |
||||
CHECK_EQ(data_mean_.num(), 1); |
||||
CHECK_EQ(data_mean_.channels(), datum_channels_); |
||||
CHECK_EQ(data_mean_.height(), datum_height_); |
||||
CHECK_EQ(data_mean_.width(), datum_width_); |
||||
} else { |
||||
// Simply initialize an all-empty mean.
|
||||
data_mean_.Reshape(1, datum_channels_, datum_height_, datum_width_); |
||||
} |
||||
// Now, start the prefetch thread. Before calling prefetch, we make two
|
||||
// cpu_data calls so that the prefetch thread does not accidentally make
|
||||
// simultaneous cudaMalloc calls when the main thread is running. In some
|
||||
// GPUs this seems to cause failures if we do not do so.
|
||||
prefetch_data_->mutable_cpu_data(); |
||||
if (output_labels_) { |
||||
prefetch_label_->mutable_cpu_data(); |
||||
} |
||||
data_mean_.cpu_data(); |
||||
DLOG(INFO) << "Initializing prefetch"; |
||||
CreatePrefetchThread(); |
||||
DLOG(INFO) << "Prefetch initialized."; |
||||
} |
||||
|
||||
template <typename Dtype> |
||||
void DataLayer<Dtype>::CreatePrefetchThread() { |
||||
phase_ = Caffe::phase(); |
||||
const bool prefetch_needs_rand = (phase_ == Caffe::TRAIN) && |
||||
(this->layer_param_.data_param().mirror() || |
||||
this->layer_param_.data_param().crop_size()); |
||||
if (prefetch_needs_rand) { |
||||
const unsigned int prefetch_rng_seed = caffe_rng_rand(); |
||||
prefetch_rng_.reset(new Caffe::RNG(prefetch_rng_seed)); |
||||
} else { |
||||
prefetch_rng_.reset(); |
||||
} |
||||
// Create the thread.
|
||||
CHECK(!pthread_create(&thread_, NULL, DataLayerPrefetch<Dtype>, |
||||
static_cast<void*>(this))) << "Pthread execution failed."; |
||||
} |
||||
|
||||
template <typename Dtype> |
||||
void DataLayer<Dtype>::JoinPrefetchThread() { |
||||
CHECK(!pthread_join(thread_, NULL)) << "Pthread joining failed."; |
||||
} |
||||
|
||||
template <typename Dtype> |
||||
unsigned int DataLayer<Dtype>::PrefetchRand() { |
||||
CHECK(prefetch_rng_); |
||||
caffe::rng_t* prefetch_rng = |
||||
static_cast<caffe::rng_t*>(prefetch_rng_->generator()); |
||||
return (*prefetch_rng)(); |
||||
} |
||||
|
||||
template <typename Dtype> |
||||
Dtype DataLayer<Dtype>::Forward_cpu(const vector<Blob<Dtype>*>& bottom, |
||||
vector<Blob<Dtype>*>* top) { |
||||
// First, join the thread
|
||||
JoinPrefetchThread(); |
||||
// Copy the data
|
||||
caffe_copy(prefetch_data_->count(), prefetch_data_->cpu_data(), |
||||
(*top)[0]->mutable_cpu_data()); |
||||
if (output_labels_) { |
||||
caffe_copy(prefetch_label_->count(), prefetch_label_->cpu_data(), |
||||
(*top)[1]->mutable_cpu_data()); |
||||
} |
||||
// Start a new prefetch thread
|
||||
CreatePrefetchThread(); |
||||
return Dtype(0.); |
||||
} |
||||
|
||||
INSTANTIATE_CLASS(DataLayer); |
||||
|
||||
} // namespace caffe
|
@@ -0,0 +1,39 @@
||||
// Copyright 2014 BVLC and contributors. |
||||
|
||||
#include <stdint.h> |
||||
#include <leveldb/db.h> |
||||
#include <pthread.h> |
||||
|
||||
#include <string> |
||||
#include <vector> |
||||
|
||||
#include "caffe/layer.hpp" |
||||
#include "caffe/util/io.hpp" |
||||
#include "caffe/vision_layers.hpp" |
||||
|
||||
using std::string; |
||||
|
||||
namespace caffe { |
||||
|
||||
template <typename Dtype> |
||||
Dtype DataLayer<Dtype>::Forward_gpu(const vector<Blob<Dtype>*>& bottom, |
||||
vector<Blob<Dtype>*>* top) { |
||||
// First, join the thread |
||||
JoinPrefetchThread(); |
||||
// Copy the data |
||||
CUDA_CHECK(cudaMemcpy((*top)[0]->mutable_gpu_data(), |
||||
prefetch_data_->cpu_data(), sizeof(Dtype) * prefetch_data_->count(), |
||||
cudaMemcpyHostToDevice)); |
||||
if (output_labels_) { |
||||
CUDA_CHECK(cudaMemcpy((*top)[1]->mutable_gpu_data(), |
||||
prefetch_label_->cpu_data(), sizeof(Dtype) * prefetch_label_->count(), |
||||
cudaMemcpyHostToDevice)); |
||||
} |
||||
// Start a new prefetch thread |
||||
CreatePrefetchThread(); |
||||
return Dtype(0.); |
||||
} |
||||
|
||||
INSTANTIATE_CLASS(DataLayer); |
||||
|
||||
} // namespace caffe |
@@ -0,0 +1,68 @@
||||
// Copyright 2014 BVLC and contributors.
|
||||
|
||||
// TODO (sergeyk): effect should not be dependent on phase. wasted memcpy.
|
||||
|
||||
#include <vector> |
||||
|
||||
#include "caffe/common.hpp" |
||||
#include "caffe/util/math_functions.hpp" |
||||
#include "caffe/layer.hpp" |
||||
#include "caffe/syncedmem.hpp" |
||||
#include "caffe/vision_layers.hpp" |
||||
|
||||
namespace caffe { |
||||
|
||||
template <typename Dtype> |
||||
void DropoutLayer<Dtype>::SetUp(const vector<Blob<Dtype>*>& bottom, |
||||
vector<Blob<Dtype>*>* top) { |
||||
NeuronLayer<Dtype>::SetUp(bottom, top); |
||||
// Set up the cache for random number generation
|
||||
rand_vec_.reset(new Blob<unsigned int>(bottom[0]->num(), |
||||
bottom[0]->channels(), bottom[0]->height(), bottom[0]->width())); |
||||
threshold_ = this->layer_param_.dropout_param().dropout_ratio(); |
||||
DCHECK(threshold_ > 0.); |
||||
DCHECK(threshold_ < 1.); |
||||
scale_ = 1. / (1. - threshold_); |
||||
uint_thres_ = static_cast<unsigned int>(UINT_MAX * threshold_); |
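// Inverted dropout: surviving activations are scaled by 1 / (1 - ratio) at
// training time, so the test-time forward pass can simply copy the input.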
||||
} |
||||
|
||||
template <typename Dtype> |
||||
Dtype DropoutLayer<Dtype>::Forward_cpu(const vector<Blob<Dtype>*>& bottom, |
||||
vector<Blob<Dtype>*>* top) { |
||||
const Dtype* bottom_data = bottom[0]->cpu_data(); |
||||
Dtype* top_data = (*top)[0]->mutable_cpu_data(); |
||||
unsigned int* mask = rand_vec_->mutable_cpu_data(); |
||||
const int count = bottom[0]->count(); |
||||
if (Caffe::phase() == Caffe::TRAIN) { |
||||
// Create random numbers
|
||||
caffe_rng_bernoulli(count, 1. - threshold_, mask); |
||||
for (int i = 0; i < count; ++i) { |
||||
top_data[i] = bottom_data[i] * mask[i] * scale_; |
||||
} |
||||
} else { |
||||
caffe_copy(bottom[0]->count(), bottom_data, top_data); |
||||
} |
||||
return Dtype(0); |
||||
} |
||||
|
||||
template <typename Dtype> |
||||
void DropoutLayer<Dtype>::Backward_cpu(const vector<Blob<Dtype>*>& top, |
||||
const bool propagate_down, |
||||
vector<Blob<Dtype>*>* bottom) { |
||||
CHECK(Caffe::phase() == Caffe::TRAIN); |
||||
if (propagate_down) { |
||||
const Dtype* top_diff = top[0]->cpu_diff(); |
||||
Dtype* bottom_diff = (*bottom)[0]->mutable_cpu_diff(); |
||||
const unsigned int* mask = rand_vec_->cpu_data(); |
||||
const int count = (*bottom)[0]->count(); |
||||
for (int i = 0; i < count; ++i) { |
||||
bottom_diff[i] = top_diff[i] * mask[i] * scale_; |
||||
} |
||||
} |
||||
} |
||||
|
||||
|
||||
INSTANTIATE_CLASS(DropoutLayer); |
||||
|
||||
|
||||
} // namespace caffe
|
@@ -0,0 +1,78 @@
||||
// Copyright 2014 BVLC and contributors. |
||||
|
||||
#include <algorithm> |
||||
#include <limits> |
||||
#include <vector> |
||||
|
||||
#include "caffe/common.hpp" |
||||
#include "caffe/layer.hpp" |
||||
#include "caffe/syncedmem.hpp" |
||||
#include "caffe/vision_layers.hpp" |
||||
#include "caffe/util/math_functions.hpp" |
||||
|
||||
using std::max; |
||||
|
||||
namespace caffe { |
||||
|
||||
|
||||
template <typename Dtype> |
||||
__global__ void DropoutForward(const int n, const Dtype* in, |
||||
const unsigned int* mask, const unsigned int threshold, const float scale, |
||||
Dtype* out) { |
||||
CUDA_KERNEL_LOOP(index, n) { |
||||
out[index] = in[index] * (mask[index] > threshold) * scale; |
||||
} |
||||
} |
||||
|
||||
template <typename Dtype> |
||||
Dtype DropoutLayer<Dtype>::Forward_gpu(const vector<Blob<Dtype>*>& bottom, |
||||
vector<Blob<Dtype>*>* top) { |
||||
const Dtype* bottom_data = bottom[0]->gpu_data(); |
||||
Dtype* top_data = (*top)[0]->mutable_gpu_data(); |
||||
const int count = bottom[0]->count(); |
||||
if (Caffe::phase() == Caffe::TRAIN) { |
||||
unsigned int* mask = |
||||
static_cast<unsigned int*>(rand_vec_->mutable_gpu_data()); |
||||
caffe_gpu_rng_uniform(count, mask); |
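// mask now holds uniformly distributed unsigned ints; a unit is kept iff
// mask[i] > uint_thres_, i.e. with probability 1 - threshold_.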
||||
// set thresholds |
||||
// NOLINT_NEXT_LINE(whitespace/operators) |
||||
DropoutForward<Dtype><<<CAFFE_GET_BLOCKS(count), CAFFE_CUDA_NUM_THREADS>>>( |
||||
count, bottom_data, mask, uint_thres_, scale_, top_data); |
||||
CUDA_POST_KERNEL_CHECK; |
||||
} else { |
||||
caffe_gpu_copy(count, bottom_data, top_data); |
||||
} |
||||
return Dtype(0); |
||||
} |
||||
|
||||
template <typename Dtype> |
||||
__global__ void DropoutBackward(const int n, const Dtype* in_diff, |
||||
const unsigned int* mask, const unsigned int threshold, const float scale, |
||||
Dtype* out_diff) { |
||||
CUDA_KERNEL_LOOP(index, n) { |
||||
out_diff[index] = in_diff[index] * scale * (mask[index] > threshold); |
||||
} |
||||
} |
||||
|
||||
template <typename Dtype> |
||||
void DropoutLayer<Dtype>::Backward_gpu(const vector<Blob<Dtype>*>& top, |
||||
const bool propagate_down, |
||||
vector<Blob<Dtype>*>* bottom) { |
||||
CHECK(Caffe::phase() == Caffe::TRAIN); |
||||
if (propagate_down) { |
||||
const Dtype* top_diff = top[0]->gpu_diff(); |
||||
Dtype* bottom_diff = (*bottom)[0]->mutable_gpu_diff(); |
||||
const unsigned int* mask = |
||||
static_cast<const unsigned int*>(rand_vec_->gpu_data()); |
||||
const int count = (*bottom)[0]->count(); |
||||
// NOLINT_NEXT_LINE(whitespace/operators) |
||||
DropoutBackward<Dtype><<<CAFFE_GET_BLOCKS(count), CAFFE_CUDA_NUM_THREADS>>>( |
||||
count, top_diff, mask, uint_thres_, scale_, bottom_diff); |
||||
CUDA_POST_KERNEL_CHECK; |
||||
} |
||||
} |
||||
|
||||
INSTANTIATE_CLASS(DropoutLayer); |
||||
|
||||
|
||||
} // namespace caffe |
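The GPU path draws its mask differently from the CPU path: caffe_gpu_rng_uniform fills rand_vec_ with uniform random unsigned integers, and DropoutForward keeps element i whenever mask[i] > uint_thres_. Assuming uint_thres_ is set during SetUp (not part of this hunk) to roughly threshold_ times UINT_MAX, that comparison keeps each unit with probability about 1 - threshold_, matching the Bernoulli draw on the CPU. Below is a host-side sketch of that masking rule, with illustrative names only.

    // Host-side sketch of the integer-threshold masking used by the kernels
    // above. The relation uint_thres ~= threshold * UINT_MAX is an assumption
    // about SetUp, which is not shown in this hunk.
    #include <cstddef>
    #include <cstdint>
    #include <limits>
    #include <random>
    #include <vector>

    void dropout_mask_uniform(std::size_t n, float threshold,
                              std::vector<std::uint32_t>* mask) {
      const std::uint32_t max32 = std::numeric_limits<std::uint32_t>::max();
      const std::uint32_t uint_thres =
          static_cast<std::uint32_t>(threshold * static_cast<double>(max32));
      std::mt19937 gen(std::random_device{}());
      std::uniform_int_distribution<std::uint32_t> uniform(0u, max32);
      mask->resize(n);
      for (std::size_t i = 0; i < n; ++i) {
        // 1 means "keep"; the kernel folds this comparison into its multiply.
        (*mask)[i] = (uniform(gen) > uint_thres) ? 1u : 0u;
      }
    }

Doing the comparison inside the kernel keeps mask generation to a single bulk RNG call on the device instead of a separate Bernoulli pass.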
@ -0,0 +1,100 @@
// Copyright 2014 BVLC and contributors.

#include <vector>

#include "caffe/filler.hpp"
#include "caffe/layer.hpp"
#include "caffe/vision_layers.hpp"

namespace caffe {

template <typename Dtype>
void DummyDataLayer<Dtype>::SetUp(const vector<Blob<Dtype>*>& bottom,
    vector<Blob<Dtype>*>* top) {
  const int num_top = top->size();
  const DummyDataParameter& param = this->layer_param_.dummy_data_param();
  const int num_data_filler = param.data_filler_size();
  CHECK(num_data_filler == 0 || num_data_filler == 1 ||
        num_data_filler == num_top)
      << "Number of data fillers must be 0, 1 or equal to the number of tops: "
      << num_top << "; you specified " << num_data_filler << " data fillers.";
  CHECK(param.num_size() == 1 || param.num_size() == num_top)
      << "Must specify either a single (1) 'num' or one for each top blob "
      << "(" << num_top << "); you specified " << param.num_size() << ".";
  CHECK(param.channels_size() == 1 || param.channels_size() == num_top)
      << "Must specify either a single (1) 'channels' or one for each top blob "
      << "(" << num_top << "); you specified " << param.channels_size() << ".";
  CHECK(param.height_size() == 1 || param.height_size() == num_top)
      << "Must specify either a single (1) 'height' or one for each top blob "
      << "(" << num_top << "); you specified " << param.height_size() << ".";
  CHECK(param.width_size() == 1 || param.width_size() == num_top)
      << "Must specify either a single (1) 'width' or one for each top blob "
      << "(" << num_top << "); you specified " << param.width_size() << ".";
  // refill_[i] tells Forward i whether or not to actually refill top Blob i.
  // If refill_[i] is false, Forward does nothing for Blob i. We use this to
  // avoid wastefully refilling "constant" Blobs in every forward pass.
  // We first fill refill_ in with the INVERSE of its final values.
  // The first time we run Forward from the SetUp method, we'll fill only the
  // Blobs for which refill_ is normally false. These Blobs will never be
  // filled again.
  refill_.clear();
  fillers_.clear();
  if (num_data_filler <= 1) {
    FillerParameter filler_param;
    if (num_data_filler == 0) {
      filler_param.set_type("constant");
      filler_param.set_value(0);
    } else {
      filler_param.CopyFrom(param.data_filler(0));
    }
    // Refill on each iteration iff not using a constant filler,
    // but use the inverse of this rule for the first run.
    refill_.resize(1);
    refill_[0] = (strcmp(filler_param.type().c_str(), "constant") == 0);
    fillers_.resize(1);
    fillers_[0].reset(GetFiller<Dtype>(filler_param));
  } else {
    refill_.resize(num_top);
    fillers_.resize(num_top);
    for (int i = 0; i < num_top; ++i) {
      fillers_[i].reset(GetFiller<Dtype>(param.data_filler(i)));
      // Refill on each iteration iff not using a constant filler,
      // but use the inverse of this rule for the first run.
      refill_[i] =
          (strcmp(param.data_filler(i).type().c_str(), "constant") == 0);
    }
  }
  for (int i = 0; i < num_top; ++i) {
    const int num = (param.num_size() == 1) ? param.num(0) : param.num(i);
    const int channels =
        (param.channels_size() == 1) ? param.channels(0) : param.channels(i);
    const int height =
        (param.height_size() == 1) ? param.height(0) : param.height(i);
    const int width =
        (param.width_size() == 1) ? param.width(0) : param.width(i);
    (*top)[i]->Reshape(num, channels, height, width);
  }
  // Run Forward once, with refill_ inverted, to fill the constant Blobs.
  this->Forward(bottom, top);
  // Invert the inverted refill_ values to refill the desired (non-constant)
  // Blobs in every usual forward pass.
  for (int i = 0; i < refill_.size(); ++i) {
    refill_[i] = !refill_[i];
  }
}

template <typename Dtype>
Dtype DummyDataLayer<Dtype>::Forward_cpu(const vector<Blob<Dtype>*>& bottom,
    vector<Blob<Dtype>*>* top) {
  for (int i = 0; i < top->size(); ++i) {
    const int filler_id = (fillers_.size() > 1) ? i : 0;
    if (refill_[filler_id]) {
      fillers_[filler_id]->Fill((*top)[i]);
    }
  }
  return Dtype(0.);
}

INSTANTIATE_CLASS(DummyDataLayer);

}  // namespace caffe
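The comment block in SetUp is worth unpacking: refill_ is first written with the inverse of its final values, so the single Forward call issued from SetUp fills exactly the constant tops; the flags are then flipped, after which constant tops are never touched again while non-constant fillers refill on every pass. A minimal stand-alone sketch of that control flow follows; Top and the fill callback are stand-ins, not Caffe types.

    // Sketch of the refill_ inversion trick in DummyDataLayer::SetUp above.
    // Top and fill are illustrative stand-ins for Blob and Filler::Fill.
    #include <cstddef>
    #include <functional>
    #include <vector>

    struct Top { std::vector<float> data; };

    void dummy_data_refill_demo(std::vector<Top>& tops,
                                const std::vector<bool>& is_constant,
                                const std::function<void(Top&)>& fill) {
      // Start with the INVERSE of the final flags: the single forward pass
      // made during setup then fills exactly the constant tops.
      std::vector<bool> refill(is_constant.begin(), is_constant.end());
      auto forward = [&]() {
        for (std::size_t i = 0; i < tops.size(); ++i) {
          if (refill[i]) fill(tops[i]);
        }
      };
      forward();  // constant tops are filled exactly once, here
      for (std::size_t i = 0; i < refill.size(); ++i) {
        refill[i] = !refill[i];  // flip: now only non-constant tops refill
      }
      forward();  // a "usual" pass: constants untouched, the rest refilled
    }

The same 1-or-N convention used for data_filler also applies to num, channels, height, and width: give a single value to broadcast it to every top blob, or give one value per top.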
Some files were not shown because too many files have changed in this diff.