-
Notifications
You must be signed in to change notification settings - Fork 18.7k
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
ND Crop layer #3570
ND Crop layer #3570
Changes from 1 commit
64e78bd
952fd17
ca9fa49
e03a287
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
- Loading branch information
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -1,8 +1,10 @@ | ||
#include <algorithm> | ||
#include <functional> | ||
#include <map> | ||
#include <set> | ||
#include <vector> | ||
|
||
|
||
#include "caffe/layer.hpp" | ||
#include "caffe/layers/crop_layer.hpp" | ||
#include "caffe/net.hpp" | ||
|
@@ -13,58 +15,120 @@ namespace caffe { | |
template <typename Dtype> | ||
void CropLayer<Dtype>::LayerSetUp(const vector<Blob<Dtype>*>& bottom, | ||
const vector<Blob<Dtype>*>& top) { | ||
const CropParameter& param = this->layer_param_.crop_param(); | ||
CHECK_EQ(bottom.size(), 2) << "Wrong number of bottom blobs."; | ||
CHECK_EQ(bottom[0]->num_axes(), 4) << "Only works with 4D blobs."; | ||
CHECK_EQ(bottom[1]->num_axes(), 4) << "Only works with 4D blobs."; | ||
crop_h_ = param.offset_height(); | ||
crop_w_ = param.offset_width(); | ||
// parameter setup moved to Reshape because it depends on size. | ||
} | ||
|
||
template <typename Dtype> | ||
void CropLayer<Dtype>::Reshape(const vector<Blob<Dtype>*>& bottom, | ||
const vector<Blob<Dtype>*>& top) { | ||
// Check that the image we are cropping minus the margin is bigger than the | ||
// destination image. | ||
CHECK_GT(bottom[0]->height()-crop_h_, bottom[1]->height()) | ||
<< "invalid offset"; | ||
CHECK_GT(bottom[0]->width()-crop_w_, bottom[1]->width()) << "invalid offset"; | ||
top[0]->Reshape(bottom[0]->num(), bottom[0]->channels(), bottom[1]->height(), | ||
bottom[1]->width()); | ||
const CropParameter& param = this->layer_param_.crop_param(); | ||
// bottom[0] supplies the data | ||
// bottom[1] supplies the size | ||
int input_dim = bottom[0]->num_axes(); | ||
CHECK_LT(param.axis(), input_dim) << "crop axis bigger than input dim"; | ||
// initialize all offsets to 0 | ||
offsets = vector<int>(input_dim, 0); | ||
// initialize new shape to bottom[0] | ||
vector<int> new_shape(bottom[0]->shape()); | ||
|
||
if (param.offset_size() > 1) { | ||
// the number of crop values specified must be equal to the number | ||
// of dimensions following axis | ||
CHECK_EQ(param.axis() + param.offset_size(), input_dim) | ||
<< "number of crop values specified must be equal to the number of " | ||
<< "dimensions following axis."; | ||
} | ||
// apply crops | ||
for (int i = 0; i < input_dim; ++i) { | ||
int crop_offset = 0; | ||
int new_size = bottom[0]->shape(i); | ||
if (i >= param.axis() && param.offset_size() == 1) { | ||
// if only one crop value is supplied, crop all dimensions after axis | ||
// by this crop value | ||
crop_offset = param.offset(0); | ||
new_size = bottom[1]->shape(i); | ||
} else if (i >= param.axis() && param.offset_size() > 1) { | ||
// crop values specified must be equal to the number of dimensions | ||
// following axis | ||
crop_offset = param.offset(i - param.axis()); | ||
new_size = bottom[1]->shape(i); | ||
} | ||
// Check that the image we are cropping minus the margin is bigger | ||
// than the destination image. | ||
CHECK_GE(bottom[0]->shape(i) - crop_offset, | ||
bottom[1]->shape(i)) | ||
<< "invalid crop parameters in dimension: " << i; | ||
// Now set new size and offsets | ||
new_shape[i] = new_size; | ||
offsets[i] = crop_offset; | ||
} | ||
top[0]->Reshape(new_shape); | ||
} | ||
|
||
// recursive copy function | ||
template <typename Dtype> | ||
void CropLayer<Dtype>::crop_copy(const vector<Blob<Dtype>*>& bottom, | ||
const vector<Blob<Dtype>*>& top, | ||
const vector<int>& offsets, | ||
vector<int> indices, | ||
int cur_dim, | ||
const Dtype* src_data, | ||
Dtype* dest_data, | ||
bool is_forward) { | ||
if (cur_dim + 1 < top[0]->num_axes()) { | ||
// We are not yet at the final dimension, call copy recursively |
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. recursivley -> recursively |
||
for (int i = 0; i < top[0]->shape(cur_dim); ++i) { | ||
indices[cur_dim] = i; | ||
crop_copy(bottom, top, offsets, indices, cur_dim+1, | ||
src_data, dest_data, is_forward); | ||
} | ||
} else { | ||
// We are at the last dimension, which is stored contiguously in memory | ||
for (int i = 0; i < top[0]->shape(cur_dim); ++i) { | ||
// prepare index vector reduced(red) and with offsets(off) | ||
std::vector<int> ind_red(cur_dim, 0); | ||
std::vector<int> ind_off(cur_dim+1, 0); | ||
for (int j = 0; j < cur_dim; ++j) { | ||
ind_red[j] = indices[j]; | ||
ind_off[j] = indices[j] + offsets[j]; | ||
} | ||
ind_off[cur_dim] = offsets[cur_dim]; | ||
// do the copy | ||
if (is_forward) { | ||
caffe_copy(top[0]->shape(cur_dim), | ||
src_data + bottom[0]->offset(ind_off), | ||
dest_data + top[0]->offset(ind_red)); | ||
} else { | ||
// in the backwards pass the src_data is top_diff | ||
// and the dest_data is bottom_diff | ||
caffe_copy(top[0]->shape(cur_dim), | ||
src_data + top[0]->offset(ind_red), | ||
dest_data + bottom[0]->offset(ind_off)); | ||
} | ||
} | ||
} | ||
} | ||
|
||
template <typename Dtype> | ||
void CropLayer<Dtype>::Forward_cpu(const vector<Blob<Dtype>*>& bottom, | ||
const vector<Blob<Dtype>*>& top) { | ||
std::vector<int> indices(top[0]->num_axes(), 0); | ||
const Dtype* bottom_data = bottom[0]->cpu_data(); | ||
Dtype* top_data = top[0]->mutable_cpu_data(); | ||
for (int n = 0; n < top[0]->num(); ++n) { | ||
for (int c = 0; c < top[0]->channels(); ++c) { | ||
for (int h = 0; h < top[0]->height(); ++h) { | ||
caffe_copy(top[0]->width(), | ||
bottom_data + bottom[0]->offset(n, c, crop_h_ + h, crop_w_), | ||
top_data + top[0]->offset(n, c, h)); | ||
} | ||
} | ||
} | ||
crop_copy(bottom, top, offsets, indices, 0, bottom_data, top_data, true); | ||
} | ||
|
||
template <typename Dtype> | ||
void CropLayer<Dtype>::Backward_cpu(const vector<Blob<Dtype>*>& top, | ||
const vector<bool>& propagate_down, const vector<Blob<Dtype>*>& bottom) { | ||
const Dtype* top_diff = top[0]->cpu_diff(); | ||
Dtype* bottom_diff = bottom[0]->mutable_cpu_diff(); | ||
|
||
if (propagate_down[0]) { | ||
caffe_set(bottom[0]->count(), static_cast<Dtype>(0), bottom_diff); | ||
for (int n = 0; n < top[0]->num(); ++n) { | ||
for (int c = 0; c < top[0]->channels(); ++c) { | ||
for (int h = 0; h < top[0]->height(); ++h) { | ||
caffe_copy(top[0]->width(), | ||
top_diff + top[0]->offset(n, c, h), | ||
bottom_diff + bottom[0]->offset(n, c, crop_h_ + h, crop_w_)); | ||
} | ||
} | ||
} | ||
std::vector<int> indices(top[0]->num_axes(), 0); | ||
crop_copy(bottom, top, offsets, indices, 0, top_diff, bottom_diff, false); | ||
} | ||
} | ||
|
||
|
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -22,36 +22,103 @@ __global__ void copy_kernel(const int n, const int height, const int width, | |
} | ||
} | ||
|
||
// recursive copy function, this function is similar to crop_copy but loops | ||
// over all but the last two dimensions. It is implemented this way to allow | ||
// for ND cropping while still relying on a CUDA kernel for the innermost | ||
// two dimensions for performance reasons. | ||
// An alternative way to implement ND cropping relying more on the kernel | ||
// would require passing offsets to the kernel, which is a bit problematic | ||
// because it is of variable length. Since in the standard (N,C,W,H) case | ||
// N,C are usually not cropped a speedup could be achieved by not looping | ||
// the application of the copy_kernel around these dimensions. | ||
template <typename Dtype> | ||
void CropLayer<Dtype>::crop_copy_gpu(const vector<Blob<Dtype>*>& bottom, | ||
const vector<Blob<Dtype>*>& top, | ||
const vector<int>& offsets, | ||
vector<int> indices, | ||
int cur_dim, | ||
const Dtype* src_data, | ||
Dtype* dest_data, | ||
bool is_forward) { | ||
if (cur_dim + 2 < top[0]->num_axes()) { | ||
// We are not yet at the final dimension, call copy recursively | ||
for (int i = 0; i < top[0]->shape(cur_dim); ++i) { | ||
indices[cur_dim] = i; | ||
crop_copy_gpu(bottom, top, offsets, indices, cur_dim+1, | ||
src_data, dest_data, is_forward); | ||
} | ||
} else { | ||
// We are at the last two dimensions, which are stored contiguously in memory | ||
// With (N,C,H,W) | ||
// (0,1,2,3) cur_dim -> H | ||
// cur_dim+1 -> W | ||
const int lines = top[0]->shape(cur_dim); | ||
const int height = top[0]->shape(cur_dim); | ||
const int width = top[0]->shape(cur_dim+1); | ||
std::vector<int> ind_off(cur_dim+2, 0); | ||
for (int j = 0; j < cur_dim; ++j) { | ||
ind_off[j] = indices[j] + offsets[j]; | ||
} | ||
ind_off[cur_dim] = offsets[cur_dim]; | ||
ind_off[cur_dim+1] = offsets[cur_dim+1]; | ||
// Compute copy strides | ||
const int src_outer_stride = | ||
bottom[0]->shape(cur_dim)*bottom[0]->shape(cur_dim+1); | ||
const int src_inner_stride = bottom[0]->shape(cur_dim+1); | ||
const int dest_outer_stride = | ||
top[0]->shape(cur_dim)*top[0]->shape(cur_dim+1); | ||
const int dest_inner_stride = top[0]->shape(cur_dim+1); | ||
|
||
if (is_forward) { | ||
const Dtype* bottom_data = bottom[0]->gpu_data() + | ||
bottom[0]->offset(ind_off); | ||
Dtype* top_data = top[0]->mutable_gpu_data() + | ||
top[0]->offset(indices); | ||
// NOLINT_NEXT_LINE(whitespace/operators) | ||
copy_kernel<<<CAFFE_GET_BLOCKS(lines), CAFFE_CUDA_NUM_THREADS>>>( | ||
lines, height, width, | ||
src_outer_stride, src_inner_stride, | ||
dest_outer_stride, dest_inner_stride, | ||
bottom_data, top_data); | ||
|
||
} else { | ||
const Dtype* top_diff = top[0]->gpu_diff() + | ||
top[0]->offset(indices); | ||
Dtype* bottom_diff = bottom[0]->mutable_gpu_diff() + | ||
bottom[0]->offset(ind_off); | ||
// NOLINT_NEXT_LINE(whitespace/operators) | ||
copy_kernel<<<CAFFE_GET_BLOCKS(lines), CAFFE_CUDA_NUM_THREADS>>>( | ||
lines, height, width, | ||
dest_outer_stride, dest_inner_stride, | ||
src_outer_stride, src_inner_stride, | ||
top_diff, bottom_diff); | ||
} | ||
} | ||
} | ||
|
||
template <typename Dtype> | ||
void CropLayer<Dtype>::Forward_gpu(const vector<Blob<Dtype>*>& bottom, | ||
const vector<Blob<Dtype>*>& top) { | ||
std::vector<int> indices(top[0]->num_axes(), 0); | ||
// This works because crop_copy uses caffe_copy which calls cudaMemcpy. | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Drop out-of-date comment as the implementation now uses a copy kernel. |
||
// My intuition is that calling this thousands of times is probably less | ||
// efficient than writing a custom kernel. | ||
const Dtype* bottom_data = bottom[0]->gpu_data(); | ||
Dtype* top_data = top[0]->mutable_gpu_data(); | ||
const int lines = top[0]->count() / top[0]->width(); | ||
|
||
// NOLINT_NEXT_LINE(whitespace/operators) | ||
copy_kernel<<<CAFFE_GET_BLOCKS(lines), CAFFE_CUDA_NUM_THREADS>>>( | ||
lines, top[0]->height(), top[0]->width(), | ||
bottom[0]->height() * bottom[0]->width(), bottom[0]->width(), | ||
top[0]->height() * top[0]->width(), top[0]->width(), | ||
bottom_data + bottom[0]->offset(0, 0, crop_h_, crop_w_), top_data); | ||
crop_copy_gpu(bottom, top, offsets, indices, 0, bottom_data, top_data, true); | ||
} | ||
|
||
template <typename Dtype> | ||
void CropLayer<Dtype>::Backward_gpu(const vector<Blob<Dtype>*>& top, | ||
const vector<bool>& propagate_down, const vector<Blob<Dtype>*>& bottom) { | ||
const Dtype* top_diff = top[0]->gpu_diff(); | ||
Dtype* bottom_diff = bottom[0]->mutable_gpu_diff(); | ||
const int lines = top[0]->count() / top[0]->width(); | ||
|
||
if (propagate_down[0]) { | ||
caffe_gpu_set(bottom[0]->count(), static_cast<Dtype>(0), bottom_diff); | ||
// NOLINT_NEXT_LINE(whitespace/operators) | ||
copy_kernel<<<CAFFE_GET_BLOCKS(lines), CAFFE_CUDA_NUM_THREADS>>>( | ||
lines, top[0]->height(), top[0]->width(), | ||
top[0]->height() * top[0]->width(), top[0]->width(), | ||
bottom[0]->height() * bottom[0]->width(), bottom[0]->width(), | ||
top_diff, bottom_diff + bottom[0]->offset(0, 0, crop_h_, crop_w_)); | ||
std::vector<int> indices(top[0]->num_axes(), 0); | ||
crop_copy_gpu(bottom, top, offsets, indices, 0, top_diff, bottom_diff, | ||
false); | ||
} | ||
} | ||
|
||
|
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -600,10 +600,19 @@ message ConvolutionParameter { | |
} | ||
|
||
message CropParameter { | ||
// Assumes standard dimensions: ( N,C,H,W ) | ||
// This could possibly be extended to use "optional BlobShape offsets" | ||
optional uint32 offset_height = 1[default = 0]; | ||
optional uint32 offset_width = 2[default = 0]; | ||
// To crop, elements of the first bottom are selected to fit the dimensions | ||
// of the second, reference bottom. The crop is configured by | ||
// - the crop `axis` to pick the dimensions for cropping | ||
// - the crop `offset` to set the shift for all/each dimension | ||
// to align the cropped bottom with the reference bottom. | ||
// All dimensions up to but excluding `axis` are preserved, while | ||
// the dimensions including and trailing `axis` are cropped. | ||
// If only one `offset` is set, then all dimensions are offset by this amount. | ||
// Otherwise, the number of offsets must equal the number of cropped axes to | ||
// shift the crop in each dimension accordingly. | ||
// Note: standard dimensions are N,C,H,W so the default is a spatial crop. | ||
optional uint32 axis = 1 [default = 2]; | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Let's make |
||
repeated uint32 offset = 2; | ||
} | ||
|
||
message DataParameter { | ||
|
@@ -680,7 +689,7 @@ message EltwiseParameter { | |
// Message that stores parameters used by ELULayer | ||
message ELUParameter { | ||
// Described in: | ||
// Clevert, D.-A., Unterthiner, T., & Hochreiter, S. (2015). Fast and Accurate | ||
// Clevert, D.-A., Unterthiner, T., & Hochreiter, S. (2015). Fast and Accurate | ||
// Deep Network Learning by Exponential Linear Units (ELUs). arXiv | ||
optional float alpha = 1 [default = 1]; | ||
} | ||
|
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
It seems safe to move any logic that depends only on the number of dimensions to LayerSetUp(); switching the number of dimensions would break many layers.