ND Crop layer #3570

Merged · 4 commits · Mar 5, 2016
Extend Crop to N-D, changed CropParameter.
BlGene committed Mar 5, 2016
commit 952fd17e52b24736f2644c32c249538241b34474
22 changes: 20 additions & 2 deletions include/caffe/layers/crop_layer.hpp
@@ -41,9 +41,27 @@ class CropLayer : public Layer<Dtype> {
virtual void Backward_gpu(const vector<Blob<Dtype>*>& top,
const vector<bool>& propagate_down, const vector<Blob<Dtype>*>& bottom);

int crop_h_, crop_w_;
};
vector<int> offsets;

private:
void crop_copy(const vector<Blob<Dtype>*>& bottom,
const vector<Blob<Dtype>*>& top,
const vector<int>& offsets,
vector<int> indices,
int cur_dim,
const Dtype* src_data,
Dtype* dest_data,
bool is_forward);

void crop_copy_gpu(const vector<Blob<Dtype>*>& bottom,
const vector<Blob<Dtype>*>& top,
const vector<int>& offsets,
vector<int> indices,
int cur_dim,
const Dtype* src_data,
Dtype* dest_data,
bool is_forward);
};
} // namespace caffe

#endif // CAFFE_CROP_LAYER_HPP_
124 changes: 94 additions & 30 deletions src/caffe/layers/crop_layer.cpp
@@ -1,8 +1,10 @@
#include <algorithm>
#include <functional>
#include <map>
#include <set>
#include <vector>


#include "caffe/layer.hpp"
#include "caffe/layers/crop_layer.hpp"
#include "caffe/net.hpp"
@@ -13,58 +15,120 @@ namespace caffe {
template <typename Dtype>
void CropLayer<Dtype>::LayerSetUp(const vector<Blob<Dtype>*>& bottom,
const vector<Blob<Dtype>*>& top) {
const CropParameter& param = this->layer_param_.crop_param();
CHECK_EQ(bottom.size(), 2) << "Wrong number of bottom blobs.";
CHECK_EQ(bottom[0]->num_axes(), 4) << "Only works with 4D blobs.";
CHECK_EQ(bottom[1]->num_axes(), 4) << "Only works with 4D blobs.";
crop_h_ = param.offset_height();
crop_w_ = param.offset_width();
// parameter setup moved to Reshape because it depends on size.
}

template <typename Dtype>
void CropLayer<Dtype>::Reshape(const vector<Blob<Dtype>*>& bottom,
const vector<Blob<Dtype>*>& top) {
// Check that the image we are cropping minus the margin is bigger than the
// destination image.
CHECK_GT(bottom[0]->height()-crop_h_, bottom[1]->height())
<< "invalid offset";
CHECK_GT(bottom[0]->width()-crop_w_, bottom[1]->width()) << "invalid offset";
top[0]->Reshape(bottom[0]->num(), bottom[0]->channels(), bottom[1]->height(),
bottom[1]->width());
const CropParameter& param = this->layer_param_.crop_param();
// bottom[0] supplies the data
// bottom[1] supplies the size
int input_dim = bottom[0]->num_axes();
CHECK_LT(param.axis(), input_dim) << "crop axis bigger than input dim";
[Review comment · Member] It seems safe to move any logic that depends only on the number of dimensions to LayerSetUp(); switching the number of dims would break many layers.

// initialize all offsets to 0
offsets = vector<int>(input_dim, 0);
// initialize new shape to bottom[0]
vector<int> new_shape(bottom[0]->shape());

if (param.offset_size() > 1) {
// the number of crop values specified must be equal to the number
// of dimensions following axis
CHECK_EQ(param.axis() + param.offset_size(), input_dim)
<< "number of crop values specified must be equal to the number of "
<< "dimensions following axis.";
}
// apply crops
for (int i = 0; i < input_dim; ++i) {
int crop_offset = 0;
int new_size = bottom[0]->shape(i);
if (i >= param.axis() && param.offset_size() == 1) {
// if only one crop value is supplied, crop all dimensions after axis
// by this crop value
crop_offset = param.offset(0);
new_size = bottom[1]->shape(i);
} else if (i >= param.axis() && param.offset_size() > 1) {
// crop values specified must be equal to the number of dimensions
// following axis
crop_offset = param.offset(i - param.axis());
new_size = bottom[1]->shape(i);
}
// Check that the dimension we are cropping, minus the offset, is at least
// as large as the corresponding destination dimension.
CHECK_GE(bottom[0]->shape(i) - crop_offset,
bottom[1]->shape(i))
<< "invalid crop parameters in dimension: " << i;
// Now set new size and offsets
new_shape[i] = new_size;
offsets[i] = crop_offset;
}
top[0]->Reshape(new_shape);
}
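
To make the shape and offset resolution above concrete, here is a minimal standalone sketch of the same logic on plain vectors. It is not part of the patch; the helper name, shapes, and values are hypothetical.

#include <cassert>
#include <vector>

// Standalone sketch of the Reshape logic: given a data shape, a reference
// shape, the crop axis, and one-or-per-axis offsets, compute the output
// shape and the per-dimension offsets. With no offsets given, the shape is
// left unchanged, mirroring the patch's behavior.
std::vector<int> resolve_crop(const std::vector<int>& data_shape,
                              const std::vector<int>& ref_shape,
                              int axis,
                              const std::vector<int>& offset,
                              std::vector<int>* offsets_out) {
  std::vector<int> new_shape(data_shape);
  offsets_out->assign(data_shape.size(), 0);
  for (int i = 0; i < static_cast<int>(data_shape.size()); ++i) {
    if (i >= axis && !offset.empty()) {
      // A single offset applies to every cropped axis; otherwise one
      // offset is given per cropped axis.
      const int off = offset.size() == 1 ? offset[0] : offset[i - axis];
      assert(data_shape[i] - off >= ref_shape[i]);  // crop must fit
      new_shape[i] = ref_shape[i];
      (*offsets_out)[i] = off;
    }
  }
  return new_shape;
}

// Example: data (1,3,227,227), reference (1,3,224,224), axis = 2,
// offset = {1} yields new shape (1,3,224,224) and offsets (0,0,1,1).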

// recursive copy function
template <typename Dtype>
void CropLayer<Dtype>::crop_copy(const vector<Blob<Dtype>*>& bottom,
const vector<Blob<Dtype>*>& top,
const vector<int>& offsets,
vector<int> indices,
int cur_dim,
const Dtype* src_data,
Dtype* dest_data,
bool is_forward) {
if (cur_dim + 1 < top[0]->num_axes()) {
// We are not yet at the final dimension, call copy recursivley
[Review comment · Member] recursivley -> recursively

for (int i = 0; i < top[0]->shape(cur_dim); ++i) {
indices[cur_dim] = i;
crop_copy(bottom, top, offsets, indices, cur_dim+1,
src_data, dest_data, is_forward);
}
} else {
// We are at the last dimension, which is stored contiguously in memory
for (int i = 0; i < top[0]->shape(cur_dim); ++i) {
// prepare index vector reduced(red) and with offsets(off)
std::vector<int> ind_red(cur_dim, 0);
std::vector<int> ind_off(cur_dim+1, 0);
for (int j = 0; j < cur_dim; ++j) {
ind_red[j] = indices[j];
ind_off[j] = indices[j] + offsets[j];
}
ind_off[cur_dim] = offsets[cur_dim];
// do the copy
if (is_forward) {
caffe_copy(top[0]->shape(cur_dim),
src_data + bottom[0]->offset(ind_off),
dest_data + top[0]->offset(ind_red));
} else {
// in the backwards pass the src_data is top_diff
// and the dest_data is bottom_diff
caffe_copy(top[0]->shape(cur_dim),
src_data + top[0]->offset(ind_red),
dest_data + bottom[0]->offset(ind_off));
}
}
}
}
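
The reduced index vector ind_red works because blob offsets are computed in row-major order with missing trailing indices treated as zero. Below is a small sketch of that flat-offset computation — a hypothetical helper mirroring what Blob<Dtype>::offset(const vector<int>&) effectively computes, not part of the patch.

#include <vector>

// Row-major flat offset for an index vector that may be shorter than the
// shape; omitted trailing indices count as 0, so an index of length
// cur_dim addresses the start of a contiguous trailing slice.
int flat_offset(const std::vector<int>& shape, const std::vector<int>& ind) {
  int off = 0;
  for (size_t i = 0; i < shape.size(); ++i) {
    off *= shape[i];
    if (i < ind.size()) {
      off += ind[i];
    }
  }
  return off;
}

// Example: shape (2,3,4,4), ind_red = (0,1) -> offset 16, the start of
// the contiguous 4x4 slice for n = 0, c = 1.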

template <typename Dtype>
void CropLayer<Dtype>::Forward_cpu(const vector<Blob<Dtype>*>& bottom,
const vector<Blob<Dtype>*>& top) {
std::vector<int> indices(top[0]->num_axes(), 0);
const Dtype* bottom_data = bottom[0]->cpu_data();
Dtype* top_data = top[0]->mutable_cpu_data();
for (int n = 0; n < top[0]->num(); ++n) {
for (int c = 0; c < top[0]->channels(); ++c) {
for (int h = 0; h < top[0]->height(); ++h) {
caffe_copy(top[0]->width(),
bottom_data + bottom[0]->offset(n, c, crop_h_ + h, crop_w_),
top_data + top[0]->offset(n, c, h));
}
}
}
crop_copy(bottom, top, offsets, indices, 0, bottom_data, top_data, true);
}

template <typename Dtype>
void CropLayer<Dtype>::Backward_cpu(const vector<Blob<Dtype>*>& top,
const vector<bool>& propagate_down, const vector<Blob<Dtype>*>& bottom) {
const Dtype* top_diff = top[0]->cpu_diff();
Dtype* bottom_diff = bottom[0]->mutable_cpu_diff();

if (propagate_down[0]) {
caffe_set(bottom[0]->count(), static_cast<Dtype>(0), bottom_diff);
for (int n = 0; n < top[0]->num(); ++n) {
for (int c = 0; c < top[0]->channels(); ++c) {
for (int h = 0; h < top[0]->height(); ++h) {
caffe_copy(top[0]->width(),
top_diff + top[0]->offset(n, c, h),
bottom_diff + bottom[0]->offset(n, c, crop_h_ + h, crop_w_));
}
}
}
std::vector<int> indices(top[0]->num_axes(), 0);
crop_copy(bottom, top, offsets, indices, 0, top_diff, bottom_diff, false);
}
}

97 changes: 82 additions & 15 deletions src/caffe/layers/crop_layer.cu
@@ -22,36 +22,103 @@ __global__ void copy_kernel(const int n, const int height, const int width,
}
}

// Recursive copy function: similar to crop_copy, but it loops over all but
// the last two dimensions. It is implemented this way to allow for ND
// cropping while still relying on a CUDA kernel for the innermost two
// dimensions, for performance reasons.
// An alternative ND-cropping implementation that relies more on the kernel
// would require passing the offsets to the kernel, which is problematic
// because the offset vector is of variable length. Since in the standard
// (N,C,H,W) case N and C are usually not cropped, a speedup could be
// achieved by not looping the application of copy_kernel over these
// dimensions.
template <typename Dtype>
void CropLayer<Dtype>::crop_copy_gpu(const vector<Blob<Dtype>*>& bottom,
const vector<Blob<Dtype>*>& top,
const vector<int>& offsets,
vector<int> indices,
int cur_dim,
const Dtype* src_data,
Dtype* dest_data,
bool is_forward) {
if (cur_dim + 2 < top[0]->num_axes()) {
// We are not yet at the final dimensions, call copy recursively
for (int i = 0; i < top[0]->shape(cur_dim); ++i) {
indices[cur_dim] = i;
crop_copy_gpu(bottom, top, offsets, indices, cur_dim+1,
src_data, dest_data, is_forward);
}
} else {
// We are at the last two dimensions, which are stored contiguously in
// memory. With (N,C,H,W) blobs and axes (0,1,2,3): cur_dim -> H,
// cur_dim+1 -> W.
const int lines = top[0]->shape(cur_dim);
const int height = top[0]->shape(cur_dim);
const int width = top[0]->shape(cur_dim+1);
std::vector<int> ind_off(cur_dim+2, 0);
for (int j = 0; j < cur_dim; ++j) {
ind_off[j] = indices[j] + offsets[j];
}
ind_off[cur_dim] = offsets[cur_dim];
ind_off[cur_dim+1] = offsets[cur_dim+1];
// Compute copy strides
const int src_outer_stride =
bottom[0]->shape(cur_dim)*bottom[0]->shape(cur_dim+1);
const int src_inner_stride = bottom[0]->shape(cur_dim+1);
const int dest_outer_stride =
top[0]->shape(cur_dim)*top[0]->shape(cur_dim+1);
const int dest_inner_stride = top[0]->shape(cur_dim+1);

if (is_forward) {
const Dtype* bottom_data = bottom[0]->gpu_data() +
bottom[0]->offset(ind_off);
Dtype* top_data = top[0]->mutable_gpu_data() +
top[0]->offset(indices);
// NOLINT_NEXT_LINE(whitespace/operators)
copy_kernel<<<CAFFE_GET_BLOCKS(lines), CAFFE_CUDA_NUM_THREADS>>>(
lines, height, width,
src_outer_stride, src_inner_stride,
dest_outer_stride, dest_inner_stride,
bottom_data, top_data);

} else {
const Dtype* top_diff = top[0]->gpu_diff() +
top[0]->offset(indices);
Dtype* bottom_diff = bottom[0]->mutable_gpu_diff() +
bottom[0]->offset(ind_off);
// NOLINT_NEXT_LINE(whitespace/operators)
copy_kernel<<<CAFFE_GET_BLOCKS(lines), CAFFE_CUDA_NUM_THREADS>>>(
lines, height, width,
dest_outer_stride, dest_inner_stride,
src_outer_stride, src_inner_stride,
top_diff, bottom_diff);
}
}
}
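
The body of copy_kernel is collapsed in this diff. Given the call sites above, a strided row-copy kernel with this signature might look like the following sketch — an assumption for illustration, not necessarily the file's actual kernel.

// Copies n rows of `width` contiguous elements; each group of `height`
// rows starts at an outer stride, each row within a group at an inner
// stride.
template <typename Dtype>
__global__ void copy_kernel_sketch(const int n, const int height,
    const int width,
    const int src_outer_stride, const int src_inner_stride,
    const int dest_outer_stride, const int dest_inner_stride,
    const Dtype* src, Dtype* dest) {
  CUDA_KERNEL_LOOP(index, n) {
    const int src_start = index / height * src_outer_stride
                        + index % height * src_inner_stride;
    const int dest_start = index / height * dest_outer_stride
                         + index % height * dest_inner_stride;
    for (int i = 0; i < width; ++i) {
      dest[dest_start + i] = src[src_start + i];
    }
  }
}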

template <typename Dtype>
void CropLayer<Dtype>::Forward_gpu(const vector<Blob<Dtype>*>& bottom,
const vector<Blob<Dtype>*>& top) {
std::vector<int> indices(top[0]->num_axes(), 0);
// This works because crop_copy uses caffe_copy which calls cudaMemcpy.
[Review comment · Member] Drop out-of-date comment as the implementation now uses a copy kernel.

// My intuition is that calling this thousands of times is probably less
// efficient than writing a custom kernel.
const Dtype* bottom_data = bottom[0]->gpu_data();
Dtype* top_data = top[0]->mutable_gpu_data();
const int lines = top[0]->count() / top[0]->width();

// NOLINT_NEXT_LINE(whitespace/operators)
copy_kernel<<<CAFFE_GET_BLOCKS(lines), CAFFE_CUDA_NUM_THREADS>>>(
lines, top[0]->height(), top[0]->width(),
bottom[0]->height() * bottom[0]->width(), bottom[0]->width(),
top[0]->height() * top[0]->width(), top[0]->width(),
bottom_data + bottom[0]->offset(0, 0, crop_h_, crop_w_), top_data);
crop_copy_gpu(bottom, top, offsets, indices, 0, bottom_data, top_data, true);
}

template <typename Dtype>
void CropLayer<Dtype>::Backward_gpu(const vector<Blob<Dtype>*>& top,
const vector<bool>& propagate_down, const vector<Blob<Dtype>*>& bottom) {
const Dtype* top_diff = top[0]->gpu_diff();
Dtype* bottom_diff = bottom[0]->mutable_gpu_diff();
const int lines = top[0]->count() / top[0]->width();

if (propagate_down[0]) {
caffe_gpu_set(bottom[0]->count(), static_cast<Dtype>(0), bottom_diff);
// NOLINT_NEXT_LINE(whitespace/operators)
copy_kernel<<<CAFFE_GET_BLOCKS(lines), CAFFE_CUDA_NUM_THREADS>>>(
lines, top[0]->height(), top[0]->width(),
top[0]->height() * top[0]->width(), top[0]->width(),
bottom[0]->height() * bottom[0]->width(), bottom[0]->width(),
top_diff, bottom_diff + bottom[0]->offset(0, 0, crop_h_, crop_w_));
std::vector<int> indices(top[0]->num_axes(), 0);
crop_copy_gpu(bottom, top, offsets, indices, 0, top_diff, bottom_diff,
false);
}
}

19 changes: 14 additions & 5 deletions src/caffe/proto/caffe.proto
@@ -600,10 +600,19 @@ message ConvolutionParameter {
}

message CropParameter {
// Assumes standard dimensions: ( N,C,H,W )
// This could possibly be extended to use "optional BlobShape offsets"
optional uint32 offset_height = 1[default = 0];
optional uint32 offset_width = 2[default = 0];
// To crop, elements of the first bottom are selected to fit the dimensions
// of the second, reference bottom. The crop is configured by
// - the crop `axis` to pick the dimensions for cropping
// - the crop `offset` to set the shift for all/each dimension
// to align the cropped bottom with the reference bottom.
// All dimensions up to but excluding `axis` are preserved, while
// the dimensions including and trailing `axis` are cropped.
// If only one `offset` is set, then all dimensions are offset by this amount.
// Otherwise, the number of offsets must equal the number of cropped axes to
// shift the crop in each dimension accordingly.
// Note: standard dimensions are N,C,H,W so the default is a spatial crop.
optional uint32 axis = 1 [default = 2];
[Review comment · Member] Let's make axis mirror the axis arg of other layers to allow for negative indexing through an int32 field and the CanonicalAxisIndex() helper method? See for instance https://github.com/BVLC/caffe/blob/master/src/caffe/layers/flatten_layer.cpp#L12-L13

repeated uint32 offset = 2;
}
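
As a usage sketch, the new parameter could be set from C++ through the protobuf-generated accessors (accessor names follow standard protoc conventions; the function name and values here are hypothetical):

#include "caffe/proto/caffe.pb.h"

void configure_crop(caffe::LayerParameter* layer) {
  layer->set_type("Crop");
  caffe::CropParameter* crop = layer->mutable_crop_param();
  crop->set_axis(2);    // default: crop H and W, preserve N and C
  crop->add_offset(1);  // a single offset applies to every cropped axis
}
// With two bottoms of shapes (1,3,227,227) and (1,3,224,224), this
// configuration produces a (1,3,224,224) top shifted by 1 in both H and W.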

message DataParameter {
@@ -680,7 +689,7 @@ message EltwiseParameter {
// Message that stores parameters used by ELULayer
message ELUParameter {
// Described in:
// Clevert, D.-A., Unterthiner, T., & Hochreiter, S. (2015). Fast and Accurate
// Deep Network Learning by Exponential Linear Units (ELUs). arXiv
optional float alpha = 1 [default = 1];
}