Halide 14.0.0
Halide compiler and libraries
HalideBuffer.h
1/** \file
2 * Defines a Buffer type that wraps a halide_buffer_t and adds
3 * functionality, and methods for more conveniently iterating over the
4 * samples in a halide_buffer_t outside of Halide code. */
5
6#ifndef HALIDE_RUNTIME_BUFFER_H
7#define HALIDE_RUNTIME_BUFFER_H
8
9#include <algorithm>
10#include <atomic>
11#include <cassert>
12#include <cstdint>
13#include <cstring>
14#include <limits>
15#include <memory>
16#include <vector>
17
18#if defined(__has_feature)
19#if __has_feature(memory_sanitizer)
20#include <sanitizer/msan_interface.h>
21#endif
22#endif
23
24#include "HalideRuntime.h"
25
26#ifdef _MSC_VER
27#include <malloc.h>
28#define HALIDE_ALLOCA _alloca
29#else
30#define HALIDE_ALLOCA __builtin_alloca
31#endif
32
33// gcc 5.1 has a false positive warning on this code
34#if __GNUC__ == 5 && __GNUC_MINOR__ == 1
35#pragma GCC diagnostic ignored "-Warray-bounds"
36#endif
37
38#ifndef HALIDE_RUNTIME_BUFFER_CHECK_INDICES
39#define HALIDE_RUNTIME_BUFFER_CHECK_INDICES 0
40#endif
41
42namespace Halide {
43namespace Runtime {
44
45// Forward-declare our Buffer class
46template<typename T, int Dims, int InClassDimStorage>
47class Buffer;
48
49// A helper to check if a parameter pack is entirely implicitly
50// int-convertible to use with std::enable_if
51template<typename... Args>
52struct AllInts : std::false_type {};
53
54template<>
55struct AllInts<> : std::true_type {};
56
57template<typename T, typename... Args>
58struct AllInts<T, Args...> {
59 static const bool value = std::is_convertible<T, int>::value && AllInts<Args...>::value;
60};
61
62// Floats and doubles are technically implicitly int-convertible, but
63// doing so produces a warning we treat as an error, so just disallow
64// it here.
65template<typename... Args>
66struct AllInts<float, Args...> : std::false_type {};
67
68template<typename... Args>
69struct AllInts<double, Args...> : std::false_type {};
70
71// A helper to detect if there are any zeros in a container
72namespace Internal {
73template<typename Container>
74bool any_zero(const Container &c) {
75 for (int i : c) {
76 if (i == 0) {
77 return true;
78 }
79 }
80 return false;
81}
82} // namespace Internal
83
84/** A struct acting as a header for allocations owned by the Buffer
85 * class itself. */
86struct AllocationHeader {
87 void (*deallocate_fn)(void *);
88 std::atomic<int> ref_count;
89
90 // Note that ref_count always starts at 1
91 explicit AllocationHeader(void (*deallocate_fn)(void *))
92 : deallocate_fn(deallocate_fn), ref_count(1) {
93 }
94};
95
96/** This indicates how to deallocate the device for a Halide::Runtime::Buffer. */
97enum struct BufferDeviceOwnership : int {
98 Allocated, ///> halide_device_free will be called when device ref count goes to zero
99 WrappedNative, ///> halide_device_detach_native will be called when device ref count goes to zero
100 Unmanaged, ///> No free routine will be called when device ref count goes to zero
101 AllocatedDeviceAndHost, ///> Call device_and_host_free when DevRefCount goes to zero.
102 Cropped, ///> Call halide_device_release_crop when DevRefCount goes to zero.
103};
104
105/** A similar struct for managing device allocations. */
106struct DeviceRefCount {
107 // This is only ever constructed when there's something to manage,
108 // so start at one.
109 std::atomic<int> count{1};
110 BufferDeviceOwnership ownership{BufferDeviceOwnership::Allocated};
111};
112
113constexpr int AnyDims = -1;
114
115/** A templated Buffer class that wraps halide_buffer_t and adds
116 * functionality. When using Halide from C++, this is the preferred
117 * way to create input and output buffers. The overhead of using this
118 * class relative to a naked halide_buffer_t is minimal - it uses another
119 * ~16 bytes on the stack, and does no dynamic allocations when using
120 * it to represent existing memory of a known maximum dimensionality.
121 *
122 * The template parameter T is the element type. For buffers where the
123 * element type is unknown, or may vary, use void or const void.
124 *
125 * The template parameter Dims is the number of dimensions. For buffers where
126 * the dimensionality is unknown at compile time, or may vary, use AnyDims.
127 *
128 * InClassDimStorage is the maximum number of dimensions that can be represented
129 * using space inside the class itself. Set it to the maximum dimensionality
130 * you expect this buffer to be. If the actual dimensionality exceeds
131 * this, heap storage is allocated to track the shape of the buffer.
132 * InClassDimStorage defaults to 4, which should cover nearly all usage.
133 *
134 * The class optionally allocates and owns memory for the image using
135 * a shared pointer allocated with the provided allocator. If they are
136 * null, malloc and free are used. Any device-side allocation is
137 * considered as owned if and only if the host-side allocation is
138 * owned. */
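// Illustrative usage sketch (assumes only the allocating constructor, fill(),
// and operator() declared later in this header):
//
//     Halide::Runtime::Buffer<float> im(800, 600, 3);  // allocates an 800x600x3 float image
//     im.fill(0.0f);                                   // set every sample
//     im(10, 20, 2) = 1.0f;                            // write one sample via operator()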
139template<typename T = void,
140 int Dims = AnyDims,
141 int InClassDimStorage = (Dims == AnyDims ? 4 : std::max(Dims, 1))>
142class Buffer {
143 /** The underlying halide_buffer_t */
144 halide_buffer_t buf = {};
145
146 /** Some in-class storage for shape of the dimensions. */
147 halide_dimension_t shape[InClassDimStorage];
148
149 /** The allocation owned by this Buffer. NULL if the Buffer does not
150 * own the memory. */
151 AllocationHeader *alloc = nullptr;
152
153 /** A reference count for the device allocation owned by this
154 * buffer. */
155 mutable DeviceRefCount *dev_ref_count = nullptr;
156
157 /** True if T is of type void or const void */
158 static const bool T_is_void = std::is_same<typename std::remove_const<T>::type, void>::value;
159
160 /** A type function that adds a const qualifier if T is a const type. */
161 template<typename T2>
162 using add_const_if_T_is_const = typename std::conditional<std::is_const<T>::value, const T2, T2>::type;
163
164 /** T unless T is (const) void, in which case (const)
165 * uint8_t. Useful for providing return types for operator() */
166 using not_void_T = typename std::conditional<T_is_void,
167 add_const_if_T_is_const<uint8_t>,
168 T>::type;
169
170 /** T with constness removed. Useful for return type of copy(). */
171 using not_const_T = typename std::remove_const<T>::type;
172
173 /** The type the elements are stored as. Equal to not_void_T
174 * unless T is a pointer, in which case uint64_t. Halide stores
175 * all pointer types as uint64s internally, even on 32-bit
176 * systems. */
177 using storage_T = typename std::conditional<std::is_pointer<T>::value, uint64_t, not_void_T>::type;
178
179public:
180 /** True if the Halide type is not void (or const void). */
181 static constexpr bool has_static_halide_type = !T_is_void;
182
183 /** Get the Halide type of T. Callers should not use the result if
184 * has_static_halide_type is false. */
185 static constexpr halide_type_t static_halide_type() {
186 return halide_type_of<typename std::remove_cv<not_void_T>::type>();
187 }
188
189 /** Does this Buffer own the host memory it refers to? */
190 bool owns_host_memory() const {
191 return alloc != nullptr;
192 }
193
194 static constexpr bool has_static_dimensions = (Dims != AnyDims);
195
196 /** Callers should not use the result if
197 * has_static_dimensions is false. */
198 static constexpr int static_dimensions() {
199 return Dims;
200 }
201
202 static_assert(!has_static_dimensions || static_dimensions() >= 0);
203
204private:
205 /** Increment the reference count of any owned allocation */
206 void incref() const {
207 if (owns_host_memory()) {
208 alloc->ref_count++;
209 }
210 if (buf.device) {
211 if (!dev_ref_count) {
212 // I seem to have a non-zero dev field but no
213 // reference count for it. I must have been given a
214 // device allocation by a Halide pipeline, and have
215 // never been copied from since. Take sole ownership
216 // of it.
217 dev_ref_count = new DeviceRefCount;
218 }
219 dev_ref_count->count++;
220 }
221 }
222
223 // Note that this is called "cropped" but can also encompass a slice/embed
224 // operation as well.
225 struct DevRefCountCropped : DeviceRefCount {
226 Buffer<T, Dims, InClassDimStorage> cropped_from;
227 DevRefCountCropped(const Buffer<T, Dims, InClassDimStorage> &cropped_from)
228 : cropped_from(cropped_from) {
229 ownership = BufferDeviceOwnership::Cropped;
230 }
231 };
232
233 /** Setup the device ref count for a buffer to indicate it is a crop (or slice, embed, etc) of cropped_from */
234 void crop_from(const Buffer<T, Dims, InClassDimStorage> &cropped_from) {
235 assert(dev_ref_count == nullptr);
236 dev_ref_count = new DevRefCountCropped(cropped_from);
237 }
238
239 /** Decrement the reference count of any owned allocation and free host
240 * and device memory if it hits zero. Sets alloc to nullptr. */
241 void decref(bool device_only = false) {
242 if (owns_host_memory() && !device_only) {
243 int new_count = --(alloc->ref_count);
244 if (new_count == 0) {
245 void (*fn)(void *) = alloc->deallocate_fn;
246 alloc->~AllocationHeader();
247 fn(alloc);
248 }
249 buf.host = nullptr;
250 alloc = nullptr;
251 set_host_dirty(false);
252 }
253 int new_count = 0;
254 if (dev_ref_count) {
255 new_count = --(dev_ref_count->count);
256 }
257 if (new_count == 0) {
258 if (buf.device) {
259 assert(!(alloc && device_dirty()) &&
260 "Implicitly freeing a dirty device allocation while a host allocation still lives. "
261 "Call device_free explicitly if you want to drop dirty device-side data. "
262 "Call copy_to_host explicitly if you want the data copied to the host allocation "
263 "before the device allocation is freed.");
264 int result = 0;
265 if (dev_ref_count && dev_ref_count->ownership == BufferDeviceOwnership::WrappedNative) {
266 result = buf.device_interface->detach_native(nullptr, &buf);
267 } else if (dev_ref_count && dev_ref_count->ownership == BufferDeviceOwnership::AllocatedDeviceAndHost) {
268 result = buf.device_interface->device_and_host_free(nullptr, &buf);
269 } else if (dev_ref_count && dev_ref_count->ownership == BufferDeviceOwnership::Cropped) {
270 result = buf.device_interface->device_release_crop(nullptr, &buf);
271 } else if (dev_ref_count == nullptr || dev_ref_count->ownership == BufferDeviceOwnership::Allocated) {
272 result = buf.device_interface->device_free(nullptr, &buf);
273 }
274 // No reasonable way to return the error, but we can at least assert-fail in debug builds.
275 assert((result == 0) && "device_interface call returned a nonzero result in Buffer::decref()");
276 (void)result;
277 }
278 if (dev_ref_count) {
279 if (dev_ref_count->ownership == BufferDeviceOwnership::Cropped) {
280 delete (DevRefCountCropped *)dev_ref_count;
281 } else {
282 delete dev_ref_count;
283 }
284 }
285 }
286 dev_ref_count = nullptr;
287 buf.device = 0;
288 buf.device_interface = nullptr;
289 }
290
291 void free_shape_storage() {
292 if (buf.dim != shape) {
293 delete[] buf.dim;
294 buf.dim = nullptr;
295 }
296 }
297
298 template<int DimsSpecified>
299 void make_static_shape_storage() {
300 static_assert(Dims == AnyDims || Dims == DimsSpecified,
301 "Number of arguments to Buffer() does not match static dimensionality");
302 buf.dimensions = DimsSpecified;
303 if constexpr (Dims == AnyDims) {
304 if constexpr (DimsSpecified <= InClassDimStorage) {
305 buf.dim = shape;
306 } else {
307 static_assert(DimsSpecified >= 1);
308 buf.dim = new halide_dimension_t[DimsSpecified];
309 }
310 } else {
311 static_assert(InClassDimStorage >= Dims);
312 buf.dim = shape;
313 }
314 }
315
316 void make_shape_storage(const int dimensions) {
317 if (Dims != AnyDims && Dims != dimensions) {
318 assert(false && "Number of arguments to Buffer() does not match static dimensionality");
319 }
320 // This should usually be inlined, so if dimensions is statically known,
321 // we can skip the call to new
322 buf.dimensions = dimensions;
323 buf.dim = (dimensions <= InClassDimStorage) ? shape : new halide_dimension_t[dimensions];
324 }
325
326 void copy_shape_from(const halide_buffer_t &other) {
327 // All callers of this ensure that buf.dimensions == other.dimensions.
328 make_shape_storage(other.dimensions);
329 std::copy(other.dim, other.dim + other.dimensions, buf.dim);
330 }
331
332 template<typename T2, int D2, int S2>
333 void move_shape_from(Buffer<T2, D2, S2> &&other) {
334 if (other.shape == other.buf.dim) {
335 copy_shape_from(other.buf);
336 } else {
337 buf.dim = other.buf.dim;
338 other.buf.dim = nullptr;
339 }
340 }
341
342 /** Initialize the shape from a halide_buffer_t. */
343 void initialize_from_buffer(const halide_buffer_t &b,
344 BufferDeviceOwnership ownership) {
345 memcpy(&buf, &b, sizeof(halide_buffer_t));
346 copy_shape_from(b);
347 if (b.device) {
348 dev_ref_count = new DeviceRefCount;
349 dev_ref_count->ownership = ownership;
350 }
351 }
352
353 /** Initialize the shape from an array of ints */
354 void initialize_shape(const int *sizes) {
355 for (int i = 0; i < buf.dimensions; i++) {
356 buf.dim[i].min = 0;
357 buf.dim[i].extent = sizes[i];
358 if (i == 0) {
359 buf.dim[i].stride = 1;
360 } else {
361 buf.dim[i].stride = buf.dim[i - 1].stride * buf.dim[i - 1].extent;
362 }
363 }
364 }
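// Worked example (illustrative): for sizes {640, 480, 3} the loop above
// produces mins {0, 0, 0}, extents {640, 480, 3}, and dense planar strides
// {1, 640, 640 * 480}, i.e. dimension 0 is innermost in memory.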
365
366 /** Initialize the shape from a vector of extents */
367 void initialize_shape(const std::vector<int> &sizes) {
368 assert(buf.dimensions == (int)sizes.size());
369 initialize_shape(sizes.data());
370 }
371
372 /** Initialize the shape from the static shape of an array */
373 template<typename Array, size_t N>
374 void initialize_shape_from_array_shape(int next, Array (&vals)[N]) {
375 buf.dim[next].min = 0;
376 buf.dim[next].extent = (int)N;
377 if (next == 0) {
378 buf.dim[next].stride = 1;
379 } else {
380 initialize_shape_from_array_shape(next - 1, vals[0]);
381 buf.dim[next].stride = buf.dim[next - 1].stride * buf.dim[next - 1].extent;
382 }
383 }
384
385 /** Base case for the template recursion above. */
386 template<typename T2>
387 void initialize_shape_from_array_shape(int, const T2 &) {
388 }
389
390 /** Get the dimensionality of a multi-dimensional C array */
391 template<typename Array, size_t N>
392 static int dimensionality_of_array(Array (&vals)[N]) {
393 return dimensionality_of_array(vals[0]) + 1;
394 }
395
396 template<typename T2>
397 static int dimensionality_of_array(const T2 &) {
398 return 0;
399 }
400
401 /** Get the underlying halide_type_t of an array's element type. */
402 template<typename Array, size_t N>
403 static halide_type_t scalar_type_of_array(Array (&vals)[N]) {
404 return scalar_type_of_array(vals[0]);
405 }
406
407 template<typename T2>
408 static halide_type_t scalar_type_of_array(const T2 &) {
409 return halide_type_of<typename std::remove_cv<T2>::type>();
410 }
411
412 /** Crop a single dimension without handling device allocation. */
413 void crop_host(int d, int min, int extent) {
414 assert(dim(d).min() <= min);
415 assert(dim(d).max() >= min + extent - 1);
416 ptrdiff_t shift = min - dim(d).min();
417 if (buf.host != nullptr) {
418 buf.host += (shift * dim(d).stride()) * type().bytes();
419 }
420 buf.dim[d].min = min;
421 buf.dim[d].extent = extent;
422 }
423
424 /** Crop as many dimensions as are in rect, without handling device allocation. */
425 void crop_host(const std::vector<std::pair<int, int>> &rect) {
426 assert(rect.size() <= static_cast<decltype(rect.size())>(std::numeric_limits<int>::max()));
427 int limit = (int)rect.size();
428 assert(limit <= dimensions());
429 for (int i = 0; i < limit; i++) {
430 crop_host(i, rect[i].first, rect[i].second);
431 }
432 }
433
434 void complete_device_crop(Buffer<T, Dims, InClassDimStorage> &result_host_cropped) const {
435 assert(buf.device_interface != nullptr);
436 if (buf.device_interface->device_crop(nullptr, &this->buf, &result_host_cropped.buf) == 0) {
437 const Buffer<T, Dims, InClassDimStorage> *cropped_from = this;
438 // TODO: Figure out what to do if dev_ref_count is nullptr. Should incref logic run here?
439 // is it possible to get to this point without incref having run at least once since
440 // the device field was set? (I.e. in the internal logic of crop. incref might have been
441 // called.)
442 if (dev_ref_count != nullptr && dev_ref_count->ownership == BufferDeviceOwnership::Cropped) {
443 cropped_from = &((DevRefCountCropped *)dev_ref_count)->cropped_from;
444 }
445 result_host_cropped.crop_from(*cropped_from);
446 }
447 }
448
449 /** slice a single dimension without handling device allocation. */
450 void slice_host(int d, int pos) {
451 static_assert(Dims == AnyDims);
452 assert(dimensions() > 0);
453 assert(d >= 0 && d < dimensions());
454 assert(pos >= dim(d).min() && pos <= dim(d).max());
455 buf.dimensions--;
456 ptrdiff_t shift = pos - buf.dim[d].min;
457 if (buf.host != nullptr) {
458 buf.host += (shift * buf.dim[d].stride) * type().bytes();
459 }
460 for (int i = d; i < buf.dimensions; i++) {
461 buf.dim[i] = buf.dim[i + 1];
462 }
463 buf.dim[buf.dimensions] = {0, 0, 0};
464 }
465
466 void complete_device_slice(Buffer<T, AnyDims, InClassDimStorage> &result_host_sliced, int d, int pos) const {
467 assert(buf.device_interface != nullptr);
468 if (buf.device_interface->device_slice(nullptr, &this->buf, d, pos, &result_host_sliced.buf) == 0) {
469 const Buffer<T, Dims, InClassDimStorage> *sliced_from = this;
470 // TODO: Figure out what to do if dev_ref_count is nullptr. Should incref logic run here?
471 // is it possible to get to this point without incref having run at least once since
472 // the device field was set? (I.e. in the internal logic of slice. incref might have been
473 // called.)
474 if (dev_ref_count != nullptr && dev_ref_count->ownership == BufferDeviceOwnership::Cropped) {
475 sliced_from = &((DevRefCountCropped *)dev_ref_count)->cropped_from;
476 }
477 // crop_from() is correct here, despite the fact that we are slicing.
478 result_host_sliced.crop_from(*sliced_from);
479 }
480 }
481
482public:
483 typedef T ElemType;
484
485 /** Read-only access to the shape */
486 class Dimension {
487 const halide_dimension_t &d;
488
489 public:
490 /** The lowest coordinate in this dimension */
491 HALIDE_ALWAYS_INLINE int min() const {
492 return d.min;
493 }
494
495 /** The number of elements in memory you have to step over to
496 * increment this coordinate by one. */
497 HALIDE_ALWAYS_INLINE int stride() const {
498 return d.stride;
499 }
500
501 /** The extent of the image along this dimension */
502 HALIDE_ALWAYS_INLINE int extent() const {
503 return d.extent;
504 }
505
506 /** The highest coordinate in this dimension */
507 HALIDE_ALWAYS_INLINE int max() const {
508 return min() + extent() - 1;
509 }
510
511 /** An iterator class, so that you can iterate over
512 * coordinates in a dimension using a range-based for loop. */
513 struct iterator {
514 int val;
515 int operator*() const {
516 return val;
517 }
518 bool operator!=(const iterator &other) const {
519 return val != other.val;
520 }
521 iterator &operator++() {
522 val++;
523 return *this;
524 }
525 };
526
527 /** An iterator that points to the min coordinate */
528 HALIDE_ALWAYS_INLINE iterator begin() const {
529 return {min()};
530 }
531
532 /** An iterator that points to one past the max coordinate */
533 HALIDE_ALWAYS_INLINE iterator end() const {
534 return {min() + extent()};
535 }
536
537 explicit Dimension(const halide_dimension_t &dim)
538 : d(dim) {
539 }
540 };
541
542 /** Access the shape of the buffer */
543 HALIDE_ALWAYS_INLINE Dimension dim(int i) const {
544 assert(i >= 0 && i < this->dimensions());
545 return Dimension(buf.dim[i]);
546 }
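// Illustrative sketch: the Dimension iterators above support range-based
// loops over coordinates (im is a hypothetical two-dimensional Buffer):
//
//     for (int y : im.dim(1)) {
//         for (int x : im.dim(0)) {
//             // visit im(x, y)
//         }
//     }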
547
548 /** Access to the mins, strides, extents. Will be deprecated. Do not use. */
549 // @{
550 int min(int i) const {
551 return dim(i).min();
552 }
553 int extent(int i) const {
554 return dim(i).extent();
555 }
556 int stride(int i) const {
557 return dim(i).stride();
558 }
559 // @}
560
561 /** The total number of elements this buffer represents. Equal to
562 * the product of the extents */
563 size_t number_of_elements() const {
564 return buf.number_of_elements();
565 }
566
567 /** Get the dimensionality of the buffer. */
568 int dimensions() const {
569 if constexpr (has_static_dimensions) {
570 return Dims;
571 } else {
572 return buf.dimensions;
573 }
574 }
575
576 /** Get the type of the elements. */
577 halide_type_t type() const {
578 return buf.type;
579 }
580
581 /** A pointer to the element with the lowest address. If all
582 * strides are positive, equal to the host pointer. */
583 T *begin() const {
584 assert(buf.host != nullptr); // Cannot call begin() on an unallocated Buffer.
585 return (T *)buf.begin();
586 }
587
588 /** A pointer to one beyond the element with the highest address. */
589 T *end() const {
590 assert(buf.host != nullptr); // Cannot call end() on an unallocated Buffer.
591 return (T *)buf.end();
592 }
593
594 /** The total number of bytes spanned by the data in memory. */
595 size_t size_in_bytes() const {
596 return buf.size_in_bytes();
597 }
598
599 /** Reset the Buffer to be equivalent to a default-constructed Buffer
600 * of the same static type (if any); Buffer<void> will have its runtime
601 * type reset to uint8. */
602 void reset() {
603 *this = Buffer();
604 }
605
606 Buffer()
607 : shape() {
608 buf.type = static_halide_type();
609 // If Dims is statically known, create storage for that many dimensions;
610 // otherwise, make a zero-dimensional buffer.
611 constexpr int buf_dimensions = (Dims == AnyDims) ? 0 : Dims;
612 make_static_shape_storage<buf_dimensions>();
613 }
614
615 /** Make a Buffer from a halide_buffer_t */
616 explicit Buffer(const halide_buffer_t &buf,
617 BufferDeviceOwnership ownership = BufferDeviceOwnership::Unmanaged) {
618 assert(T_is_void || buf.type == static_halide_type());
619 initialize_from_buffer(buf, ownership);
620 }
621
622 /** Give Buffers access to the members of Buffers of different dimensionalities and types. */
623 template<typename T2, int D2, int S2>
624 friend class Buffer;
625
626private:
627 template<typename T2, int D2, int S2>
628 static void static_assert_can_convert_from() {
629 static_assert((!std::is_const<T2>::value || std::is_const<T>::value),
630 "Can't convert from a Buffer<const T> to a Buffer<T>");
631 static_assert(std::is_same<typename std::remove_const<T>::type,
632 typename std::remove_const<T2>::type>::value ||
633 T_is_void || Buffer<T2, D2, S2>::T_is_void,
634 "type mismatch constructing Buffer");
635 static_assert(Dims == AnyDims || D2 == AnyDims || Dims == D2,
636 "Can't convert from a Buffer with static dimensionality to a Buffer with different static dimensionality");
637 }
638
639public:
640 /** Determine if a Buffer<T, Dims, InClassDimStorage> can be constructed from some other Buffer type.
641 * If this can be determined at compile time, fail with a static assert; otherwise
642 * return a boolean based on runtime typing. */
643 template<typename T2, int D2, int S2>
644 static bool can_convert_from(const Buffer<T2, D2, S2> &other) {
645 static_assert_can_convert_from<T2, D2, S2>();
646 if (Buffer<T2, D2, S2>::T_is_void && !T_is_void) {
647 if (other.type() != static_halide_type()) {
648 return false;
649 }
650 }
651 if (Dims != AnyDims) {
652 if (other.dimensions() != Dims) {
653 return false;
654 }
655 }
656 return true;
657 }
658
659 /** Fail an assertion at runtime or compile-time if a Buffer<T, Dims, InClassDimStorage>
660 * cannot be constructed from some other Buffer type. */
661 template<typename T2, int D2, int S2>
662 static void assert_can_convert_from(const Buffer<T2, D2, S2> &other) {
663 // Explicitly call static_assert_can_convert_from() here so
664 // that we always get compile-time checking, even if compiling with
665 // assertions disabled.
666 static_assert_can_convert_from<T2, D2, S2>();
667 assert(can_convert_from(other));
668 }
669
670 /** Copy constructor. Does not copy underlying data. */
671 Buffer(const Buffer<T, Dims, InClassDimStorage> &other)
672 : buf(other.buf),
673 alloc(other.alloc) {
674 other.incref();
675 dev_ref_count = other.dev_ref_count;
676 copy_shape_from(other.buf);
677 }
678
679 /** Construct a Buffer from a Buffer of different dimensionality
680 * and type. Asserts that the type and dimensionality matches (at runtime,
681 * if one of the types is void). Note that this constructor is
682 * implicit. This, for example, lets you pass things like
683 * Buffer<T> or Buffer<const void> to functions expecting
684 * Buffer<const T>. */
685 template<typename T2, int D2, int S2>
686 Buffer(const Buffer<T2, D2, S2> &other)
687 : buf(other.buf),
688 alloc(other.alloc) {
689 assert_can_convert_from(other);
690 other.incref();
691 dev_ref_count = other.dev_ref_count;
692 copy_shape_from(other.buf);
693 }
694
695 /** Move constructor */
696 Buffer(Buffer<T, Dims, InClassDimStorage> &&other) noexcept
697 : buf(other.buf),
698 alloc(other.alloc),
699 dev_ref_count(other.dev_ref_count) {
700 other.dev_ref_count = nullptr;
701 other.alloc = nullptr;
702 move_shape_from(std::forward<Buffer<T, Dims, InClassDimStorage>>(other));
703 other.buf = halide_buffer_t();
704 }
705
706 /** Move-construct a Buffer from a Buffer of different
707 * dimensionality and type. Asserts that the types match (at
708 * runtime if one of the types is void). */
709 template<typename T2, int D2, int S2>
710 Buffer(Buffer<T2, D2, S2> &&other)
711 : buf(other.buf),
712 alloc(other.alloc),
713 dev_ref_count(other.dev_ref_count) {
714 assert_can_convert_from(other);
715 other.dev_ref_count = nullptr;
716 other.alloc = nullptr;
717 move_shape_from(std::forward<Buffer<T2, D2, S2>>(other));
718 other.buf = halide_buffer_t();
719 }
720
721 /** Assign from another Buffer of possibly-different
722 * dimensionality and type. Asserts that the types match (at
723 * runtime if one of the types is void). */
724 template<typename T2, int D2, int S2>
725 Buffer<T, Dims, InClassDimStorage> &operator=(const Buffer<T2, D2, S2> &other) {
726 if ((const void *)this == (const void *)&other) {
727 return *this;
728 }
729 assert_can_convert_from(other);
730 other.incref();
731 decref();
732 dev_ref_count = other.dev_ref_count;
733 alloc = other.alloc;
734 free_shape_storage();
735 buf = other.buf;
736 copy_shape_from(other.buf);
737 return *this;
738 }
739
740 /** Standard assignment operator */
741 Buffer<T, Dims, InClassDimStorage> &operator=(const Buffer<T, Dims, InClassDimStorage> &other) {
742 // The cast to void* here is just to satisfy clang-tidy
743 if ((const void *)this == (const void *)&other) {
744 return *this;
745 }
746 other.incref();
747 decref();
748 dev_ref_count = other.dev_ref_count;
749 alloc = other.alloc;
750 free_shape_storage();
751 buf = other.buf;
752 copy_shape_from(other.buf);
753 return *this;
754 }
755
756 /** Move from another Buffer of possibly-different
757 * dimensionality and type. Asserts that the types match (at
758 * runtime if one of the types is void). */
759 template<typename T2, int D2, int S2>
760 Buffer<T, Dims, InClassDimStorage> &operator=(Buffer<T2, D2, S2> &&other) {
761 assert_can_convert_from(other);
762 decref();
763 alloc = other.alloc;
764 other.alloc = nullptr;
765 dev_ref_count = other.dev_ref_count;
766 other.dev_ref_count = nullptr;
767 free_shape_storage();
768 buf = other.buf;
769 move_shape_from(std::forward<Buffer<T2, D2, S2>>(other));
770 other.buf = halide_buffer_t();
771 return *this;
772 }
773
774 /** Standard move-assignment operator */
775 Buffer<T, Dims, InClassDimStorage> &operator=(Buffer<T, Dims, InClassDimStorage> &&other) noexcept {
776 decref();
777 alloc = other.alloc;
778 other.alloc = nullptr;
779 dev_ref_count = other.dev_ref_count;
780 other.dev_ref_count = nullptr;
781 free_shape_storage();
782 buf = other.buf;
783 move_shape_from(std::forward<Buffer<T, Dims, InClassDimStorage>>(other));
784 other.buf = halide_buffer_t();
785 return *this;
786 }
787
788 /** Check that the product of the extents fits in memory. */
789 void check_overflow() {
790 size_t size = type().bytes();
791 for (int i = 0; i < dimensions(); i++) {
792 size *= dim(i).extent();
793 }
794 // We allow 2^31 or 2^63 bytes, so drop the top bit.
795 size = (size << 1) >> 1;
796 for (int i = 0; i < dimensions(); i++) {
797 size /= dim(i).extent();
798 }
799 assert(size == (size_t)type().bytes() && "Error: Overflow computing total size of buffer.");
800 }
801
802 /** Allocate memory for this Buffer. Drops the reference to any
803 * owned memory. */
804 void allocate(void *(*allocate_fn)(size_t) = nullptr,
805 void (*deallocate_fn)(void *) = nullptr) {
806 if (!allocate_fn) {
807 allocate_fn = malloc;
808 }
809 if (!deallocate_fn) {
810 deallocate_fn = free;
811 }
812
813 // Drop any existing allocation
814 deallocate();
815
816 // Conservatively align images to 128 bytes. This is enough
817 // alignment for all the platforms we might use.
818 size_t size = size_in_bytes();
819 const size_t alignment = 128;
820 size = (size + alignment - 1) & ~(alignment - 1);
821 void *alloc_storage = allocate_fn(size + sizeof(AllocationHeader) + alignment - 1);
822 alloc = new (alloc_storage) AllocationHeader(deallocate_fn);
823 uint8_t *unaligned_ptr = ((uint8_t *)alloc) + sizeof(AllocationHeader);
824 buf.host = (uint8_t *)((uintptr_t)(unaligned_ptr + alignment - 1) & ~(alignment - 1));
825 }
826
827 /** Drop reference to any owned host or device memory, possibly
828 * freeing it, if this buffer held the last reference to
829 * it. Retains the shape of the buffer. Does nothing if this
830 * buffer did not allocate its own memory. */
831 void deallocate() {
832 decref();
833 }
834
835 /** Drop reference to any owned device memory, possibly freeing it
836 * if this buffer held the last reference to it. Asserts that
837 * device_dirty is false. */
838 void device_deallocate() {
839 decref(true);
840 }
841
842 /** Allocate a new image of the given size with a runtime
843 * type. Only used when you do know what size you want but you
844 * don't know statically what type the elements are. Pass zeroes
845 * to make a buffer suitable for bounds query calls. */
846 template<typename... Args,
847 typename = typename std::enable_if<AllInts<Args...>::value>::type>
848 Buffer(halide_type_t t, int first, Args... rest) {
849 if (!T_is_void) {
850 assert(static_halide_type() == t);
851 }
852 int extents[] = {first, (int)rest...};
853 buf.type = t;
854 constexpr int buf_dimensions = 1 + (int)(sizeof...(rest));
855 make_static_shape_storage<buf_dimensions>();
856 initialize_shape(extents);
857 if (!Internal::any_zero(extents)) {
858 check_overflow();
859 allocate();
860 }
861 }
862
863 /** Allocate a new image of the given size. Pass zeroes to make a
864 * buffer suitable for bounds query calls. */
865 // @{
866
867 // The overload with one argument is 'explicit', so that
868 // (say) int is not implicitly convertible to Buffer<int>
869 explicit Buffer(int first) {
870 static_assert(!T_is_void,
871 "To construct an Buffer<void>, pass a halide_type_t as the first argument to the constructor");
872 int extents[] = {first};
873 buf.type = static_halide_type();
874 constexpr int buf_dimensions = 1;
875 make_static_shape_storage<buf_dimensions>();
876 initialize_shape(extents);
877 if (first != 0) {
878 check_overflow();
879 allocate();
880 }
881 }
882
883 template<typename... Args,
884 typename = typename std::enable_if<AllInts<Args...>::value>::type>
885 Buffer(int first, int second, Args... rest) {
886 static_assert(!T_is_void,
887 "To construct an Buffer<void>, pass a halide_type_t as the first argument to the constructor");
888 int extents[] = {first, second, (int)rest...};
889 buf.type = static_halide_type();
890 constexpr int buf_dimensions = 2 + (int)(sizeof...(rest));
891 make_static_shape_storage<buf_dimensions>();
892 initialize_shape(extents);
893 if (!Internal::any_zero(extents)) {
894 check_overflow();
895 allocate();
896 }
897 }
898 // @}
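// Illustrative sketch: zero extents make a metadata-only Buffer that is
// suitable for a bounds query against an AOT-compiled pipeline
// (my_pipeline is hypothetical):
//
//     Halide::Runtime::Buffer<uint8_t> in(0, 0), out(0, 0);
//     // my_pipeline(in, out);           // pipeline fills in the shapes it needs
//     // in.allocate(); out.allocate();  // then allocate to the queried shapes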
899
900 /** Allocate a new image of unknown type using a vector of ints as the size. */
901 Buffer(halide_type_t t, const std::vector<int> &sizes) {
902 if (!T_is_void) {
903 assert(static_halide_type() == t);
904 }
905 buf.type = t;
906 // make_shape_storage() will do a runtime check that dimensionality matches.
907 make_shape_storage((int)sizes.size());
908 initialize_shape(sizes);
909 if (!Internal::any_zero(sizes)) {
910 check_overflow();
911 allocate();
912 }
913 }
914
915 /** Allocate a new image of known type using a vector of ints as the size. */
916 explicit Buffer(const std::vector<int> &sizes)
917 : Buffer(static_halide_type(), sizes) {
918 }
919
920private:
921 // Create a copy of the sizes vector, ordered as specified by order.
922 static std::vector<int> make_ordered_sizes(const std::vector<int> &sizes, const std::vector<int> &order) {
923 assert(order.size() == sizes.size());
924 std::vector<int> ordered_sizes(sizes.size());
925 for (size_t i = 0; i < sizes.size(); ++i) {
926 ordered_sizes[i] = sizes.at(order[i]);
927 }
928 return ordered_sizes;
929 }
930
931public:
932 /** Allocate a new image of unknown type using a vector of ints as the size and
933 * a vector of indices indicating the storage order for each dimension. The
934 * length of the sizes vector and the storage-order vector must match. For instance,
935 * to allocate an interleaved RGB buffer, you would pass {2, 0, 1} for storage_order. */
936 Buffer(halide_type_t t, const std::vector<int> &sizes, const std::vector<int> &storage_order)
937 : Buffer(t, make_ordered_sizes(sizes, storage_order)) {
938 transpose(storage_order);
939 }
940
941 Buffer(const std::vector<int> &sizes, const std::vector<int> &storage_order)
942 : Buffer(static_halide_type(), sizes, storage_order) {
943 }
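// Illustrative sketch: an interleaved 640x480 RGB image. Dimension 2 (the
// channel) is innermost in memory but is still indexed last:
//
//     Halide::Runtime::Buffer<uint8_t> rgb({640, 480, 3}, {2, 0, 1});
//     // rgb.dim(2).stride() == 1 and rgb.dim(0).stride() == 3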
944
945 /** Make a Buffer that refers to a statically sized array. Does not
946 * take ownership of the data, and does not set the host_dirty flag. */
947 template<typename Array, size_t N>
948 explicit Buffer(Array (&vals)[N]) {
949 const int buf_dimensions = dimensionality_of_array(vals);
950 buf.type = scalar_type_of_array(vals);
951 buf.host = (uint8_t *)vals;
952 make_shape_storage(buf_dimensions);
953 initialize_shape_from_array_shape(buf.dimensions - 1, vals);
954 }
955
956 /** Initialize a Buffer of runtime type from a pointer and some
957 * sizes. Assumes dense row-major packing and a min coordinate of
958 * zero. Does not take ownership of the data and does not set the
959 * host_dirty flag. */
960 template<typename... Args,
961 typename = typename std::enable_if<AllInts<Args...>::value>::type>
962 explicit Buffer(halide_type_t t, add_const_if_T_is_const<void> *data, int first, Args &&...rest) {
963 if (!T_is_void) {
964 assert(static_halide_type() == t);
965 }
966 int extents[] = {first, (int)rest...};
967 buf.type = t;
968 buf.host = (uint8_t *)const_cast<void *>(data);
969 constexpr int buf_dimensions = 1 + (int)(sizeof...(rest));
970 make_static_shape_storage<buf_dimensions>();
971 initialize_shape(extents);
972 }
973
974 /** Initialize a Buffer from a pointer and some sizes. Assumes
975 * dense row-major packing and a min coordinate of zero. Does not
976 * take ownership of the data and does not set the host_dirty flag. */
977 template<typename... Args,
978 typename = typename std::enable_if<AllInts<Args...>::value>::type>
979 explicit Buffer(T *data, int first, Args &&...rest) {
980 int extents[] = {first, (int)rest...};
981 buf.type = static_halide_type();
982 buf.host = (uint8_t *)const_cast<typename std::remove_const<T>::type *>(data);
983 constexpr int buf_dimensions = 1 + (int)(sizeof...(rest));
984 make_static_shape_storage<buf_dimensions>();
985 initialize_shape(extents);
986 }
987
988 /** Initialize a Buffer from a pointer and a vector of
989 * sizes. Assumes dense row-major packing and a min coordinate of
990 * zero. Does not take ownership of the data and does not set the
991 * host_dirty flag. */
992 explicit Buffer(T *data, const std::vector<int> &sizes) {
993 buf.type = static_halide_type();
994 buf.host = (uint8_t *)const_cast<typename std::remove_const<T>::type *>(data);
995 make_shape_storage((int)sizes.size());
996 initialize_shape(sizes);
997 }
998
999 /** Initialize a Buffer of runtime type from a pointer and a
1000 * vector of sizes. Assumes dense row-major packing and a min
1001 * coordinate of zero. Does not take ownership of the data and
1002 * does not set the host_dirty flag. */
1003 explicit Buffer(halide_type_t t, add_const_if_T_is_const<void> *data, const std::vector<int> &sizes) {
1004 if (!T_is_void) {
1005 assert(static_halide_type() == t);
1006 }
1007 buf.type = t;
1008 buf.host = (uint8_t *)const_cast<void *>(data);
1009 make_shape_storage((int)sizes.size());
1010 initialize_shape(sizes);
1011 }
1012
1013 /** Initialize a Buffer from a pointer to the min coordinate and
1014 * an array describing the shape. Does not take ownership of the
1015 * data, and does not set the host_dirty flag. */
1016 explicit Buffer(halide_type_t t, add_const_if_T_is_const<void> *data, int d, const halide_dimension_t *shape) {
1017 if (!T_is_void) {
1018 assert(static_halide_type() == t);
1019 }
1020 buf.type = t;
1021 buf.host = (uint8_t *)const_cast<void *>(data);
1022 make_shape_storage(d);
1023 for (int i = 0; i < d; i++) {
1024 buf.dim[i] = shape[i];
1025 }
1026 }
1027
1028 /** Initialize a Buffer from a pointer to the min coordinate and
1029 * a vector describing the shape. Does not take ownership of the
1030 * data, and does not set the host_dirty flag. */
1031 explicit inline Buffer(halide_type_t t, add_const_if_T_is_const<void> *data,
1032 const std::vector<halide_dimension_t> &shape)
1033 : Buffer(t, data, (int)shape.size(), shape.data()) {
1034 }
1035
1036 /** Initialize a Buffer from a pointer to the min coordinate and
1037 * an array describing the shape. Does not take ownership of the
1038 * data and does not set the host_dirty flag. */
1039 explicit Buffer(T *data, int d, const halide_dimension_t *shape) {
1040 buf.type = static_halide_type();
1041 buf.host = (uint8_t *)const_cast<typename std::remove_const<T>::type *>(data);
1042 make_shape_storage(d);
1043 for (int i = 0; i < d; i++) {
1044 buf.dim[i] = shape[i];
1045 }
1046 }
1047
1048 /** Initialize a Buffer from a pointer to the min coordinate and
1049 * a vector describing the shape. Does not take ownership of the
1050 * data, and does not set the host_dirty flag. */
1051 explicit inline Buffer(T *data, const std::vector<halide_dimension_t> &shape)
1052 : Buffer(data, (int)shape.size(), shape.data()) {
1053 }
1054
1055 /** Destructor. Will release any underlying owned allocation if
1056 * this is the last reference to it. Will assert fail if there are
1057 * weak references to this Buffer outstanding. */
1058 ~Buffer() {
1059 decref();
1060 free_shape_storage();
1061 }
1062
1063 /** Get a pointer to the raw halide_buffer_t this wraps. */
1064 // @{
1065 halide_buffer_t *raw_buffer() {
1066 return &buf;
1067 }
1068
1069 const halide_buffer_t *raw_buffer() const {
1070 return &buf;
1071 }
1072 // @}
1073
1074 /** Provide a cast operator to halide_buffer_t *, so that
1075 * instances can be passed directly to Halide filters. */
1076 operator halide_buffer_t *() {
1077 return &buf;
1078 }
1079
1080 /** Return a typed reference to this Buffer. Useful for converting
1081 * a reference to a Buffer<void> to a reference to, for example, a
1082 * Buffer<const uint8_t>, or converting a Buffer<T>& to Buffer<const T>&.
1083 * You can also optionally specify a new value for Dims; this is useful
1084 * mainly for removing the dimensionality constraint on a Buffer with
1085 * explicit dimensionality. Does a runtime assert if the source buffer type
1086 * is void or the new dimensionality is incompatible. */
1087 template<typename T2, int D2 = Dims>
1088 HALIDE_ALWAYS_INLINE Buffer<T2, D2, InClassDimStorage> &as() & {
1089 Buffer<T2, D2, InClassDimStorage>::template assert_can_convert_from(*this);
1090 return *((Buffer<T2, D2, InClassDimStorage> *)this);
1091 }
1092
1093 /** Return a const typed reference to this Buffer. Useful for converting
1094 * a reference to a Buffer<void> to a reference to, for example, a
1095 * Buffer<const uint8_t>, or converting a Buffer<T>& to Buffer<const T>&.
1096 * You can also optionally specify a new value for Dims; this is useful
1097 * mainly for removing the dimensionality constraint on a Buffer with
1098 * explicit dimensionality. Does a runtime assert if the source buffer type
1099 * is void or the new dimensionality is incompatible. */
1100 template<typename T2, int D2 = Dims>
1101 HALIDE_ALWAYS_INLINE const Buffer<T2, D2, InClassDimStorage> &as() const & {
1102 Buffer<T2, D2, InClassDimStorage>::template assert_can_convert_from(*this);
1103 return *((const Buffer<T2, D2, InClassDimStorage> *)this);
1104 }
1105
1106 /** Return an rval reference to this Buffer. Useful for converting
1107 * a reference to a Buffer<void> to a reference to, for example, a
1108 * Buffer<const uint8_t>, or converting a Buffer<T>& to Buffer<const T>&.
1109 * You can also optionally specify a new value for Dims; this is useful
1110 * mainly for removing the dimensionality constraint on a Buffer with
1111 * explicit dimensionality. Does a runtime assert if the source buffer type
1112 * is void or the new dimensionality is incompatible. */
1113 template<typename T2, int D2 = Dims>
1114 HALIDE_ALWAYS_INLINE Buffer<T2, D2, InClassDimStorage> as() && {
1115 Buffer<T2, D2, InClassDimStorage>::template assert_can_convert_from(*this);
1116 return *((Buffer<T2, D2, InClassDimStorage> *)this);
1117 }
1118
1119 /** as_const() is syntactic sugar for .as<const T>(), to avoid the need
1120 * to recapitulate the type argument. */
1121 // @{
1122 HALIDE_ALWAYS_INLINE
1123 Buffer<typename std::add_const<T>::type, Dims, InClassDimStorage> &as_const() & {
1124 // Note that we can skip the assert_can_convert_from(), since T -> const T
1125 // conversion is always legal.
1126 return *((Buffer<typename std::add_const<T>::type, Dims, InClassDimStorage> *)this);
1127 }
1128
1129 HALIDE_ALWAYS_INLINE
1130 const Buffer<typename std::add_const<T>::type, Dims, InClassDimStorage> &as_const() const & {
1131 return *((const Buffer<typename std::add_const<T>::type, Dims, InClassDimStorage> *)this);
1132 }
1133
1134 HALIDE_ALWAYS_INLINE
1135 Buffer<typename std::add_const<T>::type, Dims, InClassDimStorage> as_const() && {
1136 return *((Buffer<typename std::add_const<T>::type, Dims, InClassDimStorage> *)this);
1137 }
1138 // @}
1139
1140 /** Conventional names for the first three dimensions. */
1141 // @{
1142 int width() const {
1143 return (dimensions() > 0) ? dim(0).extent() : 1;
1144 }
1145 int height() const {
1146 return (dimensions() > 1) ? dim(1).extent() : 1;
1147 }
1148 int channels() const {
1149 return (dimensions() > 2) ? dim(2).extent() : 1;
1150 }
1151 // @}
1152
1153 /** Conventional names for the min and max value of each dimension */
1154 // @{
1155 int left() const {
1156 return dim(0).min();
1157 }
1158
1159 int right() const {
1160 return dim(0).max();
1161 }
1162
1163 int top() const {
1164 return dim(1).min();
1165 }
1166
1167 int bottom() const {
1168 return dim(1).max();
1169 }
1170 // @}
1171
1172 /** Make a new image which is a deep copy of this image. Use crop
1173 * or slice followed by copy to make a copy of only a portion of
1174 * the image. The new image uses the same memory layout as the
1175 * original, with holes compacted away. Note that the returned
1176 * Buffer is always of a non-const type T (i.e.:
1177 *
1178 * Buffer<const T>.copy() -> Buffer<T> rather than Buffer<const T>
1179 *
1180 * which is always safe, since we are making a deep copy. (The caller
1181 * can easily cast it back to Buffer<const T> if desired, which is
1182 * always safe and free.)
1183 */
1184 Buffer<not_const_T, Dims, InClassDimStorage> copy(void *(*allocate_fn)(size_t) = nullptr,
1185 void (*deallocate_fn)(void *) = nullptr) const {
1186 Buffer<not_const_T, Dims, InClassDimStorage> dst = Buffer<not_const_T, Dims, InClassDimStorage>::make_with_shape_of(*this, allocate_fn, deallocate_fn);
1187 dst.copy_from(*this);
1188 return dst;
1189 }
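// Illustrative sketch (data is a hypothetical pointer to existing pixels):
//
//     Halide::Runtime::Buffer<const float> src(data, 640, 480);  // wraps, does not own
//     Halide::Runtime::Buffer<float> dst = src.copy();           // fresh, mutable deep copy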
1190
1191 /** Like copy(), but the copy is created in interleaved memory layout
1192 * (vs. keeping the same memory layout as the original). Requires that 'this'
1193 * has exactly 3 dimensions.
1194 */
1195 Buffer<not_const_T, Dims, InClassDimStorage> copy_to_interleaved(void *(*allocate_fn)(size_t) = nullptr,
1196 void (*deallocate_fn)(void *) = nullptr) const {
1197 static_assert(Dims == AnyDims || Dims == 3);
1198 assert(dimensions() == 3);
1199 Buffer<not_const_T, Dims, InClassDimStorage> dst = Buffer<not_const_T, Dims, InClassDimStorage>::make_interleaved(nullptr, extent(0), extent(1), extent(2));
1200 dst.set_min(min(0), min(1), min(2));
1201 dst.allocate(allocate_fn, deallocate_fn);
1202 dst.copy_from(*this);
1203 return dst;
1204 }
1205
1206 /** Like copy(), but the copy is created in planar memory layout
1207 * (vs. keeping the same memory layout as the original).
1208 */
1209 Buffer<not_const_T, Dims, InClassDimStorage> copy_to_planar(void *(*allocate_fn)(size_t) = nullptr,
1210 void (*deallocate_fn)(void *) = nullptr) const {
1211 std::vector<int> mins, extents;
1212 const int dims = dimensions();
1213 mins.reserve(dims);
1214 extents.reserve(dims);
1215 for (int d = 0; d < dims; ++d) {
1216 mins.push_back(dim(d).min());
1217 extents.push_back(dim(d).extent());
1218 }
1219 Buffer<not_const_T, Dims, InClassDimStorage> dst(type(), nullptr, extents);
1220 dst.set_min(mins);
1221 dst.allocate(allocate_fn, deallocate_fn);
1222 dst.copy_from(*this);
1223 return dst;
1224 }
1225
1226 /** Make a copy of the Buffer which shares the underlying host and/or device
1227 * allocations with the existing Buffer. This is purely syntactic sugar for
1228 * cases where you have a const reference to a Buffer but need a temporary
1229 * non-const copy (e.g. to make a call into AOT-generated Halide code), and want a terse
1230 * inline way to create a temporary. \code
1231 * void call_my_func(const Buffer<const uint8_t>& input) {
1232 * my_func(input.alias(), output);
1233 * }\endcode
1234 */
1235 Buffer<T, Dims, InClassDimStorage> alias() const {
1236 return *this;
1237 }
1238
1239 /** Fill a Buffer with the values at the same coordinates in
1240 * another Buffer. Restricts itself to coordinates contained
1241 * within the intersection of the two buffers. If the two Buffers
1242 * are not in the same coordinate system, you will need to
1243 * translate the argument Buffer first. E.g. if you're blitting a
1244 * sprite onto a framebuffer, you'll want to translate the sprite
1245 * to the correct location first like so: \code
1246 * framebuffer.copy_from(sprite.translated({x, y})); \endcode
1247 */
1248 template<typename T2, int D2, int S2>
1249 void copy_from(Buffer<T2, D2, S2> src) {
1250 static_assert(!std::is_const<T>::value, "Cannot call copy_from() on a Buffer<const T>");
1251 assert(!device_dirty() && "Cannot call Halide::Runtime::Buffer::copy_from on a device dirty destination.");
1252 assert(!src.device_dirty() && "Cannot call Halide::Runtime::Buffer::copy_from on a device dirty source.");
1253
1254 Buffer<T, Dims, InClassDimStorage> dst(*this);
1255
1256 static_assert(Dims == AnyDims || D2 == AnyDims || Dims == D2);
1257 assert(src.dimensions() == dst.dimensions());
1258
1259 // Trim the copy to the region in common
1260 const int d = dimensions();
1261 for (int i = 0; i < d; i++) {
1262 int min_coord = std::max(dst.dim(i).min(), src.dim(i).min());
1263 int max_coord = std::min(dst.dim(i).max(), src.dim(i).max());
1264 if (max_coord < min_coord) {
1265 // The buffers do not overlap.
1266 return;
1267 }
1268 dst.crop(i, min_coord, max_coord - min_coord + 1);
1269 src.crop(i, min_coord, max_coord - min_coord + 1);
1270 }
1271
1272 // If T is void, we need to do runtime dispatch to an
1273 // appropriately-typed lambda. We're copying, so we only care
1274 // about the element size. (If not, this should optimize away
1275 // into a static dispatch to the right-sized copy.)
1276 if (T_is_void ? (type().bytes() == 1) : (sizeof(not_void_T) == 1)) {
1277 using MemType = uint8_t;
1278 auto &typed_dst = (Buffer<MemType, Dims, InClassDimStorage> &)dst;
1279 auto &typed_src = (Buffer<const MemType, D2, S2> &)src;
1280 typed_dst.for_each_value([&](MemType &dst, MemType src) { dst = src; }, typed_src);
1281 } else if (T_is_void ? (type().bytes() == 2) : (sizeof(not_void_T) == 2)) {
1282 using MemType = uint16_t;
1283 auto &typed_dst = (Buffer<MemType, Dims, InClassDimStorage> &)dst;
1284 auto &typed_src = (Buffer<const MemType, D2, S2> &)src;
1285 typed_dst.for_each_value([&](MemType &dst, MemType src) { dst = src; }, typed_src);
1286 } else if (T_is_void ? (type().bytes() == 4) : (sizeof(not_void_T) == 4)) {
1287 using MemType = uint32_t;
1288 auto &typed_dst = (Buffer<MemType, Dims, InClassDimStorage> &)dst;
1289 auto &typed_src = (Buffer<const MemType, D2, S2> &)src;
1290 typed_dst.for_each_value([&](MemType &dst, MemType src) { dst = src; }, typed_src);
1291 } else if (T_is_void ? (type().bytes() == 8) : (sizeof(not_void_T) == 8)) {
1292 using MemType = uint64_t;
1293 auto &typed_dst = (Buffer<MemType, Dims, InClassDimStorage> &)dst;
1294 auto &typed_src = (Buffer<const MemType, D2, S2> &)src;
1295 typed_dst.for_each_value([&](MemType &dst, MemType src) { dst = src; }, typed_src);
1296 } else {
1297 assert(false && "type().bytes() must be 1, 2, 4, or 8");
1298 }
1299 set_host_dirty();
1300 }
1301
1302 /** Make an image that refers to a sub-range of this image along
1303 * the given dimension. Asserts that the crop region is within
1304 * the existing bounds: you cannot "crop outwards", even if you know there
1305 * is valid Buffer storage (e.g. because you already cropped inwards). */
1306 Buffer<T, Dims, InClassDimStorage> cropped(int d, int min, int extent) const {
1307 // Make a fresh copy of the underlying buffer (but not a fresh
1308 // copy of the allocation, if there is one).
1309 Buffer<T, Dims, InClassDimStorage> im = *this;
1310
1311 // This guarantees the preexisting device ref is dropped if the
1312 // device_crop call fails and maintains the buffer in a consistent
1313 // state.
1314 im.device_deallocate();
1315
1316 im.crop_host(d, min, extent);
1317 if (buf.device_interface != nullptr) {
1318 complete_device_crop(im);
1319 }
1320 return im;
1321 }
1322
1323 /** Crop an image in-place along the given dimension. This does
1324 * not move any data around in memory - it just changes the min
1325 * and extent of the given dimension. */
1326 void crop(int d, int min, int extent) {
1327 // An optimization for non-device buffers. For the device case,
1328 // a temp buffer is required, so reuse the not-in-place version.
1329 // TODO(zalman|abadams): Are nop crops common enough to special
1330 // case the device part of the if to do nothing?
1331 if (buf.device_interface != nullptr) {
1332 *this = cropped(d, min, extent);
1333 } else {
1334 crop_host(d, min, extent);
1335 }
1336 }
1337
1338 /** Make an image that refers to a sub-rectangle of this image along
1339 * the first N dimensions. Asserts that the crop region is within
1340 * the existing bounds. The cropped image may drop any device handle
1341 * if the device_interface cannot accomplish the crop in-place. */
1342 Buffer<T, Dims, InClassDimStorage> cropped(const std::vector<std::pair<int, int>> &rect) const {
1343 // Make a fresh copy of the underlying buffer (but not a fresh
1344 // copy of the allocation, if there is one).
1345 Buffer<T, Dims, InClassDimStorage> im = *this;
1346
1347 // This guarantees the preexisting device ref is dropped if the
1348 // device_crop call fails and maintains the buffer in a consistent
1349 // state.
1350 im.device_deallocate();
1351
1352 im.crop_host(rect);
1353 if (buf.device_interface != nullptr) {
1354 complete_device_crop(im);
1355 }
1356 return im;
1357 }
1358
1359 /** Crop an image in-place along the first N dimensions. This does
1360 * not move any data around in memory, nor does it free memory. It
1361 * just rewrites the min/extent of each dimension to refer to a
1362 * subregion of the same allocation. */
1363 void crop(const std::vector<std::pair<int, int>> &rect) {
1364 // An optimization for non-device buffers. For the device case,
1365 // a temp buffer is required, so reuse the not-in-place version.
1366 // TODO(zalman|abadams): Are nop crops common enough to special
1367 // case the device part of the if to do nothing?
1368 if (buf.device_interface != nullptr) {
1369 *this = cropped(rect);
1370 } else {
1371 crop_host(rect);
1372 }
1373 }
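// Illustrative sketch: restrict a 640x480 image (im is hypothetical) to a
// centered 320x240 window. No data moves; only mins and extents change:
//
//     im.crop({{160, 320}, {120, 240}});                    // in-place, {min, extent} per dimension
//     auto window = im.cropped({{160, 320}, {120, 240}});   // or as a separate view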
1374
1375 /** Make an image which refers to the same data using
1376 * translated coordinates in the given dimension. Positive values
1377 * move the image data to the right or down relative to the
1378 * coordinate system. Drops any device handle. */
1379 Buffer<T, Dims, InClassDimStorage> translated(int d, int dx) const {
1380 Buffer<T, Dims, InClassDimStorage> im = *this;
1381 im.translate(d, dx);
1382 return im;
1383 }
1384
1385 /** Translate an image in-place along one dimension by changing
1386 * how it is indexed. Does not move any data around in memory. */
1387 void translate(int d, int delta) {
1388 assert(d >= 0 && d < this->dimensions());
1389 device_deallocate();
1390 buf.dim[d].min += delta;
1391 }
1392
1393 /** Make an image which refers to the same data translated along
1394 * the first N dimensions. */
1395 Buffer<T, Dims, InClassDimStorage> translated(const std::vector<int> &delta) const {
1396 Buffer<T, Dims, InClassDimStorage> im = *this;
1397 im.translate(delta);
1398 return im;
1399 }
1400
1401 /** Translate an image along the first N dimensions by changing
1402 * how it is indexed. Does not move any data around in memory. */
1403 void translate(const std::vector<int> &delta) {
1404 device_deallocate();
1405 assert(delta.size() <= static_cast<decltype(delta.size())>(std::numeric_limits<int>::max()));
1406 int limit = (int)delta.size();
1407 assert(limit <= dimensions());
1408 for (int i = 0; i < limit; i++) {
1409 translate(i, delta[i]);
1410 }
1411 }
1412
1413 /** Set the min coordinate of an image in the first N dimensions. */
1414 // @{
1415 void set_min(const std::vector<int> &mins) {
1416 assert(mins.size() <= static_cast<decltype(mins.size())>(dimensions()));
1417 device_deallocate();
1418 for (size_t i = 0; i < mins.size(); i++) {
1419 buf.dim[i].min = mins[i];
1420 }
1421 }
1422
1423 template<typename... Args>
1424 void set_min(Args... args) {
1425 set_min(std::vector<int>{args...});
1426 }
1427 // @}
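// Illustrative sketch: give a tile its global coordinates so a later
// copy_from() lines up with a full-size destination (tile is hypothetical):
//
//     tile.set_min(128, 64);   // tile's lowest coordinate is now (128, 64)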
1428
1429 /** Test if a given coordinate is within the bounds of an image. */
1430 // @{
1431 bool contains(const std::vector<int> &coords) const {
1432 assert(coords.size() <= static_cast<decltype(coords.size())>(dimensions()));
1433 for (size_t i = 0; i < coords.size(); i++) {
1434 if (coords[i] < dim((int)i).min() || coords[i] > dim((int)i).max()) {
1435 return false;
1436 }
1437 }
1438 return true;
1439 }
1440
1441 template<typename... Args>
1442 bool contains(Args... args) const {
1443 return contains(std::vector<int>{args...});
1444 }
1445 // @}
1446
1447 /** Make a buffer which refers to the same data in the same layout
1448 * using a swapped indexing order for the dimensions given. So
1449 * A = B.transposed(0, 1) means that A(i, j) == B(j, i), and more
1450 * strongly that A.address_of(i, j) == B.address_of(j, i). */
1451 Buffer<T, Dims, InClassDimStorage> transposed(int d1, int d2) const {
1452 Buffer<T, Dims, InClassDimStorage> im = *this;
1453 im.transpose(d1, d2);
1454 return im;
1455 }
1456
1457 /** Transpose a buffer in-place by changing how it is indexed. For
1458 * example, transpose(0, 1) on a two-dimensional buffer means that
1459 * the value referred to by coordinates (i, j) is now reached at
1460 * the coordinates (j, i), and vice versa. This is done by
1461 * reordering the per-dimension metadata rather than by moving
1462 * data around in memory, so other views of the same memory will
1463 * not see the data as having been transposed. */
1464 void transpose(int d1, int d2) {
1465 assert(d1 >= 0 && d1 < this->dimensions());
1466 assert(d2 >= 0 && d2 < this->dimensions());
1467 std::swap(buf.dim[d1], buf.dim[d2]);
1468 }
1469
1470 /** A generalized transpose: instead of swapping two dimensions,
1471 * pass a vector that lists each dimension index exactly once, in
1472 * the desired order. This does not move any data around in memory
1473 * - it just permutes how it is indexed. */
1474 void transpose(const std::vector<int> &order) {
1475 assert((int)order.size() == dimensions());
1476 if (dimensions() < 2) {
1477 // My, that was easy
1478 return;
1479 }
1480
1481 std::vector<int> order_sorted = order;
1482 for (size_t i = 1; i < order_sorted.size(); i++) {
1483 for (size_t j = i; j > 0 && order_sorted[j - 1] > order_sorted[j]; j--) {
1484 std::swap(order_sorted[j], order_sorted[j - 1]);
1485 transpose(j, j - 1);
1486 }
1487 }
1488 }
1489
1490 /** Make a buffer which refers to the same data in the same
1491 * layout using a different ordering of the dimensions. */
1492 Buffer<T, Dims, InClassDimStorage> transposed(const std::vector<int> &order) const {
1493 Buffer<T, Dims, InClassDimStorage> im = *this;
1494 im.transpose(order);
1495 return im;
1496 }
1497
1498 /** Make a lower-dimensional buffer that refers to one slice of
1499 * this buffer. */
1500 Buffer<T, (Dims == AnyDims ? AnyDims : Dims - 1)>
1501 sliced(int d, int pos) const {
1502 static_assert(Dims == AnyDims || Dims > 0, "Cannot slice a 0-dimensional buffer");
1503 assert(dimensions() > 0);
1504
1505 Buffer<T, AnyDims, InClassDimStorage> im = *this;
1506
1507 // This guarantees the preexisting device ref is dropped if the
1508 // device_slice call fails and maintains the buffer in a consistent
1509 // state.
1510 im.device_deallocate();
1511
1512 im.slice_host(d, pos);
1513 if (buf.device_interface != nullptr) {
1514 complete_device_slice(im, d, pos);
1515 }
1516 return im;
1517 }
1518
1519 /** Make a lower-dimensional buffer that refers to one slice of this
1520 * buffer at the dimension's minimum. */
1521 Buffer<T, (Dims == AnyDims ? AnyDims : Dims - 1)>
1522 sliced(int d) const {
1523 static_assert(Dims == AnyDims || Dims > 0, "Cannot slice a 0-dimensional buffer");
1524 assert(dimensions() > 0);
1525
1526 return sliced(d, dim(d).min());
1527 }
1528
1529 /** Rewrite the buffer to refer to a single lower-dimensional
1530 * slice of itself along the given dimension at the given
1531 * coordinate. Does not move any data around or free the original
1532 * memory, so other views of the same data are unaffected. Can
1533 * only be called on a Buffer with dynamic dimensionality. */
1534 void slice(int d, int pos) {
1535 static_assert(Dims == AnyDims, "Cannot call slice() on a Buffer with static dimensionality.");
1536 assert(dimensions() > 0);
1537
1538 // An optimization for non-device buffers. For the device case,
1539 // a temp buffer is required, so reuse the not-in-place version.
1540 // TODO(zalman|abadams): Are nop slices common enough to special
1541 // case the device part of the if to do nothing?
1542 if (buf.device_interface != nullptr) {
1543 *this = sliced(d, pos);
1544 } else {
1545 slice_host(d, pos);
1546 }
1547 }
1548
1549 /** Slice a buffer in-place at the dimension's minimum. */
1550 inline void slice(int d) {
1551 slice(d, dim(d).min());
1552 }
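// Illustrative sketch: view channel 0 of a three-dimensional image as a
// two-dimensional buffer sharing the same storage:
//
//     Halide::Runtime::Buffer<float> im(640, 480, 3);
//     auto red = im.sliced(2, 0);   // 2-D view of channel 0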
1553
1554 /** Make a new buffer that views this buffer as a single slice in a
1555 * higher-dimensional space. The new dimension has extent one and
1556 * the given min. This operation is the opposite of slice. As an
1557 * example, the following condition is true:
1558 *
1559 \code
1560 im2 = im.embedded(1, 17);
1561 &im(x, y, c) == &im2(x, 17, y, c);
1562 \endcode
1563 */
1564 Buffer<T, (Dims == AnyDims ? AnyDims : Dims + 1)>
1565 embedded(int d, int pos = 0) const {
1566 Buffer<T, AnyDims, InClassDimStorage> im(*this);
1567 im.embed(d, pos);
1568 return im;
1569 }
1570
1571 /** Embed a buffer in-place, increasing the
1572 * dimensionality. */
1573 void embed(int d, int pos = 0) {
1574 static_assert(Dims == AnyDims, "Cannot call embed() on a Buffer with static dimensionality.");
1575 assert(d >= 0 && d <= dimensions());
1576 add_dimension();
1577 translate(dimensions() - 1, pos);
1578 for (int i = dimensions() - 1; i > d; i--) {
1579 transpose(i, i - 1);
1580 }
1581 }
1582
1583 /** Add a new dimension with a min of zero and an extent of
1584 * one. The stride is the extent of the outermost dimension times
1585 * its stride. The new dimension is the last dimension. This is a
1586 * special case of embed. */
1587 void add_dimension() {
1588 static_assert(Dims == AnyDims, "Cannot call add_dimension() on a Buffer with static dimensionality.");
1589 const int dims = buf.dimensions;
1590 buf.dimensions++;
1591 if (buf.dim != shape) {
1592 // We're already on the heap. Reallocate.
1593 halide_dimension_t *new_shape = new halide_dimension_t[buf.dimensions];
1594 for (int i = 0; i < dims; i++) {
1595 new_shape[i] = buf.dim[i];
1596 }
1597 delete[] buf.dim;
1598 buf.dim = new_shape;
1599 } else if (dims == InClassDimStorage) {
1600 // Transition from the in-class storage to the heap
1601 make_shape_storage(buf.dimensions);
1602 for (int i = 0; i < dims; i++) {
1603 buf.dim[i] = shape[i];
1604 }
1605 } else {
1606 // We still fit in the class
1607 }
1608 buf.dim[dims] = {0, 1, 0};
1609 if (dims == 0) {
1610 buf.dim[dims].stride = 1;
1611 } else {
1612 buf.dim[dims].stride = buf.dim[dims - 1].extent * buf.dim[dims - 1].stride;
1613 }
1614 }
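/* Usage sketch (illustrative). The appended dimension has min 0, extent 1,
 and a stride equal to the previous last dimension's extent times its stride:

 \code
 Buffer<float> im(10, 20);   // dim 1: extent 20, stride 10
 im.add_dimension();
 // im.dimensions() == 3
 // im.dim(2).min() == 0, im.dim(2).extent() == 1, im.dim(2).stride() == 200
 \endcode
 */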
1615
1616 /** Add a new dimension with a min of zero, an extent of one, and
1617 * the specified stride. The new dimension is the last
1618 * dimension. This is a special case of embed. */
1619 void add_dimension_with_stride(int s) {
1620 add_dimension();
1621 buf.dim[buf.dimensions - 1].stride = s;
1622 }
1623
1624 /** Methods for managing any GPU allocation. */
1625 // @{
1626 // Set the host dirty flag. Called by every operator()
1627 // access. Must be inlined so it can be hoisted out of loops.
1628 HALIDE_ALWAYS_INLINE
1629 void set_host_dirty(bool v = true) {
1630 assert((!v || !device_dirty()) && "Cannot set host dirty when device is already dirty. Call copy_to_host() before accessing the buffer from host.");
1631 buf.set_host_dirty(v);
1632 }
1633
1634 // Check if the device allocation is dirty. Called by
1635 // set_host_dirty, which is called by every accessor. Must be
1636 // inlined so it can be hoisted out of loops.
1637 HALIDE_ALWAYS_INLINE
1638 bool device_dirty() const {
1639 return buf.device_dirty();
1640 }
1641
1642 bool host_dirty() const {
1643 return buf.host_dirty();
1644 }
1645
1646 void set_device_dirty(bool v = true) {
1647 assert((!v || !host_dirty()) && "Cannot set device dirty when host is already dirty.");
1648 buf.set_device_dirty(v);
1649 }
1650
1651 int copy_to_host(void *ctx = nullptr) {
1652 if (device_dirty()) {
1653 return buf.device_interface->copy_to_host(ctx, &buf);
1654 }
1655 return 0;
1656 }
1657
1658 int copy_to_device(const struct halide_device_interface_t *device_interface, void *ctx = nullptr) {
1659 if (host_dirty()) {
1660 return device_interface->copy_to_device(ctx, &buf, device_interface);
1661 }
1662 return 0;
1663 }
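/* Usage sketch of the dirty-flag protocol (illustrative; "device_interface"
 stands for a halide_device_interface_t obtained from a device runtime header
 such as HalideRuntimeCuda.h, and is not defined in this file):

 \code
 Buffer<float> im(256, 256);
 im.fill(0.0f);                        // host writes mark host_dirty
 im.copy_to_device(device_interface);  // copies only because host is dirty
 // ... a pipeline writes to im on the device, marking device_dirty ...
 im.copy_to_host();                    // copies back only because device is dirty
 float v = im(0, 0);                   // host access is safe again
 \endcode
 */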
1664
1665 int device_malloc(const struct halide_device_interface_t *device_interface, void *ctx = nullptr) {
1666 return device_interface->device_malloc(ctx, &buf, device_interface);
1667 }
1668
1669 int device_free(void *ctx = nullptr) {
1670 if (dev_ref_count) {
1671 assert(dev_ref_count->ownership == BufferDeviceOwnership::Allocated &&
1672 "Can't call device_free on an unmanaged or wrapped native device handle. "
1673 "Free the source allocation or call device_detach_native instead.");
1674 // Multiple people may be holding onto this dev field
1675 assert(dev_ref_count->count == 1 &&
1676 "Multiple Halide::Runtime::Buffer objects share this device "
1677 "allocation. Freeing it would create dangling references. "
1678 "Don't call device_free on Halide buffers that you have copied or "
1679 "passed by value.");
1680 }
1681 int ret = 0;
1682 if (buf.device_interface) {
1683 ret = buf.device_interface->device_free(ctx, &buf);
1684 }
1685 if (dev_ref_count) {
1686 delete dev_ref_count;
1687 dev_ref_count = nullptr;
1688 }
1689 return ret;
1690 }
1691
1692 int device_wrap_native(const struct halide_device_interface_t *device_interface,
1693 uint64_t handle, void *ctx = nullptr) {
1694 assert(device_interface);
1695 dev_ref_count = new DeviceRefCount;
1696 dev_ref_count->ownership = BufferDeviceOwnership::WrappedNative;
1697 return device_interface->wrap_native(ctx, &buf, handle, device_interface);
1698 }
1699
1700 int device_detach_native(void *ctx = nullptr) {
1701 assert(dev_ref_count &&
1703 "Only call device_detach_native on buffers wrapping a native "
1704 "device handle via device_wrap_native. This buffer was allocated "
1705 "using device_malloc, or is unmanaged. "
1706 "Call device_free or free the original allocation instead.");
1707 // Multiple people may be holding onto this dev field
1708 assert(dev_ref_count->count == 1 &&
1709 "Multiple Halide::Runtime::Buffer objects share this device "
1710 "allocation. Freeing it could create dangling references. "
1711 "Don't call device_detach_native on Halide buffers that you "
1712 "have copied or passed by value.");
1713 int ret = 0;
1714 if (buf.device_interface) {
1715 ret = buf.device_interface->detach_native(ctx, &buf);
1716 }
1717 delete dev_ref_count;
1718 dev_ref_count = nullptr;
1719 return ret;
1720 }
1721
1722 int device_and_host_malloc(const struct halide_device_interface_t *device_interface, void *ctx = nullptr) {
1723 return device_interface->device_and_host_malloc(ctx, &buf, device_interface);
1724 }
1725
1726 int device_and_host_free(const struct halide_device_interface_t *device_interface, void *ctx = nullptr) {
1727 if (dev_ref_count) {
1729 "Can't call device_and_host_free on a device handle not allocated with device_and_host_malloc. "
1730 "Free the source allocation or call device_detach_native instead.");
1731 // Multiple people may be holding onto this dev field
1732 assert(dev_ref_count->count == 1 &&
1733 "Multiple Halide::Runtime::Buffer objects share this device "
1734 "allocation. Freeing it would create dangling references. "
1735 "Don't call device_and_host_free on Halide buffers that you have copied or "
1736 "passed by value.");
1737 }
1738 int ret = 0;
1739 if (buf.device_interface) {
1740 ret = buf.device_interface->device_and_host_free(ctx, &buf);
1741 }
1742 if (dev_ref_count) {
1743 delete dev_ref_count;
1744 dev_ref_count = nullptr;
1745 }
1746 return ret;
1747 }
1748
1749 int device_sync(void *ctx = nullptr) {
1750 return buf.device_sync(ctx);
1751 }
1752
1753 bool has_device_allocation() const {
1754 return buf.device != 0;
1755 }
1756
1757 /** Return the method by which the device field is managed. */
1758 BufferDeviceOwnership device_ownership() const {
1759 if (dev_ref_count == nullptr) {
1760 return BufferDeviceOwnership::Allocated;
1761 }
1762 return dev_ref_count->ownership;
1763 }
1764 // @}
1765
1766 /** If you use the (x, y, c) indexing convention, then Halide
1767 * Buffers are stored planar by default. This function constructs
1768 * an interleaved RGB or RGBA image that can still be indexed
1769 * using (x, y, c). Passing it to a generator requires that the
1770 * generator has been compiled with support for interleaved (also
1771 * known as packed or chunky) memory layouts. */
1772 static Buffer<void, Dims, InClassDimStorage> make_interleaved(halide_type_t t, int width, int height, int channels) {
1773 static_assert(Dims == AnyDims || Dims == 3, "make_interleaved() must be called on a Buffer that can represent 3 dimensions.");
1774 Buffer<void, Dims, InClassDimStorage> im(t, channels, width, height);
1775 // Note that this is equivalent to calling transpose({2, 0, 1}),
1776 // but slightly more efficient.
1777 im.transpose(0, 1);
1778 im.transpose(1, 2);
1779 return im;
1780 }
1781
1782 /** If you use the (x, y, c) indexing convention, then Halide
1783 * Buffers are stored planar by default. This function constructs
1784 * an interleaved RGB or RGBA image that can still be indexed
1785 * using (x, y, c). Passing it to a generator requires that the
1786 * generator has been compiled with support for interleaved (also
1787 * known as packed or chunky) memory layouts. */
1788 static Buffer<T, Dims, InClassDimStorage> make_interleaved(int width, int height, int channels) {
1789 return make_interleaved(static_halide_type(), width, height, channels);
1790 }
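/* Usage sketch (illustrative). An interleaved buffer is still indexed
 (x, y, c), but the channel dimension has stride 1:

 \code
 auto im = Buffer<uint8_t>::make_interleaved(640, 480, 3);
 // im.dim(0).stride() == 3          (x steps over all channels)
 // im.dim(1).stride() == 3 * 640
 // im.dim(2).stride() == 1          (channels are adjacent in memory)
 \endcode
 */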
1791
1792 /** Wrap an existing interleaved image. */
1793 static Buffer<add_const_if_T_is_const<void>, Dims, InClassDimStorage>
1794 make_interleaved(halide_type_t t, T *data, int width, int height, int channels) {
1795 static_assert(Dims == AnyDims || Dims == 3, "make_interleaved() must be called on a Buffer that can represent 3 dimensions.");
1796 Buffer<add_const_if_T_is_const<void>, Dims, InClassDimStorage> im(t, data, channels, width, height);
1797 im.transpose(0, 1);
1798 im.transpose(1, 2);
1799 return im;
1800 }
1801
1802 /** Wrap an existing interleaved image. */
1803 static Buffer<T, Dims, InClassDimStorage> make_interleaved(T *data, int width, int height, int channels) {
1804 return make_interleaved(static_halide_type(), data, width, height, channels);
1805 }
1806
1807 /** Make a zero-dimensional Buffer */
1808 static Buffer<add_const_if_T_is_const<void>, Dims, InClassDimStorage> make_scalar(halide_type_t t) {
1809 static_assert(Dims == AnyDims || Dims == 0, "make_scalar() must be called on a Buffer that can represent 0 dimensions.");
1810 Buffer<add_const_if_T_is_const<void>, AnyDims, InClassDimStorage> buf(t, 1);
1811 buf.slice(0, 0);
1812 return buf;
1813 }
1814
1815 /** Make a zero-dimensional Buffer */
1816 static Buffer<T, Dims, InClassDimStorage> make_scalar() {
1817 static_assert(Dims == AnyDims || Dims == 0, "make_scalar() must be called on a Buffer that can represent 0 dimensions.");
1818 Buffer<T, AnyDims, InClassDimStorage> buf(1);
1819 buf.slice(0, 0);
1820 return buf;
1821 }
1822
1823 /** Make a zero-dimensional Buffer that points to non-owned, existing data */
1824 static Buffer<T, Dims, InClassDimStorage> make_scalar(T *data) {
1825 static_assert(Dims == AnyDims || Dims == 0, "make_scalar() must be called on a Buffer that can represent 0 dimensions.");
1826 Buffer<T, AnyDims, InClassDimStorage> buf(data, 1);
1827 buf.slice(0, 0);
1828 return buf;
1829 }
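/* Usage sketch (illustrative). A zero-dimensional buffer holds a single value
 and is accessed with the zero-argument operator():

 \code
 auto s = Buffer<float>::make_scalar();
 s() = 42.0f;
 float v = s();
 \endcode
 */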
1830
1831 /** Make a buffer with the same shape and memory nesting order as
1832 * another buffer. It may have a different type. */
1833 template<typename T2, int D2, int S2>
1834 static Buffer<T, Dims, InClassDimStorage> make_with_shape_of(Buffer<T2, D2, S2> src,
1835 void *(*allocate_fn)(size_t) = nullptr,
1836 void (*deallocate_fn)(void *) = nullptr) {
1837 static_assert(Dims == D2 || Dims == AnyDims);
1838 const halide_type_t dst_type = T_is_void ? src.type() : halide_type_of<typename std::remove_cv<not_void_T>::type>();
1839 return Buffer<>::make_with_shape_of_helper(dst_type, src.dimensions(), src.buf.dim,
1840 allocate_fn, deallocate_fn);
1841 }
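/* Usage sketch (illustrative). The new buffer copies the shape and dimension
 nesting order of the source, but gets its own dense allocation and may use a
 different element type:

 \code
 auto rgb  = Buffer<uint8_t>::make_interleaved(640, 480, 3);
 auto mask = Buffer<float>::make_with_shape_of(rgb);
 // mask has the same mins/extents and the same (interleaved) nesting order
 // as rgb, but owns its own float storage.
 \endcode
 */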
1842
1843private:
1844 static Buffer<> make_with_shape_of_helper(halide_type_t dst_type,
1845 int dimensions,
1846 halide_dimension_t *shape,
1847 void *(*allocate_fn)(size_t),
1848 void (*deallocate_fn)(void *)) {
1849 // Reorder the dimensions of src to have strides in increasing order
1850 std::vector<int> swaps;
1851 for (int i = dimensions - 1; i > 0; i--) {
1852 for (int j = i; j > 0; j--) {
1853 if (shape[j - 1].stride > shape[j].stride) {
1854 std::swap(shape[j - 1], shape[j]);
1855 swaps.push_back(j);
1856 }
1857 }
1858 }
1859
1860 // Rewrite the strides to be dense (this messes up src, which
1861 // is why we took it by value).
1862 for (int i = 0; i < dimensions; i++) {
1863 if (i == 0) {
1864 shape[i].stride = 1;
1865 } else {
1866 shape[i].stride = shape[i - 1].extent * shape[i - 1].stride;
1867 }
1868 }
1869
1870 // Undo the dimension reordering
1871 while (!swaps.empty()) {
1872 int j = swaps.back();
1873 std::swap(shape[j - 1], shape[j]);
1874 swaps.pop_back();
1875 }
1876
1877 // Use an explicit runtime type, and make dst a Buffer<void>, to allow
1878 // using this method with Buffer<void> for either src or dst.
1879 Buffer<> dst(dst_type, nullptr, dimensions, shape);
1880 dst.allocate(allocate_fn, deallocate_fn);
1881
1882 return dst;
1883 }
1884
1885 template<typename... Args>
1886 HALIDE_ALWAYS_INLINE
1887 ptrdiff_t
1888 offset_of(int d, int first, Args... rest) const {
1889#if HALIDE_RUNTIME_BUFFER_CHECK_INDICES
1890 assert(first >= this->buf.dim[d].min);
1891 assert(first < this->buf.dim[d].min + this->buf.dim[d].extent);
1892#endif
1893 return offset_of(d + 1, rest...) + (ptrdiff_t)this->buf.dim[d].stride * (first - this->buf.dim[d].min);
1894 }
1895
1896 HALIDE_ALWAYS_INLINE
1897 ptrdiff_t offset_of(int d) const {
1898 return 0;
1899 }
1900
1901 template<typename... Args>
1902 HALIDE_ALWAYS_INLINE
1903 storage_T *
1904 address_of(Args... args) const {
1905 if (T_is_void) {
1906 return (storage_T *)(this->buf.host) + offset_of(0, args...) * type().bytes();
1907 } else {
1908 return (storage_T *)(this->buf.host) + offset_of(0, args...);
1909 }
1910 }
1911
1912 HALIDE_ALWAYS_INLINE
1913 ptrdiff_t offset_of(const int *pos) const {
1914 ptrdiff_t offset = 0;
1915 for (int i = this->dimensions() - 1; i >= 0; i--) {
1916#if HALIDE_RUNTIME_BUFFER_CHECK_INDICES
1917 assert(pos[i] >= this->buf.dim[i].min);
1918 assert(pos[i] < this->buf.dim[i].min + this->buf.dim[i].extent);
1919#endif
1920 offset += (ptrdiff_t)this->buf.dim[i].stride * (pos[i] - this->buf.dim[i].min);
1921 }
1922 return offset;
1923 }
1924
1925 HALIDE_ALWAYS_INLINE
1926 storage_T *address_of(const int *pos) const {
1927 if (T_is_void) {
1928 return (storage_T *)this->buf.host + offset_of(pos) * type().bytes();
1929 } else {
1930 return (storage_T *)this->buf.host + offset_of(pos);
1931 }
1932 }
1933
1934public:
1935 /** Get a pointer to the address of the min coordinate. */
1936 T *data() const {
1937 return (T *)(this->buf.host);
1938 }
1939
1940 /** Access elements. Use im(...) to get a reference to an element,
1941 * and use &im(...) to get the address of an element. If you pass
1942 * fewer arguments than the buffer has dimensions, the rest are
1943 * treated as their min coordinate. The non-const versions set the
1944 * host_dirty flag to true.
1945 */
1946 //@{
1947 template<typename... Args,
1948 typename = typename std::enable_if<AllInts<Args...>::value>::type>
1949 HALIDE_ALWAYS_INLINE const not_void_T &operator()(int first, Args... rest) const {
1950 static_assert(!T_is_void,
1951 "Cannot use operator() on Buffer<void> types");
1952 constexpr int expected_dims = 1 + (int)(sizeof...(rest));
1953 static_assert(Dims == AnyDims || Dims == expected_dims, "Buffer with static dimensions was accessed with the wrong number of coordinates in operator()");
1954 assert(!device_dirty());
1955 return *((const not_void_T *)(address_of(first, rest...)));
1956 }
1957
1958 HALIDE_ALWAYS_INLINE
1959 const not_void_T &
1960 operator()() const {
1961 static_assert(!T_is_void,
1962 "Cannot use operator() on Buffer<void> types");
1963 constexpr int expected_dims = 0;
1964 static_assert(Dims == AnyDims || Dims == expected_dims, "Buffer with static dimensions was accessed with the wrong number of coordinates in operator()");
1965 assert(!device_dirty());
1966 return *((const not_void_T *)(data()));
1967 }
1968
1969 HALIDE_ALWAYS_INLINE
1970 const not_void_T &
1971 operator()(const int *pos) const {
1972 static_assert(!T_is_void,
1973 "Cannot use operator() on Buffer<void> types");
1974 assert(!device_dirty());
1975 return *((const not_void_T *)(address_of(pos)));
1976 }
1977
1978 template<typename... Args,
1979 typename = typename std::enable_if<AllInts<Args...>::value>::type>
1980 HALIDE_ALWAYS_INLINE
1981 not_void_T &
1982 operator()(int first, Args... rest) {
1983 static_assert(!T_is_void,
1984 "Cannot use operator() on Buffer<void> types");
1985 constexpr int expected_dims = 1 + (int)(sizeof...(rest));
1986 static_assert(Dims == AnyDims || Dims == expected_dims, "Buffer with static dimensions was accessed with the wrong number of coordinates in operator()");
1987 set_host_dirty();
1988 return *((not_void_T *)(address_of(first, rest...)));
1989 }
1990
1991 HALIDE_ALWAYS_INLINE
1992 not_void_T &
1993 operator()() {
1994 static_assert(!T_is_void,
1995 "Cannot use operator() on Buffer<void> types");
1996 constexpr int expected_dims = 0;
1997 static_assert(Dims == AnyDims || Dims == expected_dims, "Buffer with static dimensions was accessed with the wrong number of coordinates in operator()");
1998 set_host_dirty();
1999 return *((not_void_T *)(data()));
2000 }
2001
2002 HALIDE_ALWAYS_INLINE
2003 not_void_T &
2004 operator()(const int *pos) {
2005 static_assert(!T_is_void,
2006 "Cannot use operator() on Buffer<void> types");
2007 set_host_dirty();
2008 return *((not_void_T *)(address_of(pos)));
2009 }
2010 // @}
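/* Usage sketch (illustrative; names and sizes are arbitrary):

 \code
 Buffer<float> im(4, 4);        // dynamic dimensionality
 im(1, 2) = 42.0f;              // non-const access marks host_dirty
 float a = im(1, 2);
 float b = im(1);               // trailing coordinates default to the dim's min
 const float *p = &im(1, 2);    // address of an element
 \endcode
 */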
2011
2012 /** Tests that all values in this buffer are equal to val. */
2013 bool all_equal(not_void_T val) const {
2014 bool all_equal = true;
2015 for_each_element([&](const int *pos) { all_equal &= (*this)(pos) == val; });
2016 return all_equal;
2017 }
2018
2019 Buffer<T, Dims, InClassDimStorage> &fill(not_void_T val) {
2020 set_host_dirty();
2021 for_each_value([=](T &v) { v = val; });
2022 return *this;
2023 }
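/* Usage sketch (illustrative): filling a buffer with a constant and
 verifying the result:

 \code
 Buffer<int> im(8, 8);
 im.fill(7);
 assert(im.all_equal(7));
 \endcode
 */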
2024
2025private:
2026 /** Helper functions for for_each_value. */
2027 // @{
2028 template<int N>
2029 struct for_each_value_task_dim {
2030 std::ptrdiff_t extent;
2031 std::ptrdiff_t stride[N];
2032 };
2033
2034 // Given an array of strides, and a bunch of pointers to pointers
2035 // (all of different types), advance the pointers using the
2036 // strides.
2037 template<typename Ptr, typename... Ptrs>
2038 HALIDE_ALWAYS_INLINE static void advance_ptrs(const std::ptrdiff_t *stride, Ptr &ptr, Ptrs &...ptrs) {
2039 ptr += *stride;
2040 advance_ptrs(stride + 1, ptrs...);
2041 }
2042
2043 HALIDE_ALWAYS_INLINE
2044 static void advance_ptrs(const std::ptrdiff_t *) {
2045 }
2046
2047 template<typename Fn, typename Ptr, typename... Ptrs>
2048 HALIDE_NEVER_INLINE static void for_each_value_helper(Fn &&f, int d, bool innermost_strides_are_one,
2049 const for_each_value_task_dim<sizeof...(Ptrs) + 1> *t, Ptr ptr, Ptrs... ptrs) {
2050 if (d == 0) {
2051 if (innermost_strides_are_one) {
2052 Ptr end = ptr + t[0].extent;
2053 while (ptr != end) {
2054 f(*ptr++, (*ptrs++)...);
2055 }
2056 } else {
2057 for (std::ptrdiff_t i = t[0].extent; i != 0; i--) {
2058 f(*ptr, (*ptrs)...);
2059 advance_ptrs(t[0].stride, ptr, ptrs...);
2060 }
2061 }
2062 } else {
2063 for (std::ptrdiff_t i = t[d].extent; i != 0; i--) {
2064 for_each_value_helper(f, d - 1, innermost_strides_are_one, t, ptr, ptrs...);
2065 advance_ptrs(t[d].stride, ptr, ptrs...);
2066 }
2067 }
2068 }
2069
2070 template<int N>
2071 HALIDE_NEVER_INLINE static bool for_each_value_prep(for_each_value_task_dim<N> *t,
2072 const halide_buffer_t **buffers) {
2073 // Check the buffers all have clean host allocations
2074 for (int i = 0; i < N; i++) {
2075 if (buffers[i]->device) {
2076 assert(buffers[i]->host &&
2077 "Buffer passed to for_each_value has device allocation but no host allocation. Call allocate() and copy_to_host() first");
2078 assert(!buffers[i]->device_dirty() &&
2079 "Buffer passed to for_each_value is dirty on device. Call copy_to_host() first");
2080 } else {
2081 assert(buffers[i]->host &&
2082 "Buffer passed to for_each_value has no host or device allocation");
2083 }
2084 }
2085
2086 const int dimensions = buffers[0]->dimensions;
2087
2088 // Extract the strides in all the dimensions
2089 for (int i = 0; i < dimensions; i++) {
2090 for (int j = 0; j < N; j++) {
2091 assert(buffers[j]->dimensions == dimensions);
2092 assert(buffers[j]->dim[i].extent == buffers[0]->dim[i].extent &&
2093 buffers[j]->dim[i].min == buffers[0]->dim[i].min);
2094 const int s = buffers[j]->dim[i].stride;
2095 t[i].stride[j] = s;
2096 }
2097 t[i].extent = buffers[0]->dim[i].extent;
2098
2099 // Order the dimensions by stride, so that the traversal is cache-coherent.
2100 // Use the strides of the last buffer, because it is the source in copies.
2101 // It appears to be better to optimize read order than write order.
2102 for (int j = i; j > 0 && t[j].stride[N - 1] < t[j - 1].stride[N - 1]; j--) {
2103 std::swap(t[j], t[j - 1]);
2104 }
2105 }
2106
2107 // flatten dimensions where possible to make a larger inner
2108 // loop for autovectorization.
2109 int d = dimensions;
2110 for (int i = 1; i < d; i++) {
2111 bool flat = true;
2112 for (int j = 0; j < N; j++) {
2113 flat = flat && t[i - 1].stride[j] * t[i - 1].extent == t[i].stride[j];
2114 }
2115 if (flat) {
2116 t[i - 1].extent *= t[i].extent;
2117 for (int j = i; j < d; j++) {
2118 t[j] = t[j + 1];
2119 }
2120 i--;
2121 d--;
2122 t[d].extent = 1;
2123 }
2124 }
2125
2126 bool innermost_strides_are_one = true;
2127 if (dimensions > 0) {
2128 for (int i = 0; i < N; i++) {
2129 innermost_strides_are_one &= (t[0].stride[i] == 1);
2130 }
2131 }
2132
2133 return innermost_strides_are_one;
2134 }
2135
2136 template<typename Fn, typename... Args, int N = sizeof...(Args) + 1>
2137 void for_each_value_impl(Fn &&f, Args &&...other_buffers) const {
2138 if (dimensions() > 0) {
2139 Buffer<>::for_each_value_task_dim<N> *t =
2140 (Buffer<>::for_each_value_task_dim<N> *)HALIDE_ALLOCA((dimensions() + 1) * sizeof(for_each_value_task_dim<N>));
2141 // Move the preparatory code into a non-templated helper to
2142 // save code size.
2143 const halide_buffer_t *buffers[] = {&buf, (&other_buffers.buf)...};
2144 bool innermost_strides_are_one = Buffer<>::for_each_value_prep(t, buffers);
2145
2146 Buffer<>::for_each_value_helper(f, dimensions() - 1,
2147 innermost_strides_are_one,
2148 t,
2149 data(), (other_buffers.data())...);
2150 } else {
2151 f(*data(), (*other_buffers.data())...);
2152 }
2153 }
2154 // @}
2155
2156public:
2157 /** Call a function on every value in the buffer, and the
2158 * corresponding values in some number of other buffers of the
2159 * same size. The function should take a reference, const
2160 * reference, or value of the correct type for each buffer. This
2161 * effectively lifts a function of scalars to an element-wise
2162 * function of buffers. This produces code that the compiler can
2163 * autovectorize. This is slightly cheaper than for_each_element,
2164 * because it does not need to track the coordinates.
2165 *
2166 * Note that constness of Buffers is preserved: a const Buffer<T> (for either
2167 * 'this' or the other-buffers arguments) will allow mutation of the
2168 * buffer contents, while a Buffer<const T> will not. Attempting to specify
2169 * a mutable reference for the lambda argument of a Buffer<const T>
2170 * will result in a compilation error. */
2171 // @{
2172 template<typename Fn, typename... Args, int N = sizeof...(Args) + 1>
2173 HALIDE_ALWAYS_INLINE const Buffer<T, Dims, InClassDimStorage> &for_each_value(Fn &&f, Args &&...other_buffers) const {
2174 for_each_value_impl(f, std::forward<Args>(other_buffers)...);
2175 return *this;
2176 }
2177
2178 template<typename Fn, typename... Args, int N = sizeof...(Args) + 1>
2179 HALIDE_ALWAYS_INLINE
2180 Buffer<T, Dims, InClassDimStorage> &
2181 for_each_value(Fn &&f, Args &&...other_buffers) {
2182 for_each_value_impl(f, std::forward<Args>(other_buffers)...);
2183 return *this;
2184 }
2185 // @}
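/* Usage sketch (illustrative; names and sizes are arbitrary). The first lambda
 parameter binds to this buffer's elements, the remaining parameters to the
 other buffers, in order; all buffers must have matching dimensions and extents:

 \code
 Buffer<float> a(100, 100), b(100, 100), sum(100, 100);
 a.fill(1.0f);
 b.fill(2.0f);
 sum.for_each_value([](float &s, float av, float bv) { s = av + bv; }, a, b);
 \endcode
 */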
2186
2187private:
2188 // Helper functions for for_each_element
2189 struct for_each_element_task_dim {
2190 int min, max;
2191 };
2192
2193 /** If f is callable with this many args, call it. The first
2194 * argument is just to make the overloads distinct. Actual
2195 * overload selection is done using the enable_if. */
2196 template<typename Fn,
2197 typename... Args,
2198 typename = decltype(std::declval<Fn>()(std::declval<Args>()...))>
2199 HALIDE_ALWAYS_INLINE static void for_each_element_variadic(int, int, const for_each_element_task_dim *, Fn &&f, Args... args) {
2200 f(args...);
2201 }
2202
2203 /** If the above overload is impossible, we add an outer loop over
2204 * an additional argument and try again. */
2205 template<typename Fn,
2206 typename... Args>
2207 HALIDE_ALWAYS_INLINE static void for_each_element_variadic(double, int d, const for_each_element_task_dim *t, Fn &&f, Args... args) {
2208 for (int i = t[d].min; i <= t[d].max; i++) {
2209 for_each_element_variadic(0, d - 1, t, std::forward<Fn>(f), i, args...);
2210 }
2211 }
2212
2213 /** Determine the minimum number of arguments a callable can take
2214 * using the same trick. */
2215 template<typename Fn,
2216 typename... Args,
2217 typename = decltype(std::declval<Fn>()(std::declval<Args>()...))>
2218 HALIDE_ALWAYS_INLINE static int num_args(int, Fn &&, Args...) {
2219 return (int)(sizeof...(Args));
2220 }
2221
2222 /** The recursive version is only enabled up to a recursion limit
2223 * of 256. This catches callables that aren't callable with any
2224 * number of ints. */
2225 template<typename Fn,
2226 typename... Args>
2227 HALIDE_ALWAYS_INLINE static int num_args(double, Fn &&f, Args... args) {
2228 static_assert(sizeof...(args) <= 256,
2229 "Callable passed to for_each_element must accept either a const int *,"
2230 " or up to 256 ints. No such operator found. Expect infinite template recursion.");
2231 return num_args(0, std::forward<Fn>(f), 0, args...);
2232 }
2233
2234 /** A version where the callable takes a position array instead,
2235 * with compile-time recursion on the dimensionality. This
2236 * overload is preferred to the one below using the same int vs
2237 * double trick as above, but is impossible once d hits -1 using
2238 * std::enable_if. */
2239 template<int d,
2240 typename Fn,
2241 typename = typename std::enable_if<(d >= 0)>::type>
2242 HALIDE_ALWAYS_INLINE static void for_each_element_array_helper(int, const for_each_element_task_dim *t, Fn &&f, int *pos) {
2243 for (pos[d] = t[d].min; pos[d] <= t[d].max; pos[d]++) {
2244 for_each_element_array_helper<d - 1>(0, t, std::forward<Fn>(f), pos);
2245 }
2246 }
2247
2248 /** Base case for recursion above. */
2249 template<int d,
2250 typename Fn,
2251 typename = typename std::enable_if<(d < 0)>::type>
2252 HALIDE_ALWAYS_INLINE static void for_each_element_array_helper(double, const for_each_element_task_dim *t, Fn &&f, int *pos) {
2253 f(pos);
2254 }
2255
2256 /** A run-time-recursive version (instead of
2257 * compile-time-recursive) that requires the callable to take a
2258 * pointer to a position array instead. Dispatches to the
2259 * compile-time-recursive version once the dimensionality gets
2260 * small. */
2261 template<typename Fn>
2262 static void for_each_element_array(int d, const for_each_element_task_dim *t, Fn &&f, int *pos) {
2263 if (d == -1) {
2264 f(pos);
2265 } else if (d == 0) {
2266 // Once the dimensionality gets small enough, dispatch to
2267 // a compile-time-recursive version for better codegen of
2268 // the inner loops.
2269 for_each_element_array_helper<0, Fn>(0, t, std::forward<Fn>(f), pos);
2270 } else if (d == 1) {
2271 for_each_element_array_helper<1, Fn>(0, t, std::forward<Fn>(f), pos);
2272 } else if (d == 2) {
2273 for_each_element_array_helper<2, Fn>(0, t, std::forward<Fn>(f), pos);
2274 } else if (d == 3) {
2275 for_each_element_array_helper<3, Fn>(0, t, std::forward<Fn>(f), pos);
2276 } else {
2277 for (pos[d] = t[d].min; pos[d] <= t[d].max; pos[d]++) {
2278 for_each_element_array(d - 1, t, std::forward<Fn>(f), pos);
2279 }
2280 }
2281 }
2282
2283 /** We now have two overloads for for_each_element. This one
2284 * triggers if the callable takes a const int *.
2285 */
2286 template<typename Fn,
2287 typename = decltype(std::declval<Fn>()((const int *)nullptr))>
2288 static void for_each_element(int, int dims, const for_each_element_task_dim *t, Fn &&f, int check = 0) {
2289 int *pos = (int *)HALIDE_ALLOCA(dims * sizeof(int));
2290 for_each_element_array(dims - 1, t, std::forward<Fn>(f), pos);
2291 }
2292
2293 /** This one triggers otherwise. It treats the callable as
2294 * something that takes some number of ints. */
2295 template<typename Fn>
2296 HALIDE_ALWAYS_INLINE static void for_each_element(double, int dims, const for_each_element_task_dim *t, Fn &&f) {
2297 int args = num_args(0, std::forward<Fn>(f));
2298 assert(dims >= args);
2299 for_each_element_variadic(0, args - 1, t, std::forward<Fn>(f));
2300 }
2301
2302 template<typename Fn>
2303 void for_each_element_impl(Fn &&f) const {
2304 for_each_element_task_dim *t =
2305 (for_each_element_task_dim *)HALIDE_ALLOCA(dimensions() * sizeof(for_each_element_task_dim));
2306 for (int i = 0; i < dimensions(); i++) {
2307 t[i].min = dim(i).min();
2308 t[i].max = dim(i).max();
2309 }
2310 for_each_element(0, dimensions(), t, std::forward<Fn>(f));
2311 }
2312
2313public:
2314 /** Call a function at each site in a buffer. This is likely to be
2315 * much slower than using Halide code to populate a buffer, but is
2316 * convenient for tests. If the function has more arguments than the
2317 * buffer has dimensions, the remaining arguments will be zero. If it
2318 * has fewer arguments than the buffer has dimensions then the last
2319 * few dimensions of the buffer are not iterated over. For example,
2320 * the following code exploits this to set a floating point RGB image
2321 * to red:
2322
2323 \code
2324 Buffer<float, 3> im(100, 100, 3);
2325 im.for_each_element([&](int x, int y) {
2326 im(x, y, 0) = 1.0f;
2327 im(x, y, 1) = 0.0f;
2328 im(x, y, 2) = 0.0f;
2329 });
2330 \endcode
2331
2332 * The compiled code is equivalent to writing a nested for loop,
2333 * and compilers are capable of optimizing it in the same way.
2334 *
2335 * If the callable can be called with an int * as the sole argument,
2336 * that version is called instead. Each location in the buffer is
2337 * passed to it in a coordinate array. This version is higher-overhead
2338 * than the variadic version, but is useful for writing generic code
2339 * that accepts buffers of arbitrary dimensionality. For example, the
2340 * following sets the value at all sites in an arbitrary-dimensional
2341 * buffer to their first coordinate:
2342
2343 \code
2344 im.for_each_element([&](const int *pos) {im(pos) = pos[0];});
2345 \endcode
2346
2347 * It is also possible to use for_each_element to iterate over entire
2348 * rows or columns by cropping the buffer to a single column or row
2349 * respectively and iterating over elements of the result. For example,
2350 * to set the diagonal of the image to 1 by iterating over the columns:
2351
2352 \code
2353 Buffer<float, 3> im(100, 100, 3);
2354 im.sliced(1, 0).for_each_element([&](int x, int c) {
2355 im(x, x, c) = 1.0f;
2356 });
2357 \endcode
2358
2359 * Or, assuming the memory layout is known to be dense per row, one can
2360 * memset each row of an image like so:
2361
2362 \code
2363 Buffer<float, 3> im(100, 100, 3);
2364 im.sliced(0, 0).for_each_element([&](int y, int c) {
2365 memset(&im(0, y, c), 0, sizeof(float) * im.width());
2366 });
2367 \endcode
2368
2369 */
2370 // @{
2371 template<typename Fn>
2372 HALIDE_ALWAYS_INLINE const Buffer<T, Dims, InClassDimStorage> &for_each_element(Fn &&f) const {
2373 for_each_element_impl(f);
2374 return *this;
2375 }
2376
2377 template<typename Fn>
2378 HALIDE_ALWAYS_INLINE
2379 Buffer<T, Dims, InClassDimStorage> &
2380 for_each_element(Fn &&f) {
2381 for_each_element_impl(f);
2382 return *this;
2383 }
2384 // @}
2385
2386private:
2387 template<typename Fn>
2388 struct FillHelper {
2389 Fn f;
2390 Buffer<T, Dims, InClassDimStorage> *buf;
2391
2392 template<typename... Args,
2393 typename = decltype(std::declval<Fn>()(std::declval<Args>()...))>
2394 void operator()(Args... args) {
2395 (*buf)(args...) = f(args...);
2396 }
2397
2398 FillHelper(Fn &&f, Buffer<T, Dims, InClassDimStorage> *buf)
2399 : f(std::forward<Fn>(f)), buf(buf) {
2400 }
2401 };
2402
2403public:
2404 /** Fill a buffer by evaluating a callable at every site. The
2405 * callable should look much like a callable passed to
2406 * for_each_element, but it should return the value that should be
2407 * stored to the coordinate corresponding to the arguments. */
2408 template<typename Fn,
2409 typename = typename std::enable_if<!std::is_arithmetic<typename std::decay<Fn>::type>::value>::type>
2410 Buffer<T, Dims, InClassDimStorage> &fill(Fn &&f) {
2411 // We'll go via for_each_element. We need a variadic wrapper lambda.
2412 FillHelper<Fn> wrapper(std::forward<Fn>(f), this);
2413 return for_each_element(wrapper);
2414 }
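/* Usage sketch (illustrative): filling a buffer from a function of the
 coordinates, here a simple gradient:

 \code
 Buffer<float> grad(16, 16);
 grad.fill([](int x, int y) { return (float)(x + y); });
 \endcode
 */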
2415
2416 /** Check whether an input buffer passed to an extern stage is a
2417 * bounds query. Compared to doing the host pointer check directly,
2418 * this adds clarity to the code and will facilitate moving to
2419 * another representation for bounds query arguments. */
2420 bool is_bounds_query() const {
2421 return buf.is_bounds_query();
2422 }
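/* Usage sketch (illustrative; "my_extern_stage" is a hypothetical extern
 pipeline stage operating on float buffers). During a bounds query the host
 and device pointers are null and the stage should fill in the region of the
 input it needs rather than read any data:

 \code
 extern "C" int my_extern_stage(halide_buffer_t *in, halide_buffer_t *out) {
     Halide::Runtime::Buffer<float> inb(*in), outb(*out);
     if (inb.is_bounds_query()) {
         // Request the same region of the input as the output covers.
         for (int i = 0; i < in->dimensions; i++) {
             in->dim[i].min = out->dim[i].min;
             in->dim[i].extent = out->dim[i].extent;
         }
         return 0;
     }
     outb.copy_from(inb);  // normal execution path
     return 0;
 }
 \endcode
 */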
2423
2424 /** Convenient check to verify that all of the interesting bytes in the Buffer
2425 * are initialized under MSAN. Note that by default, we use for_each_value() here so that
2426 * we skip any unused padding that isn't part of the Buffer; this isn't efficient,
2427 * but in MSAN mode, it doesn't matter. (Pass true for the flag to force check
2428 * the entire Buffer storage.) */
2429 void msan_check_mem_is_initialized(bool entire = false) const {
2430#if defined(__has_feature)
2431#if __has_feature(memory_sanitizer)
2432 if (entire) {
2433 __msan_check_mem_is_initialized(data(), size_in_bytes());
2434 } else {
2435 for_each_value([](T &v) { __msan_check_mem_is_initialized(&v, sizeof(T)); });
2436 }
2437#endif
2438#endif
2439 }
2440};
2441
2442} // namespace Runtime
2443} // namespace Halide
2444
2445#undef HALIDE_ALLOCA
2446
2447 #endif // HALIDE_RUNTIME_BUFFER_H