tesseract v5.3.3.20231005
stridemap.h
Go to the documentation of this file.
1
// File: stridemap.h
// Description: Indexing into a 4-d tensor held in a 2-d Array.
// Author: Ray Smith
//
// (C) Copyright 2016, Google Inc.
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
// http://www.apache.org/licenses/LICENSE-2.0
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
17#ifndef TESSERACT_LSTM_STRIDEMAP_H_
18#define TESSERACT_LSTM_STRIDEMAP_H_
19
20#include <cstring>
21#include <vector>
22
namespace tesseract {

// Enum describing the dimensions of the 'Tensor' in a NetworkIO.
// A NetworkIO is analogous to a TF Tensor, except that the number of dimensions
// is fixed (4), and they always have the same meaning. The underlying
// representation is a 2-D array, for which the product batch*height*width
// is always dim1 and depth is always dim2. FlexDimensions is used only for
// batch, height, width with the StrideMap, and therefore represents the runtime
// shape. The build-time shape is defined by StaticShape.
enum FlexDimensions {
  FD_BATCH,   // Index of multiple images.
  FD_HEIGHT,  // y-coordinate in image.
  FD_WIDTH,   // x-coordinate in image.
  FD_DIMSIZE, // Number of flexible non-depth dimensions.
};

// Encapsulation of information relating to the mapping from [batch][y][x] to
// the first index into the 2-d array underlying a NetworkIO.
class StrideMap {
public:
  // Class holding the non-depth indices.
  class Index {
  public:
    // Constructs an index into stride_map positioned at the first location.
    // Only a pointer to stride_map is stored, so the map must outlive *this.
    explicit Index(const StrideMap &stride_map) : stride_map_(&stride_map) {
      // Without this call t_ and indices_ would be left uninitialized.
      InitToFirst();
    }
    // Constructs an index into stride_map at the given batch, y, x and
    // computes the matching t from them.
    // Only a pointer to stride_map is stored, so the map must outlive *this.
    Index(const StrideMap &stride_map, int batch, int y, int x) : stride_map_(&stride_map) {
      indices_[FD_BATCH] = batch;
      indices_[FD_HEIGHT] = y;
      indices_[FD_WIDTH] = x;
      SetTFromIndices();
    }
    // Accesses the index to the underlying array.
    int t() const {
      return t_;
    }
    // Returns the current index of the given dimension.
    int index(FlexDimensions dimension) const {
      return indices_[dimension];
    }
    // Initializes the indices to the first valid location.
    void InitToFirst() {
      memset(indices_, 0, sizeof(indices_));
      t_ = 0;
    }
    // Initializes the indices to the last valid location.
    void InitToLast() {
      InitToLastOfBatch(MaxIndexOfDim(FD_BATCH));
    }
    // Returns true if *this is a valid index.
    bool IsValid() const;
    // Returns true if the index of the given dimension is the last.
    bool IsLast(FlexDimensions dimension) const;
    // Given that the dimensions up to and including dim-1 are valid, returns
    // the maximum index for dimension dim.
    int MaxIndexOfDim(FlexDimensions dim) const;
    // Adds the given offset to the given dimension. Returns true if the result
    // makes a valid index.
    bool AddOffset(int offset, FlexDimensions dimension);
    // Increments the index in some encapsulated way that guarantees to remain
    // valid until it returns false, meaning that the iteration is complete.
    bool Increment();
    // Decrements the index in some encapsulated way that guarantees to remain
    // valid until it returns false, meaning that the iteration (that started
    // with InitToLast()) is complete.
    bool Decrement();

  private:
    // Initializes the indices to the last valid location in the given batch
    // index.
    void InitToLastOfBatch(int batch);
    // Computes and sets t_ from the current indices_.
    void SetTFromIndices();

    // Map into which *this is an index.
    const StrideMap *stride_map_;
    // Index to the first dimension of the underlying array.
    int t_;
    // Indices into the individual dimensions.
    int indices_[FD_DIMSIZE];
  };

  // Constructs an empty map: every dimension size and t increment is zero,
  // so Size() and Width() return 0 until the map is set up.
  StrideMap() {
    memset(shape_, 0, sizeof(shape_));
    memset(t_increments_, 0, sizeof(t_increments_));
  }
  // Default copy constructor and operator= are OK to use here!

  // Sets up the stride for the given array of height, width pairs.
  void SetStride(const std::vector<std::pair<int, int>> &h_w_pairs);
  // Scales width and height dimensions by the given factors.
  void ScaleXY(int x_factor, int y_factor);
  // Reduces width to 1, across the batch, whatever the input size.
  void ReduceWidthTo1();
  // Transposes the width and height dimensions.
  void TransposeXY();
  // Returns the size of the given dimension.
  int Size(FlexDimensions dimension) const {
    return shape_[dimension];
  }
  // Returns the total width required, computed as the per-batch t increment
  // multiplied by the number of batch entries.
  int Width() const {
    return t_increments_[FD_BATCH] * shape_[FD_BATCH];
  }

private:
  // Computes t_increments_ from shape_.
  void ComputeTIncrements();

  // The size of each non-depth dimension.
  int shape_[FD_DIMSIZE];
  // Precomputed 't' increments for each dimension. This is the value of
  // the given dimension in the packed 3-d array that the shape_ represents.
  int t_increments_[FD_DIMSIZE];
  // Vector of size shape_[FD_BATCH] holds the height of each image in a batch.
  std::vector<int> heights_;
  // Vector of size shape_[FD_BATCH] holds the width of each image in a batch.
  std::vector<int> widths_;
};

} // namespace tesseract
143
144#endif // TESSERACT_LSTM_STRIDEMAP_H_
const double y
FlexDimensions
Definition: stridemap.h:32
@ FD_WIDTH
Definition: stridemap.h:35
@ FD_DIMSIZE
Definition: stridemap.h:36
@ FD_BATCH
Definition: stridemap.h:33
@ FD_HEIGHT
Definition: stridemap.h:34
int Width() const
Definition: stridemap.h:123
int Size(FlexDimensions dimension) const
Definition: stridemap.h:119
void ScaleXY(int x_factor, int y_factor)
Definition: stridemap.cpp:153
void SetStride(const std::vector< std::pair< int, int > > &h_w_pairs)
Definition: stridemap.cpp:131
int index(FlexDimensions dimension) const
Definition: stridemap.h:59
bool AddOffset(int offset, FlexDimensions dimension)
Definition: stridemap.cpp:67
bool IsLast(FlexDimensions dimension) const
Definition: stridemap.cpp:40
Index(const StrideMap &stride_map, int batch, int y, int x)
Definition: stridemap.h:49
int MaxIndexOfDim(FlexDimensions dim) const
Definition: stridemap.cpp:46
Index(const StrideMap &stride_map)
Definition: stridemap.h:46