tesseract v5.3.3.20231005
shapetable_test.cc
Go to the documentation of this file.
// (C) Copyright 2017, Google Inc.
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
// http://www.apache.org/licenses/LICENSE-2.0
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
11
12#include <string>
13#include <utility>
14
15#include "include_gunit.h"
16
17#include "serialis.h"
18#include "shapetable.h"
19#include "unicharset.h"
20
21namespace tesseract {
22
23#ifndef DISABLED_LEGACY_ENGINE
24
25static std::string TmpNameToPath(const std::string &name) {
26 return file::JoinPath(FLAGS_test_tmpdir, name);
27}
28
29// Sets up a simple shape with some unichars.
30static void Setup352(int font_id, Shape *shape) {
31 shape->AddToShape(3, font_id);
32 shape->AddToShape(5, font_id);
33 shape->AddToShape(2, font_id);
34}
35
36// Verifies some properties of the 352 shape.
37static void Expect352(int font_id, const Shape &shape) {
38 EXPECT_EQ(3, shape.size());
39 EXPECT_TRUE(shape.ContainsUnichar(2));
40 EXPECT_TRUE(shape.ContainsUnichar(3));
41 EXPECT_TRUE(shape.ContainsUnichar(5));
42 EXPECT_FALSE(shape.ContainsUnichar(1));
43 EXPECT_TRUE(shape.ContainsUnicharAndFont(2, font_id));
44 EXPECT_FALSE(shape.ContainsUnicharAndFont(2, font_id - 1));
45 EXPECT_FALSE(shape.ContainsUnicharAndFont(font_id, 2));
46 // It should be a subset of itself.
47 EXPECT_TRUE(shape.IsSubsetOf(shape));
48}
49
50#endif
51
52// The fixture for testing Shape.
53class ShapeTest : public testing::Test {
54protected:
55 void SetUp() override {
56 std::locale::global(std::locale(""));
58 }
59};
60
61// Tests that a Shape works as expected for all the basic functions.
62TEST_F(ShapeTest, BasicTest) {
63#ifdef DISABLED_LEGACY_ENGINE
64 // Skip test because Shape is missing.
65 GTEST_SKIP();
66#else
67 Shape shape1;
68 EXPECT_EQ(0, shape1.size());
69 Setup352(101, &shape1);
70 Expect352(101, shape1);
71 // It should still work after file I/O.
72 std::string filename = TmpNameToPath("shapefile");
73 FILE *fp = fopen(filename.c_str(), "wb");
74 ASSERT_TRUE(fp != nullptr);
75 EXPECT_TRUE(shape1.Serialize(fp));
76 fclose(fp);
77 TFile tfp;
78 EXPECT_TRUE(tfp.Open(filename.c_str(), nullptr));
79 Shape shape2;
80 EXPECT_TRUE(shape2.DeSerialize(&tfp));
81 Expect352(101, shape2);
82 // They should be subsets of each other.
83 EXPECT_TRUE(shape1.IsSubsetOf(shape2));
84 EXPECT_TRUE(shape2.IsSubsetOf(shape1));
85 // They should be equal unichars.
86 EXPECT_TRUE(shape1.IsEqualUnichars(&shape2));
87 // and still pass afterwards.
88 Expect352(101, shape1);
89 Expect352(101, shape2);
90#endif
91}
92
93// Tests AddShape separately, as it takes quite a bit of work.
94TEST_F(ShapeTest, AddShapeTest) {
95#ifdef DISABLED_LEGACY_ENGINE
96 // Skip test because Shape is missing.
97 GTEST_SKIP();
98#else
99 Shape shape1;
100 Setup352(101, &shape1);
101 Expect352(101, shape1);
102 // Now setup a different shape with different content.
103 Shape shape2;
104 shape2.AddToShape(3, 101); // Duplicates shape1.
105 shape2.AddToShape(5, 110); // Different font to shape1.
106 shape2.AddToShape(7, 101); // Different unichar to shape1.
107 // They should NOT be subsets of each other.
108 EXPECT_FALSE(shape1.IsSubsetOf(shape2));
109 EXPECT_FALSE(shape2.IsSubsetOf(shape1));
110 // Now add shape2 to shape1.
111 shape1.AddShape(shape2);
112 // Test subsets again.
113 EXPECT_FALSE(shape1.IsSubsetOf(shape2));
114 EXPECT_TRUE(shape2.IsSubsetOf(shape1));
115 EXPECT_EQ(4, shape1.size());
116 EXPECT_FALSE(shape1.ContainsUnichar(1));
117 EXPECT_TRUE(shape1.ContainsUnicharAndFont(5, 101));
118 EXPECT_TRUE(shape1.ContainsUnicharAndFont(5, 110));
119 EXPECT_FALSE(shape1.ContainsUnicharAndFont(3, 110));
120 EXPECT_FALSE(shape1.ContainsUnicharAndFont(7, 110));
121 EXPECT_FALSE(shape1.IsEqualUnichars(&shape2));
122#endif
123}
124
125// The fixture for testing Shape.
127
128// Tests that a Shape works as expected for all the basic functions.
130#ifdef DISABLED_LEGACY_ENGINE
131 // Skip test because Shape is missing.
132 GTEST_SKIP();
133#else
134 Shape shape1;
135 Setup352(101, &shape1);
136 // Build a shape table with the same data, but in separate shapes.
137 UNICHARSET unicharset;
138 unicharset.unichar_insert(" ");
139 for (int i = 1; i <= 10; ++i) {
140 char class_str[20];
141 snprintf(class_str, sizeof(class_str), "class%d", i);
142 unicharset.unichar_insert(class_str);
143 }
144 ShapeTable st(unicharset);
145 EXPECT_EQ(0, st.AddShape(3, 101));
146 EXPECT_EQ(1, st.AddShape(5, 101));
147 EXPECT_EQ(2, st.AddShape(2, 101));
148 EXPECT_EQ(3, st.NumShapes());
149 Expect352(101, shape1);
150 EXPECT_EQ(3, st.AddShape(shape1));
151 for (int i = 0; i < 3; ++i) {
153 }
156 st.DeleteShape(3);
158
159 // Now merge to make a single shape like shape1.
161 st.MergeShapes(0, 1);
162 EXPECT_EQ(3, st.MergedUnicharCount(1, 2));
163 st.MergeShapes(1, 2);
164 for (int i = 0; i < 3; ++i) {
166 // Master font count is the sum of all the font counts in the shape, not
167 // the actual number of different fonts in the shape.
169 }
172 ShapeTable st2;
173 st2.AppendMasterShapes(st, nullptr);
174 EXPECT_EQ(1, st.NumMasterShapes());
175 EXPECT_EQ(1, st2.NumShapes());
176 EXPECT_TRUE(st2.MutableShape(0)->IsEqualUnichars(&shape1));
178#endif
179}
180
181} // namespace tesseract
#define GTEST_SKIP()
Definition: gtest.h:1889
#define EXPECT_EQ(val1, val2)
Definition: gtest.h:2043
#define EXPECT_TRUE(condition)
Definition: gtest.h:1982
#define ASSERT_TRUE(condition)
Definition: gtest.h:1990
#define EXPECT_FALSE(condition)
Definition: gtest.h:1986
TEST_F(EuroText, FastLatinOCR)
bool Open(const char *filename, FileReader reader)
Definition: serialis.cpp:140
void unichar_insert(const char *const unichar_repr, OldUncleanUnichars old_style)
Definition: unicharset.cpp:654
bool IsSubsetOf(const Shape &other) const
Definition: shapetable.cpp:206
bool ContainsUnicharAndFont(int unichar_id, int font_id) const
Definition: shapetable.cpp:133
void AddToShape(int unichar_id, int font_id)
Definition: shapetable.cpp:103
int size() const
Definition: shapetable.h:169
bool Serialize(FILE *fp) const
Definition: shapetable.cpp:86
void AddShape(const Shape &other)
Definition: shapetable.cpp:123
bool IsEqualUnichars(Shape *other)
Definition: shapetable.cpp:222
bool DeSerialize(TFile *fp)
Definition: shapetable.cpp:92
bool ContainsUnichar(int unichar_id) const
Definition: shapetable.cpp:150
bool AnyMultipleUnichars() const
Definition: shapetable.cpp:458
Shape * MutableShape(unsigned shape_id)
Definition: shapetable.h:295
int MergedUnicharCount(unsigned shape_id1, unsigned shape_id2) const
Definition: shapetable.cpp:520
unsigned AddShape(int unichar_id, int font_id)
Definition: shapetable.cpp:351
int NumMasterShapes() const
Definition: shapetable.cpp:699
unsigned MasterDestinationIndex(unsigned shape_id) const
Definition: shapetable.cpp:548
int MasterFontCount(unsigned shape_id) const
Definition: shapetable.cpp:509
unsigned NumShapes() const
Definition: shapetable.h:248
void DeleteShape(unsigned shape_id)
Definition: shapetable.cpp:376
void MergeShapes(unsigned shape_id1, unsigned shape_id2)
Definition: shapetable.cpp:530
unsigned MasterUnicharCount(unsigned shape_id) const
Definition: shapetable.cpp:503
void AppendMasterShapes(const ShapeTable &other, std::vector< int > *shape_map)
Definition: shapetable.cpp:683
static void MakeTmpdir()
Definition: include_gunit.h:38
static std::string JoinPath(const std::string &s1, const std::string &s2)
Definition: include_gunit.h:65
void SetUp() override