// tesseract v5.3.3.20231005
// ocrpara.cpp
// (Source listing taken from the generated documentation of this file.)
1
2// File: ocrpara.cpp
3// Description: OCR Paragraph Output Type
4// Author: David Eger
5//
6// (C) Copyright 2010, Google Inc.
7// Licensed under the Apache License, Version 2.0 (the "License");
8// you may not use this file except in compliance with the License.
9// You may obtain a copy of the License at
10// http://www.apache.org/licenses/LICENSE-2.0
11// Unless required by applicable law or agreed to in writing, software
12// distributed under the License is distributed on an "AS IS" BASIS,
13// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14// See the License for the specific language governing permissions and
15// limitations under the License.
16//
18
19#include "ocrpara.h"
20
21#include "host.h" // For NearlyEqual()
22
23#include <cstdio>
24
25namespace tesseract {
26
31
32static const char *ParagraphJustificationToString(tesseract::ParagraphJustification justification) {
33 switch (justification) {
35 return "LEFT";
37 return "RIGHT";
39 return "CENTER";
40 default:
41 return "UNKNOWN";
42 }
43}
44
45bool ParagraphModel::ValidFirstLine(int lmargin, int lindent, int rindent, int rmargin) const {
46 switch (justification_) {
48 return NearlyEqual(lmargin + lindent, margin_ + first_indent_, tolerance_);
50 return NearlyEqual(rmargin + rindent, margin_ + first_indent_, tolerance_);
52 return NearlyEqual(lindent, rindent, tolerance_ * 2);
53 default:
54 // shouldn't happen
55 return false;
56 }
57}
58
59bool ParagraphModel::ValidBodyLine(int lmargin, int lindent, int rindent, int rmargin) const {
60 switch (justification_) {
62 return NearlyEqual(lmargin + lindent, margin_ + body_indent_, tolerance_);
64 return NearlyEqual(rmargin + rindent, margin_ + body_indent_, tolerance_);
66 return NearlyEqual(lindent, rindent, tolerance_ * 2);
67 default:
68 // shouldn't happen
69 return false;
70 }
71}
72
74 if (justification_ != other.justification_) {
75 return false;
76 }
77 if (justification_ == JUSTIFICATION_CENTER || justification_ == JUSTIFICATION_UNKNOWN) {
78 return true;
79 }
80 int tolerance = (tolerance_ + other.tolerance_) / 4;
81 return NearlyEqual(margin_ + first_indent_, other.margin_ + other.first_indent_, tolerance) &&
82 NearlyEqual(margin_ + body_indent_, other.margin_ + other.body_indent_, tolerance);
83}
84
85std::string ParagraphModel::ToString() const {
86 char buffer[200];
87 const char *alignment = ParagraphJustificationToString(justification_);
88 snprintf(buffer, sizeof(buffer), "margin: %d, first_indent: %d, body_indent: %d, alignment: %s",
89 margin_, first_indent_, body_indent_, alignment);
90 return std::string(buffer);
91}
92
93} // namespace tesseract
// Cross-reference index of symbols used in this file (from the generated
// documentation):
//   bool NearlyEqual(T x, T y, T tolerance)
//       Definition: host.h:51
//   ParagraphJustification
//       Definition: publictypes.h:246
//   JUSTIFICATION_LEFT
//       Definition: publictypes.h:248
//   JUSTIFICATION_UNKNOWN
//       Definition: publictypes.h:247
//   JUSTIFICATION_RIGHT
//       Definition: publictypes.h:250
//   JUSTIFICATION_CENTER
//       Definition: publictypes.h:249
//   bool Comparable(const ParagraphModel &other) const
//       Definition: ocrpara.cpp:73
//   bool ValidFirstLine(int lmargin, int lindent, int rindent, int rmargin) const
//       Definition: ocrpara.cpp:45
//   int tolerance() const
//       Definition: ocrpara.h:178
//   std::string ToString() const
//       Definition: ocrpara.cpp:85
//   bool ValidBodyLine(int lmargin, int lindent, int rindent, int rmargin) const
//       Definition: ocrpara.cpp:59