tesseract
v5.3.3.20231005
unicharmap.h
Go to the documentation of this file.
1
2
// File: unicharmap.h
3
// Description: Unicode character/ligature to integer id class.
4
// Author: Thomas Kielbus
5
// Created: Wed Jun 28 17:05:01 PDT 2006
6
//
7
// (C) Copyright 2006, Google Inc.
8
// Licensed under the Apache License, Version 2.0 (the "License");
9
// you may not use this file except in compliance with the License.
10
// You may obtain a copy of the License at
11
// http://www.apache.org/licenses/LICENSE-2.0
12
// Unless required by applicable law or agreed to in writing, software
13
// distributed under the License is distributed on an "AS IS" BASIS,
14
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
15
// See the License for the specific language governing permissions and
16
// limitations under the License.
17
//
19
20
#ifndef TESSERACT_CCUTIL_UNICHARMAP_H_
21
#define TESSERACT_CCUTIL_UNICHARMAP_H_
22
23
#include <
tesseract/unichar.h
>
24
25
namespace
tesseract
{
26
27
// A UNICHARMAP stores unique unichars. Each of them is associated with one
28
// UNICHAR_ID.
29
class
TESS_API
UNICHARMAP
{
30
public
:
31
// Create an empty UNICHARMAP
32
UNICHARMAP
();
33
34
~UNICHARMAP
();
35
36
// Insert the given unichar representation in the UNICHARMAP and associate it
37
// with the given id. The length of the representation MUST be non-zero.
38
void
insert(
const
char
*
const
unichar_repr,
UNICHAR_ID
id
);
39
40
// Return the id associated with the given unichar representation,
41
// this representation MUST exist within the UNICHARMAP. The first
42
// length characters (maximum) from unichar_repr are used. The length
43
// MUST be non-zero.
44
UNICHAR_ID
unichar_to_id(
const
char
*
const
unichar_repr,
int
length)
const
;
45
46
// Return true if the given unichar representation is already present in the
47
// UNICHARMAP. The first length characters (maximum) from unichar_repr are
48
// used. The length MUST be non-zero.
49
bool
contains
(
const
char
*
const
unichar_repr,
int
length)
const
;
50
51
// Return the minimum number of characters that must be used from this string
52
// to obtain a match in the UNICHARMAP.
53
int
minmatch(
const
char
*
const
unichar_repr)
const
;
54
55
// Clear the UNICHARMAP. All previous data is lost.
56
void
clear();
57
58
private
:
59
// The UNICHARMAP is represented as a tree whose nodes are of type
60
// UNICHARMAP_NODE.
61
struct
UNICHARMAP_NODE {
62
UNICHARMAP_NODE();
63
~UNICHARMAP_NODE();
64
65
UNICHARMAP_NODE *children;
66
UNICHAR_ID
id;
67
};
68
69
UNICHARMAP_NODE *nodes;
70
};
71
72
}
// namespace tesseract
73
74
#endif
// TESSERACT_CCUTIL_UNICHARMAP_H_
unichar.h
tesseract
Definition:
baseapi.h:39
tesseract::UNICHAR_ID
int UNICHAR_ID
Definition:
unichar.h:34
tesseract::contains
bool contains(const std::vector< T > &data, const T &value)
Definition:
helpers.h:39
tesseract::UNICHARMAP
Definition:
unicharmap.h:29
TESS_API
#define TESS_API
Definition:
export.h:32
src
ccutil
unicharmap.h
Generated on Thu Oct 5 2023 22:10:25 for tesseract by Doxygen 1.9.4