tesseract v5.3.3.20231005
dawg_cache.cpp
Go to the documentation of this file.
1
2// File: dawg_cache.cpp
3// Description: A class that knows about loading and caching dawgs.
4// Author: David Eger
5//
6// (C) Copyright 2012, Google Inc.
7// Licensed under the Apache License, Version 2.0 (the "License");
8// you may not use this file except in compliance with the License.
9// You may obtain a copy of the License at
10// http://www.apache.org/licenses/LICENSE-2.0
11// Unless required by applicable law or agreed to in writing, software
12// distributed under the License is distributed on an "AS IS" BASIS,
13// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14// See the License for the specific language governing permissions and
15// limitations under the License.
16//
18
19#include "dawg_cache.h"
20
21#include "dawg.h"
22#include "object_cache.h"
23#include "tessdatamanager.h"
24
25namespace tesseract {
26
27struct DawgLoader {
28 DawgLoader(const std::string &lang, TessdataType tessdata_dawg_type, int dawg_debug_level,
29 TessdataManager *data_file)
30 : lang_(lang)
31 , data_file_(data_file)
32 , tessdata_dawg_type_(tessdata_dawg_type)
33 , dawg_debug_level_(dawg_debug_level) {}
34
35 Dawg *Load();
36
37 std::string lang_;
41};
42
43Dawg *DawgCache::GetSquishedDawg(const std::string &lang, TessdataType tessdata_dawg_type,
44 int debug_level, TessdataManager *data_file) {
45 std::string data_id = data_file->GetDataFileName();
46 data_id += kTessdataFileSuffixes[tessdata_dawg_type];
47 DawgLoader loader(lang, tessdata_dawg_type, debug_level, data_file);
48 return dawgs_.Get(data_id, std::bind(&DawgLoader::Load, &loader));
49}
50
// Body of the dawg loading routine.
// NOTE(review): this listing is missing the signature line (presumably
// DawgLoader::Load()), the condition guarding the first `return nullptr`, and
// every `case` label of the switch - confirm against the original file before
// editing the logic.
52 TFile fp;
// Early exit; the guarding condition is not visible in this listing.
54 return nullptr;
55 }
56 DawgType dawg_type;
57 PermuterType perm_type;
// Pick the DawgType/PermuterType pair from tessdata_dawg_type_. Every visible
// branch sets both variables; any other type hits `default` and yields nullptr.
58 switch (tessdata_dawg_type_) {
61 dawg_type = DAWG_TYPE_PUNCTUATION;
62 perm_type = PUNC_PERM;
63 break;
66 dawg_type = DAWG_TYPE_WORD;
67 perm_type = SYSTEM_DAWG_PERM;
68 break;
71 dawg_type = DAWG_TYPE_NUMBER;
72 perm_type = NUMBER_PERM;
73 break;
75 dawg_type = DAWG_TYPE_WORD; // doesn't actually matter
76 perm_type = COMPOUND_PERM; // doesn't actually matter
77 break;
79 dawg_type = DAWG_TYPE_WORD;
80 perm_type = SYSTEM_DAWG_PERM;
81 break;
83 dawg_type = DAWG_TYPE_WORD;
84 perm_type = FREQ_DAWG_PERM;
85 break;
86 default:
87 return nullptr;
88 }
// Build the SquishedDawg and read it from fp. On success the raw pointer is
// returned to the caller; on failure the object is deleted here and nullptr is
// returned instead.
89 auto *retval = new SquishedDawg(dawg_type, lang_, perm_type, dawg_debug_level_);
90 if (retval->Load(&fp)) {
91 return retval;
92 }
93 delete retval;
94 return nullptr;
95}
96
97} // namespace tesseract
DawgType
Definition: dawg.h:64
@ DAWG_TYPE_NUMBER
Definition: dawg.h:67
@ DAWG_TYPE_WORD
Definition: dawg.h:66
@ DAWG_TYPE_PUNCTUATION
Definition: dawg.h:65
@ TESSDATA_UNAMBIG_DAWG
@ TESSDATA_LSTM_SYSTEM_DAWG
@ TESSDATA_NUMBER_DAWG
@ TESSDATA_LSTM_PUNC_DAWG
@ TESSDATA_BIGRAM_DAWG
@ TESSDATA_LSTM_NUMBER_DAWG
@ TESSDATA_SYSTEM_DAWG
PermuterType
Definition: ratngs.h:235
@ SYSTEM_DAWG_PERM
Definition: ratngs.h:244
@ NUMBER_PERM
Definition: ratngs.h:242
@ COMPOUND_PERM
Definition: ratngs.h:248
@ PUNC_PERM
Definition: ratngs.h:237
@ FREQ_DAWG_PERM
Definition: ratngs.h:247
bool GetComponent(TessdataType type, TFile *fp)
const std::string & GetDataFileName() const
DawgLoader(const std::string &lang, TessdataType tessdata_dawg_type, int dawg_debug_level, TessdataManager *data_file)
Definition: dawg_cache.cpp:28
TessdataType tessdata_dawg_type_
Definition: dawg_cache.cpp:39
TessdataManager * data_file_
Definition: dawg_cache.cpp:38
Dawg * GetSquishedDawg(const std::string &lang, TessdataType tessdata_dawg_type, int debug_level, TessdataManager *data_file)
Definition: dawg_cache.cpp:43