#include <tessdatamanager.h>
|
| TessdataManager () |
|
| TessdataManager (FileReader reader) |
|
| ~TessdataManager () |
|
bool | swap () const |
|
bool | is_loaded () const |
|
void | LoadFileLater (const char *data_file_name) |
|
bool | Init (const char *data_file_name) |
|
bool | LoadMemBuffer (const char *name, const char *data, int size) |
|
void | OverwriteEntry (TessdataType type, const char *data, int size) |
|
bool | SaveFile (const STRING &filename, FileWriter writer) const |
|
void | Serialize (GenericVector< char > *data) const |
|
void | Clear () |
|
void | Directory () const |
|
bool | IsComponentAvailable (TessdataType type) const |
|
bool | GetComponent (TessdataType type, TFile *fp) |
|
bool | GetComponent (TessdataType type, TFile *fp) const |
|
std::string | VersionString () const |
|
void | SetVersionString (const std::string &v_str) |
|
bool | IsBaseAvailable () const |
|
bool | IsLSTMAvailable () const |
|
const STRING & | GetDataFileName () const |
|
bool | CombineDataFiles (const char *language_data_path_prefix, const char *output_filename) |
|
bool | OverwriteComponents (const char *new_traineddata_filename, char **component_filenames, int num_new_components) |
|
bool | ExtractToFile (const char *filename) |
|
Definition at line 126 of file tessdatamanager.h.
◆ TessdataManager() [1/2]
tesseract::TessdataManager::TessdataManager |
( |
| ) |
|
Definition at line 40 of file tessdatamanager.cpp.
40 : reader_(
nullptr), is_loaded_(
false), swap_(
false) {
void SetVersionString(const std::string &v_str)
◆ TessdataManager() [2/2]
tesseract::TessdataManager::TessdataManager |
( |
FileReader |
reader | ) |
|
|
explicit |
◆ ~TessdataManager()
tesseract::TessdataManager::~TessdataManager |
( |
| ) |
|
|
inline |
◆ Clear()
void tesseract::TessdataManager::Clear |
( |
| ) |
|
◆ CombineDataFiles()
bool tesseract::TessdataManager::CombineDataFiles |
( |
const char * |
language_data_path_prefix, |
|
|
const char * |
output_filename |
|
) |
| |
Reads all the standard tesseract config and data files for a language at the given path and bundles them up into one binary data file. Returns true if the combined traineddata file was successfully written.
Definition at line 201 of file tessdatamanager.cpp.
209 filename += kTessdataFileSuffixes[i];
210 FILE *fp = fopen(filename.
string(),
"rb");
224 "Error: traineddata file must contain at least (a unicharset file" 225 "and inttemp) OR an lstm file.\n");
229 return SaveFile(output_filename,
nullptr);
static bool TessdataTypeFromFileSuffix(const char *suffix, TessdataType *type)
bool IsLSTMAvailable() const
bool SaveFile(const STRING &filename, FileWriter writer) const
bool LoadDataFromFile(const char *filename, GenericVector< char > *data)
const char * string() const
bool IsBaseAvailable() const
◆ Directory()
void tesseract::TessdataManager::Directory |
( |
| ) |
const |
Definition at line 159 of file tessdatamanager.cpp.
163 if (!entries_[i].empty()) {
164 tprintf(
"%d:%s:size=%d, offset=%d\n", i, kTessdataFileSuffixes[i],
165 entries_[i].size(), offset);
166 offset += entries_[i].
size();
std::string VersionString() const
◆ ExtractToFile()
bool tesseract::TessdataManager::ExtractToFile |
( |
const char * |
filename | ) |
|
Extracts the tessdata component implied by the name of the input file from the combined traineddata loaded into the TessdataManager, and writes the extracted component to the file indicated by that file name. E.g. if the filename given is somepath/somelang.unicharset, the unicharset component will be extracted from the data loaded into the TessdataManager and written to somepath/somelang.unicharset.
Returns: true if the component was successfully extracted, false if the component was not present in the traineddata loaded into the TessdataManager.
Definition at line 251 of file tessdatamanager.cpp.
255 if (entries_[type].empty())
return false;
bool SaveDataToFile(const GenericVector< char > &data, const STRING &filename)
static bool TessdataTypeFromFileName(const char *filename, TessdataType *type)
◆ GetComponent() [1/2]
Definition at line 173 of file tessdatamanager.cpp.
174 if (!is_loaded_ && !
Init(data_file_name_.
string()))
return false;
176 return const_this->GetComponent(type, fp);
bool Init(const char *data_file_name)
const char * string() const
◆ GetComponent() [2/2]
bool tesseract::TessdataManager::GetComponent |
( |
TessdataType |
type, |
|
|
TFile * |
fp |
|
) |
| const |
Definition at line 181 of file tessdatamanager.cpp.
183 if (entries_[type].empty())
return false;
184 fp->Open(&entries_[type][0], entries_[type].size());
◆ GetDataFileName()
const STRING& tesseract::TessdataManager::GetDataFileName |
( |
| ) |
const |
|
inline |
◆ Init()
bool tesseract::TessdataManager::Init |
( |
const char * |
data_file_name | ) |
|
Opens and reads the given data file immediately.
Returns: true on success.
Definition at line 58 of file tessdatamanager.cpp.
60 if (reader_ ==
nullptr) {
63 if (!(*reader_)(data_file_name, &data))
return false;
bool LoadDataFromFile(const char *filename, GenericVector< char > *data)
bool LoadMemBuffer(const char *name, const char *data, int size)
◆ is_loaded()
bool tesseract::TessdataManager::is_loaded |
( |
| ) |
const |
|
inline |
◆ IsBaseAvailable()
bool tesseract::TessdataManager::IsBaseAvailable |
( |
| ) |
const |
|
inline |
◆ IsComponentAvailable()
bool tesseract::TessdataManager::IsComponentAvailable |
( |
TessdataType |
type | ) |
const |
|
inline |
◆ IsLSTMAvailable()
bool tesseract::TessdataManager::IsLSTMAvailable |
( |
| ) |
const |
|
inline |
◆ LoadFileLater()
void tesseract::TessdataManager::LoadFileLater |
( |
const char * |
data_file_name | ) |
|
◆ LoadMemBuffer()
bool tesseract::TessdataManager::LoadMemBuffer |
( |
const char * |
name, |
|
|
const char * |
data, |
|
|
int |
size |
|
) |
| |
Definition at line 69 of file tessdatamanager.cpp.
72 data_file_name_ = name;
76 if (fp.FRead(&num_entries,
sizeof(num_entries), 1) != 1)
return false;
77 swap_ = num_entries > kMaxNumTessdataEntries || num_entries < 0;
79 if (swap_)
ReverseN(&num_entries,
sizeof(num_entries));
80 if (num_entries > kMaxNumTessdataEntries || num_entries < 0)
return false;
83 if (fp.FReadEndian(&offset_table[0],
sizeof(offset_table[0]), num_entries) !=
87 if (offset_table[i] >= 0) {
88 int64_t entry_size = size - offset_table[i];
90 while (j < num_entries && offset_table[j] == -1) ++j;
91 if (j < num_entries) entry_size = offset_table[j] - offset_table[i];
93 if (fp.FRead(&entries_[i][0], 1, entry_size) != entry_size)
return false;
void ReverseN(void *ptr, int num_bytes)
void resize_no_init(int size)
void SetVersionString(const std::string &v_str)
◆ OverwriteComponents()
bool tesseract::TessdataManager::OverwriteComponents |
( |
const char * |
new_traineddata_filename, |
|
|
char ** |
component_filenames, |
|
|
int |
num_new_components |
|
) |
| |
Gets the individual components from the data_file_ with which the class was initialized. Overwrites the components specified by component_filenames. Writes the updated traineddata file to new_traineddata_filename.
Definition at line 232 of file tessdatamanager.cpp.
237 for (
int i = 0; i < num_new_components; ++i) {
241 tprintf(
"Failed to read component file:%s\n", component_filenames[i]);
248 return SaveFile(new_traineddata_filename,
nullptr);
bool SaveFile(const STRING &filename, FileWriter writer) const
bool LoadDataFromFile(const char *filename, GenericVector< char > *data)
static bool TessdataTypeFromFileName(const char *filename, TessdataType *type)
◆ OverwriteEntry()
void tesseract::TessdataManager::OverwriteEntry |
( |
TessdataType |
type, |
|
|
const char * |
data, |
|
|
int |
size |
|
) |
| |
Definition at line 104 of file tessdatamanager.cpp.
108 memcpy(&entries_[type][0], data, size);
void resize_no_init(int size)
◆ SaveFile()
bool tesseract::TessdataManager::SaveFile |
( |
const STRING & |
filename, |
|
|
FileWriter |
writer |
|
) |
| const |
Definition at line 112 of file tessdatamanager.cpp.
117 if (writer ==
nullptr)
bool SaveDataToFile(const GenericVector< char > &data, const STRING &filename)
void Serialize(GenericVector< char > *data) const
◆ Serialize()
void tesseract::TessdataManager::Serialize |
( |
GenericVector< char > * |
data | ) |
const |
Definition at line 124 of file tessdatamanager.cpp.
128 int64_t offset =
sizeof(int32_t) +
sizeof(offset_table);
130 if (entries_[i].empty()) {
131 offset_table[i] = -1;
133 offset_table[i] = offset;
134 offset += entries_[i].
size();
141 fp.FWrite(&num_entries,
sizeof(num_entries), 1);
142 fp.FWrite(offset_table,
sizeof(offset_table), 1);
144 if (!entries_[i].empty()) {
145 fp.FWrite(&entries_[i][0], entries_[i].size(), 1);
void init_to_size(int size, T t)
◆ SetVersionString()
void tesseract::TessdataManager::SetVersionString |
( |
const std::string & |
v_str | ) |
|
◆ swap()
bool tesseract::TessdataManager::swap |
( |
| ) |
const |
|
inline |
◆ TessdataTypeFromFileName()
bool tesseract::TessdataManager::TessdataTypeFromFileName |
( |
const char * |
filename, |
|
|
TessdataType * |
type |
|
) |
| |
|
static |
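Tries to determine the tessdata component type from the file suffix of filename, i.e. the text following the last '.' in the name. Returns true on success; returns false if filename contains no '.' or nothing follows the last '.'.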
Definition at line 272 of file tessdatamanager.cpp.
275 const char *suffix = strrchr(
filename,
'.');
276 if (suffix ==
nullptr || *(++suffix) ==
'\0')
return false;
static bool TessdataTypeFromFileSuffix(const char *suffix, TessdataType *type)
◆ TessdataTypeFromFileSuffix()
bool tesseract::TessdataManager::TessdataTypeFromFileSuffix |
( |
const char * |
suffix, |
|
|
TessdataType * |
type |
|
) |
| |
|
static |
Fills type with the TessdataType of the tessdata component represented by the given file suffix. E.g. for the file name tessdata/eng.unicharset, the suffix unicharset -> TESSDATA_UNICHARSET.
Returns: true if the tessdata component type could be determined from the given suffix.
Definition at line 259 of file tessdatamanager.cpp.
262 if (strcmp(kTessdataFileSuffixes[i], suffix) == 0) {
267 tprintf(
"TessdataManager can't determine which tessdata" 268 " component is represented by %s\n", suffix);
◆ VersionString()
std::string tesseract::TessdataManager::VersionString |
( |
| ) |
const |
The documentation for this class was generated from the following files: