19#ifndef TESSERACT_CCUTIL_TESSDATAMANAGER_H_
20#define TESSERACT_CCUTIL_TESSDATAMANAGER_H_
// File-name suffix strings used for tessdata files.
//
// Each constant is a NUL-terminated character array with internal linkage
// (`static`), so every translation unit that includes this header gets its
// own private copy.

// Suffix of the combined trained-data file.
static const char kTrainedDataSuffix[] = "traineddata";

// Suffixes of the individual components.
static const char kLangConfigFileSuffix[] = "config";
static const char kUnicharsetFileSuffix[] = "unicharset";
static const char kAmbigsFileSuffix[] = "unicharambigs";
static const char kBuiltInTemplatesFileSuffix[] = "inttemp";
static const char kBuiltInCutoffsFileSuffix[] = "pffmtable";
static const char kNormProtoFileSuffix[] = "normproto";
static const char kPuncDawgFileSuffix[] = "punc-dawg";
static const char kSystemDawgFileSuffix[] = "word-dawg";
static const char kNumberDawgFileSuffix[] = "number-dawg";
static const char kFreqDawgFileSuffix[] = "freq-dawg";
static const char kFixedLengthDawgsFileSuffix[] = "fixed-length-dawgs";
static const char kCubeUnicharsetFileSuffix[] = "cube-unicharset";
static const char kCubeSystemDawgFileSuffix[] = "cube-word-dawg";
static const char kShapeTableFileSuffix[] = "shapetable";
static const char kBigramDawgFileSuffix[] = "bigram-dawg";
static const char kUnambigDawgFileSuffix[] = "unambig-dawg";
static const char kParamsModelFileSuffix[] = "params-model";

// Suffixes of the LSTM-prefixed components.
static const char kLSTMModelFileSuffix[] = "lstm";
static const char kLSTMPuncDawgFileSuffix[] = "lstm-punc-dawg";
static const char kLSTMSystemDawgFileSuffix[] = "lstm-word-dawg";
static const char kLSTMNumberDawgFileSuffix[] = "lstm-number-dawg";
static const char kLSTMUnicharsetFileSuffix[] = "lstm-unicharset";
static const char kLSTMRecoderFileSuffix[] = "lstm-recoder";

// Suffix of the version component.
static const char kVersionFileSuffix[] = "version";
// Table of tessdata component file suffixes, populated from the suffix
// constants defined earlier in this header. Both the pointers and the
// pointed-to characters are const.
//
// NOTE(review): this listing looks incomplete. The closing "};" is not
// visible, and several suffix constants defined above (kAmbigsFileSuffix,
// kNormProtoFileSuffix, kPuncDawgFileSuffix, kFreqDawgFileSuffix,
// kVersionFileSuffix) do not appear in the initializer. Confirm the full,
// ordered list against the original header before relying on element
// positions.
91static const char *
const kTessdataFileSuffixes[] = {
92 kLangConfigFileSuffix,
93 kUnicharsetFileSuffix,
95 kBuiltInTemplatesFileSuffix,
96 kBuiltInCutoffsFileSuffix,
99 kSystemDawgFileSuffix,
100 kNumberDawgFileSuffix,
102 kFixedLengthDawgsFileSuffix,
103 kCubeUnicharsetFileSuffix,
104 kCubeSystemDawgFileSuffix,
105 kShapeTableFileSuffix,
106 kBigramDawgFileSuffix,
107 kUnambigDawgFileSuffix,
108 kParamsModelFileSuffix,
109 kLSTMModelFileSuffix,
110 kLSTMPuncDawgFileSuffix,
111 kLSTMSystemDawgFileSuffix,
112 kLSTMNumberDawgFileSuffix,
113 kLSTMUnicharsetFileSuffix,
114 kLSTMRecoderFileSuffix,
// Upper bound (1000) on the number of tessdata entries, as the name suggests.
// NOTE(review): how this limit is enforced is not visible in this header
// excerpt — confirm against the implementation.
static const int kMaxNumTessdataEntries = 1000;
// Declaration only; the implementation is not visible here.
// Presumably records `data_file_name` so that the file can be loaded at a
// later point (going by the name) — confirm against the definition.
//
// data_file_name: C string naming the data file.
void LoadFileLater(const char *data_file_name);
// Declaration only; the implementation is not visible here.
// Presumably initializes the manager from the file named by
// `data_file_name` — confirm against the definition.
//
// data_file_name: C string naming the data file.
// Returns a bool; presumably true on success — TODO confirm.
bool Init(const char *data_file_name);
// Declaration only; the implementation is not visible here.
// Presumably loads tessdata from an in-memory buffer instead of a file —
// confirm against the definition.
//
// name: C string identifying the buffer (exact use not visible here).
// data: pointer to the buffer contents.
// size: presumably the number of bytes at `data` — TODO confirm.
// Returns a bool; presumably true on success — TODO confirm.
bool LoadMemBuffer(const char *name, const char *data, int size);
156 bool SaveFile(
const char *filename,
FileWriter writer)
const;
158 void Serialize(std::vector<char> *data)
const;
163 void Directory()
const;
167 return !entries_[
type].empty();
177 std::string VersionString()
const;
// Declaration only; the implementation is not visible here.
// Presumably stores `v_str` as the version string — confirm against the
// definition.
//
// v_str: the version string, taken by const reference.
void SetVersionString(const std::string &v_str);
193 return data_file_name_;
// Declaration only; the implementation is not visible here.
// Presumably gathers the per-component files found under
// `language_data_path_prefix` and combines them into `output_filename` —
// confirm against the definition.
//
// language_data_path_prefix: C string; path prefix of the component files.
// output_filename:           C string naming the combined output file.
// Returns a bool; presumably true on success — TODO confirm.
bool CombineDataFiles(const char *language_data_path_prefix,
                      const char *output_filename);
// Declaration only; the implementation is not visible here.
// Presumably replaces components of the loaded data with the contents of
// the given files and writes the result to `new_traineddata_filename` —
// confirm against the definition.
//
// new_traineddata_filename: C string naming the output file.
// component_filenames:      array of C strings naming the component files.
// num_new_components:       presumably the number of entries in
//                           `component_filenames` — TODO confirm.
// Returns a bool; presumably true on success — TODO confirm.
bool OverwriteComponents(const char *new_traineddata_filename,
                         char **component_filenames,
                         int num_new_components);
// Declaration only; the implementation is not visible here.
// Presumably extracts a component from the loaded data and writes it to
// `filename` — how the component is chosen is not visible here; confirm
// against the definition.
//
// filename: C string naming the output file.
// Returns a bool; presumably true on success — TODO confirm.
bool ExtractToFile(const char *filename);
// Declaration only; the implementation is not visible here.
// Presumably loads the data from an archive file named by `filename` — the
// supported archive formats are not visible here; confirm against the
// definition.
//
// filename: C string naming the archive file.
// Returns a bool; presumably true on success — TODO confirm.
bool LoadArchiveFile(const char *filename);
233 static bool TessdataTypeFromFileSuffix(
const char *suffix,
TessdataType *
type);
239 static bool TessdataTypeFromFileName(
const char *filename,
TessdataType *
type);
// Name of the data file associated with this object. A `return
// data_file_name_;` statement elsewhere in this listing shows it is exposed
// through an accessor.
std::string data_file_name_;
bool(*)(const std::vector< char > &data, const char *filename) FileWriter
@ TESSDATA_LSTM_SYSTEM_DAWG
@ TESSDATA_LSTM_UNICHARSET
@ TESSDATA_CUBE_SYSTEM_DAWG
@ TESSDATA_CUBE_UNICHARSET
@ TESSDATA_LSTM_PUNC_DAWG
@ TESSDATA_LSTM_NUMBER_DAWG
@ TESSDATA_FIXED_LENGTH_DAWGS
bool Serialize(FILE *fp, const std::vector< T > &data)
bool(*)(const char *filename, std::vector< char > *data) FileReader
bool IsLSTMAvailable() const
~TessdataManager()=default
const std::string & GetDataFileName() const
bool IsBaseAvailable() const
bool IsComponentAvailable(TessdataType type) const