21 #pragma warning(disable:4244) // Conversion warnings 25 #include "config_auto.h" 55 data_file_name_ = data_file_name;
60 if (reader_ ==
nullptr) {
63 if (!(*reader_)(data_file_name, &data))
return false;
72 data_file_name_ = name;
76 if (fp.
FRead(&num_entries,
sizeof(num_entries), 1) != 1)
return false;
77 swap_ = num_entries > kMaxNumTessdataEntries || num_entries < 0;
79 if (swap_)
ReverseN(&num_entries,
sizeof(num_entries));
80 if (num_entries > kMaxNumTessdataEntries || num_entries < 0)
return false;
83 if (fp.
FReadEndian(&offset_table[0],
sizeof(offset_table[0]), num_entries) !=
87 if (offset_table[i] >= 0) {
88 int64_t entry_size = size - offset_table[i];
90 while (j < num_entries && offset_table[j] == -1) ++j;
91 if (j < num_entries) entry_size = offset_table[j] - offset_table[i];
93 if (fp.
FRead(&entries_[i][0], 1, entry_size) != entry_size)
return false;
108 memcpy(&entries_[type][0], data, size);
117 if (writer ==
nullptr)
128 int64_t offset =
sizeof(int32_t) +
sizeof(offset_table);
130 if (entries_[i].empty()) {
131 offset_table[i] = -1;
133 offset_table[i] = offset;
134 offset += entries_[i].
size();
141 fp.
FWrite(&num_entries,
sizeof(num_entries), 1);
142 fp.
FWrite(offset_table,
sizeof(offset_table), 1);
144 if (!entries_[i].empty()) {
145 fp.
FWrite(&entries_[i][0], entries_[i].size(), 1);
163 if (!entries_[i].empty()) {
164 tprintf(
"%d:%s:size=%d, offset=%d\n", i, kTessdataFileSuffixes[i],
165 entries_[i].size(), offset);
166 offset += entries_[i].
size();
174 if (!is_loaded_ && !
Init(data_file_name_.
string()))
return false;
183 if (entries_[type].empty())
return false;
184 fp->
Open(&entries_[type][0], entries_[type].size());
192 entries_[TESSDATA_VERSION].size());
202 const char *language_data_path_prefix,
203 const char *output_filename) {
209 filename += kTessdataFileSuffixes[i];
210 FILE *fp = fopen(filename.
string(),
"rb");
224 "Error: traineddata file must contain at least (a unicharset file" 225 "and inttemp) OR an lstm file.\n");
229 return SaveFile(output_filename,
nullptr);
233 const char *new_traineddata_filename,
234 char **component_filenames,
235 int num_new_components) {
237 for (
int i = 0; i < num_new_components; ++i) {
241 tprintf(
"Failed to read component file:%s\n", component_filenames[i]);
248 return SaveFile(new_traineddata_filename,
nullptr);
255 if (entries_[type].empty())
return false;
262 if (strcmp(kTessdataFileSuffixes[i], suffix) == 0) {
267 tprintf(
"TessdataManager can't determine which tessdata" 268 " component is represented by %s\n", suffix);
275 const char *suffix = strrchr(filename,
'.');
276 if (suffix ==
nullptr || *(++suffix) ==
'\0')
return false;
static bool TessdataTypeFromFileSuffix(const char *suffix, TessdataType *type)
bool ExtractToFile(const char *filename)
bool Init(const char *data_file_name)
bool(* FileReader)(const STRING &filename, GenericVector< char > *data)
std::string VersionString() const
int FRead(void *buffer, int size, int count)
void OpenWrite(GenericVector< char > *data)
bool IsLSTMAvailable() const
bool SaveFile(const STRING &filename, FileWriter writer) const
bool GetComponent(TessdataType type, TFile *fp)
bool LoadDataFromFile(const char *filename, GenericVector< char > *data)
void set_swap(bool value)
void ReverseN(void *ptr, int num_bytes)
bool(* FileWriter)(const GenericVector< char > &data, const STRING &filename)
void resize_no_init(int size)
int FReadEndian(void *buffer, int size, int count)
void OverwriteEntry(TessdataType type, const char *data, int size)
bool LoadMemBuffer(const char *name, const char *data, int size)
bool Open(const STRING &filename, FileReader reader)
void LoadFileLater(const char *data_file_name)
bool OverwriteComponents(const char *new_traineddata_filename, char **component_filenames, int num_new_components)
void SetVersionString(const std::string &v_str)
bool CombineDataFiles(const char *language_data_path_prefix, const char *output_filename)
const char * string() const
bool SaveDataToFile(const GenericVector< char > &data, const STRING &filename)
int FWrite(const void *buffer, int size, int count)
void Serialize(GenericVector< char > *data) const
bool IsBaseAvailable() const
static bool TessdataTypeFromFileName(const char *filename, TessdataType *type)
void init_to_size(int size, T t)