24 #include "config_auto.h" 49 #define VARDIR "configs/" 51 #define API_CONFIG "configs/api_config" 66 if ((fp = fopen(path.
string(),
"rb")) != NULL) {
70 path +=
"tessconfigs/";
72 if ((fp = fopen(path.
string(),
"rb")) != NULL) {
92 const char *arg0,
const char *textbase,
const char *language,
101 lang = language != NULL ? language :
"eng";
109 tprintf(
"Error opening data file %s\n", tessdata_path.
string());
110 tprintf(
"Please make sure the TESSDATA_PREFIX environment variable is set" 111 " to your \"tessdata\" directory.\n");
138 for (
int i = 0; i < configs_size; ++i) {
144 if (vars_vec != NULL && vars_values != NULL) {
145 for (
int i = 0; i < vars_vec->
size(); ++i) {
147 (*vars_values)[i].
string(),
148 set_params_constraint, this->
params())) {
149 tprintf(
"Error setting param %s\n", (*vars_vec)[i].
string());
156 FILE *params_file = fopen(tessedit_write_params_to_file.
string(),
"wb");
157 if (params_file != NULL) {
161 tprintf(
"Failed to open %s for writing params.\n",
162 tessedit_write_params_to_file.
string());
178 #ifndef ANDROID_BUILD 186 tprintf(
"Error: LSTM requested, but not present!! Loading tesseract.\n");
195 #ifndef ANDROID_BUILD 203 tprintf(
"Error: Size of unicharset is greater than MAX_NUM_CLASSES\n");
225 static_cast<ParamsModel::PassEnum>(p));
237 static bool IsStrInList(
const STRING& str,
239 for (
int i = 0; i < str_list.
size(); ++i) {
240 if (str_list[i] == str)
255 while (remains.
length() > 0) {
257 const char* start = remains.
string();
258 while (*start ==
'+')
262 target = not_to_load;
266 int end = strlen(start);
267 const char* plus = strchr(start,
'+');
268 if (plus != NULL && plus - start < end)
275 if (!IsStrInList(lang_code, *target)) {
287 char **configs,
int configs_size,
290 bool set_only_non_debug_params,
296 sub_langs_.delete_data_pointers();
300 bool loaded_primary =
false;
302 for (
int lang_index = 0; lang_index < langs_to_load.
size(); ++lang_index) {
303 if (!IsStrInList(langs_to_load[lang_index], langs_not_to_load)) {
304 const char *lang_str = langs_to_load[lang_index].string();
306 if (!loaded_primary) {
313 arg0, textbase, lang_str, oem, configs, configs_size, vars_vec,
314 vars_values, set_only_non_debug_params, mgr);
318 if (!loaded_primary) {
320 tprintf(
"Failed loading language '%s'\n", lang_str);
323 &langs_to_load, &langs_not_to_load);
324 loaded_primary =
true;
328 tprintf(
"Failed loading language '%s'\n", lang_str);
331 sub_langs_.push_back(tess_to_init);
334 &langs_to_load, &langs_not_to_load);
339 if (!loaded_primary) {
340 tprintf(
"Tesseract couldn't load any languages!\n");
343 if (!sub_langs_.empty()) {
350 for (
int s = 0; s < sub_langs_.size(); ++s) {
351 sub_langs_[s]->language_model_->getParamsModel().Copy(
354 tprintf(
"Using params model of the primary language\n");
357 for (
int s = 0; s < sub_langs_.size(); ++s) {
358 sub_langs_[s]->language_model_->getParamsModel().Clear();
385 char **configs,
int configs_size,
388 bool set_only_non_debug_params,
391 configs_size, vars_vec, vars_values,
392 set_only_non_debug_params, mgr)) {
402 init_tesseract ? mgr :
nullptr);
409 for (
int i = 0; i < new_fonts.
size(); ++i) {
418 for (
int i = 0; i < lang_fonts->
size(); ++i) {
419 int index = all_fonts.
get_id(lang_fonts->
get(i));
435 for (
int i = 0; i < sub_langs_.size(); ++i) {
440 for (
int i = 0; i < sub_langs_.size(); ++i) {
443 font_table_size_ = all_fonts.
size();
450 NULL, 0, NULL, NULL,
false, mgr))
void SetupForLoad(DawgCache *dawg_cache)
Dict & getDict() override
void ParseLanguageString(const char *lang_str, GenericVector< STRING > *to_load, GenericVector< STRING > *not_to_load)
void SetupUniversalFontIds()
int init_tesseract(const char *arg0, const char *textbase, const char *language, OcrEngineMode oem, char **configs, int configs_size, const GenericVector< STRING > *vars_vec, const GenericVector< STRING > *vars_values, bool set_only_init_params, TessdataManager *mgr)
const T & get(int id) const
Return the object from an id.
void main_setup(const char *argv0, const char *basename)
CCUtil::main_setup - set location of tessdata and name of image.
static bool ReadParamsFromFp(SetParamConstraint constraint, TFile *fp, ParamsVectors *member_params)
_ConstTessMemberResultCallback_0_0< false, R, T1 >::base * NewPermanentTessCallback(const T1 *obj, R(T2::*member)() const)
bool init_tesseract_lang_data(const char *arg0, const char *textbase, const char *language, OcrEngineMode oem, char **configs, int configs_size, const GenericVector< STRING > *vars_vec, const GenericVector< STRING > *vars_values, bool set_only_init_params, TessdataManager *mgr)
LanguageModel * language_model_
int tessedit_ocr_engine_mode
bool tessedit_use_primary_params_model
ParamsModel & getParamsModel()
bool IsLSTMAvailable() const
char * tessedit_write_params_to_file
bool IsComponentAvailable(TessdataType type) const
bool GetComponent(TessdataType type, TFile *fp)
static bool ReadParamsFile(const char *file, SetParamConstraint constraint, ParamsVectors *member_params)
int get_id(T object) const
const char * string() const
bool LoadFromFp(const char *lang, TFile *fp)
static DawgCache * GlobalDawgCache()
UnicityTable< FontInfo > & get_fontinfo_table()
void Load(const STRING &lang, TessdataManager *data_file)
int push_back(T object)
Add an element in the table.
void LoadUnicharAmbigs(const UNICHARSET &encoder_set, TFile *ambigs_file, int debug_level, bool use_ambigs_for_adaption, UNICHARSET *unicharset)
bool CompareFontInfo(const FontInfo &fi1, const FontInfo &fi2)
void program_editup(const char *textbase, TessdataManager *init_classifier, TessdataManager *init_dict)
bool major_right_to_left() const
bool tessedit_init_config_only
void truncate_at(inT32 index)
void CopyFrom(const UNICHARSET &src)
static bool SetParam(const char *name, const char *value, SetParamConstraint constraint, ParamsVectors *member_params)
bool load_from_file(const char *const filename, bool skip_fragments)
char * tessedit_load_sublangs
UnicharAmbigs unichar_ambigs
void read_config_file(const char *filename, SetParamConstraint constraint)
int init_tesseract_lm(const char *arg0, const char *textbase, const char *language, TessdataManager *mgr)
bool use_ambigs_for_adaption
void SetPass(PassEnum pass)
void set_compare_callback(TessResultCallback2< bool, T const &, T const &> *cb)
ETEXT_DESC * global_monitor
const UNICHARSET & GetUnicharset() const
bool Init(const char *data_file_name)
bool Load(const char *lang, TessdataManager *mgr)
bool IsBaseAvailable() const
void InitUnicharAmbigs(const UNICHARSET &unicharset, bool use_ambigs_for_adaption)
int init_tesseract_internal(const char *arg0, const char *textbase, const char *language, OcrEngineMode oem, char **configs, int configs_size, const GenericVector< STRING > *vars_vec, const GenericVector< STRING > *vars_values, bool set_only_init_params, TessdataManager *mgr)
STRING language_data_path_prefix
void LoadUniversal(const UNICHARSET &encoder_set, UNICHARSET *unicharset)
static void PrintParams(FILE *fp, const ParamsVectors *member_params)
bool tessedit_ambigs_training
int size() const
Return the size used.