19#ifndef TESSERACT_IMAGE_IMAGEDATA_H_
20#define TESSERACT_IMAGE_IMAGEDATA_H_
71 static ImageData *Build(
const char *name,
int page_number,
const char *lang,
72 const char *imagedata,
int imagedatasize,
const char *truth_text,
73 const char *box_text);
80 static bool SkipDeSerialize(
TFile *fp);
84 return imagefilename_;
87 imagefilename_ = name;
105 return transcription_;
107 const std::vector<TBOX> &
boxes()
const {
114 return box_texts_[index];
119 void SetPix(
Image pix);
121 Image GetPix()
const;
128 Image PreScale(
int target_height,
int max_height,
float *scale_factor,
int *scaled_width,
129 int *scaled_height, std::vector<TBOX> *boxes)
const;
131 int MemoryUsed()
const;
134 void Display()
const;
138 void AddBoxes(
const std::vector<TBOX> &boxes,
const std::vector<std::string> &texts,
139 const std::vector<int> &box_pages);
145 static void SetPixInternal(
Image pix, std::vector<char> *image_data);
147 static Image GetPixInternal(
const std::vector<char> &image_data);
150 bool AddBoxes(
const char *box_text);
153 std::string imagefilename_;
154 int32_t page_number_;
157#ifdef TESSERACT_IMAGEDATA_AS_PIX
160 std::vector<char> image_data_;
161 std::string language_;
162 std::string transcription_;
163 std::vector<TBOX> boxes_;
164 std::vector<std::string> box_texts_;
191 std::lock_guard<std::mutex> lock(general_mutex_);
192 return document_name_;
195 std::lock_guard<std::mutex> lock(general_mutex_);
199 return pages_.size();
202 std::lock_guard<std::mutex> lock(general_mutex_);
227 std::lock_guard<std::mutex> lock(pages_mutex_);
229 pages_[index] =
nullptr;
245 void set_total_pages(
int total) {
246 std::lock_guard<std::mutex> lock(general_mutex_);
247 total_pages_ = total;
250 std::lock_guard<std::mutex> lock(general_mutex_);
259 std::string document_name_;
261 std::vector<ImageData *> pages_;
267 int64_t memory_used_;
274 std::mutex pages_mutex_;
277 mutable std::mutex general_mutex_;
297 for (
auto *document : documents_) {
301 num_pages_per_doc_ = 0;
319 return GetPageSequential(serial);
321 return GetPageRoundRobin(serial);
338 const ImageData *GetPageRoundRobin(
int serial);
343 const ImageData *GetPageSequential(
int serial);
347 int CountNeighbourDocs(
int index,
int dir);
350 std::vector<DocumentData *> documents_;
355 int num_pages_per_doc_ = 0;
357 int64_t max_memory_ = 0;
bool(*)(const std::vector< char > &data, const char *filename) FileWriter
bool DeSerialize(bool swap, FILE *fp, std::vector< T > &data)
bool Serialize(FILE *fp, const std::vector< T > &data)
const int kFeaturePadding
bool(*)(const char *filename, std::vector< char > *data) FileReader
void set_imagefilename(const std::string &name)
void set_page_number(int num)
const std::string & imagefilename() const
const std::vector< char > & image_data() const
const std::string & box_text(int index) const
const std::vector< std::string > & box_texts() const
const std::string & transcription() const
const std::string & language() const
void set_language(const std::string &lang)
const std::vector< TBOX > & boxes() const
ImageData * TakePage(int index)
bool IsPageAvailable(int index, ImageData **page)
TESS_API DocumentData(const std::string &name)
void SetDocument(const char *filename, int64_t max_memory, FileReader reader)
int64_t memory_used() const
TESS_API bool SaveDocument(const char *filename, FileWriter writer)
void LoadPageInBackground(int index)
const std::string & document_name() const
TESS_API bool LoadDocument(const char *filename, int start_page, int64_t max_memory, FileReader reader)
TESS_API const ImageData * GetPage(int index)
TESS_API void AddPageToDocument(ImageData *page)
bool AddToCache(DocumentData *data)
const ImageData * GetPageBySerial(int serial)
DocumentData * FindDocument(const std::string &document_name) const
TESS_API bool LoadDocuments(const std::vector< std::string > &filenames, CachingStrategy cache_strategy, FileReader reader)
TESS_API ~DocumentCache()
const std::vector< DocumentData * > & documents() const
TESS_API int TotalPages()
TESS_API DocumentCache(int64_t max_memory)