tesseract  4.00.00dev
tesseract::DocumentData Class Reference

#include <imagedata.h>

Public Member Functions

 DocumentData (const STRING &name)
 
 ~DocumentData ()
 
bool LoadDocument (const char *filename, int start_page, inT64 max_memory, FileReader reader)
 
void SetDocument (const char *filename, inT64 max_memory, FileReader reader)
 
bool SaveDocument (const char *filename, FileWriter writer)
 
bool SaveToBuffer (GenericVector< char > *buffer)
 
void AddPageToDocument (ImageData *page)
 
const STRING & document_name () const
 
int NumPages () const
 
inT64 memory_used () const
 
void LoadPageInBackground (int index)
 
const ImageData * GetPage (int index)
 
bool IsPageAvailable (int index, ImageData **page)
 
ImageData * TakePage (int index)
 
bool IsCached () const
 
inT64 UnCache ()
 
void Shuffle ()
 

Friends

void * ReCachePagesFunc (void *data)
 

Detailed Description

Definition at line 203 of file imagedata.h.

Constructor & Destructor Documentation

◆ DocumentData()

tesseract::DocumentData::DocumentData ( const STRING & name)
explicit

Definition at line 376 of file imagedata.cpp.

377  : document_name_(name),
378  pages_offset_(-1),
379  total_pages_(-1),
380  memory_used_(0),
381  max_memory_(0),
382  reader_(NULL) {}

◆ ~DocumentData()

tesseract::DocumentData::~DocumentData ( )

Definition at line 384 of file imagedata.cpp.

384  {
385  SVAutoLock lock_p(&pages_mutex_);
386  SVAutoLock lock_g(&general_mutex_);
387 }

Member Function Documentation

◆ AddPageToDocument()

void tesseract::DocumentData::AddPageToDocument ( ImageData * page)

Definition at line 428 of file imagedata.cpp.

428  {
429  SVAutoLock lock(&pages_mutex_);
430  pages_.push_back(page);
431  set_memory_used(memory_used() + page->MemoryUsed());
432 }
inT64 memory_used() const
Definition: imagedata.h:231

◆ document_name()

const STRING& tesseract::DocumentData::document_name ( ) const
inline

Definition at line 223 of file imagedata.h.

223  {
224  SVAutoLock lock(&general_mutex_);
225  return document_name_;
226  }

◆ GetPage()

const ImageData * tesseract::DocumentData::GetPage ( int  index)

Definition at line 448 of file imagedata.cpp.

448  {
449  ImageData* page = NULL;
450  while (!IsPageAvailable(index, &page)) {
451  // If there is no background load scheduled, schedule one now.
452  pages_mutex_.Lock();
453  bool needs_loading = pages_offset_ != index;
454  pages_mutex_.Unlock();
455  if (needs_loading) LoadPageInBackground(index);
456  // We can't directly load the page, or the background load will delete it
457  // while the caller is using it, so give it a chance to work.
458 #if defined(__MINGW32__)
459  sleep(1);
460 #else
461  std::this_thread::sleep_for(std::chrono::seconds(1));
462 #endif
463  }
464  return page;
465 }
bool IsPageAvailable(int index, ImageData **page)
Definition: imagedata.cpp:470
void Lock()
Locks on a mutex.
Definition: svutil.cpp:70
void Unlock()
Unlocks on a mutex.
Definition: svutil.cpp:78
void LoadPageInBackground(int index)
Definition: imagedata.cpp:436

◆ IsCached()

bool tesseract::DocumentData::IsCached ( ) const
inline

Definition at line 263 of file imagedata.h.

263 { return NumPages() >= 0; }
int NumPages() const
Definition: imagedata.h:227

◆ IsPageAvailable()

bool tesseract::DocumentData::IsPageAvailable ( int  index,
ImageData **  page 
)

Definition at line 470 of file imagedata.cpp.

470  {
471  SVAutoLock lock(&pages_mutex_);
472  int num_pages = NumPages();
473  if (num_pages == 0 || index < 0) {
474  *page = NULL; // Empty Document.
475  return true;
476  }
477  if (num_pages > 0) {
478  index = Modulo(index, num_pages);
479  if (pages_offset_ <= index && index < pages_offset_ + pages_.size()) {
480  *page = pages_[index - pages_offset_]; // Page is available already.
481  return true;
482  }
483  }
484  return false;
485 }
int NumPages() const
Definition: imagedata.h:227
int Modulo(int a, int b)
Definition: helpers.h:164

◆ LoadDocument()

bool tesseract::DocumentData::LoadDocument ( const char *  filename,
int  start_page,
inT64  max_memory,
FileReader  reader 
)

Definition at line 391 of file imagedata.cpp.

392  {
393  SetDocument(filename, max_memory, reader);
394  pages_offset_ = start_page;
395  return ReCachePages();
396 }
void SetDocument(const char *filename, inT64 max_memory, FileReader reader)
Definition: imagedata.cpp:399

◆ LoadPageInBackground()

void tesseract::DocumentData::LoadPageInBackground ( int  index)

Definition at line 436 of file imagedata.cpp.

436  {
437  ImageData* page = NULL;
438  if (IsPageAvailable(index, &page)) return;
439  SVAutoLock lock(&pages_mutex_);
440  if (pages_offset_ == index) return;
441  pages_offset_ = index;
442  pages_.clear();
443  SVSync::StartThread(ReCachePagesFunc, this);
444 }
bool IsPageAvailable(int index, ImageData **page)
Definition: imagedata.cpp:470
friend void * ReCachePagesFunc(void *data)
Definition: imagedata.cpp:370
static void StartThread(void *(*func)(void *), void *arg)
Create new thread.
Definition: svutil.cpp:87

◆ memory_used()

inT64 tesseract::DocumentData::memory_used ( ) const
inline

Definition at line 231 of file imagedata.h.

231  {
232  SVAutoLock lock(&general_mutex_);
233  return memory_used_;
234  }

◆ NumPages()

int tesseract::DocumentData::NumPages ( ) const
inline

Definition at line 227 of file imagedata.h.

227  {
228  SVAutoLock lock(&general_mutex_);
229  return total_pages_;
230  }

◆ SaveDocument()

bool tesseract::DocumentData::SaveDocument ( const char *  filename,
FileWriter  writer 
)

Definition at line 410 of file imagedata.cpp.

410  {
411  SVAutoLock lock(&pages_mutex_);
412  TFile fp;
413  fp.OpenWrite(NULL);
414  if (!pages_.Serialize(&fp) || !fp.CloseWrite(filename, writer)) {
415  tprintf("Serialize failed: %s\n", filename);
416  return false;
417  }
418  return true;
419 }
#define tprintf(...)
Definition: tprintf.h:31

◆ SaveToBuffer()

bool tesseract::DocumentData::SaveToBuffer ( GenericVector< char > *  buffer)

Definition at line 420 of file imagedata.cpp.

420  {
421  SVAutoLock lock(&pages_mutex_);
422  TFile fp;
423  fp.OpenWrite(buffer);
424  return pages_.Serialize(&fp);
425 }

◆ SetDocument()

void tesseract::DocumentData::SetDocument ( const char *  filename,
inT64  max_memory,
FileReader  reader 
)

Definition at line 399 of file imagedata.cpp.

400  {
401  SVAutoLock lock_p(&pages_mutex_);
402  SVAutoLock lock(&general_mutex_);
403  document_name_ = filename;
404  pages_offset_ = -1;
405  max_memory_ = max_memory;
406  reader_ = reader;
407 }

◆ Shuffle()

void tesseract::DocumentData::Shuffle ( )

Definition at line 502 of file imagedata.cpp.

502  {
503  TRand random;
504  // Different documents get shuffled differently, but the same for the same
505  // name.
506  random.set_seed(document_name_.string());
507  int num_pages = pages_.size();
508  // Execute one random swap for each page in the document.
509  for (int i = 0; i < num_pages; ++i) {
510  int src = random.IntRand() % num_pages;
511  int dest = random.IntRand() % num_pages;
512  std::swap(pages_[src], pages_[dest]);
513  }
514 }
const char * string() const
Definition: strngs.cpp:198

◆ TakePage()

ImageData* tesseract::DocumentData::TakePage ( int  index)
inline

Definition at line 255 of file imagedata.h.

255  {
256  SVAutoLock lock(&pages_mutex_);
257  ImageData* page = pages_[index];
258  pages_[index] = NULL;
259  return page;
260  }

◆ UnCache()

inT64 tesseract::DocumentData::UnCache ( )

Definition at line 489 of file imagedata.cpp.

489  {
490  SVAutoLock lock(&pages_mutex_);
491  inT64 memory_saved = memory_used();
492  pages_.clear();
493  pages_offset_ = -1;
494  set_total_pages(-1);
495  set_memory_used(0);
496  tprintf("Unloaded document %s, saving %" PRId64 " memory\n",
497  document_name_.string(), memory_saved);
498  return memory_saved;
499 }
inT64 memory_used() const
Definition: imagedata.h:231
#define tprintf(...)
Definition: tprintf.h:31
const char * string() const
Definition: strngs.cpp:198
int64_t inT64
Definition: host.h:40

Friends And Related Function Documentation

◆ ReCachePagesFunc

void* ReCachePagesFunc ( void *  data)
friend

Definition at line 370 of file imagedata.cpp.

370  {
371  DocumentData* document_data = static_cast<DocumentData*>(data);
372  document_data->ReCachePages();
373  return NULL;
374 }
DocumentData(const STRING &name)
Definition: imagedata.cpp:376

The documentation for this class was generated from the following files: