#include <recodebeam.h>

Public Member Functions
	RecodeBeamSearch (const UnicharCompress &recoder, int null_char, bool simple_text, Dict *dict)

	~RecodeBeamSearch ()

void	Decode (const NetworkIO &output, double dict_ratio, double cert_offset, double worst_dict_cert, const UNICHARSET *charset, int lstm_choice_mode=0)

void	Decode (const GENERIC_2D_ARRAY< float > &output, double dict_ratio, double cert_offset, double worst_dict_cert, const UNICHARSET *charset)

void	DecodeSecondaryBeams (const NetworkIO &output, double dict_ratio, double cert_offset, double worst_dict_cert, const UNICHARSET *charset, int lstm_choice_mode=0)

void	ExtractBestPathAsLabels (std::vector< int > *labels, std::vector< int > *xcoords) const

void	ExtractBestPathAsUnicharIds (bool debug, const UNICHARSET *unicharset, std::vector< int > *unichar_ids, std::vector< float > *certs, std::vector< float > *ratings, std::vector< int > *xcoords) const

void	ExtractBestPathAsWords (const TBOX &line_box, float scale_factor, bool debug, const UNICHARSET *unicharset, PointerVector< WERD_RES > *words, int lstm_choice_mode=0)

void	DebugBeams (const UNICHARSET &unicharset) const

void	extractSymbolChoices (const UNICHARSET *unicharset)

void	PrintBeam2 (bool uids, int num_outputs, const UNICHARSET *charset, bool secondary) const

void	segmentTimestepsByCharacters ()

std::vector< std::vector< std::pair< const char *, float > > >	combineSegmentedTimesteps (std::vector< std::vector< std::vector< std::pair< const char *, float > > > > *segmentedTimesteps)

Static Public Member Functions
static int	LengthFromBeamsIndex (int index)

static NodeContinuation	ContinuationFromBeamsIndex (int index)

static bool	IsDawgFromBeamsIndex (int index)

static int	BeamIndex (bool is_dawg, NodeContinuation cont, int length)

Public Attributes
std::vector< std::vector< std::pair< const char *, float > > >	timesteps

std::vector< std::vector< std::vector< std::pair< const char *, float > > > >	segmentedTimesteps

std::vector< std::vector< std::pair< const char *, float > > >	ctc_choices

std::vector< std::unordered_set< int > >	excludedUnichars

std::vector< int >	character_boundaries_

Static Public Attributes
static constexpr float	kMinCertainty = -20.0f

static const int	kNumLengths = RecodedCharID::kMaxCodeLen + 1

static const int	kNumBeams = 2 * NC_COUNT * kNumLengths

Detailed Description

Definition at line 181 of file recodebeam.h.

Constructor & Destructor Documentation

◆ RecodeBeamSearch()

tesseract::RecodeBeamSearch::RecodeBeamSearch	(	const UnicharCompress &	recoder,
		int	null_char,
		bool	simple_text,
		Dict *	dict
	)

Definition at line 58 of file recodebeam.cpp.

    : recoder_(recoder),
      beam_size_(0),
      top_code_(-1),
      second_code_(-1),
      dict_(dict),
      // Space-delimited unless a dictionary is supplied and that dictionary
      // reports a language that is not space-delimited.
      space_delimited_(dict == nullptr || dict->IsSpaceDelimitedLang()),
      is_simple_text_(simple_text),
      null_char_(null_char) {}

◆ ~RecodeBeamSearch()

tesseract::RecodeBeamSearch::~RecodeBeamSearch ( )

Definition at line 73 of file recodebeam.cpp.

                                    {
  // Every entry of the primary beam was heap-allocated; release each one.
  for (RecodeBeam *step : beam_) {
    delete step;
  }
  // Same for whatever is still held by the secondary beam.
  for (RecodeBeam *step : secondary_beam_) {
    delete step;
  }
}

Member Function Documentation

◆ BeamIndex()

static int tesseract::RecodeBeamSearch::BeamIndex	(	bool	is_dawg,
		NodeContinuation	cont,
		int	length
	)

inline static

Definition at line 260 of file recodebeam.h.

                                                                        {
    // Index layout: dawg flag is the slowest-varying part, then the
    // continuation, then the code length.
    const int group = is_dawg * NC_COUNT + cont;
    return group * kNumLengths + length;
  }

◆ combineSegmentedTimesteps()

std::vector< std::vector< std::pair< const char *, float > > > tesseract::RecodeBeamSearch::combineSegmentedTimesteps ( std::vector< std::vector< std::vector< std::pair< const char *, float > > > > * segmentedTimesteps )

Definition at line 175 of file recodebeam.cpp.

                             {
  // Flatten the per-segment timestep lists into one list, preserving the
  // order of the segments and of the timesteps inside each segment.
  std::vector<std::vector<std::pair<const char *, float>>> flattened;
  for (auto &segment : *segmentedTimesteps) {
    flattened.insert(flattened.end(), segment.begin(), segment.end());
  }
  return flattened;
}

◆ ContinuationFromBeamsIndex()

static NodeContinuation tesseract::RecodeBeamSearch::ContinuationFromBeamsIndex ( int index )

inline static

Definition at line 253 of file recodebeam.h.

                                                                {
    // Strip the length part of the index, then keep the continuation part.
    const int group = index / kNumLengths;
    return static_cast<NodeContinuation>(group % NC_COUNT);
  }

◆ DebugBeams()

void tesseract::RecodeBeamSearch::DebugBeams ( const UNICHARSET & unicharset ) const

Definition at line 516 of file recodebeam.cpp.

                                                                    {
  // For every beam position, print each non-empty length-0 beam, covering
  // both dawg flags (0 = "Non-Dict", 1 = "Dict") and every continuation.
  for (int pos = 0; pos < beam_size_; ++pos) {
    for (int dawg = 0; dawg < 2; ++dawg) {
      for (int cont_id = 0; cont_id < NC_COUNT; ++cont_id) {
        const int slot =
            BeamIndex(dawg, static_cast<NodeContinuation>(cont_id), 0);
        auto &heap = beam_[pos]->beams_[slot];
        if (!heap.empty()) {
          // Print all the best scoring nodes for each unichar found.
          tprintf("Position %d: %s+%s beam\n", pos, dawg ? "Dict" : "Non-Dict",
                  kNodeContNames[cont_id]);
          DebugBeamPos(unicharset, heap);
        }
      }
    }
  }
}

◆ Decode() [1/2]

void tesseract::RecodeBeamSearch::Decode	(	const GENERIC_2D_ARRAY< float > &	output,
		double	dict_ratio,
		double	cert_offset,
		double	worst_dict_cert,
		const UNICHARSET *	charset
	)

Definition at line 100 of file recodebeam.cpp.

                                                         {
  // Restart the beam, then process the output one timestep at a time, where
  // a timestep is a row along the first dimension of `output`.
  beam_size_ = 0;
  const int num_steps = output.dim1();
  int t = 0;
  while (t < num_steps) {
    ComputeTopN(output[t], output.dim2(), kBeamWidths[0]);
    DecodeStep(output[t], t, dict_ratio, cert_offset, worst_dict_cert, charset);
    ++t;
  }
}

◆ Decode() [2/2]

void tesseract::RecodeBeamSearch::Decode	(	const NetworkIO &	output,
		double	dict_ratio,
		double	cert_offset,
		double	worst_dict_cert,
		const UNICHARSET *	charset,
		int	lstm_choice_mode = 0
	)

Definition at line 83 of file recodebeam.cpp.

                                                                               {
  // Restart the beam. Per-timestep choices are only collected when
  // lstm_choice_mode is non-zero, in which case the old ones are dropped.
  beam_size_ = 0;
  const bool keep_choices = lstm_choice_mode != 0;
  if (keep_choices) {
    timesteps.clear();
  }
  const int num_steps = output.Width();
  int t = 0;
  while (t < num_steps) {
    ComputeTopN(output.f(t), output.NumFeatures(), kBeamWidths[0]);
    DecodeStep(output.f(t), t, dict_ratio, cert_offset, worst_dict_cert,
               charset);
    if (keep_choices) {
      SaveMostCertainChoices(output.f(t), output.NumFeatures(), charset, t);
    }
    ++t;
  }
}

◆ DecodeSecondaryBeams()

void tesseract::RecodeBeamSearch::DecodeSecondaryBeams	(	const NetworkIO &	output,
		double	dict_ratio,
		double	cert_offset,
		double	worst_dict_cert,
		const UNICHARSET *	charset,
		int	lstm_choice_mode = 0
	)

Definition at line 112 of file recodebeam.cpp.

                                                                             {
  // Discard whatever a previous secondary pass left behind.
  for (RecodeBeam *stale : secondary_beam_) {
    delete stale;
  }
  secondary_beam_.clear();
  // With fewer than two boundaries there is no character segment to decode.
  if (character_boundaries_.size() < 2) {
    return;
  }
  const int num_steps = output.Width();
  unsigned segment = 0;
  for (int t = 0; t < num_steps; ++t) {
    // Move on to the next segment once t has reached its starting boundary.
    for (; (segment + 1) < character_boundaries_.size() &&
           t >= character_boundaries_[segment + 1];
         ++segment) {
    }
    ComputeSecTopN(&(excludedUnichars)[segment], output.f(t),
                   output.NumFeatures(), kBeamWidths[0]);
    DecodeSecondaryStep(output.f(t), t, dict_ratio, cert_offset,
                        worst_dict_cert, charset);
  }
}

◆ ExtractBestPathAsLabels()

void tesseract::RecodeBeamSearch::ExtractBestPathAsLabels	(	std::vector< int > *	labels,
		std::vector< int > *	xcoords
	)		const

Definition at line 201 of file recodebeam.cpp.

                                                           {
  // Both outputs are rebuilt from scratch.
  labels->clear();
  xcoords->clear();
  std::vector<const RecodeNode *> path;
  ExtractBestPaths(&path, nullptr);
  // Now just run CTC on the best nodes: null codes are dropped and, unless
  // simple text is in use, runs of the same code collapse into one label.
  const int width = path.size();
  for (int t = 0; t < width;) {
    const int label = path[t]->code;
    if (label != null_char_) {
      labels->push_back(label);
      xcoords->push_back(t);
    }
    ++t;
    if (is_simple_text_) {
      continue;  // Repeated codes are not merged for simple text.
    }
    while (t < width && path[t]->code == label) {
      ++t;
    }
  }
  // One trailing coordinate equal to the path length closes the last label.
  xcoords->push_back(width);
}

◆ ExtractBestPathAsUnicharIds()

void tesseract::RecodeBeamSearch::ExtractBestPathAsUnicharIds	(	bool	debug,
		const UNICHARSET *	unicharset,
		std::vector< int > *	unichar_ids,
		std::vector< float > *	certs,
		std::vector< float > *	ratings,
		std::vector< int > *	xcoords
	)		const

Definition at line 224 of file recodebeam.cpp.

                                   {
  // Pull out the best path and convert it into the caller's output vectors.
  std::vector<const RecodeNode *> path;
  ExtractBestPaths(&path, nullptr);
  ExtractPathAsUnicharIds(path, unichar_ids, certs, ratings, xcoords);
  if (!debug) {
    return;
  }
  // Debug only: print the raw node path and then the converted unichar path.
  DebugPath(unicharset, path);
  DebugUnicharPath(unicharset, path, *unichar_ids, *certs, *ratings,
                   *xcoords);
}

◆ ExtractBestPathAsWords()

void tesseract::RecodeBeamSearch::ExtractBestPathAsWords	(	const TBOX &	line_box,
		float	scale_factor,
		bool	debug,
		const UNICHARSET *	unicharset,
		PointerVector< WERD_RES > *	words,
		int	lstm_choice_mode = 0
	)

Definition at line 239 of file recodebeam.cpp.

                                                                    {
  // Rebuilds *words from the best path of the current beam: the path is
  // converted to unichar ids, split into words, and one WERD_RES is appended
  // per word. character_boundaries_ is refilled as a side effect.
  // Note: lstm_choice_mode is not referenced anywhere in this body.
  words->truncate(0);
  std::vector<int> unichar_ids;
  std::vector<float> certs;
  std::vector<float> ratings;
  std::vector<int> xcoords;
  std::vector<const RecodeNode *> best_nodes;
  std::vector<const RecodeNode *> second_nodes;
  character_boundaries_.clear();
  ExtractBestPaths(&best_nodes, &second_nodes);
  if (debug) {
    // The second-choice path is only converted for printing; the same output
    // vectors are passed to the best-path conversion below.
    DebugPath(unicharset, best_nodes);
    ExtractPathAsUnicharIds(second_nodes, &unichar_ids, &certs, &ratings,
                            &xcoords);
    tprintf("\nSecond choice path:\n");
    DebugUnicharPath(unicharset, second_nodes, unichar_ids, certs, ratings,
                     xcoords);
  }
  // If lstm choice mode is required in granularity level 2, it stores the x
  // Coordinates of every chosen character, to match the alternative choices to
  // it.
  ExtractPathAsUnicharIds(best_nodes, &unichar_ids, &certs, &ratings, &xcoords,
                          &character_boundaries_);
  int num_ids = unichar_ids.size();
  if (debug) {
    DebugUnicharPath(unicharset, best_nodes, unichar_ids, certs, ratings,
                     xcoords);
  }
  // Convert labels to unichar-ids.
  int word_end = 0;
  float prev_space_cert = 0.0f;
  // Each iteration emits the word [word_start, word_end); the next word
  // starts where this one ended (after any separating space is skipped).
  for (int word_start = 0; word_start < num_ids; word_start = word_end) {
    for (word_end = word_start + 1; word_end < num_ids; ++word_end) {
      // A word is terminated when a space character or start_of_word flag is
      // hit. We also want to force a separate word for every non
      // space-delimited character when not in a dictionary context.
      if (unichar_ids[word_end] == UNICHAR_SPACE) {
        break;
      }
      int index = xcoords[word_end];
      if (best_nodes[index]->start_of_word) {
        break;
      }
      if (best_nodes[index]->permuter == TOP_CHOICE_PERM &&
          (!unicharset->IsSpaceDelimited(unichar_ids[word_end]) ||
           !unicharset->IsSpaceDelimited(unichar_ids[word_end - 1]))) {
        break;
      }
    }
    // Certainty of the space that ends this word; stays 0 when the word was
    // not ended by a space.
    float space_cert = 0.0f;
    if (word_end < num_ids && unichar_ids[word_end] == UNICHAR_SPACE) {
      space_cert = certs[word_end];
    }
    bool leading_space =
        word_start > 0 && unichar_ids[word_start - 1] == UNICHAR_SPACE;
    // Create a WERD_RES for the output word.
    WERD_RES *word_res =
        InitializeWord(leading_space, line_box, word_start, word_end,
                       std::min(space_cert, prev_space_cert), unicharset,
                       xcoords, scale_factor);
    // One single-entry choice list per character, placed on the diagonal of
    // the word's ratings matrix.
    for (int i = word_start; i < word_end; ++i) {
      auto *choices = new BLOB_CHOICE_LIST;
      BLOB_CHOICE_IT bc_it(choices);
      auto *choice = new BLOB_CHOICE(unichar_ids[i], ratings[i], certs[i], -1,
                                     1.0f, static_cast<float>(INT16_MAX), 0.0f,
                                     BCC_STATIC_CLASSIFIER);
      int col = i - word_start;
      choice->set_matrix_cell(col, col);
      bc_it.add_after_then_move(choice);
      word_res->ratings->put(col, col, choices);
    }
    // The permuter of the node behind the word's last character is used for
    // the whole word.
    int index = xcoords[word_end - 1];
    word_res->FakeWordFromRatings(best_nodes[index]->permuter);
    words->push_back(word_res);
    prev_space_cert = space_cert;
    // Skip the separating space so that it does not start the next word.
    if (word_end < num_ids && unichar_ids[word_end] == UNICHAR_SPACE) {
      ++word_end;
    }
  }
}

◆ extractSymbolChoices()

void tesseract::RecodeBeamSearch::extractSymbolChoices ( const UNICHARSET * unicharset )

Definition at line 409 of file recodebeam.cpp.

                                                                        {
  // For every character segment delimited by character_boundaries_, picks the
  // best-rated symbol found in the current beam, appends it to ctc_choices and
  // adds its codes to excludedUnichars. The secondary beam is freed at the end.
  // Does nothing when fewer than two boundaries are available.
  if (character_boundaries_.size() < 2) {
    return;
  }
  // For the first iteration the original beam is analyzed. After that a
  // new beam is calculated based on the results from the original beam.
  std::vector<RecodeBeam *> &currentBeam =
      secondary_beam_.empty() ? beam_ : secondary_beam_;
  character_boundaries_[0] = 0;
  // Segment j - 1 spans [character_boundaries_[j - 1], character_boundaries_[j]).
  for (unsigned j = 1; j < character_boundaries_.size(); ++j) {
    std::vector<int> unichar_ids;
    std::vector<float> certs;
    std::vector<float> ratings;
    std::vector<int> xcoords;
    // Number of timesteps in this segment; limits how far back a path is
    // followed from the segment's last timestep.
    int backpath = character_boundaries_[j] - character_boundaries_[j - 1];
    // Heap taken from the beam at the last timestep of the segment.
    // NOTE(review): beams_->heap() presumably reads the first entry of
    // beams_ only - confirm against RecodeBeam.
    std::vector<tesseract::RecodePair> &heaps =
        currentBeam.at(character_boundaries_[j] - 1)->beams_->heap();
    std::vector<const RecodeNode *> best_nodes;
    std::vector<const RecodeNode *> best;
    // Scan the segmented node chain for valid unichar ids.
    for (auto &&entry : heaps) {
      bool validChar = false;
      int backcounter = 0;
      const RecodeNode *node = &entry.data();
      while (node != nullptr && backcounter < backpath) {
        if (node->code != null_char_ &&
            node->unichar_id != INVALID_UNICHAR_ID) {
          validChar = true;
          break;
        }
        node = node->prev;
        ++backcounter;
      }
      if (validChar) {
        best.push_back(&entry.data());
      }
    }
    // find the best rated segmented node chain and extract the unichar id.
    if (!best.empty()) {
      // NOTE(review): ordering is defined by greater_than, which is not
      // visible here; best[0] is taken as the preferred candidate.
      std::sort(best.begin(), best.end(), greater_than());
      ExtractPath(best[0], &best_nodes, backpath);
      ExtractPathAsUnicharIds(best_nodes, &unichar_ids, &certs, &ratings,
                              &xcoords);
    }
    if (!unichar_ids.empty()) {
      // Position of the lowest rating among the extracted unichars.
      int bestPos = 0;
      for (unsigned i = 1; i < unichar_ids.size(); ++i) {
        if (ratings[i] < ratings[bestPos]) {
          bestPos = i;
        }
      }
#if 0 // TODO: bestCode is currently unused (see commit 2dd5d0d60).
      int bestCode = -10;
      for (auto &node : best_nodes) {
        if (node->unichar_id == unichar_ids[bestPos]) {
          bestCode = node->code;
        }
      }
#endif
      // Exclude the best choice for the followup decoding.
      std::unordered_set<int> excludeCodeList;
      for (auto &best_node : best_nodes) {
        if (best_node->code != null_char_) {
          excludeCodeList.insert(best_node->code);
        }
      }
      // Merge into the existing set for this segment, or append a new set
      // when the segment has none yet.
      if (j - 1 < excludedUnichars.size()) {
        for (auto elem : excludeCodeList) {
          excludedUnichars[j - 1].insert(elem);
        }
      } else {
        excludedUnichars.push_back(excludeCodeList);
      }
      // Save the best choice for the choice iterator.
      if (j - 1 < ctc_choices.size()) {
        int id = unichar_ids[bestPos];
        const char *result = unicharset->id_to_unichar_ext(id);
        float rating = ratings[bestPos];
        ctc_choices[j - 1].push_back(
            std::pair<const char *, float>(result, rating));
      } else {
        std::vector<std::pair<const char *, float>> choice;
        int id = unichar_ids[bestPos];
        const char *result = unicharset->id_to_unichar_ext(id);
        float rating = ratings[bestPos];
        choice.emplace_back(result, rating);
        ctc_choices.push_back(choice);
      }
      // fill the blank spot with an empty array
    } else {
      // Nothing was found for this segment: append empty placeholders so that
      // both vectors still get an entry for segment j - 1.
      if (j - 1 >= excludedUnichars.size()) {
        std::unordered_set<int> excludeCodeList;
        excludedUnichars.push_back(excludeCodeList);
      }
      if (j - 1 >= ctc_choices.size()) {
        std::vector<std::pair<const char *, float>> choice;
        ctc_choices.push_back(choice);
      }
    }
  }
  // The secondary beam is always released here, whether or not it was the
  // beam that was just analyzed.
  for (auto data : secondary_beam_) {
    delete data;
  }
  secondary_beam_.clear();
}

◆ IsDawgFromBeamsIndex()

static bool tesseract::RecodeBeamSearch::IsDawgFromBeamsIndex ( int index )

inline static

Definition at line 256 of file recodebeam.h.

                                              {
    // Number of beam indices that belong to one dawg flag value.
    const int per_dawg_flag = kNumLengths * NC_COUNT;
    return index / per_dawg_flag > 0;
  }

◆ LengthFromBeamsIndex()

static int tesseract::RecodeBeamSearch::LengthFromBeamsIndex ( int index )

inline static

Definition at line 250 of file recodebeam.h.

                                             {
    // The code length is the fastest-varying part of a beam index.
    const int length = index % kNumLengths;
    return length;
  }

◆ PrintBeam2()

void tesseract::RecodeBeamSearch::PrintBeam2	(	bool	uids,
		int	num_outputs,
		const UNICHARSET *	charset,
		bool	secondary
	)		const

Definition at line 330 of file recodebeam.cpp.

                                                        {
  // Debug dump of one beam (primary, or secondary when `secondary` is true).
  // Nodes are grouped into one layer per beam step and printed one line per
  // node as "<prev>(|)<prev score>(>)<node>(|)<node score>". With `uids` set
  // the ids are printed in hex, otherwise the unichar strings are printed.
  // A "***" line is printed whenever a layer index matches the next entry of
  // character_boundaries_. `num_outputs` is not used by this function.
  std::vector<std::vector<const RecodeNode *>> topology;
  std::unordered_set<const RecodeNode *> visited;
  const std::vector<RecodeBeam *> &beam = !secondary ? beam_ : secondary_beam_;
  // create the topology: one initially empty layer per beam step
  topology.resize(beam.size());
  // fill the topology with depths first
  for (int step = beam.size() - 1; step >= 0; --step) {
    std::vector<tesseract::RecodePair> &heaps = beam.at(step)->beams_->heap();
    for (auto &&node : heaps) {
      // Follow the prev chain; each hop back lands one layer earlier. Stop at
      // the first node that has already been placed.
      int backtracker = 0;
      const RecodeNode *curr = &node.data();
      while (curr != nullptr && !visited.count(curr)) {
        visited.insert(curr);
        topology[step - backtracker].push_back(curr);
        curr = curr->prev;
        ++backtracker;
      }
    }
  }
  int ct = 0;
  unsigned cb = 1;
  for (const std::vector<const RecodeNode *> &layer : topology) {
    // Printing stops once every character boundary has been passed.
    if (cb >= character_boundaries_.size()) {
      break;
    }
    if (ct == character_boundaries_[cb]) {
      tprintf("***\n");
      ++cb;
    }
    for (const RecodeNode *node : layer) {
      // Label for the node itself: its unichar when it has one, " " / 0 for
      // the null code, and "*" / 666 for any other code.
      const char *code;
      int intCode;
      if (node->unichar_id != INVALID_UNICHAR_ID) {
        code = charset->id_to_unichar(node->unichar_id);
        intCode = node->unichar_id;
      } else if (node->code == null_char_) {
        intCode = 0;
        code = " ";
      } else {
        intCode = 666;
        code = "*";
      }
      int intPrevCode = 0;
      const char *prevCode;
      float prevScore = 0;
      if (node->prev != nullptr) {
        prevScore = node->prev->score;
        if (node->prev->unichar_id != INVALID_UNICHAR_ID) {
          prevCode = charset->id_to_unichar(node->prev->unichar_id);
          intPrevCode = node->prev->unichar_id;
        } else if (node->prev->code == null_char_) {
          // The predecessor is classified by its own code. Testing
          // node->code here would label it from the current node instead.
          intPrevCode = 0;
          prevCode = " ";
        } else {
          prevCode = "*";
          intPrevCode = 666;
        }
      } else {
        // No predecessor: printed as a blank with id 0 and score 0.
        prevCode = " ";
      }
      if (uids) {
        tprintf("%x(|)%f(>)%x(|)%f\n", intPrevCode, prevScore, intCode,
                node->score);
      } else {
        tprintf("%s(|)%f(>)%s(|)%f\n", prevCode, prevScore, code, node->score);
      }
    }
    tprintf("-\n");
    ++ct;
  }
  tprintf("***\n");
}

◆ segmentTimestepsByCharacters()

void tesseract::RecodeBeamSearch::segmentTimestepsByCharacters ( )

Definition at line 164 of file recodebeam.cpp.

                                                    {
  // Appends one group of timesteps per character segment, where segment
  // i - 1 covers [character_boundaries_[i - 1], character_boundaries_[i]).
  const unsigned num_boundaries = character_boundaries_.size();
  for (unsigned i = 1; i < num_boundaries; ++i) {
    const int first = character_boundaries_[i - 1];
    const int limit = character_boundaries_[i];
    segmentedTimesteps.emplace_back();
    auto &segment = segmentedTimesteps.back();
    for (int step = first; step < limit; ++step) {
      segment.push_back(timesteps[step]);
    }
  }
}

Member Data Documentation

◆ character_boundaries_

std::vector<int> tesseract::RecodeBeamSearch::character_boundaries_

Definition at line 238 of file recodebeam.h.

◆ ctc_choices

std::vector<std::vector<std::pair<const char *, float> > > tesseract::RecodeBeamSearch::ctc_choices

Definition at line 234 of file recodebeam.h.

◆ excludedUnichars

std::vector<std::unordered_set<int> > tesseract::RecodeBeamSearch::excludedUnichars

Definition at line 236 of file recodebeam.h.

◆ kMinCertainty

constexpr float tesseract::RecodeBeamSearch::kMinCertainty = -20.0f

static constexpr

Definition at line 243 of file recodebeam.h.

◆ kNumBeams

const int tesseract::RecodeBeamSearch::kNumBeams = 2 * NC_COUNT * kNumLengths

static

Definition at line 248 of file recodebeam.h.

◆ kNumLengths

const int tesseract::RecodeBeamSearch::kNumLengths = RecodedCharID::kMaxCodeLen + 1

static

Definition at line 245 of file recodebeam.h.

◆ segmentedTimesteps

std::vector<std::vector<std::vector<std::pair<const char *, float> > > > tesseract::RecodeBeamSearch::segmentedTimesteps

Definition at line 232 of file recodebeam.h.

◆ timesteps

std::vector<std::vector<std::pair<const char *, float> > > tesseract::RecodeBeamSearch::timesteps

Definition at line 231 of file recodebeam.h.

The documentation for this class was generated from the following files:

/media/home/debian/src/github/tesseract-ocr/tesseract/src/lstm/recodebeam.h
/media/home/debian/src/github/tesseract-ocr/tesseract/src/lstm/recodebeam.cpp

Public Member Functions

Static Public Member Functions

Public Attributes

Static Public Attributes

Detailed Description

Constructor & Destructor Documentation

◆ RecodeBeamSearch()

◆ ~RecodeBeamSearch()

Member Function Documentation

◆ BeamIndex()

◆ combineSegmentedTimesteps()

◆ ContinuationFromBeamsIndex()

◆ DebugBeams()

◆ Decode() [1/2]

◆ Decode() [2/2]

◆ DecodeSecondaryBeams()

◆ ExtractBestPathAsLabels()

◆ ExtractBestPathAsUnicharIds()

◆ ExtractBestPathAsWords()

◆ extractSymbolChoices()

◆ IsDawgFromBeamsIndex()

◆ LengthFromBeamsIndex()

◆ PrintBeam2()

◆ segmentTimestepsByCharacters()

Member Data Documentation

◆ character_boundaries_

◆ ctc_choices

◆ excludedUnichars

◆ kMinCertainty

◆ kNumBeams

◆ kNumLengths

◆ segmentedTimesteps

◆ timesteps