tesseract v5.3.3.20231005
makerow.cpp File Reference
#include "makerow.h"
#include "blkocc.h"
#include "blobbox.h"
#include "ccstruct.h"
#include "detlinefit.h"
#include "drawtord.h"
#include "oldbasel.h"
#include "sortflts.h"
#include "statistc.h"
#include "textord.h"
#include "tordmain.h"
#include "tovars.h"
#include "tprintf.h"
#include "underlin.h"
#include <algorithm>
#include <cmath>
#include <vector>

Go to the source code of this file.

Namespaces

namespace  tesseract
 

Macros

#define MAX_HEIGHT_MODES   12
 

Functions

row_y_order

Sort function to sort rows in y from page top.

row_spacing_order

qsort-style comparison function that compares two TO_ROWs based on their spacing value.

make_single_row

Arrange the blobs into a single row... well actually, if there is only a single blob, it makes 2 rows, in case the top-level blob is a container of the real blobs to recognize.

float tesseract::make_single_row (ICOORD page_tr, bool allow_sub_blobs, TO_BLOCK *block, TO_BLOCK_LIST *blocks)
 
make_rows

Arrange the blobs into rows.

float tesseract::make_rows (ICOORD page_tr, TO_BLOCK_LIST *port_blocks)
 
make_initial_textrows

Arrange the good blobs into rows of text.

void tesseract::make_initial_textrows (ICOORD page_tr, TO_BLOCK *block, FCOORD rotation, bool testing_on)
 
fit_lms_line

Fit an LMS line to a row.

void tesseract::fit_lms_line (TO_ROW *row)
 
find_best_dropout_row

Delete this row if it has a neighbour with better dropout characteristics. Returns true if the row should be deleted.

bool tesseract::find_best_dropout_row (TO_ROW *row, int32_t distance, float dist_limit, int32_t line_index, TO_ROW_IT *row_it, bool testing_on)
 
deskew_block_coords

Compute the bounding box of all the blobs in the block if they were deskewed without actually doing it.

TBOX tesseract::deskew_block_coords (TO_BLOCK *block, float gradient)
 
compute_line_occupation

Compute the pixel projection back on the y axis given the global skew. Also compute the 1st derivative.

void tesseract::compute_line_occupation (TO_BLOCK *block, float gradient, int32_t min_y, int32_t max_y, int32_t *occupation, int32_t *deltas)
 
void tesseract::compute_occupation_threshold (int32_t low_window, int32_t high_window, int32_t line_count, int32_t *occupation, int32_t *thresholds)
 
compute_dropout_distances

Compute the distance from each coordinate to the nearest dropout.

void tesseract::compute_dropout_distances (int32_t *occupation, int32_t *thresholds, int32_t line_count)
 
expand_rows

Expand each row to the lesser of its allowed size and the size at which it touches its neighbours. If the expansion would entirely swallow a neighbouring row then do so.

void tesseract::expand_rows (ICOORD page_tr, TO_BLOCK *block, float gradient, FCOORD rotation, int32_t block_edge, bool testing_on)
 
void tesseract::adjust_row_limits (TO_BLOCK *block)
 
compute_row_stats

Compute the linespacing and offset.

void tesseract::compute_row_stats (TO_BLOCK *block, bool testing_on)
 
fill_heights

Fill the given heights with heights of the blobs that are legal candidates for estimating xheight.

void tesseract::fill_heights (TO_ROW *row, float gradient, int min_height, int max_height, STATS *heights, STATS *floating_heights)
 
compute_xheight_from_modes

Given a STATS object heights, looks for the two most frequently occurring heights that look like xheight and xheight + ascrise. If found, sets the values of *xheight and *ascrise accordingly, otherwise sets *xheight to any most frequently occurring height and sets *ascrise to 0. Returns the number of times xheight occurred in heights. For each mode that is considered for being an xheight, the count of floating blobs (stored in floating_heights) is subtracted from the total count of the blobs of this height. This is done because blobs that sit far above the baseline could represent valid ascenders, but it is highly unlikely that such a character's height will be an xheight (e.g. -, ', =, ^, ‘, ", ’, etc.). If cap_only is true, then force finding of only the top mode.

int tesseract::compute_xheight_from_modes (STATS *heights, STATS *floating_heights, bool cap_only, int min_height, int max_height, float *xheight, float *ascrise)
 
compute_row_descdrop

Estimates the descdrop of this row. This function looks for "significant" descenders of lowercase letters (those that could not just be the small descenders of upper case letters like Q,J). The function also takes into account how many potential ascenders this row might contain. If the number of potential ascenders along with descenders is close to the expected fraction of the total number of blobs in the row, the function returns the descender height, returns 0 otherwise.

int32_t tesseract::compute_row_descdrop (TO_ROW *row, float gradient, int xheight_blob_count, STATS *asc_heights)
 
compute_height_modes

Find the top maxmodes values in the input array and put their indices in the output in the order in which they occurred.

int32_t tesseract::compute_height_modes (STATS *heights, int32_t min_height, int32_t max_height, int32_t *modes, int32_t maxmodes)
 
correct_row_xheight

Adjust the xheight etc of this row if not within reasonable limits of the average for the block.

void tesseract::correct_row_xheight (TO_ROW *row, float xheight, float ascrise, float descdrop)
 
separate_underlines

Test wide objects for being potential underlines. If they are then put them in a separate list in the block.

void tesseract::separate_underlines (TO_BLOCK *block, float gradient, FCOORD rotation, bool testing_on)
 
pre_associate_blobs

Associate overlapping blobs and fake chop wide blobs.

void tesseract::pre_associate_blobs (ICOORD page_tr, TO_BLOCK *block, FCOORD rotation, bool testing_on)
 
fit_parallel_rows

Re-fit the rows in the block to the given gradient.

void tesseract::fit_parallel_rows (TO_BLOCK *block, float gradient, FCOORD rotation, int32_t block_edge, bool testing_on)
 
fit_parallel_lms

Fit an LMS line to a row. Make the fit parallel to the given gradient and set the row accordingly.

void tesseract::fit_parallel_lms (float gradient, TO_ROW *row)
 
make_baseline_spline

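Fit an LMS line to a row. Make the fit parallel to the given gradient and set the row accordingly. (Note: this description is identical to that of fit_parallel_lms and refers to a gradient, but make_baseline_spline takes only a row and a block; the text appears to have been copied and should be confirmed against the source.)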

void tesseract::make_baseline_spline (TO_ROW *row, TO_BLOCK *block)
 
segment_baseline

Divide the baseline up into segments which require a different quadratic fitted to them. Return true if enough blobs were far enough away to need a quadratic.

bool tesseract::segment_baseline (TO_ROW *row, TO_BLOCK *block, int32_t &segments, int32_t *xstarts)
 
linear_spline_baseline

Divide the baseline up into segments which require a different quadratic fitted to them.

Returns
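true if enough blobs were far enough away to need a quadratic. (Note: the declared return type of linear_spline_baseline is double *, not bool; this statement matches the description of segment_baseline and appears to have been carried over from it — confirm the actual return value against the source.)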
double * tesseract::linear_spline_baseline (TO_ROW *row, TO_BLOCK *block, int32_t &segments, int32_t xstarts[])
 
assign_blobs_to_rows

Make enough rows to allocate all the given blobs to one. If a block skew is given, use that, else attempt to track it.

void tesseract::assign_blobs_to_rows (TO_BLOCK *block, float *gradient, int pass, bool reject_misses, bool make_new_rows, bool drawing_skew)
 
most_overlapping_row

Return the row which most overlaps the blob.

OVERLAP_STATE tesseract::most_overlapping_row (TO_ROW_IT *row_it, TO_ROW *&best_row, float top, float bottom, float rowsize, bool testing_blob)
 
blob_x_order

Sort function to sort blobs in x from page left.

int tesseract::blob_x_order (const void *item1, const void *item2)
 
mark_repeated_chars

Mark blobs marked with BTFT_LEADER in repeated sets using the repeated_set member of BLOBNBOX.

void tesseract::mark_repeated_chars (TO_ROW *row)
 

Variables

bool tesseract::textord_heavy_nr = false
 
bool tesseract::textord_show_initial_rows = false
 
bool tesseract::textord_show_parallel_rows = false
 
bool tesseract::textord_show_expanded_rows = false
 
bool tesseract::textord_show_final_rows = false
 
bool tesseract::textord_show_final_blobs = false
 
bool tesseract::textord_test_landscape = false
 
bool tesseract::textord_parallel_baselines = true
 
bool tesseract::textord_straight_baselines = false
 
bool tesseract::textord_old_baselines = true
 
bool tesseract::textord_old_xheight = false
 
bool tesseract::textord_fix_xheight_bug = true
 
bool tesseract::textord_fix_makerow_bug = true
 
bool tesseract::textord_debug_xheights = false
 
int tesseract::textord_test_x = -INT32_MAX
 
int tesseract::textord_test_y = -INT32_MAX
 
int tesseract::textord_min_blobs_in_row = 4
 
int tesseract::textord_spline_minblobs = 8
 
int tesseract::textord_spline_medianwin = 6
 
int tesseract::textord_min_xheight = 10
 
double tesseract::textord_spline_shift_fraction = 0.02
 
double tesseract::textord_skew_ile = 0.5
 
double tesseract::textord_skew_lag = 0.02
 
double tesseract::textord_linespace_iqrlimit = 0.2
 
double tesseract::textord_width_limit = 8
 
double tesseract::textord_chop_width = 1.5
 
double tesseract::textord_minxh = 0.25
 
double tesseract::textord_min_linesize = 1.25
 
double tesseract::textord_excess_blobsize = 1.3
 
double tesseract::textord_occupancy_threshold = 0.4
 
double tesseract::textord_underline_width = 2.0
 
double tesseract::textord_min_blob_height_fraction = 0.75
 
double tesseract::textord_xheight_mode_fraction = 0.4
 
double tesseract::textord_ascheight_mode_fraction = 0.08
 
double tesseract::textord_ascx_ratio_min = 1.25
 
double tesseract::textord_ascx_ratio_max = 1.8
 
double tesseract::textord_descx_ratio_min = 0.25
 
double tesseract::textord_descx_ratio_max = 0.6
 
double tesseract::textord_xheight_error_margin = 0.1
 
int tesseract::textord_lms_line_trials = 12
 
bool tesseract::textord_new_initial_xheight = true
 
bool tesseract::textord_debug_blob = false
 

compute_page_skew

Compute the skew over a full page by averaging the gradients over all the lines. Get the error of the same row.

const double tesseract::kNoiseSize = 0.5
 
const int tesseract::kMinSize = 8
 
void tesseract::compute_page_skew (TO_BLOCK_LIST *blocks, float &page_m, float &page_err)
 
void tesseract::vigorous_noise_removal (TO_BLOCK *block)
 
void tesseract::cleanup_rows_making (ICOORD page_tr, TO_BLOCK *block, float gradient, FCOORD rotation, int32_t block_edge, bool testing_on)
 
void tesseract::delete_non_dropout_rows (TO_BLOCK *block, float gradient, FCOORD rotation, int32_t block_edge, bool testing_on)
 

Macro Definition Documentation

◆ MAX_HEIGHT_MODES

#define MAX_HEIGHT_MODES   12

Definition at line 98 of file makerow.cpp.