23# include "config_auto.h"
96 , median_cell_height_(0)
97 , median_cell_width_(0)
98 , max_text_height_(INT32_MAX) {}
163 box_search.SetUniqueMode(
true);
167 while ((line = box_search.NextRectSearch()) !=
nullptr) {
260 unsigned column_end) {
265 for (
unsigned row = row_start; row <= row_end; ++row) {
268 for (
unsigned col = column_start; col <= column_end; ++col) {
303 double area_covered = 0;
310 const int32_t current_area = kCellBox.
area();
311 if (current_area == 0) {
314 return std::min(1.0, area_covered / current_area);
317#ifndef GRAPHICS_DISABLED
386 std::vector<int> left_sides;
387 std::vector<int> right_sides;
407 if (left_sides.empty() || right_sides.empty()) {
412 std::sort(left_sides.begin(), left_sides.end());
413 std::sort(right_sides.begin(), right_sides.end());
431 std::vector<int> bottom_sides;
432 std::vector<int> top_sides;
435 int min_bottom = INT32_MAX;
436 int max_top = INT32_MIN;
453 max_top = std::max(max_top,
static_cast<int>(text->
bounding_box().
top()));
471 bottom_sides.push_back(bottom);
472 top_sides.push_back(top);
475 if (bottom_sides.empty() || top_sides.empty()) {
480 std::sort(bottom_sides.begin(), bottom_sides.end());
481 std::sort(top_sides.begin(), top_sides.end());
552 const int kMaxCellHeight = 1000;
553 const int kMaxCellWidth = 1000;
554 STATS height_stats(0, kMaxCellHeight);
555 STATS width_stats(0, kMaxCellWidth);
631 const std::vector<int> &max_list,
int max_merged,
632 std::vector<int> *locations) {
635 if (min_list.empty()) {
639 ASSERT_HOST(min_list.at(min_list.size() - 1) < max_list.at(max_list.size() - 1));
641 locations->push_back(min_list.at(0));
642 unsigned min_index = 0;
643 unsigned max_index = 0;
644 int stacked_partitions = 0;
645 int last_cross_position = INT32_MAX;
649 while (min_index < min_list.size()) {
651 if (min_list[min_index] < max_list[max_index]) {
652 ++stacked_partitions;
653 if (last_cross_position != INT32_MAX && stacked_partitions > max_merged) {
654 int mid = (last_cross_position + min_list[min_index]) / 2;
655 locations->push_back(mid);
656 last_cross_position = INT32_MAX;
661 --stacked_partitions;
662 if (last_cross_position == INT32_MAX && stacked_partitions <= max_merged) {
663 last_cross_position = max_list[max_index];
668 locations->push_back(max_list.at(max_list.size() - 1));
705 horizontal_box.
set_top(
y + kGridSize);
792 TBOX line_bound = guess_box;
811 int vertical_count = 0;
812 int horizontal_count = 0;
859 int old_area = bounding_box->
area();
864 changed = (bounding_box->
area() > old_area);
876 bool first_line =
true;
909 TBOX best_box = guess_box;
912 TBOX adjusted = guess_box;
917 const int kMidGuessY = (guess_box.
bottom() + guess_box.
top()) / 2;
920 unsigned best_cols = 0;
922 bool found_good_border =
false;
927 int last_bottom = INT32_MAX;
937 int previous_below = 0;
938 const int kMaxChances = 10;
939 int chances = kMaxChances;
940 while (bottom != last_bottom) {
949 if (
false && IsWeakTableRow(table, 0)) {
960 chances = kMaxChances;
968 found_good_border =
true;
980 last_bottom = bottom;
983 if (!found_good_border) {
988 found_good_border =
false;
989 int last_top = INT32_MIN;
992 int previous_above = 0;
993 chances = kMaxChances;
996 while (last_top != top) {
1003 if (
false && IsWeakTableRow(table, last_row)) {
1007 chances = kMaxChances;
1012 table->
row_height(last_row) < max_row_height)) {
1015 best_cols = std::max(table->
column_count(), best_cols);
1016 found_good_border =
true;
1032 if (!found_good_border) {
1068 if (top_to_bottom && (last_y >=
y || last_y <= text_box.
top())) {
1069 last_y = std::min(last_y,
static_cast<int>(text_box.
bottom()));
1072 if (!top_to_bottom && (last_y <= y || last_y >= text_box.
bottom())) {
1073 last_y = std::max(last_y,
static_cast<int>(text_box.
top()));
UnicodeText::const_iterator::difference_type distance(const UnicodeText::const_iterator &first, const UnicodeText::const_iterator &last)
const double kVerticalSpacing
const double kGoodRowNumberOfColumnsLarge
const int kLinedTableMinHorizontalLines
const int kCellSplitRowThreshold
const double kHorizontalSpacing
const int kCellSplitColumnThreshold
const double kRequiredColumns
const double kGoodRowNumberOfColumnsSmall[]
const double kMinFilledArea
constexpr size_t countof(T const (&)[N]) noexcept
const double kMarginFactor
const int kLinedTableMinVerticalLines
TDimension height() const
TBOX intersection(const TBOX &box) const
TDimension bottom() const
void add(int32_t value, int32_t count)
void StartVerticalSearch(int xmin, int xmax, int y)
void SetUniqueMode(bool mode)
BBC * NextSideSearch(bool right_to_left)
void StartSideSearch(int x, int ymin, int ymax)
BBC * NextVerticalSearch(bool top_to_bottom)
void StartRectSearch(const TBOX &rect)
bool IsHorizontalLine() const
bool IsVerticalLine() const
const TBOX & bounding_box() const
bool IsHorizontalType() const
static void FindCellSplitLocations(const std::vector< int > &min_list, const std::vector< int > &max_list, int max_merged, std::vector< int > *locations)
bool VerifyLinedTableCells()
int row_height(unsigned row) const
std::vector< int > cell_y_
bool VerifyRowFilled(int row)
unsigned column_count() const
void FindWhitespacedRows()
const TBOX & bounding_box() const
ColPartitionGrid * text_grid_
void set_max_text_height(int height)
std::vector< int > cell_x_
bool DoesPartitionFit(const ColPartition &part) const
ColPartitionGrid * line_grid_
int FindVerticalMargin(ColPartitionGrid *grid, int start_x, bool decrease) const
unsigned cell_count() const
void set_bounding_box(const TBOX &box)
int CountFilledCellsInColumn(int column)
double CalculateCellFilledPercentage(unsigned row, unsigned column)
bool FindWhitespacedStructure()
void FindWhitespacedColumns()
bool FindLinedStructure()
void Display(ScrollView *window, ScrollView::Color color)
void set_line_grid(ColPartitionGrid *lines)
int CountHorizontalIntersections(int y)
int CountFilledCellsInRow(int row)
int FindHorizontalMargin(ColPartitionGrid *grid, int start_y, bool decrease) const
int column_width(unsigned column) const
bool VerifyWhitespacedTable()
int CountPartitions(const TBOX &box)
int CountVerticalIntersections(int x)
void set_text_grid(ColPartitionGrid *text)
void UpdateMargins(ColPartitionGrid *grid)
unsigned row_count() const
bool RecognizeLinedTable(const TBOX &guess_box, StructuredTable *table)
bool FindLinesBoundingBoxIteration(TBOX *bounding_box)
bool FindLinesBoundingBox(TBOX *bounding_box)
ColPartitionGrid * text_grid_
void set_min_width(int width)
void set_max_text_height(int height)
int NextHorizontalSplit(int left, int right, int y, bool top_to_bottom)
void set_line_grid(ColPartitionGrid *lines)
bool HasSignificantLines(const TBOX &guess)
void set_text_grid(ColPartitionGrid *text)
ColPartitionGrid * line_grid_
void set_min_height(int height)
bool RecognizeWhitespacedTable(const TBOX &guess_box, StructuredTable *table)
StructuredTable * RecognizeTable(const TBOX &guess_box)
void Line(int x1, int y1, int x2, int y2)
void Rectangle(int x1, int y1, int x2, int y2)