29# include "config_auto.h"
32#include <allheaders.h>
37#ifndef DISABLED_LEGACY_ENGINE
61static Image RemoveEnclosingCircle(
Image pixs) {
62 Image pixsi = pixInvert(
nullptr, pixs);
63 Image pixc = pixCreateTemplate(pixs);
64 pixSetOrClearBorder(pixc, 1, 1, 1, 1, PIX_SET);
65 pixSeedfillBinary(pixc, pixc, pixsi, 4);
66 pixInvert(pixc, pixc);
68 Image pixt = pixs & pixc;
70 pixCountConnComp(pixt, 8, &max_count);
72 l_int32 min_count = INT32_MAX;
73 Image pixout =
nullptr;
76 pixErodeBrick(pixc, pixc, 3, 3);
79 pixCountConnComp(pixt, 8, &
count);
80 if (
i == 1 ||
count > max_count) {
83 }
else if (
count < min_count) {
87 }
else if (
count >= min_count) {
104 int width = pixGetWidth(pix_binary_);
105 int height = pixGetHeight(pix_binary_);
107 auto pageseg_mode =
static_cast<PageSegMode>(
static_cast<int>(tessedit_pageseg_mode));
110 std::string name = input_file;
111 std::size_t lastdot = name.find_last_of(
".");
112 name = name.substr(0, lastdot);
115 if (blocks->empty()) {
118 BLOCK_IT block_it(blocks);
119 auto *block =
new BLOCK(
"",
true, 0, 0, 0, 0, width, height);
121 block_it.add_to_end(block);
132 BLOBNBOX_LIST diacritic_blobs;
133 int auto_page_seg_ret_val = 0;
134 TO_BLOCK_LIST to_blocks;
137 auto_page_seg_ret_val =
139 enable_noise_removal ? &diacritic_blobs :
nullptr, osd_tess, osr);
141 return auto_page_seg_ret_val;
146 deskew_ =
FCOORD(1.0f, 0.0f);
147 reskew_ =
FCOORD(1.0f, 0.0f);
149 Image pixcleaned = RemoveEnclosingCircle(pix_binary_);
150 if (pixcleaned !=
nullptr) {
152 pix_binary_ = pixcleaned;
157 if (auto_page_seg_ret_val < 0) {
161 if (blocks->empty()) {
168 bool cjk_mode = textord_use_cjk_fp_model;
170 textord_.
TextordPage(pageseg_mode, reskew_, width, height, pix_binary_, pix_thresholds_,
171 pix_grey_, splitting || cjk_mode, &diacritic_blobs, blocks, &to_blocks);
172 return auto_page_seg_ret_val;
201 Image photomask_pix =
nullptr;
202 Image musicmask_pix =
nullptr;
204 BLOCK_LIST found_blocks;
205 TO_BLOCK_LIST temp_blocks;
208 pageseg_mode, blocks, osd_tess, osr, &temp_blocks, &photomask_pix,
209 pageseg_apply_music_mask ? &musicmask_pix :
nullptr);
211 if (finder !=
nullptr) {
212 TO_BLOCK_IT to_block_it(&temp_blocks);
213 TO_BLOCK *to_block = to_block_it.data();
214 if (musicmask_pix !=
nullptr) {
217 photomask_pix |= musicmask_pix;
219#ifndef DISABLED_LEGACY_ENGINE
224 result = finder->
FindBlocks(pageseg_mode, scaled_color_, scaled_factor_, to_block,
225 photomask_pix, pix_thresholds_, pix_grey_, &pixa_debug_,
226 &found_blocks, diacritic_blobs, to_blocks);
239 BLOCK_IT block_it(blocks);
241 block_it.add_list_after(&found_blocks);
248 std::vector<int> *allowed_ids) {
272 OSResults *osr, TO_BLOCK_LIST *to_blocks,
273 Image *photo_mask_pix,
274 Image *music_mask_pix) {
277 TabVector_LIST v_lines;
278 TabVector_LIST h_lines;
282 if (tessedit_dump_pageseg_images) {
283 pixa_debug_.
AddPix(pix_binary_,
"PageSegInput");
287 &vertical_x, &vertical_y, music_mask_pix, &v_lines, &h_lines);
288 if (tessedit_dump_pageseg_images) {
289 pixa_debug_.
AddPix(pix_binary_,
"NoLines");
293 if (tessedit_dump_pageseg_images) {
294 Image pix_no_image_ =
nullptr;
295 if (*photo_mask_pix !=
nullptr) {
296 pix_no_image_ = pixSubtract(
nullptr, pix_binary_, *photo_mask_pix);
298 pix_no_image_ = pix_binary_.
clone();
300 pixa_debug_.
AddPix(pix_no_image_,
"NoImages");
310 TO_BLOCK_IT to_block_it(to_blocks);
314 TO_BLOCK *to_block = to_block_it.data();
317 int estimated_resolution = source_resolution_;
322 estimated_resolution = res;
323 tprintf(
"Estimating resolution as %d\n", estimated_resolution);
329 blkbox.
topright(), estimated_resolution, textord_use_cjk_fp_model,
330 textord_tabfind_aligned_gap_fraction, &v_lines, &h_lines, vertical_x,
335 #ifndef DISABLED_LEGACY_ENGINE
341 BLOBNBOX_CLIST osd_blobs;
346 int osd_orientation = 0;
354 #ifndef DISABLED_LEGACY_ENGINE
355 if (
PSM_OSD_ENABLED(pageseg_mode) && osd_tess !=
nullptr && osr !=
nullptr) {
356 std::vector<int> osd_scripts;
357 if (osd_tess !=
this) {
361 for (
auto &
lang : sub_langs_) {
362 AddAllScriptsConverted(
lang->unicharset, osd_tess->
unicharset, &osd_scripts);
372 double osd_margin = min_orientation_margin * 2;
373 for (
int i = 0;
i < 4; ++
i) {
374 if (
i != osd_orientation && osd_score - osr->
orientations[
i] < osd_margin) {
383 strcmp(
"Japanese", best_script_str) == 0 ||
384 strcmp(
"Korean", best_script_str) == 0 || strcmp(
"Hangul", best_script_str) == 0;
388 if (osd_margin < min_orientation_margin) {
390 if (!cjk && !vertical_text && osd_orientation == 2) {
393 "OSD: Weak margin (%.2f), horiz textlines, not CJK: "
399 "OSD: Weak margin (%.2f) for %d blob text block, "
400 "but using orientation anyway: %d\n",
401 osd_margin, osd_blobs.length(), osd_orientation);
407 osd_blobs.shallow_clear();
constexpr int kResolutionEstimationFactor
bool PSM_OSD_ENABLED(int pageseg_mode)
@ PSM_CIRCLE_WORD
Treat the image as a single word in a circle.
@ PSM_OSD_ONLY
Orientation and script detection only.
@ PSM_SINGLE_BLOCK_VERT_TEXT
@ PSM_SINGLE_BLOCK
Assume a single uniform block of text. (Default.)
bool PSM_ORIENTATION_ENABLED(int pageseg_mode)
bool read_unlv_file(std::string &name, int32_t xsize, int32_t ysize, BLOCK_LIST *blocks)
void tprintf(const char *format,...)
int IntCastRounded(double x)
bool PSM_COL_FIND_ENABLED(int pageseg_mode)
int os_detect_blobs(const std::vector< int > *allowed_scripts, BLOBNBOX_CLIST *blob_list, OSResults *osr, tesseract::Tesseract *tess)
constexpr int kMaxCredibleResolution
bool PSM_SPARSE(int pageseg_mode)
int textord_debug_tabfind
const int kMaxCircleErosions
constexpr int kMinCredibleResolution
bool PSM_BLOCK_FIND_ENABLED(int pageseg_mode)
int LabelSpecialText(TO_BLOCK *to_block) override
int AutoPageSeg(PageSegMode pageseg_mode, BLOCK_LIST *blocks, TO_BLOCK_LIST *to_blocks, BLOBNBOX_LIST *diacritic_blobs, Tesseract *osd_tess, OSResults *osr)
int SegmentPage(const char *input_file, BLOCK_LIST *blocks, Tesseract *osd_tess, OSResults *osr)
bool right_to_left() const
ColumnFinder * SetupPageSegAndDetectOrientation(PageSegMode pageseg_mode, BLOCK_LIST *blocks, Tesseract *osd_tess, OSResults *osr, TO_BLOCK_LIST *to_blocks, Image *photo_mask_pix, Image *music_mask_pix)
void AddPix(const Image pix, const char *caption)
PDBLK pdblk
Page Description Block.
void bounding_box(ICOORD &bottom_left, ICOORD &top_right) const
get box
const ICOORD & botleft() const
const ICOORD & topright() const
const char * get_script_from_script_id(int id) const
int get_script_table_size() const
int get_script_id_from_name(const char *script_name) const
void GetDeskewVectors(FCOORD *deskew, FCOORD *reskew)
void set_cjk_script(bool is_cjk)
bool IsVerticallyAlignedText(double find_vertical_text_ratio, TO_BLOCK *block, BLOBNBOX_CLIST *osd_blobs)
void SetEquationDetect(EquationDetectBase *detect)
int FindBlocks(PageSegMode pageseg_mode, Image scaled_color, int scaled_factor, TO_BLOCK *block, Image photo_mask_pix, Image thresholds_pix, Image grey_pix, DebugPixa *pixa_debug, BLOCK_LIST *blocks, BLOBNBOX_LIST *diacritic_blobs, TO_BLOCK_LIST *to_blocks)
void SetupAndFilterNoise(PageSegMode pageseg_mode, Image photo_mask_pix, TO_BLOCK *input_block)
void CorrectOrientation(TO_BLOCK *block, bool vertical_text_lines, int recognition_rotation)
static Image FindImages(Image pix, DebugPixa *pixa_debug)
static void FindAndRemoveLines(int resolution, bool debug, Image pix, int *vertical_x, int *vertical_y, Image *pix_music_mask, TabVector_LIST *v_lines, TabVector_LIST *h_lines)
void TextordPage(PageSegMode pageseg_mode, const FCOORD &reskew, int width, int height, Image binary_pix, Image thresholds_pix, Image grey_pix, bool use_box_bottoms, BLOBNBOX_LIST *diacritic_blobs, BLOCK_LIST *blocks, TO_BLOCK_LIST *to_blocks)
void find_components(Image pix, BLOCK_LIST *blocks, TO_BLOCK_LIST *to_blocks)