All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Macros Modules Pages
text2image.cpp File Reference
#include <stdlib.h>
#include <string.h>
#include <algorithm>
#include <iostream>
#include <map>
#include <string>
#include <utility>
#include <vector>
#include "allheaders.h"
#include "boxchar.h"
#include "commandlineflags.h"
#include "degradeimage.h"
#include "errcode.h"
#include "fileio.h"
#include "helpers.h"
#include "normstrngs.h"
#include "stringrenderer.h"
#include "tlog.h"
#include "unicharset.h"
#include "util.h"

Go to the source code of this file.

Classes

struct  tesseract::SpacingProperties
 

Namespaces

 tesseract
 

Functions

 STRING_PARAM_FLAG (text,"","File name of text input to process")
 
 STRING_PARAM_FLAG (outputbase,"","Basename for output image/box file")
 
 BOOL_PARAM_FLAG (degrade_image, true,"Degrade rendered image with speckle noise, dilation/erosion ""and rotation")
 
 INT_PARAM_FLAG (exposure, 0,"Exposure level in photocopier")
 
 INT_PARAM_FLAG (resolution, 300,"Pixels per inch")
 
 INT_PARAM_FLAG (xsize, 3600,"Width of output image")
 
 INT_PARAM_FLAG (ysize, 4800,"Height of output image")
 
 INT_PARAM_FLAG (margin, 100,"Margin round edges of image")
 
 INT_PARAM_FLAG (ptsize, 12,"Size of printed text")
 
 DOUBLE_PARAM_FLAG (char_spacing, 0,"Inter-character space in ems")
 
 DOUBLE_PARAM_FLAG (underline_start_prob, 0,"Fraction of words to underline (value in [0,1])")
 
 DOUBLE_PARAM_FLAG (underline_continuation_prob, 0,"Fraction of words to underline (value in [0,1])")
 
 INT_PARAM_FLAG (leading, 12,"Inter-line space (in pixels)")
 
 STRING_PARAM_FLAG (writing_mode,"horizontal","Specify one of the following writing"" modes.\n""'horizontal' : Render regular horizontal text. (default)\n""'vertical' : Render vertical text. Glyph orientation is"" selected by Pango.\n""'vertical-upright' : Render vertical text. Glyph "" orientation is set to be upright.")
 
 INT_PARAM_FLAG (box_padding, 0,"Padding around produced bounding boxes")
 
 BOOL_PARAM_FLAG (strip_unrenderable_words, true,"Remove unrenderable words from source text")
 
 STRING_PARAM_FLAG (font,"Arial","Font description name to use")
 
 BOOL_PARAM_FLAG (ligatures, false,"Rebuild and render ligatures")
 
 BOOL_PARAM_FLAG (find_fonts, false,"Search for all fonts that can render the text")
 
 BOOL_PARAM_FLAG (render_per_font, true,"If find_fonts==true, render each font to its own image. ""Image filenames are of the form output_name.font_name.tif")
 
 DOUBLE_PARAM_FLAG (min_coverage, 1.0,"If find_fonts==true, the minimum coverage the font has of ""the characters in the text file to include it, between ""0 and 1.")
 
 BOOL_PARAM_FLAG (list_available_fonts, false,"List available fonts and quit.")
 
 BOOL_PARAM_FLAG (render_ngrams, false,"Put each space-separated entity from the"" input file into one bounding box. The ngrams in the input"" file will be randomly permuted before rendering (so that"" there is sufficient variety of characters on each line).")
 
 BOOL_PARAM_FLAG (output_word_boxes, false,"Output word bounding boxes instead of character boxes. ""This is used for Cube training, and implied by ""--render_ngrams.")
 
 STRING_PARAM_FLAG (unicharset_file,"","File with characters in the unicharset. If --render_ngrams"" is true and --unicharset_file is specified, ngrams with"" characters that are not in unicharset will be omitted")
 
 BOOL_PARAM_FLAG (bidirectional_rotation, false,"Rotate the generated characters both ways.")
 
 BOOL_PARAM_FLAG (only_extract_font_properties, false,"Assumes that the input file contains a list of ngrams. Renders"" each ngram, extracts spacing properties and records them in"" output_base/[font_name].fontinfo file.")
 
 BOOL_PARAM_FLAG (output_individual_glyph_images, false,"If true also outputs individual character images")
 
 INT_PARAM_FLAG (glyph_resized_size, 0,"Each glyph is square with this side length in pixels")
 
 INT_PARAM_FLAG (glyph_num_border_pixels_to_pad, 0,"Final_size=glyph_resized_size+2*glyph_num_border_pixels_to_pad")
 
void tesseract::ExtractFontProperties (const string &utf8_text, StringRenderer *render, const string &output_base)
 
bool tesseract::MakeIndividualGlyphs (Pix *pix, const vector< BoxChar * > &vbox, const int input_tiff_page)
 
int main (int argc, char **argv)
 

Variables

const int kRandomSeed = 0x18273645
 

Function Documentation

BOOL_PARAM_FLAG ( degrade_image  ,
true  ,
"Degrade rendered image with speckle noise, dilation/erosion ""and rotation"   
)
BOOL_PARAM_FLAG ( strip_unrenderable_words  ,
true  ,
"Remove unrenderable words from source text"   
)
BOOL_PARAM_FLAG ( ligatures  ,
false  ,
"Rebuild and render ligatures"   
)
BOOL_PARAM_FLAG ( find_fonts  ,
false  ,
"Search for all fonts that can render the text"   
)
BOOL_PARAM_FLAG ( render_per_font  ,
true  ,
"If find_fonts==true, render each font to its own image. ""Image filenames are of the form output_name.font_name.tif"   
)
BOOL_PARAM_FLAG ( list_available_fonts  ,
false  ,
"List available fonts and quit."   
)
BOOL_PARAM_FLAG ( render_ngrams  ,
false  ,
"Put each space-separated entity from the"" input file into one bounding box. The ngrams in the input"" file will be randomly permuted before rendering (so that"" there is sufficient variety of characters on each line)."   
)
BOOL_PARAM_FLAG ( output_word_boxes  ,
false  ,
"Output word bounding boxes instead of character boxes. ""This is used for Cube training, and implied by ""--render_ngrams."   
)
BOOL_PARAM_FLAG ( bidirectional_rotation  ,
false  ,
"Rotate the generated characters both ways."   
)
BOOL_PARAM_FLAG ( only_extract_font_properties  ,
false  ,
"Assumes that the input file contains a list of ngrams. Renders"" each ngram, extracts spacing properties and records them in"" output_base/[font_name].fontinfo file."   
)
BOOL_PARAM_FLAG ( output_individual_glyph_images  ,
false  ,
"If true also outputs individual character images"   
)
DOUBLE_PARAM_FLAG ( char_spacing  ,
0  ,
"Inter-character space in ems"   
)
DOUBLE_PARAM_FLAG ( underline_start_prob  ,
0  ,
"Fraction of words to underline (value in [0,1])"   
)
DOUBLE_PARAM_FLAG ( underline_continuation_prob  ,
0  ,
"Fraction of words to underline (value in [0,1])"   
)
DOUBLE_PARAM_FLAG ( min_coverage  ,
1.0  ,
"If find_fonts==true, the minimum coverage the font has of ""the characters in the text file to include it, between ""0 and 1."   
)
INT_PARAM_FLAG ( exposure  ,
0  ,
"Exposure level in photocopier"   
)
INT_PARAM_FLAG ( resolution  ,
300  ,
"Pixels per inch"   
)
INT_PARAM_FLAG ( xsize  ,
3600  ,
"Width of output image"   
)
INT_PARAM_FLAG ( ysize  ,
4800  ,
"Height of output image"   
)
INT_PARAM_FLAG ( margin  ,
100  ,
"Margin round edges of image"   
)
INT_PARAM_FLAG ( ptsize  ,
12  ,
"Size of printed text"   
)
INT_PARAM_FLAG ( leading  ,
12  ,
"Inter-line space (in pixels)"   
)
INT_PARAM_FLAG ( box_padding  ,
0  ,
"Padding around produced bounding boxes"   
)
INT_PARAM_FLAG ( glyph_resized_size  ,
0  ,
"Each glyph is square with this side length in pixels"   
)
INT_PARAM_FLAG ( glyph_num_border_pixels_to_pad  ,
0  ,
"Final_size=glyph_resized_size+2*glyph_num_border_pixels_to_pad"   
)
int main ( int  argc,
char **  argv 
)

This program reads in a text file consisting of feature samples from a training page in the following format:

   FontName UTF8-char-str xmin ymin xmax ymax page-number
    NumberOfFeatureTypes(N)
      FeatureTypeName1 NumberOfFeatures(M)
         Feature1
         ...
         FeatureM
      FeatureTypeName2 NumberOfFeatures(M)
         Feature1
         ...
         FeatureM
      ...
      FeatureTypeNameN NumberOfFeatures(M)
         Feature1
         ...
         FeatureM
   FontName CharName ...

The result of this program is a binary inttemp file used by the OCR engine.

Parameters
argc: number of command line arguments
argv: array of command line arguments
Returns
none
Note
Exceptions: none
History: Fri Aug 18 08:56:17 1989, DSJ, Created.
History: Mon May 18 1998, Christy Russson, Revision started.

Definition at line 414 of file text2image.cpp.

// Body of main() for text2image. The leading numbers on each line are the
// original source line numbers carried over from the Doxygen listing.
//
// Flow, as visible in this listing:
//   1. Parse command line flags; with --list_available_fonts, print the fonts
//      and return EXIT_SUCCESS.
//   2. Validate --text / --outputbase / --unicharset_file / --font.
//   3. Configure a StringRenderer from the flags (size, margins, spacing,
//      underline probabilities, writing mode).
//   4. Read the input text, drop a leading UTF-8 byte order mark, and - for
//      --render_ngrams / --only_extract_font_properties - rebuild the text as
//      a sequence of separated (optionally shuffled) ngrams.
//   5. Either extract font properties and return, or render the text page by
//      page to TIFF (one or two passes), then write the .box file or the
//      font list.
//
// NOTE(review): the Doxygen description attached to this function talks about
// feature samples and an "inttemp" file, which does not match what this body
// does; it looks copied from another tool - confirm and replace upstream.
414  {
415  tesseract::ParseCommandLineFlags(argv[0], &argc, &argv, true);
416 
417  if (FLAGS_list_available_fonts) {
418  const vector<string>& all_fonts = FontUtils::ListAvailableFonts();
     // NOTE(review): `i` is a signed int compared against all_fonts.size().
419  for (int i = 0; i < all_fonts.size(); ++i) {
420  tprintf("%3d: %s\n", i, all_fonts[i].c_str());
421  ASSERT_HOST_MSG(FontUtils::IsAvailableFont(all_fonts[i].c_str()),
422  "Font %s is unrecognized.\n", all_fonts[i].c_str());
423  }
424  return EXIT_SUCCESS;
425  }
426  // Check validity of input flags.
427  ASSERT_HOST_MSG(!FLAGS_text.empty(), "Text file missing!\n");
428  ASSERT_HOST_MSG(!FLAGS_outputbase.empty(), "Output file missing!\n");
429  ASSERT_HOST_MSG(FLAGS_render_ngrams || FLAGS_unicharset_file.empty(),
430  "Use --unicharset_file only if --render_ngrams is set.\n");
431 
     // Unless we are searching for fonts, --font must be available. The second
     // IsAvailableFont call passes &pango_name, whose value is printed as
     // Pango's suggested alternative before TLOG_FATAL is raised.
432  if (!FLAGS_find_fonts && !FontUtils::IsAvailableFont(FLAGS_font.c_str())) {
433  string pango_name;
434  if (!FontUtils::IsAvailableFont(FLAGS_font.c_str(), &pango_name)) {
435  tprintf("Could not find font named %s. Pango suggested font %s\n",
436  FLAGS_font.c_str(), pango_name.c_str());
437  TLOG_FATAL("Please correct --font arg.");
438  }
439  }
440 
     // --render_ngrams forces word-level boxes.
441  if (FLAGS_render_ngrams)
442  FLAGS_output_word_boxes = true;
443 
     // The renderer is given a font description of the form "<font> <ptsize>".
444  char font_desc_name[1024];
445  snprintf(font_desc_name, 1024, "%s %d", FLAGS_font.c_str(),
446  static_cast<int>(FLAGS_ptsize));
447  StringRenderer render(font_desc_name, FLAGS_xsize, FLAGS_ysize);
448  render.set_add_ligatures(FLAGS_ligatures);
449  render.set_leading(FLAGS_leading);
450  render.set_resolution(FLAGS_resolution);
     // The char_spacing flag is documented in ems, so it is scaled by ptsize.
451  render.set_char_spacing(FLAGS_char_spacing * FLAGS_ptsize);
     // The single --margin flag is used for both horizontal and vertical margins.
452  render.set_h_margin(FLAGS_margin);
453  render.set_v_margin(FLAGS_margin);
454  render.set_output_word_boxes(FLAGS_output_word_boxes);
455  render.set_box_padding(FLAGS_box_padding);
456  render.set_strip_unrenderable_words(FLAGS_strip_unrenderable_words);
457  render.set_underline_start_prob(FLAGS_underline_start_prob);
458  render.set_underline_continuation_prob(FLAGS_underline_continuation_prob);
459 
460  // Set text rendering orientation and their forms.
461  if (FLAGS_writing_mode == "horizontal") {
462  // Render regular horizontal text (default).
463  render.set_vertical_text(false);
464  render.set_gravity_hint_strong(false);
465  render.set_render_fullwidth_latin(false);
466  } else if (FLAGS_writing_mode == "vertical") {
467  // Render vertical text. Glyph orientation is selected by Pango.
468  render.set_vertical_text(true);
469  render.set_gravity_hint_strong(false);
470  render.set_render_fullwidth_latin(false);
471  } else if (FLAGS_writing_mode == "vertical-upright") {
472  // Render vertical text. Glyph orientation is set to be upright.
473  // Also Basic Latin characters are converted to their fullwidth forms
474  // on rendering, since fullwidth Latin characters are well designed to fit
475  // vertical text lines, while .box files store halfwidth Basic Latin
476  // unichars.
477  render.set_vertical_text(true);
478  render.set_gravity_hint_strong(true);
479  render.set_render_fullwidth_latin(true);
480  } else {
481  TLOG_FATAL("Invalid writing mode : %s\n", FLAGS_writing_mode.c_str());
482  }
483 
484  string src_utf8;
485  // This c_str is NOT redundant!
486  File::ReadFileToStringOrDie(FLAGS_text.c_str(), &src_utf8);
487 
488  // Remove the unicode mark if present.
     // (The three bytes EF BB BF at the very start of the text.)
489  if (strncmp(src_utf8.c_str(), "\xef\xbb\xbf", 3) == 0) {
490  src_utf8.erase(0, 3);
491  }
     // NOTE(review): "%d" is given src_utf8.length(), which is not an int if
     // `string` is std::string - confirm how tlog formats its arguments.
492  tlog(1, "Render string of size %d\n", src_utf8.length());
493 
494  if (FLAGS_render_ngrams || FLAGS_only_extract_font_properties) {
495  // Try to preserve behavior of old text2image by expanding inter-word
496  // spaces by a factor of 4.
     // NOTE(review): as shown here both branches yield the same single space,
     // which contradicts the "factor of 4" comment above; the wider literal
     // may have been whitespace-collapsed in this listing - confirm against
     // the original source file.
497  const string kSeparator = FLAGS_render_ngrams ? " " : " ";
498  // Also restrict the number of charactes per line to try and avoid
499  // line-breaking in the middle of words like "-A", "R$" etc. which are
500  // otherwise allowed by the standard unicode line-breaking rules.
501  const int kCharsPerLine = (FLAGS_ptsize > 20) ? 50 : 100;
502  string rand_utf8;
503  UNICHARSET unicharset;
504  if (FLAGS_render_ngrams && !FLAGS_unicharset_file.empty() &&
505  !unicharset.load_from_file(FLAGS_unicharset_file.c_str())) {
506  TLOG_FATAL("Failed to load unicharset from file %s\n",
507  FLAGS_unicharset_file.c_str());
508  }
509 
510  // If we are rendering ngrams that will be OCRed later, shuffle them so that
511  // tesseract does not have difficulties finding correct baseline, word
512  // spaces, etc.
     // Collect one (start offset, length) pair per non-whitespace run of the
     // input, skipping the whitespace before and between runs.
513  const char *str8 = src_utf8.c_str();
514  int len = src_utf8.length();
515  int step;
516  vector<pair<int, int> > offsets;
517  int offset = SpanUTF8Whitespace(str8);
518  while (offset < len) {
519  step = SpanUTF8NotWhitespace(str8 + offset);
520  offsets.push_back(make_pair(offset, step));
521  offset += step;
522  offset += SpanUTF8Whitespace(str8 + offset);
523  }
     // NOTE(review): std::random_shuffle was deprecated in C++14 and removed
     // in C++17, and nothing in this function seeds its random source, so the
     // resulting order depends on the library's default state.
524  if (FLAGS_render_ngrams)
525  std::random_shuffle(offsets.begin(), offsets.end());
526 
     // Rebuild the text ngram by ngram into rand_utf8. `line` counts output
     // lines starting at 1 and drives where newlines are inserted.
527  for (int i = 0, line = 1; i < offsets.size(); ++i) {
528  const char *curr_pos = str8 + offsets[i].first;
529  int ngram_len = offsets[i].second;
530  // Skip words that contain characters not in found in unicharset.
     // NOTE(review): curr_pos points into src_utf8 and is not terminated at
     // ngram_len, and encodable_string takes no length here, so this check
     // appears to cover the rest of the text rather than only this ngram -
     // confirm.
531  if (!FLAGS_unicharset_file.empty() &&
532  !unicharset.encodable_string(curr_pos, NULL)) {
533  continue;
534  }
535  rand_utf8.append(curr_pos, ngram_len);
     // Once the accumulated length exceeds line * kCharsPerLine, end the
     // current line; a separator follows the newline only when the new line
     // number is odd. Otherwise just append the separator.
536  if (rand_utf8.length() > line * kCharsPerLine) {
537  rand_utf8.append(" \n");
538  ++line;
539  if (line & 0x1) rand_utf8.append(kSeparator);
540  } else {
541  rand_utf8.append(kSeparator);
542  }
543  }
544  tlog(1, "Rendered ngram string of size %d\n", rand_utf8.length());
     // From here on src_utf8 holds the rebuilt ngram text.
545  src_utf8.swap(rand_utf8);
546  }
547  if (FLAGS_only_extract_font_properties) {
548  tprintf("Extracting font properties only\n");
549  ExtractFontProperties(src_utf8, &render, FLAGS_outputbase.c_str());
550  tprintf("Done!\n");
551  return 0;
552  }
553 
     // `im` counts rendered pages across all passes (it is never reset);
     // page_rotation records the rotation of each pass-0 page.
554  int im = 0;
555  vector<float> page_rotation;
556  const char* to_render_utf8 = src_utf8.c_str();
557 
558  tesseract::TRand randomizer;
559  randomizer.set_seed(kRandomSeed);
560  vector<string> font_names;
561  // We use a two pass mechanism to rotate images in both direction.
562  // The first pass(0) will rotate the images in random directions and
563  // the second pass(1) will mirror those rotations.
564  int num_pass = FLAGS_bidirectional_rotation ? 2 : 1;
565  for (int pass = 0; pass < num_pass; ++pass) {
566  int page_num = 0;
567  string font_used;
     // NOTE(review): strlen(to_render_utf8) is re-evaluated on every iteration
     // and compared against the signed int `offset`.
568  for (int offset = 0; offset < strlen(to_render_utf8); ++im, ++page_num) {
569  tlog(1, "Starting page %d\n", im);
570  Pix* pix = NULL;
     // Both render calls get the remaining text and its length; their return
     // value advances `offset` (presumably the amount of input consumed -
     // confirm against StringRenderer).
571  if (FLAGS_find_fonts) {
572  offset += render.RenderAllFontsToImage(FLAGS_min_coverage,
573  to_render_utf8 + offset,
574  strlen(to_render_utf8 + offset),
575  &font_used, &pix);
576  } else {
577  offset += render.RenderToImage(to_render_utf8 + offset,
578  strlen(to_render_utf8 + offset), &pix);
579  }
580  if (pix != NULL) {
581  float rotation = 0;
582  if (pass == 1) {
583  // Pass 2, do mirror rotation.
     // NOTE(review): page_rotation only grows when pix != NULL in pass 0,
     // while page_num advances on every iteration; if a pass-0 page produced
     // no pix, this index could be misaligned or out of range - confirm.
584  rotation = -1 * page_rotation[page_num];
585  }
     // `rotation` is passed by pointer; the value it holds after this call is
     // what gets applied to the boxes and stored in pass 0. Without
     // --degrade_image it keeps the value set above.
586  if (FLAGS_degrade_image) {
587  pix = DegradeImage(pix, FLAGS_exposure, &randomizer, &rotation);
588  }
589  render.RotatePageBoxes(rotation);
590 
591  if (pass == 0) {
592  // Pass 1, rotate randomly and store the rotation..
593  page_rotation.push_back(rotation);
594  }
595 
     // pixConvertTo8 then pixThresholdToBinary(..., 128); each intermediate
     // Pix is destroyed as soon as its successor exists.
596  Pix* gray_pix = pixConvertTo8(pix, false);
597  pixDestroy(&pix);
598  Pix* binary = pixThresholdToBinary(gray_pix, 128);
599  pixDestroy(&gray_pix);
600  char tiff_name[1024];
601  if (FLAGS_find_fonts) {
602  if (FLAGS_render_per_font) {
     // One file per font: <outputbase>.<font name with spaces as '_'>.tif,
     // always opened with mode "w".
603  string fontname_for_file = tesseract::StringReplace(
604  font_used, " ", "_");
605  snprintf(tiff_name, 1024, "%s.%s.tif", FLAGS_outputbase.c_str(),
606  fontname_for_file.c_str());
607  pixWriteTiff(tiff_name, binary, IFF_TIFF_G4, "w");
608  tprintf("Rendered page %d to file %s\n", im, tiff_name);
609  } else {
610  font_names.push_back(font_used);
611  }
612  } else {
     // Single output file <outputbase>.tif: mode "w" for the first page
     // overall (im == 0) and "a" afterwards. Since `im` is not reset
     // between passes, pass-1 pages use "a" on the same file.
613  snprintf(tiff_name, 1024, "%s.tif", FLAGS_outputbase.c_str());
614  pixWriteTiff(tiff_name, binary, IFF_TIFF_G4, im == 0 ? "w" : "a");
615  tprintf("Rendered page %d to file %s\n", im, tiff_name);
616  }
617  // Make individual glyphs
618  if (FLAGS_output_individual_glyph_images) {
619  if (!MakeIndividualGlyphs(binary, render.GetBoxes(), im)) {
620  tprintf("ERROR: Individual glyphs not saved\n");
621  }
622  }
623  pixDestroy(&binary);
624  }
625  if (FLAGS_find_fonts && offset != 0) {
626  // We just want a list of names, or some sample images so we don't need
627  // to render more than the first page of the text.
628  break;
629  }
630  }
631  }
     // Final outputs: without --find_fonts, all boxes go to <outputbase>.box;
     // with --find_fonts and without --render_per_font, the collected font
     // names are written one per line to <outputbase>.fontlist.txt.
632  if (!FLAGS_find_fonts) {
633  string box_name = FLAGS_outputbase.c_str();
634  box_name += ".box";
635  render.WriteAllBoxes(box_name);
636  } else if (!FLAGS_render_per_font && !font_names.empty()) {
637  string filename = FLAGS_outputbase.c_str();
638  filename += ".fontlist.txt";
639  FILE* fp = fopen(filename.c_str(), "wb");
     // A failure to open the list file is only reported; main still returns 0.
640  if (fp == NULL) {
641  tprintf("Failed to create output font list %s\n", filename.c_str());
642  } else {
643  for (int i = 0; i < font_names.size(); ++i) {
644  fprintf(fp, "%s\n", font_names[i].c_str());
645  }
646  fclose(fp);
647  }
648  }
649 
650  return 0;
651 }
bool MakeIndividualGlyphs(Pix *pix, const vector< BoxChar * > &vbox, const int input_tiff_page)
Definition: text2image.cpp:309
#define tprintf(...)
Definition: tprintf.h:31
void ParseCommandLineFlags(const char *usage, int *argc, char ***argv, const bool remove_flags)
bool load_from_file(const char *const filename, bool skip_fragments)
Definition: unicharset.h:346
int SpanUTF8Whitespace(const char *text)
Definition: normstrngs.cpp:186
#define ASSERT_HOST_MSG(x, msg...)
Definition: errcode.h:98
int SpanUTF8NotWhitespace(const char *text)
Definition: normstrngs.cpp:197
void set_seed(uinT64 seed)
Definition: helpers.h:43
#define TLOG_FATAL(msg...)
Definition: tlog.h:41
bool encodable_string(const char *str, int *first_bad_position) const
Definition: unicharset.cpp:222
Pix * DegradeImage(Pix *input, int exposure, TRand *randomizer, float *rotation)
const int kRandomSeed
Definition: text2image.cpp:60
#define tlog(level,...)
Definition: tlog.h:33
void ExtractFontProperties(const string &utf8_text, StringRenderer *render, const string &output_base)
Definition: text2image.cpp:212
#define NULL
Definition: host.h:144
STRING_PARAM_FLAG ( text  ,
""  ,
"File name of text input to process"   
)
STRING_PARAM_FLAG ( outputbase  ,
""  ,
"Basename for output image/box file"   
)
STRING_PARAM_FLAG ( writing_mode  ,
"horizontal"  ,
"Specify one of the following writing"" modes.\n""'horizontal' : Render regular horizontal text. (default)\n""'vertical' : Render vertical text. Glyph orientation is"" selected by Pango.\n""'vertical-upright' : Render vertical text. Glyph "" orientation is set to be upright."   
)
STRING_PARAM_FLAG ( font  ,
"Arial"  ,
"Font description name to use"   
)
STRING_PARAM_FLAG ( unicharset_file  ,
""  ,
"File with characters in the unicharset. If --render_ngrams"" is true and --unicharset_file is  specified,
ngrams with""characters that are not in unicharset will be omitted"   
)

Variable Documentation

const int kRandomSeed = 0x18273645

Definition at line 60 of file text2image.cpp.