416 if (FLAGS_list_available_fonts) {
417 const std::vector<string>& all_fonts = FontUtils::ListAvailableFonts();
418 for (
unsigned int i = 0; i < all_fonts.size(); ++i) {
419 printf(
"%3u: %s\n", i, all_fonts[i].c_str());
421 "Font %s is unrecognized.\n", all_fonts[i].c_str());
427 if (FLAGS_text.empty()) {
428 tprintf(
"'--text' option is missing!\n");
431 if (FLAGS_outputbase.empty()) {
432 tprintf(
"'--outputbase' option is missing!\n");
435 if (!FLAGS_unicharset_file.empty() && FLAGS_render_ngrams) {
436 tprintf(
"Use '--unicharset_file' only if '--render_ngrams' is set.\n");
440 if (!FLAGS_find_fonts && !FontUtils::IsAvailableFont(FLAGS_font.c_str())) {
442 if (!FontUtils::IsAvailableFont(FLAGS_font.c_str(), &pango_name)) {
443 tprintf(
"Could not find font named %s.\n", FLAGS_font.c_str());
444 if (!pango_name.empty()) {
445 tprintf(
"Pango suggested font %s.\n", pango_name.c_str());
447 tprintf(
"Please correct --font arg.\n");
452 if (FLAGS_render_ngrams)
453 FLAGS_output_word_boxes =
true;
455 char font_desc_name[1024];
456 snprintf(font_desc_name, 1024,
"%s %d", FLAGS_font.c_str(),
457 static_cast<int>(FLAGS_ptsize));
459 render.set_add_ligatures(FLAGS_ligatures);
460 render.set_leading(FLAGS_leading);
461 render.set_resolution(FLAGS_resolution);
462 render.set_char_spacing(FLAGS_char_spacing * FLAGS_ptsize);
463 render.set_h_margin(FLAGS_margin);
464 render.set_v_margin(FLAGS_margin);
465 render.set_output_word_boxes(FLAGS_output_word_boxes);
466 render.set_box_padding(FLAGS_box_padding);
467 render.set_strip_unrenderable_words(FLAGS_strip_unrenderable_words);
468 render.set_underline_start_prob(FLAGS_underline_start_prob);
469 render.set_underline_continuation_prob(FLAGS_underline_continuation_prob);
472 if (FLAGS_writing_mode ==
"horizontal") {
474 render.set_vertical_text(
false);
475 render.set_gravity_hint_strong(
false);
476 render.set_render_fullwidth_latin(
false);
477 }
else if (FLAGS_writing_mode ==
"vertical") {
479 render.set_vertical_text(
true);
480 render.set_gravity_hint_strong(
false);
481 render.set_render_fullwidth_latin(
false);
482 }
else if (FLAGS_writing_mode ==
"vertical-upright") {
488 render.set_vertical_text(
true);
489 render.set_gravity_hint_strong(
true);
490 render.set_render_fullwidth_latin(
true);
492 tprintf(
"Invalid writing mode: %s\n", FLAGS_writing_mode.c_str());
498 if (!File::ReadFileToString(FLAGS_text.c_str(), &src_utf8)) {
499 tprintf(
"Failed to read file: %s\n", FLAGS_text.c_str());
504 if (strncmp(src_utf8.c_str(),
"\xef\xbb\xbf", 3) == 0) {
505 src_utf8.erase(0, 3);
507 tlog(1,
"Render string of size %d\n", src_utf8.length());
509 if (FLAGS_render_ngrams || FLAGS_only_extract_font_properties) {
512 const string kSeparator = FLAGS_render_ngrams ?
" " :
" ";
516 const unsigned int kCharsPerLine = (FLAGS_ptsize > 20) ? 50 : 100;
519 if (FLAGS_render_ngrams && !FLAGS_unicharset_file.empty() &&
521 tprintf(
"Failed to load unicharset from file %s\n",
522 FLAGS_unicharset_file.c_str());
529 const char *str8 = src_utf8.c_str();
530 int len = src_utf8.length();
532 std::vector<std::pair<int, int> > offsets;
534 while (offset < len) {
536 offsets.push_back(std::make_pair(offset, step));
540 if (FLAGS_render_ngrams)
541 std::random_shuffle(offsets.begin(), offsets.end());
543 for (
size_t i = 0, line = 1; i < offsets.size(); ++i) {
544 const char *curr_pos = str8 + offsets[i].first;
545 int ngram_len = offsets[i].second;
548 if (!FLAGS_unicharset_file.empty() &&
552 rand_utf8.append(curr_pos, ngram_len);
553 if (rand_utf8.length() > line * kCharsPerLine) {
554 rand_utf8.append(
" \n");
556 if (line & 0x1) rand_utf8.append(kSeparator);
558 rand_utf8.append(kSeparator);
561 tlog(1,
"Rendered ngram string of size %d\n", rand_utf8.length());
562 src_utf8.swap(rand_utf8);
564 if (FLAGS_only_extract_font_properties) {
565 tprintf(
"Extracting font properties only\n");
572 std::vector<float> page_rotation;
573 const char* to_render_utf8 = src_utf8.c_str();
577 std::vector<string> font_names;
581 int num_pass = FLAGS_bidirectional_rotation ? 2 : 1;
582 for (
int pass = 0; pass < num_pass; ++pass) {
585 for (
size_t offset = 0;
586 offset < strlen(to_render_utf8) &&
587 (FLAGS_max_pages == 0 || page_num < FLAGS_max_pages);
589 tlog(1,
"Starting page %d\n", im);
591 if (FLAGS_find_fonts) {
592 offset += render.RenderAllFontsToImage(FLAGS_min_coverage,
593 to_render_utf8 + offset,
594 strlen(to_render_utf8 + offset),
597 offset += render.RenderToImage(to_render_utf8 + offset,
598 strlen(to_render_utf8 + offset), &pix);
600 if (pix !=
nullptr) {
604 rotation = -1 * page_rotation[page_num];
606 if (FLAGS_degrade_image) {
608 FLAGS_rotate_image ? &rotation :
nullptr);
610 render.RotatePageBoxes(rotation);
614 page_rotation.push_back(rotation);
617 Pix* gray_pix = pixConvertTo8(pix,
false);
619 Pix* binary = pixThresholdToBinary(gray_pix, 128);
620 pixDestroy(&gray_pix);
621 char tiff_name[1024];
622 if (FLAGS_find_fonts) {
623 if (FLAGS_render_per_font) {
624 string fontname_for_file = tesseract::StringReplace(
625 font_used,
" ",
"_");
626 snprintf(tiff_name, 1024,
"%s.%s.tif", FLAGS_outputbase.c_str(),
627 fontname_for_file.c_str());
628 pixWriteTiff(tiff_name, binary, IFF_TIFF_G4,
"w");
629 tprintf(
"Rendered page %d to file %s\n", im, tiff_name);
631 font_names.push_back(font_used);
634 snprintf(tiff_name, 1024,
"%s.tif", FLAGS_outputbase.c_str());
635 pixWriteTiff(tiff_name, binary, IFF_TIFF_G4, im == 0 ?
"w" :
"a");
636 tprintf(
"Rendered page %d to file %s\n", im, tiff_name);
639 if (FLAGS_output_individual_glyph_images) {
641 tprintf(
"ERROR: Individual glyphs not saved\n");
646 if (FLAGS_find_fonts && offset != 0) {
653 if (!FLAGS_find_fonts) {
654 string box_name = FLAGS_outputbase.c_str();
656 render.WriteAllBoxes(box_name);
657 }
else if (!FLAGS_render_per_font && !font_names.empty()) {
658 string filename = FLAGS_outputbase.c_str();
659 filename +=
".fontlist.txt";
660 FILE* fp = fopen(filename.c_str(),
"wb");
662 tprintf(
"Failed to create output font list %s\n", filename.c_str());
664 for (
size_t i = 0; i < font_names.size(); ++i) {
665 fprintf(fp,
"%s\n", font_names[i].c_str());
#define ASSERT_HOST_MSG(x,...)
unsigned int SpanUTF8NotWhitespace(const char *text)
unsigned int SpanUTF8Whitespace(const char *text)
static string CleanupString(const char *utf8_str)
void set_seed(uinT64 seed)
void ExtractFontProperties(const string &utf8_text, StringRenderer *render, const string &output_base)
bool encodable_string(const char *str, int *first_bad_position) const
bool load_from_file(const char *const filename, bool skip_fragments)
Pix * DegradeImage(Pix *input, int exposure, TRand *randomizer, float *rotation)
bool MakeIndividualGlyphs(Pix *pix, const std::vector< BoxChar *> &vbox, const int input_tiff_page)