All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Macros Modules Pages
tesshelper Namespace Reference

Functions

def getProjectfiles
 
def getTessLibFiles
 
def tessCompare
 
def tessReport
 
def copyIncludes
 
def tessCopy
 
def tessClean
 
def validateTessDir
 
def validateDir
 
def main
 

Variables

string epilogStr = r""
 
string VERSION = "1.0 %s"
 
string PROJ_SUBDIR = r"vs2008\libtesseract"
 
string PROJFILE = "libtesseract.vcproj"
 
string NEWHEADERS_FILENAME = "newheaders.txt"
 
string NEWSOURCES_FILENAME = "newsources.txt"
 
 fileNodeTemplate = \
 

Function Documentation

def tesshelper.copyIncludes (   fileSet,
  description,
  tessDir,
  includeDir 
)
Copy set of files to specified include dir.

Definition at line 265 of file tesshelper.py.

def copyIncludes(fileSet, description, tessDir, includeDir):
    """Copy set of files to specified include dir.

    Args:
        fileSet: iterable of file paths, each relative to tessDir.
        description: label inserted into the progress banner.
        tessDir: directory the relative paths are joined onto.
        includeDir: destination passed to shutil.copy2 for every file.

    Files are processed in sorted order. A path that is not an existing
    file is reported, remembered and skipped; after the loop a summary of
    the copied count and of every skipped path is printed.
    """

    print()
    print('Copying libtesseract "%s" headers to %s' % (description, includeDir))
    print()

    count = 0
    errList = []
    for includeFile in sorted(fileSet):
        filepath = os.path.join(tessDir, includeFile)
        if os.path.isfile(filepath):
            shutil.copy2(filepath, includeDir)
            print("Copied: %s" % includeFile)
            count += 1
        else:
            print('***Error: "%s" doesn\'t exist' % filepath)
            errList.append(filepath)

    print('%d header files successfully copied to "%s"' % (count, includeDir))
    if errList:
        # The count must be supplied explicitly; without it the literal
        # "%d" would be printed instead of the number of failures.
        print("The following %d files were not copied:" % len(errList))
        for filepath in errList:
            print(" %s" % filepath)
def copyIncludes
Definition: tesshelper.py:265
def tesshelper.getProjectfiles (   libTessDir,
  libProjectFile,
  nTrimChars 
)
Return sets of all, header (.h), source (.c/.cpp), and resource (.rc) files in the libtesseract Project, in that order.

Definition at line 84 of file tesshelper.py.

84 
def getProjectfiles(libTessDir, libProjectFile, nTrimChars):
    """Return sets of all, header, source, and resource files in libtesseract Project

    Every Include="..." attribute in libProjectFile whose value starts with
    a dot is collected. The raw value is filed by extension (.c/.cpp, .h,
    .rc; anything else is reported on stdout). Independently of its type,
    each value is also joined onto libTessDir, made absolute, stripped of
    its first nTrimChars characters, lower-cased and added to the "all" set.

    Returns a 4-tuple: (all files, header files, source files, resource files).
    """

    with open(libProjectFile, "r") as projFile:
        contents = projFile.read()

    allSet = set()
    headerSet = set()
    sourceSet = set()
    resourceSet = set()

    # extension (lower case) -> set collecting the raw project entries
    bucketByExt = {
        ".c": sourceSet,
        ".cpp": sourceSet,
        ".h": headerSet,
        ".rc": resourceSet,
    }

    for entry in re.findall(r'(?i)Include="(\.[^"]+)"', contents):
        ext = os.path.splitext(entry.lower())[1]
        bucket = bucketByExt.get(ext)
        if bucket is None:
            print("unknown file type: %s" % entry)
        else:
            bucket.add(entry)

        absEntry = os.path.abspath(os.path.join(libTessDir, entry))
        allSet.add(absEntry[nTrimChars:].lower())

    return allSet, headerSet, sourceSet, resourceSet
def getProjectfiles
Definition: tesshelper.py:84
def tesshelper.getTessLibFiles (   tessDir,
  nTrimChars 
)
Return set of all libtesseract files in tessDir

Definition at line 115 of file tesshelper.py.

def getTessLibFiles(tessDir, nTrimChars):
    """Return set of all libtesseract files in tessDir

    Globs *.c, *.cpp and *.h directly inside each listed library
    sub-directory of tessDir (no recursion). Each hit is made absolute,
    stripped of its first nTrimChars characters and lower-cased.
    """

    librarySubDirs = (
        "api",
        "ccmain",
        "ccstruct",
        "ccutil",
        "classify",
        "cube",
        "cutil",
        "dict",
        r"neural_networks\runtime",
        "opencl",
        "textord",
        "viewer",
        "wordrec",
        # "training" is deliberately left out
        r"vs2010\port",
        r"vs2010\libtesseract",
    )
    wildcards = ("*.c", "*.cpp", "*.h")

    found = set()
    for subDir in librarySubDirs:
        for wildcard in wildcards:
            for hit in glob.glob(os.path.join(tessDir, subDir, wildcard)):
                found.add(os.path.abspath(hit)[nTrimChars:].lower())

    return found
151 
152 # ====================================================================
def getTessLibFiles
Definition: tesshelper.py:115
def tesshelper.main ( )

Definition at line 459 of file tesshelper.py.

def main():
    """Parse the command line and run the selected command.

    Positional argument tessDir is checked by validateTessDir. The
    sub-commands are compare, report, copy (which takes an extra
    includeDir checked by validateDir) and clean. When exactly one
    argument follows the program name, "compare" is appended to it.
    """
    argParser = argparse.ArgumentParser(
        epilog=epilogStr,
        formatter_class=argparse.RawDescriptionHelpFormatter)
    argParser.add_argument("--version", action="version",
                           version="%(prog)s " + VERSION)
    argParser.add_argument('tessDir', type=validateTessDir,
                           help="tesseract installation directory")

    commands = argParser.add_subparsers(dest="subparser_name",
                                        title="Commands")

    compareCmd = commands.add_parser(
        'compare', help="compare libtesseract Project with tessDir")
    compareCmd.set_defaults(func=tessCompare)

    reportCmd = commands.add_parser(
        'report', help="report libtesseract summary stats")
    reportCmd.set_defaults(func=tessReport)

    copyCmd = commands.add_parser(
        'copy', help="copy public libtesseract header files to includeDir")
    copyCmd.add_argument('includeDir', type=validateDir,
                         help="Directory to copy header files to.")
    copyCmd.set_defaults(func=tessCopy)

    cleanCmd = commands.add_parser(
        'clean', help="clean vs2008 folder of build folders and .user files")
    cleanCmd.set_defaults(func=tessClean)

    # argparse cannot declare a default sub-command, so a lone argument
    # is treated as "<tessDir> compare".
    if len(sys.argv) == 2:
        sys.argv.append("compare")
    options = argParser.parse_args()

    # Only the copy command takes the extra include directory.
    callArgs = [options.tessDir]
    if options.func == tessCopy:
        callArgs.append(options.includeDir)
    options.func(*callArgs)
def tesshelper.tessClean (   tessDir)
Clean vs2008 folder of all build directories and certain temp files.

Definition at line 382 of file tesshelper.py.

def _removeOrList(path, remover, listOnly):
    """Print what happens to path and, unless listOnly, call remover(path)."""
    if listOnly:
        print("Would remove: %s" % path)
    else:
        print("Removing: %s" % path)
        remover(path)


def tessClean(tessDir):
    '''Clean vs2008 folder of all build directories and certain temp files.

    Asks for confirmation first; anything other than "yes" (case
    insensitive) aborts. A second prompt selects list-only mode, which is
    the default unless the answer is "no".

    Removed (or listed):
      * every LIB_Release, LIB_Debug, DLL_Release and DLL_Debug directory
        found while walking the vs2008 folder,
      * every file directly inside the vs2008 folder except tesseract.sln,
        tesshelper.py and readme.txt,
      * in sub-folders, files ending in .suo, .ncb, .user or whose
        extension ends with "~".
    '''

    vs2008Dir = os.path.join(tessDir, "vs2008")
    vs2008AbsDir = os.path.abspath(vs2008Dir)

    # The answers are read as plain text. Passing them through eval()
    # would execute whatever the user types and raise NameError on "Yes".
    answer = input(
        'Are you sure you want to clean the\n "%s" folder (Yes/No) [No]? ' %
        vs2008AbsDir)
    if answer.strip().lower() != "yes":
        return
    answer = input('Only list the items to be deleted (Yes/No) [Yes]? ')
    listOnly = answer.strip().lower() != "no"

    for rootDir, dirs, files in os.walk(vs2008AbsDir):
        for buildDir in ("LIB_Release", "LIB_Debug", "DLL_Release", "DLL_Debug"):
            if buildDir in dirs:
                # Prune in place so os.walk does not descend into a
                # directory that is about to be removed.
                dirs.remove(buildDir)
                _removeOrList(os.path.join(rootDir, buildDir),
                              shutil.rmtree, listOnly)

        if rootDir == vs2008AbsDir:
            for file in files:
                if file.lower() not in ("tesseract.sln",
                                        "tesshelper.py",
                                        "readme.txt"):
                    _removeOrList(os.path.join(rootDir, file),
                                  os.remove, listOnly)
        else:
            for file in files:
                root, ext = os.path.splitext(file)
                if ext.lower() in (".suo", ".ncb", ".user") or ext.endswith("~"):
                    _removeOrList(os.path.join(rootDir, file),
                                  os.remove, listOnly)
434 
435 # ====================================================================
def tessClean
Definition: tesshelper.py:382
def tesshelper.tessCompare (   tessDir)
Compare libtesseract Project files and actual "sub-library" files.

Definition at line 153 of file tesshelper.py.

def tessCompare(tessDir):
    '''Compare libtesseract Project files and actual "sub-library" files.

    Reads vs2010/libtesseract/libtesseract.vcxproj below tessDir via
    getProjectfiles and the on-disk library files via getTessLibFiles,
    then prints:
      * "extra" files - on disk but not in the Project,
      * "dead" files - in the Project but not on disk.

    For the extra files a fileNodeTemplate entry per file is written to
    NEWHEADERS_FILENAME (.h files) and NEWSOURCES_FILENAME (everything
    else); both are opened by bare name, i.e. relative to the current
    working directory.
    '''

    vs2010Dir = os.path.join(tessDir, "vs2010")
    libTessDir = os.path.join(vs2010Dir, "libtesseract")
    libProjectFile = os.path.join(libTessDir,"libtesseract.vcxproj")
    tessAbsDir = os.path.abspath(tessDir)
    # +1 also drops the path separator that follows the directory name
    nTrimChars = len(tessAbsDir)+1
    print('Comparing VS2010 Project "%s" with\n "%s"' % (libProjectFile,
                                                        tessAbsDir))

    projectFilesSet, projectHFiles, projectCFiles, projectRFiles = \
        getProjectfiles(libTessDir, libProjectFile, nTrimChars)
    tessFiles = getTessLibFiles(tessDir, nTrimChars)

    extraFiles = tessFiles - projectFilesSet
    print("%2d Extra files (in %s but not in Project)" % (len(extraFiles),
                                                          tessAbsDir))
    headerFiles = []
    sourceFiles = []
    # Iterating in sorted order keeps both derived lists sorted as well,
    # so they need no further sorting before being written out.
    for filename in sorted(extraFiles):
        root, ext = os.path.splitext(filename.lower())
        if ext == ".h":
            headerFiles.append(filename)
        else:
            sourceFiles.append(filename)
        print(" %s " % filename)

    print()
    print("%2d new header file items written to %s" % (len(headerFiles),
                                                       NEWHEADERS_FILENAME))
    with open(NEWHEADERS_FILENAME, "w") as f:
        for filename in headerFiles:
            f.write(fileNodeTemplate % filename)

    print("%2d new source file items written to %s" % (len(sourceFiles),
                                                       NEWSOURCES_FILENAME))
    with open(NEWSOURCES_FILENAME, "w") as f:
        for filename in sourceFiles:
            f.write(fileNodeTemplate % filename)
    print()

    deadFiles = projectFilesSet - tessFiles
    # Closing parenthesis added so the message matches the "Extra files" one.
    print("%2d Dead files (in Project but not in %s)" % (len(deadFiles),
                                                         tessAbsDir))
    for filename in sorted(deadFiles):
        print(" %s " % filename)
207 
208 # ====================================================================
def getTessLibFiles
Definition: tesshelper.py:115
def getProjectfiles
Definition: tesshelper.py:84
def tessCompare
Definition: tesshelper.py:153
def tesshelper.tessCopy (   tessDir,
  includeDir 
)
Copy all "public" libtesseract Project header files to include directory.

Preserves directory hierarchy.

Definition at line 293 of file tesshelper.py.

def tessCopy(tessDir, includeDir):
    '''Copy all "public" libtesseract Project header files to include directory.

    The public headers go to the "tesseract" sub-directory of includeDir,
    which is created when missing; the two .vsprops files go to includeDir
    itself. Aborts with a message when "tesseract" exists as a plain file.
    '''

    baseHeaders = {
        r"api\baseapi.h",
        r"api\capi.h",
        r"api\apitypes.h",
        r"ccstruct\publictypes.h",
        r"ccmain\thresholder.h",
        r"ccutil\host.h",
        r"ccutil\basedir.h",
        r"ccutil\tesscallback.h",
        r"ccutil\unichar.h",
        r"ccutil\platform.h",
    }

    strngHeaders = {
        r"ccutil\strngs.h",
        r"ccutil\memry.h",
        r"ccutil\host.h",
        r"ccutil\serialis.h",
        r"ccutil\errcode.h",
        r"ccutil\fileerr.h",
        # r"ccutil\genericvector.h" is not part of this group
    }

    resultIteratorHeaders = {
        r"ccmain\ltrresultiterator.h",
        r"ccmain\pageiterator.h",
        r"ccmain\resultiterator.h",
        r"ccutil\genericvector.h",
        r"ccutil\tesscallback.h",
        r"ccutil\errcode.h",
        r"ccutil\host.h",
        r"ccutil\helpers.h",
        r"ccutil\ndminx.h",
        r"ccutil\params.h",
        r"ccutil\unicharmap.h",
        r"ccutil\unicharset.h",
    }

    # The next two groups are defined but not part of the copied selection.
    genericVectorHeaders = {
        r"ccutil\genericvector.h",
        r"ccutil\tesscallback.h",
        r"ccutil\errcode.h",
        r"ccutil\host.h",
        r"ccutil\helpers.h",
        r"ccutil\ndminx.h",
    }

    blobsHeaders = {
        r"ccstruct\blobs.h",
        r"ccstruct\rect.h",
        r"ccstruct\points.h",
        r"ccstruct\ipoints.h",
        r"ccutil\elst.h",
        r"ccutil\host.h",
        r"ccutil\serialis.h",
        r"ccutil\lsterr.h",
        r"ccutil\ndminx.h",
        r"ccutil\tprintf.h",
        r"ccutil\params.h",
        r"viewer\scrollview.h",
        r"ccstruct\vecfuncs.h",
    }

    extraFiles = {
        # r"vs2008\include\stdint.h" is not copied
        r"vs2008\include\leptonica_versionnumbers.vsprops",
        r"vs2008\include\tesseract_versionnumbers.vsprops",
    }

    publicDir = os.path.join(includeDir, "tesseract")
    if os.path.isfile(publicDir):
        print('Aborting: "%s" is a file not a directory.' % publicDir)
        return
    if not os.path.exists(publicDir):
        os.mkdir(publicDir)

    publicHeaders = baseHeaders.union(strngHeaders, resultIteratorHeaders)

    copyIncludes(publicHeaders, "public", tessDir, publicDir)
    copyIncludes(extraFiles, "extra", tessDir, includeDir)
380 
381 # ====================================================================
def copyIncludes
Definition: tesshelper.py:265
def tessCopy
Definition: tesshelper.py:293
def tesshelper.tessReport (   tessDir)
Report summary stats on "sub-library" files and libtesseract Project file.

Definition at line 209 of file tesshelper.py.

def tessReport(tessDir):
    """Report summary stats on "sub-library" files and libtesseract Project file.

    Prints, per library folder, the number of .h and .cpp files returned
    by getTessLibFiles, followed by the column totals. Then prints how
    many header, source and resource entries getProjectfiles found in
    vs2008/libtesseract/libtesseract.vcproj, plus their sum.
    """

    projectDir = os.path.join(tessDir, "vs2008", "libtesseract")
    projectPath = os.path.join(projectDir, "libtesseract.vcproj")
    absTessDir = os.path.abspath(tessDir)
    trimLen = len(absTessDir) + 1

    _allFiles, headerSet, sourceSet, resourceSet = getProjectfiles(
        projectDir, projectPath, trimLen)
    libFiles = getTessLibFiles(tessDir, trimLen)

    print('Summary stats for "%s" library directories' % absTessDir)

    # folder -> Counter keyed by extension without the leading dot
    perFolder = {}
    for libFile in libFiles:
        folder, baseName = os.path.split(libFile.lower())
        ext = os.path.splitext(baseName)[1]
        perFolder.setdefault(folder, collections.Counter())[ext[1:]] += 1

    grandTotal = 0
    sumH = 0
    sumCPP = 0

    print()
    print(" total h cpp")
    print(" ----- --- ---")
    for folder in sorted(perFolder):
        extCounts = perFolder[folder]
        hCount = extCounts['h']
        cppCount = extCounts['cpp']
        rowTotal = hCount + cppCount

        grandTotal += rowTotal
        sumH += hCount
        sumCPP += cppCount

        print(" %5d %3d %3d %s" % (rowTotal, hCount, cppCount, folder))
    print(" ----- --- ---")
    print(" %5d %3d %3d" % (grandTotal, sumH, sumCPP))

    print()
    print('Summary stats for VS2008 Project "%s"' % projectPath)
    for amount, label in ((len(headerSet), "Header files"),
                          (len(sourceSet), "Source files"),
                          (len(resourceSet), "Resource files")):
        print(" %5d %s" % (amount, label))
    print(" -----")
    print(" %5d" % (len(headerSet) + len(sourceSet) + len(resourceSet), ))
263 
264 # ====================================================================
def getTessLibFiles
Definition: tesshelper.py:115
def tessReport
Definition: tesshelper.py:209
def getProjectfiles
Definition: tesshelper.py:84
def tesshelper.validateDir (   dir)
Check that dir is a valid directory named include.

Definition at line 446 of file tesshelper.py.

def validateDir(dir):
    """Check that dir is a valid directory named include.

    Returns dir unchanged when it is an existing directory whose final
    path component equals "include" (case insensitive); otherwise raises
    argparse.ArgumentTypeError.
    """

    if os.path.isdir(dir):
        leaf = os.path.basename(os.path.abspath(dir))
        if leaf.lower() == "include":
            return dir
        raise argparse.ArgumentTypeError('Include directory "%s" must be named "include".' % leaf)

    raise argparse.ArgumentTypeError('Directory "%s" doesn\'t exist.' % dir)
def validateDir
Definition: tesshelper.py:446
def tesshelper.validateTessDir (   tessDir)
Check that tessDir is a valid tesseract directory.

Definition at line 436 of file tesshelper.py.

def validateTessDir(tessDir):
    """Check that tessDir is a valid tesseract directory.

    Returns tessDir unchanged when it is an existing directory that
    contains PROJ_SUBDIR/PROJFILE; otherwise raises
    argparse.ArgumentTypeError.
    """

    if os.path.isdir(tessDir):
        expectedProject = os.path.join(tessDir, PROJ_SUBDIR, PROJFILE)
        if os.path.isfile(expectedProject):
            return tessDir
        raise argparse.ArgumentTypeError('Project file "%s" doesn\'t exist.' % expectedProject)

    raise argparse.ArgumentTypeError('Directory "%s" doesn\'t exist.' % tessDir)
def validateTessDir
Definition: tesshelper.py:436

Variable Documentation

string tesshelper.epilogStr = r""

Definition at line 31 of file tesshelper.py.

tesshelper.fileNodeTemplate = \

Definition at line 79 of file tesshelper.py.

string tesshelper.NEWHEADERS_FILENAME = "newheaders.txt"

Definition at line 76 of file tesshelper.py.

string tesshelper.NEWSOURCES_FILENAME = "newsources.txt"

Definition at line 77 of file tesshelper.py.

string tesshelper.PROJ_SUBDIR = r"vs2008\libtesseract"

Definition at line 73 of file tesshelper.py.

string tesshelper.PROJFILE = "libtesseract.vcproj"

Definition at line 74 of file tesshelper.py.

string tesshelper.VERSION = "1.0 %s"

Definition at line 72 of file tesshelper.py.