All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Macros Modules Pages
strngs.h
Go to the documentation of this file.
1 /**********************************************************************
2  * File: strngs.h (Formerly strings.h)
3  * Description: STRING class definition.
4  * Author: Ray Smith
5  * Created: Fri Feb 15 09:15:01 GMT 1991
6  *
7  * (C) Copyright 1991, Hewlett-Packard Ltd.
8  ** Licensed under the Apache License, Version 2.0 (the "License");
9  ** you may not use this file except in compliance with the License.
10  ** You may obtain a copy of the License at
11  ** http://www.apache.org/licenses/LICENSE-2.0
12  ** Unless required by applicable law or agreed to in writing, software
13  ** distributed under the License is distributed on an "AS IS" BASIS,
14  ** WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
15  ** See the License for the specific language governing permissions and
16  ** limitations under the License.
17  *
18  **********************************************************************/
19 
20 #ifndef STRNGS_H
21 #define STRNGS_H
22 
23 #include <stdio.h>
24 #include <string.h>
25 #include "platform.h"
26 #include "memry.h"
27 
28 namespace tesseract {
29 class TFile;
30 } // namespace tesseract.
31 
32 // STRING_IS_PROTECTED means that string[index] = X is invalid
33 // because you have to go through the STRING interface to modify it.
34 // This allows the string to ensure internal integrity and maintain
35 // its own string length. Unfortunately this is not possible because
36 // STRINGS are used as direct-manipulation data buffers for things
37 // like length arrays and many places cast away the const on string()
38 // to mutate the string. Turning this off means that internally we
39 // cannot assume we know the strlen.
40 #define STRING_IS_PROTECTED 0
41 
42 template <typename T> class GenericVector;
43 
45 {
46  public:
47  STRING();
48  STRING(const STRING &string);
49  STRING(const char *string);
50  STRING(const char *data, int length);
51  ~STRING ();
52 
53  // Writes to the given file. Returns false in case of error.
54  bool Serialize(FILE* fp) const;
55  // Reads from the given file. Returns false in case of error.
56  // If swap is true, assumes a big/little-endian swap is needed.
57  bool DeSerialize(bool swap, FILE* fp);
58  // Writes to the given file. Returns false in case of error.
59  bool Serialize(tesseract::TFile* fp) const;
60  // Reads from the given file. Returns false in case of error.
61  // If swap is true, assumes a big/little-endian swap is needed.
62  bool DeSerialize(bool swap, tesseract::TFile* fp);
63 
64  BOOL8 contains(const char c) const;
65  inT32 length() const;
66  inT32 size() const { return length(); }  // Same value as length().
67  const char *string() const;
68  const char *c_str() const;
69 
70  inline char* strdup() const {  // Returns a copy allocated with new[]; release it with delete[], not free().
71  inT32 len = length() + 1;  // + 1 leaves room for the '\0' terminator
72  return strncpy(new char[len], GetCStr(), len);  // strncpy stops at the first '\0' and zero-fills the rest
73  }
74 
75 #if STRING_IS_PROTECTED
76  const char &operator[] (inT32 index) const;
77  // len is number of chars in s to insert starting at index in this string
78  void insert_range(inT32 index, const char*s, int len);
79  void erase_range(inT32 index, int len);
80 #else
81  char &operator[] (inT32 index) const;
82 #endif
83  void split(const char c, GenericVector<STRING> *splited);
84  void truncate_at(inT32 index);
85 
86  BOOL8 operator== (const STRING & string) const;
87  BOOL8 operator!= (const STRING & string) const;
88  BOOL8 operator!= (const char *string) const;
89 
90  STRING & operator= (const char *string);
91  STRING & operator= (const STRING & string);
92 
93  STRING operator+ (const STRING & string) const;
94  STRING operator+ (const char ch) const;
95 
96  STRING & operator+= (const char *string);
97  STRING & operator+= (const STRING & string);
98  STRING & operator+= (const char ch);
99 
100  // Assignment for strings which are not null-terminated.
101  void assign(const char *cstr, int len);
102 
103  // Appends the given string and int (as a %d) to this.
104  // += cannot be used for ints as there is a char += operator that would
105  // be ambiguous, and ints usually need a string before or between them
106  // anyway.
107  void add_str_int(const char* str, int number);
108  // Appends the given string and double (as a %.8g) to this.
109  void add_str_double(const char* str, double number);
110 
111  // Ensures capacity via ensure_cstr() but drops the returned buffer pointer so it stays encapsulated.
112  inline void ensure(inT32 min_capacity) { ensure_cstr(min_capacity); }
113 
114  private:
115  typedef struct STRING_HEADER {  // Bookkeeping kept at the start of the string storage.
116  // How much space was allocated in the string buffer for char data.
117  int capacity_;
118 
119  // used_ is how much of the capacity is currently being used,
120  // including a '\0' terminator.
121  //
122  // If used_ is 0 then the string is NULL (not even the '\0'),
123  // else if used_ > 0 then it is strlen() + 1 (because it includes '\0'),
124  // else (used_ < 0) the string is not NULL but its length needs to be computed.
125  // That last condition is set when encapsulation is violated because
126  // an API returned a mutable string.
127  //
128  // capacity_ - used_ = excess capacity that the string can grow into
129  // without reallocating.
130  mutable int used_;  // mutable: FixHeader() repairs it from const methods
131  } STRING_HEADER;
132 
133  // To preserve the behavior of the old serialization, we only have space
134  // for one pointer in this structure. So we are embedding a data structure
135  // at the start of the storage that will hold additional state variables,
136  // then storing the actual string contents immediately after.
137  STRING_HEADER* data_;
138 
139  // Returns the header part of the storage, which is what data_ itself points at.
140  inline STRING_HEADER* GetHeader() {
141  return data_;
142  }
143  inline const STRING_HEADER* GetHeader() const {  // const overload, same pointer
144  return data_;
145  }
146 
147  // Returns the string data part of the storage: the bytes right after the STRING_HEADER.
148  inline char* GetCStr() {
149  return ((char *)data_) + sizeof(STRING_HEADER);  // skip over the embedded header
150  };
151 
152  inline const char* GetCStr() const {  // const overload, same address
153  return ((const char *)data_) + sizeof(STRING_HEADER);
154  };
155  inline bool InvariantOk() const {  // Checks that used_ agrees with the stored string.
156 #if STRING_IS_PROTECTED
157  return (GetHeader()->used_ == 0) ?  // used_ == 0 must mean a NULL string
158  (string() == NULL) : (GetHeader()->used_ == (strlen(string()) + 1));  // else strlen plus the '\0'
159 #else
160  return true;  // unprotected build: callers may mutate the buffer, so nothing is checked
161 #endif
162  }
163 
164  // Ensure string has requested capacity as optimization
165  // to avoid unnecessary reallocations.
166  // The return value is a cstr buffer with at least requested capacity
167  char* ensure_cstr(inT32 min_capacity);
168 
169  void FixHeader() const; // make used_ non-negative, even if const
170 
171  char* AllocData(int used, int capacity);
172  void DiscardData();
173 };
174 #endif
unsigned char BOOL8
Definition: host.h:113
char * strdup() const
Definition: strngs.h:70
inT32 size() const
Definition: strngs.h:66
#define TESS_API
Definition: platform.h:73
ICOORD operator+(const ICOORD &op1, const ICOORD &op2)
Definition: ipoints.h:68
Definition: strngs.h:44
#define NULL
Definition: host.h:144
void ensure(inT32 min_capacity)
Definition: strngs.h:112
ICOORD & operator+=(ICOORD &op1, const ICOORD &op2)
Definition: ipoints.h:86
int inT32
Definition: host.h:102