tesseract v5.3.3.20231005
serialis.cpp
Go to the documentation of this file.
1/**********************************************************************
2 * File: serialis.cpp (Formerly serialmac.h)
3 * Description: Inline routines and macros for serialisation functions
4 * Author: Phil Cheatle
5 *
6 * (C) Copyright 1990, Hewlett-Packard Ltd.
7 ** Licensed under the Apache License, Version 2.0 (the "License");
8 ** you may not use this file except in compliance with the License.
9 ** You may obtain a copy of the License at
10 ** http://www.apache.org/licenses/LICENSE-2.0
11 ** Unless required by applicable law or agreed to in writing, software
12 ** distributed under the License is distributed on an "AS IS" BASIS,
13 ** WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14 ** See the License for the specific language governing permissions and
15 ** limitations under the License.
16 *
17 **********************************************************************/
18
19#include "serialis.h"
20
21#include "errcode.h"
22
23#include "helpers.h" // for ReverseN
24
25#include <climits> // for INT_MAX
26#include <cstdio>
27
28namespace tesseract {
29
// The default FileReader: loads the whole of the file called filename into
// *data.
// Returns true only if the file could be opened, its size could be determined
// and lies strictly between 0 and LONG_MAX, and every byte was read.
// Note that an empty file therefore yields false.
// If the size check fails, *data is left untouched; if the read itself comes
// up short, *data has already been resized to the file size.
bool LoadDataFromFile(const char *filename, std::vector<char> *data) {
  FILE *fp = fopen(filename, "rb");
  if (fp == nullptr) {
    return false;
  }
  bool result = false;
  // The size is found by seeking to the end of the file. If the seek fails the
  // position reported by ftell is meaningless, so give up instead of using it.
  if (fseek(fp, 0, SEEK_END) == 0) {
    const long size = std::ftell(fp);
    // ftell reports failure with a negative value.
    // Trying to open a directory on Linux sets size to LONG_MAX. Catch it here.
    if (size > 0 && size < LONG_MAX && fseek(fp, 0, SEEK_SET) == 0) {
      const size_t num_bytes = static_cast<size_t>(size);
      // Reserve an extra byte in case caller wants to append a '\0' character.
      data->reserve(num_bytes + 1);
      data->resize(num_bytes); // TODO: optimize no init
      result = fread(data->data(), 1, num_bytes, fp) == num_bytes;
    }
  }
  fclose(fp);
  return result;
}
50
// The default FileWriter: writes the bytes of data to the file called
// filename, creating or truncating it.
// Returns true only if the file could be opened, all bytes were accepted by
// fwrite, and the file was closed without error. An empty vector produces an
// empty file and returns true.
bool SaveDataToFile(const std::vector<char> &data, const char *filename) {
  FILE *fp = fopen(filename, "wb");
  if (fp == nullptr) {
    return false;
  }
  // data.data() is valid for an empty vector, whereas &data[0] is not.
  const bool all_written = fwrite(data.data(), 1, data.size(), fp) == data.size();
  // Output is buffered, so a write error may only surface when the stream is
  // flushed by fclose. Always close, then fold its status into the result.
  const bool closed_ok = fclose(fp) == 0;
  return all_written && closed_ok;
}
62
64}
65
67 if (data_is_owned_) {
68 delete data_;
69 }
70}
71
72bool TFile::DeSerializeSize(int32_t *pSize) {
73 uint32_t size;
74 if (FReadEndian(&size, sizeof(size), 1) != 1) {
75 return false;
76 }
77 if (size > data_->size() / 4) {
78 // Reverse endianness.
79 swap_ = !swap_;
80 ReverseN(&size, 4);
81 }
82 *pSize = size;
83 return true;
84}
85
86bool TFile::DeSerializeSkip(size_t size) {
87 uint32_t len;
88 if (!DeSerialize(&len)) {
89 return false;
90 }
91 return Skip(len * size);
92}
93
94bool TFile::DeSerialize(std::string &data) {
95 uint32_t size;
96 if (!DeSerialize(&size)) {
97 return false;
98 } else if (size > 0) {
99 // TODO: optimize.
100 data.resize(size);
101 return DeSerialize(&data[0], size);
102 }
103 data.clear();
104 return true;
105}
106
107bool TFile::Serialize(const std::string &data) {
108 uint32_t size = data.size();
109 return Serialize(&size) && Serialize(data.c_str(), size);
110}
111
112bool TFile::DeSerialize(std::vector<char> &data) {
113 uint32_t size;
114 if (!DeSerialize(&size)) {
115 return false;
116 } else if (size > 0) {
117 // TODO: optimize.
118 data.resize(size);
119 return DeSerialize(&data[0], data.size());
120 }
121 data.clear();
122 return true;
123}
124
125bool TFile::Serialize(const std::vector<char> &data) {
126 uint32_t size = data.size();
127 if (!Serialize(&size)) {
128 return false;
129 } else if (size > 0) {
130 return Serialize(&data[0], size);
131 }
132 return true;
133}
134
135bool TFile::Skip(size_t count) {
136 offset_ += count;
137 return true;
138}
139
140bool TFile::Open(const char *filename, FileReader reader) {
141 if (!data_is_owned_) {
142 data_ = new std::vector<char>;
143 data_is_owned_ = true;
144 }
145 offset_ = 0;
146 is_writing_ = false;
147 swap_ = false;
148 if (reader == nullptr) {
149 return LoadDataFromFile(filename, data_);
150 } else {
151 return (*reader)(filename, data_);
152 }
153}
154
155bool TFile::Open(const char *data, size_t size) {
156 offset_ = 0;
157 if (!data_is_owned_) {
158 data_ = new std::vector<char>;
159 data_is_owned_ = true;
160 }
161 is_writing_ = false;
162 swap_ = false;
163 data_->resize(size); // TODO: optimize no init
164 memcpy(&(*data_)[0], data, size);
165 return true;
166}
167
168bool TFile::Open(FILE *fp, int64_t end_offset) {
169 offset_ = 0;
170 auto current_pos = std::ftell(fp);
171 if (current_pos < 0) {
172 // ftell failed.
173 return false;
174 }
175 if (end_offset < 0) {
176 if (fseek(fp, 0, SEEK_END)) {
177 return false;
178 }
179 end_offset = ftell(fp);
180 if (fseek(fp, current_pos, SEEK_SET)) {
181 return false;
182 }
183 }
184 size_t size = end_offset - current_pos;
185 is_writing_ = false;
186 swap_ = false;
187 if (!data_is_owned_) {
188 data_ = new std::vector<char>;
189 data_is_owned_ = true;
190 }
191 data_->resize(size); // TODO: optimize no init
192 return fread(&(*data_)[0], 1, size, fp) == size;
193}
194
195char *TFile::FGets(char *buffer, int buffer_size) {
196 ASSERT_HOST(!is_writing_);
197 int size = 0;
198 while (size + 1 < buffer_size && offset_ < data_->size()) {
199 buffer[size++] = (*data_)[offset_++];
200 if ((*data_)[offset_ - 1] == '\n') {
201 break;
202 }
203 }
204 if (size < buffer_size) {
205 buffer[size] = '\0';
206 }
207 return size > 0 ? buffer : nullptr;
208}
209
210size_t TFile::FReadEndian(void *buffer, size_t size, size_t count) {
211 auto num_read = FRead(buffer, size, count);
212 if (swap_ && size != 1) {
213 char *char_buffer = static_cast<char *>(buffer);
214 for (size_t i = 0; i < num_read; ++i, char_buffer += size) {
215 ReverseN(char_buffer, size);
216 }
217 }
218 return num_read;
219}
220
221size_t TFile::FRead(void *buffer, size_t size, size_t count) {
222 ASSERT_HOST(!is_writing_);
223 ASSERT_HOST(size > 0);
224 size_t required_size;
225 if (SIZE_MAX / size <= count) {
226 // Avoid integer overflow.
227 required_size = data_->size() - offset_;
228 } else {
229 required_size = size * count;
230 if (data_->size() - offset_ < required_size) {
231 required_size = data_->size() - offset_;
232 }
233 }
234 if (required_size > 0 && buffer != nullptr) {
235 memcpy(buffer, &(*data_)[offset_], required_size);
236 }
237 offset_ += required_size;
238 return required_size / size;
239}
240
242 ASSERT_HOST(!is_writing_);
243 offset_ = 0;
244}
245
246void TFile::OpenWrite(std::vector<char> *data) {
247 offset_ = 0;
248 if (data != nullptr) {
249 if (data_is_owned_) {
250 delete data_;
251 }
252 data_ = data;
253 data_is_owned_ = false;
254 } else if (!data_is_owned_) {
255 data_ = new std::vector<char>;
256 data_is_owned_ = true;
257 }
258 is_writing_ = true;
259 swap_ = false;
260 data_->clear();
261}
262
263bool TFile::CloseWrite(const char *filename, FileWriter writer) {
264 ASSERT_HOST(is_writing_);
265 if (writer == nullptr) {
266 return SaveDataToFile(*data_, filename);
267 } else {
268 return (*writer)(*data_, filename);
269 }
270}
271
272size_t TFile::FWrite(const void *buffer, size_t size, size_t count) {
273 ASSERT_HOST(is_writing_);
274 ASSERT_HOST(size > 0);
275 ASSERT_HOST(SIZE_MAX / size > count);
276 size_t total = size * count;
277 const char *buf = static_cast<const char *>(buffer);
278 // This isn't very efficient, but memory is so fast compared to disk
279 // that it is relatively unimportant, and very simple.
280 for (size_t i = 0; i < total; ++i) {
281 data_->push_back(buf[i]);
282 }
283 return count;
284}
285
286} // namespace tesseract.
#define ASSERT_HOST(x)
Definition: errcode.h:54
int * count
void ReverseN(void *ptr, int num_bytes)
Definition: helpers.h:184
bool(*)(const std::vector< char > &data, const char *filename) FileWriter
Definition: serialis.h:40
bool SaveDataToFile(const GenericVector< char > &data, const char *filename)
bool(*)(const char *filename, std::vector< char > *data) FileReader
Definition: baseapi.h:61
bool LoadDataFromFile(const char *filename, GenericVector< char > *data)
bool DeSerializeSize(int32_t *data)
Definition: serialis.cpp:72
void OpenWrite(std::vector< char > *data)
Definition: serialis.cpp:246
size_t FWrite(const void *buffer, size_t size, size_t count)
Definition: serialis.cpp:272
size_t FReadEndian(void *buffer, size_t size, size_t count)
Definition: serialis.cpp:210
bool DeSerialize(std::string &data)
Definition: serialis.cpp:94
bool Serialize(const std::string &data)
Definition: serialis.cpp:107
size_t FRead(void *buffer, size_t size, size_t count)
Definition: serialis.cpp:221
char * FGets(char *buffer, int buffer_size)
Definition: serialis.cpp:195
bool DeSerializeSkip(size_t size=1)
Definition: serialis.cpp:86
bool Open(const char *filename, FileReader reader)
Definition: serialis.cpp:140
bool Skip(size_t count)
Definition: serialis.cpp:135
bool CloseWrite(const char *filename, FileWriter writer)
Definition: serialis.cpp:263