16#ifndef TESSERACT_TRAINING_TRAININGSAMPLESET_H_
17#define TESSERACT_TRAINING_TRAININGSAMPLESET_H_
33struct UnicharAndFonts;
54 return samples_.size();
57 return num_raw_samples_;
66 return unicharset_size_;
69 return fontinfo_table_;
121 float ClusterDistance(
int font_id1,
int class_id1,
int font_id2,
int class_id2,
154 return samples_[index];
159 samples_[index] =
nullptr;
212 struct FontClassDistance {
218 struct FontClassInfo {
230 int32_t canonical_sample;
232 float canonical_dist;
234 std::vector<int32_t> samples;
238 std::vector<int> canonical_features;
240 BitVector cloud_features;
245 std::vector<float> font_distance_cache;
247 std::vector<float> unichar_distance_cache;
250 std::vector<FontClassDistance> distance_cache;
253 std::vector<TrainingSample *> samples_;
255 int num_raw_samples_;
257 UNICHARSET unicharset_;
259 int unicharset_size_;
263 IndexMapBiDi font_id_map_;
266 GENERIC_2D_ARRAY<FontClassInfo> *font_class_array_;
270 const FontInfoTable &fontinfo_table_;
UnicodeText::const_iterator::difference_type distance(const UnicodeText::const_iterator &first, const UnicodeText::const_iterator &last)
int SparseSize() const override
const TrainingSample * GetCanonicalSample(int font_id, int class_id) const
int NumClassSamples(int font_id, int class_id, bool randomize) const
int AddSample(const char *unichar, TrainingSample *sample)
TrainingSample * extract_sample(int index)
bool Serialize(FILE *fp) const
const FontInfoTable & fontinfo_table() const
void AddAllFontsForClass(int class_id, Shape *shape) const
void IndexFeatures(const IntFeatureSpace &feature_space)
float ComputeClusterDistance(int font_id1, int class_id1, int font_id2, int class_id2, const IntFeatureMap &feature_map) const
void KillSample(TrainingSample *sample)
void ComputeCanonicalSamples(const IntFeatureMap &map, bool debug)
void ComputeCloudFeatures(int feature_space_size)
bool DeSerialize(bool swap, FILE *fp)
int num_raw_samples() const
void LoadUnicharset(const char *filename)
const BitVector & GetCloudFeatures(int font_id, int class_id) const
const UNICHARSET & unicharset() const
int GlobalSampleIndex(int font_id, int class_id, int index) const
void ComputeCanonicalFeatures()
TrainingSample * mutable_sample(int index)
const std::vector< int > & GetCanonicalFeatures(int font_id, int class_id) const
void OrganizeByFontAndClass()
TrainingSampleSet(const FontInfoTable &fontinfo_table)
float ClusterDistance(int font_id1, int class_id1, int font_id2, int class_id2, const IntFeatureMap &feature_map)
float GetCanonicalDist(int font_id, int class_id) const
void ReplicateAndRandomizeSamples()
const TrainingSample * GetSample(int index) const
std::string SampleToString(const TrainingSample &sample) const
void DisplaySamplesWithFeature(int f_index, const Shape &shape, const IntFeatureSpace &feature_space, ScrollView::Color color, ScrollView *window) const
int ReliablySeparable(int font_id1, int class_id1, int font_id2, int class_id2, const IntFeatureMap &feature_map, bool thorough) const
TrainingSample * MutableSample(int font_id, int class_id, int index)
float UnicharDistance(const UnicharAndFonts &uf1, const UnicharAndFonts &uf2, bool matched_fonts, const IntFeatureMap &feature_map)