tesseract v5.3.3.20231005
testing::internal::edit_distance Namespace Reference

Enumerations

enum  EditType { kMatch , kAdd , kRemove , kReplace }
 

Functions

GTEST_API_ std::vector< EditTypeCalculateOptimalEdits (const std::vector< size_t > &left, const std::vector< size_t > &right)
 
GTEST_API_ std::vector< EditTypeCalculateOptimalEdits (const std::vector< std::string > &left, const std::vector< std::string > &right)
 
GTEST_API_ std::string CreateUnifiedDiff (const std::vector< std::string > &left, const std::vector< std::string > &right, size_t context=2)
 

Enumeration Type Documentation

◆ EditType

Function Documentation

◆ CalculateOptimalEdits() [1/2]

std::vector< EditType > testing::internal::edit_distance::CalculateOptimalEdits ( const std::vector< size_t > &  left,
const std::vector< size_t > &  right 
)

Definition at line 1227 of file gtest.cc.

1228 {
1229 std::vector<std::vector<double> > costs(
1230 left.size() + 1, std::vector<double>(right.size() + 1));
1231 std::vector<std::vector<EditType> > best_move(
1232 left.size() + 1, std::vector<EditType>(right.size() + 1));
1233
1234 // Populate for empty right.
1235 for (size_t l_i = 0; l_i < costs.size(); ++l_i) {
1236 costs[l_i][0] = static_cast<double>(l_i);
1237 best_move[l_i][0] = kRemove;
1238 }
1239 // Populate for empty left.
1240 for (size_t r_i = 1; r_i < costs[0].size(); ++r_i) {
1241 costs[0][r_i] = static_cast<double>(r_i);
1242 best_move[0][r_i] = kAdd;
1243 }
1244
1245 for (size_t l_i = 0; l_i < left.size(); ++l_i) {
1246 for (size_t r_i = 0; r_i < right.size(); ++r_i) {
1247 if (left[l_i] == right[r_i]) {
1248 // Found a match. Consume it.
1249 costs[l_i + 1][r_i + 1] = costs[l_i][r_i];
1250 best_move[l_i + 1][r_i + 1] = kMatch;
1251 continue;
1252 }
1253
1254 const double add = costs[l_i + 1][r_i];
1255 const double remove = costs[l_i][r_i + 1];
1256 const double replace = costs[l_i][r_i];
1257 if (add < remove && add < replace) {
1258 costs[l_i + 1][r_i + 1] = add + 1;
1259 best_move[l_i + 1][r_i + 1] = kAdd;
1260 } else if (remove < add && remove < replace) {
1261 costs[l_i + 1][r_i + 1] = remove + 1;
1262 best_move[l_i + 1][r_i + 1] = kRemove;
1263 } else {
1264 // We make replace a little more expensive than add/remove to lower
1265 // their priority.
1266 costs[l_i + 1][r_i + 1] = replace + 1.00001;
1267 best_move[l_i + 1][r_i + 1] = kReplace;
1268 }
1269 }
1270 }
1271
1272 // Reconstruct the best path. We do it in reverse order.
1273 std::vector<EditType> best_path;
1274 for (size_t l_i = left.size(), r_i = right.size(); l_i > 0 || r_i > 0;) {
1275 EditType move = best_move[l_i][r_i];
1276 best_path.push_back(move);
1277 l_i -= move != kAdd;
1278 r_i -= move != kRemove;
1279 }
1280 std::reverse(best_path.begin(), best_path.end());
1281 return best_path;
1282}

◆ CalculateOptimalEdits() [2/2]

std::vector< EditType > testing::internal::edit_distance::CalculateOptimalEdits ( const std::vector< std::string > &  left,
const std::vector< std::string > &  right 
)

Definition at line 1303 of file gtest.cc.

1305 {
1306 std::vector<size_t> left_ids, right_ids;
1307 {
1308 InternalStrings intern_table;
1309 for (size_t i = 0; i < left.size(); ++i) {
1310 left_ids.push_back(intern_table.GetId(left[i]));
1311 }
1312 for (size_t i = 0; i < right.size(); ++i) {
1313 right_ids.push_back(intern_table.GetId(right[i]));
1314 }
1315 }
1316 return CalculateOptimalEdits(left_ids, right_ids);
1317}
GTEST_API_ std::vector< EditType > CalculateOptimalEdits(const std::vector< size_t > &left, const std::vector< size_t > &right)
Definition: gtest.cc:1227

◆ CreateUnifiedDiff()

std::string testing::internal::edit_distance::CreateUnifiedDiff ( const std::vector< std::string > &  left,
const std::vector< std::string > &  right,
size_t  context = 2 
)

Definition at line 1402 of file gtest.cc.

1404 {
1405 const std::vector<EditType> edits = CalculateOptimalEdits(left, right);
1406
1407 size_t l_i = 0, r_i = 0, edit_i = 0;
1408 std::stringstream ss;
1409 while (edit_i < edits.size()) {
1410 // Find first edit.
1411 while (edit_i < edits.size() && edits[edit_i] == kMatch) {
1412 ++l_i;
1413 ++r_i;
1414 ++edit_i;
1415 }
1416
1417 // Find the first line to include in the hunk.
1418 const size_t prefix_context = std::min(l_i, context);
1419 Hunk hunk(l_i - prefix_context + 1, r_i - prefix_context + 1);
1420 for (size_t i = prefix_context; i > 0; --i) {
1421 hunk.PushLine(' ', left[l_i - i].c_str());
1422 }
1423
1424 // Iterate the edits until we found enough suffix for the hunk or the input
1425 // is over.
1426 size_t n_suffix = 0;
1427 for (; edit_i < edits.size(); ++edit_i) {
1428 if (n_suffix >= context) {
1429 // Continue only if the next hunk is very close.
1430 auto it = edits.begin() + static_cast<int>(edit_i);
1431 while (it != edits.end() && *it == kMatch) ++it;
1432 if (it == edits.end() ||
1433 static_cast<size_t>(it - edits.begin()) - edit_i >= context) {
1434 // There is no next edit or it is too far away.
1435 break;
1436 }
1437 }
1438
1439 EditType edit = edits[edit_i];
1440 // Reset count when a non match is found.
1441 n_suffix = edit == kMatch ? n_suffix + 1 : 0;
1442
1443 if (edit == kMatch || edit == kRemove || edit == kReplace) {
1444 hunk.PushLine(edit == kMatch ? ' ' : '-', left[l_i].c_str());
1445 }
1446 if (edit == kAdd || edit == kReplace) {
1447 hunk.PushLine('+', right[r_i].c_str());
1448 }
1449
1450 // Advance indices, depending on edit type.
1451 l_i += edit != kAdd;
1452 r_i += edit != kRemove;
1453 }
1454
1455 if (!hunk.has_edits()) {
1456 // We are done. We don't want this hunk.
1457 break;
1458 }
1459
1460 hunk.PrintTo(&ss);
1461 }
1462 return ss.str();
1463}