aboutsummaryrefslogtreecommitdiff
path: root/src/brussel.engine/FuzzyMatch.cpp
blob: 0ab604d952caf44df53e33045c2fd662466c4964 (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
// Adapted from https://github.com/forrestthewoods/lib_fts/blob/master/code/fts_fuzzy_match.h
#include "FuzzyMatch.hpp"

#include <cctype>
#include <cstring>

namespace FuzzyMatch {

namespace P6503_UNITY_ID {
	bool SearchRecursive(const char* pattern, const char* src, int& outScore, const char* strBegin, const uint8_t srcMatches[], uint8_t newMatches[], int maxMatches, int& nextMatch, int& recursionCount, int recursionLimit);
} // namespace P6503_UNITY_ID

bool SearchSimple(char const* pattern, char const* haystack) {
	while (*pattern != '\0' && *haystack != '\0') {
		if (tolower(*pattern) == tolower(*haystack)) {
			++pattern;
		}
		++haystack;
	}

	return *pattern == '\0';
}

bool Search(char const* pattern, char const* haystack, int& outScore) {
	uint8_t matches[256];
	int matchCount = 0;
	return Search(pattern, haystack, outScore, matches, sizeof(matches), matchCount);
}

bool Search(char const* pattern, char const* haystack, int& outScore, uint8_t matches[], int maxMatches, int& outMatches) {
	int recursionCount = 0;
	int recursionLimit = 10;
	int newMatches = 0;
	bool result = P6503_UNITY_ID::SearchRecursive(pattern, haystack, outScore, haystack, nullptr, matches, maxMatches, newMatches, recursionCount, recursionLimit);
	outMatches = newMatches;
	return result;
}

namespace P6503_UNITY_ID {
	bool SearchRecursive(const char* pattern, const char* src, int& outScore, const char* strBegin, const uint8_t srcMatches[], uint8_t newMatches[], int maxMatches, int& nextMatch, int& recursionCount, int recursionLimit) {
		// Count recursions
		++recursionCount;
		if (recursionCount >= recursionLimit) {
			return false;
		}

		// Detect end of strings
		if (*pattern == '\0' || *src == '\0') {
			return false;
		}

		// Recursion params
		bool recursiveMatch = false;
		uint8_t bestRecursiveMatches[256];
		int bestRecursiveScore = 0;

		// Loop through pattern and str looking for a match
		bool firstMatch = true;
		while (*pattern != '\0' && *src != '\0') {
			// Found match
			if (tolower(*pattern) == tolower(*src)) {
				// Supplied matches buffer was too short
				if (nextMatch >= maxMatches) {
					return false;
				}

				// "Copy-on-Write" srcMatches into matches
				if (firstMatch && srcMatches) {
					memcpy(newMatches, srcMatches, nextMatch);
					firstMatch = false;
				}

				// Recursive call that "skips" this match
				uint8_t recursiveMatches[256];
				int recursiveScore;
				int recursiveNextMatch = nextMatch;
				if (SearchRecursive(pattern, src + 1, recursiveScore, strBegin, newMatches, recursiveMatches, sizeof(recursiveMatches), recursiveNextMatch, recursionCount, recursionLimit)) {
					// Pick the best recursive score
					if (!recursiveMatch || recursiveScore > bestRecursiveScore) {
						memcpy(bestRecursiveMatches, recursiveMatches, 256);
						bestRecursiveScore = recursiveScore;
					}
					recursiveMatch = true;
				}

				// Advance
				newMatches[nextMatch++] = (uint8_t)(src - strBegin);
				++pattern;
			}
			++src;
		}

		// Determine if full pattern was matched
		bool matched = *pattern == '\0';

		// Calculate score
		if (matched) {
			const int sequentialBonus = 15; // bonus for adjacent matches
			const int separatorBonus = 30; // bonus if match occurs after a separator
			const int camelBonus = 30; // bonus if match is uppercase and prev is lower
			const int firstLetterBonus = 15; // bonus if the first letter is matched

			const int leadingLetterPenalty = -5; // penalty applied for every letter in str before the first match
			const int maxLeadingLetterPenalty = -15; // maximum penalty for leading letters
			const int unmatchedLetterPenalty = -1; // penalty for every letter that doesn't matter

			// Iterate str to end
			while (*src != '\0') {
				++src;
			}

			// Initialize score
			outScore = 100;

			// Apply leading letter penalty
			int penalty = leadingLetterPenalty * newMatches[0];
			if (penalty < maxLeadingLetterPenalty) {
				penalty = maxLeadingLetterPenalty;
			}
			outScore += penalty;

			// Apply unmatched penalty
			int unmatched = (int)(src - strBegin) - nextMatch;
			outScore += unmatchedLetterPenalty * unmatched;

			// Apply ordering bonuses
			for (int i = 0; i < nextMatch; ++i) {
				uint8_t currIdx = newMatches[i];

				if (i > 0) {
					uint8_t prevIdx = newMatches[i - 1];

					// Sequential
					if (currIdx == (prevIdx + 1))
						outScore += sequentialBonus;
				}

				// Check for bonuses based on neighbor character value
				if (currIdx > 0) {
					// Camel case
					char neighbor = strBegin[currIdx - 1];
					char curr = strBegin[currIdx];
					if (::islower(neighbor) && ::isupper(curr)) {
						outScore += camelBonus;
					}

					// Separator
					bool neighborSeparator = neighbor == '_' || neighbor == ' ';
					if (neighborSeparator) {
						outScore += separatorBonus;
					}
				} else {
					// First letter
					outScore += firstLetterBonus;
				}
			}
		}

		// Return best result
		if (recursiveMatch && (!matched || bestRecursiveScore > outScore)) {
			// Recursive score is better than "this"
			memcpy(newMatches, bestRecursiveMatches, maxMatches);
			outScore = bestRecursiveScore;
			return true;
		} else if (matched) {
			// "this" score is better than recursive
			return true;
		} else {
			// no match
			return false;
		}
	}
} // namespace P6503_UNITY_ID
} // namespace FuzzyMatch