Просмотр исходного кода

Basic fuzzy search for autocomplete

Simon Lübeß 3 лет назад
Родитель
Сommit
f9f53eb97b

+ 57 - 8
IDE/src/ui/AutoComplete.bf

@@ -382,6 +382,7 @@ namespace IDE.ui
                 public String mEntryInsert;
 				public String mDocumentation;
                 public Image mIcon;
+				public List<uint8> mMatchIndices;
 
 				public float Y
 				{
@@ -401,7 +402,31 @@ namespace IDE.ui
                         g.Draw(mIcon, 0, 0);
 
                     g.SetFont(IDEApp.sApp.mCodeFont);
-                    g.DrawString(mEntryDisplay, GS!(20), 0);
+					
+					float offset = GS!(20);
+
+					// TODO(FUZZY): this is not unicode compatible
+					for(int i < mEntryDisplay.Length)
+					{
+						char8 c = mEntryDisplay[i];
+
+						if(mMatchIndices.Contains((uint8)i))
+						{
+							g.PushColor(.Blue);
+						}
+						else
+						{
+							g.PushColor(.White);
+						}
+
+						g.DrawString(.(&c, 1), offset, 0);
+
+						offset += IDEApp.sApp.mCodeFont.GetWidth(.(&c, 1));
+
+						g.PopColor();
+					}
+					
+                    //g.DrawString(mEntryDisplay, GS!(20), 0);
                 }                
             }
 
@@ -602,8 +627,8 @@ namespace IDE.ui
 	                mMaxWidth = Math.Max(mMaxWidth, entryWidth);
 				}*/
             }
-
-            public void AddEntry(StringView entryType, StringView entryDisplay, Image icon, StringView entryInsert = default, StringView documentation = default)
+			
+			public void AddEntry(StringView entryType, StringView entryDisplay, Image icon, StringView entryInsert = default, StringView documentation = default, List<uint8> matchIndices = null)
             {                
                 var entryWidget = new:mAlloc EntryWidget();
                 entryWidget.mAutoCompleteListWidget = this;
@@ -614,6 +639,9 @@ namespace IDE.ui
 				if (!documentation.IsEmpty)
 					entryWidget.mDocumentation = new:mAlloc String(documentation);
                 entryWidget.mIcon = icon;
+				// TODO(FUZZY): There may be a better way
+				if (matchIndices != null && !matchIndices.IsEmpty)
+					entryWidget.mMatchIndices = new:mAlloc List<uint8>(matchIndices.GetEnumerator());
 
                 UpdateEntry(entryWidget, mEntryList.Count);
                 mEntryList.Add(entryWidget);
@@ -1981,9 +2009,9 @@ namespace IDE.ui
 
             InvokeWidget oldInvokeWidget = null;
             String selectString = null;
+			List<uint8> matchIndices = new:ScopedAlloc! .(256);
 			for (var entryView in info.Split('\n'))
             {
-				
 				Image entryIcon = null;
 				StringView entryType = StringView(entryView);
 				int tabPos = entryType.IndexOf('\t');
@@ -1993,13 +2021,34 @@ namespace IDE.ui
 					entryDisplay = StringView(entryView, tabPos + 1);
 					entryType = StringView(entryType, 0, tabPos);
 				}
+				StringView matches = default;
+				int matchesPos = entryDisplay.IndexOf('\x02');
+				matchIndices.Clear();
+				if (matchesPos != -1)
+				{
+					matches = StringView(entryDisplay, matchesPos + 1);
+					entryDisplay = StringView(entryDisplay, 0, matchesPos);
+
+					for(var sub in matches.Split(','))
+					{
+						if(sub == "X")
+							break;
+
+						var result = int64.Parse(sub, .HexNumber);
+
+						Debug.Assert((result case .Ok(let value)) && value <= uint8.MaxValue);
+
+						// TODO(FUZZY): we could save start and length instead of single chars
+						matchIndices.Add((uint8)result.Value);
+					}
+				}
 
 				StringView documentation = default;
-				int docPos = entryDisplay.IndexOf('\x03');
+				int docPos = matches.IndexOf('\x03');
 				if (docPos != -1)
 				{
-					documentation = StringView(entryDisplay, docPos + 1);
-					entryDisplay = StringView(entryDisplay, 0, docPos);
+					documentation = StringView(matches, docPos + 1);
+					matches = StringView(matches, 0, docPos);
 				}
 
 				StringView entryInsert = default;
@@ -2128,7 +2177,7 @@ namespace IDE.ui
 						if (!mInvokeOnly)
 						{
 							mIsFixit |= entryType == "fixit";
-                            mAutoCompleteListWidget.AddEntry(entryType, entryDisplay, entryIcon, entryInsert, documentation);
+                            mAutoCompleteListWidget.AddEntry(entryType, entryDisplay, entryIcon, entryInsert, documentation, matchIndices);
 						}
                     }                        
                 }

+ 32 - 8
IDEHelper/Compiler/BfAutoComplete.cpp

@@ -6,6 +6,9 @@
 #include "BfFixits.h"
 #include "BfResolvedTypeUtils.h"
 
+#define FTS_FUZZY_MATCH_IMPLEMENTATION
+#include "FtsFuzzyMatch.h"
+
 #pragma warning(disable:4996)
 
 using namespace llvm;
@@ -25,16 +28,16 @@ AutoCompleteBase::~AutoCompleteBase()
 	Clear();
 }
 
-AutoCompleteEntry* AutoCompleteBase::AddEntry(const AutoCompleteEntry& entry, const StringImpl& filter)
+AutoCompleteEntry* AutoCompleteBase::AddEntry(AutoCompleteEntry& entry, const StringImpl& filter) // entry is non-const: the filter match below writes entry.mScore and entry.mMatches
 { // NOTE(review): a non-const lvalue reference cannot bind to a temporary in standard C++ - confirm no caller passes an AutoCompleteEntry(...) temporary
-	if ((!DoesFilterMatch(entry.mDisplay, filter.c_str())) || (entry.mNamePrefixCount < 0))
+	if ((!DoesFilterMatch(entry.mDisplay, filter.c_str(), entry.mScore, entry.mMatches, sizeof(entry.mMatches))) || (entry.mNamePrefixCount < 0))
 		return NULL; // rejected: the filter did not match, or the name-prefix count is negative
 	return AddEntry(entry); // accepted: hand over to the unfiltered overload
 }
 
-AutoCompleteEntry* AutoCompleteBase::AddEntry(const AutoCompleteEntry& entry, const char* filter)
+AutoCompleteEntry* AutoCompleteBase::AddEntry(AutoCompleteEntry& entry, const char* filter) // entry is non-const: the filter match below writes entry.mScore and entry.mMatches
 { // NOTE(review): a non-const lvalue reference cannot bind to a temporary in standard C++ - confirm no caller passes an AutoCompleteEntry(...) temporary
-	if ((!DoesFilterMatch(entry.mDisplay, filter)) || (entry.mNamePrefixCount < 0))
+	if ((!DoesFilterMatch(entry.mDisplay, filter, entry.mScore, entry.mMatches, sizeof(entry.mMatches))) || (entry.mNamePrefixCount < 0))
 		return NULL; // rejected: the filter did not match, or the name-prefix count is negative
 	return AddEntry(entry); // accepted: hand over to the unfiltered overload
 }
@@ -60,7 +63,7 @@ AutoCompleteEntry* AutoCompleteBase::AddEntry(const AutoCompleteEntry& entry)
 	return insertedEntry;
 }
 
-bool AutoCompleteBase::DoesFilterMatch(const char* entry, const char* filter)
+bool AutoCompleteBase::DoesFilterMatch(const char* entry, const char* filter, int& score, uint8* matches, int maxMatches)
 {	
 	if (mIsGetDefinition)
 	{
@@ -73,12 +76,28 @@ bool AutoCompleteBase::DoesFilterMatch(const char* entry, const char* filter)
 	if (!mIsAutoComplete)
 		return false;
 
-	if (filter[0] == 0)
+	if (filter[0] == '\0')
+	{
+		// Kinda dirty
+		matches[0] = UINT8_MAX;
+		matches[1] = 0;
 		return true;
+	}
 
 	int filterLen = (int)strlen(filter);
 	int entryLen = (int)strlen(entry);
 
+	if (filterLen > entryLen)
+	{
+		// Kinda dirty
+		matches[0] = UINT8_MAX;
+		matches[1] = 0;
+		return false;
+	}
+
+	// TODO: also do matches (but probably optimize them)
+	return fts::fuzzy_match(filter, entry, score, matches, maxMatches);
+	/*
 	bool hasUnderscore = false;
 	bool checkInitials = filterLen > 1;
 	for (int i = 0; i < (int)filterLen; i++)
@@ -126,6 +145,7 @@ bool AutoCompleteBase::DoesFilterMatch(const char* entry, const char* filter)
 		return false;
 	*(initialStrP++) = 0;
 	return strnicmp(filter, initialStr, filterLen) == 0;
+	*/
 }
 
 void AutoCompleteBase::Clear()
@@ -550,7 +570,9 @@ void BfAutoComplete::AddTypeDef(BfTypeDef* typeDef, const StringImpl& filter, bo
 			return;
 		}
 
-		if (!DoesFilterMatch(name.c_str(), filter.c_str()))
+		int score;
+		uint8 matches[256];
+		if (!DoesFilterMatch(name.c_str(), filter.c_str(), score, matches, sizeof(matches)))
 			return;		
 
 		auto type = mModule->ResolveTypeDef(typeDef, BfPopulateType_Declaration);
@@ -1128,8 +1150,10 @@ void BfAutoComplete::AddExtensionMethods(BfTypeInstance* targetType, BfTypeInsta
 		if (methodInstance == NULL)
 			continue;
 
+		int score;
+		uint8 matches[256];
 		// Do filter match first- may be cheaper than generic validation
-		if (!DoesFilterMatch(methodDef->mName.c_str(), filter.c_str()))
+		if (!DoesFilterMatch(methodDef->mName.c_str(), filter.c_str(), score, matches, sizeof(matches)))
 			continue;
 
 		auto thisType = methodInstance->GetParamType(0);		

+ 8 - 3
IDEHelper/Compiler/BfAutoComplete.h

@@ -16,6 +16,8 @@ public:
 	const char* mDisplay;
 	const char* mDocumentation;
 	int8 mNamePrefixCount;
+	int mScore;
+	uint8 mMatches[256];
 
 public:
 	AutoCompleteEntry()
@@ -29,6 +31,7 @@ public:
 		mDisplay = display;
 		mDocumentation = NULL;
 		mNamePrefixCount = 0;
+		mScore = 0;
 	}
 
 	AutoCompleteEntry(const char* entryType, const StringImpl& display)
@@ -37,6 +40,7 @@ public:
 		mDisplay = display.c_str();
 		mDocumentation = NULL;
 		mNamePrefixCount = 0;
+		mScore = 0;
 	}
 
 	AutoCompleteEntry(const char* entryType, const StringImpl& display, int namePrefixCount)
@@ -45,6 +49,7 @@ public:
 		mDisplay = display.c_str();
 		mDocumentation = NULL;
 		mNamePrefixCount = (int8)namePrefixCount;
+		mScore = 0;
 	}
 	
 	bool operator==(const AutoCompleteEntry& other) const
@@ -100,9 +105,9 @@ public:
 	int mInsertStartIdx;
 	int mInsertEndIdx;
 
-	bool DoesFilterMatch(const char* entry, const char* filter);		
-	AutoCompleteEntry* AddEntry(const AutoCompleteEntry& entry, const StringImpl& filter);	
-	AutoCompleteEntry* AddEntry(const AutoCompleteEntry& entry, const char* filter);
+	bool DoesFilterMatch(const char* entry, const char* filter, int& score, uint8* matches, int maxMatches);
+	AutoCompleteEntry* AddEntry(AutoCompleteEntry& entry, const StringImpl& filter);	
+	AutoCompleteEntry* AddEntry(AutoCompleteEntry& entry, const char* filter);
 	AutoCompleteEntry* AddEntry(const AutoCompleteEntry& entry);
 
 	AutoCompleteBase();

+ 25 - 1
IDEHelper/Compiler/BfCompiler.cpp

@@ -8007,7 +8007,9 @@ void BfCompiler::GenerateAutocompleteInfo()
 		}
 		std::sort(entries.begin(), entries.end(), [](AutoCompleteEntry* lhs, AutoCompleteEntry* rhs)
 			{
-				return stricmp(lhs->mDisplay, rhs->mDisplay) < 0;
+				// TODO(FUZZY): SORT BY Score
+				return lhs->mScore > rhs->mScore;
+				//return stricmp(lhs->mDisplay, rhs->mDisplay) < 0;
 			});
 				
 		String docString;
@@ -8022,6 +8024,28 @@ void BfCompiler::GenerateAutocompleteInfo()
 				autoCompleteResultString += '@';
 			autoCompleteResultString += String(entry->mDisplay);
 
+			// TODO(FUZZY): OUTPUT
+			// TODO(FUZZY): this is not really efficient
+			autoCompleteResultString += "\x02";
+			for (int i = 0; i < 256; i++)
+			{
+				int match = entry->mMatches[i];
+
+				// no more matches after this
+				if (match == 0 && i != 0)
+					break;
+
+				// Need max 3 chars (largest Hex (FF) + '\0')
+				char buffer[3];
+
+				_itoa_s(match, buffer, 16);
+
+				autoCompleteResultString += String(buffer);
+				autoCompleteResultString += ",";
+			}
+
+			autoCompleteResultString += "X";
+
 			if (entry->mDocumentation != NULL)
 			{
 				autoCompleteResultString += '\x03';

+ 223 - 0
IDEHelper/Compiler/FtsFuzzyMatch.h

@@ -0,0 +1,223 @@
+// LICENSE
+//
+//   This software is dual-licensed to the public domain and under the following
+//   license: you are granted a perpetual, irrevocable license to copy, modify,
+//   publish, and distribute this file as you see fit.
+//
+// VERSION 
+//   0.2.0  (2017-02-18)  Scored matches perform exhaustive search for best score
+//   0.1.0  (2016-03-28)  Initial release
+//
+// AUTHOR
+//   Forrest Smith
+//
+// NOTES
+//   Compiling
+//     You MUST add '#define FTS_FUZZY_MATCH_IMPLEMENTATION' before including this header in ONE source file to create implementation.
+//
+//   fuzzy_match_simple(...)
+//     Returns true if each character in pattern is found sequentially within str
+//
+//   fuzzy_match(...)
+//     Returns true if pattern is found AND calculates a score.
+//     Performs exhaustive search via recursion to find all possible matches and match with highest score.
+//     Scores values have no intrinsic meaning. Possible score range is not normalized and varies with pattern.
+//     Recursion is limited internally (default=10) to prevent degenerate cases (pattern="aaaaaa" str="aaaaaaaaaaaaaaaaaaaaaaaaaaaaaa")
+//     Uses uint8_t for match indices. Therefore patterns are limited to 256 characters, and match positions beyond index 255 of str cannot be represented.
+//     Score system should be tuned for YOUR use case. Words, sentences, file names, or method names all prefer different tuning.
+
+
+#ifndef FTS_FUZZY_MATCH_H
+#define FTS_FUZZY_MATCH_H
+
+
+#include <cstdint> // uint8_t
+#include <ctype.h> // ::tolower, ::toupper
+#include <cstring> // memcpy
+
+#include <cstdio>
+
+// Public interface
+namespace fts { // declarations only; the bodies are compiled when FTS_FUZZY_MATCH_IMPLEMENTATION is defined
+    static bool fuzzy_match_simple(char const* pattern, char const* str); // true if every pattern char occurs in str, in order (compared via tolower); no score
+    static bool fuzzy_match(char const* pattern, char const* str, int& outScore); // scored match; the matched positions are discarded
+    static bool fuzzy_match(char const* pattern, char const* str, int& outScore, uint8_t* matches, int maxMatches); // scored match; matches receives the indices into str of the matched chars
+}
+
+
+#ifdef FTS_FUZZY_MATCH_IMPLEMENTATION
+namespace fts {
+
+    // Forward declarations for "private" implementation
+    namespace fuzzy_internal { // recursive worker called by the five-argument fts::fuzzy_match
+        static bool fuzzy_match_recursive(const char* pattern, const char* str, int& outScore, const char* strBegin, // strBegin: start of the full string; match indices are computed as (str - strBegin)
+            uint8_t const* srcMatches, uint8_t* newMatches, int maxMatches, int nextMatch, // newMatches is named 'matches' in the definition; nextMatch is the next slot to fill
+            int& recursionCount, int recursionLimit); // recursionCount is passed by reference and incremented on every call
+    }
+
+    // Public interface
+    // Returns true if every character of pattern occurs in str, in the same
+    // order, comparing characters through tolower(). No score is produced and
+    // no match positions are recorded. An empty pattern always matches.
+    static bool fuzzy_match_simple(char const* pattern, char const* str) {
+        while (*pattern != '\0' && *str != '\0') {
+            // The <ctype.h> functions are undefined for negative arguments, so
+            // convert through unsigned char to stay safe on non-ASCII bytes.
+            if (tolower(static_cast<unsigned char>(*pattern)) == tolower(static_cast<unsigned char>(*str)))
+                ++pattern;
+            ++str;
+        }
+
+        // The pattern matched only if it was consumed completely.
+        return *pattern == '\0';
+    }
+
+    // Scored match for callers that do not need the matched positions: the
+    // positions are written to a local scratch buffer and thrown away.
+    static bool fuzzy_match(char const* pattern, char const* str, int& outScore) {
+        uint8_t scratch[256];
+        const int scratchBytes = (int)sizeof(scratch);
+        return fuzzy_match(pattern, str, outScore, scratch, scratchBytes);
+    }
+
+    // Scored match that also reports which characters of str were matched.
+    // Starts the recursive search at the beginning of str with no prior
+    // matches and a budget of 10 recursive calls.
+    static bool fuzzy_match(char const* pattern, char const* str, int& outScore, uint8_t* matches, int maxMatches) {
+        const int callBudget = 10;
+        int callsMade = 0;
+
+        return fuzzy_internal::fuzzy_match_recursive(
+            pattern, str, outScore,
+            str,        // strBegin
+            nullptr,    // srcMatches: nothing matched yet
+            matches, maxMatches,
+            0,          // nextMatch
+            callsMade, callBudget);
+    }
+
+    // Private implementation
+    // Worker behind fts::fuzzy_match. Walks pattern and str together and, at
+    // every matching character, also tries (recursively) the alternative of
+    // skipping that character; the best-scoring alignment wins.
+    //
+    //   pattern, str    remaining pattern / remaining text, both NUL-terminated
+    //   outScore        receives the best score; only written when true is returned
+    //   strBegin        start of the full text; a match index is (str - strBegin)
+    //   srcMatches      indices already matched by the caller, or nullptr
+    //   matches         out: indices of the matched characters; a 0 is stored
+    //                   after the last index whenever there is room for it
+    //   maxMatches      capacity of matches, in elements
+    //   nextMatch       number of leading entries of srcMatches that are valid
+    //   recursionCount  running number of calls, shared through the reference
+    //   recursionLimit  the call gives up once recursionCount reaches this value
+    //
+    // Returns true if the whole pattern was matched, either by this call or by
+    // one of the recursive alternatives.
+    static bool fuzzy_internal::fuzzy_match_recursive(const char* pattern, const char* str, int& outScore,
+        const char* strBegin, uint8_t const* srcMatches, uint8_t* matches, int maxMatches,
+        int nextMatch, int& recursionCount, int recursionLimit)
+    {
+        // Capacity of the local scratch buffers that hold recursive alternatives
+        const int scratchSize = 256;
+
+        // Count recursions
+        ++recursionCount;
+        if (recursionCount >= recursionLimit)
+            return false;
+
+        // Detect end of strings
+        if (*pattern == '\0' || *str == '\0')
+            return false;
+
+        // Recursion params
+        bool recursiveMatch = false;
+        uint8_t bestRecursiveMatches[scratchSize];
+        int bestRecursiveScore = 0;
+
+        // Loop through pattern and str looking for a match
+        bool first_match = true;
+        while (*pattern != '\0' && *str != '\0') {
+
+            // Found match. The <ctype.h> functions are undefined for negative
+            // arguments, so convert through unsigned char first.
+            if (tolower(static_cast<unsigned char>(*pattern)) == tolower(static_cast<unsigned char>(*str))) {
+
+                // Supplied matches buffer was too short
+                if (nextMatch >= maxMatches)
+                    return false;
+
+                // "Copy-on-Write" srcMatches into matches
+                if (first_match && srcMatches) {
+                    memcpy(matches, srcMatches, nextMatch);
+                    first_match = false;
+                }
+
+                // Recursive call that "skips" this match
+                uint8_t recursiveMatches[scratchSize];
+                int recursiveScore;
+                if (fuzzy_match_recursive(pattern, str + 1, recursiveScore, strBegin, matches, recursiveMatches, scratchSize, nextMatch, recursionCount, recursionLimit)) {
+
+                    // Pick best recursive score
+                    if (!recursiveMatch || recursiveScore > bestRecursiveScore) {
+                        memcpy(bestRecursiveMatches, recursiveMatches, scratchSize);
+                        bestRecursiveScore = recursiveScore;
+                    }
+                    recursiveMatch = true;
+                }
+
+                // Advance
+                matches[nextMatch++] = (uint8_t)(str - strBegin);
+                // Terminate the list directly after the newest entry so that readers
+                // know which match is the last one. nextMatch already points at the
+                // free slot here; skip the terminator when the buffer is full.
+                if (nextMatch < maxMatches)
+                    matches[nextMatch] = 0;
+                ++pattern;
+            }
+            ++str;
+        }
+
+        // Determine if full pattern was matched
+        bool matched = *pattern == '\0';
+
+        // Calculate score
+        if (matched) {
+            const int sequential_bonus = 15;            // bonus for adjacent matches
+            const int separator_bonus = 30;             // bonus if match occurs after a separator
+            const int camel_bonus = 30;                 // bonus if match is uppercase and prev is lower
+            const int first_letter_bonus = 15;          // bonus if the first letter is matched
+
+            const int leading_letter_penalty = -5;      // penalty applied for every letter in str before the first match
+            const int max_leading_letter_penalty = -15; // maximum penalty for leading letters
+            const int unmatched_letter_penalty = -1;    // penalty for every letter that doesn't match
+
+            // Iterate str to end
+            while (*str != '\0')
+                ++str;
+
+            // Initialize score
+            outScore = 100;
+
+            // Apply leading letter penalty
+            int penalty = leading_letter_penalty * matches[0];
+            if (penalty < max_leading_letter_penalty)
+                penalty = max_leading_letter_penalty;
+            outScore += penalty;
+
+            // Apply unmatched penalty
+            int unmatched = (int)(str - strBegin) - nextMatch;
+            outScore += unmatched_letter_penalty * unmatched;
+
+            // Apply ordering bonuses
+            for (int i = 0; i < nextMatch; ++i) {
+                uint8_t currIdx = matches[i];
+
+                if (i > 0) {
+                    uint8_t prevIdx = matches[i - 1];
+
+                    // Sequential
+                    if (currIdx == (prevIdx + 1))
+                        outScore += sequential_bonus;
+                }
+
+                // Check for bonuses based on neighbor character value
+                if (currIdx > 0) {
+                    // Camel case
+                    char neighbor = strBegin[currIdx - 1];
+                    char curr = strBegin[currIdx];
+                    if (::islower(static_cast<unsigned char>(neighbor)) && ::isupper(static_cast<unsigned char>(curr)))
+                        outScore += camel_bonus;
+
+                    // Separator
+                    bool neighborSeparator = neighbor == '_' || neighbor == ' ';
+                    if (neighborSeparator)
+                        outScore += separator_bonus;
+                }
+                else {
+                    // First letter
+                    outScore += first_letter_bonus;
+                }
+            }
+        }
+
+        // Return best result
+        if (recursiveMatch && (!matched || bestRecursiveScore > outScore)) {
+            // Recursive score is better than "this". Never read more than the
+            // scratch buffer holds, even if the caller's buffer is larger.
+            memcpy(matches, bestRecursiveMatches, maxMatches < scratchSize ? maxMatches : scratchSize);
+            outScore = bestRecursiveScore;
+            return true;
+        }
+        else if (matched) {
+            // "this" score is better than recursive
+            return true;
+        }
+        else {
+            // no match
+            return false;
+        }
+    }
+} // namespace fts
+
+#endif // FTS_FUZZY_MATCH_IMPLEMENTATION
+
+#endif // FTS_FUZZY_MATCH_H

+ 1 - 0
IDEHelper/IDEHelper.vcxproj

@@ -400,6 +400,7 @@
     <ClInclude Include="Compiler\BfUtil.h" />
     <ClInclude Include="Compiler\BfVarDeclChecker.h" />
     <ClInclude Include="Compiler\CeMachine.h" />
+    <ClInclude Include="Compiler\FtsFuzzyMatch.h" />
     <ClInclude Include="Compiler\MemReporter.h" />
     <ClInclude Include="DbgMiniDump.h" />
     <ClInclude Include="Debugger.h" />

+ 1 - 0
IDEHelper/IDEHelper.vcxproj.filters

@@ -399,5 +399,6 @@
     <ClInclude Include="Compiler\CeMachine.h">
       <Filter>Compiler</Filter>
     </ClInclude>
+    <ClInclude Include="Compiler\FtsFuzzyMatch.h" />
   </ItemGroup>
 </Project>