]> git.lizzy.rs Git - rust.git/commitdiff
rustdoc: compute maximum Levenshtein distance based on the query
authorMichael Howell <michael@notriddle.com>
Sat, 29 Oct 2022 04:54:09 +0000 (21:54 -0700)
committerMichael Howell <michael@notriddle.com>
Sat, 21 Jan 2023 07:11:39 +0000 (00:11 -0700)
The heuristic is pretty close to the name resolver.

Fixes #103357

src/librustdoc/html/static/js/search.js
tests/rustdoc-js-std/typed-query.js
tests/rustdoc-js/doc-alias.js

index 88592fa0c84c1453fbda216b9f3d92eddb0ab307..251e806c2d90d086c5bec0a25ba4769ec3a7ba46 100644 (file)
@@ -112,7 +112,6 @@ function levenshtein(s1, s2) {
 }
 
 function initSearch(rawSearchIndex) {
-    const MAX_LEV_DISTANCE = 3;
     const MAX_RESULTS = 200;
     const NO_TYPE_FILTER = -1;
     /**
@@ -897,13 +896,13 @@ function initSearch(rawSearchIndex) {
          * @param {QueryElement} elem  - The element from the parsed query.
          * @param {integer} defaultLev - This is the value to return in case there are no generics.
          *
-         * @return {integer}           - Returns the best match (if any) or `MAX_LEV_DISTANCE + 1`.
+         * @return {integer}           - Returns the best match (if any) or `maxLevDistance + 1`.
          */
-        function checkGenerics(row, elem, defaultLev) {
+        function checkGenerics(row, elem, defaultLev, maxLevDistance) {
             if (row.generics.length === 0) {
-                return elem.generics.length === 0 ? defaultLev : MAX_LEV_DISTANCE + 1;
+                return elem.generics.length === 0 ? defaultLev : maxLevDistance + 1;
             } else if (row.generics.length > 0 && row.generics[0].name === null) {
-                return checkGenerics(row.generics[0], elem, defaultLev);
+                return checkGenerics(row.generics[0], elem, defaultLev, maxLevDistance);
             }
             // The names match, but we need to be sure that all generics kinda
             // match as well.
@@ -914,8 +913,8 @@ function initSearch(rawSearchIndex) {
                     elem_name = entry.name;
                     if (elem_name === "") {
                         // Pure generic, needs to check into it.
-                        if (checkGenerics(entry, elem, MAX_LEV_DISTANCE + 1) !== 0) {
-                            return MAX_LEV_DISTANCE + 1;
+                        if (checkGenerics(entry, elem, maxLevDistance + 1, maxLevDistance) !== 0) {
+                            return maxLevDistance + 1;
                         }
                         continue;
                     }
@@ -942,7 +941,7 @@ function initSearch(rawSearchIndex) {
                         }
                     }
                     if (match === null) {
-                        return MAX_LEV_DISTANCE + 1;
+                        return maxLevDistance + 1;
                     }
                     elems[match] -= 1;
                     if (elems[match] === 0) {
@@ -951,7 +950,7 @@ function initSearch(rawSearchIndex) {
                 }
                 return 0;
             }
-            return MAX_LEV_DISTANCE + 1;
+            return maxLevDistance + 1;
         }
 
         /**
@@ -963,10 +962,10 @@ function initSearch(rawSearchIndex) {
           *
           * @return {integer} - Returns a Levenshtein distance to the best match.
           */
-        function checkIfInGenerics(row, elem) {
-            let lev = MAX_LEV_DISTANCE + 1;
+        function checkIfInGenerics(row, elem, maxLevDistance) {
+            let lev = maxLevDistance + 1;
             for (const entry of row.generics) {
-                lev = Math.min(checkType(entry, elem, true), lev);
+                lev = Math.min(checkType(entry, elem, true, maxLevDistance), lev);
                 if (lev === 0) {
                     break;
                 }
@@ -983,15 +982,15 @@ function initSearch(rawSearchIndex) {
           * @param {boolean} literalSearch
           *
           * @return {integer} - Returns a Levenshtein distance to the best match. If there is
-          *                     no match, returns `MAX_LEV_DISTANCE + 1`.
+          *                     no match, returns `maxLevDistance + 1`.
           */
-        function checkType(row, elem, literalSearch) {
+        function checkType(row, elem, literalSearch, maxLevDistance) {
             if (row.name === null) {
                 // This is a pure "generic" search, no need to run other checks.
                 if (row.generics.length > 0) {
-                    return checkIfInGenerics(row, elem);
+                    return checkIfInGenerics(row, elem, maxLevDistance);
                 }
-                return MAX_LEV_DISTANCE + 1;
+                return maxLevDistance + 1;
             }
 
             let lev = levenshtein(row.name, elem.name);
@@ -1005,9 +1004,9 @@ function initSearch(rawSearchIndex) {
                             return 0;
                         }
                     }
-                    return MAX_LEV_DISTANCE + 1;
+                    return maxLevDistance + 1;
                 } else if (elem.generics.length > 0) {
-                    return checkGenerics(row, elem, MAX_LEV_DISTANCE + 1);
+                    return checkGenerics(row, elem, maxLevDistance + 1, maxLevDistance);
                 }
                 return 0;
             } else if (row.generics.length > 0) {
@@ -1017,22 +1016,20 @@ function initSearch(rawSearchIndex) {
                     }
                     // The name didn't match so we now check if the type we're looking for is inside
                     // the generics!
-                    lev = checkIfInGenerics(row, elem);
-                    // Now whatever happens, the returned distance is "less good" so we should mark
-                    // it as such, and so we add 0.5 to the distance to make it "less good".
-                    return lev + 0.5;
-                } else if (lev > MAX_LEV_DISTANCE) {
+                    lev = Math.min(lev, checkIfInGenerics(row, elem, maxLevDistance));
+                    return lev;
+                } else if (lev > maxLevDistance) {
                     // So our item's name doesn't match at all and has generics.
                     //
                     // Maybe it's present in a sub generic? For example "f<A<B<C>>>()", if we're
                     // looking for "B<C>", we'll need to go down.
-                    return checkIfInGenerics(row, elem);
+                    return checkIfInGenerics(row, elem, maxLevDistance);
                 } else {
                     // At this point, the name kinda match and we have generics to check, so
                     // let's go!
-                    const tmp_lev = checkGenerics(row, elem, lev);
-                    if (tmp_lev > MAX_LEV_DISTANCE) {
-                        return MAX_LEV_DISTANCE + 1;
+                    const tmp_lev = checkGenerics(row, elem, lev, maxLevDistance);
+                    if (tmp_lev > maxLevDistance) {
+                        return maxLevDistance + 1;
                     }
                     // We compute the median value of both checks and return it.
                     return (tmp_lev + lev) / 2;
@@ -1040,7 +1037,7 @@ function initSearch(rawSearchIndex) {
             } else if (elem.generics.length > 0) {
                 // In this case, we were expecting generics but there isn't so we simply reject this
                 // one.
-                return MAX_LEV_DISTANCE + 1;
+                return maxLevDistance + 1;
             }
             // No generics on our query or on the target type so we can return without doing
             // anything else.
@@ -1055,23 +1052,26 @@ function initSearch(rawSearchIndex) {
          * @param {integer} typeFilter
          *
          * @return {integer} - Returns a Levenshtein distance to the best match. If there is no
-         *                      match, returns `MAX_LEV_DISTANCE + 1`.
+         *                      match, returns `maxLevDistance + 1`.
          */
-        function findArg(row, elem, typeFilter) {
-            let lev = MAX_LEV_DISTANCE + 1;
+        function findArg(row, elem, typeFilter, maxLevDistance) {
+            let lev = maxLevDistance + 1;
 
             if (row && row.type && row.type.inputs && row.type.inputs.length > 0) {
                 for (const input of row.type.inputs) {
                     if (!typePassesFilter(typeFilter, input.ty)) {
                         continue;
                     }
-                    lev = Math.min(lev, checkType(input, elem, parsedQuery.literalSearch));
+                    lev = Math.min(
+                        lev,
+                        checkType(input, elem, parsedQuery.literalSearch, maxLevDistance)
+                    );
                     if (lev === 0) {
                         return 0;
                     }
                 }
             }
-            return parsedQuery.literalSearch ? MAX_LEV_DISTANCE + 1 : lev;
+            return parsedQuery.literalSearch ? maxLevDistance + 1 : lev;
         }
 
         /**
@@ -1082,10 +1082,10 @@ function initSearch(rawSearchIndex) {
          * @param {integer} typeFilter
          *
          * @return {integer} - Returns a Levenshtein distance to the best match. If there is no
-         *                      match, returns `MAX_LEV_DISTANCE + 1`.
+         *                      match, returns `maxLevDistance + 1`.
          */
-        function checkReturned(row, elem, typeFilter) {
-            let lev = MAX_LEV_DISTANCE + 1;
+        function checkReturned(row, elem, typeFilter, maxLevDistance) {
+            let lev = maxLevDistance + 1;
 
             if (row && row.type && row.type.output.length > 0) {
                 const ret = row.type.output;
@@ -1093,20 +1093,23 @@ function initSearch(rawSearchIndex) {
                     if (!typePassesFilter(typeFilter, ret_ty.ty)) {
                         continue;
                     }
-                    lev = Math.min(lev, checkType(ret_ty, elem, parsedQuery.literalSearch));
+                    lev = Math.min(
+                        lev,
+                        checkType(ret_ty, elem, parsedQuery.literalSearch, maxLevDistance)
+                    );
                     if (lev === 0) {
                         return 0;
                     }
                 }
             }
-            return parsedQuery.literalSearch ? MAX_LEV_DISTANCE + 1 : lev;
+            return parsedQuery.literalSearch ? maxLevDistance + 1 : lev;
         }
 
-        function checkPath(contains, ty) {
+        function checkPath(contains, ty, maxLevDistance) {
             if (contains.length === 0) {
                 return 0;
             }
-            let ret_lev = MAX_LEV_DISTANCE + 1;
+            let ret_lev = maxLevDistance + 1;
             const path = ty.path.split("::");
 
             if (ty.parent && ty.parent.name) {
@@ -1116,7 +1119,7 @@ function initSearch(rawSearchIndex) {
             const length = path.length;
             const clength = contains.length;
             if (clength > length) {
-                return MAX_LEV_DISTANCE + 1;
+                return maxLevDistance + 1;
             }
             for (let i = 0; i < length; ++i) {
                 if (i + clength > length) {
@@ -1126,7 +1129,7 @@ function initSearch(rawSearchIndex) {
                 let aborted = false;
                 for (let x = 0; x < clength; ++x) {
                     const lev = levenshtein(path[i + x], contains[x]);
-                    if (lev > MAX_LEV_DISTANCE) {
+                    if (lev > maxLevDistance) {
                         aborted = true;
                         break;
                     }
@@ -1231,7 +1234,7 @@ function initSearch(rawSearchIndex) {
          * following condition:
          *
          * * If it is a "literal search" (`parsedQuery.literalSearch`), then `lev` must be 0.
-         * * If it is not a "literal search", `lev` must be <= `MAX_LEV_DISTANCE`.
+         * * If it is not a "literal search", `lev` must be <= `maxLevDistance`.
          *
          * The `results` map contains information which will be used to sort the search results:
          *
@@ -1249,8 +1252,8 @@ function initSearch(rawSearchIndex) {
          * @param {integer} lev
          * @param {integer} path_lev
          */
-        function addIntoResults(results, fullId, id, index, lev, path_lev) {
-            const inBounds = lev <= MAX_LEV_DISTANCE || index !== -1;
+        function addIntoResults(results, fullId, id, index, lev, path_lev, maxLevDistance) {
+            const inBounds = lev <= maxLevDistance || index !== -1;
             if (lev === 0 || (!parsedQuery.literalSearch && inBounds)) {
                 if (results[fullId] !== undefined) {
                     const result = results[fullId];
@@ -1289,7 +1292,8 @@ function initSearch(rawSearchIndex) {
             elem,
             results_others,
             results_in_args,
-            results_returned
+            results_returned,
+            maxLevDistance
         ) {
             if (!row || (filterCrates !== null && row.crate !== filterCrates)) {
                 return;
@@ -1298,13 +1302,13 @@ function initSearch(rawSearchIndex) {
             const fullId = row.id;
             const searchWord = searchWords[pos];
 
-            const in_args = findArg(row, elem, parsedQuery.typeFilter);
-            const returned = checkReturned(row, elem, parsedQuery.typeFilter);
+            const in_args = findArg(row, elem, parsedQuery.typeFilter, maxLevDistance);
+            const returned = checkReturned(row, elem, parsedQuery.typeFilter, maxLevDistance);
 
             // path_lev is 0 because no parent path information is currently stored
             // in the search index
-            addIntoResults(results_in_args, fullId, pos, -1, in_args, 0);
-            addIntoResults(results_returned, fullId, pos, -1, returned, 0);
+            addIntoResults(results_in_args, fullId, pos, -1, in_args, 0, maxLevDistance);
+            addIntoResults(results_returned, fullId, pos, -1, returned, 0, maxLevDistance);
 
             if (!typePassesFilter(parsedQuery.typeFilter, row.ty)) {
                 return;
@@ -1328,16 +1332,16 @@ function initSearch(rawSearchIndex) {
             // No need to check anything else if it's a "pure" generics search.
             if (elem.name.length === 0) {
                 if (row.type !== null) {
-                    lev = checkGenerics(row.type, elem, MAX_LEV_DISTANCE + 1);
+                    lev = checkGenerics(row.type, elem, maxLevDistance + 1, maxLevDistance);
                     // path_lev is 0 because we know it's empty
-                    addIntoResults(results_others, fullId, pos, index, lev, 0);
+                    addIntoResults(results_others, fullId, pos, index, lev, 0, maxLevDistance);
                 }
                 return;
             }
 
             if (elem.fullPath.length > 1) {
-                path_lev = checkPath(elem.pathWithoutLast, row);
-                if (path_lev > MAX_LEV_DISTANCE) {
+                path_lev = checkPath(elem.pathWithoutLast, row, maxLevDistance);
+                if (path_lev > maxLevDistance) {
                     return;
                 }
             }
@@ -1351,11 +1355,11 @@ function initSearch(rawSearchIndex) {
 
             lev = levenshtein(searchWord, elem.pathLast);
 
-            if (index === -1 && lev + path_lev > MAX_LEV_DISTANCE) {
+            if (index === -1 && lev + path_lev > maxLevDistance) {
                 return;
             }
 
-            addIntoResults(results_others, fullId, pos, index, lev, path_lev);
+            addIntoResults(results_others, fullId, pos, index, lev, path_lev, maxLevDistance);
         }
 
         /**
@@ -1367,7 +1371,7 @@ function initSearch(rawSearchIndex) {
          * @param {integer} pos      - Position in the `searchIndex`.
          * @param {Object} results
          */
-        function handleArgs(row, pos, results) {
+        function handleArgs(row, pos, results, maxLevDistance) {
             if (!row || (filterCrates !== null && row.crate !== filterCrates)) {
                 return;
             }
@@ -1379,7 +1383,7 @@ function initSearch(rawSearchIndex) {
             function checkArgs(elems, callback) {
                 for (const elem of elems) {
                     // There is more than one parameter to the query so all checks should be "exact"
-                    const lev = callback(row, elem, NO_TYPE_FILTER);
+                    const lev = callback(row, elem, NO_TYPE_FILTER, maxLevDistance);
                     if (lev <= 1) {
                         nbLev += 1;
                         totalLev += lev;
@@ -1400,12 +1404,21 @@ function initSearch(rawSearchIndex) {
                 return;
             }
             const lev = Math.round(totalLev / nbLev);
-            addIntoResults(results, row.id, pos, 0, lev, 0);
+            addIntoResults(results, row.id, pos, 0, lev, 0, maxLevDistance);
         }
 
         function innerRunQuery() {
             let elem, i, nSearchWords, in_returned, row;
 
+            let queryLen = 0;
+            for (const elem of parsedQuery.elems) {
+                queryLen += elem.name.length;
+            }
+            for (const elem of parsedQuery.returned) {
+                queryLen += elem.name.length;
+            }
+            const maxLevDistance = Math.floor(queryLen / 3);
+
             if (parsedQuery.foundElems === 1) {
                 if (parsedQuery.elems.length === 1) {
                     elem = parsedQuery.elems[0];
@@ -1418,7 +1431,8 @@ function initSearch(rawSearchIndex) {
                             elem,
                             results_others,
                             results_in_args,
-                            results_returned
+                            results_returned,
+                            maxLevDistance
                         );
                     }
                 } else if (parsedQuery.returned.length === 1) {
@@ -1426,13 +1440,18 @@ function initSearch(rawSearchIndex) {
                     elem = parsedQuery.returned[0];
                     for (i = 0, nSearchWords = searchWords.length; i < nSearchWords; ++i) {
                         row = searchIndex[i];
-                        in_returned = checkReturned(row, elem, parsedQuery.typeFilter);
-                        addIntoResults(results_others, row.id, i, -1, in_returned);
+                        in_returned = checkReturned(
+                            row,
+                            elem,
+                            parsedQuery.typeFilter,
+                            maxLevDistance
+                        );
+                        addIntoResults(results_others, row.id, i, -1, in_returned, maxLevDistance);
                     }
                 }
             } else if (parsedQuery.foundElems > 0) {
                 for (i = 0, nSearchWords = searchWords.length; i < nSearchWords; ++i) {
-                    handleArgs(searchIndex[i], i, results_others);
+                    handleArgs(searchIndex[i], i, results_others, maxLevDistance);
                 }
             }
         }
@@ -1470,7 +1489,7 @@ function initSearch(rawSearchIndex) {
      *
      * @return {boolean}       - Whether the result is valid or not
      */
-    function validateResult(name, path, keys, parent) {
+    function validateResult(name, path, keys, parent, maxLevDistance) {
         if (!keys || !keys.length) {
             return true;
         }
@@ -1485,7 +1504,7 @@ function initSearch(rawSearchIndex) {
                 (parent !== undefined && parent.name !== undefined &&
                     parent.name.toLowerCase().indexOf(key) > -1) ||
                 // lastly check to see if the name was a levenshtein match
-                levenshtein(name, key) <= MAX_LEV_DISTANCE)) {
+                levenshtein(name, key) <= maxLevDistance)) {
                 return false;
             }
         }
index fd5c5489d79cfe04a4d844cfdec619ec542719f7..9d7e73a7999c97e29fbabe844fc2f0e0d1458776 100644 (file)
@@ -10,8 +10,5 @@ const EXPECTED = {
         { 'path': 'std', 'name': 'eprint' },
         { 'path': 'std', 'name': 'eprintln' },
         { 'path': 'std::pin', 'name': 'pin' },
-        { 'path': 'std::future', 'name': 'join' },
-        { 'path': 'std', 'name': 'line' },
-        { 'path': 'std', 'name': 'write' },
     ],
 };
index 7bb0cbe388fee1c8d7626b2d4aa74edd9ac80d10..62c8e7a74b9405ed2a22075ec9464d189fce2228 100644 (file)
@@ -1,5 +1,3 @@
-// exact-check
-
 const QUERY = [
     'StructItem',
     'StructFieldItem',