export default function fuzzymatch(name: string, names: string[]) {
// 根据当前已有数据建立模糊集,如果有字符需要进行匹配、则可以对对象进行缓存
const set = new FuzzySet(names);
const matches = set.get(name);
// 如果有匹配项,且匹配度大于 0.7,返回匹配单词,否则返回 null
return matches && matches[0] && matches[0][0] > 0.7 ? matches[0][1] : null;
// adapted from https://github.com/Glench/fuzzyset.js/blob/master/lib/fuzzyset.js
const GRAM_SIZE_LOWER = 2;
const GRAM_SIZE_UPPER = 3;
// 进行 Levenshtein 计算,更适合输入完整单词的匹配
function _distance(str1: string, str2: string) {
if (str1 === null && str2 === null)
throw 'Trying to compare two null values';
if (str1 === null || str2 === null) return 0;
const distance = levenshtein(str1, str2);
if (str1.length > str2.length) {
return 1 - distance / str1.length;
return 1 - distance / str2.length;
// Levenshtein距离,是指两个字串之间,由一个转成另一个所需的最少的编辑操作次数。
function levenshtein(str1: string, str2: string) {
const current: number[] = [];
for (let i = 0; i <= str2.length; i++) {
for (let j = 0; j <= str1.length; j++) {
if (str1.charAt(j - 1) === str2.charAt(i - 1)) {
value = Math.min(current[j], current[j - 1], prev) + 1;
// 正则匹配除单词 字母 数字以及逗号和空格外的数据
const non_word_regex = /[^\w, ]+/;
function iterate_grams(value: string, gram_size = 2) {
const simplified = '-' + value.toLowerCase().replace(non_word_regex, '') + '-';
const len_diff = gram_size - simplified.length;
for (let i = 0; i < len_diff; ++i) {
for (let i = 0; i < simplified.length - gram_size + 1; ++i) {
results.push(simplified.slice(i, i + gram_size));
function gram_counter(value: string, gram_size = 2) {
const grams = iterate_grams(value, gram_size);
for (i; i < grams.length; ++i) {
if (grams[i] in result) {
function sort_descending(a, b) {
// 1.优化初始化时候,相同的可选项数据,同时避免多次计算相同向量
// 2.当前输入的值与可选项相等,直接返回,无需计算
// 如 match_dist['ba'] = [
// 后面单词匹配时候,可以根据单词索引进行匹配然后计算最终分数
// 根据不同子字符串获取不同的单词向量,最终有不同的匹配度
// item[2] = [[2.6457513110645907, "aaab"]]
constructor(arr: string[]) {
for (let i = GRAM_SIZE_LOWER; i < GRAM_SIZE_UPPER + 1; ++i) {
for (let i = 0; i < arr.length; ++i) {
const normalized_value = value.toLowerCase();
if (normalized_value in this.exact_set) {
for (let i = GRAM_SIZE_LOWER; i < GRAM_SIZE_UPPER + 1; ++i) {
_add(value: string, gram_size: number) {
const normalized_value = value.toLowerCase();
const items = this.items[gram_size] || [];
const index = items.length;
// 没有看出有实际的用处?实验也没有什么作用?不会影响
const gram_counts = gram_counter(normalized_value, gram_size);
let sum_of_square_gram_counts = 0;
// 同上述代码,只不过把所有的匹配项目和当前索引都加入 match_dict 中去
// 如 this.match_dict['aq'] = [[1, 2], [3,3]]
for (gram in gram_counts) {
gram_count = gram_counts[gram];
sum_of_square_gram_counts += Math.pow(gram_count, 2);
if (gram in this.match_dict) {
this.match_dict[gram].push([index, gram_count]);
this.match_dict[gram] = [[index, gram_count]];
const vector_normal = Math.sqrt(sum_of_square_gram_counts);
// 添加向量 如: this.items[2][3] = [4.323, 'sqaaaa']
items[index] = [vector_normal, normalized_value];
this.items[gram_size] = items;
this.exact_set[normalized_value] = value;
const normalized_value = value.toLowerCase();
const result = this.exact_set[normalized_value];
// 从多到少,如果多子字符串没有结果,转到较小的大小
let gram_size = GRAM_SIZE_UPPER;
gram_size >= GRAM_SIZE_LOWER;
results = this.__get(value, gram_size);
__get(value: string, gram_size: number) {
const normalized_value = value.toLowerCase();
const gram_counts = gram_counter(normalized_value, gram_size);
const items = this.items[gram_size];
let sum_of_square_gram_counts = 0;
for (gram in gram_counts) {
gram_count = gram_counts[gram];
sum_of_square_gram_counts += Math.pow(gram_count, 2);
// 取得当前匹配的 [index, gram_count]
if (gram in this.match_dict) {
// 获取所有匹配当前向量单词的项目,并且根据 索引加入 matches
for (i = 0; i < this.match_dict[gram].length; ++i) {
// 获得当前匹配的索引 === 输入单词[index]
index = this.match_dict[gram][i][0];
other_gram_count = this.match_dict[gram][i][1];
// 单词索引添加,注:只要和当前子字符串匹配的 索引都会加入 matches
matches[index] += gram_count * other_gram_count;
matches[index] = gram_count * other_gram_count;
const vector_normal = Math.sqrt(sum_of_square_gram_counts);
for (const match_index in matches) {
match_score = matches[match_index];
match_score / (vector_normal * items[match_index][0]),
// 虽然所有的与之匹配子字符串都会进入,但我们只需要最高的分数
results.sort(sort_descending);
// 如果匹配数目很大,只取的前 50 个数据进行计算
const end_index = Math.min(50, results.length);
// 由于是字符类型数据,根据当前数据在此计算 levenshtein 距离
for (let i = 0; i < end_index; ++i) {
_distance(results[i][1], normalized_value), results[i][1]
results.sort(sort_descending);
for (let i = 0; i < results.length; ++i) {
// 因为 第一的分数是最高的,所有后面的数据如果等于第一个
if (results[i][0] == results[0][0]) {
new_results.push([results[i][0], this.exact_set[results[i][1]]]);