Comparer les chaînes Javascript Return% of Likely

Je recherche une fonction JavaScript qui peut comparer deux chaînes et renvoyer la probabilité qu'elles soient identiques. J'ai regardé soundex mais ce n'est pas vraiment génial pour les chaînes multi-mots ou les non-noms. Je cherche une fonction comme:

function compare(strA,strB){ } compare("Apples","apple") = Some X Percentage.

La fonction fonctionnerait avec tous les types de chaînes, y compris les nombres, les valeurs multi-mots et les noms. Peut-être existe-t-il un algorithme simple que je pourrais utiliser?

 Ultimately none of these served my purpose so I used this: function compare(c, u) { var incept = false; var ca = c.split(","); u = clean(u); //ca = correct answer array (Collection of all correct answer) //caa = a single correct answer word array (collection of words of a single correct answer) //u = array of user answer words cleaned using custom clean function for (var z = 0; z < ca.length; z++) { caa = $.trim(ca[z]).split(" "); var pc = 0; for (var x = 0; x < caa.length; x++) { for (var y = 0; y < u.length; y++) { if (soundex(u[y]) != null && soundex(caa[x]) != null) { if (soundex(u[y]) == soundex(caa[x])) { pc = pc + 1; } } else { if (u[y].indexOf(caa[x]) > -1) { pc = pc + 1; } } } } if ((pc / caa.length) > 0.5) { return true; } } return false; } // create object listing the SOUNDEX values for each letter // -1 indicates that the letter is not coded, but is used for coding // 0 indicates that the letter is omitted for modern census archives // but acts like -1 for older census archives // 1 is for BFPV // 2 is for CGJKQSXZ // 3 is for DT // 4 is for L // 5 is for MN my home state // 6 is for R function makesoundex() { this.a = -1 this.b = 1 this.c = 2 this.d = 3 this.e = -1 this.f = 1 this.g = 2 this.h = 0 this.i = -1 this.j = 2 this.k = 2 this.l = 4 this.m = 5 this.n = 5 this.o = -1 this.p = 1 this.q = 2 this.r = 6 this.s = 2 this.t = 3 this.u = -1 this.v = 1 this.w = 0 this.x = 2 this.y = -1 this.z = 2 } var sndx = new makesoundex() // check to see that the input is valid function isSurname(name) { if (name == "" || name == null) { return false } else { for (var i = 0; i < name.length; i++) { var letter = name.charAt(i) if (!(letter >= 'a' && letter <= 'z' || letter >= 'A' && letter <= 'Z')) { return false } } } return true } // Collapse out directly adjacent sounds // 1. Assume that surname.length>=1 // 2. Assume that surname contains only lowercase letters function collapse(surname) { if (surname.length == 1) { return surname } var right = collapse(surname.substring(1, surname.length)) if (sndx[surname.charAt(0)] == sndx[right.charAt(0)]) { return surname.charAt(0) + right.substring(1, right.length) } return surname.charAt(0) + right } // Collapse out directly adjacent sounds using the new National Archives method // 1. Assume that surname.length>=1 // 2. Assume that surname contains only lowercase letters // 3. H and W are completely ignored function omit(surname) { if (surname.length == 1) { return surname } var right = omit(surname.substring(1, surname.length)) if (!sndx[right.charAt(0)]) { return surname.charAt(0) + right.substring(1, right.length) } return surname.charAt(0) + right } // Output the coded sequence function output_sequence(seq) { var output = seq.charAt(0).toUpperCase() // Retain first letter output += "-" // Separate letter with a dash var stage2 = seq.substring(1, seq.length) var count = 0 for (var i = 0; i < stage2.length && count < 3; i++) { if (sndx[stage2.charAt(i)] > 0) { output += sndx[stage2.charAt(i)] count++ } } for (; count < 3; count++) { output += "0" } return output } // Compute the SOUNDEX code for the surname function soundex(value) { if (!isSurname(value)) { return null } var stage1 = collapse(value.toLowerCase()) //form.result.value=output_sequence(stage1); var stage1 = omit(value.toLowerCase()) var stage2 = collapse(stage1) return output_sequence(stage2); } function clean(u) { var u = u.replace(/\,/g, ""); u = u.toLowerCase().split(" "); var cw = ["ARRAY OF WORDS TO BE EXCLUDED FROM COMPARISON"]; var n = []; for (var y = 0; y < u.length; y++) { var test = false; for (var z = 0; z < cw.length; z++) { if (u[y] != "" && u[y] != cw[z]) { test = true; break; } } if (test) { //Don't use & or $ in comparison var val = u[y].replace("$", "").replace("&", ""); n.push(val); } } return n; }

Voici une réponse basée sur la distance de Levenshtein https://en.wikipedia.org/wiki/Levenshtein_distance

 function similarity(s1, s2) { var longer = s1; var shorter = s2; if (s1.length < s2.length) { longer = s2; shorter = s1; } var longerLength = longer.length; if (longerLength == 0) { return 1.0; } return (longerLength - editDistance(longer, shorter)) / parseFloat(longerLength); }

Pour calculer la distance d'édition

 function editDistance(s1, s2) { s1 = s1.toLowerCase(); s2 = s2.toLowerCase(); var costs = new Array(); for (var i = 0; i <= s1.length; i++) { var lastValue = i; for (var j = 0; j <= s2.length; j++) { if (i == 0) costs[j] = j; else { if (j > 0) { var newValue = costs[j - 1]; if (s1.charAt(i - 1) != s2.charAt(j - 1)) newValue = Math.min(Math.min(newValue, lastValue), costs[j]) + 1; costs[j - 1] = lastValue; lastValue = newValue; } } } if (i > 0) costs[s2.length] = lastValue; } return costs[s2.length]; }

Usage

 similarity('Stack Overflow','Stack Ovrflw')

Renvoie 0,8571428571428571

Vous pouvez y jouer ci-dessous:

 function checkSimilarity(){ var str1 = document.getElementById("lhsInput").value; var str2 = document.getElementById("rhsInput").value; document.getElementById("output").innerHTML = similarity(str1, str2); } function similarity(s1, s2) { var longer = s1; var shorter = s2; if (s1.length < s2.length) { longer = s2; shorter = s1; } var longerLength = longer.length; if (longerLength == 0) { return 1.0; } return (longerLength - editDistance(longer, shorter)) / parseFloat(longerLength); } function editDistance(s1, s2) { s1 = s1.toLowerCase(); s2 = s2.toLowerCase(); var costs = new Array(); for (var i = 0; i <= s1.length; i++) { var lastValue = i; for (var j = 0; j <= s2.length; j++) { if (i == 0) costs[j] = j; else { if (j > 0) { var newValue = costs[j - 1]; if (s1.charAt(i - 1) != s2.charAt(j - 1)) newValue = Math.min(Math.min(newValue, lastValue), costs[j]) + 1; costs[j - 1] = lastValue; lastValue = newValue; } } } if (i > 0) costs[s2.length] = lastValue; } return costs[s2.length]; }

 <div><label for="lhsInput">String 1:</label> <input type="text" id="lhsInput" oninput="checkSimilarity()" /></div> <div><label for="rhsInput">String 2:</label> <input type="text" id="rhsInput" oninput="checkSimilarity()" /></div> <div>Match: <span id="output">No Input</span></div>

Voici une fonction très simple qui fait une comparaison et renvoie un pourcentage basé sur l'équivalence. Bien qu'il n'ait pas été testé pour tous les scénarios possibles, cela peut vous aider à démarrer.

 function similar(a,b) { var lengthA = a.length; var lengthB = b.length; var equivalency = 0; var minLength = (a.length > b.length) ? b.length : a.length; var maxLength = (a.length < b.length) ? b.length : a.length; for(var i = 0; i < minLength; i++) { if(a[i] == b[i]) { equivalency++; } } var weight = equivalency / maxLength; return (weight * 100) + "%"; } alert(similar("test","tes")); // 75% alert(similar("test","test")); // 100% alert(similar("test","testt")); // 80% alert(similar("test","tess")); // 75%

Que diriez-vous de la fonction similar_text partir de la bibliothèque PHP.js ?

Il est basé sur une fonction PHP avec le même nom .

 function similar_text (first, second) { // Calculates the similarity between two strings // discuss at: http://phpjs.org/functions/similar_text if (first === null || second === null || typeof first === 'undefined' || typeof second === 'undefined') { return 0; } first += ''; second += ''; var pos1 = 0, pos2 = 0, max = 0, firstLength = first.length, secondLength = second.length, p, q, l, sum; max = 0; for (p = 0; p < firstLength; p++) { for (q = 0; q < secondLength; q++) { for (l = 0; (p + l < firstLength) && (q + l < secondLength) && (first.charAt(p + l) === second.charAt(q + l)); l++); if (l > max) { max = l; pos1 = p; pos2 = q; } } } sum = max; if (sum) { if (pos1 && pos2) { sum += this.similar_text(first.substr(0, pos2), second.substr(0, pos2)); } if ((pos1 + max < firstLength) && (pos2 + max < secondLength)) { sum += this.similar_text(first.substr(pos1 + max, firstLength - pos1 - max), second.substr(pos2 + max, secondLength - pos2 - max)); } } return sum; }

Juste un que j'ai rapidement écrit que cela pourrait être assez bon pour vos besoins:

 function Compare(strA,strB){ for(var result = 0, i = strA.length; i--;){ if(typeof strB[i] == 'undefined' || strA[i] == strB[i]); else if(strA[i].toLowerCase() == strB[i].toLowerCase()) result++; else result += 4; } return 1 - (result + 4*Math.abs(strA.length - strB.length))/(2*(strA.length+strB.length)); }

Cela pèse des caractères qui sont identiques, mais différents cas 1 trimestre aussi lourdement que des personnages complètement différents ou manquants. Il renvoie un nombre entre 0 et 1, 1 signifiant que les chaînes sont identiques. 0 signifiant qu'ils n'ont pas de similitudes. Exemples:

 Compare("Apple", "Apple") // 1 Compare("Apples", "Apple") // 0.8181818181818181 Compare("Apples", "apple") // 0.7727272727272727 Compare("a", "A") // 0.75 Compare("Apples", "appppp") // 0.45833333333333337 Compare("a", "b") // 0