Bånder

TextSimilarityService
in package

Table of Contents

Constants

CACHE_SIZE_LIMIT  = 1000
DEFAULT_NGRAM_SIZE  = 2
SIMILARITY_THRESHOLDS  = ['EXACT' => 100.0, 'VERY_HIGH' => 95.0, 'HIGH' => 85.0, 'MEDIUM' => 70.0, 'LOW' => 50.0]

Properties

$ngramCache  : array<string|int, mixed>
$scriptCache  : array<string|int, mixed>
$transliterationCache  : array<string|int, mixed>

Methods

calculateBatchSimilarity()  : array<string|int, mixed>
Batch similarity calculation for multiple comparisons
calculateInternationalNameSimilarity()  : array<string|int, mixed>
calculateNGramSimilarity()  : float
calculateSimilarity()  : float
Calculate the best similarity score between two texts using multiple algorithms.
calculateWeightedAverage()  : float
clearCaches()  : void
extractInternationalTokens()  : array<string|int, mixed>
findBestMatches()  : array<string|int, mixed>
Find the best matches above a threshold
getBestInternationalSimilarity()  : float
getSimilarityLevel()  : string
Get similarity confidence level
getUnicodeScripts()  : array<string|int, mixed>
hasNonLatinScript()  : bool
normalizeInternationalText()  : string
transliterateToLatin()  : string
cacheResult()  : mixed
cacheScript()  : array<string|int, mixed>
cacheTransliteration()  : string
calculateRomanizationSimilarity()  : float
calculateTokenSimilarity()  : float
calculateTransliterationSimilarity()  : float
calculateUnicodeLevenshtein()  : float
calculateUnicodeScriptSimilarity()  : float
extractNGrams()  : array<string|int, mixed>
fallbackTransliteration()  : string
longestCommonSubsequenceSimilarity()  : float
makeCacheKey()  : string
performTransliteration()  : string
unicodeLevenshteinDistance()  : int
unicodeLongestCommonSubsequence()  : int

Constants

SIMILARITY_THRESHOLDS

private mixed SIMILARITY_THRESHOLDS = ['EXACT' => 100.0, 'VERY_HIGH' => 95.0, 'HIGH' => 85.0, 'MEDIUM' => 70.0, 'LOW' => 50.0]

Properties

Methods

calculateBatchSimilarity()

Batch similarity calculation for multiple comparisons

public calculateBatchSimilarity(string $target, array<string|int, mixed> $candidates) : array<string|int, mixed>
Parameters
$target : string
$candidates : array<string|int, mixed>
Return values
array<string|int, mixed>

calculateInternationalNameSimilarity()

public calculateInternationalNameSimilarity(string $name1, string $name2) : array<string|int, mixed>
Parameters
$name1 : string
$name2 : string
Return values
array<string|int, mixed>

calculateNGramSimilarity()

public calculateNGramSimilarity(string $str1, string $str2[, int $n = self::DEFAULT_NGRAM_SIZE ]) : float
Parameters
$str1 : string
$str2 : string
$n : int = self::DEFAULT_NGRAM_SIZE
Return values
float

calculateSimilarity()

Calculate the best similarity score between two texts using multiple algorithms.

public calculateSimilarity(string $text1, string $text2) : float
Parameters
$text1 : string
$text2 : string
Return values
float

calculateWeightedAverage()

public calculateWeightedAverage(array<string|int, mixed> $scores, array<string|int, mixed> $weights) : float
Parameters
$scores : array<string|int, mixed>
$weights : array<string|int, mixed>
Return values
float

clearCaches()

public clearCaches() : void

extractInternationalTokens()

public extractInternationalTokens(string $text) : array<string|int, mixed>
Parameters
$text : string
Return values
array<string|int, mixed>

findBestMatches()

Find the best matches above a threshold

public findBestMatches(string $target, array<string|int, mixed> $candidates[, float $threshold = 70.0 ][, int $limit = 5 ]) : array<string|int, mixed>
Parameters
$target : string
$candidates : array<string|int, mixed>
$threshold : float = 70.0
$limit : int = 5
Return values
array<string|int, mixed>

getBestInternationalSimilarity()

public getBestInternationalSimilarity(string $name1, string $name2) : float
Parameters
$name1 : string
$name2 : string
Return values
float

getSimilarityLevel()

Get similarity confidence level

public getSimilarityLevel(float $score) : string
Parameters
$score : float
Return values
string

getUnicodeScripts()

public getUnicodeScripts(string $text) : array<string|int, mixed>
Parameters
$text : string
Return values
array<string|int, mixed>

hasNonLatinScript()

public hasNonLatinScript(string $text) : bool
Parameters
$text : string
Return values
bool

normalizeInternationalText()

public normalizeInternationalText(string $text) : string
Parameters
$text : string
Return values
string

transliterateToLatin()

public transliterateToLatin(string $text) : string
Parameters
$text : string
Return values
string

cacheResult()

private cacheResult(string $key, mixed $result) : mixed
Parameters
$key : string
$result : mixed
Return values
mixed

cacheScript()

private cacheScript(string $key, array<string|int, mixed> $result) : array<string|int, mixed>
Parameters
$key : string
$result : array<string|int, mixed>
Return values
array<string|int, mixed>

cacheTransliteration()

private cacheTransliteration(string $key, string $result) : string
Parameters
$key : string
$result : string
Return values
string

calculateRomanizationSimilarity()

private calculateRomanizationSimilarity(string $name1, string $name2) : float
Parameters
$name1 : string
$name2 : string
Return values
float

calculateTokenSimilarity()

private calculateTokenSimilarity(string $name1, string $name2) : float
Parameters
$name1 : string
$name2 : string
Return values
float

calculateTransliterationSimilarity()

private calculateTransliterationSimilarity(string $name1, string $name2) : float
Parameters
$name1 : string
$name2 : string
Return values
float

calculateUnicodeLevenshtein()

private calculateUnicodeLevenshtein(string $str1, string $str2, int $maxLen) : float
Parameters
$str1 : string
$str2 : string
$maxLen : int
Return values
float

calculateUnicodeScriptSimilarity()

private calculateUnicodeScriptSimilarity(string $name1, string $name2) : float
Parameters
$name1 : string
$name2 : string
Return values
float

extractNGrams()

private extractNGrams(string $text, int $n) : array<string|int, mixed>
Parameters
$text : string
$n : int
Return values
array<string|int, mixed>

fallbackTransliteration()

private fallbackTransliteration(string $text) : string
Parameters
$text : string
Return values
string

longestCommonSubsequenceSimilarity()

private longestCommonSubsequenceSimilarity(string $str1, string $str2) : float
Parameters
$str1 : string
$str2 : string
Return values
float

makeCacheKey()

private makeCacheKey(string $str1, string $str2) : string
Parameters
$str1 : string
$str2 : string
Return values
string

performTransliteration()

private performTransliteration(string $text) : string
Parameters
$text : string
Return values
string

unicodeLevenshteinDistance()

private unicodeLevenshteinDistance(string $str1, string $str2) : int
Parameters
$str1 : string
$str2 : string
Return values
int

unicodeLongestCommonSubsequence()

private unicodeLongestCommonSubsequence(string $str1, string $str2) : int
Parameters
$str1 : string
$str2 : string
Return values
int

        
On this page

Search results