Commit ff7b2bba authored by Hamdi Sahloul

CVV - Workaround for VS compatibility with unicode strings

parent a2769e15
...@@ -13,263 +13,263 @@ namespace stfl ...@@ -13,263 +13,263 @@ namespace stfl
int stringEquality(const QString &str1, const QString &str2) int stringEquality(const QString &str1, const QString &str2)
{ {
if (isSingleWord(str1) && isSingleWord(str2)) if (isSingleWord(str1) && isSingleWord(str2))
{ {
return phoneticEquality(str1, str2); return phoneticEquality(str1, str2);
} }
return editDistance(str1, str2); return editDistance(str1, str2);
} }
size_t editDistance(const QString &str1, const QString &str2) size_t editDistance(const QString &str1, const QString &str2)
{ {
const unsigned len1 = str1.size(); const unsigned len1 = str1.size();
const unsigned len2 = str2.size(); const unsigned len2 = str2.size();
std::vector<size_t> col(len2 + 1); std::vector<size_t> col(len2 + 1);
std::vector<size_t> prevCol(len2 + 1); std::vector<size_t> prevCol(len2 + 1);
// fills the vector with ascending numbers, starting by 0 // fills the vector with ascending numbers, starting by 0
std::iota(prevCol.begin(), prevCol.end(), 0); std::iota(prevCol.begin(), prevCol.end(), 0);
for (unsigned i = 0; i < len1; i++) for (unsigned i = 0; i < len1; i++)
{ {
col[0] = i + 1; col[0] = i + 1;
for (unsigned j = 0; j < len2; j++) for (unsigned j = 0; j < len2; j++)
{ {
if (str1[i] == str2[j]) if (str1[i] == str2[j])
col[j + 1] = col[j + 1] =
std::min({ 1 + col[j], 1 + prevCol[1 + j], std::min({ 1 + col[j], 1 + prevCol[1 + j],
prevCol[j] }); prevCol[j] });
else else
col[j + 1] = col[j + 1] =
std::min({ 1 + col[j], 1 + prevCol[1 + j], std::min({ 1 + col[j], 1 + prevCol[1 + j],
prevCol[j] + 1 }); prevCol[j] + 1 });
} }
std::swap(col, prevCol); std::swap(col, prevCol);
} }
return prevCol[len2]; return prevCol[len2];
} }
int phoneticEquality(const QString &word1, const QString &word2) int phoneticEquality(const QString &word1, const QString &word2)
{ {
if (word1 == word2) if (word1 == word2)
{ {
return 0; return 0;
} }
return editDistance(nysiisForWord(word1), nysiisForWord(word2)) + 1; return editDistance(nysiisForWord(word1), nysiisForWord(word2)) + 1;
} }
/**
 * @brief Builds a NYSIIS-style phonetic code for a word.
 *
 * The word is upper-cased, prefix and suffix substitutions are applied,
 * and the code is then assembled character by character while skipping
 * certain 'H'/'W' occurrences and immediate repetitions. Finally the tail
 * of the code is normalised and repeated characters are removed.
 *
 * @param word the word to encode (taken by value, modified locally)
 * @return the phonetic code; an empty string for empty input
 */
QString nysiisForWord(QString word)
{
    // prefix substitutions; applied to the whole word once and then again
    // at every position while the word is consumed
    static std::map<QString, QString> replacements = { { "MAC", "MCC" },
                                                       { "KN", "NN" },
                                                       { "K", "C" },
                                                       { "PH", "FF" },
                                                       { "PF", "FF" },
                                                       { "SCH", "SSS" } };
    // suffix substitutions; applied to the whole word once
    static std::map<QString, QString> replacements2 = { { "EE", "Y" },
                                                        { "IE", "Y" },
                                                        { "DT", "D" },
                                                        { "RT", "D" },
                                                        { "NT", "D" },
                                                        { "ND", "D" } };
    // NOTE(review): this table is never referenced below. It looks like it
    // was meant for the per-position step inside the loop - confirm before
    // wiring it in or removing it. The umlaut keys are spelled as UTF-8
    // byte escapes so the source does not depend on the file encoding.
    static std::map<QString, QString> replacements3 = { { "EV", "AF" },
                                                        { "\xC3\x9C", "A" },
                                                        { "\xC3\x96", "A" },
                                                        { "\xC3\x84", "A" },
                                                        { "O", "G" },
                                                        { "Z", "S" },
                                                        { "M", "N" },
                                                        { "KN", "N" },
                                                        { "K", "C" },
                                                        { "SCH", "SSS" },
                                                        { "PH", "FF" } };
    if (word.isEmpty())
    {
        return "";
    }
    QString code;
    word = word.toUpper();
    replaceIfStartsWith(word, replacements);
    replaceIfEndsWith(word, replacements2);
    // the first character is taken over unchanged
    code.append(word[0]);
    word = word.right(word.size() - 1);
    while (word.size() > 0)
    {
        if (isVowel(word[0]))
            word[0] = QChar('A');
        replaceIfStartsWith(word, replacements);
        // skip an 'H' that is preceded or followed by a non-vowel and a
        // 'W' that is preceded by a vowel
        if (!(word.startsWith("H") &&
              (!isVowel(code[code.size() - 1]) ||
               (word.size() >= 2 && !isVowel(word[1])))) &&
            !(word.startsWith("W") && isVowel(code[code.size() - 1])))
        {
            // never emit the same character twice in a row
            if (word[0] != code[code.size() - 1])
            {
                code.append(word[0]);
            }
        }
        word = word.right(word.size() - 1);
    }
    if (code.endsWith("S"))
    {
        code = code.left(code.size() - 1);
    }
    if (code.endsWith("AY"))
    {
        // Collapse the trailing "AY" into "Y". The characters have to be
        // removed from the END of the code; the previous right() call cut
        // off the first character of the code instead.
        code = code.left(code.size() - 1);
        code[code.size() - 1] = QChar('Y');
    }
    else if (code.endsWith("A"))
    {
        code = code.left(code.size() - 1);
    }
    code = removeRepeatedCharacters(code);
    return code;
}
/**
 * @brief Memoising front end for nysiisForWord().
 *
 * Codes are remembered in a function-local static map keyed by the word,
 * so each distinct word is encoded only once.
 *
 * @param word the word to encode
 * @return the phonetic code; an empty string for empty input
 */
QString nysiisForWordCached(const QString &word)
{
    static std::map<QString, QString> cache;
    if (word.isEmpty())
    {
        return "";
    }
    const auto hit = cache.find(word);
    if (hit != cache.end())
    {
        return hit->second;
    }
    // first time this word is seen: compute the code and remember it
    const QString code = nysiisForWord(word);
    cache[word] = code;
    return code;
}
/**
 * @brief Collapses every run of identical consecutive characters into one.
 *
 * Example: "AABBBCA" becomes "ABCA".
 *
 * @param str the input string
 * @return str with immediate repetitions removed; an empty string for
 *         empty input
 */
QString removeRepeatedCharacters(const QString &str)
{
    if (str.isEmpty())
    {
        return "";
    }
    QString res;
    res.reserve(str.size());
    for (const QChar c : str)
    {
        // Compare against the live tail of the result. The previous
        // version captured a copy of the result inside a lambda and thus
        // always compared against the first character only.
        if (res.isEmpty() || res.at(res.size() - 1) != c)
        {
            res.append(c);
        }
    }
    return res;
}
/**
 * @brief Replaces a leading occurrence of search with replacement.
 *
 * @param str string to modify in place; untouched when it does not start
 *            with search
 * @param search prefix to look for
 * @param replacement text that takes the place of the prefix
 */
void replaceIfStartsWith(QString &str, const QString &search,
                         const QString &replacement)
{
    if (!str.startsWith(search))
    {
        return;
    }
    // swap the first search.size() characters for the replacement text
    str.replace(0, search.size(), replacement);
}
void replaceIfStartsWith(QString &word, void replaceIfStartsWith(QString &word,
const std::map<QString, QString> &replacements) const std::map<QString, QString> &replacements)
{ {
for (auto iterator = replacements.begin(); for (auto iterator = replacements.begin();
iterator != replacements.end(); iterator++) iterator != replacements.end(); iterator++)
{ {
replaceIfStartsWith(word, iterator->first, iterator->second); replaceIfStartsWith(word, iterator->first, iterator->second);
} }
} }
/**
 * @brief Replaces a trailing occurrence of search with replacement.
 *
 * @param str string to modify in place; untouched when it does not end
 *            with search
 * @param search suffix to look for
 * @param replacement text that takes the place of the suffix
 */
void replaceIfEndsWith(QString &str, const QString &search,
                       const QString &replacement)
{
    if (!str.endsWith(search))
    {
        return;
    }
    // Replace by position and length. The previous equal-length branch
    // indexed replacement with an index into str and read past its end
    // whenever str was longer than the replacement.
    const int suffixStart = str.length() - search.length();
    str.replace(suffixStart, search.length(), replacement);
}
void replaceIfEndsWith(QString &word, void replaceIfEndsWith(QString &word,
const std::map<QString, QString> &replacements) const std::map<QString, QString> &replacements)
{ {
for (auto iterator = replacements.begin(); for (auto iterator = replacements.begin();
iterator != replacements.end(); iterator++) iterator != replacements.end(); iterator++)
{ {
replaceIfEndsWith(word, iterator->first, iterator->second); replaceIfEndsWith(word, iterator->first, iterator->second);
} }
} }
bool isVowel(const QChar &someChar) bool isVowel(const QChar &someChar)
{ {
static std::vector<QChar> vowels = { 'a', 'e', 'i', 'o', 'u' }; static std::vector<QChar> vowels = { 'a', 'e', 'i', 'o', 'u' };
return std::find(vowels.begin(), vowels.end(), someChar) != return std::find(vowels.begin(), vowels.end(), someChar) !=
vowels.end(); vowels.end();
} }
bool isSingleWord(const QString &str) bool isSingleWord(const QString &str)
{ {
const auto isLetter = [](QChar c) const auto isLetter = [](QChar c)
{ return c.isLetter(); }; { return c.isLetter(); };
return std::find_if_not(str.begin(), str.end(), isLetter) != str.end(); return std::find_if_not(str.begin(), str.end(), isLetter) != str.end();
} }
/**
 * @brief Turns every backslash-escaped comma into a plain comma.
 *
 * @param str string to modify in place
 */
void unescapeCommas(QString &str)
{
    const QString escapedComma("\\,");
    const QString plainComma(",");
    str.replace(escapedComma, plainComma);
}
/**
 * @brief Limits a string to a maximum length, optionally padding it.
 *
 * A string longer than maxLength is cut and marked with an ellipsis, at
 * its end when cutEnd is set and at its start otherwise. A string that
 * fits is padded with spaces up to maxLength when fill is set.
 *
 * @param str string to adjust; it is modified in place
 * @param maxLength maximum number of characters of the result
 * @param cutEnd true to drop the tail, false to drop the head
 * @param fill true to pad short strings with spaces
 * @return a copy of the adjusted str
 */
QString shortenString(QString &str, int maxLength, bool cutEnd, bool fill)
{
    if (str.size() > maxLength)
    {
        if (maxLength <= 0)
        {
            // No room even for the ellipsis. Without this guard mid()
            // would get a negative count, which it treats as "everything
            // to the end", so the result would grow instead of shrink.
            str.clear();
        }
        else if (cutEnd)
        {
            str = str.mid(0, maxLength - 1) + u8"…";
        }
        else
        {
            str = u8"…" +
                  str.mid(str.size() + 1 - maxLength, str.size());
        }
    }
    else if (fill)
    {
        str = str + QString(maxLength - str.size(), ' ');
    }
    return str;
}
/**
 * @brief Converts a vector of chars into a QString.
 *
 * The characters are first gathered in a std::string and then handed to
 * QString::fromStdString().
 *
 * @param chars the characters to convert
 * @return the resulting QString
 */
QString asciiCharVectorToQString(std::vector<char> chars)
{
    const std::string text(chars.begin(), chars.end());
    return QString::fromStdString(text);
}
} }
} }
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment