stringutils.hpp 4.72 KB
Newer Older
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161
#ifndef CVVISUAL_STRINGUTILS_HPP
#define CVVISUAL_STRINGUTILS_HPP

#include <map>
#include <vector>

#include <QString>
#include <QHash>
#include <QRegExp>

namespace cvv
{
namespace stfl
{

/**
 * @brief Calculates the equality of two strings.
 * If both strings are single words, a combination of the Levenshtein edit
 * distance and a phonetic matching algorithm is used.
 * Otherwise only the Levenshtein edit distance is used.
 * @attention Using a phonetic algorithm is much slower than the plain
 * Levenshtein edit distance.
 * @param str1 first string
 * @param str2 second string
 * @return equality of both strings: 0 means both strings are equal;
 *  the greater the number, the more the strings differ.
 */
int stringEquality(const QString &str1, const QString &str2);

/**
 * @brief Implementation of the Levenshtein distance.
 * The Levenshtein distance is a metric for the edit distance between two
 * strings.
 * Based on
 * http://en.wikibooks.org/wiki/Algorithm_implementation/Strings/Levenshtein_distance#C.2B.2B
 * @param str1 first string
 * @param str2 second string
 * @return edit distance between str1 and str2
 */
size_t editDistance(const QString &str1, const QString &str2);

/**
 * @brief Implementation of a phonetic algorithm to compare two words.
 * It generates the NYSIIS code for both words and returns the Levenshtein
 * edit distance between them.
 * @attention Using a phonetic algorithm is much slower than the plain
 * Levenshtein edit distance. It also consumes much more memory, as it uses
 * the cached version of the NYSIIS algorithm.
 * @param word1 first word
 * @param word2 second word
 * @return equality of both words: 0 means both words are equal;
 * the greater the number, the more the words differ.
 * @see nysiisForWordCached
 */
int phoneticEquality(const QString &word1, const QString &word2);

/**
 * @brief Determines the NYSIIS code of the given word.
 * NYSIIS is the New York State Identification and Intelligence System
 * Phonetic Code, see http://en.wikipedia.org/wiki/NYSIIS.
 * @param word given word (taken by value)
 * @return NYSIIS code of the given word
 */
QString nysiisForWord(QString word);

/**
 * @brief Determines the NYSIIS code of the given word and caches it.
 * This is faster than the uncached function, at the cost of consuming more
 * memory.
 * @param word given word
 * @return NYSIIS code of the given word
 * @see nysiisForWord
 */
QString nysiisForWordCached(const QString &word);

/**
 * @brief Removes repeated characters in the given string.
 * E.g. "Hello World!!!" => "Helo World!"
 * (in this example each run of the same character is reduced to a single
 * occurrence).
 * @param str given string
 * @return resulting string
 */
QString removeRepeatedCharacters(const QString &str);

/**
 * @brief Replaces the search string with its replacement at the very beginning
 * of the given string.
 * @param str given string (in/out parameter, passed by non-const reference)
 * @param search string to look for at the beginning of str
 * @param replacement string that takes the place of the search string
 */
void replaceIfStartsWith(QString &str, const QString &search,
                         const QString &replacement);

/**
 * @brief Applies the given replacements at the very beginning of the given
 * string.
 * A key of the replacements map is replaced with its mapped value in the
 * given string.
 * @param str given string (in/out parameter, passed by non-const reference)
 * @param replacements replacements map (search string => replacement)
 */
void replaceIfStartsWith(QString &str,
                         const std::map<QString, QString> &replacements);

/**
 * @brief Replaces the search string with its replacement at the end of the
 * given string.
 * @param str given string (in/out parameter, passed by non-const reference)
 * @param search string to look for at the end of str
 * @param replacement string that takes the place of the search string
 */
void replaceIfEndsWith(QString &str, const QString &search,
                       const QString &replacement);

/**
 * @brief Applies the given replacements at the end of the given string.
 * A key of the replacements map is replaced with its mapped value in the
 * given string.
 * @param str given string (in/out parameter, passed by non-const reference)
 * @param replacements replacements map (search string => replacement)
 */
void replaceIfEndsWith(QString &str,
                       const std::map<QString, QString> &replacements);

/**
 * @brief Checks whether or not the given char is a vowel.
 * @param someChar char to check
 * @return true if the given char is a vowel, false otherwise
 */
bool isVowel(const QChar &someChar);

/**
 * @brief Checks whether the given string is a single word.
 * A word consists only of letters.
 *
 * @param str string to check
 * @return true if the given string is a single word, false otherwise
 */
bool isSingleWord(const QString &str);

/**
 * @brief Unescapes escaped commas in the given string.
 *
 * @param str given string (in/out parameter, passed by non-const reference)
 */
void unescapeCommas(QString &str);

/**
 * @brief Shortens the given string to the given length and appends "..." if
 * needed.
 * NOTE(review): str is taken by non-const reference; whether it is modified
 * cannot be told from this declaration - confirm against the implementation.
 * @param str given string
 * @param maxLength maximum length of the returned string
 * @param cutEnd whether the given string is shortened at its end
 * (defaults to true)
 * @param fill whether the resulting string is filled up with whitespace to
 * ensure all strings have length maxLength (defaults to false)
 * @return the resulting string
 */
QString shortenString(QString &str, int maxLength, bool cutEnd = true,
                      bool fill = false);

/**
 * @brief Converts the given vector of chars into a valid QString.
 * @param chars given vector of chars (taken by value)
 * @return resulting QString
 */
QString asciiCharVectorToQString(std::vector<char> chars);
}
}

#endif