String searching

...string_searchingstring_searchingstring_searching.cpp 1
#include <vector>
#include <string>
#include <algorithm>
#include <iostream>
/**
*
* @copyright Copyright Dr Russell John Childs, Phd, 2020-01-14
*
* Find occurrences of a substr S of length m, in a target string T of length n.
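* Order-of-complexity: O(n+m) in time when checksum collisions are rare, but
* O(n*m) in the worst case (every window passes (3) and needs the O(m) check
* of step (4), e.g. "aa" in "aaaa..."); O(1) extra space besides the result.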
*
* Operation:
* (1) Convert char to integer, I.
* (2) Use std::pair sliding window, w, with cumulative sums of I at positions
* p and p-m of target, where p is the range (substr length, target length).
* (3) Compare cumulative sum of I for substr against w.second - w.first
* (4) If (3) succeeds, perform O(m) char-by-char check that S=T[p-m]...T[p-1].
* (5) if (4) succeeds, append p-m to return vector of "found positions".
*
* Notes: Code is simpler than trie or KMP. Compiled under Visual Studio.
* The O(m) verification step (4) is only necessary because "+" commutes, which
* poses a problem for anagrams, a+b = b+a. There is a Group Theoretic way of
* creating an O(1)-updatable hash function that eliminates the need for (4).
* A char encoding, such as c -> (c-' ')*('~' - ' '), may differentiate between
* some same-length strings with different chars and product(c->prime) may
* differentiate between all same-length strings with different chars, but not
* anagrams.
*
* @param substr : const std::string& - substring sought in target
* @param target : const std::string& - target string being searched
* @return std::vector<unsigned> - positions in target of matches, none -> empty
*
*/
std::vector<unsigned>
find(const std::string& substr, const std::string& target)
{
    // Implementation notes:
    //  * A running sum of the byte values in the current m-wide window of
    //    target is compared against the byte sum of substr. Equal sums only
    //    mean "possible match" (anagrams share a sum), so every candidate is
    //    confirmed with a real comparison before it is reported.
    //  * An empty substr reports no matches; a substr longer than target
    //    cannot match. Both cases return an empty vector.
    using size_type = std::string::size_type;

    std::vector<unsigned> ret_val;
    const size_type m = substr.length();
    const size_type n = target.length();
    if (m == 0 || n < m) return ret_val;

    // Go through unsigned char so bytes >= 0x80 contribute 128..255 whether
    // or not plain char is signed, and keep both sums in the same 64-bit
    // type so they wrap identically and stay comparable.
    const auto code = [](char c) -> unsigned long long
    {
        return static_cast<unsigned char>(c);
    };

    // Checksum of the substring being sought.
    unsigned long long signature = 0;
    for (const char c : substr) signature += code(c);

    // Checksum of target[i-m+1 .. i], maintained in O(1) per step.
    unsigned long long window = 0;
    for (size_type i = 0; i < n; ++i)
    {
        window += code(target[i]);
        if (i + 1 < m) continue; // window does not span m chars yet

        const size_type start = i + 1 - m;
        // Cheap checksum test first; compare() stops at the first mismatch.
        if (window == signature && target.compare(start, m, substr) == 0)
        {
            ret_val.push_back(static_cast<unsigned>(start));
        }
        // Drop the oldest char so the window slides by one next iteration.
        window -= code(target[start]);
    }
    return ret_val;
}
int main(void)
{
//Bounds check on target length < substr length, expect empty list
std::string substr = "Amazon";
std::string target = "Amazo";
std::cout << substr << " found in " << target << " at ";
std::cout << std::endl;
//Check for substr at beginning and end, expect 0 and 19
target = "Amazon----Amazo----Amazon";
for (auto& elem : find(substr, target)) std::cout << elem << " ";
//Check for substr in middle, expect 9
target = "Amazo----Amazon----mazon";
//Check for repeated substr, expect 0,6,12,18,24,30
target = "AmazonAmazonAmazonAmazonAmazonAmazon";
//Check for substr not matched, expect empty list
target = "Ama----z----on";
}

String searching

More Related Content

What's hot

Similar to String searching

More from Russell Childs

Recently uploaded

String searching