1 #ifndef BTLLIB_ROLLING_HASH_HPP
2 #define BTLLIB_ROLLING_HASH_HPP
/** Forward declaration; the SeedRollingHash constructors and its init()/roll()
 *  members are defined further down in this file. */
13 class SeedRollingHash;
/**
 * One spaced seed, stored as a sequence of unsigned values.
 *
 * NOTE(review): what each stored value means (e.g. an index into the seed
 * pattern) is decided by parse_seeds(), whose body is not fully visible
 * here -- confirm before relying on it.
 */
14 using SpacedSeed = std::vector<unsigned>;
/**
 * Convert textual seed patterns into SpacedSeed objects.
 *
 * Declared ahead of its definition so that the SeedRollingHash constructors
 * taking seed strings can call it from their member-initializer lists.
 *
 * @param seed_strings Seed patterns, one pattern per string.
 * @return A vector of SpacedSeed. The definition iterates over the input
 *         strings and their characters and appends to the result; it appears
 *         to yield one SpacedSeed per input string -- TODO confirm, the loop
 *         bodies are not fully visible here.
 *
 * NOTE(review): this is a `static` free function in a header, so every
 * translation unit that includes the header gets its own internal-linkage
 * copy; `inline` may be the better fit.
 */
15 static std::vector<SpacedSeed>
16 parse_seeds(
const std::vector<std::string>& seed_strings);
/**
 * Construct a rolling hash over a raw character buffer.
 *
 * @param seq      Pointer to the sequence characters; roll() indexes it as
 *                 seq[pos + k - 1].
 * @param seq_len  Number of characters available at seq.
 * @param k        Window length; init()/roll() compare pos against
 *                 seq_len - k to detect the end of the sequence.
 * @param hash_num Number of hash values to keep; hashes_vector is resized
 *                 to this count.
 *
 * NOTE(review): whether the buffer is copied or only referenced is not
 * visible here -- confirm the lifetime requirements on seq.
 */
37 RollingHash(
const char* seq,
size_t seq_len,
unsigned k,
unsigned hash_num);
/**
 * Construct a rolling hash over a std::string.
 *
 * Convenience overload: the definition delegates to the
 * (const char*, size_t, unsigned, unsigned) constructor, passing
 * seq.c_str() and seq.size().
 *
 * @param seq      Sequence to hash.
 * @param k        Window length (see the pointer overload).
 * @param hash_num Number of hash values to keep.
 */
45 RollingHash(
const std::string& seq,
unsigned k,
unsigned hash_num);
/**
 * Access the hash values currently held by this object.
 *
 * @return Pointer to the first element of the internal hashes_vector
 *         (the definition returns hashes_vector.data()). The vector is
 *         resized to hash_num during construction, so the pointer is
 *         expected to address hash_num values.
 */
49 const uint64_t* hashes()
const;
/**
 * Get the current value of pos.
 *
 * NOTE(review): init() assigns std::numeric_limits<std::size_t>::max() to
 * pos on some paths (the exact conditions are only partly visible here), so
 * that value presumably means "no valid position" -- confirm.
 *
 * @return The stored pos.
 */
51 size_t get_pos()
const {
return pos; }
/**
 * Get the window length k supplied at construction.
 *
 * @return The stored k, the same value init()/roll() subtract from seq_len
 *         when checking bounds.
 */
52 unsigned get_k()
const {
return k; }
/**
 * Get the number of hash values kept by this object.
 *
 * @return The stored hash_num; hashes_vector is resized to this count
 *         during construction.
 */
53 unsigned get_hash_num()
const {
return hash_num; }
/** Number of hash values kept; fixed at construction (const). */
62 const unsigned hash_num;
/** Storage for the hash values; resized to hash_num during construction and
 *  exposed to callers through hashes(). */
64 std::vector<uint64_t> hashes_vector;
/** NOTE(review): presumably the running hash state for the forward strand
 *  -- confirm. Starts at 0. */
65 uint64_t forward_hash = 0;
/** NOTE(review): presumably the running hash state for the reverse strand
 *  -- confirm. Starts at 0. */
66 uint64_t reverse_hash = 0;
76 const std::vector<SpacedSeed>& seeds,
77 unsigned hash_num_per_seed);
80 const std::vector<SpacedSeed>& seeds,
81 unsigned hash_num_per_seed);
85 const std::vector<std::string>& seeds,
86 unsigned hash_num_per_seed);
89 const std::vector<std::string>& seeds,
90 unsigned hash_num_per_seed);
/**
 * Get the number of hash values requested per seed.
 *
 * The SeedRollingHash constructors pass seeds.size() * hash_num_per_seed
 * to RollingHash as the total hash count.
 *
 * @return The stored hash_num_per_seed.
 */
92 unsigned get_hash_num_per_seed()
const {
return hash_num_per_seed; }
/** Hash values requested per seed; fixed at construction (const). The
 *  constructors multiply it by the seed count to size the base class. */
99 const unsigned hash_num_per_seed;
/** The spaced seeds in use. The constructors taking seed strings fill this
 *  from parse_seeds(). */
100 std::vector<SpacedSeed> seeds;
112 hashes_vector.resize(hash_num);
118 :
RollingHash(seq.c_str(), seq.size(), k, hash_num)
121 inline SeedRollingHash::SeedRollingHash(
const char* seq,
124 const std::vector<SpacedSeed>& seeds,
125 unsigned hash_num_per_seed)
126 :
RollingHash(seq, seq_len, k, seeds.size() * hash_num_per_seed)
127 , hash_num_per_seed(hash_num_per_seed)
131 inline SeedRollingHash::SeedRollingHash(
const std::string& seq,
133 const std::vector<SpacedSeed>& seeds,
134 unsigned hash_num_per_seed)
135 : RollingHash(seq, k, seeds.size() * hash_num_per_seed)
136 , hash_num_per_seed(hash_num_per_seed)
140 inline SeedRollingHash::SeedRollingHash(
const char* seq,
143 const std::vector<std::string>& seeds,
144 unsigned hash_num_per_seed)
145 : RollingHash(seq, seq_len, k, seeds.size() * hash_num_per_seed)
146 , hash_num_per_seed(hash_num_per_seed)
147 , seeds(parse_seeds(seeds))
150 inline SeedRollingHash::SeedRollingHash(
const std::string& seq,
152 const std::vector<std::string>& seeds,
153 unsigned hash_num_per_seed)
154 : RollingHash(seq, k, seeds.size() * hash_num_per_seed)
155 , hash_num_per_seed(hash_num_per_seed)
156 , seeds(parse_seeds(seeds))
159 static std::vector<SpacedSeed>
160 parse_seeds(
const std::vector<std::string>& seed_strings)
162 std::vector<SpacedSeed> seed_set;
163 for (
const auto& seed_string : seed_strings) {
166 for (
const auto& c : seed_string) {
172 seed_set.push_back(seed);
178 #define ROLLING_HASH_INIT(CLASS, NTHASH_CALL) \
179 inline bool CLASS::init() \
182 pos = std::numeric_limits<std::size_t>::max(); \
186 while ((pos < seq_len - k + 1) && !(NTHASH_CALL)) { \
189 if (pos > seq_len - k) { \
190 pos = std::numeric_limits<std::size_t>::max(); \
198 #define ROLLING_HASH_ROLL(CLASS, NTHASH_CALL) \
199 inline bool CLASS::roll() \
204 if (pos > seq_len - k) { \
207 if (seed_tab[(unsigned char)(seq[pos + k - 1])] == seedN) { \
216 ROLLING_HASH_INIT(RollingHash,
223 hashes_vector.data()))
224 ROLLING_HASH_ROLL(RollingHash,
231 hashes_vector.data()))
233 ROLLING_HASH_INIT(SeedRollingHash,
242 hashes_vector.data()))
243 ROLLING_HASH_ROLL(SeedRollingHash,
253 hashes_vector.data()))
255 #undef ROLLING_HASH_INIT
256 #undef ROLLING_HASH_ROLL
258 inline const uint64_t*
259 RollingHash::hashes()
const
261 return hashes_vector.data();