8#ifndef BTLLIB_NTHASH_LOWLEVEL_HPP
9#define BTLLIB_NTHASH_LOWLEVEL_HPP
11#include "btllib/nthash_consts.hpp"
22canonical(
const T fwd,
const T rev)
// Compile-time guard: the check requires that adding 1 to the largest
// `unsigned` value yields 0, i.e. that unsigned arithmetic wraps around.
// The message states that the canonical hash computation depends on this.
static_assert(std::numeric_limits<unsigned>::max() + 1 == 0,
              "Integers don't overflow on this platform which is necessary for "
              "ntHash canonical hash computation.");
// A spaced seed, stored as a sequence of unsigned values.
using SpacedSeed = std::vector<unsigned>;

// The blocks of one spaced seed; every block is a pair of unsigned values.
// NOTE(review): presumably the pair delimits a run of positions in the k-mer;
// confirm against the code that builds the blocks.
using SpacedSeedBlocks = std::vector<std::array<unsigned, 2>>;

// The monomers of one spaced seed. The multi-seed hashing code uses each
// element as a position into the k-mer sequence.
using SpacedSeedMonomers = std::vector<unsigned>;
/**
 * Split-rotate a 64-bit word to the left by one position.
 *
 * The word is handled as two independent lanes: the upper 31 bits
 * (bits 63..33) and the lower 33 bits (bits 32..0). Each lane is rotated
 * left by one bit within itself.
 *
 * @param x Value to rotate.
 * @return The split-rotated value.
 */
inline uint64_t
srol(const uint64_t x)
{
  // Bits that fall off the top of each lane re-enter at that lane's bottom:
  // bit 63 moves to bit 33 and bit 32 moves to bit 0.
  const uint64_t wrapped = ((x & 0x8000000000000000ULL) >> 30) |
                           ((x & 0x100000000ULL) >> 32);
  // Shift both lanes up by one; bit 33 is cleared so that the old bit 32 does
  // not leak from the lower lane into the upper one.
  return ((x << 1) & 0xFFFFFFFDFFFFFFFFULL) | wrapped;
}
/**
 * Split-rotate a 64-bit word to the left by an arbitrary number of positions.
 *
 * The word is handled as two independent lanes: the upper 31 bits
 * (bits 63..33) and the lower 33 bits (bits 32..0). Each lane is rotated
 * left by `d` within itself, which is the same as applying the one-step
 * split rotation `d` times.
 *
 * The rotation amount is reduced modulo each lane's width, so every value of
 * `d` is well-defined. In particular `d == 0` returns `x` unchanged, without
 * evaluating a shift by 64 bits (which is undefined behaviour).
 *
 * @param x Value to rotate.
 * @param d Number of positions to rotate by.
 * @return The split-rotated value.
 */
inline uint64_t
srol(const uint64_t x, const unsigned d)
{
  constexpr unsigned upper_width = 31;
  constexpr unsigned lower_width = 33;
  constexpr uint64_t upper_mask = 0x7FFFFFFFULL;  // 31 low bits set
  constexpr uint64_t lower_mask = 0x1FFFFFFFFULL; // 33 low bits set

  // A lane of width w returns to its start after w steps.
  const unsigned upper_rot = d % upper_width;
  const unsigned lower_rot = d % lower_width;

  const uint64_t upper = x >> lower_width;
  const uint64_t lower = x & lower_mask;

  // Each lane occupies fewer than 64 bits, so the complementary right shift
  // (by at most the lane width) stays in range and yields 0 for a zero
  // rotation.
  const uint64_t upper_rotated =
    ((upper << upper_rot) | (upper >> (upper_width - upper_rot))) & upper_mask;
  const uint64_t lower_rotated =
    ((lower << lower_rot) | (lower >> (lower_width - lower_rot))) & lower_mask;

  return (upper_rotated << lower_width) | lower_rotated;
}
/**
 * Split-rotate a 64-bit word to the right by one position.
 *
 * The word is handled as two independent lanes: the upper 31 bits
 * (bits 63..33) and the lower 33 bits (bits 32..0). Each lane is rotated
 * right by one bit within itself.
 *
 * @param x Value to rotate.
 * @return The split-rotated value.
 */
inline uint64_t
sror(const uint64_t x)
{
  // Bits that fall off the bottom of each lane re-enter at that lane's top:
  // bit 33 moves to bit 63 and bit 0 moves to bit 32.
  const uint64_t wrapped = ((x & 0x200000000ULL) << 30) | ((x & 1ULL) << 32);
  // Shift both lanes down by one; bit 32 is cleared so that the old bit 33
  // does not leak from the upper lane into the lower one.
  return ((x >> 1) & 0xFFFFFFFEFFFFFFFFULL) | wrapped;
}
100ntf64(
const char* kmer_seq,
unsigned k);
112ntr64(
const char* kmer_seq,
unsigned k);
128 unsigned char char_out,
129 unsigned char char_in);
146 unsigned char char_out,
147 unsigned char char_in);
158ntc64(
const char* kmer_seq,
unsigned k);
172ntc64(
const char* kmer_seq,
unsigned k, uint64_t& fh_val, uint64_t& rh_val);
187 unsigned char char_in,
205 unsigned char char_out,
206 unsigned char char_in);
221 unsigned char char_out,
222 unsigned char char_in);
238 unsigned char char_in,
252nte64(uint64_t bh_val,
unsigned k,
unsigned h, uint64_t* h_val);
263ntmc64(
const char* kmer_seq,
unsigned k,
unsigned m, uint64_t* h_val);
277ntmc64(
const char* kmer_seq,
296ntmc64(
unsigned char char_out,
297 unsigned char char_in,
316ntmc64l(
unsigned char char_out,
317 unsigned char char_in,
337ntc64(
const char* kmer_seq,
unsigned k, uint64_t& h_val,
unsigned& loc_n);
353ntmc64(
const char* kmer_seq,
397ntmc64(
const char* kmer_seq,
423ntmc64(
const char* kmer_seq,
446ntmc64(
unsigned char char_out,
447 unsigned char char_in,
471 const char* seed_seq,
472 const char* kmer_seq,
492 const char* kmer_seq,
493 const std::vector<unsigned>& positions,
494 const std::vector<unsigned char>& new_bases,
524ntmsm64(
const char* kmer_seq,
525 const std::vector<SpacedSeedBlocks>& seeds_blocks,
526 const std::vector<SpacedSeedMonomers>& seeds_monomers,
530 uint64_t* fh_nomonos,
531 uint64_t* rh_nomonos,
537#define NTMSM64(ROL_HANDLING, IN_HANDLING, OUT_HANDLING, ROR_HANDLING) \
538 unsigned char char_out, char_in; \
539 uint64_t fh_seed, rh_seed; \
540 unsigned i_out, i_in, i_base; \
541 for (unsigned i_seed = 0; i_seed < m; i_seed++) { \
543 for (const auto& block : seeds_blocks[i_seed]) \
547 fh_seed ^= MS_TAB(char_out, k - i_out); \
548 fh_seed ^= MS_TAB(char_in, k - i_in); \
549 rh_seed ^= MS_TAB(char_out & CP_OFF, i_out); \
550 rh_seed ^= MS_TAB(char_in & CP_OFF, i_in); \
553 fh_nomonos[i_seed] = fh_seed; \
554 rh_nomonos[i_seed] = rh_seed; \
555 for (const auto& pos : seeds_monomers[i_seed]) { \
556 fh_seed ^= MS_TAB((unsigned char)kmer_seq[pos + 1], k - 1 - pos); \
557 rh_seed ^= MS_TAB((unsigned char)kmer_seq[pos + 1] & CP_OFF, pos); \
559 fh_val[i_seed] = fh_seed; \
560 rh_val[i_seed] = rh_seed; \
561 i_base = i_seed * m2; \
562 h_val[i_base] = canonical(fh_seed, rh_seed); \
563 for (unsigned i_hash = 1; i_hash < m2; i_hash++) { \
564 h_val[i_base + i_hash] = h_val[i_base] * (i_hash ^ k * MULTISEED); \
565 h_val[i_base + i_hash] ^= h_val[i_base + i_hash] >> MULTISHIFT; \
591ntmsm64(
const char* kmer_seq,
592 const std::vector<SpacedSeedBlocks>& seeds_blocks,
593 const std::vector<SpacedSeedMonomers>& seeds_monomers,
597 uint64_t* fh_nomonos,
598 uint64_t* rh_nomonos,
625ntmsm64l(
const char* kmer_seq,
626 const std::vector<SpacedSeedBlocks>& seeds_blocks,
627 const std::vector<SpacedSeedMonomers>& seeds_monomers,
631 uint64_t* fh_nomonos,
632 uint64_t* rh_nomonos,
659ntmsm64(
const char* kmer_seq,
661 const std::vector<SpacedSeedBlocks>& seeds_blocks,
662 const std::vector<SpacedSeedMonomers>& seeds_monomers,
666 uint64_t* fh_nomonos,
667 uint64_t* rh_nomonos,
694ntmsm64l(
const char* kmer_seq,
696 const std::vector<SpacedSeedBlocks>& seeds_blocks,
697 const std::vector<SpacedSeedMonomers>& seeds_monomers,
701 uint64_t* fh_nomonos,
702 uint64_t* rh_nomonos,
Definition: bloom_filter.hpp:16
uint64_t ntf64l(uint64_t rh_val, unsigned k, unsigned char char_out, unsigned char char_in)
void sub_hash(uint64_t fh_val, uint64_t rh_val, const char *kmer_seq, const std::vector< unsigned > &positions, const std::vector< unsigned char > &new_bases, unsigned k, unsigned m, uint64_t *h_val)
uint64_t ntr64l(uint64_t fh_val, unsigned k, unsigned char char_out, unsigned char char_in)
uint64_t srol(const uint64_t x)
Definition: nthash_lowlevel.hpp:51
uint64_t mask_hash(uint64_t &fk_val, uint64_t &rk_val, const char *seed_seq, const char *kmer_seq, unsigned k)
uint64_t ntc64l(unsigned char char_out, unsigned char char_in, unsigned k, uint64_t &fh_val, uint64_t &rh_val)
uint64_t sror(const uint64_t x)
Definition: nthash_lowlevel.hpp:85
uint64_t ntc64(const char *kmer_seq, unsigned k)
uint64_t ntr64(const char *kmer_seq, unsigned k)
void nte64(uint64_t bh_val, unsigned k, unsigned h, uint64_t *h_val)
uint64_t ntf64(const char *kmer_seq, unsigned k)