btllib
|
Classes | |
class | BlindNtHash |
class | BloomFilter |
class | CountingBloomFilter |
class | Indexlr |
class | KmerBloomFilter |
class | KmerCountingBloomFilter |
class | MIBloomFilter |
class | NtHash |
class | ProcessPipeline |
class | SeedBloomFilter |
class | SeedNtHash |
class | SeqReader |
class | SeqWriter |
Typedefs | |
using | CountingBloomFilter8 = CountingBloomFilter< uint8_t > |
using | CountingBloomFilter16 = CountingBloomFilter< uint16_t > |
using | CountingBloomFilter32 = CountingBloomFilter< uint32_t > |
using | KmerCountingBloomFilter8 = KmerCountingBloomFilter< uint8_t > |
using | KmerCountingBloomFilter16 = KmerCountingBloomFilter< uint16_t > |
using | KmerCountingBloomFilter32 = KmerCountingBloomFilter< uint32_t > |
using | NTHASH_HASH_NUM_TYPE = uint8_t |
using | NTHASH_K_TYPE = uint16_t |
using | SpacedSeed = std::vector< unsigned > |
using | SpacedSeedBlocks = std::vector< std::array< unsigned, 2 > > |
using | SpacedSeedMonomers = std::vector< unsigned > |
using | PipeId = unsigned long |
using | PipelineId = unsigned long |
Functions | |
std::vector< SpacedSeed > | parse_seeds (const std::vector< std::string > &seed_strings) |
void | parse_seeds (const std::vector< std::string > &seed_strings, std::vector< SpacedSeedBlocks > &blocks, std::vector< SpacedSeedMonomers > &monomers) |
void | parsed_seeds_to_blocks (const std::vector< SpacedSeed > &seeds, unsigned k, std::vector< SpacedSeedBlocks > &blocks, std::vector< SpacedSeedMonomers > &monomers) |
void | check_seeds (const std::vector< std::string > &seeds, unsigned k) |
BTLLIB_NTHASH_INIT (NtHash, ntmc64(seq+pos, k, hash_num, forward_hash, reverse_hash, posN, hashes_array.get()),) BTLLIB_NTHASH_ROLL(NtHash | |
roll () | |
ntmc64 (seq[pos], seq[pos+k], k, hash_num, forward_hash, reverse_hash, hashes_array.get()) | |
BTLLIB_NTHASH_ROLL_BACK (NtHash, roll_back(), ntmc64l(seq[pos+k - 1], seq[pos - 1], k, hash_num, forward_hash, reverse_hash, hashes_array.get());,) BTLLIB_NTHASH_PEEK(NtHash | |
peek () | |
ntmc64 (seq[pos], seq[pos+k], k, hash_num, forward_hash_tmp, reverse_hash_tmp, hashes_array.get()) | |
BTLLIB_NTHASH_PEEK (NtHash, peek(char char_in), { uint64_t forward_hash_tmp=forward_hash;uint64_t reverse_hash_tmp=reverse_hash;ntmc64(seq[pos], char_in, k, hash_num, forward_hash_tmp, reverse_hash_tmp, hashes_array.get());},) BTLLIB_NTHASH_PEEK(NtHash | |
peek_back () | |
ntmc64l (seq[pos+k - 1], seq[pos - 1], k, hash_num, forward_hash_tmp, reverse_hash_tmp, hashes_array.get()) | |
BTLLIB_NTHASH_PEEK (NtHash, peek_back(char char_in), { uint64_t forward_hash_tmp=forward_hash;uint64_t reverse_hash_tmp=reverse_hash;ntmc64l(seq[pos+k - 1], char_in, k, hash_num, forward_hash_tmp, reverse_hash_tmp, hashes_array.get());},) BTLLIB_NTHASH_INIT(BlindNtHash | |
ntmc64 (seq.get()+pos, k, hash_num, forward_hash, reverse_hash, posN, hashes_array.get()) | |
BTLLIB_NTHASH_ROLL (BlindNtHash, roll(char char_in), { ntmc64(seq[pos % seq_len], char_in, k, hash_num, forward_hash, reverse_hash, hashes_array.get());seq[pos % seq_len]=char_in;},) BTLLIB_NTHASH_ROLL_BACK(BlindNtHash | |
roll_back (char char_in) | |
BTLLIB_NTHASH_PEEK (BlindNtHash, peek(char char_in), { uint64_t forward_hash_tmp=forward_hash;uint64_t reverse_hash_tmp=reverse_hash;ntmc64(seq[pos % seq_len], char_in, k, hash_num, forward_hash_tmp, reverse_hash_tmp, hashes_array.get());},) BTLLIB_NTHASH_PEEK(BlindNtHash | |
peek_back (char char_in) | |
ntmc64l (seq[(pos+k - 1) % seq_len], char_in, k, hash_num, forward_hash_tmp, reverse_hash_tmp, hashes_array.get()) | |
BTLLIB_NTHASH_INIT (SeedNtHash, ntmsm64(nthash.seq+nthash.pos, blocks, monomers, nthash.k, blocks.size(), hash_num_per_seed, fh_no_monomers.get(), rh_no_monomers.get(), forward_hash.get(), reverse_hash.get(), posN, nthash.hashes_array.get()), nthash.) BTLLIB_NTHASH_ROLL(SeedNtHash | |
ntmsm64 (nthash.seq+nthash.pos, blocks, monomers, nthash.k, blocks.size(), hash_num_per_seed, fh_no_monomers.get(), rh_no_monomers.get(), forward_hash.get(), reverse_hash.get(), nthash.hashes_array.get()) | |
nthash | BTLLIB_NTHASH_ROLL_BACK (SeedNtHash, roll_back(), ntmsm64l(nthash.seq+nthash.pos - 1, blocks, monomers, nthash.k, blocks.size(), hash_num_per_seed, fh_no_monomers.get(), rh_no_monomers.get(), forward_hash.get(), reverse_hash.get(), nthash.hashes_array.get());, nthash.) BTLLIB_NTHASH_PEEK(SeedNtHash |
nthash std::unique_ptr< uint64_t[]> | rh_no_monomers_tmp (new uint64_t[blocks.size()]) |
std::unique_ptr< uint64_t[]> | forward_hash_tmp (new uint64_t[blocks.size()]) |
std::unique_ptr< uint64_t[]> | reverse_hash_tmp (new uint64_t[blocks.size()]) |
ntmsm64 (nthash.seq+nthash.pos, blocks, monomers, nthash.k, blocks.size(), hash_num_per_seed, fh_no_monomers_tmp.get(), rh_no_monomers_tmp.get(), forward_hash_tmp.get(), reverse_hash_tmp.get(), nthash.hashes_array.get()) | |
nthash | BTLLIB_NTHASH_PEEK (SeedNtHash, peek(char char_in), { std::unique_ptr< uint64_t[]> fh_no_monomers_tmp(new uint64_t[blocks.size()]);std::unique_ptr< uint64_t[]> rh_no_monomers_tmp(new uint64_t[blocks.size()]);std::unique_ptr< uint64_t[]> forward_hash_tmp(new uint64_t[blocks.size()]);std::unique_ptr< uint64_t[]> reverse_hash_tmp(new uint64_t[blocks.size()]);std::memcpy(fh_no_monomers_tmp.get(), forward_hash.get(), blocks.size() *sizeof(uint64_t));std::memcpy(rh_no_monomers_tmp.get(), reverse_hash.get(), blocks.size() *sizeof(uint64_t));std::memcpy(forward_hash_tmp.get(), forward_hash.get(), blocks.size() *sizeof(uint64_t));std::memcpy(reverse_hash_tmp.get(), reverse_hash.get(), blocks.size() *sizeof(uint64_t));ntmsm64(nthash.seq+nthash.pos, char_in, blocks, monomers, nthash.k, blocks.size(), hash_num_per_seed, fh_no_monomers_tmp.get(), rh_no_monomers_tmp.get(), forward_hash_tmp.get(), reverse_hash_tmp.get(), nthash.hashes_array.get());}, nthash.) BTLLIB_NTHASH_PEEK(SeedNtHash |
ntmsm64l (nthash.seq+nthash.pos - 1, blocks, monomers, nthash.k, blocks.size(), hash_num_per_seed, fh_no_monomers_tmp.get(), rh_no_monomers_tmp.get(), forward_hash_tmp.get(), reverse_hash_tmp.get(), nthash.hashes_array.get()) | |
template<typename T > | |
T | canonical (const T fwd, const T rev) |
uint64_t | srol (const uint64_t x) |
uint64_t | srol (const uint64_t x, const unsigned d) |
uint64_t | sror (const uint64_t x) |
uint64_t | ntf64 (const char *kmer_seq, unsigned k) |
uint64_t | ntr64 (const char *kmer_seq, unsigned k) |
uint64_t | ntf64 (uint64_t fh_val, unsigned k, unsigned char char_out, unsigned char char_in) |
uint64_t | ntr64 (uint64_t rh_val, unsigned k, unsigned char char_out, unsigned char char_in) |
uint64_t | ntc64 (const char *kmer_seq, unsigned k) |
uint64_t | ntc64 (const char *kmer_seq, unsigned k, uint64_t &fh_val, uint64_t &rh_val) |
uint64_t | ntc64 (unsigned char char_out, unsigned char char_in, unsigned k, uint64_t &fh_val, uint64_t &rh_val) |
uint64_t | ntf64l (uint64_t rh_val, unsigned k, unsigned char char_out, unsigned char char_in) |
uint64_t | ntr64l (uint64_t fh_val, unsigned k, unsigned char char_out, unsigned char char_in) |
uint64_t | ntc64l (unsigned char char_out, unsigned char char_in, unsigned k, uint64_t &fh_val, uint64_t &rh_val) |
void | nte64 (uint64_t bh_val, unsigned k, unsigned h, uint64_t *h_val) |
void | ntmc64 (const char *kmer_seq, unsigned k, unsigned m, uint64_t *h_val) |
void | ntmc64 (const char *kmer_seq, unsigned k, unsigned m, uint64_t &fh_val, uint64_t &rh_val, uint64_t *h_val) |
void | ntmc64 (unsigned char char_out, unsigned char char_in, unsigned k, unsigned m, uint64_t &fh_val, uint64_t &rh_val, uint64_t *h_val) |
void | ntmc64l (unsigned char char_out, unsigned char char_in, unsigned k, unsigned m, uint64_t &fh_val, uint64_t &rh_val, uint64_t *h_val) |
bool | ntc64 (const char *kmer_seq, unsigned k, uint64_t &h_val, unsigned &loc_n) |
bool | ntmc64 (const char *kmer_seq, unsigned k, unsigned m, unsigned &loc_n, uint64_t *h_val) |
bool | ntc64 (const char *kmer_seq, unsigned k, uint64_t &fh_val, uint64_t &rh_val, uint64_t &h_val, unsigned &loc_n) |
bool | ntmc64 (const char *kmer_seq, unsigned k, unsigned m, uint64_t &fh_val, uint64_t &rh_val, unsigned &loc_n, uint64_t *h_val) |
bool | ntmc64 (const char *kmer_seq, unsigned k, unsigned m, uint64_t &fh_val, uint64_t &rh_val, unsigned &loc_n, uint64_t *h_val, bool &h_stn) |
void | ntmc64 (unsigned char char_out, unsigned char char_in, unsigned k, unsigned m, uint64_t &fh_val, uint64_t &rh_val, uint64_t *h_val, bool &h_stn) |
uint64_t | mask_hash (uint64_t &fk_val, uint64_t &rk_val, const char *seed_seq, const char *kmer_seq, unsigned k) |
void | sub_hash (uint64_t fh_val, uint64_t rh_val, const char *kmer_seq, const std::vector< unsigned > &positions, const std::vector< unsigned char > &new_bases, unsigned k, unsigned m, uint64_t *h_val) |
bool | ntmsm64 (const char *kmer_seq, const std::vector< SpacedSeedBlocks > &seeds_blocks, const std::vector< SpacedSeedMonomers > &seeds_monomers, unsigned k, unsigned m, unsigned m2, uint64_t *fh_nomonos, uint64_t *rh_nomonos, uint64_t *fh_val, uint64_t *rh_val, unsigned &loc_n, uint64_t *h_val) |
void | ntmsm64 (const char *kmer_seq, const std::vector< SpacedSeedBlocks > &seeds_blocks, const std::vector< SpacedSeedMonomers > &seeds_monomers, unsigned k, unsigned m, unsigned m2, uint64_t *fh_nomonos, uint64_t *rh_nomonos, uint64_t *fh_val, uint64_t *rh_val, uint64_t *h_val) |
void | ntmsm64l (const char *kmer_seq, const std::vector< SpacedSeedBlocks > &seeds_blocks, const std::vector< SpacedSeedMonomers > &seeds_monomers, unsigned k, unsigned m, unsigned m2, uint64_t *fh_nomonos, uint64_t *rh_nomonos, uint64_t *fh_val, uint64_t *rh_val, uint64_t *h_val) |
void | ntmsm64 (const char *kmer_seq, char in, const std::vector< SpacedSeedBlocks > &seeds_blocks, const std::vector< SpacedSeedMonomers > &seeds_monomers, unsigned k, unsigned m, unsigned m2, uint64_t *fh_nomonos, uint64_t *rh_nomonos, uint64_t *fh_val, uint64_t *rh_val, uint64_t *h_val) |
void | ntmsm64l (const char *kmer_seq, char in, const std::vector< SpacedSeedBlocks > &seeds_blocks, const std::vector< SpacedSeedMonomers > &seeds_monomers, unsigned k, unsigned m, unsigned m2, uint64_t *fh_nomonos, uint64_t *rh_nomonos, uint64_t *fh_val, uint64_t *rh_val, uint64_t *h_val) |
void | reverse_complement (std::string &seq) |
std::string | get_reverse_complement (const std::string &seq) |
std::string | get_time () |
void | log_info (const std::string &msg) |
void | log_warning (const std::string &msg) |
void | log_error (const std::string &msg) |
void | check_info (bool condition, const std::string &msg) |
void | check_warning (bool condition, const std::string &msg) |
void | check_error (bool condition, const std::string &msg) |
std::string | get_strerror () |
void | check_stream (const std::ios &stream, const std::string &name) |
std::vector< std::string > | split (const std::string &s, const std::string &delim) |
std::string | join (const std::vector< std::string > &s, const std::string &delim) |
void | ltrim (std::string &s) |
void | ltrim (btllib::CString &s) |
void | rtrim (std::string &s) |
void | rtrim (btllib::CString &s) |
void | trim (std::string &s) |
void | trim (btllib::CString &s) |
bool | startswith (std::string s, std::string prefix) |
bool | endswith (std::string s, std::string suffix) |
std::string | get_basename (const std::string &path) |
std::string | get_dirname (const std::string &path) |
Functions for sequence manipulation.
Functions for logging and error checking.
Random utility functions.
void btllib::check_error | ( | bool | condition, |
const std::string & | msg | ||
) |
Conditionally log error level events. The program exits if the condition is true.
condition | If this is true, the message is printed and the program exits. |
msg | Message to print. |
void btllib::check_info | ( | bool | condition, |
const std::string & | msg | ||
) |
Conditionally log info level events.
condition | If this is true, the message is printed. |
msg | Message to print. |
void btllib::check_stream | ( | const std::ios & | stream, |
const std::string & | name | ||
) |
Check whether the stream is good. Program prints an error message and exits if not.
stream | Stream to check goodness of. |
name | Name of the stream, e.g. filepath or stdin |
void btllib::check_warning | ( | bool | condition, |
const std::string & | msg | ||
) |
Conditionally log warning level events.
condition | If this is true, the message is printed. |
msg | Message to print. |
bool btllib::endswith | ( | std::string | s, |
std::string | suffix | ||
) |
Check whether the given string ends with a suffix.
s | String to check. |
suffix | Suffix to check for. |
std::string btllib::get_basename | ( | const std::string & | path | ) |
Equivalent to the GNU implementation of basename, but returns a string copy of the result.
path | The path to get basename from. |
std::string btllib::get_dirname | ( | const std::string & | path | ) |
Equivalent to the GNU implementation of dirname, but returns a string copy of the result.
path | The path to get dirname from. |
std::string btllib::get_reverse_complement | ( | const std::string & | seq | ) |
Obtain a reverse complement of the provided sequence. The argument sequence is left untouched.
seq | Sequence to reverse complement. |
std::string btllib::join | ( | const std::vector< std::string > & | s, |
const std::string & | delim | ||
) |
Join a vector of strings into a single string with a delimiter.
s | Vector of strings to join. |
delim | Delimiter to join the strings with. |
void btllib::log_error | ( | const std::string & | msg | ) |
Log error level events.
msg | Message to print. |
void btllib::log_info | ( | const std::string & | msg | ) |
Log info level events.
msg | Message to print. |
void btllib::log_warning | ( | const std::string & | msg | ) |
Log warning level events.
msg | Message to print. |
void btllib::ltrim | ( | std::string & | s | ) |
Trim whitespace on the left side of the given string.
s | String to trim, edited in-place. |
uint64_t btllib::mask_hash | ( | uint64_t & | fk_val, |
uint64_t & | rk_val, | ||
const char * | seed_seq, | ||
const char * | kmer_seq, | ||
unsigned | k | ||
) |
Generate a hash value for the input spaced seed by excluding all don't care positions.
fk_val | Forward hash value of the k-mer (ignoring the spaced seed). |
rk_val | Reverse hash value of the k-mer (ignoring the spaced seed). |
seed_seq | Array of characters representing the spaced seed. Anything other than '1' is treated as a don't care. |
kmer_seq | Array of characters representing the k-mer. |
k | k-mer size. |
uint64_t btllib::ntc64 | ( | const char * | kmer_seq, |
unsigned | k | ||
) |
Generate a canonical hash value for the first k-mer.
kmer_seq | C array containing the sequence's characters. |
k | k-mer size. |
uint64_t btllib::ntc64 | ( | const char * | kmer_seq, |
unsigned | k, | ||
uint64_t & | fh_val, | ||
uint64_t & | rh_val | ||
) |
Generate a canonical hash value for the first k-mer and update both strands' hash values.
kmer_seq | C array containing the sequence's characters. |
k | k-mer size. |
fh_val | Forward strand hash value container. |
rh_val | Reverse strand hash value container. |
bool btllib::ntc64 | ( | const char * | kmer_seq, |
unsigned | k, | ||
uint64_t & | fh_val, | ||
uint64_t & | rh_val, | ||
uint64_t & | h_val, | ||
unsigned & | loc_n | ||
) |
Generate a canonical hash value for the first k-mer, find the first ignored character and return the strand-specific hash values.
kmer_seq | Array containing the sequence's characters. |
k | k-mer size. |
fh_val | Container for the forward hash value. |
rh_val | Container for the reverse hash value. |
h_val | Container for the output hash value. |
loc_n | Location of the first unknown character. |
bool btllib::ntc64 | ( | const char * | kmer_seq, |
unsigned | k, | ||
uint64_t & | h_val, | ||
unsigned & | loc_n | ||
) |
Generate a canonical hash value for the first k-mer and find the first ignored character.
kmer_seq | Array containing the sequence's characters. |
k | k-mer size. |
h_val | Container for the output hash value. |
loc_n | Location of the first unknown character. |
uint64_t btllib::ntc64 | ( | unsigned char | char_out, |
unsigned char | char_in, | ||
unsigned | k, | ||
uint64_t & | fh_val, | ||
uint64_t & | rh_val | ||
) |
Perform a roll operation on the sequence and generate a canonical hash value.
char_out | Character to be removed. |
char_in | Character to be included. |
k | k-mer size. |
fh_val | Previous hash value for the forward strand. |
rh_val | Previous hash value for the reverse-complement. |
uint64_t btllib::ntc64l | ( | unsigned char | char_out, |
unsigned char | char_in, | ||
unsigned | k, | ||
uint64_t & | fh_val, | ||
uint64_t & | rh_val | ||
) |
Perform a roll-back operation on the canonical hash value and update previous hashes for both strands.
char_out | Character to be removed. |
char_in | Character to be included. |
k | k-mer size. |
fh_val | Previous forward hash value computed for the sequence. |
rh_val | Previous reverse hash value computed for the sequence. |
void btllib::nte64 | ( | uint64_t | bh_val, |
unsigned | k, | ||
unsigned | h, | ||
uint64_t * | h_val | ||
) |
Extend hash array using a base hash value.
bh_val | Base hash value. |
k | k-mer size. |
h | Size of the resulting hash array (number of extra hashes plus one, since the base hash is included). |
h_val | Array of size h for storing the output hashes. |
uint64_t btllib::ntf64 | ( | const char * | kmer_seq, |
unsigned | k | ||
) |
Generate the forward-strand hash value of the first k-mer in the sequence.
kmer_seq | C array containing the sequence's characters. |
k | k-mer size. |
uint64_t btllib::ntf64 | ( | uint64_t | fh_val, |
unsigned | k, | ||
unsigned char | char_out, | ||
unsigned char | char_in | ||
) |
Perform a roll operation on the forward strand by removing char_out and including char_in.
fh_val | Previous hash value computed for the sequence. |
k | k-mer size. |
char_out | Character to be removed. |
char_in | Character to be included. |
uint64_t btllib::ntf64l | ( | uint64_t | rh_val, |
unsigned | k, | ||
unsigned char | char_out, | ||
unsigned char | char_in | ||
) |
Perform a roll-back operation on the forward strand.
rh_val | Previous forward hash value computed for the sequence. |
k | k-mer size. |
char_out | Character to be removed. |
char_in | Character to be included. |
void btllib::ntmc64 | ( | const char * | kmer_seq, |
unsigned | k, | ||
unsigned | m, | ||
uint64_t & | fh_val, | ||
uint64_t & | rh_val, | ||
uint64_t * | h_val | ||
) |
Generate multiple canonical hash values for the first k-mer and return strand-specific hash values.
kmer_seq | Array containing the sequence's characters. |
k | k-mer size. |
m | Number of hashes per k-mer. |
fh_val | Unsigned 64-bit int container for the forward hash. |
rh_val | Unsigned 64-bit int container for the reverse-complement hash. |
h_val | Array of size m for storing the hash values. |
bool btllib::ntmc64 | ( | const char * | kmer_seq, |
unsigned | k, | ||
unsigned | m, | ||
uint64_t & | fh_val, | ||
uint64_t & | rh_val, | ||
unsigned & | loc_n, | ||
uint64_t * | h_val | ||
) |
Generate multiple canonical hash values for the first k-mer, find the first ignored character and return the strand-specific hash values.
kmer_seq | Array containing the sequence's characters. |
k | k-mer size. |
m | Number of hashes per k-mer. |
fh_val | Container for the forward hash value. |
rh_val | Container for the reverse hash value. |
loc_n | Location of the first unknown character. |
h_val | Array of size m for storing the output hash values. |
bool btllib::ntmc64 | ( | const char * | kmer_seq, |
unsigned | k, | ||
unsigned | m, | ||
uint64_t & | fh_val, | ||
uint64_t & | rh_val, | ||
unsigned & | loc_n, | ||
uint64_t * | h_val, | ||
bool & | h_stn | ||
) |
Generate multiple canonical hash values for the first k-mer, find the first ignored character, and return the strand-specific hash values and strand selections.
kmer_seq | Array containing the sequence's characters. |
k | k-mer size. |
m | Number of hashes per k-mer. |
fh_val | Container for the forward hash value. |
rh_val | Container for the reverse hash value. |
loc_n | Location of the first unknown character. |
h_val | Array of size m for storing the output hash values. |
h_stn | true if the reverse strand was selected, otherwise false. |
void btllib::ntmc64 | ( | const char * | kmer_seq, |
unsigned | k, | ||
unsigned | m, | ||
uint64_t * | h_val | ||
) |
Generate multiple canonical hash values for the first k-mer.
kmer_seq | Array containing the sequence's characters. |
k | k-mer size. |
m | Number of hashes per k-mer. |
h_val | Array of size m for storing the hash values. |
bool btllib::ntmc64 | ( | const char * | kmer_seq, |
unsigned | k, | ||
unsigned | m, | ||
unsigned & | loc_n, | ||
uint64_t * | h_val | ||
) |
Generate multiple canonical hash values for the first k-mer and find the first ignored character.
kmer_seq | Array containing the sequence's characters. |
k | k-mer size. |
m | Number of hashes per k-mer. |
h_val | Array of size m for storing the output hash values. |
loc_n | Location of the first unknown character. |
void btllib::ntmc64 | ( | unsigned char | char_out, |
unsigned char | char_in, | ||
unsigned | k, | ||
unsigned | m, | ||
uint64_t & | fh_val, | ||
uint64_t & | rh_val, | ||
uint64_t * | h_val | ||
) |
Generate a new canonical hash value by performing a roll operation.
char_out | Character to be removed. |
char_in | Character to be included. |
k | k-mer size. |
m | Number of hashes per k-mer. |
fh_val | Previous forward hash value. |
rh_val | Previous reverse hash value. |
h_val | Array of size m for storing the output hash values. |
void btllib::ntmc64 | ( | unsigned char | char_out, |
unsigned char | char_in, | ||
unsigned | k, | ||
unsigned | m, | ||
uint64_t & | fh_val, | ||
uint64_t & | rh_val, | ||
uint64_t * | h_val, | ||
bool & | h_stn | ||
) |
Generate multiple canonical hash values by performing a roll operation, returning the strand-specific hash values and strand selections.
char_out | Character to be removed. |
char_in | Character to be included. |
k | k-mer size. |
m | Number of hashes per k-mer. |
fh_val | Container for the forward hash value. |
rh_val | Container for the reverse hash value. |
h_val | Array of size m for storing the output hash values. |
h_stn | true if the reverse strand was selected, otherwise false. |
void btllib::ntmc64l | ( | unsigned char | char_out, |
unsigned char | char_in, | ||
unsigned | k, | ||
unsigned | m, | ||
uint64_t & | fh_val, | ||
uint64_t & | rh_val, | ||
uint64_t * | h_val | ||
) |
Generate a new canonical hash value by performing a roll-back operation.
char_out | Character to be removed. |
char_in | Character to be included. |
k | k-mer size. |
m | Number of hashes per k-mer. |
fh_val | Previous forward hash value. |
rh_val | Previous reverse hash value. |
h_val | Array of size m for storing the output hash values. |
void btllib::ntmsm64 | ( | const char * | kmer_seq, |
char | in, | ||
const std::vector< SpacedSeedBlocks > & | seeds_blocks, | ||
const std::vector< SpacedSeedMonomers > & | seeds_monomers, | ||
unsigned | k, | ||
unsigned | m, | ||
unsigned | m2, | ||
uint64_t * | fh_nomonos, | ||
uint64_t * | rh_nomonos, | ||
uint64_t * | fh_val, | ||
uint64_t * | rh_val, | ||
uint64_t * | h_val | ||
) |
Generate multiple hash values for the input spaced seeds and the next k-mer by performing a forward peek operation.
kmer_seq | Array of characters representing the previous k-mer. |
in | Character to be included. |
seeds_blocks | Vector of SpacedSeedBlocks objects representing the seeds' blocks. |
seeds_monomers | List of the positions that represent blocks of size one for each seed. |
k | k-mer size. |
m | Number of spaced seeds. |
m2 | Number of hashes per seed. |
fh_nomonos | Previous forward hash values before including the size-one blocks. |
rh_nomonos | Previous reverse hash values before including the size-one blocks. |
fh_val | Previous forward hash values after including the size-one blocks. |
rh_val | Previous reverse hash values after including the size-one blocks. |
h_val | Array of size m * m2 for storing the output hash values. |
void btllib::ntmsm64 | ( | const char * | kmer_seq, |
const std::vector< SpacedSeedBlocks > & | seeds_blocks, | ||
const std::vector< SpacedSeedMonomers > & | seeds_monomers, | ||
unsigned | k, | ||
unsigned | m, | ||
unsigned | m2, | ||
uint64_t * | fh_nomonos, | ||
uint64_t * | rh_nomonos, | ||
uint64_t * | fh_val, | ||
uint64_t * | rh_val, | ||
uint64_t * | h_val | ||
) |
Generate multiple hash values for the input spaced seeds and the next k-mer by performing a forward roll operation.
kmer_seq | Array of characters representing the previous k-mer. |
seeds_blocks | Vector of SpacedSeedBlocks objects representing the seeds' blocks. |
seeds_monomers | List of the positions that represent blocks of size one for each seed. |
k | k-mer size. |
m | Number of spaced seeds. |
m2 | Number of hashes per seed. |
fh_nomonos | Previous forward hash values before including the size-one blocks. |
rh_nomonos | Previous reverse hash values before including the size-one blocks. |
fh_val | Previous forward hash values after including the size-one blocks. |
rh_val | Previous reverse hash values after including the size-one blocks. |
h_val | Array of size m * m2 for storing the output hash values. |
bool btllib::ntmsm64 | ( | const char * | kmer_seq, |
const std::vector< SpacedSeedBlocks > & | seeds_blocks, | ||
const std::vector< SpacedSeedMonomers > & | seeds_monomers, | ||
unsigned | k, | ||
unsigned | m, | ||
unsigned | m2, | ||
uint64_t * | fh_nomonos, | ||
uint64_t * | rh_nomonos, | ||
uint64_t * | fh_val, | ||
uint64_t * | rh_val, | ||
unsigned & | loc_n, | ||
uint64_t * | h_val | ||
) |
Generate multiple hash values for the input spaced seeds and first k-mer.
kmer_seq | Array of characters representing the k-mer. |
seeds_blocks | Vector of SpacedSeedBlocks objects representing the seeds' blocks. |
seeds_monomers | List of the positions that represent blocks of size one for each seed. |
k | k-mer size. |
m | Number of spaced seeds. |
m2 | Number of hashes per seed. |
fh_nomonos | Container for the forward hash values before including the size-one blocks. |
rh_nomonos | Container for the reverse hash values before including the size-one blocks. |
fh_val | Container for the forward hash values after including the size-one blocks. |
rh_val | Container for the reverse hash values after including the size-one blocks. |
loc_n | Location of the first unknown character in the first sequence. |
h_val | Array of size m * m2 for storing the output hash values. |
void btllib::ntmsm64l | ( | const char * | kmer_seq, |
char | in, | ||
const std::vector< SpacedSeedBlocks > & | seeds_blocks, | ||
const std::vector< SpacedSeedMonomers > & | seeds_monomers, | ||
unsigned | k, | ||
unsigned | m, | ||
unsigned | m2, | ||
uint64_t * | fh_nomonos, | ||
uint64_t * | rh_nomonos, | ||
uint64_t * | fh_val, | ||
uint64_t * | rh_val, | ||
uint64_t * | h_val | ||
) |
Generate multiple hash values for the input spaced seeds and the next k-mer by performing a backwards peek operation.
kmer_seq | Array of characters representing the previous k-mer. |
in | Character to be included. |
seeds_blocks | Vector of SpacedSeedBlocks objects representing the seeds' blocks. |
seeds_monomers | List of the positions that represent blocks of size one for each seed. |
k | k-mer size. |
m | Number of spaced seeds. |
m2 | Number of hashes per seed. |
fh_nomonos | Previous forward hash values before including the size-one blocks. |
rh_nomonos | Previous reverse hash values before including the size-one blocks. |
fh_val | Previous forward hash values after including the size-one blocks. |
rh_val | Previous reverse hash values after including the size-one blocks. |
h_val | Array of size m * m2 for storing the output hash values. |
void btllib::ntmsm64l | ( | const char * | kmer_seq, |
const std::vector< SpacedSeedBlocks > & | seeds_blocks, | ||
const std::vector< SpacedSeedMonomers > & | seeds_monomers, | ||
unsigned | k, | ||
unsigned | m, | ||
unsigned | m2, | ||
uint64_t * | fh_nomonos, | ||
uint64_t * | rh_nomonos, | ||
uint64_t * | fh_val, | ||
uint64_t * | rh_val, | ||
uint64_t * | h_val | ||
) |
Generate multiple hash values for the input spaced seeds and the next k-mer by performing a backward roll operation.
kmer_seq | Array of characters representing the previous k-mer. |
seeds_blocks | Vector of SpacedSeedBlocks objects representing the seeds' blocks. |
seeds_monomers | List of the positions that represent blocks of size one for each seed. |
k | k-mer size. |
m | Number of spaced seeds. |
m2 | Number of hashes per seed. |
fh_nomonos | Previous forward hash values before including the size-one blocks. |
rh_nomonos | Previous reverse hash values before including the size-one blocks. |
fh_val | Previous forward hash values after including the size-one blocks. |
rh_val | Previous reverse hash values after including the size-one blocks. |
h_val | Array of size m * m2 for storing the output hash values. |
uint64_t btllib::ntr64 | ( | const char * | kmer_seq, |
unsigned | k | ||
) |
Generate a hash value for the reverse-complement of the first k-mer in the sequence.
kmer_seq | C array containing the sequence's characters. |
k | k-mer size. |
uint64_t btllib::ntr64 | ( | uint64_t | rh_val, |
unsigned | k, | ||
unsigned char | char_out, | ||
unsigned char | char_in | ||
) |
Perform a roll operation on the reverse-complement by removing char_out and including char_in.
rh_val | Previous reverse-complement hash value computed for the sequence. |
k | k-mer size. |
char_out | Character to be removed. |
char_in | Character to be included. |
uint64_t btllib::ntr64l | ( | uint64_t | fh_val, |
unsigned | k, | ||
unsigned char | char_out, | ||
unsigned char | char_in | ||
) |
Perform a roll-back operation on the reverse-complement.
fh_val | Previous reverse hash value computed for the sequence. |
k | k-mer size. |
char_out | Character to be removed. |
char_in | Character to be included. |
void btllib::reverse_complement | ( | std::string & | seq | ) |
Reverse complement a sequence in-place.
seq | Sequence to reverse complement. |
nthash std::unique_ptr< uint64_t[]> btllib::rh_no_monomers_tmp | ( | new uint64_t | [blocks.size()] | ) |
void btllib::rtrim | ( | std::string & | s | ) |
Trim whitespace on the right side of the given string.
s | String to trim, edited in-place. |
std::vector< std::string > btllib::split | ( | const std::string & | s, |
const std::string & | delim | ||
) |
Split a string into component substrings with a delimiter.
s | String to split. |
delim | Delimiter to split with. |
Returns a vector of substrings delimited by delim, excluding delimiters themselves.
uint64_t btllib::srol | ( | const uint64_t | x | ) |
inline |
Split a 64-bit word into 33 and 31-bit subwords and left-rotate them separately.
x | A 64-bit unsigned integer. |
uint64_t btllib::srol | ( | const uint64_t | x, |
const unsigned | d | ||
) |
inline |
Split a 64-bit word into 33 and 31-bit subwords and left-rotate them separately multiple times.
x | A 64-bit unsigned integer. |
d | Number of rotations. |
uint64_t btllib::sror | ( | const uint64_t | x | ) |
inline |
Split a 64-bit word into 33 and 31-bit subwords and right-rotate them separately.
x | A 64-bit unsigned integer. |
bool btllib::startswith | ( | std::string | s, |
std::string | prefix | ||
) |
Check whether the given string starts with a prefix.
s | String to check. |
prefix | Prefix to check for. |
void btllib::sub_hash | ( | uint64_t | fh_val, |
uint64_t | rh_val, | ||
const char * | kmer_seq, | ||
const std::vector< unsigned > & | positions, | ||
const std::vector< unsigned char > & | new_bases, | ||
unsigned | k, | ||
unsigned | m, | ||
uint64_t * | h_val | ||
) |
Generate multiple new hash values for the input k-mer by substituting multiple characters.
fh_val | Forward hash value of the k-mer. |
rh_val | Reverse hash value of the k-mer. |
kmer_seq | Array of characters representing the k-mer. |
positions | Indices of the positions to be substituted. |
new_bases | Characters to be placed in the indices indicated in positions. |
k | k-mer size. |
m | Number of hashes per k-mer. |
h_val | Array of size m for storing the output hash values. |
void btllib::trim | ( | std::string & | s | ) |
Trim whitespace on the left and right side of the given string.
s | String to trim, edited in-place. |