btllib
Classes | Typedefs | Functions | Variables
btllib Namespace Reference

Classes

class  BlindNtHash
 
class  BloomFilter
 
class  CountingBloomFilter
 
class  Indexlr
 
class  KmerBloomFilter
 
class  KmerCountingBloomFilter
 
class  MIBloomFilter
 
class  NtHash
 
class  ProcessPipeline
 
class  SeedBloomFilter
 
class  SeedNtHash
 
class  SeqReader
 
class  SeqWriter
 

Typedefs

using CountingBloomFilter8 = CountingBloomFilter< uint8_t >
 
using CountingBloomFilter16 = CountingBloomFilter< uint16_t >
 
using CountingBloomFilter32 = CountingBloomFilter< uint32_t >
 
using KmerCountingBloomFilter8 = KmerCountingBloomFilter< uint8_t >
 
using KmerCountingBloomFilter16 = KmerCountingBloomFilter< uint16_t >
 
using KmerCountingBloomFilter32 = KmerCountingBloomFilter< uint32_t >
 
using NTHASH_HASH_NUM_TYPE = uint8_t
 
using NTHASH_K_TYPE = uint16_t
 
using SpacedSeed = std::vector< unsigned >
 
using SpacedSeedBlocks = std::vector< std::array< unsigned, 2 > >
 
using SpacedSeedMonomers = std::vector< unsigned >
 
using PipeId = unsigned long
 
using PipelineId = unsigned long
 

Functions

std::vector< SpacedSeed > parse_seeds (const std::vector< std::string > &seed_strings)
 
void parse_seeds (const std::vector< std::string > &seed_strings, std::vector< SpacedSeedBlocks > &blocks, std::vector< SpacedSeedMonomers > &monomers)
 
void parsed_seeds_to_blocks (const std::vector< SpacedSeed > &seeds, unsigned k, std::vector< SpacedSeedBlocks > &blocks, std::vector< SpacedSeedMonomers > &monomers)
 
void check_seeds (const std::vector< std::string > &seeds, unsigned k)
 
 BTLLIB_NTHASH_INIT (NtHash, ntmc64(seq+pos, k, hash_num, forward_hash, reverse_hash, posN, hashes_array.get()),) BTLLIB_NTHASH_ROLL(NtHash
 
 roll ()
 
 ntmc64 (seq[pos], seq[pos+k], k, hash_num, forward_hash, reverse_hash, hashes_array.get())
 
 BTLLIB_NTHASH_ROLL_BACK (NtHash, roll_back(), ntmc64l(seq[pos+k - 1], seq[pos - 1], k, hash_num, forward_hash, reverse_hash, hashes_array.get());,) BTLLIB_NTHASH_PEEK(NtHash
 
 peek ()
 
 ntmc64 (seq[pos], seq[pos+k], k, hash_num, forward_hash_tmp, reverse_hash_tmp, hashes_array.get())
 
 BTLLIB_NTHASH_PEEK (NtHash, peek(char char_in), { uint64_t forward_hash_tmp=forward_hash;uint64_t reverse_hash_tmp=reverse_hash;ntmc64(seq[pos], char_in, k, hash_num, forward_hash_tmp, reverse_hash_tmp, hashes_array.get());},) BTLLIB_NTHASH_PEEK(NtHash
 
 peek_back ()
 
 ntmc64l (seq[pos+k - 1], seq[pos - 1], k, hash_num, forward_hash_tmp, reverse_hash_tmp, hashes_array.get())
 
 BTLLIB_NTHASH_PEEK (NtHash, peek_back(char char_in), { uint64_t forward_hash_tmp=forward_hash;uint64_t reverse_hash_tmp=reverse_hash;ntmc64l(seq[pos+k - 1], char_in, k, hash_num, forward_hash_tmp, reverse_hash_tmp, hashes_array.get());},) BTLLIB_NTHASH_INIT(BlindNtHash
 
 ntmc64 (seq.get()+pos, k, hash_num, forward_hash, reverse_hash, posN, hashes_array.get())
 
 BTLLIB_NTHASH_ROLL (BlindNtHash, roll(char char_in), { ntmc64(seq[pos % seq_len], char_in, k, hash_num, forward_hash, reverse_hash, hashes_array.get());seq[pos % seq_len]=char_in;},) BTLLIB_NTHASH_ROLL_BACK(BlindNtHash
 
 roll_back (char char_in)
 
 BTLLIB_NTHASH_PEEK (BlindNtHash, peek(char char_in), { uint64_t forward_hash_tmp=forward_hash;uint64_t reverse_hash_tmp=reverse_hash;ntmc64(seq[pos % seq_len], char_in, k, hash_num, forward_hash_tmp, reverse_hash_tmp, hashes_array.get());},) BTLLIB_NTHASH_PEEK(BlindNtHash
 
 peek_back (char char_in)
 
 ntmc64l (seq[(pos+k - 1) % seq_len], char_in, k, hash_num, forward_hash_tmp, reverse_hash_tmp, hashes_array.get())
 
 BTLLIB_NTHASH_INIT (SeedNtHash, ntmsm64(nthash.seq+nthash.pos, blocks, monomers, nthash.k, blocks.size(), hash_num_per_seed, fh_no_monomers.get(), rh_no_monomers.get(), forward_hash.get(), reverse_hash.get(), posN, nthash.hashes_array.get()), nthash.) BTLLIB_NTHASH_ROLL(SeedNtHash
 
 ntmsm64 (nthash.seq+nthash.pos, blocks, monomers, nthash.k, blocks.size(), hash_num_per_seed, fh_no_monomers.get(), rh_no_monomers.get(), forward_hash.get(), reverse_hash.get(), nthash.hashes_array.get())
 
nthash BTLLIB_NTHASH_ROLL_BACK (SeedNtHash, roll_back(), ntmsm64l(nthash.seq+nthash.pos - 1, blocks, monomers, nthash.k, blocks.size(), hash_num_per_seed, fh_no_monomers.get(), rh_no_monomers.get(), forward_hash.get(), reverse_hash.get(), nthash.hashes_array.get());, nthash.) BTLLIB_NTHASH_PEEK(SeedNtHash
 
nthash std::unique_ptr< uint64_t[]> rh_no_monomers_tmp (new uint64_t[blocks.size()])
 
std::unique_ptr< uint64_t[]> forward_hash_tmp (new uint64_t[blocks.size()])
 
std::unique_ptr< uint64_t[]> reverse_hash_tmp (new uint64_t[blocks.size()])
 
 ntmsm64 (nthash.seq+nthash.pos, blocks, monomers, nthash.k, blocks.size(), hash_num_per_seed, fh_no_monomers_tmp.get(), rh_no_monomers_tmp.get(), forward_hash_tmp.get(), reverse_hash_tmp.get(), nthash.hashes_array.get())
 
nthash BTLLIB_NTHASH_PEEK (SeedNtHash, peek(char char_in), { std::unique_ptr< uint64_t[]> fh_no_monomers_tmp(new uint64_t[blocks.size()]);std::unique_ptr< uint64_t[]> rh_no_monomers_tmp(new uint64_t[blocks.size()]);std::unique_ptr< uint64_t[]> forward_hash_tmp(new uint64_t[blocks.size()]);std::unique_ptr< uint64_t[]> reverse_hash_tmp(new uint64_t[blocks.size()]);std::memcpy(fh_no_monomers_tmp.get(), forward_hash.get(), blocks.size() *sizeof(uint64_t));std::memcpy(rh_no_monomers_tmp.get(), reverse_hash.get(), blocks.size() *sizeof(uint64_t));std::memcpy(forward_hash_tmp.get(), forward_hash.get(), blocks.size() *sizeof(uint64_t));std::memcpy(reverse_hash_tmp.get(), reverse_hash.get(), blocks.size() *sizeof(uint64_t));ntmsm64(nthash.seq+nthash.pos, char_in, blocks, monomers, nthash.k, blocks.size(), hash_num_per_seed, fh_no_monomers_tmp.get(), rh_no_monomers_tmp.get(), forward_hash_tmp.get(), reverse_hash_tmp.get(), nthash.hashes_array.get());}, nthash.) BTLLIB_NTHASH_PEEK(SeedNtHash
 
 ntmsm64l (nthash.seq+nthash.pos - 1, blocks, monomers, nthash.k, blocks.size(), hash_num_per_seed, fh_no_monomers_tmp.get(), rh_no_monomers_tmp.get(), forward_hash_tmp.get(), reverse_hash_tmp.get(), nthash.hashes_array.get())
 
template<typename T >
T canonical (const T fwd, const T rev)
 
uint64_t srol (const uint64_t x)
 
uint64_t srol (const uint64_t x, const unsigned d)
 
uint64_t sror (const uint64_t x)
 
uint64_t ntf64 (const char *kmer_seq, unsigned k)
 
uint64_t ntr64 (const char *kmer_seq, unsigned k)
 
uint64_t ntf64 (uint64_t fh_val, unsigned k, unsigned char char_out, unsigned char char_in)
 
uint64_t ntr64 (uint64_t rh_val, unsigned k, unsigned char char_out, unsigned char char_in)
 
uint64_t ntc64 (const char *kmer_seq, unsigned k)
 
uint64_t ntc64 (const char *kmer_seq, unsigned k, uint64_t &fh_val, uint64_t &rh_val)
 
uint64_t ntc64 (unsigned char char_out, unsigned char char_in, unsigned k, uint64_t &fh_val, uint64_t &rh_val)
 
uint64_t ntf64l (uint64_t rh_val, unsigned k, unsigned char char_out, unsigned char char_in)
 
uint64_t ntr64l (uint64_t fh_val, unsigned k, unsigned char char_out, unsigned char char_in)
 
uint64_t ntc64l (unsigned char char_out, unsigned char char_in, unsigned k, uint64_t &fh_val, uint64_t &rh_val)
 
void nte64 (uint64_t bh_val, unsigned k, unsigned h, uint64_t *h_val)
 
void ntmc64 (const char *kmer_seq, unsigned k, unsigned m, uint64_t *h_val)
 
void ntmc64 (const char *kmer_seq, unsigned k, unsigned m, uint64_t &fh_val, uint64_t &rh_val, uint64_t *h_val)
 
void ntmc64 (unsigned char char_out, unsigned char char_in, unsigned k, unsigned m, uint64_t &fh_val, uint64_t &rh_val, uint64_t *h_val)
 
void ntmc64l (unsigned char char_out, unsigned char char_in, unsigned k, unsigned m, uint64_t &fh_val, uint64_t &rh_val, uint64_t *h_val)
 
bool ntc64 (const char *kmer_seq, unsigned k, uint64_t &h_val, unsigned &loc_n)
 
bool ntmc64 (const char *kmer_seq, unsigned k, unsigned m, unsigned &loc_n, uint64_t *h_val)
 
bool ntc64 (const char *kmer_seq, unsigned k, uint64_t &fh_val, uint64_t &rh_val, uint64_t &h_val, unsigned &loc_n)
 
bool ntmc64 (const char *kmer_seq, unsigned k, unsigned m, uint64_t &fh_val, uint64_t &rh_val, unsigned &loc_n, uint64_t *h_val)
 
bool ntmc64 (const char *kmer_seq, unsigned k, unsigned m, uint64_t &fh_val, uint64_t &rh_val, unsigned &loc_n, uint64_t *h_val, bool &h_stn)
 
void ntmc64 (unsigned char char_out, unsigned char char_in, unsigned k, unsigned m, uint64_t &fh_val, uint64_t &rh_val, uint64_t *h_val, bool &h_stn)
 
uint64_t mask_hash (uint64_t &fk_val, uint64_t &rk_val, const char *seed_seq, const char *kmer_seq, unsigned k)
 
void sub_hash (uint64_t fh_val, uint64_t rh_val, const char *kmer_seq, const std::vector< unsigned > &positions, const std::vector< unsigned char > &new_bases, unsigned k, unsigned m, uint64_t *h_val)
 
bool ntmsm64 (const char *kmer_seq, const std::vector< SpacedSeedBlocks > &seeds_blocks, const std::vector< SpacedSeedMonomers > &seeds_monomers, unsigned k, unsigned m, unsigned m2, uint64_t *fh_nomonos, uint64_t *rh_nomonos, uint64_t *fh_val, uint64_t *rh_val, unsigned &loc_n, uint64_t *h_val)
 
void ntmsm64 (const char *kmer_seq, const std::vector< SpacedSeedBlocks > &seeds_blocks, const std::vector< SpacedSeedMonomers > &seeds_monomers, unsigned k, unsigned m, unsigned m2, uint64_t *fh_nomonos, uint64_t *rh_nomonos, uint64_t *fh_val, uint64_t *rh_val, uint64_t *h_val)
 
void ntmsm64l (const char *kmer_seq, const std::vector< SpacedSeedBlocks > &seeds_blocks, const std::vector< SpacedSeedMonomers > &seeds_monomers, unsigned k, unsigned m, unsigned m2, uint64_t *fh_nomonos, uint64_t *rh_nomonos, uint64_t *fh_val, uint64_t *rh_val, uint64_t *h_val)
 
void ntmsm64 (const char *kmer_seq, char in, const std::vector< SpacedSeedBlocks > &seeds_blocks, const std::vector< SpacedSeedMonomers > &seeds_monomers, unsigned k, unsigned m, unsigned m2, uint64_t *fh_nomonos, uint64_t *rh_nomonos, uint64_t *fh_val, uint64_t *rh_val, uint64_t *h_val)
 
void ntmsm64l (const char *kmer_seq, char in, const std::vector< SpacedSeedBlocks > &seeds_blocks, const std::vector< SpacedSeedMonomers > &seeds_monomers, unsigned k, unsigned m, unsigned m2, uint64_t *fh_nomonos, uint64_t *rh_nomonos, uint64_t *fh_val, uint64_t *rh_val, uint64_t *h_val)
 
void reverse_complement (std::string &seq)
 
std::string get_reverse_complement (const std::string &seq)
 
std::string get_time ()
 
void log_info (const std::string &msg)
 
void log_warning (const std::string &msg)
 
void log_error (const std::string &msg)
 
void check_info (bool condition, const std::string &msg)
 
void check_warning (bool condition, const std::string &msg)
 
void check_error (bool condition, const std::string &msg)
 
std::string get_strerror ()
 
void check_stream (const std::ios &stream, const std::string &name)
 
std::vector< std::string > split (const std::string &s, const std::string &delim)
 
std::string join (const std::vector< std::string > &s, const std::string &delim)
 
void ltrim (std::string &s)
 
void ltrim (btllib::CString &s)
 
void rtrim (std::string &s)
 
void rtrim (btllib::CString &s)
 
void trim (std::string &s)
 
void trim (btllib::CString &s)
 
bool startswith (std::string s, std::string prefix)
 
bool endswith (std::string s, std::string suffix)
 
std::string get_basename (const std::string &path)
 
std::string get_dirname (const std::string &path)
 

Variables

uint64_t reverse_hash_tmp = reverse_hash
 
 seq [(pos+k - 1) % seq_len] = char_in
 
const uint8_t CP_OFF = 0x07
 
const int MULTISHIFT = 27
 
const uint64_t SEED_TAB [ASCII_SIZE]
 
const uint64_t A33R [33]
 
const uint64_t A31L [31]
 
const uint64_t C33R [33]
 
const uint64_t C31L [31]
 
const uint64_t G33R [33]
 
const uint64_t G31L [31]
 
const uint64_t T33R [33]
 
const uint64_t T31L [31]
 
const uint64_t N33R [33]
 
const uint64_t N31L [31]
 
const uint64_t *const MS_TAB_33R [ASCII_SIZE]
 
const uint64_t *const MS_TAB_31L [ASCII_SIZE]
 
const uint8_t CONVERT_TAB [ASCII_SIZE]
 
const uint8_t RC_CONVERT_TAB [ASCII_SIZE]
 
const uint64_t DIMER_TAB [4 *4]
 
const uint64_t TRIMER_TAB [4 *4 *4]
 
const uint64_t TETRAMER_TAB [4 *4 *4 *4]
 
const char COMPLEMENTS [256]
 
const char CAPITALS [256]
 
constexpr const char * PRINT_COLOR_INFO = "\33[32m"
 
constexpr const char * PRINT_COLOR_WARNING = "\33[33m"
 
constexpr const char * PRINT_COLOR_ERROR = "\33[31m"
 
constexpr const char * PRINT_COLOR_END = "\33[0m"
 

Detailed Description

Functions for sequence manipulation.

Functions for logging and error checking.

Random utility functions.

Function Documentation

◆ check_error()

void btllib::check_error ( bool  condition,
const std::string &  msg 
)

Conditionally log error level events. The program exits if the condition is true.

Parameters
conditionIf this is true, the message is printed and the program exits.
msgMessage to print.

◆ check_info()

void btllib::check_info ( bool  condition,
const std::string &  msg 
)

Conditionally log info level events.

Parameters
conditionIf this is true, the message is printed.
msgMessage to print.

◆ check_stream()

void btllib::check_stream ( const std::ios &  stream,
const std::string &  name 
)

Check whether the stream is good. Program prints an error message and exits if not.

Parameters
streamStream to check goodness of.
nameName of the stream, e.g. a file path or stdin.

◆ check_warning()

void btllib::check_warning ( bool  condition,
const std::string &  msg 
)

Conditionally log warning level events.

Parameters
conditionIf this is true, the message is printed.
msgMessage to print.

◆ endswith()

bool btllib::endswith ( std::string  s,
std::string  suffix 
)

Check whether the given string ends with a suffix.

Parameters
sString to check.
suffixSuffix to check for.

◆ get_basename()

std::string btllib::get_basename ( const std::string &  path)

Equivalent to the GNU implementation of basename, but returns a string copy of the result.

Parameters
pathThe path to get basename from.
Returns
The basename of the path.

◆ get_dirname()

std::string btllib::get_dirname ( const std::string &  path)

Equivalent to the GNU implementation of dirname, but returns a string copy of the result.

Parameters
pathThe path to get dirname from.
Returns
The dirname of the path.

◆ get_reverse_complement()

std::string btllib::get_reverse_complement ( const std::string &  seq)

Obtain a reverse complement of the provided sequence. The argument sequence is left untouched.

Parameters
seqSequence to reverse complement.
Returns
Reverse complemented sequence.

◆ join()

std::string btllib::join ( const std::vector< std::string > &  s,
const std::string &  delim 
)

Join a vector of strings into a single string with a delimiter.

Parameters
sVector of strings to join.
delimDelimiter to join the strings with.
Returns
String with all the components joined.

◆ log_error()

void btllib::log_error ( const std::string &  msg)

Log error level events.

Parameters
msgMessage to print.

◆ log_info()

void btllib::log_info ( const std::string &  msg)

Log info level events.

Parameters
msgMessage to print.

◆ log_warning()

void btllib::log_warning ( const std::string &  msg)

Log warning level events.

Parameters
msgMessage to print.

◆ ltrim()

void btllib::ltrim ( std::string &  s)

Trim whitespace on the left side of the given string.

Parameters
sString to trim, edited in-place.

◆ mask_hash()

uint64_t btllib::mask_hash ( uint64_t &  fk_val,
uint64_t &  rk_val,
const char *  seed_seq,
const char *  kmer_seq,
unsigned  k 
)

Generate a hash value for the input spaced seed by excluding all don't care positions.

Parameters
fk_valForward hash value of the k-mer (ignoring the spaced seed).
rk_valReverse hash value of the k-mer (ignoring the spaced seed).
seed_seqArray of characters representing the spaced seed. Anything other than '1' is treated as a don't care.
kmer_seqArray of characters representing the k-mer.
kk-mer size.
Returns
Canonical hash value for the k-mer masked with the spaced seed.

◆ ntc64() [1/5]

uint64_t btllib::ntc64 ( const char *  kmer_seq,
unsigned  k 
)

Generate a canonical hash value for the first k-mer.

Parameters
kmer_seqC array containing the sequence's characters.
kk-mer size.
Returns
Canonical hash value of k-mer_0.

◆ ntc64() [2/5]

uint64_t btllib::ntc64 ( const char *  kmer_seq,
unsigned  k,
uint64_t &  fh_val,
uint64_t &  rh_val 
)

Generate a canonical hash value for the first k-mer and update both strands' hash values.

Parameters
kmer_seqC array containing the sequence's characters.
kk-mer size.
fh_valForward strand hash value container.
rh_valReverse strand hash value container.
Returns
Canonical hash value of k-mer_0.

◆ ntc64() [3/5]

bool btllib::ntc64 ( const char *  kmer_seq,
unsigned  k,
uint64_t &  fh_val,
uint64_t &  rh_val,
uint64_t &  h_val,
unsigned &  loc_n 
)

Generate a canonical hash value for the first k-mer, find the first ignored character and return the strand-specific hash values.

Parameters
kmer_seqArray containing the sequence's characters.
kk-mer size.
fh_valContainer for the forward hash value.
rh_valContainer for the reverse hash value.
h_valContainer for the output hash value.
loc_nLocation of the first unknown character.
Returns
true if all the characters of the first k-mer are known, otherwise false.

◆ ntc64() [4/5]

bool btllib::ntc64 ( const char *  kmer_seq,
unsigned  k,
uint64_t &  h_val,
unsigned &  loc_n 
)

Generate a canonical hash value for the first k-mer and find the first ignored character.

Parameters
kmer_seqArray containing the sequence's characters.
kk-mer size.
h_valContainer for the output hash value.
loc_nLocation of the first unknown character.
Returns
true if all the characters of the first k-mer are known, otherwise false.

◆ ntc64() [5/5]

uint64_t btllib::ntc64 ( unsigned char  char_out,
unsigned char  char_in,
unsigned  k,
uint64_t &  fh_val,
uint64_t &  rh_val 
)

Perform a roll operation on the sequence and generate a canonical hash value.

Parameters
char_outCharacter to be removed.
char_inCharacter to be included.
kk-mer size.
fh_valPrevious hash value for the forward strand.
rh_valPrevious hash value for the reverse-complement.
Returns
Canonical hash value after including char_in and removing char_out.

◆ ntc64l()

uint64_t btllib::ntc64l ( unsigned char  char_out,
unsigned char  char_in,
unsigned  k,
uint64_t &  fh_val,
uint64_t &  rh_val 
)

Perform a roll-back operation on the canonical hash value and update previous hashes for both strands.

Parameters
char_outCharacter to be removed.
char_inCharacter to be included.
kk-mer size.
fh_valPrevious forward hash value computed for the sequence.
rh_valPrevious reverse hash value computed for the sequence.
Returns
Roll back result for the canonical hash value.

◆ nte64()

void btllib::nte64 ( uint64_t  bh_val,
unsigned  k,
unsigned  h,
uint64_t *  h_val 
)

Extend hash array using a base hash value.

Parameters
bh_valBase hash value.
kk-mer size.
hSize of the resulting hash array (number of extra hashes plus one).
h_valArray of size h for storing the output hashes.

◆ ntf64() [1/2]

uint64_t btllib::ntf64 ( const char *  kmer_seq,
unsigned  k 
)

Generate the forward-strand hash value of the first k-mer in the sequence.

Parameters
kmer_seqC array containing the sequence's characters.
kk-mer size.
Returns
Hash value of k-mer_0.

◆ ntf64() [2/2]

uint64_t btllib::ntf64 ( uint64_t  fh_val,
unsigned  k,
unsigned char  char_out,
unsigned char  char_in 
)

Perform a roll operation on the forward strand by removing char_out and including char_in.

Parameters
fh_valPrevious hash value computed for the sequence.
kk-mer size.
char_outCharacter to be removed.
char_inCharacter to be included.
Returns
Rolled forward hash value.

◆ ntf64l()

uint64_t btllib::ntf64l ( uint64_t  rh_val,
unsigned  k,
unsigned char  char_out,
unsigned char  char_in 
)

Perform a roll-back operation on the forward strand.

Parameters
rh_valPrevious forward hash value computed for the sequence.
kk-mer size.
char_outCharacter to be removed.
char_inCharacter to be included.
Returns
Resulting hash value.

◆ ntmc64() [1/7]

void btllib::ntmc64 ( const char *  kmer_seq,
unsigned  k,
unsigned  m,
uint64_t &  fh_val,
uint64_t &  rh_val,
uint64_t *  h_val 
)

Generate multiple canonical hash values for the first k-mer and return strand-specific hash values.

Parameters
kmer_seqArray containing the sequence's characters.
kk-mer size.
mNumber of hashes per k-mer.
fh_valUnsigned 64-bit int container for the forward hash.
rh_valUnsigned 64-bit int container for the reverse-complement hash.
h_valArray of size m for storing the hash values.

◆ ntmc64() [2/7]

bool btllib::ntmc64 ( const char *  kmer_seq,
unsigned  k,
unsigned  m,
uint64_t &  fh_val,
uint64_t &  rh_val,
unsigned &  loc_n,
uint64_t *  h_val 
)

Generate multiple canonical hash values for the first k-mer, find the first ignored character and return the strand-specific hash values.

Parameters
kmer_seqArray containing the sequence's characters.
kk-mer size.
mNumber of hashes per k-mer.
fh_valContainer for the forward hash value.
rh_valContainer for the reverse hash value.
loc_nLocation of the first unknown character.
h_valArray of size m for storing the output hash values.
Returns
true if all the characters of the first k-mer are known, otherwise false.

◆ ntmc64() [3/7]

bool btllib::ntmc64 ( const char *  kmer_seq,
unsigned  k,
unsigned  m,
uint64_t &  fh_val,
uint64_t &  rh_val,
unsigned &  loc_n,
uint64_t *  h_val,
bool &  h_stn 
)

Generate multiple canonical hash values for the first k-mer, find the first ignored character, and return the strand-specific hash values and strand selections.

Parameters
kmer_seqArray containing the sequence's characters.
kk-mer size.
mNumber of hashes per k-mer.
fh_valContainer for the forward hash value.
rh_valContainer for the reverse hash value.
loc_nLocation of the first unknown character.
h_valArray of size m for storing the output hash values.
h_stntrue if the reverse strand was selected, otherwise false.
Returns
true if all the characters of the first k-mer are known, otherwise false.

◆ ntmc64() [4/7]

void btllib::ntmc64 ( const char *  kmer_seq,
unsigned  k,
unsigned  m,
uint64_t *  h_val 
)

Generate multiple canonical hash values for the first k-mer.

Parameters
kmer_seqArray containing the sequence's characters.
kk-mer size.
mNumber of hashes per k-mer.
h_valArray of size m for storing the hash values.

◆ ntmc64() [5/7]

bool btllib::ntmc64 ( const char *  kmer_seq,
unsigned  k,
unsigned  m,
unsigned &  loc_n,
uint64_t *  h_val 
)

Generate multiple canonical hash values for the first k-mer and find the first ignored character.

Parameters
kmer_seqArray containing the sequence's characters.
kk-mer size.
mNumber of hashes per k-mer.
h_valArray of size m for storing the output hash values.
loc_nLocation of the first unknown character.
Returns
true if all the characters of the first k-mer are known, otherwise false.

◆ ntmc64() [6/7]

void btllib::ntmc64 ( unsigned char  char_out,
unsigned char  char_in,
unsigned  k,
unsigned  m,
uint64_t &  fh_val,
uint64_t &  rh_val,
uint64_t *  h_val 
)

Generate multiple new canonical hash values by performing a roll operation.

Parameters
char_outCharacter to be removed.
char_inCharacter to be included.
kk-mer size.
mNumber of hashes per k-mer.
fh_valPrevious forward hash value.
rh_valPrevious reverse hash value.
h_valArray of size m for storing the output hash values.

◆ ntmc64() [7/7]

void btllib::ntmc64 ( unsigned char  char_out,
unsigned char  char_in,
unsigned  k,
unsigned  m,
uint64_t &  fh_val,
uint64_t &  rh_val,
uint64_t *  h_val,
bool &  h_stn 
)

Generate multiple canonical hash values by performing a roll operation, returning the strand-specific hash values and strand selections.

Parameters
char_outCharacter to be removed.
char_inCharacter to be included.
kk-mer size.
mNumber of hashes per k-mer.
fh_valContainer for the forward hash value.
rh_valContainer for the reverse hash value.
h_valArray of size m for storing the output hash values.
h_stntrue if the reverse strand was selected, otherwise false.

◆ ntmc64l()

void btllib::ntmc64l ( unsigned char  char_out,
unsigned char  char_in,
unsigned  k,
unsigned  m,
uint64_t &  fh_val,
uint64_t &  rh_val,
uint64_t *  h_val 
)

Generate multiple new canonical hash values by performing a roll-back operation.

Parameters
char_outCharacter to be removed.
char_inCharacter to be included.
kk-mer size.
mNumber of hashes per k-mer.
fh_valPrevious forward hash value.
rh_valPrevious reverse hash value.
h_valArray of size m for storing the output hash values.

◆ ntmsm64() [1/3]

void btllib::ntmsm64 ( const char *  kmer_seq,
char  in,
const std::vector< SpacedSeedBlocks > &  seeds_blocks,
const std::vector< SpacedSeedMonomers > &  seeds_monomers,
unsigned  k,
unsigned  m,
unsigned  m2,
uint64_t *  fh_nomonos,
uint64_t *  rh_nomonos,
uint64_t *  fh_val,
uint64_t *  rh_val,
uint64_t *  h_val 
)

Generate multiple hash values for the input spaced seeds and the next k-mer by performing a forward peek operation.

Parameters
kmer_seqArray of characters representing the previous k-mer.
seeds_blocksArray of SpacedSeedBlocks objects representing the seeds' blocks.
seeds_monomersList of the positions that represent blocks of size one for each seed.
kk-mer size.
mNumber of spaced seeds.
m2Number of hashes per seed.
fh_nomonosPrevious forward hash values before including the size-one blocks.
rh_nomonosPrevious reverse hash values before including the size-one blocks.
fh_valPrevious forward hash values after including the size-one blocks.
rh_valPrevious reverse hash values after including the size-one blocks.
h_valArray of size m * m2 for storing the output hash values.

◆ ntmsm64() [2/3]

void btllib::ntmsm64 ( const char *  kmer_seq,
const std::vector< SpacedSeedBlocks > &  seeds_blocks,
const std::vector< SpacedSeedMonomers > &  seeds_monomers,
unsigned  k,
unsigned  m,
unsigned  m2,
uint64_t *  fh_nomonos,
uint64_t *  rh_nomonos,
uint64_t *  fh_val,
uint64_t *  rh_val,
uint64_t *  h_val 
)

Generate multiple hash values for the input spaced seeds and the next k-mer by performing a forward roll operation.

Parameters
kmer_seqArray of characters representing the previous k-mer.
seeds_blocksArray of SpacedSeedBlocks objects representing the seeds' blocks.
seeds_monomersList of the positions that represent blocks of size one for each seed.
kk-mer size.
mNumber of spaced seeds.
m2Number of hashes per seed.
fh_nomonosPrevious forward hash values before including the size-one blocks.
rh_nomonosPrevious reverse hash values before including the size-one blocks.
fh_valPrevious forward hash values after including the size-one blocks.
rh_valPrevious reverse hash values after including the size-one blocks.
h_valArray of size m * m2 for storing the output hash values.

◆ ntmsm64() [3/3]

bool btllib::ntmsm64 ( const char *  kmer_seq,
const std::vector< SpacedSeedBlocks > &  seeds_blocks,
const std::vector< SpacedSeedMonomers > &  seeds_monomers,
unsigned  k,
unsigned  m,
unsigned  m2,
uint64_t *  fh_nomonos,
uint64_t *  rh_nomonos,
uint64_t *  fh_val,
uint64_t *  rh_val,
unsigned &  loc_n,
uint64_t *  h_val 
)

Generate multiple hash values for the input spaced seeds and first k-mer.

Parameters
kmer_seqArray of characters representing the k-mer.
seeds_blocksArray of SpacedSeedBlocks objects representing the seeds' blocks.
seeds_monomersList of the positions that represent blocks of size one for each seed.
kk-mer size.
mNumber of spaced seeds.
m2Number of hashes per seed.
fh_nomonosContainer for the forward hash values before including the size-one blocks.
rh_nomonosContainer for the reverse hash values before including the size-one blocks.
fh_valContainer for the forward hash values after including the size-one blocks.
rh_valContainer for the reverse hash values after including the size-one blocks.
loc_nLocation of the first unknown character in the first sequence.
h_valArray of size m * m2 for storing the output hash values.
Returns
true if all the care positions of the first k-mer are valid, otherwise false.

◆ ntmsm64l() [1/2]

void btllib::ntmsm64l ( const char *  kmer_seq,
char  in,
const std::vector< SpacedSeedBlocks > &  seeds_blocks,
const std::vector< SpacedSeedMonomers > &  seeds_monomers,
unsigned  k,
unsigned  m,
unsigned  m2,
uint64_t *  fh_nomonos,
uint64_t *  rh_nomonos,
uint64_t *  fh_val,
uint64_t *  rh_val,
uint64_t *  h_val 
)

Generate multiple hash values for the input spaced seeds and the next k-mer by performing a backwards peek operation.

Parameters
kmer_seqArray of characters representing the previous k-mer.
seeds_blocksArray of SpacedSeedBlocks objects representing the seeds' blocks.
seeds_monomersList of the positions that represent blocks of size one for each seed.
kk-mer size.
mNumber of spaced seeds.
m2Number of hashes per seed.
fh_nomonosPrevious forward hash values before including the size-one blocks.
rh_nomonosPrevious reverse hash values before including the size-one blocks.
fh_valPrevious forward hash values after including the size-one blocks.
rh_valPrevious reverse hash values after including the size-one blocks.
h_valArray of size m * m2 for storing the output hash values.

◆ ntmsm64l() [2/2]

void btllib::ntmsm64l ( const char *  kmer_seq,
const std::vector< SpacedSeedBlocks > &  seeds_blocks,
const std::vector< SpacedSeedMonomers > &  seeds_monomers,
unsigned  k,
unsigned  m,
unsigned  m2,
uint64_t *  fh_nomonos,
uint64_t *  rh_nomonos,
uint64_t *  fh_val,
uint64_t *  rh_val,
uint64_t *  h_val 
)

Generate multiple hash values for the input spaced seeds and the next k-mer by performing a backward roll operation.

Parameters
kmer_seqArray of characters representing the previous k-mer.
seeds_blocksArray of SpacedSeedBlocks objects representing the seeds' blocks.
seeds_monomersList of the positions that represent blocks of size one for each seed.
kk-mer size.
mNumber of spaced seeds.
m2Number of hashes per seed.
fh_nomonosPrevious forward hash values before including the size-one blocks.
rh_nomonosPrevious reverse hash values before including the size-one blocks.
fh_valPrevious forward hash values after including the size-one blocks.
rh_valPrevious reverse hash values after including the size-one blocks.
h_valArray of size m * m2 for storing the output hash values.

◆ ntr64() [1/2]

uint64_t btllib::ntr64 ( const char *  kmer_seq,
unsigned  k 
)

Generate a hash value for the reverse-complement of the first k-mer in the sequence.

Parameters
kmer_seqC array containing the sequence's characters.
kk-mer size.
Returns
Hash value of the reverse-complement of k-mer_0.

◆ ntr64() [2/2]

uint64_t btllib::ntr64 ( uint64_t  rh_val,
unsigned  k,
unsigned char  char_out,
unsigned char  char_in 
)

Perform a roll operation on the reverse-complement by removing char_out and including char_in.

Parameters
rh_valPrevious reverse-complement hash value computed for the sequence.
kk-mer size.
char_outCharacter to be removed.
char_inCharacter to be included.
Returns
Rolled hash value for the reverse-complement.

◆ ntr64l()

uint64_t btllib::ntr64l ( uint64_t  fh_val,
unsigned  k,
unsigned char  char_out,
unsigned char  char_in 
)

Perform a roll-back operation on the reverse-complement.

Parameters
fh_valPrevious reverse hash value computed for the sequence.
kk-mer size.
char_outCharacter to be removed.
char_inCharacter to be included.
Returns
Resulting hash value for the reverse-complement.

◆ reverse_complement()

void btllib::reverse_complement ( std::string &  seq)

Reverse complement a sequence in-place.

Parameters
seqSequence to reverse complement.

◆ rh_no_monomers_tmp()

nthash std::unique_ptr< uint64_t[]> btllib::rh_no_monomers_tmp ( new uint64_t  [blocks.size()])
Initial value:
{
std::unique_ptr<uint64_t[]> fh_no_monomers_tmp(new uint64_t[blocks.size()])

◆ rtrim()

void btllib::rtrim ( std::string &  s)

Trim whitespace on the right side of the given string.

Parameters
sString to trim, edited in-place.

◆ split()

std::vector< std::string > btllib::split ( const std::string &  s,
const std::string &  delim 
)

Split a string into component substrings with a delimiter.

Parameters
sString to split.
delimDelimiter to split with.
Returns
Vector of substrings delimited by delim, excluding delimiters themselves.

◆ srol() [1/2]

uint64_t btllib::srol ( const uint64_t  x)
inline

Split a 64-bit word into 33 and 31-bit subwords and left-rotate them separately.

Parameters
xA 64-bit unsigned integer.
Returns
Split-rotation result.

◆ srol() [2/2]

uint64_t btllib::srol ( const uint64_t  x,
const unsigned  d 
)
inline

Split a 64-bit word into 33 and 31-bit subwords and left-rotate them separately multiple times.

Parameters
xA 64-bit unsigned integer.
dNumber of rotations.
Returns
Split-rotation result.

◆ sror()

uint64_t btllib::sror ( const uint64_t  x)
inline

Split a 64-bit word into 33 and 31-bit subwords and right-rotate them separately.

Parameters
xA 64-bit unsigned integer.
Returns
Split-rotation result.

◆ startswith()

bool btllib::startswith ( std::string  s,
std::string  prefix 
)

Check whether the given string starts with a prefix.

Parameters
sString to check.
prefixPrefix to check for.

◆ sub_hash()

void btllib::sub_hash ( uint64_t  fh_val,
uint64_t  rh_val,
const char *  kmer_seq,
const std::vector< unsigned > &  positions,
const std::vector< unsigned char > &  new_bases,
unsigned  k,
unsigned  m,
uint64_t *  h_val 
)

Generate multiple new hash values for the input k-mer by substituting multiple characters.

Parameters
fh_valForward hash value of the k-mer.
rh_valReverse hash value of the k-mer.
kmer_seqArray of characters representing the k-mer.
positionsIndices of the positions to be substituted.
new_basesCharacters to be placed in the indices indicated in positions.
kk-mer size.
mNumber of hashes per k-mer.
h_valArray of size m for storing the output hash values.

◆ trim()

void btllib::trim ( std::string &  s)

Trim whitespace on the left and right side of the given string.

Parameters
sString to trim, edited in-place.