1#ifndef BTLLIB_SEQ_READER_HPP
2#define BTLLIB_SEQ_READER_HPP
4#include "btllib/cstring.hpp"
5#include "btllib/data_stream.hpp"
6#include "btllib/order_queue.hpp"
7#include "btllib/seq.hpp"
8#include "btllib/seq_reader_fasta_module.hpp"
9#include "btllib/seq_reader_fastq_module.hpp"
10#include "btllib/seq_reader_gfa2_module.hpp"
11#include "btllib/seq_reader_multiline_fasta_module.hpp"
12#include "btllib/seq_reader_multiline_fastq_module.hpp"
13#include "btllib/seq_reader_sam_module.hpp"
14#include "btllib/status.hpp"
18#include <condition_variable>
72 unsigned threads = 3);
82 void close() noexcept;
99 friend std::ostream& operator<<(std::ostream& os,
const Format f)
101 return os << static_cast<int32_t>(f);
104 Format get_format()
const {
return format; }
108 size_t num = std::numeric_limits<size_t>::max();
114 operator bool()
const {
return !seq.empty(); }
123 static const size_t MAX_SIMULTANEOUS_SEQREADERS = 256;
130 void operator++() { record = reader.read(); }
131 bool operator!=(
const RecordIterator& i)
133 return bool(record) || bool(i.record);
135 Record operator*() {
return std::move(record); }
139 auto val = operator*();
147 RecordIterator(
SeqReader& reader,
bool end)
160 RecordIterator
begin() {
return RecordIterator(*
this,
false); }
161 RecordIterator end() {
return RecordIterator(*
this,
true); }
163 size_t get_buffer_size()
const {
return buffer_size; }
164 size_t get_block_size()
const {
return block_size; }
166 static const size_t SHORT_MODE_BUFFER_SIZE = 32;
167 static const size_t SHORT_MODE_BLOCK_SIZE = 32;
169 static const size_t LONG_MODE_BUFFER_SIZE = 4;
170 static const size_t LONG_MODE_BLOCK_SIZE = 1;
172 static const size_t FORMAT_BUFFER_SIZE = 16384;
179 : data(FORMAT_BUFFER_SIZE)
182 std::vector<char> data;
185 bool eof_newline_inserted =
false;
// Owned copy of the path passed to the constructor. The constructor takes the
// path as `const std::string&`; keeping only a reference here would dangle as
// soon as a caller passes a temporary (e.g. a string literal), because that
// temporary is destroyed once construction finishes while this object lives on.
const std::string source_path;
197 const unsigned flags;
198 const unsigned threads;
199 Format format = Format::UNDETERMINED;
200 std::atomic<bool> closed{
false };
202 std::unique_ptr<std::thread> reader_thread;
203 std::vector<std::unique_ptr<std::thread>> processor_threads;
204 std::mutex format_mutex;
205 std::condition_variable format_cv;
206 std::atomic<bool> reader_end{
false };
207 RecordCString* reader_record =
nullptr;
208 const std::atomic<size_t> buffer_size;
209 const std::atomic<size_t> block_size;
210 OrderQueueSPMC<RecordCString> cstring_queue;
211 OrderQueueMPMC<Record> output_queue;
212 std::atomic<size_t> dummy_block_num{ 0 };
216 thread_local static std::unique_ptr<
decltype(output_queue)::Block>
218 ready_records_array[MAX_SIMULTANEOUS_SEQREADERS];
221 thread_local static long ready_records_owners[MAX_SIMULTANEOUS_SEQREADERS];
224 thread_local static size_t ready_records_current[MAX_SIMULTANEOUS_SEQREADERS];
227 static std::atomic<long> last_id;
230 void determine_format();
232 void start_processors();
235 bool readline_buffer_append(CString& s);
236 static void readline_file(CString& s, FILE* f);
237 void readline_file_append(CString& s, FILE* f);
238 static bool file_at_end(FILE* f);
240 int ungetc_buffer(
int c);
242 void update_cstring_records(OrderQueueSPMC<RecordCString>::Block& records,
246 template<
typename Module>
247 void read_from_buffer(Module& module,
248 OrderQueueSPMC<RecordCString>::Block& records,
251 template<
typename Module>
252 void read_transition(Module& module,
253 OrderQueueSPMC<RecordCString>::Block& records,
256 template<
typename Module>
257 void read_from_file(Module& module,
258 OrderQueueSPMC<RecordCString>::Block& records,
262 friend class SeqReaderFastaModule;
263 SeqReaderFastaModule fasta_module;
265 friend class SeqReaderMultilineFastaModule;
266 SeqReaderMultilineFastaModule multiline_fasta_module;
268 friend class SeqReaderFastqModule;
269 SeqReaderFastqModule fastq_module;
271 friend class SeqReaderMultilineFastqModule;
272 SeqReaderMultilineFastqModule multiline_fastq_module;
274 friend class SeqReaderSamModule;
275 SeqReaderSamModule sam_module;
277 friend class SeqReaderGfa2Module;
278 SeqReaderGfa2Module gfa2_module;
280 int module_in_use = 0;
285template<
typename Module>
287SeqReader::read_from_buffer(Module& module,
288 OrderQueueSPMC<RecordCString>::Block& records,
291 while (!reader_end) {
292 reader_record = &(records.data[records.count]);
293 if (!module.read_buffer(*
this, *reader_record) ||
294 reader_record->seq.empty()) {
297 update_cstring_records(records, counter);
301template<
typename Module>
303SeqReader::read_transition(Module& module,
304 OrderQueueSPMC<RecordCString>::Block& records,
308 reader_record = &(records.data[records.count]);
309 module.read_transition(*
this, *reader_record);
310 if (!reader_record->seq.empty()) {
311 update_cstring_records(records, counter);
313 }
else if (!reader_record->seq.empty()) {
314 update_cstring_records(records, counter);
318template<
typename Module>
320SeqReader::read_from_file(Module& module,
321 OrderQueueSPMC<RecordCString>::Block& records,
324 while (!reader_end) {
325 reader_record = &(records.data[records.count]);
326 if (!module.read_file(*
this, *reader_record) ||
327 reader_record->seq.empty()) {
330 update_cstring_records(records, counter);
Definition: seq_reader.hpp:43
SeqReader(const std::string &source_path, unsigned flags, unsigned threads=3)
OrderQueueMPMC< Record >::Block read_block()
RecordIterator begin()
Definition: seq_reader.hpp:160
Definition: bloom_filter.hpp:16
Definition: seq_reader.hpp:50
static const unsigned TRIM_MASKED
Definition: seq_reader.hpp:55
static const unsigned FOLD_CASE
Definition: seq_reader.hpp:52
static const unsigned SHORT_MODE
Definition: seq_reader.hpp:57
static const unsigned LONG_MODE
Definition: seq_reader.hpp:59
Definition: seq_reader.hpp:107