btllib
seq_reader_multiline_fastq_module.hpp
1#ifndef BTLLIB_SEQ_READER_MULTILINE_FASTQ_MODULE_HPP
2#define BTLLIB_SEQ_READER_MULTILINE_FASTQ_MODULE_HPP
3
4#include "btllib/cstring.hpp"
5#include "btllib/status.hpp"
6
7#include <cstdlib>
8
9namespace btllib {
10
12class SeqReaderMultilineFastqModule
13{
14
15private:
16 friend class SeqReader;
17
18 enum class Stage
19 {
20 HEADER,
21 SEQ,
22 TRANSITION,
23 SEP,
24 QUAL
25 };
26
27 Stage stage = Stage::HEADER;
28 CString tmp;
29
30 static bool buffer_valid(const char* buffer, size_t size);
31 template<typename ReaderType, typename RecordType>
32 bool read_buffer(ReaderType& reader, RecordType& record);
33 template<typename ReaderType, typename RecordType>
34 bool read_transition(ReaderType& reader, RecordType& record);
35 template<typename ReaderType, typename RecordType>
36 bool read_file(ReaderType& reader, RecordType& record);
37};
38
39template<typename ReaderType, typename RecordType>
40inline bool
41SeqReaderMultilineFastqModule::read_buffer(ReaderType& reader,
42 RecordType& record)
43{
44 record.header.clear();
45 record.seq.clear();
46 record.qual.clear();
47 if (reader.buffer.start < reader.buffer.end) {
48 int c;
49 for (;;) {
50 switch (stage) {
51 case Stage::HEADER: {
52 if (!reader.readline_buffer_append(record.header)) {
53 return false;
54 }
55 stage = Stage::SEQ;
56 }
57 // fall through
58 case Stage::SEQ: {
59 if (!reader.readline_buffer_append(record.seq)) {
60 return false;
61 }
62 rtrim(record.seq);
63 stage = Stage::TRANSITION;
64 }
65 // fall through
66 case Stage::TRANSITION: {
67 c = reader.getc_buffer();
68 if (c == EOF) {
69 return false;
70 }
71 reader.ungetc_buffer(c);
72 if (c == '+') {
73 stage = Stage::SEP;
74 } else {
75 stage = Stage::SEQ;
76 }
77 break;
78 }
79 case Stage::SEP: {
80 if (!reader.readline_buffer_append(tmp)) {
81 return false;
82 }
83 stage = Stage::QUAL;
84 tmp.clear();
85 }
86 // fallthrough
87 case Stage::QUAL: {
88 if (!reader.readline_buffer_append(record.qual)) {
89 return false;
90 }
91 rtrim(record.qual);
92 if (record.qual.size() == record.seq.size()) {
93 stage = Stage::HEADER;
94 return true;
95 }
96 check_error(record.qual.size() > record.seq.size(),
97 "SeqReader: Multiline FASTQ reader: Quality string is "
98 "longer than sequence string.");
99 break;
100 }
101 default: {
102 log_error("SeqReader has entered an invalid state.");
103 std::exit(EXIT_FAILURE); // NOLINT(concurrency-mt-unsafe)
104 }
105 }
106 }
107 }
108 return false;
109}
110
111template<typename ReaderType, typename RecordType>
112inline bool
113SeqReaderMultilineFastqModule::read_transition(ReaderType& reader,
114 RecordType& record)
115{
116 if (std::ferror(reader.source) == 0 && std::feof(reader.source) == 0) {
117 const auto p = std::fgetc(reader.source);
118 if (p != EOF) {
119 std::ungetc(p, reader.source);
120 int c;
121 for (;;) {
122 switch (stage) {
123 case Stage::HEADER: {
124 reader.readline_file_append(record.header, reader.source);
125 stage = Stage::SEQ;
126 }
127 // fall through
128 case Stage::SEQ: {
129 reader.readline_file_append(record.seq, reader.source);
130 rtrim(record.seq);
131 stage = Stage::TRANSITION;
132 }
133 // fall through
134 case Stage::TRANSITION: {
135 c = std::fgetc(reader.source);
136 if (c == EOF) {
137 return false;
138 }
139 std::ungetc(c, reader.source);
140 if (c == '+') {
141 stage = Stage::SEP;
142 } else {
143 stage = Stage::SEQ;
144 }
145 break;
146 }
147 case Stage::SEP: {
148 reader.readline_file_append(tmp, reader.source);
149 stage = Stage::QUAL;
150 tmp.clear();
151 }
152 // fallthrough
153 case Stage::QUAL: {
154 reader.readline_file_append(record.qual, reader.source);
155 rtrim(record.qual);
156 if (record.qual.size() == record.seq.size()) {
157 stage = Stage::HEADER;
158 return true;
159 }
160 check_error(record.qual.size() > record.seq.size(),
161 "SeqReader: Multiline FASTQ reader: Quality string is "
162 "longer than sequence string.");
163 break;
164 }
165 default: {
166 log_error("SeqReader has entered an invalid state.");
167 std::exit(EXIT_FAILURE); // NOLINT(concurrency-mt-unsafe)
168 }
169 }
170 }
171 }
172 }
173 return false;
174}
175
176template<typename ReaderType, typename RecordType>
177inline bool
178SeqReaderMultilineFastqModule::read_file(ReaderType& reader, RecordType& record)
179{
180 if (!reader.file_at_end(reader.source)) {
181 reader.readline_file(record.header, reader.source);
182 int c;
183 reader.readline_file(record.seq, reader.source);
184 rtrim(record.seq);
185 for (;;) {
186 c = std::fgetc(reader.source);
187 check_error(c == EOF,
188 "SeqReader: Multiline FASTQ reader: Unexpected end.");
189 std::ungetc(c, reader.source);
190 if (c == '+') {
191 reader.readline_file(tmp, reader.source);
192 reader.readline_file(record.qual, reader.source);
193 rtrim(record.qual);
194 size_t prevlen;
195 while (record.qual.size() < record.seq.size()) {
196 prevlen = record.qual.size();
197 reader.readline_file_append(record.qual, reader.source);
198 check_error(prevlen == record.qual.size(),
199 "SeqReader: Multiline FASTQ reader: Failed to read the "
200 "quality string.");
201 rtrim(record.qual);
202 }
203 check_error(record.qual.size() > record.seq.size(),
204 "SeqReader: Multiline FASTQ reader: Quality string is "
205 "longer than sequence string.");
206 return true;
207 }
208 reader.readline_file_append(record.seq, reader.source);
209 rtrim(record.seq);
210 }
211 }
212 return false;
213}
215
216} // namespace btllib
217
218#endif
Definition: bloom_filter.hpp:16
void check_error(bool condition, const std::string &msg)
void rtrim(std::string &s)
void log_error(const std::string &msg)