1 /*
2    This file is part of BioD.
3    Copyright (C) 2016   George Githinji <biorelated@gmail.com>
4    Permission is hereby granted, free of charge, to any person obtaining a
5    copy of this software and associated documentation files (the "Software"),
6    to deal in the Software without restriction, including without limitation
7    the rights to use, copy, modify, merge, publish, distribute, sublicense,
8    and/or sell copies of the Software, and to permit persons to whom the
9    Software is furnished to do so, subject to the following conditions:
10 
11    The above copyright notice and this permission notice shall be included in
12    all copies or substantial portions of the Software.
13 
14    THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
15    IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
16    FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
17    AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
18    LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
19    FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
20    DEALINGS IN THE SOFTWARE.
21  */
22 
/*
   The bio.std.file.fastq module is based on a question that I posted on forums.dlang.org.
   I have bundled the answer as a module to support parsing FASTQ files with D.
   Credit should go to Rikki Cattermole.
 */
28 
29 module bio.std.file.fastq;
30 
/**
 * A single four-line FASTQ record.
 *
 * All three fields are slices of the buffer passed to `read`; no text is
 * copied, so the fields alias that buffer.
 */
struct FastqRecord {
    const(char)[] id;   /// header line without the leading '@'
    const(char)[] seq;  /// sequence line
    const(char)[] qual; /// quality line

    /**
     * Lazily parses FASTQ records out of `from`.
     *
     * Each record is expected to span exactly four lines: `@id`, sequence,
     * a separator line (its content is skipped) and the quality string.
     * Both "\n" and "\r\n" line endings are accepted, and the last line of
     * the input does not need a trailing newline. Bytes `<= ' '` between
     * records (blank lines, indentation) are skipped.
     *
     * Params:
     *   from = text containing zero or more FASTQ records
     *
     * Returns: an input range (`empty` / `front` / `popFront`) of
     *          `FastqRecord`; it is empty right away when `from` holds no
     *          record.
     *
     * Throws: `Exception` when a record does not start with '@' or is
     *         truncated. The first record is parsed eagerly, so `read`
     *         itself may throw.
     */
    static auto read(const(char)[] from) {
        struct Result {
            private {
                const(char)[] source;   // text that has not been consumed yet
                FastqRecord value;      // record currently exposed by `front`
                bool isEmpty;           // set once no further record exists
            }

            this(const(char)[] source) {
                this.source = source;
                // Prime the range so that `front`/`empty` are valid at once.
                popFront;
            }

            @property {
                /// The record parsed by the most recent `popFront`.
                FastqRecord front() {
                    return value;
                }

                /// True once the input holds no further record.
                bool empty() {
                    return isEmpty;
                }
            }

            /// Parses the next record into `front`, or marks the range empty.
            void popFront() {
                import std.exception : enforce;
                import std.string : indexOf;

                // Drops every leading byte <= ' ' (whitespace and control
                // characters); leaves `source` null when nothing else remains.
                void tidyInput() {
                    foreach (i, c; source) {
                        if (c > ' ') {
                            source = source[i .. $];
                            return;
                        }
                    }
                    source = null;
                }

                // Removes the next line from `source` and returns it without
                // its "\n" / "\r\n" terminator. A final line lacking a
                // terminator is returned as-is.
                const(char)[] takeLine() {
                    const(char)[] line;
                    immutable nl = source.indexOf('\n');
                    if (nl < 0) {
                        line = source;
                        source = null;
                    } else {
                        line = source[0 .. nl];
                        source = source[nl + 1 .. $];
                    }
                    if (line.length > 0 && line[$ - 1] == '\r')
                        line = line[0 .. $ - 1];
                    return line;
                }

                tidyInput();

                // Nothing but whitespace (or nothing at all) is left: the
                // range is exhausted and must not expose a bogus record.
                if (source.length == 0) {
                    isEmpty = true;
                    return;
                }

                // id
                enforce(source[0] == '@',
                        "FASTQ record does not start with '@'");
                value.id = takeLine()[1 .. $];

                // seq
                enforce(source.length > 0,
                        "truncated FASTQ record: missing sequence line");
                value.seq = takeLine();

                // +id (content is not inspected)
                enforce(source.length > 0,
                        "truncated FASTQ record: missing separator line");
                takeLine();

                // qual
                enforce(source.length > 0,
                        "truncated FASTQ record: missing quality line");
                value.qual = takeLine();
            }
        }
        return Result(from);
    }
}
130 
131 /* fails with ldc >1.11
132 unittest {
133     string input = """
134         @seq1
135         TTATTTTAAT
136         +
137         ?+BBB/DHH@
138         @seq2
139         GACCCTTTGCA
140         +
141         ?+BHB/DIH@
142         @SEQ_ID
143         GATTTGGGGTTCAAAGCAGTATCGATCAAATAGTAAATCCATTTGTTCAACTCACAGTTT
144         +
145         !''*((((***+))%%%++)(%%%%).1***-+*''))**55CCF>>>>>>CCCCCCC65
146         """[1 .. $];
147 
148     foreach(record; FastqRecord.read(input)) {
149         import std.stdio;
150         // stderr.writeln(record); -> should be an assert statement
151     }
152 }
153 
154 */