tame.parse source code

1 module tame.parse;
2 
3 import std.datetime;
4 import std.conv : ConvException;
5 import std.format : formattedRead;
6 
/**
 * Parses a timestamp into a `SysTime`.
 *
 * The input is dispatched on its shape:
 *   - `YYYY-Mon-DD ...` goes to `SysTime.fromSimpleString`;
 *   - input carrying a `Z` or a `+HH:MM` / `-HH:MM` offset goes to
 *     `SysTime.fromISOExtString` when its date part is hyphenated
 *     (`YYYY-MM-DD`) and to `SysTime.fromISOString` otherwise;
 *   - everything else is parsed by `parseDateTime` and tagged as UTC.
 *
 * Params:
 *   input = the timestamp text
 *
 * Returns:
 *   The parsed `SysTime`.
 *
 * Throws:
 *   `DateTimeException` when a `ConvException` is raised while parsing;
 *   exceptions thrown by the underlying parsers propagate unchanged.
 */
SysTime parseSysTime(S)(S input) @safe {
	import std.regex : match;

	try {
		if (input.match(`\d{4}-\D{3}-\d{2}.*`))
			return SysTime.fromSimpleString(input);
		if (input.match(`.*[+\-]\d{1,2}:\d{1,2}|.*Z`)) {
			// Look at the date part to pick the parser: a '-' somewhere in the
			// input is not enough, because the sign of a negative offset is one too.
			return input.match(`\d{4}-\d{2}-\d{2}`) ?
				SysTime.fromISOExtString(input) : SysTime.fromISOString(input);
		}
		return SysTime(parseDateTime(input), UTC());
	} catch (ConvException e)
		throw new DateTimeException("Can not convert '" ~ input ~ "' to SysTime");
}
21 
unittest {
	// Timestamps in the formats D's SysTime parsers understand
	foreach (string text; [
		"2019-May-04 13:34:10.500Z",
		"2019-Jan-02 13:34:10-03:00",
		"2019-05-04T13:34:10.500Z",
		"2019-06-14T13:34:10.500+01:00",
		"2019-02-07T13:34:10Z",
		"2019-08-12T13:34:10+01:00",
		"2019-09-03T13:34:10",
	])
		parseSysTime(text);

	// Date-time stamps without a zone (the zone defaults to UTC)
	foreach (string text; [
		"2010-Dec-30 00:00:00",
		"2019-05-04 13:34:10",
		// "2019-05-08",
	])
		parseSysTime(text);

	// Timestamp formats that are non-standard as far as D is concerned
	//parseSysTime("2019-05-07 13:32"); // todo: handle missing seconds
	//parseSysTime("2019/05/07 13:32"); // todo: handle slash instead of hyphen
	//parseSysTime("2010-12-30 12:10:04.1+00"); // postgresql
}
42 
/**
 * Parses a date-time string into a `DateTime`.
 *
 * Three shapes are recognised, tried in this order: the compact ISO form
 * (`YYYYMMDDTHHMMSS`), the simple form (`YYYY-Mon-DD HH:MM:SS`) and the
 * extended ISO form (`YYYY-MM-DDTHH:MM:SS`, where blanks are accepted in
 * place of the `T`).
 *
 * Params:
 *   input = the date-time text
 *
 * Returns:
 *   The parsed `DateTime`.
 *
 * Throws:
 *   `DateTimeException` when the input has none of the recognised shapes or a
 *   `ConvException` is raised while parsing.
 */
DateTime parseDateTime(S)(S input) @safe {
	import std.string;
	import std.regex : match;

	try {
		// Compact ISO form: 'YYYYMMDDTHHMMSS'
		if (match(input, r"\d{8}T\d{6}"))
			return DateTime.fromISOString(input);

		// Simple form: 'YYYY-Mon-DD HH:MM:SS'
		if (match(input, r"\d{4}-\D{3}-\d{2}.*"))
			return DateTime.fromSimpleString(input);

		// Extended ISO form: 'YYYY-MM-DDTHH:MM:SS'; blanks become 'T' first
		if (match(input, r"\d{4}-\d{2}-\d{2}.*")) {
			auto normalized = input.replace(' ', 'T');
			return DateTime.fromISOExtString(normalized);
		}

		// Unrecognised shape: raise what the handler below translates
		throw new ConvException(null);
	} catch (ConvException e)
		throw new DateTimeException("Can not convert '" ~ input ~ "' to DateTime");
}
63 
unittest {
	// Date-time formats that D's DateTime parsers understand
	foreach (string text; [
		"20101230T000000",
		"2019-May-04 13:34:10",
		"2019-Jan-02 13:34:10",
		"2019-05-04T13:34:10",
	])
		parseDateTime(text);

	// Non-standard variation: a space instead of the 'T' separator
	parseDateTime("2019-06-14 13:34:10");
	//parseDateTime("2019-05-07 13:32"); // todo: handle missing seconds
	//parseDateTime("2019/05/07 13:32"); // todo: handle slash instead of hyphen
}
76 
/**
 * Reads three integer fields separated by ':' from `input` and builds a
 * `TimeOfDay` from them.
 *
 * Params:
 *   input = text of the form `hour:minute:second`
 *
 * Returns:
 *   `TimeOfDay(hour, minute, second)`.
 */
TimeOfDay parseTime(S)(S input) {
	int h;
	int m;
	int s;
	formattedRead(input, "%s:%s:%s", &h, &m, &s);
	return TimeOfDay(h, m, s);
}
82 
/**
 * Reads three integer fields separated by '-' from `input` and builds a
 * `Date` from them.
 *
 * Params:
 *   input = text of the form `year-month-day`
 *
 * Returns:
 *   `Date(year, month, day)`.
 */
Date parseDate(S)(S input) {
	int y;
	int m;
	int d;
	formattedRead(input, "%s-%s-%s", &y, &m, &d);
	return Date(y, m, d);
}
88 
89 import std.traits;
90 import tame.internal;
91 
92 @nogc nothrow:
93 
/**
 *
 * Decodes a single hexadecimal character.
 *
 * No validation is performed; the result is the low four bits of
 * `c + 9 * (c >> 6)`, which is the digit value for '0'-'9', 'a'-'f' and
 * 'A'-'F'.
 *
 * Params:
 *   c = The hexadecimal digit.
 *
 * Returns:
 *   `c` converted to an integer.
 *
 */
uint hexDecode(char c) @safe pure {
	// Bit 6 is set for the ASCII letters and clear for the ASCII digits, so
	// letters get 9 added before the low nibble is taken ('a' & 15 == 1).
	const uint letterBias = 9 * (c >> 6);
	return (c + letterBias) & 15;
}
109 
/**
 * Decodes four hexadecimal characters in one go and advances the read pointer
 * past them. The characters are not validated.
 *
 * Params:
 *   hex = read pointer; four chars are read from it and it is advanced by 4
 *
 * Returns:
 *   The 16-bit value of the four digits, first character most significant.
 */
uint hexDecode4(ref const(char)* hex) pure {
	// Load the four characters themselves. Dereferencing `&hex` instead would
	// read the bits of the pointer variable, not the text it points at.
	// NOTE(review): this is a possibly unaligned 32-bit load - confirm that all
	// target platforms allow it.
	uint x = *cast(const(uint)*) hex;
	hex += 4;
	// Per byte: keep the low nibble and add 9 where bit 6 (set for letters) is set.
	x = (x & 0x0F0F0F0F) + 9 * ((x >> 6) & 0x01010101);
	version (LittleEndian) {
		// The first character sits in the lowest byte; move each nibble into place.
		return (x >> 24) | ((x >> 12) & 0xF0) | (x & 0xF00) | ((x << 12) & 0xF000);
	} else {
		// The first character sits in the highest byte; squeeze the nibbles together.
		x = (x | (x >> 4)) & 0x00FF00FF;
		return (x | (x >> 8)) & 0x0000FFFF;
	}
}
121 
/**
 * Decodes four hexadecimal characters with validation.
 *
 * Params:
 *   hex = read pointer; advanced by 4 only when all four characters are valid
 *   result = receives the decoded value; when decoding fails it holds the
 *            partially accumulated value
 *
 * Returns:
 *   `null` on success, otherwise a pointer to the first character that is not
 *   a hexadecimal digit.
 */
inout(char)* hexDecode4(ref inout(char)* hex, out uint result) pure {
	for (size_t pos = 0; pos < 4; ++pos) {
		result <<= 4;
		// '0'..'9' map to 0..9; everything else wraps around or ends up above 9
		const char digit = cast(char)(hex[pos] - '0');
		if (digit <= 9) {
			result += digit;
			continue;
		}
		// Fold upper case onto lower case, then 'a'..'f' map to 0..5
		const char letter = cast(char)((digit | 0x20) - 0x31);
		if (letter > 5)
			return hex + pos;
		result += letter + 10;
	}
	hex += 4;
	return null;
}
139 
unittest {
	// Mixed-case digits decode, and the validating overload signals success with null
	string digits = "aF09";
	const(char)* cursor = digits.ptr;
	uint value;
	const(char)* failedAt = hexDecode4(cursor, value);
	assert(failedAt is null);
	assert(value == 0xAF09);
}
147 
148 /+
149 	String Scanning and Comparison
150  +/
151 
/**
 *
 * Compares a string of unknown length against a statically known key.
 *
 * This function also handles escapes and requires one or more terminator chars.
 *
 * Params:
 *   C = Character (code unit) type.
 *   key = The static key string.
 *   terminators = A list of code units that terminate the string.
 *   special = A list of code units that are handled by the user callback. Use
 *             this for escape string handling. Default is `null`.
 *   p_str = Pointer to the string for the comparison. After the function call
 *           it will be behind the last matching character.
 *   callback = User callback to handle special escape characters if `special`
 *              is non-empty. It receives the key and string read pointers by
 *              reference; returning `false` ends the comparison as "not equal".
 *
 * Returns:
 *   A code with following meanings: -1 = not equal, terminator character hit
 *   before the key was exhausted, 0 = not equal, but string not exhausted
 *   (mismatch, callback returned `false`, or the string continues after the
 *   key), 1 = string equals key and is followed by a terminator.
 *
 */
int fixedTermStrCmp(C, immutable C[] key, immutable C[] terminators, immutable C[] special = null)(
	ref const(C)* p_str, scope bool delegate(ref immutable(char)*, ref const(char)*) callback = null)
in (special.length == 0 || callback) {
	// NOTE(review): this file declares `@nogc nothrow:` above, yet the delegate
	// type carries neither attribute - confirm that instantiations compile.

	// Class of every code unit up to 0xFF: -1 = terminator, 1 = special,
	// 0 = ordinary. Built once, at compile time.
	static immutable byte[256] classify = () {
		byte[256] table;
		foreach (C c; special)
			if (c <= 0xFF)
				table[c] = 1;
		// Written last so that a terminator wins over a special code unit.
		foreach (C c; terminators)
			if (c <= 0xFF)
				table[c] = -1;
		return table;
	}();

	immutable(C)* p_key = key.ptr;
	immutable C* e_key = p_key + key.length;

	while (p_key !is e_key) {
		// Code units above 0xFF are never terminators or special.
		int clazz = *p_str <= 0xFF ? classify[*p_str] : 0;

		if (clazz < 0) {
			// The string ended before the key did.
			return clazz;
		} else if (clazz == 0) {
			if (*p_str != *p_key)
				return clazz;

			p_str++;
			p_key++;
		} else {
			// Special code unit: the callback advances both pointers itself.
			if (!callback(p_key, p_str))
				return 0;
		}
	}

	// The key is exhausted; it is a match only if the string ends here as well.
	return classify[*p_str & 0xFF] < 0;
}
206 
207 /*
208 void fixedStringCompareSSE4() {
209 	enum words     = key.length / 16;
210 	enum remainder = key.length % 16;
211 	enum contains0 = key.canFind('\0');     // For SSE4.2 string search.
212 	static assert(!contains0, "Not implemented");
213 
214 	size_t remaining = e - b;
215 	auto p = b;
216 
217 	foreach (i; staticIota!(0, words)) {
218 		auto backup = p;
219 		p.vpcmpistri!(char, key[16 * i .. 16 * i + 16], Operation.equalElem, Polarity.negateValid);
220 		p = backup;
221 		p.vpcmpistri!(char, key[16 * i .. 16 * i + 16], Operation.equalElem, Polarity.negateValid);
222 	}
223 }
224 */
225 
226 @forceinline
void seekToAnyOf(string cs)(ref const(char)* p) {
	// Advances `p` until it points at one of the characters in `cs` or at the
	// terminating '\0', whichever comes first.
	for (; *p; ++p) {
		foreach (candidate; cs) {
			if (*p == candidate)
				return;
		}
	}
	//p.vpcmpistri!(char, sanitizeChars(cs), Operation.equalAnyElem);
}
243 
244 @forceinline
void seekToRanges(string cs)(ref const(char)* p) {
	// `cs` is read as pairs of range bounds (lower, upper). Advances `p` until
	// it points at a character inside one of the ranges or at the terminating
	// '\0', whichever comes first.
	for (; *p; ++p) {
		const char cur = *p;
		for (int lo = 0; lo < cs.length; lo += 2) {
			if (cur >= cs[lo] && cur <= cs[lo + 1])
				return;
		}
	}
	//p.vpcmpistri!(char, sanitizeRanges(cs), Operation.inRanges);
}
261 
262 /**
263  *
264  * Searches for a specific character known to appear in the stream and skips the
265  * read pointer over it.
266  *
267  * Params:
268  *   c = the character
269  *   p = the read pointer
270  *
271  */
272 @forceinline
void seekPast(char c)(ref const(char)* p) {
	// Consume characters up to and including the first `c`; stop early, without
	// consuming it, at the terminating '\0'.
	for (char cur = *p; cur != '\0'; cur = *p) {
		++p;
		if (cur == c)
			return;
	}
	//p.vpcmpistri!(char, c.repeat(16).to!string, Operation.equalElem);
}
283 
284 /**
285  *
286  * Skips the read pointer over characters that fall into any of up to 8 ranges
287  * of characters. The first character in `cs` is the start of the first range,
288  * the second character is the end. This is repeated for any other character
289  * pair. A character falls into a range from `a` to `b` if `a <= *p <= b`.
290  *
291  * Params:
292  *   cs = the character ranges
293  *   p = the read pointer
294  *
295  */
296 @forceinline
void skipCharRanges(string cs)(ref const(char)* p) {
	// Keep advancing while the current character lies inside one of the
	// (lower, upper) bound pairs of `cs`; the terminating '\0' always stops.
	scan: while (*p) {
		for (int lo = 0; lo < cs.length; lo += 2) {
			if (cs[lo] <= *p && cs[lo + 1] >= *p) {
				++p;
				continue scan;
			}
		}
		// Outside every range: this is where the pointer stays
		return;
	}
	//p.vpcmpistri!(char, cs, Operation.inRanges, Polarity.negate);
}
315 
316 /*
317  *
318  * Skips the read pointer over all and any of the given characters.
319  *
320  * Params:
321  *   cs = the characters to skip over
322  *   p = the read pointer
323  *
324  */
325 @forceinline
void skipAllOf(string cs)(ref const(char)* p) {
	// Keep advancing while the current character is one of `cs`; the
	// terminating '\0' always stops.
	outer: for (; *p; ++p) {
		foreach (member; cs) {
			if (member == *p)
				continue outer;
		}
		// Not in the set: this is where the pointer stays
		break;
	}

	//p.vpcmpistri!(char, cs, Operation.equalAnyElem, Polarity.negate);
}
343 
344 /*
345  *
346  * Skips the read pointer over ASCII white-space comprising '\t', '\r', '\n' and
347  * ' '.
348  *
349  * Params:
350  *   p = the read pointer
351  *
352  */
353 @forceinline
void skipAsciiWhitespace(ref const(char)* p) {
	// Fast path: a single leading blank is consumed without entering the loop
	if (*p == ' ')
		++p;
	// Anything above ' ' cannot be one of the four white-space characters
	if (*p > ' ')
		return;
	while (*p == ' ' || *p == '\t' || *p == '\r' || *p == '\n')
		++p;
}
360 
361 /*
362  *
363  * Sets the read pointer to the start of the next line.
364  *
365  * Params:
366  *   p = the read pointer
367  *
368  */
369 @forceinline
void skipToNextLine(ref const(char)* p) {
	// Advance to the next '\r', '\n' or terminating '\0'. Tabs and other
	// control characters are ordinary line content and are skipped over.
	while (*p && *p != '\r' && *p != '\n')
		p++;

	// In the vectorised variant below the string lists the ranges to skip
	// over (negated match), not the characters to stop at.
	//p.vpcmpistri!(char, "\x01\x09\x0B\x0C\x0E\xFF", Operation.inRanges, Polarity.negate);

	// Step over the line break itself: "\r\n", a lone '\r' or a lone '\n'.
	if (p[0] == '\r')
		p++;
	if (p[0] == '\n')
		p++;
}