module tame.parse;

import std.datetime;
import std.conv : ConvException;
import std.format : formattedRead;

/**
 *
 * Parses a timestamp into a `SysTime`.
 *
 * The input is dispatched on its shape: a simple string
 * (`YYYY-Mon-DD ...`) goes to `SysTime.fromSimpleString`, a string carrying
 * a trailing `Z` or a `+HH:MM` / `-HH:MM` offset goes to
 * `SysTime.fromISOExtString` or `SysTime.fromISOString`, and anything else
 * is parsed by `parseDateTime` and tagged as UTC.
 *
 * Params:
 *   input = the timestamp text
 *
 * Returns:
 *   The parsed `SysTime`.
 *
 * Throws:
 *   `DateTimeException` if the input cannot be converted.
 *
 */
SysTime parseSysTime(S)(S input) @safe {
    import std.regex : match;

    try {
        // Simple string: 'YYYY-Mon-DD HH:MM:SS[.fff][TZ]'
        if (input.match(`\d{4}-\D{3}-\d{2}.*`))
            return SysTime.fromSimpleString(input);
        // Explicit time zone: trailing 'Z' or a '+HH:MM' / '-HH:MM' offset.
        if (input.match(`.*[\+|\-]\d{1,2}:\d{1,2}|.*Z`)) {
            // Decide between extended and basic ISO by the shape of the date
            // part. Merely looking for a '-' anywhere would misroute a basic
            // string that carries a negative UTC offset.
            if (input.match(`\d{4}-\d{2}-\d{2}`))
                return SysTime.fromISOExtString(input);
            return SysTime.fromISOString(input);
        }
        // No time zone information: default to UTC.
        return SysTime(parseDateTime(input), UTC());
    } catch (ConvException e)
        throw new DateTimeException("Can not convert '" ~ input ~ "' to SysTime");
}

unittest {
    // Accept valid (as per D language) systime formats
    parseSysTime("2019-May-04 13:34:10.500Z");
    parseSysTime("2019-Jan-02 13:34:10-03:00");
    parseSysTime("2019-05-04T13:34:10.500Z");
    parseSysTime("2019-06-14T13:34:10.500+01:00");
    parseSysTime("2019-02-07T13:34:10Z");
    parseSysTime("2019-08-12T13:34:10+01:00");
    parseSysTime("2019-09-03T13:34:10");

    // Accept valid (as per D language) date & datetime timestamps (will default timezone as UTC)
    parseSysTime("2010-Dec-30 00:00:00");
    parseSysTime("2019-05-04 13:34:10");
    // parseSysTime("2019-05-08");

    // Accept non-standard (as per D language) timestamp formats
    //parseSysTime("2019-05-07 13:32"); // todo: handle missing seconds
    //parseSysTime("2019/05/07 13:32"); // todo: handle slash instead of hyphen
    //parseSysTime("2010-12-30 12:10:04.1+00"); // postgresql
}

/**
 *
 * Parses a timestamp without time zone into a `DateTime`.
 *
 * Accepted shapes are the ISO string `YYYYMMDDTHHMMSS`, the simple string
 * `YYYY-Mon-DD HH:MM:SS` and the ISO extended string `YYYY-MM-DDTHH:MM:SS`.
 * For the latter a space is tolerated in place of the `T`.
 *
 * Params:
 *   input = the timestamp text
 *
 * Returns:
 *   The parsed `DateTime`.
 *
 * Throws:
 *   `DateTimeException` if the input matches none of the accepted shapes or
 *   cannot be converted.
 *
 */
DateTime parseDateTime(S)(S input) @safe {
    import std.string;
    import std.regex : match;

    try {

        if (match(input, r"\d{8}T\d{6}")) {
            // ISO String: 'YYYYMMDDTHHMMSS'
            return DateTime.fromISOString(input);
        } else if (match(input, r"\d{4}-\D{3}-\d{2}.*")) {
            // Simple String 'YYYY-Mon-DD HH:MM:SS'
            return DateTime.fromSimpleString(input);
        } else if (match(input, r"\d{4}-\d{2}-\d{2}.*")) {
            // ISO ext string 'YYYY-MM-DDTHH:MM:SS'
            return DateTime.fromISOExtString(input.replace(' ', 'T'));
        }
        // Unknown shape: funnel into the common error path below.
        throw new ConvException(null);
    } catch (ConvException e)
        throw new DateTimeException("Can not convert '" ~ input ~ "' to DateTime");
}

unittest {
    // Accept valid (as per D language) datetime formats
    parseDateTime("20101230T000000");
    parseDateTime("2019-May-04 13:34:10");
    parseDateTime("2019-Jan-02 13:34:10");
    parseDateTime("2019-05-04T13:34:10");

    // Accept non-standard (as per D language) timestamp formats
    parseDateTime("2019-06-14 13:34:10"); // accept a non-standard variation (space instead of T)
    //parseDateTime("2019-05-07 13:32"); // todo: handle missing seconds
    //parseDateTime("2019/05/07 13:32"); // todo: handle slash instead of hyphen
}

/**
 *
 * Reads a time of day from text of the form `H:M:S`.
 *
 * Params:
 *   input = the text to read from
 *
 * Returns:
 *   A `TimeOfDay` built from the three values read. The number of fields
 *   actually read by `formattedRead` is not checked; fields that were not
 *   read remain 0.
 *
 */
TimeOfDay parseTime(S)(S input) {
    int hour, min, sec;
    input.formattedRead("%s:%s:%s", &hour, &min, &sec);
    return TimeOfDay(hour, min, sec);
}

/**
 *
 * Reads a calendar date from text of the form `Y-M-D`.
 *
 * Params:
 *   input = the text to read from
 *
 * Returns:
 *   A `Date` built from the three values read. The number of fields
 *   actually read by `formattedRead` is not checked.
 *
 */
Date parseDate(S)(S input) {
    int year, month, day;
    input.formattedRead("%s-%s-%s", &year, &month, &day);
    return Date(year, month, day);
}

import std.traits;
import tame.internal;

@nogc nothrow:

/**
 *
 * Decodes a single hexadecimal character.
 *
 * Params:
 *   c = The hexadecimal digit.
 *
 * Returns:
 *   `c` converted to an integer.
 *
 */

uint hexDecode(char c) @safe pure {
    // Letters have bit 6 set; adding 9 maps 'A'/'a' (low nibble 1) to 10.
    return c + 9 * (c >> 6) & 15;
}

/**
 *
 * Decodes four hexadecimal characters at once without validating them.
 *
 * Params:
 *   hex = Pointer to at least four hexadecimal digits. It is advanced by 4.
 *
 * Returns:
 *   The 16-bit value spelled by the four digits, first digit most
 *   significant.
 *
 */
uint hexDecode4(ref const(char)* hex) pure {
    // Load the four characters `hex` points at (not the bytes of the pointer
    // variable itself). This is a possibly unaligned 32-bit read.
    uint x = *cast(const(uint)*) hex;
    hex += 4;
    // Per byte: low nibble, plus 9 when bit 6 marks a letter.
    x = (x & 0x0F0F0F0F) + 9 * (x >> 6 & 0x01010101);
    version (LittleEndian) {
        // The first character sits in the lowest byte; gather the nibbles in
        // reverse byte order.
        return x >> 24 | x >> 12 & 0xF0 | x & 0xF00 | x << 12 & 0xF000;
    } else {
        x = (x | x >> 4) & 0x00FF00FF;
        return (x | x >> 8) & 0x0000FFFF;
    }
}

unittest {
    string x = "aF09";
    const(char)* p = x.ptr;
    assert(hexDecode4(p) == 0xAF09);
    assert(p is x.ptr + 4);
}

/**
 *
 * Decodes four hexadecimal characters with validation.
 *
 * Params:
 *   hex = Pointer to the digits. Advanced by 4 on success, left untouched on
 *         failure.
 *   result = Receives the decoded value. On failure it holds a partial
 *            accumulation and must not be used.
 *
 * Returns:
 *   `null` on success, otherwise a pointer to the first character that is
 *   not a hexadecimal digit.
 *
 */
inout(char)* hexDecode4(ref inout(char)* hex, out uint result) pure {
    foreach (i; 0 .. 4) {
        result *= 16;
        // Unsigned wrap-around turns everything below '0' into a large value.
        char ch = cast(char)(hex[i] - '0');
        if (ch <= 9) {
            result += ch;
        } else {
            // Fold upper case onto lower case, then rebase so 'a' becomes 0.
            ch = cast(char)((ch | 0x20) - 0x31);
            if (ch <= 5)
                result += ch + 10;
            else
                return hex + i;
        }
    }
    hex += 4;
    return null;
}

unittest {
    string x = "aF09";
    const(char)* p = x.ptr;
    uint result;
    assert(!hexDecode4(p, result));
    assert(result == 0xAF09);
}

/+
 String Scanning and Comparison
 +/

/**
 *
 * Compares a string of unknown length against a statically known key.
 *
 * This function also handles escapes and requires one or more terminator chars.
 *
 * Params:
 *   C = Character (code unit) type.
 *   key = The static key string.
 *   terminators = A list of code units that terminate the string.
 *   special = A list of code units that are handled by the user callback. Use
 *             this for escape string handling. Default is `null`.
 *   p_str = Pointer to the string for the comparison. After the function call
 *           it will be behind the last matching character.
 *   callback = User callback to handle special escape characters if `special`
 *              is non-empty.
 *
 * Returns:
 *   A code with following meanings: -1 = not equal, terminator character hit,
 *   0 = not equal, but string not exhausted, 1 = string equals key.
 *
 */
int fixedTermStrCmp(C, immutable C[] key, immutable C[] terminators, immutable C[] special = null)(
    ref const(C)* p_str, scope bool delegate(ref immutable(char)*, ref const(char)*) callback = null)
in (special.length == 0 || callback) {
    // Compile-time lookup table for code units 0 .. 255:
    // -1 = terminator, 1 = special (callback), 0 = ordinary character.
    static immutable byte[256] classify = () {
        byte[256] table;
        foreach (s; special)
            if (s <= 0xFF)
                table[s] = 1;
        // Terminators are written last so that they take precedence.
        foreach (t; terminators)
            if (t <= 0xFF)
                table[t] = -1;
        return table;
    }();

    immutable(C)* p_key = key.ptr;
    immutable C* e_key = p_key + key.length;

    while (p_key !is e_key) {
        // Code units beyond the table are always ordinary characters.
        int clazz = *p_str <= 0xFF ? classify[*p_str] : 0;

        if (clazz < 0) {
            return clazz;
        } else if (clazz == 0) {
            if (*p_str != *p_key)
                return clazz;

            p_str++;
            p_key++;
        } else {
            if (!callback(p_key, p_str))
                return 0;
        }
    }

    // The key is exhausted; the strings are equal only if the input ends
    // here. Wide code units above 0xFF are never terminators, so they must
    // not be masked down into the table.
    return *p_str <= 0xFF && classify[*p_str] < 0;
}

/*
void fixedStringCompareSSE4() {
    enum words = key.length / 16;
    enum remainder = key.length % 16;
    enum contains0 = key.canFind('\0'); // For SSE4.2 string search.
    static assert(!contains0, "Not implemented");

    size_t remaining = e - b;
    auto p = b;

    foreach (i; staticIota!(0, words)) {
        auto backup = p;
        p.vpcmpistri!(char, key[16 * i .. 16 * i + 16], Operation.equalElem, Polarity.negateValid);
        p = backup;
        p.vpcmpistri!(char, key[16 * i .. 16 * i + 16], Operation.equalElem, Polarity.negateValid);
    }
}
*/

/**
 *
 * Advances the read pointer to the first occurrence of any character in `cs`
 * or to the terminating `\0`, whichever comes first.
 *
 * Params:
 *   cs = the characters to look for
 *   p = the read pointer
 *
 */
@forceinline
void seekToAnyOf(string cs)(ref const(char)* p) {
    for (; *p; p++) {
        foreach (c; cs) {
            if (c == *p)
                return;
        }
    }
    //p.vpcmpistri!(char, sanitizeChars(cs), Operation.equalAnyElem);
}

/**
 *
 * Advances the read pointer to the first character that falls into any of
 * the given ranges or to the terminating `\0`, whichever comes first. `cs`
 * holds pairs of characters: the start and the inclusive end of each range.
 *
 * Params:
 *   cs = the character ranges
 *   p = the read pointer
 *
 */
@forceinline
void seekToRanges(string cs)(ref const(char)* p) {
    static assert(cs.length % 2 == 0, "cs must consist of (start, end) character pairs");

    for (; *p; p++) {
        for (size_t i = 0; i < cs.length; i += 2) {
            if (cs[i] <= *p && cs[i + 1] >= *p)
                return;
        }
    }
    //p.vpcmpistri!(char, sanitizeRanges(cs), Operation.inRanges);
}

/**
 *
 * Searches for a specific character known to appear in the stream and skips the
 * read pointer over it.
 *
 * Params:
 *   c = the character
 *   p = the read pointer
 *
 */
@forceinline
void seekPast(char c)(ref const(char)* p) {
    while (*p) {
        if (c == *p) {
            p++;
            break;
        }
        p++;
    }
    //p.vpcmpistri!(char, c.repeat(16).to!string, Operation.equalElem);
}

/**
 *
 * Skips the read pointer over characters that fall into any of up to 8 ranges
 * of characters. The first character in `cs` is the start of the first range,
 * the second character is the end. This is repeated for any other character
 * pair. A character falls into a range from `a` to `b` if `a <= *p <= b`.
 *
 * Params:
 *   cs = the character ranges
 *   p = the read pointer
 *
 */
@forceinline
void skipCharRanges(string cs)(ref const(char)* p) {
    static assert(cs.length % 2 == 0, "cs must consist of (start, end) character pairs");

    while (*p) {
        bool found = false;
        for (size_t i = 0; i < cs.length; i += 2) {
            if (cs[i] <= *p && cs[i + 1] >= *p) {
                found = true;
                break;
            }
        }
        if (found)
            p++;
        else
            break;
    }
    //p.vpcmpistri!(char, cs, Operation.inRanges, Polarity.negate);
}

/**
 *
 * Skips the read pointer over all and any of the given characters.
 *
 * Params:
 *   cs = the characters to skip over
 *   p = the read pointer
 *
 */
@forceinline
void skipAllOf(string cs)(ref const(char)* p) {
    while (*p) {
        bool found = false;
        foreach (c; cs) {
            if (c == *p) {
                found = true;
                break;
            }
        }
        if (found)
            p++;
        else
            break;
    }

    //p.vpcmpistri!(char, cs, Operation.equalAnyElem, Polarity.negate);
}

/**
 *
 * Skips the read pointer over ASCII white-space comprising '\t', '\r', '\n' and
 * ' '.
 *
 * Params:
 *   p = the read pointer
 *
 */
@forceinline
void skipAsciiWhitespace(ref const(char)* p) {
    // Fast path for a single space.
    if (*p == ' ')
        p++;
    // Only enter the general loop if another white-space candidate follows.
    if (*p <= ' ')
        p.skipAllOf!" \t\r\n";
}

/**
 *
 * Sets the read pointer to the start of the next line.
 *
 * Params:
 *   p = the read pointer
 *
 */
@forceinline
void skipToNextLine(ref const(char)* p) {
    // Stop at next \r, \n or \0.
    while (*p && *p != '\r' && *p != '\n')
        p++;

    //p.vpcmpistri!(char, "\x01\x09\x0B\x0C\x0E\xFF", Operation.inRanges, Polarity.negate);
    // Consume the line terminator: "\r", "\n" or "\r\n".
    if (p[0] == '\r')
        p++;
    if (p[0] == '\n')
        p++;
}