1 module tame.ascii;
2 
// Platform shim: make a case-insensitive byte comparison available under the
// single name `memicmp`, taking (pointer, pointer, byte count).
version (Windows)
	// Windows: use the declaration provided by core.stdc.string.
	import core.stdc.string : memicmp;
else version (Posix) {
// Everything declared in this brace block has package visibility.
package:
	// Posix: declare the C library's strncasecmp and expose it as memicmp.
	// NOTE(review): strncasecmp compares NUL-terminated strings, so it
	// presumably stops at an embedded '\0' whereas memicmp does not; confirm
	// callers never rely on comparing past a NUL byte.
	pure nothrow @nogc @system extern (C) int strncasecmp(in char*, in char*, size_t);
	alias memicmp = strncasecmp;
} else
	// Any other platform is rejected at compile time.
	static assert(0, "Unsupported platform");
11 
12 import core.stdc.string : memcmp, memcpy;
13 
14 nothrow @nogc:
15 
/+
	Lowercase the ASCII letters 'A' .. 'Z' of src in place.

	Every other element is left untouched. The same slice that was passed
	in is returned, so calls can be chained.
+/

T[] toLower(T)(T[] src) {
	for (size_t i = 0; i < src.length; ++i) {
		const ch = src[i];
		// Upper- and lowercase ASCII letters are exactly 'a' - 'A' apart.
		if ('A' <= ch && ch <= 'Z')
			src[i] = cast(T)(ch + ('a' - 'A'));
	}
	return src;
}
26 
/+
	Uppercase the ASCII letters 'a' .. 'z' of src in place.

	Every other element is left untouched. The same slice that was passed
	in is returned, so calls can be chained.
+/

T[] toUpper(T)(T[] src) {
	for (size_t i = 0; i < src.length; ++i) {
		const ch = src[i];
		// Upper- and lowercase ASCII letters are exactly 'a' - 'A' apart.
		if ('a' <= ch && ch <= 'z')
			src[i] = cast(T)(ch - ('a' - 'A'));
	}
	return src;
}
37 
/+
	Compare two char[] ignoring ASCII case.

	Only the letters 'A' .. 'Z' are folded onto 'a' .. 'z'; every other
	byte, including '\0', is compared as-is over the full slice length.

	Returns 0 if equal, a negative value if s1 orders before s2 and a
	positive value if it orders after. When one argument is a prefix of
	the other, the shorter one orders first. Only the sign of the result
	is meaningful.
+/

int icompare(const(char[]) s1, const(char[]) s2) @trusted nothrow @nogc {
	// Fold a single byte to lowercase; the result is an int so that the
	// subtraction below cannot wrap.
	static int fold(char c) pure nothrow @nogc @safe {
		return c >= 'A' && c <= 'Z' ? c | 0x20 : c;
	}

	const common = s1.length < s2.length ? s1.length : s2.length;

	// Compare in size_t, slice by slice: no length truncation, no
	// dependence on NUL termination, and no null pointer handed to libc
	// for empty slices.
	foreach (i; 0 .. common) {
		const diff = fold(s1[i]) - fold(s2[i]);
		if (diff != 0)
			return diff;
	}

	// Common prefix is equal: decide on length without narrowing to int.
	if (s1.length == s2.length)
		return 0;
	return s1.length < s2.length ? -1 : 1;
}
53 
/+
	Compare two char[] with case (plain byte-wise comparison).

	Returns 0 if equal, a negative value if s1 orders before s2 and a
	positive value if it orders after. When one argument is a prefix of
	the other, the shorter one orders first. Only the sign of the result
	is meaningful.
+/

auto compare(const(char[]) s1, const(char[]) s2) @trusted {
	const common = s1.length < s2.length ? s1.length : s2.length;

	// memcmp takes a size_t count, so the length is passed without
	// narrowing. It is skipped for a zero count because an empty slice
	// may carry a null .ptr.
	if (common != 0) {
		int result = memcmp(s1.ptr, s2.ptr, common);
		if (result != 0)
			return result;
	}

	// Common prefix is equal: decide on length without narrowing to int.
	if (s1.length == s2.length)
		return 0;
	return s1.length < s2.length ? -1 : 1;
}
69 
/+
	Return the index position of a text pattern within src, or
	src.length upon failure.

	This is a case-insensitive search: the ASCII letters 'A' .. 'Z' are
	folded onto 'a' .. 'z' before comparing. All other bytes, including
	those >= 0x80, must match exactly, so multi-byte UTF-8 sequences are
	never confused with one another.

	An empty pattern matches at index 0. A pattern longer than src is
	never found.
+/

size_t isearch(in char[] src, in char[] pattern)
in (src.ptr && pattern.ptr) {
	// Fold a single byte to lowercase, ASCII letters only.
	static char fold(char c) pure nothrow @nogc @safe {
		return c >= 'A' && c <= 'Z' ? cast(char)(c | 0x20) : c;
	}

	// Guard first so the subtraction below cannot wrap around.
	if (pattern.length > src.length)
		return src.length;

	// Last start index at which the whole pattern still fits into src.
	const last = src.length - pattern.length;
	for (size_t start = 0; start <= last; ++start) {
		size_t matched = 0;
		while (matched < pattern.length
			&& fold(src[start + matched]) == fold(pattern[matched]))
			++matched;

		if (matched == pattern.length)
			return start;
	}
	return src.length;
}
124 
// Exercises the in-place case converters, both comparison functions and
// the case-insensitive search.
unittest {
	// The buffer holds exactly four characters. It is filled by slice
	// assignment, which copies the four bytes only; a C-style strcpy
	// would also write the terminating NUL one byte past the end.
	char[4] tmp;

	tmp[] = "1bac";
	assert(toLower(tmp[]) == "1bac");
	tmp[] = "1BAC";
	assert(toLower(tmp[]) == "1bac");
	tmp[] = "1bac";
	assert(toUpper(tmp[]) == "1BAC");
	tmp[] = "1BAC";
	assert(toUpper(tmp[]) == "1BAC");

	assert(icompare("ABC", "abc") == 0);
	assert(icompare("abc", "abc") == 0);
	assert(icompare("abcd", "abc") > 0);
	assert(icompare("abc", "abcd") < 0);
	assert(icompare("ACC", "abc") > 0);

	assert(compare("abc", "abc") == 0);
	assert(compare("abcd", "abc") > 0);
	assert(compare("abc", "abcd") < 0);
	assert(compare("ABC", "abc") < 0);

	assert(isearch("ACC", "abc") == 3);
	assert(isearch("ACC", "acc") == 0);
	assert(isearch("aACC", "acc") == 1);
	// A pattern longer than the source is reported as "not found".
	assert(isearch("ab", "abc") == 2);
}
150 
// Coarse category of a single character, as returned by classify().
enum CharClass {
	// Anything that falls into none of the categories below.
	Other,
	// Lowercase letter (std.ascii.isLower).
	LowerCase,
	// Uppercase letter (std.ascii.isUpper).
	UpperCase,
	// The underscore character '_'.
	Underscore,
	// Decimal digit (std.ascii.isDigit).
	Digit
}
158 
/+
	Map a character onto its CharClass.

	ASCII lowercase letters, uppercase letters and decimal digits get
	their own class, as does the underscore; every other character is
	reported as CharClass.Other.
+/
CharClass classify(char ch) pure {
	switch (ch) {
	case 'a': .. case 'z':
		return CharClass.LowerCase;
	case 'A': .. case 'Z':
		return CharClass.UpperCase;
	case '0': .. case '9':
		return CharClass.Digit;
	case '_':
		return CharClass.Underscore;
	default:
		return CharClass.Other;
	}
}