1 /*
2  * Hunt - A xml library for D programming language.
3  *
4  * Copyright (C) 2018-2019 HuntLabs
5  *
6  * Website: https://www.huntlabs.net
7  *
8  * Licensed under the Apache-2.0 License.
9  *
10  */
11 
12 module rapidxml.skip;
13 
/// Error thrown by the parsing helpers in this module. The stored message is
/// the description, one space, then the input text the error refers to.
class parse_error : Exception
{
    /// Builds the message `msg ~ " " ~ text`.
    this(string msg, string text)
    {
        immutable full = msg ~ ' ' ~ text;
        super(full);
    }

    /// Overload for a mutable buffer: the text is copied first, so later
    /// writes to the buffer do not alter the stored message.
    this(string msg, char[] text)
    {
        this(msg, text.idup);
    }
}
26 
// Whitespace table: 1 for tab, line feed, carriage return and space, 0 for every other byte.
// The table is computed once at compile time.
ubyte[256] lookup_whitespace = () {
    ubyte[256] flags;                   // starts out all zero
    foreach (char ws; "\t\n\r ")
        flags[ws] = 1;
    return flags;
}();
46 
// Node name table: 1 for every byte except NUL, tab, line feed, carriage return,
// space, '/', '>' and '?'. Computed once at compile time.
ubyte[256] lookup_node_name = () {
    ubyte[256] flags = 1;               // accept everything by default
    foreach (char stop; "\0\t\n\r />?")
        flags[stop] = 0;
    return flags;
}();
66 
// Element name table: 1 for every byte except NUL, tab, line feed, carriage return,
// space, '/', ':', '>' and '?'. Computed once at compile time.
ubyte[256] lookup_element_name = () {
    ubyte[256] flags = 1;               // accept everything by default
    foreach (char stop; "\0\t\n\r /:>?")
        flags[stop] = 0;
    return flags;
}();
86 
// Text table: 1 for every byte except NUL and '<'. Computed once at compile time.
ubyte[256] lookup_text = () {
    ubyte[256] flags = 1;               // accept everything by default
    foreach (char stop; "\0<")
        flags[stop] = 0;
    return flags;
}();
106                     // Text table
// 1 for every byte except NUL, '&' and '<'; whitespace counts as plain text here.
// Computed once at compile time.
ubyte[256] lookup_text_pure_no_ws = () {
    ubyte[256] flags = 1;               // accept everything by default
    foreach (char stop; "\0&<")
        flags[stop] = 0;
    return flags;
}();
126          // Text table
// 1 for every byte except NUL, tab, line feed, carriage return, space, '&' and '<'.
// Computed once at compile time.
ubyte[256] lookup_text_pure_with_ws = () {
    ubyte[256] flags = 1;               // accept everything by default
    foreach (char stop; "\0\t\n\r &<")
        flags[stop] = 0;
    return flags;
}();
146 
// Attribute name table: 1 for every byte except NUL, tab, line feed, carriage return,
// space, '!', '/', '<', '=', '>' and '?'. Computed once at compile time.
ubyte[256] lookup_attribute_name = () {
    ubyte[256] flags = 1;               // accept everything by default
    foreach (char stop; "\0\t\n\r !/<=>?")
        flags[stop] = 0;
    return flags;
}();
167 
// Attribute data table with single quote: 1 for every byte except NUL and '\''.
// Computed once at compile time.
ubyte[256] lookup_attribute_data_1 = () {
    ubyte[256] flags = 1;               // accept everything by default
    foreach (char stop; "\0'")
        flags[stop] = 0;
    return flags;
}();
188 
// Attribute data table with single quote, excluding characters that need rewriting:
// 1 for every byte except NUL, '&' and '\''. Computed once at compile time.
ubyte[256] lookup_attribute_data_1_pure = () {
    ubyte[256] flags = 1;               // accept everything by default
    foreach (char stop; "\0&'")
        flags[stop] = 0;
    return flags;
}();
209 
// Attribute data table with double quotes: 1 for every byte except NUL and '"'.
// Computed once at compile time.
ubyte[256] lookup_attribute_data_2 = () {
    ubyte[256] flags = 1;               // accept everything by default
    foreach (char stop; "\0\"")
        flags[stop] = 0;
    return flags;
}();
230 
// Attribute data table with double quotes, excluding characters that need rewriting:
// 1 for every byte except NUL, '"' and '&'. Computed once at compile time.
ubyte[256] lookup_attribute_data_2_pure = () {
    ubyte[256] flags = 1;               // accept everything by default
    foreach (char stop; "\0\"&")
        flags[stop] = 0;
    return flags;
}();
251 
// Digits: maps '0'-'9' to 0-9 and 'A'-'F' / 'a'-'f' to 10-15; every other byte maps to 255.
// Computed once at compile time.
ubyte[256] lookup_digits = () {
    ubyte[256] values = 255;            // 255 marks "not a digit"
    foreach (d; 0 .. 10)
        values['0' + d] = cast(ubyte) d;
    foreach (d; 0 .. 6)
    {
        values['A' + d] = cast(ubyte)(10 + d);
        values['a' + d] = cast(ubyte)(10 + d);
    }
    return values;
}();
273 
// Upcase: maps 'a'-'z' to 'A'-'Z' and every other byte to itself.
// Computed once at compile time.
ubyte[256] lookup_upcase = () {
    ubyte[256] map;
    foreach (int code; 0 .. 256)
        map[code] = cast(ubyte) code;
    foreach (int code; 'a' .. 'z' + 1)
        map[code] = cast(ubyte)(code - 'a' + 'A');
    return map;
}();
295 
// Parsing flags. Every option occupies its own bit, so options combine with '|'.
const int parse_no_data_nodes = 1 << 0;             // 0x1
const int parse_no_element_values = 1 << 1;         // 0x2
const int parse_no_string_terminators = 1 << 2;     // 0x4
const int parse_no_entity_translation = 1 << 3;     // 0x8: entity references are not expanded
const int parse_no_utf8 = 1 << 4;                   // 0x10: coded characters are stored as one byte, not UTF-8
const int parse_declaration_node = 1 << 5;          // 0x20
const int parse_comment_nodes = 1 << 6;             // 0x40
const int parse_doctype_node = 1 << 7;              // 0x80
const int parse_pi_nodes = 1 << 8;                  // 0x100
const int parse_validate_closing_tags = 1 << 9;     // 0x200
const int parse_trim_whitespace = 1 << 10;          // 0x400
const int parse_normalize_whitespace = 1 << 11;     // 0x800: whitespace runs collapse to one space
const int parse_open_only = 1 << 12;                // 0x1000
const int parse_parse_one = 1 << 13;                // 0x2000
const int parse_validate_xmlns = 1 << 14;           // 0x4000

// Predefined combinations
const int parse_default = 0;                        // no option set

const int parse_non_destructive = parse_no_string_terminators
                                | parse_no_entity_translation;

const int parse_fastest = parse_non_destructive
                        | parse_no_data_nodes;

const int parse_full = parse_declaration_node
                     | parse_comment_nodes
                     | parse_doctype_node
                     | parse_pi_nodes
                     | parse_validate_closing_tags;
334 
335 import std.stdio;
336 
/**
 * Writes the character with code `code` at the start of `text` and advances
 * `text` past the bytes written.
 *
 * When `parse_no_utf8` is set in `Flags` exactly one byte is written and the
 * code is truncated to its low 8 bits. Otherwise the code is written as a
 * UTF-8 sequence of 1 to 4 bytes.
 *
 * Params:
 *   text = output window; on return it begins right after the inserted bytes
 *   code = character code to insert
 *
 * Throws: parse_error when `code` is above 0x10FFFF (UTF-8 mode only) or when
 *         `text` is too short to hold the encoded character.
 */
void insert_coded_character(int Flags)(ref char[] text, ulong code)
{
    // Number of bytes the character occupies in the output
    size_t width = 1;
    static if (!(Flags & parse_no_utf8))
    {
        // Only codes up to 0x10FFFF are allowed in Unicode
        if (code >= 0x110000)
            throw new parse_error("invalid numeric character entity", text);
        width = code < 0x80 ? 1 : code < 0x800 ? 2 : code < 0x10000 ? 3 : 4;
    }

    if (text.length < width)
        throw new parse_error("unexpected end of data", text);

    if (width == 1)
    {
        // Plain byte (ASCII in UTF-8 mode, truncated code in 8-bit mode)
        text[0] = cast(char) code;
    }
    else
    {
        // Continuation bytes (10xxxxxx) carry six payload bits each and are
        // filled from the last byte towards the first.
        foreach_reverse (i; 1 .. width)
        {
            text[i] = cast(char)(0x80 | (code & 0x3F));
            code >>= 6;
        }
        // Lead byte marker: 110xxxxx, 1110xxxx or 11110xxx depending on the width
        static immutable ubyte[5] lead = [0, 0, 0xC0, 0xE0, 0xF0];
        text[0] = cast(char)(code | lead[width]);
    }

    // Keep everything after the inserted bytes (the previous code also cut
    // one character off the end of the window).
    text = text[width .. $];
}

/**
 * Skips characters of `text` for as long as `T.test` accepts them, while
 * writing a processed copy of the skipped characters into a scratch buffer:
 * - XML character entity references (&apos; &amp; &quot; &lt; &gt; &#...; &#x...;)
 *   are replaced by the characters they stand for, unless
 *   `parse_no_entity_translation` is set;
 * - whitespace runs are condensed to a single space when
 *   `parse_normalize_whitespace` is set.
 *
 * If entity translation is off and neither whitespace normalization nor
 * whitespace trimming is requested, the function only calls `skip!T` and
 * returns the advanced `text`.
 *
 * Params:
 *   T     = predicate type; `T.test(ch)` is non-zero while `ch` belongs to the run
 *   TP    = predicate type accepting only characters that need no processing
 *   Flags = combination of the `parse_*` flags
 *   text  = input; on return it starts at the first character not consumed
 *
 * Returns: in the plain-skip case, `text` after skipping. Otherwise the
 *          scratch buffer, a copy of the input taken after the `TP` skip,
 *          whose leading bytes have been overwritten with the processed text.
 *          NOTE(review): the whole buffer is returned, as before, so its
 *          length is the input length after the `TP` skip and the number of
 *          processed bytes is not reported. Callers are outside this file;
 *          confirm what they expect before changing this.
 *
 * Throws: parse_error when a numeric reference is not closed by ';' or, in
 *         UTF-8 mode, names a code above 0x10FFFF.
 */
static char[] skip_and_expand_character_refs(T, TP, int Flags)(ref char[] text)
{
    static if ((Flags & parse_no_entity_translation) &&
               !(Flags & parse_normalize_whitespace) &&
               !(Flags & parse_trim_whitespace))
    {
        // Nothing to rewrite: plain skip is enough
        skip!(T)(text);
        return text;
    }
    else
    {
        // Fast-forward over the leading run that needs no modification
        skip!(TP)(text);

        char[] src = text;          // input still to be read
        char[] dest = src.dup;      // scratch buffer receiving the processed text
        size_t index = 0;           // number of bytes written to dest so far

        // Named entities and, at the same position, the character each one stands for
        static immutable string[5] entity_names = ["&amp;", "&apos;", "&quot;", "&gt;", "&lt;"];
        static immutable string entity_chars = "&'\"><";

        // Every slice below keeps the tail up to `$`; the previous `$ - 1`
        // slices silently dropped the last input character on every step.
        while (src.length != 0 && T.test(src[0]))
        {
            static if (!(Flags & parse_no_entity_translation))
            {
                if (src[0] == '&')
                {
                    // Named entity: compare whole names so a truncated
                    // reference at the end of input is never indexed past.
                    bool replaced = false;
                    foreach (i, name; entity_names)
                    {
                        if (src.length >= name.length && src[0 .. name.length] == name)
                        {
                            dest[index++] = entity_chars[i];
                            src = src[name.length .. $];
                            replaced = true;
                            break;
                        }
                    }
                    if (replaced)
                        continue;

                    // Numeric reference: &#...; (decimal) or &#x...; (hex), assumes ASCII digits
                    if (src.length > 1 && src[1] == '#')
                    {
                        immutable bool hex = src.length > 2 && src[2] == 'x';
                        immutable uint radix = hex ? 16 : 10;
                        src = src[(hex ? 3 : 2) .. $];      // Skip "&#x" or "&#"

                        ulong code = 0;
                        while (src.length != 0)
                        {
                            immutable ubyte digit = lookup_digits[src[0]];
                            // 0xFF marks a non-digit; A-F only count as digits in hex
                            if (digit >= radix)
                                break;
                            // Stop accumulating once the value is out of the Unicode
                            // range so that long digit strings cannot wrap around.
                            if (code < 0x110000)
                                code = code * radix + digit;
                            src = src[1 .. $];
                        }

                        // Put the character in the output. The encoded form is never
                        // longer than the reference text that produced it.
                        char[] room = dest[index .. $];
                        insert_coded_character!Flags(room, code);
                        index = dest.length - room.length;

                        if (src.length != 0 && src[0] == ';')
                            src = src[1 .. $];
                        else
                            throw new parse_error("expected ;", src);
                        continue;
                    }

                    // Something else: fall through and copy '&' verbatim
                }
            }

            static if (Flags & parse_normalize_whitespace)
            {
                if (whitespace_pred.test(src[0]))
                {
                    dest[index++] = ' ';        // Put a single space in dest
                    src = src[1 .. $];
                    // Skip the remaining whitespace characters of the run
                    while (src.length != 0 && whitespace_pred.test(src[0]))
                        src = src[1 .. $];
                    continue;
                }
            }

            // No replacement, only copy the character
            dest[index++] = src[0];
            src = src[1 .. $];
        }

        // Report the new read position to the caller
        text = src;
        return dest;
    }
}
537 
/**
 * Advances `text` past its longest prefix whose characters are all accepted
 * by `T.test`.
 *
 * Params:
 *   T    = predicate type providing `static ubyte test(ubyte)`; a non-zero
 *          result means the character is skipped
 *   text = input slice; on return it starts at the first rejected character,
 *          or is empty when every character was accepted
 */
void skip(T)(ref char[] text)
{
    size_t index = 0;
    // The bounds test must come first: the previous order read text[index]
    // before checking it, so empty or fully matching input indexed past the end.
    while (index < text.length && T.test(text[index]))
        ++index;
    text = text[index .. $];
}
546 
// Detect whitespace character
struct whitespace_pred
{
    /// Returns the `lookup_whitespace` entry for `ch` (non-zero means whitespace).
    static ubyte test(ubyte ch)
    {
        const ubyte flag = lookup_whitespace[ch];
        return flag;
    }
}
554 
// Predicate for characters that may appear in a node name
struct node_name_pred
{
    /// Returns the `lookup_node_name` entry for `ch` (non-zero means accepted).
    static ubyte test(ubyte ch)
    {
        const ubyte flag = lookup_node_name[ch];
        return flag;
    }
}
563 
// Predicate for characters that may appear in an element name
struct element_name_pred
{
    /// Returns the `lookup_element_name` entry for `ch` (non-zero means accepted).
    static ubyte test(ubyte ch)
    {
        const ubyte flag = lookup_element_name[ch];
        return flag;
    }
}
572 
// Predicate for characters that may appear in an attribute name
struct attribute_name_pred
{
    /// Returns the `lookup_attribute_name` entry for `ch` (non-zero means accepted).
    static ubyte test(ubyte ch)
    {
        const ubyte flag = lookup_attribute_name[ch];
        return flag;
    }
}
581 
// Predicate for text characters (PCDATA)
struct text_pred
{
    /// Returns the `lookup_text` entry for `ch` (non-zero means accepted).
    static ubyte test(ubyte ch)
    {
        const ubyte flag = lookup_text[ch];
        return flag;
    }
}
590 
// Predicate for text characters (PCDATA) that need no processing;
// whitespace is treated as ordinary text
struct text_pure_no_ws_pred
{
    /// Returns the `lookup_text_pure_no_ws` entry for `ch` (non-zero means accepted).
    static ubyte test(ubyte ch)
    {
        const ubyte flag = lookup_text_pure_no_ws[ch];
        return flag;
    }
}
599 
// Predicate for text characters (PCDATA) that need no processing;
// whitespace is rejected as well
struct text_pure_with_ws_pred
{
    /// Returns the `lookup_text_pure_with_ws` entry for `ch` (non-zero means accepted).
    static ubyte test(ubyte ch)
    {
        const ubyte flag = lookup_text_pure_with_ws[ch];
        return flag;
    }
}
608 
// Predicate for attribute value characters; `Quote` is the quote character
// that delimits the value and selects the table to consult
struct attribute_value_pred(alias Quote)
{
    /// Returns the table entry for `ch`: `lookup_attribute_data_1` for a
    /// single quote, `lookup_attribute_data_2` for a double quote, and 0 for
    /// any other `Quote`.
    static ubyte test(ubyte ch)
    {
        return Quote == '\'' ? lookup_attribute_data_1[ch]
             : Quote == '"' ? lookup_attribute_data_2[ch]
             : cast(ubyte) 0;   // Should never be reached
    }
}
623 
// Predicate for attribute value characters that need no processing; `Quote`
// is the quote character that delimits the value and selects the table
struct attribute_value_pure_pred(alias Quote)
{
    /// Returns the table entry for `ch`: `lookup_attribute_data_1_pure` for a
    /// single quote, `lookup_attribute_data_2_pure` for a double quote, and 0
    /// for any other `Quote`.
    static ubyte test(ubyte ch)
    {
        return Quote == '\'' ? lookup_attribute_data_1_pure[ch]
             : Quote == '"' ? lookup_attribute_data_2_pure[ch]
             : cast(ubyte) 0;   // Should never be reached
    }
}