View Javadoc

1   /*
2    * $Source: /usr/cvsroot/melati/melati/src/main/java/org/melati/test/CharData.java,v $
3    * $Revision: 1.8 $
4    *
5    * Copyright (C) 2003 Jim Wright
6    *
7    * Part of Melati (http://melati.org), a framework for the rapid
8    * development of clean, maintainable web applications.
9    *
10   * Melati is free software; Permission is granted to copy, distribute
11   * and/or modify this software under the terms either:
12   *
13   * a) the GNU General Public License as published by the Free Software
14   *    Foundation; either version 2 of the License, or (at your option)
15   *    any later version,
16   *
17   *    or
18   *
19   * b) any version of the Melati Software License, as published
20   *    at http://melati.org
21   *
22   * You should have received a copy of the GNU General Public License and
23   * the Melati Software License along with this program;
24   * if not, write to the Free Software Foundation, Inc.,
25   * 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA to obtain the
26   * GNU General Public License and visit http://melati.org to obtain the
27   * Melati Software License.
28   *
29   * Feel free to contact the Developers of Melati (http://melati.org),
30   * if you would like to work out a different arrangement than the options
31   * outlined here.  It is our intention to allow Melati to be used by as
32   * wide an audience as possible.
33   *
34   * This program is distributed in the hope that it will be useful,
35   * but WITHOUT ANY WARRANTY; without even the implied warranty of
36   * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
37   * GNU General Public License for more details.
38   *
39   * Contact details for copyright holder:
40   *
41   *     Jim Wright <jimw At paneris.org>
42   *     Bohemian Enterprise
43   *     Predmerice nad Jizerou 77
44   *     294 74
45   *     Mlada Boleslav
46   *     Czech Republic
47   */
48  
49  package org.melati.test;
50  
51  import java.nio.ByteBuffer;
52  import java.nio.CharBuffer;
53  import java.nio.charset.CharacterCodingException;
54  import java.nio.charset.Charset;
55  import java.nio.charset.CharsetDecoder;
56  import java.nio.charset.CharsetEncoder;
57  import java.util.Iterator;
58  
59  import org.melati.Melati;
60  
61  /**
62   * Provides character data for test servlets.
63   */
64  public final class CharData {
65  
66    private CharData() {}
67  
68    /**
69     * @return an Iterator of Items
70     */
71    public static Iterator getItems() {
72      return new Iterator() {
73  
74          int i = 0;
75  
76          public boolean hasNext() {
77            return i < charData.length;
78          }
79  
80          public Object next() {
81            return new Item(i++);
82          }
83  
84          public void remove() throws UnsupportedOperationException {
85            throw new UnsupportedOperationException();
86          }
87        };
88    }
89  
90    /** A data item. **/
91    public static class Item {
92  
93      private int i;
94  
95      /**
96       * Constructor.
97       * @param i
98       */
99      public Item(int i) {
100       this.i = i;
101     }
102 
103     /**
104      * @return next character
105      */
106     public final String getChar() {
107       return charData[i][0];
108     }
109 
110     /**
111      * @return description of the character
112      */
113     public final String getDescription() {
114       return charData[i][1];
115     }
116 
117     /**
118      * @return the numeric entity for the character
119      */
120     public final String getReference() {
121       return "&#x" + Integer.toHexString(getChar().charAt(0)) + ";";
122     }
123 
124     /**
125      * @param melati current Melati
126      * @return String status of test
127      */
128     public String encodingTest(Melati melati) {
129       Charset cs = Charset.forName(melati.getEncoding());
130       CharsetEncoder e = cs.newEncoder();
131       CharsetDecoder d = cs.newDecoder();
132       if (e.canEncode(getChar())) {
133         try {
134           CharBuffer cb = CharBuffer.allocate(1).put(getChar());
135           cb.flip();
136           ByteBuffer bb = e.encode(cb);
137           String result = d.decode(bb).toString();
138           if (getChar().equals(result)) {
139             return "Correct";
140           } else {
141             return "Wrong: '" + result;
142           }
143         }
144         catch (CharacterCodingException cce) {
145           return "Exception:" + cce.getMessage();
146         }
147       } else {
148         return "Cannot";
149       }
150     }
151     
152   }
153 
154   /**
155    * This data was originally copied from the Unicode database.
156    * <p>
157    * <h2><i><a name="UCD_Terms"><br>UCD Terms of Use</a></i></h2>
158    * <h3><i>Disclaimer</i></h3>
159    * <blockquote>
160    * <p><i>The Unicode Character Database is provided as is by Unicode, Inc. No 
161    * claims are made as to fitness for any particular purpose. 
162    * No warranties of any 
163    * kind are expressed or implied. The recipient agrees to determine 
164    * applicability 
165    * of information provided. If this file has been purchased on magnetic or 
166    * optical media from Unicode, Inc., the sole remedy for any claim will be 
167    * exchange of defective media within 90 days of receipt.</i></p>
168    * <p><i>This disclaimer is applicable for all other data files 
169    * accompanying the 
170    * Unicode Character Database, some of which have been compiled by the Unicode 
171    * Consortium, and some of which have been supplied by other sources.</i></p>
172    * </blockquote>
173    * <h3><i>Limitations on Rights to Redistribute This Data</i></h3>
174    * <blockquote>
175    * <p><i>Recipient is granted the right to make copies in any form for 
176    * internal 
177    * distribution and to freely use the information supplied in the creation of 
178    * products supporting the Unicode<sup>TM</sup> Standard. The files in the 
179    * Unicode Character Database can be redistributed to third parties or other 
180    * organizations (whether for profit or not) as long as this notice and the 
181    * disclaimer notice are retained. 
182    * Information can be extracted from these files 
183    * and used in documentation or programs, as long as there is an accompanying 
184    * notice indicating the source.</i></p>
185    * </blockquote>
186    *
187    * <p><a href="http://www.unicode.org/unicode/copyright.html">
188    * http://www.unicode.org/unicode/copyright.html</a>
189    */
190   public static final String[][] charData = {
191     {"\u0040", "COMMERCIAL AT"},
192     {"\u005E", "CIRCUMFLEX ACCENT"},
193     {"\u0060", "GRAVE ACCENT"},
194     {"\u00A1", "INVERTED EXCLAMATION MARK"},
195     {"\u00A2", "CENT SIGN"},
196     {"\u00A3", "POUND SIGN"},
197     {"\u00A4", "CURRENCY SIGN"},
198     {"\u00A5", "YEN SIGN"},
199     {"\u00A6", "BROKEN BAR"},
200     {"\u00A7", "SECTION SIGN"},
201     {"\u00A8", "DIAERESIS"},
202     {"\u00A9", "COPYRIGHT SIGN"},
203     {"\u00AA", "FEMININE ORDINAL INDICATOR"},
204     {"\u00AB", "LEFT-POINTING DOUBLE ANGLE QUOTATION  MARK"},
205     {"\u00AC", "NOT SIGN"},
206     {"\u00AD", "SOFT HYPHEN"},
207     {"\u00AE", "REGISTERED SIGN"},
208     {"\u00AF", "MACRON"},
209     {"\u00B0", "DEGREE SIGN"},
210     {"\u00B1", "PLUS-MINUS SIGN"},
211     {"\u00B2", "SUPERSCRIPT TWO"},
212     {"\u00B3", "SUPERSCRIPT THREE"},
213     {"\u00B4", "ACUTE ACCENT"},
214     {"\u00B5", "MICRO SIGN"},
215     {"\u00B6", "PILCROW SIGN"},
216     {"\u00B7", "MIDDLE DOT"},
217     {"\u00B8", "CEDILLA"},
218     {"\u00B9", "SUPERSCRIPT ONE"},
219     {"\u00BA", "MASCULINE ORDINAL INDICATOR"},
220     {"\u00BB", "RIGHT-POINTING DOUBLE ANGLE QUOTATION MARK"},
221     {"\u00BC", "VULGAR FRACTION ONE QUARTER"},
222     {"\u00BD", "VULGAR FRACTION ONE HALF"},
223     {"\u00BE", "VULGAR FRACTION THREE QUARTERS"},
224     {"\u00BF", "INVERTED QUESTION MARK"},
225     {"\u00C0", "LATIN CAPITAL LETTER A WITH GRAVE"},
226     {"\u00C1", "LATIN CAPITAL LETTER A WITH ACUTE"},
227     {"\u00C2", "LATIN CAPITAL LETTER A WITH CIRCUMFLEX"},
228     {"\u00C3", "LATIN CAPITAL LETTER A WITH TILDE"},
229     {"\u00C4", "LATIN CAPITAL LETTER A WITH DIAERESIS"},
230     {"\u00C5", "LATIN CAPITAL LETTER A WITH RING ABOVE"},
231     {"\u00C6", "LATIN CAPITAL LETTER AE"},
232     {"\u00C7", "LATIN CAPITAL LETTER C WITH CEDILLA"},
233     {"\u00C8", "LATIN CAPITAL LETTER E WITH GRAVE"},
234     {"\u00C9", "LATIN CAPITAL LETTER E WITH ACUTE"},
235     {"\u00CA", "LATIN CAPITAL LETTER E WITH CIRCUMFLEX"},
236     {"\u00CB", "LATIN CAPITAL LETTER E WITH DIAERESIS"},
237     {"\u00CC", "LATIN CAPITAL LETTER I WITH GRAVE"},
238     {"\u00CD", "LATIN CAPITAL LETTER I WITH ACUTE"},
239     {"\u00CE", "LATIN CAPITAL LETTER I WITH CIRCUMFLEX"},
240     {"\u00CF", "LATIN CAPITAL LETTER I WITH DIAERESIS"},
241     {"\u00D0", "LATIN CAPITAL LETTER ETH"},
242     {"\u00D1", "LATIN CAPITAL LETTER N WITH TILDE"},
243     {"\u00D2", "LATIN CAPITAL LETTER O WITH GRAVE"},
244     {"\u00D3", "LATIN CAPITAL LETTER O WITH ACUTE"},
245     {"\u00D4", "LATIN CAPITAL LETTER O WITH CIRCUMFLEX"},
246     {"\u00D5", "LATIN CAPITAL LETTER O WITH TILDE"},
247     {"\u00D6", "LATIN CAPITAL LETTER O WITH DIAERESIS"},
248     {"\u00D7", "MULTIPLICATION SIGN"},
249     {"\u00D8", "LATIN CAPITAL LETTER O WITH STROKE"},
250     {"\u00D9", "LATIN CAPITAL LETTER U WITH GRAVE"},
251     {"\u00DA", "LATIN CAPITAL LETTER U WITH ACUTE"},
252     {"\u00DB", "LATIN CAPITAL LETTER U WITH CIRCUMFLEX"},
253     {"\u00DC", "LATIN CAPITAL LETTER U WITH DIAERESIS"},
254     {"\u00DD", "LATIN CAPITAL LETTER Y WITH ACUTE"},
255     {"\u00DE", "LATIN CAPITAL LETTER THORN"},
256     {"\u00DF", "LATIN SMALL LETTER SHARP S"},
257     {"\u00E0", "LATIN SMALL LETTER A WITH GRAVE"},
258     {"\u00E1", "LATIN SMALL LETTER A WITH ACUTE"},
259     {"\u00E2", "LATIN SMALL LETTER A WITH CIRCUMFLEX"},
260     {"\u00E3", "LATIN SMALL LETTER A WITH TILDE"},
261     {"\u00E4", "LATIN SMALL LETTER A WITH DIAERESIS"},
262     {"\u00E5", "LATIN SMALL LETTER A WITH RING ABOVE"},
263     {"\u00E6", "LATIN SMALL LETTER AE"},
264     {"\u00E7", "LATIN SMALL LETTER C WITH CEDILLA"},
265     {"\u00E8", "LATIN SMALL LETTER E WITH GRAVE"},
266     {"\u00E9", "LATIN SMALL LETTER E WITH ACUTE"},
267     {"\u00EA", "LATIN SMALL LETTER E WITH CIRCUMFLEX"},
268     {"\u00EB", "LATIN SMALL LETTER E WITH DIAERESIS"},
269     {"\u00EC", "LATIN SMALL LETTER I WITH GRAVE"},
270     {"\u00ED", "LATIN SMALL LETTER I WITH ACUTE"},
271     {"\u00EE", "LATIN SMALL LETTER I WITH CIRCUMFLEX"},
272     {"\u00EF", "LATIN SMALL LETTER I WITH DIAERESIS"},
273     {"\u00F0", "LATIN SMALL LETTER ETH"},
274     {"\u00F1", "LATIN SMALL LETTER N WITH TILDE"},
275     {"\u00F2", "LATIN SMALL LETTER O WITH GRAVE"},
276     {"\u00F3", "LATIN SMALL LETTER O WITH ACUTE"},
277     {"\u00F4", "LATIN SMALL LETTER O WITH CIRCUMFLEX"},
278     {"\u00F5", "LATIN SMALL LETTER O WITH TILDE"},
279     {"\u00F6", "LATIN SMALL LETTER O WITH DIAERESIS"},
280     {"\u00F7", "DIVISION SIGN"},
281     {"\u00F8", "LATIN SMALL LETTER O WITH STROKE"},
282     {"\u00F9", "LATIN SMALL LETTER U WITH GRAVE"},
283     {"\u00FA", "LATIN SMALL LETTER U WITH ACUTE"},
284     {"\u00FB", "LATIN SMALL LETTER U WITH CIRCUMFLEX"},
285     {"\u00FC", "LATIN SMALL LETTER U WITH DIAERESIS"},
286     {"\u00FD", "LATIN SMALL LETTER Y WITH ACUTE"},
287     {"\u00FE", "LATIN SMALL LETTER THORN"},
288     {"\u00FF", "LATIN SMALL LETTER Y WITH DIAERESIS"},
289     {"\u0100", "LATIN CAPITAL LETTER A WITH MACRON"},
290     {"\u0101", "LATIN SMALL LETTER A WITH MACRON"},
291     {"\u0102", "LATIN CAPITAL LETTER A WITH BREVE"},
292     {"\u0103", "LATIN SMALL LETTER A WITH BREVE"},
293     {"\u0104", "LATIN CAPITAL LETTER A WITH OGONEK"},
294     {"\u0105", "LATIN SMALL LETTER A WITH OGONEK"},
295     {"\u0106", "LATIN CAPITAL LETTER C WITH ACUTE"},
296     {"\u0107", "LATIN SMALL LETTER C WITH ACUTE"},
297     {"\u0108", "LATIN CAPITAL LETTER C WITH CIRCUMFLEX"},
298     {"\u0109", "LATIN SMALL LETTER C WITH CIRCUMFLEX"},
299     {"\u010A", "LATIN CAPITAL LETTER C WITH DOT ABOVE"},
300     {"\u010B", "LATIN SMALL LETTER C WITH DOT ABOVE"},
301     {"\u010C", "LATIN CAPITAL LETTER C WITH CARON"},
302     {"\u010D", "LATIN SMALL LETTER C WITH CARON"},
303     {"\u010E", "LATIN CAPITAL LETTER D WITH CARON"},
304     {"\u010F", "LATIN SMALL LETTER D WITH CARON"},
305     {"\u0110", "LATIN CAPITAL LETTER D WITH STROKE"},
306     {"\u0111", "LATIN SMALL LETTER D WITH STROKE"},
307     {"\u0112", "LATIN CAPITAL LETTER E WITH MACRON"},
308     {"\u0113", "LATIN SMALL LETTER E WITH MACRON"},
309     {"\u0114", "LATIN CAPITAL LETTER E WITH BREVE"},
310     {"\u0115", "LATIN SMALL LETTER E WITH BREVE"},
311     {"\u0116", "LATIN CAPITAL LETTER E WITH DOT ABOVE"},
312     {"\u0117", "LATIN SMALL LETTER E WITH DOT ABOVE"},
313     {"\u0118", "LATIN CAPITAL LETTER E WITH OGONEK"},
314     {"\u0119", "LATIN SMALL LETTER E WITH OGONEK"},
315     {"\u011A", "LATIN CAPITAL LETTER E WITH CARON"},
316     {"\u011B", "LATIN SMALL LETTER E WITH CARON"},
317     {"\u011C", "LATIN CAPITAL LETTER G WITH CIRCUMFLEX"},
318     {"\u011D", "LATIN SMALL LETTER G WITH CIRCUMFLEX"},
319     {"\u011E", "LATIN CAPITAL LETTER G WITH BREVE"},
320     {"\u011F", "LATIN SMALL LETTER G WITH BREVE"},
321     {"\u0120", "LATIN CAPITAL LETTER G WITH DOT ABOVE"},
322     {"\u0121", "LATIN SMALL LETTER G WITH DOT ABOVE"},
323     {"\u0122", "LATIN CAPITAL LETTER G WITH CEDILLA"},
324     {"\u0123", "LATIN SMALL LETTER G WITH CEDILLA"},
325     {"\u0124", "LATIN CAPITAL LETTER H WITH CIRCUMFLEX"},
326     {"\u0125", "LATIN SMALL LETTER H WITH CIRCUMFLEX"},
327     {"\u0126", "LATIN CAPITAL LETTER H WITH STROKE"},
328     {"\u0127", "LATIN SMALL LETTER H WITH STROKE"},
329     {"\u0128", "LATIN CAPITAL LETTER I WITH TILDE"},
330     {"\u0129", "LATIN SMALL LETTER I WITH TILDE"},
331     {"\u012A", "LATIN CAPITAL LETTER I WITH MACRON"},
332     {"\u012B", "LATIN SMALL LETTER I WITH MACRON"},
333     {"\u012C", "LATIN CAPITAL LETTER I WITH BREVE"},
334     {"\u012D", "LATIN SMALL LETTER I WITH BREVE"},
335     {"\u012E", "LATIN CAPITAL LETTER I WITH OGONEK"},
336     {"\u012F", "LATIN SMALL LETTER I WITH OGONEK"},
337     {"\u0130", "LATIN CAPITAL LETTER I WITH DOT ABOVE"},
338     {"\u0131", "LATIN SMALL LETTER DOTLESS I"},
339     {"\u0132", "LATIN CAPITAL LIGATURE IJ"},
340     {"\u0133", "LATIN SMALL LIGATURE IJ"},
341     {"\u0134", "LATIN CAPITAL LETTER J WITH CIRCUMFLEX"},
342     {"\u0135", "LATIN SMALL LETTER J WITH CIRCUMFLEX"},
343     {"\u0136", "LATIN CAPITAL LETTER K WITH CEDILLA"},
344     {"\u0137", "LATIN SMALL LETTER K WITH CEDILLA"},
345     {"\u0138", "LATIN SMALL LETTER KRA"},
346     {"\u0139", "LATIN CAPITAL LETTER L WITH ACUTE"},
347     {"\u013A", "LATIN SMALL LETTER L WITH ACUTE"},
348     {"\u013B", "LATIN CAPITAL LETTER L WITH CEDILLA"},
349     {"\u013C", "LATIN SMALL LETTER L WITH CEDILLA"},
350     {"\u013D", "LATIN CAPITAL LETTER L WITH CARON"},
351     {"\u013E", "LATIN SMALL LETTER L WITH CARON"},
352     {"\u013F", "LATIN CAPITAL LETTER L WITH MIDDLE DOT"},
353     {"\u0140", "LATIN SMALL LETTER L WITH MIDDLE DOT"},
354     {"\u0141", "LATIN CAPITAL LETTER L WITH STROKE"},
355     {"\u0142", "LATIN SMALL LETTER L WITH STROKE"},
356     {"\u0143", "LATIN CAPITAL LETTER N WITH ACUTE"},
357     {"\u0144", "LATIN SMALL LETTER N WITH ACUTE"},
358     {"\u0145", "LATIN CAPITAL LETTER N WITH CEDILLA"},
359     {"\u0146", "LATIN SMALL LETTER N WITH CEDILLA"},
360     {"\u0147", "LATIN CAPITAL LETTER N WITH CARON"},
361     {"\u0148", "LATIN SMALL LETTER N WITH CARON"},
362     {"\u0149", "LATIN SMALL LETTER N PRECEDED BY APOSTROPHE"},
363     {"\u014A", "LATIN CAPITAL LETTER ENG"},
364     {"\u014B", "LATIN SMALL LETTER ENG"},
365     {"\u014C", "LATIN CAPITAL LETTER O WITH MACRON"},
366     {"\u014D", "LATIN SMALL LETTER O WITH MACRON"},
367     {"\u014E", "LATIN CAPITAL LETTER O WITH BREVE"},
368     {"\u014F", "LATIN SMALL LETTER O WITH BREVE"},
369     {"\u0150", "LATIN CAPITAL LETTER O WITH DOUBLE ACUTE"},
370     {"\u0151", "LATIN SMALL LETTER O WITH DOUBLE ACUTE"},
371     {"\u0152", "LATIN CAPITAL LIGATURE OE"},
372     {"\u0153", "LATIN SMALL LIGATURE OE"},
373     {"\u0154", "LATIN CAPITAL LETTER R WITH ACUTE"},
374     {"\u0155", "LATIN SMALL LETTER R WITH ACUTE"},
375     {"\u0156", "LATIN CAPITAL LETTER R WITH CEDILLA"},
376     {"\u0157", "LATIN SMALL LETTER R WITH CEDILLA"},
377     {"\u0158", "LATIN CAPITAL LETTER R WITH CARON"},
378     {"\u0159", "LATIN SMALL LETTER R WITH CARON"},
379     {"\u015A", "LATIN CAPITAL LETTER S WITH ACUTE"},
380     {"\u015B", "LATIN SMALL LETTER S WITH ACUTE"},
381     {"\u015C", "LATIN CAPITAL LETTER S WITH CIRCUMFLEX"},
382     {"\u015D", "LATIN SMALL LETTER S WITH CIRCUMFLEX"},
383     {"\u015E", "LATIN CAPITAL LETTER S WITH CEDILLA"},
384     {"\u015F", "LATIN SMALL LETTER S WITH CEDILLA"},
385     {"\u0160", "LATIN CAPITAL LETTER S WITH CARON"},
386     {"\u0161", "LATIN SMALL LETTER S WITH CARON"},
387     {"\u0162", "LATIN CAPITAL LETTER T WITH CEDILLA"},
388     {"\u0163", "LATIN SMALL LETTER T WITH CEDILLA"},
389     {"\u0164", "LATIN CAPITAL LETTER T WITH CARON"},
390     {"\u0165", "LATIN SMALL LETTER T WITH CARON"},
391     {"\u0166", "LATIN CAPITAL LETTER T WITH STROKE"},
392     {"\u0167", "LATIN SMALL LETTER T WITH STROKE"},
393     {"\u0168", "LATIN CAPITAL LETTER U WITH TILDE"},
394     {"\u0169", "LATIN SMALL LETTER U WITH TILDE"},
395     {"\u016A", "LATIN CAPITAL LETTER U WITH MACRON"},
396     {"\u016B", "LATIN SMALL LETTER U WITH MACRON"},
397     {"\u016C", "LATIN CAPITAL LETTER U WITH BREVE"},
398     {"\u016D", "LATIN SMALL LETTER U WITH BREVE"},
399     {"\u016E", "LATIN CAPITAL LETTER U WITH RING ABOVE"},
400     {"\u016F", "LATIN SMALL LETTER U WITH RING ABOVE"},
401     {"\u0170", "LATIN CAPITAL LETTER U WITH DOUBLE ACUTE"},
402     {"\u0171", "LATIN SMALL LETTER U WITH DOUBLE ACUTE"},
403     {"\u0172", "LATIN CAPITAL LETTER U WITH OGONEK"},
404     {"\u0173", "LATIN SMALL LETTER U WITH OGONEK"},
405     {"\u0174", "LATIN CAPITAL LETTER W WITH CIRCUMFLEX"},
406     {"\u0175", "LATIN SMALL LETTER W WITH CIRCUMFLEX"},
407     {"\u0176", "LATIN CAPITAL LETTER Y WITH CIRCUMFLEX"},
408     {"\u0177", "LATIN SMALL LETTER Y WITH CIRCUMFLEX"},
409     {"\u0178", "LATIN CAPITAL LETTER Y WITH DIAERESIS"},
410     {"\u0179", "LATIN CAPITAL LETTER Z WITH ACUTE"},
411     {"\u017A", "LATIN SMALL LETTER Z WITH ACUTE"},
412     {"\u017B", "LATIN CAPITAL LETTER Z WITH DOT ABOVE"},
413     {"\u017C", "LATIN SMALL LETTER Z WITH DOT ABOVE"},
414     {"\u017D", "LATIN CAPITAL LETTER Z WITH CARON"},
415     {"\u017E", "LATIN SMALL LETTER Z WITH CARON"},
416     {"\u017F", "LATIN SMALL LETTER LONG S"},
417     {"\u0180", "LATIN SMALL LETTER B WITH STROKE"},
418     {"\u0181", "LATIN CAPITAL LETTER B WITH HOOK"},
419     {"\u0182", "LATIN CAPITAL LETTER B WITH TOPBAR"},
420     {"\u0183", "LATIN SMALL LETTER B WITH TOPBAR"},
421     {"\u0184", "LATIN CAPITAL LETTER TONE SIX"},
422     {"\u0185", "LATIN SMALL LETTER TONE SIX"},
423     {"\u0185", "LATIN SMALL LETTER TONE SIX"},
424     {"\u0186", "LATIN CAPITAL LETTER OPEN O"},
425     {"\u0187", "LATIN CAPITAL LETTER C WITH HOOK"},
426     {"\u0188", "LATIN SMALL LETTER C WITH HOOK"},
427     {"\u0189", "LATIN CAPITAL LETTER AFRICAN D"},
428     {"\u018A", "LATIN CAPITAL LETTER D WITH HOOK"},
429     {"\u018B", "LATIN CAPITAL LETTER D WITH TOPBAR"},
430     {"\u018C", "LATIN SMALL LETTER D WITH TOPBAR"},
431     {"\u018D", "LATIN SMALL LETTER TURNED DELTA"},
432     {"\u018E", "LATIN CAPITAL LETTER REVERSED E"},
433     {"\u018F", "LATIN CAPITAL LETTER SCHWA"},
434     {"\u0190", "LATIN CAPITAL LETTER OPEN E"},
435     {"\u0191", "LATIN CAPITAL LETTER F WITH HOOK"},
436     {"\u0192", "LATIN SMALL LETTER F WITH HOOK"},
437     {"\u0193", "LATIN CAPITAL LETTER G WITH HOOK"},
438     {"\u0194", "LATIN CAPITAL LETTER GAMMA"},
439     {"\u0195", "LATIN SMALL LETTER HV"},
440     {"\u0196", "LATIN CAPITAL LETTER IOTA"},
441     {"\u0197", "LATIN CAPITAL LETTER I WITH STROKE"},
442     {"\u0198", "LATIN CAPITAL LETTER K WITH HOOK"},
443     {"\u0199", "LATIN SMALL LETTER K WITH HOOK"},
444     {"\u019A", "LATIN SMALL LETTER L WITH BAR"},
445     {"\u019B", "LATIN SMALL LETTER LAMBDA WITH STROKE"},
446     {"\u019C", "LATIN CAPITAL LETTER TURNED M"},
447     {"\u019D", "LATIN CAPITAL LETTER N WITH LEFT HOOK"},
448     {"\u019E", "LATIN SMALL LETTER N WITH LONG RIGHT LEG"},
449     {"\u019F", "LATIN CAPITAL LETTER O WITH MIDDLE TILDE"},
450     {"\u01A0", "LATIN CAPITAL LETTER O WITH HORN"},
451     {"\u01A1", "LATIN SMALL LETTER O WITH HORN"},
452     {"\u01A2", "LATIN CAPITAL LETTER OI"},
453     {"\u01A3", "LATIN SMALL LETTER OI"},
454     {"\u01A4", "LATIN CAPITAL LETTER P WITH HOOK"},
455     {"\u01A5", "LATIN SMALL LETTER P WITH HOOK"},
456     {"\u01A6", "LATIN LETTER YR"},
457     {"\u01A7", "LATIN CAPITAL LETTER TONE TWO"},
458     {"\u01A8", "LATIN SMALL LETTER TONE TWO"},
459     {"\u01A9", "LATIN CAPITAL LETTER ESH"},
460     {"\u01AA", "LATIN LETTER REVERSED ESH LOOP"},
461     {"\u01AB", "LATIN SMALL LETTER T WITH PALATAL HOOK"},
462     {"\u01AC", "LATIN CAPITAL LETTER T WITH HOOK"},
463     {"\u01AD", "LATIN SMALL LETTER T WITH HOOK"},
464     {"\u01AE", "LATIN CAPITAL LETTER T WITH RETROFLEX HOOK"},
465     {"\u01AF", "LATIN CAPITAL LETTER U WITH HORN"},
466     {"\u01B0", "LATIN SMALL LETTER U WITH HORN"},
467     {"\u01B1", "LATIN CAPITAL LETTER UPSILON"},
468     {"\u01B2", "LATIN CAPITAL LETTER V WITH HOOK"},
469     {"\u01B3", "LATIN CAPITAL LETTER Y WITH HOOK"},
470     {"\u01B4", "LATIN SMALL LETTER Y WITH HOOK"},
471     {"\u01B5", "LATIN CAPITAL LETTER Z WITH STROKE"},
472     {"\u01B6", "LATIN SMALL LETTER Z WITH STROKE"},
473     {"\u01B7", "LATIN CAPITAL LETTER EZH"},
474     {"\u01B8", "LATIN CAPITAL LETTER EZH REVERSED"},
475     {"\u01B9", "LATIN SMALL LETTER EZH REVERSED"},
476     {"\u01BA", "LATIN SMALL LETTER EZH WITH TAIL"},
477     {"\u01BB", "LATIN LETTER TWO WITH STROKE"},
478     {"\u01BC", "LATIN CAPITAL LETTER TONE FIVE"},
479     {"\u01BD", "LATIN SMALL LETTER TONE FIVE"},
480     {"\u01BE", "LATIN LETTER INVERTED GLOTTAL STOP WITH STROKE"},
481     {"\u01BF", "LATIN LETTER WYNN"},
482     {"\u01C0", "LATIN LETTER DENTAL CLICK"},
483     {"\u01C1", "LATIN LETTER LATERAL CLICK"},
484     {"\u01C2", "LATIN LETTER ALVEOLAR CLICK"},
485     {"\u01C3", "LATIN LETTER RETROFLEX CLICK"},
486     {"\u01C4", "LATIN CAPITAL LETTER DZ WITH CARON"},
487     {"\u01C5", "LATIN CAPITAL LETTER D WITH SMALL LETTER Z WITH CARON"},
488     {"\u01C6", "LATIN SMALL LETTER DZ WITH CARON"},
489     {"\u01C7", "LATIN CAPITAL LETTER LJ"},
490     {"\u01C8", "LATIN CAPITAL LETTER L WITH SMALL LETTER J"},
491     {"\u01C9", "LATIN SMALL LETTER LJ"},
492     {"\u01CA", "LATIN CAPITAL LETTER NJ"},
493     {"\u01CB", "LATIN CAPITAL LETTER N WITH SMALL LETTER J"},
494     {"\u01CC", "LATIN SMALL LETTER NJ"},
495     {"\u01CD", "LATIN CAPITAL LETTER A WITH CARON"},
496     {"\u01CE", "LATIN SMALL LETTER A WITH CARON"},
497     {"\u01CF", "LATIN CAPITAL LETTER I WITH CARON"},
498     {"\u01D0", "LATIN SMALL LETTER I WITH CARON"},
499     {"\u01D1", "LATIN CAPITAL LETTER O WITH CARON"},
500     {"\u01D2", "LATIN SMALL LETTER O WITH CARON"},
501     {"\u01D3", "LATIN CAPITAL LETTER U WITH CARON"},
502     {"\u01D4", "LATIN SMALL LETTER U WITH CARON"},
503     {"\u01D5", "LATIN CAPITAL LETTER U WITH DIAERESIS AND MACRON"},
504     {"\u01D6", "LATIN SMALL LETTER U WITH DIAERESIS AND MACRON"},
505     {"\u01D7", "LATIN CAPITAL LETTER U WITH DIAERESIS AND ACUTE"},
506     {"\u01D8", "LATIN SMALL LETTER U WITH DIAERESIS AND ACUTE"},
507     {"\u01D9", "LATIN CAPITAL LETTER U WITH DIAERESIS AND CARON"},
508     {"\u01DA", "LATIN SMALL LETTER U WITH DIAERESIS AND CARON"},
509     {"\u01DB", "LATIN CAPITAL LETTER U WITH DIAERESIS AND GRAVE"},
510     {"\u01DC", "LATIN SMALL LETTER U WITH DIAERESIS AND GRAVE"},
511     {"\u01DD", "LATIN SMALL LETTER TURNED E"},
512     {"\u01DE", "LATIN CAPITAL LETTER A WITH DIAERESIS AND MACRON"},
513     {"\u01DF", "LATIN SMALL LETTER A WITH DIAERESIS AND MACRON"},
514     {"\u01E0", "LATIN CAPITAL LETTER A WITH DOT ABOVE AND MACRON"},
515     {"\u01E1", "LATIN SMALL LETTER A WITH DOT ABOVE AND MACRON"},
516     {"\u01E2", "LATIN CAPITAL LETTER AE WITH MACRON"},
517     {"\u01E3", "LATIN SMALL LETTER AE WITH MACRON"},
518     {"\u01E4", "LATIN CAPITAL LETTER G WITH STROKE"},
519     {"\u01E5", "LATIN SMALL LETTER G WITH STROKE"},
520     {"\u01E6", "LATIN CAPITAL LETTER G WITH CARON"},
521     {"\u01E7", "LATIN SMALL LETTER G WITH CARON"},
522     {"\u01E8", "LATIN CAPITAL LETTER K WITH CARON"},
523     {"\u01E9", "LATIN SMALL LETTER K WITH CARON"},
524     {"\u01EA", "LATIN CAPITAL LETTER O WITH OGONEK"},
525     {"\u01EB", "LATIN SMALL LETTER O WITH OGONEK"},
526     {"\u01EC", "LATIN CAPITAL LETTER O WITH OGONEK AND MACRON"},
527     {"\u01ED", "LATIN SMALL LETTER O WITH OGONEK AND MACRON"},
528     {"\u01EE", "LATIN CAPITAL LETTER EZH WITH CARON"},
529     {"\u01EF", "LATIN SMALL LETTER EZH WITH CARON"},
530     {"\u01F0", "LATIN SMALL LETTER J WITH CARON"},
531     {"\u01F1", "LATIN CAPITAL LETTER DZ"},
532     {"\u01F2", "LATIN CAPITAL LETTER D WITH SMALL LETTER Z"},
533     {"\u01F3", "LATIN SMALL LETTER DZ"},
534     {"\u01F4", "LATIN CAPITAL LETTER G WITH ACUTE"},
535     {"\u01F5", "LATIN SMALL LETTER G WITH ACUTE"},
536     {"\u01FA", "LATIN CAPITAL LETTER A WITH RING ABOVE AND ACUTE"},
537     {"\u01FB", "LATIN SMALL LETTER A WITH RING ABOVE AND ACUTE"},
538     {"\u01FC", "LATIN CAPITAL LETTER AE WITH ACUTE"},
539     {"\u01FD", "LATIN SMALL LETTER AE WITH ACUTE"},
540     {"\u01FE", "LATIN CAPITAL LETTER O WITH STROKE AND ACUTE"},
541     {"\u01FF", "LATIN SMALL LETTER O WITH STROKE AND ACUTE"},
542     {"\u0200", "LATIN CAPITAL LETTER A WITH DOUBLE GRAVE"},
543     {"\u0201", "LATIN SMALL LETTER A WITH DOUBLE GRAVE"},
544     {"\u0202", "LATIN CAPITAL LETTER A WITH INVERTED BREVE"},
545     {"\u0203", "LATIN SMALL LETTER A WITH INVERTED BREVE"},
546     {"\u0204", "LATIN CAPITAL LETTER E WITH DOUBLE GRAVE"},
547     {"\u0205", "LATIN SMALL LETTER E WITH DOUBLE GRAVE"},
548     {"\u0206", "LATIN CAPITAL LETTER E WITH INVERTED BREVE"},
549     {"\u0207", "LATIN SMALL LETTER E WITH INVERTED BREVE"},
550     {"\u0208", "LATIN CAPITAL LETTER I WITH DOUBLE GRAVE"},
551     {"\u0209", "LATIN SMALL LETTER I WITH DOUBLE GRAVE"},
552     {"\u020A", "LATIN CAPITAL LETTER I WITH INVERTED BREVE"},
553     {"\u020B", "LATIN SMALL LETTER I WITH INVERTED BREVE"},
554     {"\u020C", "LATIN CAPITAL LETTER O WITH DOUBLE GRAVE"},
555     {"\u020D", "LATIN SMALL LETTER O WITH DOUBLE GRAVE"},
556     {"\u020E", "LATIN CAPITAL LETTER O WITH INVERTED BREVE"},
557     {"\u020F", "LATIN SMALL LETTER O WITH INVERTED BREVE"},
558     {"\u0210", "LATIN CAPITAL LETTER R WITH DOUBLE GRAVE"},
559     {"\u0211", "LATIN SMALL LETTER R WITH DOUBLE GRAVE"},
560     {"\u0212", "LATIN CAPITAL LETTER R WITH INVERTED BREVE"},
561     {"\u0213", "LATIN SMALL LETTER R WITH INVERTED BREVE"},
562     {"\u0214", "LATIN CAPITAL LETTER U WITH DOUBLE GRAVE"},
563     {"\u0215", "LATIN SMALL LETTER U WITH DOUBLE GRAVE"},
564     {"\u0216", "LATIN CAPITAL LETTER U WITH INVERTED BREVE"},
565     {"\u0217", "LATIN SMALL LETTER U WITH INVERTED BREVE"},
566     {"\u0250", "LATIN SMALL LETTER TURNED A"},
567     {"\u0251", "LATIN SMALL LETTER ALPHA"},
568     {"\u0252", "LATIN SMALL LETTER TURNED ALPHA"},
569     {"\u0253", "LATIN SMALL LETTER B WITH HOOK"},
570     {"\u0254", "LATIN SMALL LETTER OPEN O"},
571     {"\u0255", "LATIN SMALL LETTER C WITH CURL"},
572     {"\u0256", "LATIN SMALL LETTER D WITH TAIL"},
573     {"\u0257", "LATIN SMALL LETTER D WITH HOOK"},
574     {"\u0258", "LATIN SMALL LETTER REVERSED E"},
575     {"\u0259", "LATIN SMALL LETTER SCHWA"},
576     {"\u025A", "LATIN SMALL LETTER SCHWA WITH HOOK"},
577     {"\u025B", "LATIN SMALL LETTER OPEN E"},
578     {"\u025C", "LATIN SMALL LETTER REVERSED OPEN E"},
579     {"\u025D", "LATIN SMALL LETTER REVERSED OPEN E WITH HOOK"},
580     {"\u025E", "LATIN SMALL LETTER CLOSED REVERSED OPEN E"},
581     {"\u025F", "LATIN SMALL LETTER DOTLESS J WITH STROKE"},
582     {"\u0260", "LATIN SMALL LETTER G WITH HOOK"},
583     {"\u0261", "LATIN SMALL LETTER SCRIPT G"},
584     {"\u0262", "LATIN LETTER SMALL CAPITAL G"},
585     {"\u0263", "LATIN SMALL LETTER GAMMA"},
586     {"\u0264", "LATIN SMALL LETTER RAMS HORN"},
587     {"\u0265", "LATIN SMALL LETTER TURNED H"},
588     {"\u0266", "LATIN SMALL LETTER H WITH HOOK"},
589     {"\u0267", "LATIN SMALL LETTER HENG WITH HOOK"},
590     {"\u0268", "LATIN SMALL LETTER I WITH STROKE"},
591     {"\u0269", "LATIN SMALL LETTER IOTA"},
592     {"\u026A", "LATIN LETTER SMALL CAPITAL I"},
593     {"\u026B", "LATIN SMALL LETTER L WITH MIDDLE TILDE"},
594     {"\u026C", "LATIN SMALL LETTER L WITH BELT"},
595     {"\u026D", "LATIN SMALL LETTER L WITH RETROFLEX HOOK"},
596     {"\u026E", "LATIN SMALL LETTER LEZH"},
597     {"\u026F", "LATIN SMALL LETTER TURNED M"},
598     {"\u0270", "LATIN SMALL LETTER TURNED M WITH LONG LEG"},
599     {"\u0271", "LATIN SMALL LETTER M WITH HOOK"},
600     {"\u0272", "LATIN SMALL LETTER N WITH LEFT HOOK"},
601     {"\u0273", "LATIN SMALL LETTER N WITH RETROFLEX HOOK"},
602     {"\u0274", "LATIN LETTER SMALL CAPITAL N"},
603     {"\u0275", "LATIN SMALL LETTER BARRED O"},
604     {"\u0276", "LATIN LETTER SMALL CAPITAL OE"},
605     {"\u0277", "LATIN SMALL LETTER CLOSED OMEGA"},
606     {"\u0278", "LATIN SMALL LETTER PHI"},
607     {"\u0279", "LATIN SMALL LETTER TURNED R"},
608     {"\u027A", "LATIN SMALL LETTER TURNED R WITH LONG LEG"},
609     {"\u027B", "LATIN SMALL LETTER TURNED R WITH HOOK"},
610     {"\u027C", "LATIN SMALL LETTER R WITH LONG LEG"},
611     {"\u027D", "LATIN SMALL LETTER R WITH TAIL"},
612     {"\u027E", "LATIN SMALL LETTER R WITH FISHHOOK"},
613     {"\u027F", "LATIN SMALL LETTER REVERSED R WITH FISHHOOK"},
614     {"\u0280", "LATIN LETTER SMALL CAPITAL R"},
615     {"\u0281", "LATIN LETTER SMALL CAPITAL INVERTED R"},
616     {"\u0282", "LATIN SMALL LETTER S WITH HOOK"},
617     {"\u0283", "LATIN SMALL LETTER ESH"},
618     {"\u0284", "LATIN SMALL LETTER DOTLESS J WITH STROKE AND HOOK"},
619     {"\u0285", "LATIN SMALL LETTER SQUAT REVERSED ESH"},
620     {"\u0286", "LATIN SMALL LETTER ESH WITH CURL"},
621     {"\u0287", "LATIN SMALL LETTER TURNED T"},
622     {"\u0288", "LATIN SMALL LETTER T WITH RETROFLEX HOOK"},
623     {"\u0289", "LATIN SMALL LETTER U BAR"},
624     {"\u028A", "LATIN SMALL LETTER UPSILON"},
625     {"\u028B", "LATIN SMALL LETTER V WITH HOOK"},
626     {"\u028C", "LATIN SMALL LETTER TURNED V"},
627     {"\u028D", "LATIN SMALL LETTER TURNED W"},
628     {"\u028E", "LATIN SMALL LETTER TURNED Y"},
629     {"\u028F", "LATIN LETTER SMALL CAPITAL Y"},
630     {"\u0290", "LATIN SMALL LETTER Z WITH RETROFLEX HOOK"},
631     {"\u0291", "LATIN SMALL LETTER Z WITH CURL"},
632     {"\u0292", "LATIN SMALL LETTER EZH"},
633     {"\u0293", "LATIN SMALL LETTER EZH WITH CURL"},
634     {"\u0294", "LATIN LETTER GLOTTAL STOP"},
635     {"\u0295", "LATIN LETTER PHARYNGEAL VOICED FRICATIVE"},
636     {"\u0296", "LATIN LETTER INVERTED GLOTTAL STOP"},
637     {"\u0297", "LATIN LETTER STRETCHED C"},
638     {"\u0298", "LATIN LETTER BILABIAL CLICK"},
639     {"\u0299", "LATIN LETTER SMALL CAPITAL B"},
640     {"\u029A", "LATIN SMALL LETTER CLOSED OPEN E"},
641     {"\u029B", "LATIN LETTER SMALL CAPITAL G WITH HOOK"},
642     {"\u029C", "LATIN LETTER SMALL CAPITAL H"},
643     {"\u029D", "LATIN SMALL LETTER J WITH CROSSED-TAIL"},
644     {"\u029E", "LATIN SMALL LETTER TURNED K"},
645     {"\u029F", "LATIN LETTER SMALL CAPITAL L"},
646     {"\u02A0", "LATIN SMALL LETTER Q WITH HOOK"},
647     {"\u02A1", "LATIN LETTER GLOTTAL STOP WITH STROKE"},
648     {"\u02A2", "LATIN LETTER REVERSED GLOTTAL STOP WITH STROKE"},
649     {"\u02A3", "LATIN SMALL LETTER DZ DIGRAPH"},
650     {"\u02A4", "LATIN SMALL LETTER DEZH DIGRAPH"},
651     {"\u02A5", "LATIN SMALL LETTER DZ DIGRAPH WITH CURL"},
652     {"\u02A6", "LATIN SMALL LETTER TS DIGRAPH"},
653     {"\u02A7", "LATIN SMALL LETTER TESH DIGRAPH"},
654     {"\u02A8", "LATIN SMALL LETTER TC DIGRAPH WITH CURL"},
655     {"\u02B0", "MODIFIER LETTER SMALL H"},
656     {"\u02B1", "MODIFIER LETTER SMALL H WITH HOOK"},
657     {"\u02B2", "MODIFIER LETTER SMALL J"},
658     {"\u02B3", "MODIFIER LETTER SMALL R"},
659     {"\u02B4", "MODIFIER LETTER SMALL TURNED R"},
660     {"\u02B5", "MODIFIER LETTER SMALL TURNED R WITH HOOK"},
661     {"\u02B6", "MODIFIER LETTER SMALL CAPITAL INVERTED R"},
662     {"\u02B7", "MODIFIER LETTER SMALL W"},
663     {"\u02B8", "MODIFIER LETTER SMALL Y"},
664     {"\u02B9", "MODIFIER LETTER PRIME"},
665     {"\u02BA", "MODIFIER LETTER DOUBLE PRIME"},
666     {"\u02BB", "MODIFIER LETTER TURNED COMMA"},
667     {"\u02BC", "MODIFIER LETTER APOSTROPHE"},
668     {"\u02BD", "MODIFIER LETTER REVERSED COMMA"},
669     {"\u02BE", "MODIFIER LETTER RIGHT HALF RING"},
670     {"\u02BF", "MODIFIER LETTER LEFT HALF RING"},
671     {"\u02C0", "MODIFIER LETTER GLOTTAL STOP"},
672     {"\u02C1", "MODIFIER LETTER REVERSED GLOTTAL STOP"},
673     {"\u02C2", "MODIFIER LETTER LEFT ARROWHEAD"},
674     {"\u02C3", "MODIFIER LETTER RIGHT ARROWHEAD"},
675     {"\u02C4", "MODIFIER LETTER UP ARROWHEAD"},
676     {"\u02C5", "MODIFIER LETTER DOWN ARROWHEAD"},
677     {"\u02C6", "MODIFIER LETTER CIRCUMFLEX ACCENT"},
678     {"\u02C7", "CARON"},
679     {"\u02C8", "MODIFIER LETTER VERTICAL LINE"},
680     {"\u02C9", "MODIFIER LETTER MACRON"},
681     {"\u02CA", "MODIFIER LETTER ACUTE ACCENT"},
682     {"\u02CB", "MODIFIER LETTER GRAVE ACCENT"},
683     {"\u02CC", "MODIFIER LETTER LOW VERTICAL LINE"},
684     {"\u02CD", "MODIFIER LETTER LOW MACRON"},
685     {"\u02CE", "MODIFIER LETTER LOW GRAVE ACCENT"},
686     {"\u02CF", "MODIFIER LETTER LOW ACUTE ACCENT"},
687     {"\u02D0", "MODIFIER LETTER TRIANGULAR COLON"},
688     {"\u02D1", "MODIFIER LETTER HALF TRIANGULAR COLON"},
689     {"\u02D2", "MODIFIER LETTER CENTRED RIGHT HALF RING"},
690     {"\u02D3", "MODIFIER LETTER CENTRED LEFT HALF RING"},
691     {"\u02D4", "MODIFIER LETTER UP TACK"},
692     {"\u02D5", "MODIFIER LETTER DOWN TACK"},
693     {"\u02D6", "MODIFIER LETTER PLUS SIGN"},
694     {"\u02D7", "MODIFIER LETTER MINUS SIGN"},
695     {"\u02D8", "BREVE"},
696     {"\u02D9", "DOT ABOVE"},
697     {"\u02DA", "RING ABOVE"},
698     {"\u02DB", "OGONEK"},
699     {"\u02DC", "SMALL TILDE"},
700     {"\u02DD", "DOUBLE ACUTE ACCENT"},
701     {"\u02DE", "MODIFIER LETTER RHOTIC HOOK"},
702     {"\u02E0", "MODIFIER LETTER SMALL GAMMA"},
703     {"\u02E1", "MODIFIER LETTER SMALL L"},
704     {"\u02E2", "MODIFIER LETTER SMALL S"},
705     {"\u02E3", "MODIFIER LETTER SMALL X"},
706     {"\u02E4", "MODIFIER LETTER SMALL REVERSED GLOTTAL STOP"},
707     {"\u02E5", "MODIFIER LETTER EXTRA-HIGH TONE BAR"},
708     {"\u02E6", "MODIFIER LETTER HIGH TONE BAR"},
709     {"\u02E7", "MODIFIER LETTER MID TONE BAR"},
710     {"\u02E8", "MODIFIER LETTER LOW TONE BAR"},
711     {"\u02E9", "MODIFIER LETTER EXTRA-LOW TONE BAR"},
712     {"\u0300", "COMBINING GRAVE ACCENT"},
713     {"\u0301", "COMBINING ACUTE ACCENT"},
714     {"\u0302", "COMBINING CIRCUMFLEX ACCENT"},
715     {"\u0303", "COMBINING TILDE"},
716     {"\u0304", "COMBINING MACRON"},
717     {"\u0305", "COMBINING OVERLINE"},
718     {"\u0306", "COMBINING BREVE"},
719     {"\u0307", "COMBINING DOT ABOVE"},
720     {"\u0308", "COMBINING DIAERESIS"},
721     {"\u0309", "COMBINING HOOK ABOVE"},
722     {"\u030A", "COMBINING RING ABOVE"},
723     {"\u030B", "COMBINING DOUBLE ACUTE ACCENT"},
724     {"\u030C", "COMBINING CARON"},
725     {"\u030D", "COMBINING VERTICAL LINE ABOVE"},
726     {"\u030E", "COMBINING DOUBLE VERTICAL LINE ABOVE"},
727     {"\u030F", "COMBINING DOUBLE GRAVE ACCENT"},
728     {"\u0310", "COMBINING CANDRABINDU"},
729     {"\u0311", "COMBINING INVERTED BREVE"},
730     {"\u0312", "COMBINING TURNED COMMA ABOVE"},
731     {"\u0313", "COMBINING COMMA ABOVE"},
732     {"\u0314", "COMBINING REVERSED COMMA ABOVE"},
733     {"\u0315", "COMBINING COMMA ABOVE RIGHT"},
734     {"\u0316", "COMBINING GRAVE ACCENT BELOW"},
735     {"\u0317", "COMBINING ACUTE ACCENT BELOW"},
736     {"\u0318", "COMBINING LEFT TACK BELOW"},
737     {"\u0319", "COMBINING RIGHT TACK BELOW"},
738     {"\u031A", "COMBINING LEFT ANGLE ABOVE"},
739     {"\u031B", "COMBINING HORN"},
740     {"\u031C", "COMBINING LEFT HALF RING BELOW"},
741     {"\u031D", "COMBINING UP TACK BELOW"},
742     {"\u031E", "COMBINING DOWN TACK BELOW"},
743     {"\u031F", "COMBINING PLUS SIGN BELOW"},
744     {"\u0320", "COMBINING MINUS SIGN BELOW"},
745     {"\u0321", "COMBINING PALATALIZED HOOK BELOW"},
746     {"\u0322", "COMBINING RETROFLEX HOOK BELOW"},
747     {"\u0323", "COMBINING DOT BELOW"},
748     {"\u0324", "COMBINING DIAERESIS BELOW"},
749     {"\u0325", "COMBINING RING BELOW"},
750     {"\u0326", "COMBINING COMMA BELOW"},
751     {"\u0327", "COMBINING CEDILLA"},
752     {"\u0328", "COMBINING OGONEK"},
753     {"\u0329", "COMBINING VERTICAL LINE BELOW"},
754     {"\u032A", "COMBINING BRIDGE BELOW"},
755     {"\u032B", "COMBINING INVERTED DOUBLE ARCH BELOW"},
756     {"\u032C", "COMBINING CARON BELOW"},
757     {"\u032D", "COMBINING CIRCUMFLEX ACCENT BELOW"},
758     {"\u032E", "COMBINING BREVE BELOW"},
759     {"\u032F", "COMBINING INVERTED BREVE BELOW"},
760     {"\u0330", "COMBINING TILDE BELOW"},
761     {"\u0331", "COMBINING MACRON BELOW"},
762     {"\u0332", "COMBINING LOW LINE"},
763     {"\u0333", "COMBINING DOUBLE LOW LINE"},
764     {"\u0334", "COMBINING TILDE OVERLAY"},
765     {"\u0335", "COMBINING SHORT STROKE OVERLAY"},
766     {"\u0336", "COMBINING LONG STROKE OVERLAY"},
767     {"\u0337", "COMBINING SHORT SOLIDUS OVERLAY"},
768     {"\u0338", "COMBINING LONG SOLIDUS OVERLAY"},
769     {"\u0339", "COMBINING RIGHT HALF RING BELOW"},
770     {"\u033A", "COMBINING INVERTED BRIDGE BELOW"},
771     {"\u033B", "COMBINING SQUARE BELOW"},
772     {"\u033C", "COMBINING SEAGULL BELOW"},
773     {"\u033D", "COMBINING X ABOVE"},
774     {"\u033E", "COMBINING VERTICAL TILDE"},
775     {"\u033F", "COMBINING DOUBLE OVERLINE"},
776     {"\u0340", "COMBINING GRAVE TONE MARK"},
777     {"\u0341", "COMBINING ACUTE TONE MARK"},
778     {"\u0342", "COMBINING GREEK PERISPOMENI"},
779     {"\u0343", "COMBINING GREEK KORONIS"},
780     {"\u0344", "COMBINING GREEK DIALYTIKA TONOS"},
781     {"\u0345", "COMBINING GREEK YPOGEGRAMMENI"},
782     {"\u0360", "COMBINING DOUBLE TILDE"},
783     {"\u0361", "COMBINING DOUBLE INVERTED BREVE"},
784     {"\u0374", "GREEK NUMERAL SIGN"},
785     {"\u0375", "GREEK LOWER NUMERAL SIGN"},
786     {"\u037A", "GREEK YPOGEGRAMMENI"},
787     {"\u037E", "GREEK QUESTION MARK"},
788     {"\u0384", "GREEK TONOS"},
789     {"\u0385", "GREEK DIALYTIKA TONOS"},
790     {"\u0386", "GREEK CAPITAL LETTER ALPHA WITH TONOS"},
791     {"\u0387", "GREEK ANO TELEIA"},
792     {"\u0388", "GREEK CAPITAL LETTER EPSILON WITH TONOS"},
793     {"\u0389", "GREEK CAPITAL LETTER ETA WITH TONOS"},
794     {"\u038A", "GREEK CAPITAL LETTER IOTA WITH TONOS"},
795     {"\u038C", "GREEK CAPITAL LETTER OMICRON WITH TONOS"},
796     {"\u038E", "GREEK CAPITAL LETTER UPSILON WITH TONOS"},
797     {"\u038F", "GREEK CAPITAL LETTER OMEGA WITH TONOS"},
798     {"\u0390", "GREEK SMALL LETTER IOTA WITH DIALYTIKA AND TONOS"},
799     {"\u0391", "GREEK CAPITAL LETTER ALPHA"},
800     {"\u0392", "GREEK CAPITAL LETTER BETA"},
801     {"\u0393", "GREEK CAPITAL LETTER GAMMA"},
802     {"\u0394", "GREEK CAPITAL LETTER DELTA"},
803     {"\u0395", "GREEK CAPITAL LETTER EPSILON"},
804     {"\u0396", "GREEK CAPITAL LETTER ZETA"},
805     {"\u0397", "GREEK CAPITAL LETTER ETA"},
806     {"\u0398", "GREEK CAPITAL LETTER THETA"},
807     {"\u0399", "GREEK CAPITAL LETTER IOTA"},
808     {"\u039A", "GREEK CAPITAL LETTER KAPPA"},
809     {"\u039B", "GREEK CAPITAL LETTER LAMDA"},
810     {"\u039C", "GREEK CAPITAL LETTER MU"},
811     {"\u039D", "GREEK CAPITAL LETTER NU"},
812     {"\u039E", "GREEK CAPITAL LETTER XI"},
813     {"\u039F", "GREEK CAPITAL LETTER OMICRON"},
814     {"\u03A0", "GREEK CAPITAL LETTER PI"},
815     {"\u03A1", "GREEK CAPITAL LETTER RHO"},
816     {"\u03A3", "GREEK CAPITAL LETTER SIGMA"},
817     {"\u03A4", "GREEK CAPITAL LETTER TAU"},
818     {"\u03A5", "GREEK CAPITAL LETTER UPSILON"},
819     {"\u03A6", "GREEK CAPITAL LETTER PHI"},
820     {"\u03A7", "GREEK CAPITAL LETTER CHI"},
821     {"\u03A8", "GREEK CAPITAL LETTER PSI"},
822     {"\u03A9", "GREEK CAPITAL LETTER OMEGA"},
823     {"\u03AA", "GREEK CAPITAL LETTER IOTA WITH DIALYTIKA"},
824     {"\u03AB", "GREEK CAPITAL LETTER UPSILON WITH DIALYTIKA"},
825     {"\u03AC", "GREEK SMALL LETTER ALPHA WITH TONOS"},
826     {"\u03AD", "GREEK SMALL LETTER EPSILON WITH TONOS"},
827     {"\u03AE", "GREEK SMALL LETTER ETA WITH TONOS"},
828     {"\u03AF", "GREEK SMALL LETTER IOTA WITH TONOS"},
829     {"\u03B0", "GREEK SMALL LETTER UPSILON WITH DIALYTIKA AND TONOS"},
830     {"\u03B1", "GREEK SMALL LETTER ALPHA"},
831     {"\u03B2", "GREEK SMALL LETTER BETA"},
832     {"\u03B3", "GREEK SMALL LETTER GAMMA"},
833     {"\u03B4", "GREEK SMALL LETTER DELTA"},
834     {"\u03B5", "GREEK SMALL LETTER EPSILON"},
835     {"\u03B6", "GREEK SMALL LETTER ZETA"},
836     {"\u03B7", "GREEK SMALL LETTER ETA"},
837     {"\u03B8", "GREEK SMALL LETTER THETA"},
838     {"\u03B9", "GREEK SMALL LETTER IOTA"},
839     {"\u03BA", "GREEK SMALL LETTER KAPPA"},
840     {"\u03BB", "GREEK SMALL LETTER LAMDA"},
841     {"\u03BC", "GREEK SMALL LETTER MU"},
842     {"\u03BD", "GREEK SMALL LETTER NU"},
843     {"\u03BE", "GREEK SMALL LETTER XI"},
844     {"\u03BF", "GREEK SMALL LETTER OMICRON"},
845     {"\u03C0", "GREEK SMALL LETTER PI"},
846     {"\u03C1", "GREEK SMALL LETTER RHO"},
847     {"\u03C2", "GREEK SMALL LETTER FINAL SIGMA"},
848     {"\u03C3", "GREEK SMALL LETTER SIGMA"},
849     {"\u03C4", "GREEK SMALL LETTER TAU"},
850     {"\u03C5", "GREEK SMALL LETTER UPSILON"},
851     {"\u03C6", "GREEK SMALL LETTER PHI"},
852     {"\u03C7", "GREEK SMALL LETTER CHI"},
853     {"\u03C8", "GREEK SMALL LETTER PSI"},
854     {"\u03C9", "GREEK SMALL LETTER OMEGA"},
855     {"\u03CA", "GREEK SMALL LETTER IOTA WITH DIALYTIKA"},
856     {"\u03CB", "GREEK SMALL LETTER UPSILON WITH DIALYTIKA"},
857     {"\u03CC", "GREEK SMALL LETTER OMICRON WITH TONOS"},
858     {"\u03CD", "GREEK SMALL LETTER UPSILON WITH TONOS"},
859     {"\u03CE", "GREEK SMALL LETTER OMEGA WITH TONOS"},
860     {"\u03D0", "GREEK BETA SYMBOL"},
861     {"\u03D1", "GREEK THETA SYMBOL"},
862     {"\u03D2", "GREEK UPSILON WITH HOOK SYMBOL"},
863     {"\u03D3", "GREEK UPSILON WITH ACUTE AND HOOK SYMBOL"},
864     {"\u03D4", "GREEK UPSILON WITH DIAERESIS AND HOOK SYMBOL"},
865     {"\u03D5", "GREEK PHI SYMBOL"},
866     {"\u03D6", "GREEK PI SYMBOL"},
867     {"\u03DA", "GREEK LETTER STIGMA"},
868     {"\u03DC", "GREEK LETTER DIGAMMA"},
869     {"\u03DE", "GREEK LETTER KOPPA"},
870     {"\u03E0", "GREEK LETTER SAMPI"},
871     {"\u03E2", "COPTIC CAPITAL LETTER SHEI"},
872     {"\u03E3", "COPTIC SMALL LETTER SHEI"},
873     {"\u03E4", "COPTIC CAPITAL LETTER FEI"},
874     {"\u03E5", "COPTIC SMALL LETTER FEI"},
875     {"\u03E6", "COPTIC CAPITAL LETTER KHEI"},
876     {"\u03E7", "COPTIC SMALL LETTER KHEI"},
877     {"\u03E8", "COPTIC CAPITAL LETTER HORI"},
878     {"\u03E9", "COPTIC SMALL LETTER HORI"},
879     {"\u03EA", "COPTIC CAPITAL LETTER GANGIA"},
880     {"\u03EB", "COPTIC SMALL LETTER GANGIA"},
881     {"\u03EC", "COPTIC CAPITAL LETTER SHIMA"},
882     {"\u03ED", "COPTIC SMALL LETTER SHIMA"},
883     {"\u03EE", "COPTIC CAPITAL LETTER DEI"},
884     {"\u03EF", "COPTIC SMALL LETTER DEI"},
885     {"\u03F0", "GREEK KAPPA SYMBOL"},
886     {"\u03F1", "GREEK RHO SYMBOL"},
887     {"\u03F2", "GREEK LUNATE SIGMA SYMBOL"},
888     {"\u03F3", "GREEK LETTER YOT"},
889     {"\u0401", "CYRILLIC CAPITAL LETTER IO"},
890     {"\u0402", "CYRILLIC CAPITAL LETTER DJE"},
891     {"\u0403", "CYRILLIC CAPITAL LETTER GJE"},
892     {"\u0404", "CYRILLIC CAPITAL LETTER UKRAINIAN IE"},
893     {"\u0405", "CYRILLIC CAPITAL LETTER DZE"},
894     {"\u0406", "CYRILLIC CAPITAL LETTER BYELORUSSIAN-UKRAINIAN I"},
895     {"\u0407", "CYRILLIC CAPITAL LETTER YI"},
896     {"\u0408", "CYRILLIC CAPITAL LETTER JE"},
897     {"\u0409", "CYRILLIC CAPITAL LETTER LJE"},
898     {"\u040A", "CYRILLIC CAPITAL LETTER NJE"},
899     {"\u040B", "CYRILLIC CAPITAL LETTER TSHE"},
900     {"\u040C", "CYRILLIC CAPITAL LETTER KJE"},
901     {"\u040E", "CYRILLIC CAPITAL LETTER SHORT U"},
902     {"\u040F", "CYRILLIC CAPITAL LETTER DZHE"},
903     {"\u0410", "CYRILLIC CAPITAL LETTER A"},
904     {"\u0411", "CYRILLIC CAPITAL LETTER BE"},
905     {"\u0412", "CYRILLIC CAPITAL LETTER VE"},
906     {"\u0413", "CYRILLIC CAPITAL LETTER GHE"},
907     {"\u0414", "CYRILLIC CAPITAL LETTER DE"},
908     {"\u0415", "CYRILLIC CAPITAL LETTER IE"},
909     {"\u0416", "CYRILLIC CAPITAL LETTER ZHE"},
910     {"\u0417", "CYRILLIC CAPITAL LETTER ZE"},
911     {"\u0418", "CYRILLIC CAPITAL LETTER I"},
912     {"\u0419", "CYRILLIC CAPITAL LETTER SHORT I"},
913     {"\u041A", "CYRILLIC CAPITAL LETTER KA"},
914     {"\u041B", "CYRILLIC CAPITAL LETTER EL"},
915     {"\u041C", "CYRILLIC CAPITAL LETTER EM"},
916     {"\u041D", "CYRILLIC CAPITAL LETTER EN"},
917     {"\u041E", "CYRILLIC CAPITAL LETTER O"},
918     {"\u041F", "CYRILLIC CAPITAL LETTER PE"},
919     {"\u0420", "CYRILLIC CAPITAL LETTER ER"},
920     {"\u0421", "CYRILLIC CAPITAL LETTER ES"},
921     {"\u0422", "CYRILLIC CAPITAL LETTER TE"},
922     {"\u0423", "CYRILLIC CAPITAL LETTER U"},
923     {"\u0424", "CYRILLIC CAPITAL LETTER EF"},
924     {"\u0425", "CYRILLIC CAPITAL LETTER HA"},
925     {"\u0426", "CYRILLIC CAPITAL LETTER TSE"},
926     {"\u0427", "CYRILLIC CAPITAL LETTER CHE"},
927     {"\u0428", "CYRILLIC CAPITAL LETTER SHA"},
928     {"\u0429", "CYRILLIC CAPITAL LETTER SHCHA"},
929     {"\u042A", "CYRILLIC CAPITAL LETTER HARD SIGN"},
930     {"\u042B", "CYRILLIC CAPITAL LETTER YERU"},
931     {"\u042C", "CYRILLIC CAPITAL LETTER SOFT SIGN"},
932     {"\u042D", "CYRILLIC CAPITAL LETTER E"},
933     {"\u042E", "CYRILLIC CAPITAL LETTER YU"},
934     {"\u042F", "CYRILLIC CAPITAL LETTER YA"},
935     {"\u0430", "CYRILLIC SMALL LETTER A"},
936     {"\u0431", "CYRILLIC SMALL LETTER BE"},
937     {"\u0432", "CYRILLIC SMALL LETTER VE"},
938     {"\u0433", "CYRILLIC SMALL LETTER GHE"},
939     {"\u0434", "CYRILLIC SMALL LETTER DE"},
940     {"\u0435", "CYRILLIC SMALL LETTER IE"},
941     {"\u0436", "CYRILLIC SMALL LETTER ZHE"},
942     {"\u0437", "CYRILLIC SMALL LETTER ZE"},
943     {"\u0438", "CYRILLIC SMALL LETTER I"},
944     {