/*
 * $Source$
 * $Revision$
 *
 * Copyright (C) 2001 Myles Chippendale
 *
 * Part of Melati (http://melati.org), a framework for the rapid
 * development of clean, maintainable web applications.
 *
 * Melati is free software; Permission is granted to copy, distribute
 * and/or modify this software under the terms either:
 *
 * a) the GNU General Public License as published by the Free Software
 *    Foundation; either version 2 of the License, or (at your option)
 *    any later version,
 *
 *    or
 *
 * b) any version of the Melati Software License, as published
 *    at http://melati.org
 *
 * You should have received a copy of the GNU General Public License and
 * the Melati Software License along with this program;
 * if not, write to the Free Software Foundation, Inc.,
 * 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA to obtain the
 * GNU General Public License and visit http://melati.org to obtain the
 * Melati Software License.
 *
 * Feel free to contact the Developers of Melati (http://melati.org),
 * if you would like to work out a different arrangement than the options
 * outlined here.  It is our intention to allow Melati to be used by as
 * wide an audience as possible.
 *
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 * GNU General Public License for more details.
 *
 * Contact details for copyright holder:
 *
 *     Myles Chippendale <mylesc At paneris.org>
 */
43  package org.melati.util;
44  
45  import java.io.DataInputStream;
46  import java.io.FileNotFoundException;
47  import java.io.IOException;
48  import java.io.InputStream;
49  import java.io.StringWriter;
50  import java.io.Writer;
51  import java.nio.charset.Charset;
52  import java.nio.charset.CharsetEncoder;
53  import java.util.Enumeration;
54  
55  import javax.swing.text.AttributeSet;
56  import javax.swing.text.html.HTML;
57  import javax.swing.text.html.parser.AttributeList;
58  import javax.swing.text.html.parser.ContentModel;
59  import javax.swing.text.html.parser.DTD;
60  import javax.swing.text.html.parser.DTDConstants;
61  import javax.swing.text.html.parser.Element;
62  
63  /**
64   * An assortment of useful things to do with HTML.
65   */
66  public final class HTMLUtils {
67  
68    private HTMLUtils() {}
69  
70    /** The DTD name. */
71    public static final String dtdNameForHTMLParser = "html32.bdtd";
72  
73    private static DTD dtdForHTMLParser = null;
74  
75    /**
76     * Add an Element to the ContentModel.
77     * @param cm the ContentModel to add to
78     * @param existing existing element
79     * @param alt alternate element
80     */
81    public static void add(ContentModel cm, Element existing, Element alt) {
82      if (cm.content == existing) {
83        ContentModel twig =
84            new ContentModel(0, existing, new ContentModel(0, alt, null));
85        if (cm.type == 0) {
86          cm.type = '|';
87          cm.content = twig;
88        }
89        else
90          cm.content = new ContentModel('|', twig);
91      }
92      else if (cm.content instanceof ContentModel)
93        add((ContentModel)cm.content, existing, alt);
94  
95      if (cm.next != null)
96        add(cm.next, existing, alt);
97    }
98  
99    /**
100    * Add element to a DTD.
101    * @param dtd DTD to add to 
102    * @param existing existing element
103    * @param alt alternate element
104    */
105   public static void addToContentModels(DTD dtd,
106                                         Element existing, Element alt) {
107     for (Enumeration<Element> els = dtd.elementHash.elements();
108          els.hasMoreElements();) {
109       ContentModel c = ((Element)els.nextElement()).content;
110       if (c != null)
111         add(c, existing, alt);
112     }
113   }
114 
115   /**
116    * @return a DTD
117    */
118   public static DTD dtdForHTMLParser() {
119     // not clear HTF this putDTDHash/getDTD API is meant to be useful ...
120 
121     if (dtdForHTMLParser == null)
122       try {
123         dtdForHTMLParser = DTD.getDTD(dtdNameForHTMLParser);
124         InputStream res = dtdForHTMLParser.getClass().
125                               getResourceAsStream(dtdNameForHTMLParser);
126         if (res == null)
127           throw new FileNotFoundException(
128               "Resource " + dtdNameForHTMLParser + " not found: " +
129               "but it ought to be in rt.jar?!");
130         dtdForHTMLParser.read(new DataInputStream(res));
131 
132         // add <SPAN CLASS=...> with the same content model as <DIV>
133         // [which is a hack for bibliomania!!]
134         // usable in the same places as <I>
135 
136         Element div = (Element)dtdForHTMLParser.elementHash.get("div");
137         Element i = (Element)dtdForHTMLParser.elementHash.get("i");
138 
139         dtdForHTMLParser.defineElement(
140            "span", DTDConstants.STARTTAG, false, false, div.content, null, null,
141            new AttributeList("class", DTDConstants.CDATA,
142                              0, null, null, null));
143 
144         Element span = (Element)dtdForHTMLParser.elementHash.get("span");
145 
146         addToContentModels(dtdForHTMLParser, i, span);
147       }
148       catch (Exception e) {
149         throw new UnexpectedExceptionException(
150           "making the DTD for Sun's HTML parser", e);
151       }
152 
153     return dtdForHTMLParser;
154   }
155 
156   /**
157    * If the given character has special meaning in HTML or will not
158    * necessarily encode in the character set, then return an escape string.
159    * <p>
160    * The name of this method implies the character is escaped as a
161    * character entity but if the second argument is true then newlines
162    * are encoded as &lt;BR&gt;.
163    * This is not required for attribute values.
164    * <p>
165    * Which characters will necessarily encode depends on the charset.
166    * For backward compatibility if a charset is not passed we assume the
167    * character will encode.
168    * If a charset is passed and a character does not encode then we
169    * replace it with a numeric character reference (not an entity
170    * either but pretty similar).
171    *
172    * @param c character to lookup entity for 
173    * @param mapBR whether to replace line ends
174    * @param ce an encoder
175    * @param markup whether string contains markup 
176    * @return an entity or null
177    */
178   public static String entityFor(char c, boolean mapBR, CharsetEncoder ce, boolean markup) {
179     switch (c) {
180       case '\n': return mapBR && !markup ? "<BR>\n" : null;
181       case '<' : return markup ? null : "&lt;" ;
182       case '>' : return markup ? null : "&gt;" ;
183       case '&' : return markup ? null : "&amp;" ;
184       // Unicode and ISO 8859-1
185 
186       case 163 : return "&pound;" ;
187       case 192 : return "&Agrave;" ;
188       case 193 : return "&Aacute;" ;
189       case 194 : return "&Acirc;" ;
190       case 199 : return "&Ccedil;" ;
191       case 200 : return "&Egrave;" ;
192       case 201 : return "&Eacute;" ;
193       case 202 : return "&Ecirc;" ;
194       case 204 : return "&Igrave;" ;
195       case 205 : return "&Iacute;" ;
196       case 206 : return "&Icirc;" ;
197       case 210 : return "&Ograve;" ;
198       case 211 : return "&Oacute;" ;
199       case 212 : return "&Ocirc;" ;
200       case 217 : return "&Ugrave;" ;
201       case 218 : return "&Uacute;" ;
202       case 219 : return "&Ucirc;" ;
203       case 224 : return "&agrave;" ;
204       case 225 : return "&aacute;" ;
205       case 226 : return "&acirc;" ;
206       case 228 : return "&auml;" ;
207       case 231 : return "&ccedil;" ;
208       case 232 : return "&egrave;" ;
209       case 233 : return "&eacute;" ;
210       case 234 : return "&ecirc;" ;
211       case 236 : return "&igrave;" ;
212       case 237 : return "&iacute;" ;
213       case 238 : return "&icirc;" ;
214       case 242 : return "&ograve;" ;
215       case 243 : return "&oacute;" ;
216       case 244 : return "&ocirc;" ;
217       case 249 : return "&ugrave;" ;
218       case 250 : return "&uacute;" ;
219       case 251 : return "&ucirc;" ;
220       case 252 : return "&uuml;" ;
221       
222       
223       case '"' : return markup ? null : "&quot;";
224       case '\'': return markup ? null : "&#39;";
225       default:
226         if (ce == null || ce.canEncode(c)) {
227           return null;  
228         } else {
229           String result = "&#x" + Integer.toHexString(c) + ";";
230           //System.err.println("Cannot encode: " + c + " so encoded as: " + result);
231           return result;
232         }
233     }
234   }
235 
236   /**
237    * Return the String with all high value ASCII characters 
238    * replaced with HTML entities.
239    * 
240    * @param s input String
241    * @param mapBR whether to replace line ends with html breaks
242    * @param encoding the encoding of the input string
243    * @param markup whether string is an sgml fragment
244    * @return the input with appropriate substitutions
245    */
246   public static String entitied(String s, boolean mapBR, String encoding, boolean markup) {
247     int length = s.length();
248     int i;
249     String entity = null;
250 
251     CharsetEncoder ce = null;
252     if (encoding != null) {
253       ce = Charset.forName(encoding).newEncoder();
254     }
255 
256     for (i = 0;
257          i < length && (entity = entityFor(s.charAt(i), mapBR, ce, markup)) == null;
258          ++i);
259 
260     if (entity == null) return s;
261 
262     StringBuffer b = new StringBuffer(length * 2);
263     for (int j = 0; j < i; ++j)
264       b.append(s.charAt(j));
265 
266     b.append(entity);
267 
268     char c;
269     for (++i; i < length; ++i) {
270       c = s.charAt(i);
271       entity = entityFor(c, mapBR, ce, markup);
272       if (entity != null) {
273         b.append(entity);
274       } else
275         b.append(c);
276     }
277     return b.toString();
278   }
279 
280   /**
281    * Escape the given string as PCDATA without regard for any characters that
282    * cannot be encoded in some required character set.
283    * <p>
284    * This is for backward compatibility as it is used below.
285    *
286    * @param s the String to replace special characters from
287    * @return a new String with special characters replaced with entities
288    * @see #entitied(String, boolean, String, boolean)
289    */
290   public static String entitied(String s) {
291     return entitied(s, true, null, false);
292   }
293 
294   /**
295    * Javascript escape sequence for a character, if any, 
296    * otherwise null.
297    * @param c the character
298    * @return an escape sequence or null
299    */
300   public static String jsEscapeFor(char c) {
301     switch (c) {
302       case '\n': return "\\012";
303       case '"': return "\\042";
304       case '\'': return "\\047";
305       default: return null;
306     }
307   } 
308 
309   /**
310    * Javascript escape a String.
311    * @param s the String to escape
312    * @return the escaped String
313    */
314   public static String jsEscaped(String s) {
315     int length = s.length();
316     int i = 0;
317     String escape = null;
318     for (i = 0; i < length && (escape = jsEscapeFor(s.charAt(i))) == null; ++i);
319 
320     if (escape == null) return s;
321 
322     StringBuffer b = new StringBuffer(length * 2);
323     for (int j = 0; j < i; ++j)
324       b.append(s.charAt(j));
325 
326     b.append(escape);
327 
328     char c;
329     for (++i; i < length; ++i) {
330       c = s.charAt(i);
331       escape = jsEscapeFor(c);
332       if (escape != null)
333         b.append(escape);
334       else
335         b.append(c);
336     }
337     return b.toString();
338   }
339 
340   /**
341    * Write a tag to a Writer.
342    * @param w the Writer to write to
343    * @param tag the Tag to write
344    * @param attributes the Tag's attributes
345    * @throws IOException if there is a problem writing
346    */
347   public static void write(Writer w, HTML.Tag tag, AttributeSet attributes)
348       throws IOException {
349     w.write('<');
350     w.write(tag.toString());
351     for (Enumeration<?> a = attributes.getAttributeNames();
352          a.hasMoreElements();) {
353       Object n = a.nextElement();
354       if (attributes.isDefined(n)) {
355         w.write(' ');
356         w.write(n.toString());
357         w.write("=\"");
358         w.write(entitied(attributes.getAttribute(n).toString()));
359         w.write('"');
360       }
361     }
362     w.write('>');
363   }
364 
365   /**
366    * @param tag the Tag
367    * @param attributes the Tag's attributes
368    * @return a String version of the Tag
369    */
370   public static String stringOf(HTML.Tag tag, AttributeSet attributes) {
371     StringWriter w = new StringWriter();
372 
373     try {
374       write(w, tag, attributes);
375     }
376     catch (IOException e) {
377       throw new UnexpectedExceptionException(e);
378     }
379 
380     return w.toString();
381   }
382 
383 /**
384  * An Instance of a tag.
385  */
386   public static class TagInstance {
387     /** The tag. */
388     public final HTML.Tag tag;
389     /** Its attributes. */
390     public final AttributeSet attributes;
391 
392     /** Constructor. */
393     public TagInstance(HTML.Tag tag, AttributeSet attributes) {
394       this.tag = tag;
395       this.attributes = attributes;
396     }
397 
398     /**
399      * Write tag to specified Writer. 
400      * @param w The Writer to write to.
401      */
402     public void write(Writer w) throws IOException {
403       HTMLUtils.write(w, tag, attributes);
404     }
405 
406     /** A String representation. 
407      * {@inheritDoc}
408      * @see java.lang.Object#toString()
409      */
410     public String toString() {
411       return HTMLUtils.stringOf(tag, attributes);
412     }
413   }
414 }