View Javadoc

1   /*
2    * $Source: /usr/cvsroot/melati/melati/src/main/java/org/melati/util/HTMLUtils.java,v $
3    * $Revision: 1.29 $
4    *
5    * Copyright (C) 2001 Myles Chippendale
6    *
7    * Part of Melati (http://melati.org), a framework for the rapid
8    * development of clean, maintainable web applications.
9    *
10   * Melati is free software; Permission is granted to copy, distribute
11   * and/or modify this software under the terms either:
12   *
13   * a) the GNU General Public License as published by the Free Software
14   *    Foundation; either version 2 of the License, or (at your option)
15   *    any later version,
16   *
17   *    or
18   *
19   * b) any version of the Melati Software License, as published
20   *    at http://melati.org
21   *
22   * You should have received a copy of the GNU General Public License and
23   * the Melati Software License along with this program;
24   * if not, write to the Free Software Foundation, Inc.,
25   * 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA to obtain the
26   * GNU General Public License and visit http://melati.org to obtain the
27   * Melati Software License.
28   *
29   * Feel free to contact the Developers of Melati (http://melati.org),
30   * if you would like to work out a different arrangement than the options
31   * outlined here.  It is our intention to allow Melati to be used by as
32   * wide an audience as possible.
33   *
34   * This program is distributed in the hope that it will be useful,
35   * but WITHOUT ANY WARRANTY; without even the implied warranty of
36   * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
37   * GNU General Public License for more details.
38   *
39   * Contact details for copyright holder:
40   *
41   *     Myles Chippendale <mylesc At paneris.org>
42   */
43  package org.melati.util;
44  
45  import java.io.DataInputStream;
46  import java.io.FileNotFoundException;
47  import java.io.IOException;
48  import java.io.InputStream;
49  import java.io.StringWriter;
50  import java.io.Writer;
51  import java.nio.charset.Charset;
52  import java.nio.charset.CharsetEncoder;
53  import java.util.Enumeration;
54  
55  import javax.swing.text.AttributeSet;
56  import javax.swing.text.html.HTML;
57  import javax.swing.text.html.parser.AttributeList;
58  import javax.swing.text.html.parser.ContentModel;
59  import javax.swing.text.html.parser.DTD;
60  import javax.swing.text.html.parser.DTDConstants;
61  import javax.swing.text.html.parser.Element;
62  
63  /**
64   * An assortment of useful things to do with HTML.
65   */
66  public final class HTMLUtils {
67  
/** Private so that this holder of static helpers is never instantiated from outside. */
private HTMLUtils() {
}
69  
70    /** The DTD name. */
71    public static final String dtdNameForHTMLParser = "html32.bdtd";
72  
73    private static DTD dtdForHTMLParser = null;
74  
75    /**
76     * Add an Element to the ContentModel.
77     * @param cm the ContentModel to add to
78     * @param existing existing element
79     * @param alt alternate element
80     */
81    public static void add(ContentModel cm, Element existing, Element alt) {
82      if (cm.content == existing) {
83        ContentModel twig =
84            new ContentModel(0, existing, new ContentModel(0, alt, null));
85        if (cm.type == 0) {
86          cm.type = '|';
87          cm.content = twig;
88        }
89        else
90          cm.content = new ContentModel('|', twig);
91      }
92      else if (cm.content instanceof ContentModel)
93        add((ContentModel)cm.content, existing, alt);
94  
95      if (cm.next != null)
96        add(cm.next, existing, alt);
97    }
98  
99    /**
100    * Add element to a DTD.
101    * @param dtd DTD to add to 
102    * @param existing existing element
103    * @param alt alternate element
104    */
105   public static void addToContentModels(DTD dtd,
106                                         Element existing, Element alt) {
107     for (Enumeration els = dtd.elementHash.elements();
108          els.hasMoreElements();) {
109       ContentModel c = ((Element)els.nextElement()).content;
110       if (c != null)
111         add(c, existing, alt);
112     }
113   }
114 
115   /**
116    * @return a DTD
117    */
118   public static DTD dtdForHTMLParser() {
119     // not clear HTF this putDTDHash/getDTD API is meant to be useful ...
120 
121     if (dtdForHTMLParser == null)
122       try {
123         dtdForHTMLParser = DTD.getDTD(dtdNameForHTMLParser);
124         InputStream res = dtdForHTMLParser.getClass().
125                               getResourceAsStream(dtdNameForHTMLParser);
126         if (res == null)
127           throw new FileNotFoundException(
128               "Resource " + dtdNameForHTMLParser + " not found: " +
129               "but it ought to be in rt.jar?!");
130         dtdForHTMLParser.read(new DataInputStream(res));
131 
132         // add <SPAN CLASS=...> with the same content model as <DIV>
133         // [which is a hack for bibliomania!!]
134         // usable in the same places as <I>
135 
136         Element div = (Element)dtdForHTMLParser.elementHash.get("div");
137         Element i = (Element)dtdForHTMLParser.elementHash.get("i");
138 
139         dtdForHTMLParser.defineElement(
140            "span", DTDConstants.STARTTAG, false, false, div.content, null, null,
141            new AttributeList("class", DTDConstants.CDATA,
142                              0, null, null, null));
143 
144         Element span = (Element)dtdForHTMLParser.elementHash.get("span");
145 
146         addToContentModels(dtdForHTMLParser, i, span);
147       }
148       catch (Exception e) {
149         throw new UnexpectedExceptionException(
150           "making the DTD for Sun's HTML parser", e);
151       }
152 
153     return dtdForHTMLParser;
154   }
155 
156   /**
157    * If the given character has special meaning in HTML or will not
158    * necessarily encode in the character set, then return an escape string.
159    * <p>
160    * The name of this method implies the character is escaped as a
161    * character entity but if the second argument is true then newlines
162    * are encoded as &lt;BR&gt;.
163    * This is not required for attribute values.
164    * <p>
165    * Which characters will necessarily encode depends on the charset.
166    * For backward compatibility if a charset is not passed we assume the
167    * character will encode.
168    * If a charset is passed and a character does not encode then we
169    * replace it with a numeric character reference (not an entity
170    * either but pretty similar).
171    *
172    * @param c character to lookup entity for 
173    * @param mapBR whether to replace line ends
174    * @param ce an encoder
175    * @param markup whether string contains markup 
176    * @return an entity or null
177    */
178   public static String entityFor(char c, boolean mapBR, CharsetEncoder ce, boolean markup) {
179     switch (c) {
180       case '\n': return mapBR && !markup ? "<BR>\n" : null;
181       case '<' : return markup ? null : "&lt;" ;
182       case '>' : return markup ? null : "&gt;" ;
183       case '&' : return markup ? null : "&amp;" ;
184       // Unicode and ISO 8859-1
185 
186       case 192 : return "&Agrave;" ;
187       case 193 : return "&Aacute;" ;
188       case 194 : return "&Acirc;" ;
189       case 199 : return "&Ccedil;" ;
190       case 200 : return "&Egrave;" ;
191       case 201 : return "&Eacute;" ;
192       case 202 : return "&Ecirc;" ;
193       case 204 : return "&Igrave;" ;
194       case 205 : return "&Iacute;" ;
195       case 206 : return "&Icirc;" ;
196       case 210 : return "&Ograve;" ;
197       case 211 : return "&Oacute;" ;
198       case 212 : return "&Ocirc;" ;
199       case 217 : return "&Ugrave;" ;
200       case 218 : return "&Uacute;" ;
201       case 219 : return "&Ucirc;" ;
202       case 224 : return "&agrave;" ;
203       case 225 : return "&aacute;" ;
204       case 226 : return "&acirc;" ;
205       case 228 : return "&auml;" ;
206       case 231 : return "&ccedil;" ;
207       case 232 : return "&egrave;" ;
208       case 233 : return "&eacute;" ;
209       case 234 : return "&ecirc;" ;
210       case 236 : return "&igrave;" ;
211       case 237 : return "&iacute;" ;
212       case 238 : return "&icirc;" ;
213       case 242 : return "&ograve;" ;
214       case 243 : return "&oacute;" ;
215       case 244 : return "&ocirc;" ;
216       case 249 : return "&ugrave;" ;
217       case 250 : return "&uacute;" ;
218       case 251 : return "&ucirc;" ;
219       case 252 : return "&uuml;" ;
220       
221       
222       case '"' : return markup ? null : "&quot;";
223       case '\'': return markup ? null : "&#39;";
224       default:
225         if (ce == null || ce.canEncode(c)) {
226           return null;  
227         } else {
228           String result = "&#x" + Integer.toHexString(c) + ";";
229           //System.err.println("Cannot encode: " + c + " so encoded as: " + result);
230           return result;
231         }
232     }
233   }
234 
235   /**
236    * Return the String with all high value ASCII characters 
237    * replaced with HTML entities.
238    * 
239    * @param s input String
240    * @param mapBR whether to replace line ends with html breaks
241    * @param encoding the encoding of the input string
242    * @param markup whether string is an sgml fragment
243    * @return the input with appropriate substitutions
244    */
245   public static String entitied(String s, boolean mapBR, String encoding, boolean markup) {
246     System.err.println("encoding:" + encoding);
247     int length = s.length();
248     int i;
249     String entity = null;
250 
251     CharsetEncoder ce = null;
252     if (encoding != null) {
253       ce = Charset.forName(encoding).newEncoder();
254     }
255 
256     for (i = 0;
257          i < length && (entity = entityFor(s.charAt(i), mapBR, ce, markup)) == null;
258          ++i);
259 
260     if (entity == null) return s;
261     System.err.println("entitied:" + new Integer(s.charAt(i))+ "=" + entity);
262 
263     StringBuffer b = new StringBuffer(length * 2);
264     for (int j = 0; j < i; ++j)
265       b.append(s.charAt(j));
266 
267     b.append(entity);
268 
269     char c;
270     for (++i; i < length; ++i) {
271       c = s.charAt(i);
272       entity = entityFor(c, mapBR, ce, markup);
273       if (entity != null) {
274         b.append(entity);
275         System.err.println(new Integer(c) + "=" + entity);
276       } else
277         b.append(c);
278     }
279     return b.toString();
280   }
281 
282   /**
283    * Escape the given string as PCDATA without regard for any characters that
284    * cannot be encoded in some required character set.
285    * <p>
286    * This is for backward compatibility as it is used below.
287    *
288    * @param s the String to replace special characters from
289    * @return a new String with special characters replaced with entities
290    * @see #entitied(String, boolean, String, boolean)
291    */
292   public static String entitied(String s) {
293     return entitied(s, true, null, false);
294   }
295 
296   /**
297    * Javascript escape sequence for a character, if any, 
298    * otherwise null.
299    * @param c the character
300    * @return an escape sequence or null
301    */
302   public static String jsEscapeFor(char c) {
303     switch (c) {
304       case '\n': return "\\012";
305       case '"': return "\\042";
306       case '\'': return "\\047";
307       default: return null;
308     }
309   } 
310 
311   /**
312    * Javascript escape a String.
313    * @param s the String to escape
314    * @return the escaped String
315    */
316   public static String jsEscaped(String s) {
317     int length = s.length();
318     int i = 0;
319     String escape = null;
320     for (i = 0; i < length && (escape = jsEscapeFor(s.charAt(i))) == null; ++i);
321 
322     if (escape == null) return s;
323 
324     StringBuffer b = new StringBuffer(length * 2);
325     for (int j = 0; j < i; ++j)
326       b.append(s.charAt(j));
327 
328     b.append(escape);
329 
330     char c;
331     for (++i; i < length; ++i) {
332       c = s.charAt(i);
333       escape = jsEscapeFor(c);
334       if (escape != null)
335         b.append(escape);
336       else
337         b.append(c);
338     }
339     return b.toString();
340   }
341 
342   /**
343    * Write a tag to a Writer.
344    * @param w the Writer to write to
345    * @param tag the Tag to write
346    * @param attributes the Tag's attributes
347    * @throws IOException if there is a problem writing
348    */
349   public static void write(Writer w, HTML.Tag tag, AttributeSet attributes)
350       throws IOException {
351     w.write('<');
352     w.write(tag.toString());
353     for (Enumeration a = attributes.getAttributeNames();
354          a.hasMoreElements();) {
355       Object n = a.nextElement();
356       if (attributes.isDefined(n)) {
357         w.write(' ');
358         w.write(n.toString());
359         w.write("=\"");
360         w.write(entitied(attributes.getAttribute(n).toString()));
361         w.write('"');
362       }
363     }
364     w.write('>');
365   }
366 
367   /**
368    * @param tag the Tag
369    * @param attributes the Tag's attributes
370    * @return a String version of the Tag
371    */
372   public static String stringOf(HTML.Tag tag, AttributeSet attributes) {
373     StringWriter w = new StringWriter();
374 
375     try {
376       write(w, tag, attributes);
377     }
378     catch (IOException e) {
379       throw new UnexpectedExceptionException(e);
380     }
381 
382     return w.toString();
383   }
384 
385 /**
386  * An Instance of a tag.
387  */
388   public static class TagInstance {
389     /** The tag. */
390     public final HTML.Tag tag;
391     /** Its attributes. */
392     public final AttributeSet attributes;
393 
394     /** Constructor. */
395     public TagInstance(HTML.Tag tag, AttributeSet attributes) {
396       this.tag = tag;
397       this.attributes = attributes;
398     }
399 
400     /**
401      * Write tag to specified Writer. 
402      * @param w The Writer to write to.
403      */
404     public void write(Writer w) throws IOException {
405       HTMLUtils.write(w, tag, attributes);
406     }
407 
408     /** A String representation. 
409      * {@inheritDoc}
410      * @see java.lang.Object#toString()
411      */
412     public String toString() {
413       return HTMLUtils.stringOf(tag, attributes);
414     }
415   }
416 }