View Javadoc

1   /*
2    * $Source: /usr/cvsroot/melati/melati/src/main/java/org/melati/util/HttpHeader.java,v $
3    * $Revision: 1.13 $
4    *
5    * Copyright (C) 2003 Jim Wright
6    *
7    * Part of Melati (http://melati.org), a framework for the rapid
8    * development of clean, maintainable web applications.
9    *
10   * Melati is free software; Permission is granted to copy, distribute
11   * and/or modify this software under the terms either:
12   *
13   * a) the GNU General Public License as published by the Free Software
14   *    Foundation; either version 2 of the License, or (at your option)
15   *    any later version,
16   *
17   *    or
18   *
19   * b) any version of the Melati Software License, as published
20   *    at http://melati.org
21   *
22   * You should have received a copy of the GNU General Public License and
23   * the Melati Software License along with this program;
24   * if not, write to the Free Software Foundation, Inc.,
25   * 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA to obtain the
26   * GNU General Public License and visit http://melati.org to obtain the
27   * Melati Software License.
28   *
29   * Feel free to contact the Developers of Melati (http://melati.org),
30   * if you would like to work out a different arrangement than the options
31   * outlined here.  It is our intention to allow Melati to be used by as
32   * wide an audience as possible.
33   *
34   * This program is distributed in the hope that it will be useful,
35   * but WITHOUT ANY WARRANTY; without even the implied warranty of
36   * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
37   * GNU General Public License for more details.
38   *
39   * Contact details for copyright holder:
40   *
41   *     Jim Wright <jimw At paneris.org>
42   *     Bohemian Enterprise
43   *     Predmerice nad Jizerou 77
44   *     294 74
45   *     Mlada Boleslav
46   *     Czech Republic
47   */
48  
49  package org.melati.util;
50  
51  import java.io.StreamTokenizer;
52  import java.io.StringReader;
53  import java.io.IOException;
54  import java.util.Iterator;
55  import java.util.Enumeration;
56  
/**
 * Representation of occurrences of an HTTP header field.
 * <p>
 * These are defined in RFC 2616 and have the same general form as in
 * RFC 822 section 3.1.
 * <p>
 * We generally assume that all continuation lines and occurrences in
 * a message are concatenated with comma separators.
 * <p>
 * An instance wraps a single {@link Tokenizer}; every iterator created
 * from the instance reads from that same tokenizer, so iterators share
 * their position in the header.
 *
 * @author  Jim Wright
 */
public class HttpHeader {

  /**
   * Instance of inner {@link Tokenizer}, shared by all iterators
   * created from this header.
   */
  protected Tokenizer tokenizer;

  /**
   * Create an instance representing the given comma separated fields.
   *
   * @param values the concatenated field values; <code>null</code> means
   *               "no such fields", the empty string is rejected
   * @throws HttpHeaderException if the tokenizer rejects the argument
   */
  public HttpHeader(String values) throws HttpHeaderException {
    tokenizer = new Tokenizer(values);
  }

  /**
   * Abstract enumeration of fields.
   * <p>
   * Subtypes decide what type of token to return and how
   * to represent it.
   * <p>
   * This class implements both <code>Iterator</code> and
   * <code>Enumeration</code> so that callers can use either, and it
   * removes the need to think about checked exceptions in subtypes:
   * {@link #next()} returns the exception as the element instead of
   * throwing it.
   */
  public abstract class FieldIterator implements Iterator, Enumeration {

    /**
     * Same as {@link #hasNext()}.
     *
     * @return whether there are more tokens to read
     * @see java.util.Enumeration#hasMoreElements()
     */
    public final boolean hasMoreElements() {
      return hasNext();
    }

    /**
     * Same as {@link #next()}.
     *
     * @return the next element, or the exception raised while reading it
     * @see java.util.Enumeration#nextElement()
     */
    public final Object nextElement() {
      return next();
    }

    /**
     * Report whether the shared tokenizer has reached the end of input.
     *
     * @return <code>true</code> unless the current token is EOF
     * @see java.util.Iterator#hasNext()
     * @see #next()
     */
    public final boolean hasNext() {
      return tokenizer.ttype != StreamTokenizer.TT_EOF;
    }

    /**
     * Always fails: tokens cannot be removed from a header.
     *
     * @throws UnsupportedOperationException always
     * @see java.util.Iterator#remove()
     */
    public void remove() throws UnsupportedOperationException {
      throw new UnsupportedOperationException("Cannot remove tokens from the HTTP header");
    }

    /**
     * Return the next element or an exception.
     * <p>
     * Callers must check the type of the result: a parsing problem is
     * reported by <em>returning</em> the {@link HttpHeaderException}, not
     * by throwing it.
     *
     * @return the result of {@link #nextToken()}, or the
     *         {@link HttpHeaderException} it threw
     */
    public Object next() {
      try {
        return nextToken();
      }
      catch (HttpHeaderException e) {
        // Deliberate: the Iterator/Enumeration contracts have no checked
        // exceptions, so the problem is handed back as the element.
        return e;
      }
    }

    /**
     * Read the next token in the representation chosen by the subtype.
     *
     * @return the next token
     * @throws HttpHeaderException if the next token cannot be read
     */
    public abstract Object nextToken() throws HttpHeaderException;

  }

  /**
   * Iteration over comma separated words, returned as <code>String</code>s.
   */
  public class WordIterator extends FieldIterator {

    /**
     * Read the current word and move past any comma separator after it.
     *
     * @return the next word
     * @throws HttpHeaderException if the current token is not a word
     * @throws IllegalStateException if the word is followed by something
     *         other than a comma or the end of input
     */
    public String nextWord() throws HttpHeaderException {
      String result = tokenizer.readWord();
      tokenizer.skipAnyCommaSeparator();
      return result;
    }

    /**
     * Same as {@link #nextWord()}.
     *
     * @return the next word
     * @throws HttpHeaderException if the current token is not a word
     * @see org.melati.util.HttpHeader.FieldIterator#nextToken()
     */
    public Object nextToken() throws HttpHeaderException {
      return nextWord();
    }

  }

  /**
   * Factory method to create and return an iterator of words.
   *
   * @return a new WordIterator
   */
  public final WordIterator wordIterator() {
    return new WordIterator();
  }

  /**
   * Iteration over {@link HttpHeader.TokenAndQValue}s.
   */
  public class TokenAndQValueIterator extends FieldIterator {

    /**
     * Delegate to {@link HttpHeader#nextTokenAndQValue()} of the
     * enclosing header.
     *
     * @return the next TokenAndQValue
     * @throws HttpHeaderException if the next field cannot be parsed
     */
    public TokenAndQValue nextTokenAndQValue() throws HttpHeaderException {
      return HttpHeader.this.nextTokenAndQValue();
    }

    /**
     * Same as {@link #nextTokenAndQValue()}.
     *
     * @return the next TokenAndQValue
     * @throws HttpHeaderException if the next field cannot be parsed
     * @see org.melati.util.HttpHeader.FieldIterator#nextToken()
     */
    public Object nextToken() throws HttpHeaderException {
      return nextTokenAndQValue();
    }

  }

  /**
   * Factory method to create and return the next
   * {@link HttpHeader.TokenAndQValue}, read from the shared tokenizer.
   *
   * @return a new TokenAndQValue
   * @throws HttpHeaderException if the next field cannot be parsed
   */
  public TokenAndQValue nextTokenAndQValue() throws HttpHeaderException {
    return new TokenAndQValue(tokenizer);
  }

  /**
   * Factory method to create and return an iterator of {@link TokenAndQValue}s.
   *
   * @return a new TokenAndQValueIterator
   */
  public TokenAndQValueIterator tokenAndQValueIterator() {
    return new TokenAndQValueIterator();
  }

  /**
   * A token and associated qvalue.
   */
  public static class TokenAndQValue {

    /**
     * The token, or the content of a quoted string with the quotes
     * removed. It may be followed in the header by a semicolon and a
     * qvalue.
     */
    public String token;

    /**
     * Value between zero and one with at most 3 decimal places.
     * <p>
     * q stands for "quality" but the RFC 2616 says this is not
     * completely accurate.
     * Values closer to 1.0 are better.
     * Zero means completely unfit.
     * <p>
     * The default is 1.0 if not explicitly initialised and this
     * appears to be correct for most possible uses if not all.
     * Note that the range is not checked when the value is parsed.
     */
    public float q = 1.0f;

    /**
     * Create an uninitialised instance.
     */
    public TokenAndQValue() {
    }

    /**
     * Create an instance and initialise it by reading the given
     * tokenizer, then move past any comma separator that follows.
     *
     * @param t the tokenizer positioned at a word or quoted string
     * @throws HttpHeaderException if the field cannot be parsed
     * @throws IllegalStateException if the field is followed by something
     *         other than a comma or the end of input
     */
    public TokenAndQValue(Tokenizer t) throws HttpHeaderException {
      this();
      t.readTokenAndQValue(this);
      t.skipAnyCommaSeparator();
    }

  }

  /**
   * Tokenizer for parsing occurrences of a field.
   * <p>
   * Header fields have format defined in RFC 2616 and have the same
   * general form as in RFC 822 section 3.1.
   * <p>
   * This is for fields consisting of tokens, quoted strings and
   * separators and not those consisting of an arbitrary sequence of
   * octets.
   * Tokens are US ASCII characters other than:
   * <ul>
   * <li> control characters 0000 to 001F and 007F;
   * <li> separators defined in RFC 2616;
   * </ul>
   * <p>
   * The convenience methods defined here provide some guidance on how
   * to interact with the super-type but you can also use inherited
   * methods.
   * <p>
   * We assume that the next token is always already read when a method
   * starts to interpret a sequence of tokens.
   * In other words the first token is read by the constructor(s) and then
   * each such
   * method returns as a result of reading a token or EOF that it cannot
   * process but without pushing it back.
   * The next token to be interpreted is hence the current token
   * described by the inherited instance variables.
   * <p>
   * Note that whitespace is automatically skipped by the supertype.
   *
   * @author  Jim Wright
   */
  public static class Tokenizer extends StreamTokenizer {

    /**
     * Create an instance from a string formed by concatenation of
     * continuation lines and all occurrences of a field, with comma
     * separators.
     * <p>
     * In theory a separator can consist of one or more commas and
     * spaces and tab.
     * Fields are never empty.
     * We cope with this but I doubt typical callers ever encounter
     * such strings.
     * <p>
     * The field list should not be empty but null is
     * allowed to explicitly indicate that there are no such fields,
     * if an instance is required nevertheless to provide other
     * functionality.
     *
     * @param fields the concatenated fields, or <code>null</code> for none
     * @throws HttpHeaderException Error detected in the argument.
     */
    public Tokenizer(String fields) throws HttpHeaderException {
      super(new StringReader(fields == null ? "" : fields));

      if (fields != null && fields.length() == 0) {
        throw new HttpHeaderException("Empty sequence of HTTP header fields");        
      }
      resetSyntax();
      // Initially make all non-control characters token
      // characters
      wordChars('\u0020', '\u007E');
      // Now change separators back. Tab is not
      // necessary and there are some ranges but let's
      // not try and be clever.
      String separator = "()<>@,;:\\\"/[]?={} \t";
      for (int i = 0; i < separator.length(); i++) {
        ordinaryChar(separator.charAt(i));
      }

      // Resetting effectively did this to whitespace chars
      // ordinaryChars('\u0000', '\u0020');
      // Set space and tab characters as whitespace
      whitespaceChars(' ', ' ');
      whitespaceChars('\t', '\t');

      quoteChar('"');

      // Needed so that readNVal() sees qvalues as TT_NUMBER tokens
      parseNumbers();

      // Here are some things we have effectively done by resetting
      // ordinaryChar('/');
      // ordinaryChar('\'');

      // Do not do any other special processing
      eolIsSignificant(false);
      lowerCaseMode(false);
      slashSlashComments(false);
      slashStarComments(false);

      // Read the first token
      nextLToken();
      if (ttype == ',') {
        throw new HttpHeaderException("HTTP header fields starts with comma separator");
      }
    }

    /**
     * Same as <code>nextToken()</code> but does not throw an <code>IOException</code>
     * and handles erroneous line breaks.
     * <p>
     * Line feed is an ordinary character after <code>resetSyntax()</code>,
     * so an unquoted line feed is returned with a <code>ttype</code> equal
     * to <code>TT_EOL</code> and is rejected here.
     *
     * @return int value of next LToken
     * @throws HttpHeaderException Error detected in the fields, or the
     *         underlying reader failed (not expected when reading from a
     *         string; the <code>IOException</code> is kept as the cause).
     */
    public int nextLToken() throws HttpHeaderException {
      int result;
      try {
        result = nextToken();
      }
      catch (IOException e) {
        // Should be unreachable because we read from a string, but do not
        // hide it behind a meaningless return value if it ever happens.
        throw new HttpHeaderException("Unexpected I/O error reading HTTP header fields", e);
      }
      if (ttype == TT_EOL) {
        throw new HttpHeaderException("HTTP header fields span unquoted line breaks");
      }
      return result;
    }

    /**
     * Read up to and including the next token after comma
     * separator(s) and whitespace assuming the current token is a comma.
     *
     * @return Resulting ttype.
     * @throws IllegalStateException Current token is not a comma.
     * @throws HttpHeaderException Error detected in the fields.
     */
    public final int skipCommaSeparator() throws HttpHeaderException {
      if (ttype != ',') {
        throw new IllegalStateException("Not at a comma");
      }
      // Consecutive commas count as a single separator
      while (nextLToken() == ',') {
        // keep reading
      }
      return ttype;
    }

    /**
     * Read up to and including the next token after any comma
     * separator(s) and whitespace.
     * <p>
     * This is the same as {@link #skipCommaSeparator()} but it does
     * nothing if we are at EOF.
     *
     * @return Resulting ttype.
     * @throws IllegalStateException Current token is neither EOF nor a
     *         comma.
     * @throws HttpHeaderException Error detected in the fields.
     */
    public final int skipAnyCommaSeparator() throws HttpHeaderException {
      if (ttype != TT_EOF) {
        skipCommaSeparator();
      }
      return ttype;
    }

    /**
     * Convenience method to test for token or quoted string.
     * <p>
     * If this returns true then the token value is in <code>sval</code>
     * with any quotes removed.
     *
     * @return whether token is an SVal
     */
    public final boolean isSVal() {
      return ttype == TT_WORD || ttype == '"';
    }

    /**
     * Read the word token or quoted string that comes next.
     *
     * @return the SVal
     * @throws HttpHeaderException Error detected in the fields.
     */
    public final String readSVal() throws HttpHeaderException {
      if (! isSVal()) {
        throw new HttpHeaderException("Next token is not a (possibly quoted) word: " +
            toString());
      }
      // Capture the value before advancing overwrites sval
      String result = sval;
      nextLToken();
      return result;
    }

    /**
     * Read the word token that comes next.
     *
     * @return the word as a String
     * @throws HttpHeaderException Error detected in the fields.
     */
    public final String readWord() throws HttpHeaderException {
      if (ttype != TT_WORD) {
        throw new HttpHeaderException("Next token is not a word token: " +
                                      toString());
      }
      // Capture the value before advancing overwrites sval
      String result = sval;
      nextLToken();
      return result;
    }

    /**
     * Read the given word token that comes next.
     * <p>
     * The comparison is case sensitive. The tokenizer has already moved
     * past the word when a mismatch is reported.
     *
     * @param word the word that must come next
     * @throws HttpHeaderException Error detected in the fields.
     */
    public final void readWord(String word) throws HttpHeaderException {
      String read = readWord();
      if (! read.equals(word)) {
        throw new HttpHeaderException("Expecting '" + word +
                                      "' but encountered: " + toString());
      }
    }

    /**
     * Read the given character that comes next.
     *
     * @param c the ordinary character that must be the current token
     * @throws HttpHeaderException Error detected in the fields.
     */
    public final void readChar(char c) throws HttpHeaderException {
      if (ttype != c) {
        throw new HttpHeaderException("Expecting '" + c +
                                      "' but encountered: " +
                                      toString());
      }
      nextLToken();
    }

    /**
     * Read the number token that comes next.
     *
     * @return the number's value as a double
     * @throws HttpHeaderException Error detected in the fields.
     */
    public final double readNVal() throws HttpHeaderException {
      if (ttype != TT_NUMBER) {
        throw new HttpHeaderException("Next token is not a number: " +
            toString());
      }
      // Capture the value before advancing overwrites nval
      double result = nval;
      nextLToken();
      return result;
    }

    /**
     * Read a token sequence of the form "; q = 0.42" and return the number.
     * <p>
     * The number is not checked against the range zero to one.
     *
     * @return the number's value as a float
     * @throws IllegalStateException Current token not semicolon.
     * @throws HttpHeaderException Error detected in the fields.
     */
    public final float readQValue() 
        throws IllegalStateException, HttpHeaderException {
      if (ttype != ';') {
        throw new IllegalStateException("Not at a semicolon");
      }
      readChar(';');
      readWord("q");
      readChar('=');
      return (float)readNVal();
    }

    /**
     * Read a word or quoted string token optionally followed by a string
     * of the form "; q = 0.42" and initialises the given object.
     * <p>
     * <code>result.q</code> is left untouched when no qvalue follows.
     *
     * @param result the object to initialise
     * @return the given object, initialised
     * @throws HttpHeaderException Error detected in the fields.
     */
    protected TokenAndQValue readTokenAndQValue(TokenAndQValue result)
          throws HttpHeaderException {
      result.token = readSVal();
      switch (ttype) {
      case TT_EOF :
      case ',' :
        // No qvalue given
        break;
      case ';' :
        result.q = readQValue();
        break;
      default:
        throw new HttpHeaderException("Word token: \'" + result.token +
            "\' is followed by something unexpected: " + toString());
      }
      return result;
    }

  }

  /**
   * Exception detected in an {@link HttpHeader}.
   * <p>
   * We might want to declare some supertype as thrown or make this
   * outer.
   * <p>
   * Header fields are usually obtained from servlet containers or
   * similar after some processing.
   * But it's possible that some unusual client has sent something
   * erroneous or just unusual that has not been filtered out
   * earlier and causes an error here.
   * <p>
   * In general detecting such problems requires parsing.
   * So although we could nearly always blame the caller we provide
   * a service instead (as part of the contract).
   * <p>
   * We do sometimes blame the caller because we assume that the
   * caller has checked the next token type before some call.
   * We do this by throwing an <code>IllegalStateException</code>
   * instead.
   */
  public static class HttpHeaderException extends java.lang.Exception {
    private static final long serialVersionUID = 1L;

    /**
     * Create an instance with message.
     *
     * @param message description of the problem
     */
    public HttpHeaderException(String message) {
      super(message);
    }

    /**
     * Create an instance with message and cause.
     *
     * @param message description of the problem
     * @param e the underlying cause
     */
    public HttpHeaderException(String message, Exception e) {
      super(message, e);
    }

  }

}