1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49 package org.melati.util;
50
51 import java.io.StreamTokenizer;
52 import java.io.StringReader;
53 import java.io.IOException;
54 import java.util.Iterator;
55 import java.util.Enumeration;
56
/**
 * Representation of occurrences of an HTTP header field.
 * <p>
 * These are defined in RFC 2616 and have the same general form as in
 * RFC 822 section 3.1.
 * <p>
 * We generally assume that all continuation lines and occurrences in
 * a message are concatenated with comma separators.
 * <p>
 * All iterators created by one instance share its single
 * {@link #tokenizer}, so reading through one iterator advances the
 * position seen by the others.
 *
 * @author Jim Wright
 */
public class HttpHeader {

  /**
   * Instance of inner {@link Tokenizer}, positioned on the next token
   * to be interpreted.
   */
  protected Tokenizer tokenizer;

  /**
   * Create an instance representing the given comma separated fields.
   *
   * @param values the comma separated fields, or null to indicate that
   *        there are none
   * @throws HttpHeaderException if the fields are an empty string or
   *         start with a comma separator
   */
  public HttpHeader(String values) throws HttpHeaderException {
    tokenizer = new Tokenizer(values);
  }

  /**
   * Abstract enumeration of fields.
   * <p>
   * Subtypes decide what type of token to return and how
   * to represent it.
   * <p>
   * This class implements both <code>Iterator</code> and
   * <code>Enumeration</code> so that callers may use either, and it
   * removes the need to think about exceptions in subtypes: see
   * {@link #next()}.
   */
  public abstract class FieldIterator implements Iterator, Enumeration {

    /**
     * {@inheritDoc}
     * @see java.util.Enumeration#hasMoreElements()
     */
    public final boolean hasMoreElements() {
      return hasNext();
    }

    /**
     * {@inheritDoc}
     * @see java.util.Enumeration#nextElement()
     */
    public final Object nextElement() {
      return next();
    }

    /**
     * {@inheritDoc}
     * <p>
     * There are more elements for as long as the shared tokenizer has
     * not reached the end of the fields.
     *
     * @see java.util.Iterator#hasNext()
     * @see #next()
     */
    public final boolean hasNext() {
      return tokenizer.ttype != StreamTokenizer.TT_EOF;
    }

    /**
     * Always fails: tokens cannot be removed from a header.
     *
     * @throws UnsupportedOperationException always
     * @see java.util.Iterator#remove()
     */
    public void remove() throws UnsupportedOperationException {
      throw new UnsupportedOperationException("Cannot remove tokens from the HTTP header");
    }

    /**
     * Return the next element or an exception.
     * <p>
     * A parsing problem is not thrown but returned as the element, so
     * callers must check the type of the result. Callers should stop
     * iterating once an exception has been returned, because the
     * offending token may not have been consumed and
     * {@link #hasNext()} may then remain true.
     *
     * @return the next token, or the {@link HttpHeaderException}
     *         describing why an object of the anticipated type cannot
     *         be returned
     */
    public Object next() {
      try {
        return nextToken();
      }
      catch (HttpHeaderException e) {
        return e;
      }
    }

    /**
     * @return the next token
     * @throws HttpHeaderException if the next token cannot be read
     */
    public abstract Object nextToken() throws HttpHeaderException;

  }

  /**
   * Iteration over word tokens, each returned as a <code>String</code>.
   */
  public class WordIterator extends FieldIterator {

    /**
     * Read the current word and move past any comma separator that
     * follows it.
     *
     * @return the next word
     * @throws HttpHeaderException if the current token is not a word or
     *         the word is followed by something other than a comma
     *         separator or the end of the fields
     */
    public String nextWord() throws HttpHeaderException {
      String word = tokenizer.readWord();
      tokenizer.skipAnyCommaSeparator();
      return word;
    }

    /**
     * {@inheritDoc}
     * @see org.melati.util.HttpHeader.FieldIterator#nextToken()
     */
    public Object nextToken() throws HttpHeaderException {
      return nextWord();
    }

  }

  /**
   * Factory method to create and return an iterator of words.
   *
   * @return a new WordIterator
   */
  public final WordIterator wordIterator() {
    return new WordIterator();
  }

  /**
   * Iteration over {@link HttpHeader.TokenAndQValue}s.
   */
  public class TokenAndQValueIterator extends FieldIterator {

    /**
     * @return the next TokenAndQValue
     * @throws HttpHeaderException if the next field cannot be parsed
     */
    public TokenAndQValue nextTokenAndQValue() throws HttpHeaderException {
      return HttpHeader.this.nextTokenAndQValue();
    }

    /**
     * {@inheritDoc}
     * @see org.melati.util.HttpHeader.FieldIterator#nextToken()
     */
    public Object nextToken() throws HttpHeaderException {
      return nextTokenAndQValue();
    }

  }

  /**
   * Factory method to create and return the next
   * {@link HttpHeader.TokenAndQValue}.
   *
   * @return a new TokenAndQValue
   * @throws HttpHeaderException if the next field cannot be parsed
   */
  public TokenAndQValue nextTokenAndQValue() throws HttpHeaderException {
    return new TokenAndQValue(tokenizer);
  }

  /**
   * Factory method to create and return an iterator of {@link TokenAndQValue}'s.
   *
   * @return a new TokenAndQValueIterator
   */
  public TokenAndQValueIterator tokenAndQValueIterator() {
    return new TokenAndQValueIterator();
  }

  /**
   * A token and associated qvalue.
   */
  public static class TokenAndQValue {

    /**
     * Token which may be followed by a semicolon separator and qvalue.
     */
    public String token;

    /**
     * Value between zero and one with at most 3 decimal places.
     * <p>
     * q stands for "quality" but the RFC 2616 says this is not
     * completely accurate.
     * Values closer to 1.0 are better.
     * Zero means completely unfit.
     * <p>
     * The default is 1.0 if not explicitly initialised and this
     * appears to be correct for most possible uses if not all.
     * <p>
     * Note that the range is not checked when a value is parsed.
     */
    public float q = 1.0f;

    /**
     * Create an uninitialised instance.
     */
    public TokenAndQValue() {
    }

    /**
     * Create an instance and initialise it by reading the given
     * tokenizer, then move past any comma separator that follows.
     *
     * @param t the tokenizer positioned on the token to read
     * @throws HttpHeaderException if the field cannot be parsed or is
     *         followed by something other than a comma separator or the
     *         end of the fields
     */
    public TokenAndQValue(Tokenizer t) throws HttpHeaderException {
      this();
      t.readTokenAndQValue(this);
      t.skipAnyCommaSeparator();
    }

  }

  /**
   * Tokenizer for parsing occurrences of a field.
   * <p>
   * Header fields have format defined in RFC 2616 and have the same
   * general form as in RFC 822 section 3.1.
   * <p>
   * This is for fields consisting of tokens, quoted strings and
   * separators and not those consisting of an arbitrary sequence of
   * octets.
   * Tokens are US ASCII characters other than:
   * <ul>
   * <li> control characters 0000 to 001F and 007F;
   * <li> separators defined in RFC 2616;
   * </ul>
   * <p>
   * The convenience methods defined here provide some guidance on how
   * to interact with the super-type but you can also use inherited
   * methods.
   * <p>
   * We assume that the next token is always already read when a method
   * starts to interpret a sequence of tokens.
   * In other words the first token is read by the constructor(s) and then
   * each such
   * method returns as a result of reading a token or EOF that it cannot
   * process but without pushing it back.
   * The next token to be interpreted is hence the current token
   * described by the inherited instance variables.
   * <p>
   * Note that whitespace is automatically skipped by the supertype.
   * Note also that number parsing is enabled, so a token that starts
   * with a digit, a full stop or a minus sign is read as a number
   * rather than as a word.
   *
   * @author Jim Wright
   */
  public static class Tokenizer extends StreamTokenizer {

    /**
     * Create an instance from a string formed by concatenation of
     * continuation lines and all occurrences of a field, with comma
     * separators.
     * <p>
     * In theory a separator can consist of one or more commas and
     * spaces and tab.
     * Fields are never empty.
     * We cope with this but I doubt typical callers ever encounter
     * such strings.
     * <p>
     * The field list should not be empty but null is
     * allowed to explicitly indicate that there are no such fields,
     * if an instance is required nevertheless to provide other
     * functionality.
     *
     * @param fields the comma separated fields or null
     * @throws HttpHeaderException Error detected in the argument.
     */
    public Tokenizer(String fields) throws HttpHeaderException {
      super(new StringReader(fields == null ? "" : fields));

      if (fields != null && fields.length() == 0) {
        throw new HttpHeaderException("Empty sequence of HTTP header fields");
      }

      // The order of the following calls matters: each one overrides
      // the syntax table entries set by the calls before it.
      resetSyntax();

      // Start by treating every printable US ASCII character as part of a word...
      wordChars('\u0020', '\u007E');

      // ...then carve out the RFC 2616 separators as single character tokens.
      String separator = "()<>@,;:\\\"/[]?={} \t";
      for (int i = 0; i < separator.length(); i++) {
        ordinaryChar(separator.charAt(i));
      }

      // Linear whitespace between tokens is skipped.
      whitespaceChars(' ', ' ');
      whitespaceChars('\t', '\t');

      // Quoted strings are returned in sval without the quotes.
      quoteChar('"');

      // Needed for qvalues.
      parseNumbers();

      // Line breaks stay ordinary characters so that nextLToken() can
      // detect and reject them.
      eolIsSignificant(false);
      lowerCaseMode(false);
      slashSlashComments(false);
      slashStarComments(false);

      // Read the first token so that it is current, as the other methods assume.
      nextLToken();
      if (ttype == ',') {
        throw new HttpHeaderException("HTTP header fields starts with comma separator");
      }
    }

    /**
     * Same as <code>nextToken()</code> but does not throw an <code>IOException</code>
     * and handles erroneous line breaks.
     *
     * @return int value of next LToken
     * @throws HttpHeaderException Error detected in the fields, or the
     *         underlying reader failed.
     */
    public int nextLToken() throws HttpHeaderException {
      final int result;
      try {
        result = nextToken();
      }
      catch (IOException e) {
        // Report the failure instead of returning a made-up token type
        // which would leave ttype describing the previous token.
        throw new HttpHeaderException("Error reading HTTP header fields", e);
      }
      // A line feed is an ordinary character here so it is returned as
      // itself, which is the value of TT_EOL.
      if (ttype == TT_EOL) {
        throw new HttpHeaderException("HTTP header fields span unquoted line breaks");
      }
      return result;
    }

    /**
     * Read up to and including the next token after comma
     * separator(s) and whitespace assuming the current token is a comma.
     *
     * @return Resulting ttype.
     * @throws IllegalStateException Current token not a comma.
     * @throws HttpHeaderException Error detected in the fields.
     */
    public final int skipCommaSeparator() throws HttpHeaderException {
      if (ttype != ',') {
        throw new IllegalStateException("Not at a comma");
      }
      // Consecutive commas form a single separator.
      while (nextLToken() == ',') {
        // keep skipping
      }
      return ttype;
    }

    /**
     * Read up to and including the next token after any comma
     * separator(s) and whitespace.
     * <p>
     * This is the same as {@link #skipCommaSeparator()} but it does
     * nothing if we are at EOF, and anything other than a comma is
     * reported as an error in the fields rather than blamed on the
     * caller, because callers use this without checking the current
     * token first.
     *
     * @return Resulting ttype.
     * @throws HttpHeaderException Current token is neither a comma nor
     *         EOF, or another error was detected in the fields.
     */
    public final int skipAnyCommaSeparator() throws HttpHeaderException {
      if (ttype == TT_EOF) {
        return ttype;
      }
      if (ttype != ',') {
        throw new HttpHeaderException(
            "Expecting comma separator or end of fields but encountered: " +
            toString());
      }
      return skipCommaSeparator();
    }

    /**
     * Convenience method to test for token or quoted string.
     * <p>
     * If this returns true then the token value is in <code>sval</code>
     * with any quotes removed.
     *
     * @return whether token is an SVal
     */
    public final boolean isSVal() {
      return ttype == TT_WORD || ttype == '"';
    }

    /**
     * Read the word token or quoted string that comes next.
     *
     * @return the SVal
     * @throws HttpHeaderException Error detected in the fields.
     */
    public final String readSVal() throws HttpHeaderException {
      if (! isSVal()) {
        throw new HttpHeaderException("Next token is not a (possibly quoted) word: " +
                                      toString());
      }
      String result = sval;
      nextLToken();
      return result;
    }

    /**
     * Read the word token that comes next.
     *
     * @return the word as a String
     * @throws HttpHeaderException Error detected in the fields.
     */
    public final String readWord() throws HttpHeaderException {
      if (ttype != TT_WORD) {
        throw new HttpHeaderException("Next token is not a word token: " +
                                      toString());
      }
      String result = sval;
      nextLToken();
      return result;
    }

    /**
     * Read the given word token that comes next.
     *
     * @param word the word expected, compared case sensitively
     * @throws HttpHeaderException Error detected in the fields.
     */
    public final void readWord(String word) throws HttpHeaderException {
      String read = readWord();
      if (! read.equals(word)) {
        // The tokenizer has already moved on, so report the word that
        // was read rather than the now current token.
        throw new HttpHeaderException("Expecting '" + word +
                                      "' but encountered: '" + read + "'");
      }
    }

    /**
     * Read the given character that comes next.
     *
     * @param c the character expected
     * @throws HttpHeaderException Error detected in the fields.
     */
    public final void readChar(char c) throws HttpHeaderException {
      if (ttype != c) {
        throw new HttpHeaderException("Expecting '" + c +
                                      "' but encountered: " +
                                      toString());
      }
      nextLToken();
    }

    /**
     * Read the number token that comes next.
     *
     * @return the number's value as a double
     * @throws HttpHeaderException Error detected in the fields.
     */
    public final double readNVal() throws HttpHeaderException {
      if (ttype != TT_NUMBER) {
        throw new HttpHeaderException("Next token is not a number: " +
                                      toString());
      }
      double result = nval;
      nextLToken();
      return result;
    }

    /**
     * Read a token sequence of the form "; q = 0.42" and return the number.
     *
     * @return the number's value as a float
     * @throws IllegalStateException Current token not semicolon.
     * @throws HttpHeaderException Error detected in the fields.
     */
    public final float readQValue()
        throws IllegalStateException, HttpHeaderException {
      if (ttype != ';') {
        throw new IllegalStateException("Not at a semicolon");
      }
      readChar(';');
      readWord("q");
      readChar('=');
      return (float)readNVal();
    }

    /**
     * Read a word or quoted string token optionally followed by a string
     * of the form "; q = 0.42" and initialises the given object.
     *
     * @param result the object to initialise; its <code>q</code> is left
     *        unchanged if no qvalue follows the token
     * @return the given object
     * @throws HttpHeaderException Error detected in the fields.
     */
    protected TokenAndQValue readTokenAndQValue(TokenAndQValue result)
        throws HttpHeaderException {
      result.token = readSVal();
      switch (ttype) {
        case TT_EOF :
        case ',' :
          // No qvalue given.
          break;
        case ';' :
          result.q = readQValue();
          break;
        default:
          throw new HttpHeaderException("Word token: \'" + result.token +
              "\' is followed by something unexpected: " + toString());
      }
      return result;
    }

  }

  /**
   * Exception detected in an {@link HttpHeader}.
   * <p>
   * We might want to declare some supertype as thrown or make this
   * outer.
   * <p>
   * Header fields are usually obtained from servlet containers or
   * similar after some processing.
   * But it's possible that some unusual client has sent something
   * erroneous or just unusual that has not been filtered out
   * earlier and causes an error here.
   * <p>
   * In general detecting such problems requires parsing.
   * So although we could nearly always blame the caller we provide
   * a service instead (as part of the contract).
   * <p>
   * We do sometimes blame the caller because we assume that the
   * caller has checked the next token type before some call.
   * We do this by throwing an <code>IllegalStateException</code>
   * instead.
   */
  public static class HttpHeaderException extends java.lang.Exception {
    private static final long serialVersionUID = 1L;

    /**
     * Create an instance with message.
     *
     * @param message description of the problem
     */
    public HttpHeaderException(String message) {
      super(message);
    }

    /**
     * Create an instance with message and cause.
     *
     * @param message description of the problem
     * @param e the underlying cause
     */
    public HttpHeaderException(String message, Exception e) {
      super(message, e);
    }

  }

}