/*
 * $Source$
 * $Revision$
 *
 * Copyright (C) 2003 Jim Wright
 *
 * Part of Melati (http://melati.org), a framework for the rapid
 * development of clean, maintainable web applications.
 *
 * Melati is free software; Permission is granted to copy, distribute
 * and/or modify this software under the terms either:
 *
 * a) the GNU General Public License as published by the Free Software
 *    Foundation; either version 2 of the License, or (at your option)
 *    any later version,
 *
 *    or
 *
 * b) any version of the Melati Software License, as published
 *    at http://melati.org
 *
 * You should have received a copy of the GNU General Public License and
 * the Melati Software License along with this program;
 * if not, write to the Free Software Foundation, Inc.,
 * 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA to obtain the
 * GNU General Public License and visit http://melati.org to obtain the
 * Melati Software License.
 *
 * Feel free to contact the Developers of Melati (http://melati.org),
 * if you would like to work out a different arrangement than the options
 * outlined here.  It is our intention to allow Melati to be used by as
 * wide an audience as possible.
 *
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 * GNU General Public License for more details.
 *
 * Contact details for copyright holder:
 *
 *     Jim Wright <jimw At paneris.org>
 *     Bohemian Enterprise
 *     Predmerice nad Jizerou 77
 *     294 74
 *     Mlada Boleslav
 *     Czech Republic
 */

package org.melati.util;

import java.io.StreamTokenizer;
import java.io.StringReader;
import java.io.IOException;
import java.util.Iterator;
import java.util.Enumeration;

/**
 * Representation of occurrences of an HTTP header field.
 * <p>
 * These are defined in RFC 2616 and have the same general form as in
 * RFC 822 section 3.1.
 * <p>
 * We generally assume that all continuation lines and occurrences in
 * a message are concatenated with comma separators.
 *
 * @author Jim Wright
 */
public class HttpHeader {

  /**
   * Instance of inner {@link Tokenizer}, shared by every iterator created
   * from this header.
   */
  protected Tokenizer tokenizer;

  /**
   * Create an instance representing the given comma separated fields.
   *
   * @param values the comma separated field values; <code>null</code> is
   *        treated as the empty string
   * @throws HttpHeaderException if the values start with a comma separator
   *         or the first token cannot be read
   */
  public HttpHeader(String values) {
    tokenizer = new Tokenizer(values == null ? "" : values);
  }

  /**
   * Abstract enumeration of fields.
   * <p>
   * Subtypes decide what type of token to return and how
   * to represent it.
   * <p>
   * This class serves to remove doubts about whether we should and can
   * implement <code>Iterator</code> or <code>Enumeration</code> and
   * proves itself unnecessary ;-). But we can factor stuff out and
   * re-use it later.
   * <p>
   * Actually, it also removes the need to think about exceptions in
   * subtypes.
   * <p>
   * All iterators of one {@link HttpHeader} read from the same
   * {@link #tokenizer}, so they share a single position.
   */
  // The raw Enumeration supertype is kept on purpose: parameterising it
  // would break callers that assign an instance to Enumeration<SomeType>.
  @SuppressWarnings("rawtypes")
  public abstract class FieldIterator implements Iterator<Object>, Enumeration {

    /**
     * {@inheritDoc}
     * @see java.util.Enumeration#hasMoreElements()
     */
    @Override
    public final boolean hasMoreElements() {
      return hasNext();
    }

    /**
     * {@inheritDoc}
     * @see java.util.Enumeration#nextElement()
     */
    @Override
    public final Object nextElement() {
      return next();
    }

    /**
     * Report whether the tokenizer has not yet reached the end of the
     * header.
     *
     * @return <code>true</code> unless the current token is EOF
     * @see java.util.Iterator#hasNext()
     * @see #next()
     */
    @Override
    public final boolean hasNext() {
      return tokenizer.ttype != StreamTokenizer.TT_EOF;
    }

    /**
     * Always fails: tokens cannot be removed from a header.
     *
     * @throws UnsupportedOperationException always
     * @see java.util.Iterator#remove()
     */
    @Override
    public void remove() throws UnsupportedOperationException {
      throw new UnsupportedOperationException("Cannot remove tokens from the HTTP header");
    }

    /**
     * Return the next element or an exception.
     * <p>
     * An {@link HttpHeaderException} raised by {@link #nextToken()} is
     * returned as the element rather than thrown. If that failure happened
     * before any token was consumed, the offending token is discarded so
     * that a loop driven by {@link #hasNext()} always terminates instead
     * of receiving the same exception forever.
     *
     * @return the next token, or the exception explaining why an object of
     *         the anticipated type cannot be returned
     */
    @Override
    public Object next() {
      final long consumedBefore = tokenizer.tokensRead;
      try {
        return nextToken();
      }
      catch (HttpHeaderException e) {
        boolean noProgress = tokenizer.tokensRead == consumedBefore;
        if (noProgress && tokenizer.ttype != StreamTokenizer.TT_EOF) {
          try {
            tokenizer.nextLToken();
          }
          catch (HttpHeaderException ignored) {
            // The tokenizer has advanced even though the new token is
            // itself erroneous; that error is reported by the next call.
          }
        }
        return e;
      }
    }

    /**
     * Read the next element of the type the subtype deals in.
     *
     * @return the next token
     * @throws HttpHeaderException if such a token cannot be read
     */
    public abstract Object nextToken() throws HttpHeaderException;

  }

  /**
   * Iteration over word tokens, returned as <code>String</code>s.
   */
  public class WordIterator extends FieldIterator {

    /**
     * Read the current word and move past the comma separator(s) that
     * follow it.
     *
     * @return the next word
     * @throws HttpHeaderException if the current token is not a word
     * @throws IllegalStateException if the word is followed by something
     *         other than a comma or the end of the header
     */
    public String nextWord() throws HttpHeaderException {
      String result = tokenizer.readWord();
      tokenizer.skipAnyCommaSeparator();
      return result;
    }

    /**
     * {@inheritDoc}
     * @see org.melati.util.HttpHeader.FieldIterator#nextToken()
     */
    @Override
    public Object nextToken() throws HttpHeaderException {
      return nextWord();
    }

  }

  /**
   * Factory method to create and return an iterator of words.
   *
   * @return a new WordIterator
   */
  public final WordIterator wordIterator() {
    return new WordIterator();
  }

  /**
   * Iteration over {@link HttpHeader.TokenAndQValue}s.
   */
  public class TokenAndQValueIterator extends FieldIterator {

    /**
     * @return the next TokenAndQValue
     * @throws HttpHeaderException if the fields cannot be parsed
     */
    public TokenAndQValue nextTokenAndQValue() throws HttpHeaderException {
      return HttpHeader.this.nextTokenAndQValue();
    }

    /**
     * {@inheritDoc}
     * @see org.melati.util.HttpHeader.FieldIterator#nextToken()
     */
    @Override
    public Object nextToken() throws HttpHeaderException {
      return nextTokenAndQValue();
    }

  }

  /**
   * Factory method to create and return the next
   * {@link HttpHeader.TokenAndQValue}.
   *
   * @return a new TokenAndQValue read from the tokenizer
   * @throws HttpHeaderException if the fields cannot be parsed
   */
  public TokenAndQValue nextTokenAndQValue() throws HttpHeaderException {
    return new TokenAndQValue(tokenizer);
  }

  /**
   * Factory method to create and return an iterator of {@link TokenAndQValue}'s.
   *
   * @return a new TokenAndQValueIterator
   */
  public TokenAndQValueIterator tokenAndQValueIterator() {
    return new TokenAndQValueIterator();
  }

  /**
   * A token and associated qvalue.
   */
  public static class TokenAndQValue {

    /**
     * Token, optionally followed in the header by a semicolon separator
     * and a qvalue.
     */
    public String token;

    /**
     * Value between zero and one with at most 3 decimal places.
     * <p>
     * q stands for "quality" but the RFC 2616 says this is not
     * completely accurate.
     * Values closer to 1.0 are better.
     * Zero means completely unfit.
     * <p>
     * The default is 1.0 if not explicitly initialised and this
     * appears to be correct for most possible uses if not all.
     */
    public float q = 1.0f;

    /**
     * Create an uninitialised instance.
     */
    public TokenAndQValue() {
    }

    /**
     * Create an instance and initialise it by reading the given
     * tokenizer, then move past any comma separator(s) that follow.
     *
     * @param t the tokenizer positioned at a word or quoted string
     * @throws HttpHeaderException if the fields cannot be parsed
     */
    public TokenAndQValue(Tokenizer t) throws HttpHeaderException {
      this();
      t.readTokenAndQValue(this);
      t.skipAnyCommaSeparator();
    }

  }

  /**
   * Tokenizer for parsing occurrences of a field.
   * <p>
   * Header fields have format defined in RFC 2616 and have the same
   * general form as in RFC 822 section 3.1.
   * <p>
   * This is for fields consisting of tokens, quoted strings and
   * separators and not those consisting of an arbitrary sequence of
   * octets.
   * Tokens are US ASCII characters other than:
   * <ul>
   * <li> control characters 0000 to 001F and 007F;
   * <li> separators defined in RFC 2616;
   * </ul>
   * <p>
   * The convenience methods defined here provide some guidance on how
   * to interact with the super-type but you can also use inherited
   * methods.
   * <p>
   * We assume that the next token is always already read when a method
   * starts to interpret a sequence of tokens.
   * In other words the first token is read by the constructor(s) and then
   * each such
   * method returns as a result of reading a token or EOF that it cannot
   * process but without pushing it back.
   * The next token to be interpreted is hence the current token
   * described by the inherited instance variables.
   * <p>
   * Note that whitespace is automatically skipped by the supertype.
   *
   * @author Jim Wright
   */
  public static class Tokenizer extends StreamTokenizer {

    /**
     * Separator characters, which are never part of a token.
     * Tab is not strictly necessary here and some of these form ranges,
     * but let's not try and be clever.
     */
    private static final String SEPARATORS = "()<>@,;:\\\"/[]?={} \t";

    /**
     * Number of tokens read so far; lets
     * {@link HttpHeader.FieldIterator#next()} detect a failure that
     * consumed nothing.
     */
    private long tokensRead;

    /**
     * Create an instance from a string formed by concatenation of
     * continuation lines and all occurrences of a field, with comma
     * separators.
     * <p>
     * In theory a separator can consist of one or more commas and
     * spaces and tab.
     * Fields are never empty.
     * We cope with this but I doubt typical callers ever encounter
     * such strings.
     * <p>
     * The field list should not be empty but null is
     * allowed to explicitly indicate that there are no such fields,
     * if an instance is required nevertheless to provide other
     * functionality.
     *
     * @param fields the field values; <code>null</code> is treated as the
     *        empty string
     * @throws HttpHeaderException Error detected in the argument.
     */
    Tokenizer(String fields) {
      super(new StringReader(fields == null ? "" : fields));

      resetSyntax();
      // Initially make all non-control characters token characters ...
      wordChars('\u0020', '\u007E');
      // ... then change the separators back.
      for (int i = 0; i < SEPARATORS.length(); i++) {
        ordinaryChar(SEPARATORS.charAt(i));
      }

      // Resetting made the control characters ordinary, so only space
      // and tab need to be declared as whitespace.
      whitespaceChars(' ', ' ');
      whitespaceChars('\t', '\t');

      quoteChar('"');

      parseNumbers();

      // Do not do any other special processing
      eolIsSignificant(false);
      lowerCaseMode(false);
      slashSlashComments(false);
      slashStarComments(false);

      // Read the first token
      nextLToken();
      if (ttype == ',') {
        throw new HttpHeaderException("HTTP header fields starts with comma separator");
      }
    }

    /**
     * Read the next token exactly as the supertype does, additionally
     * counting it.
     *
     * @return the type of the token read
     * @throws IOException if the underlying reader fails
     */
    @Override
    public int nextToken() throws IOException {
      int type = super.nextToken();
      tokensRead++;
      return type;
    }

    /**
     * Same as <code>nextToken()</code> but does not throw an <code>IOException</code>
     * and handles erroneous line breaks.
     *
     * @return int value of next LToken
     * @throws HttpHeaderException Error detected in the fields, or the
     *         underlying reader failed.
     */
    public int nextLToken() throws HttpHeaderException {
      final int result;
      try {
        result = nextToken();
      }
      catch (IOException e) {
        // Not expected, as we read from a string, but never hide it.
        throw new HttpHeaderException("Unexpected I/O error reading HTTP header fields", e);
      }
      if (ttype == TT_EOL) {
        throw new HttpHeaderException("HTTP header fields span unquoted line breaks");
      }
      return result;
    }

    /**
     * Read up to and including the next token after comma
     * separator(s) and whitespace assuming the current token is a comma.
     *
     * @return Resulting ttype.
     * @throws IllegalStateException Current token not a comma.
     * @throws HttpHeaderException Error detected in the fields.
     */
    public final int skipCommaSeparator() throws HttpHeaderException {
      if (ttype != ',') {
        throw new IllegalStateException("Not at a comma");
      }
      do {
        nextLToken();
      } while (ttype == ',');
      return ttype;
    }

    /**
     * Read up to and including the next token after any comma
     * separator(s) and whitespace.
     * <p>
     * This is the same as {@link #skipCommaSeparator()} but it does
     * nothing if we are at EOF.
     *
     * @return Resulting ttype.
     * @throws IllegalStateException Current token neither a comma nor EOF.
     * @throws HttpHeaderException Error detected in the fields.
     */
    public final int skipAnyCommaSeparator() throws HttpHeaderException {
      if (ttype != TT_EOF) {
        skipCommaSeparator();
      }
      return ttype;
    }

    /**
     * Convenience method to test for token or quoted string.
     * <p>
     * If this returns true then the token value is in <code>sval</code>
     * with any quotes removed.
     *
     * @return whether token is an SVal
     */
    public final boolean isSVal() {
      return ttype == TT_WORD || ttype == '"';
    }

    /**
     * Read the word token or quoted string that comes next.
     *
     * @return the SVal
     * @throws HttpHeaderException Error detected in the fields.
     */
    public final String readSVal() throws HttpHeaderException {
      if (!isSVal()) {
        throw new HttpHeaderException("Next token is not a (possibly quoted) word: " +
                                      toString());
      }
      String result = sval;
      nextLToken();
      return result;
    }

    /**
     * Read the word token that comes next.
     *
     * @return the word as a String
     * @throws HttpHeaderException Error detected in the fields.
     */
    public final String readWord() throws HttpHeaderException {
      if (ttype != TT_WORD) {
        throw new HttpHeaderException("Next token is not a word token: " +
                                      toString());
      }
      String result = sval;
      nextLToken();
      return result;
    }

    /**
     * Read the given word token that comes next.
     *
     * @param word the word that must come next
     * @throws HttpHeaderException Error detected in the fields.
     */
    public final void readWord(String word) throws HttpHeaderException {
      String read = readWord();
      if (!read.equals(word)) {
        throw new HttpHeaderException("Expecting '" + word +
                                      "' but encountered: " + toString());
      }
    }

    /**
     * Read the given character that comes next.
     *
     * @param c the ordinary character that must be the current token
     * @throws HttpHeaderException Error detected in the fields.
     */
    public final void readChar(char c) throws HttpHeaderException {
      if (ttype != c) {
        throw new HttpHeaderException("Expecting '" + c +
                                      "' but encountered: " +
                                      toString());
      }
      nextLToken();
    }

    /**
     * Read the number token that comes next.
     *
     * @return the number's value as a double
     * @throws HttpHeaderException Error detected in the fields.
     */
    public final double readNVal() throws HttpHeaderException {
      if (ttype != TT_NUMBER) {
        throw new HttpHeaderException("Next token is not a number: " +
                                      toString());
      }
      double result = nval;
      nextLToken();
      return result;
    }

    /**
     * Read a token sequence of the form "; q = 0.42" and return the number.
     *
     * @return the number's value as a float
     * @throws IllegalStateException Current token not semicolon.
     * @throws HttpHeaderException Error detected in the fields.
     */
    public final float readQValue()
        throws IllegalStateException, HttpHeaderException {
      if (ttype != ';') {
        throw new IllegalStateException("Not at a semicolon");
      }
      readChar(';');
      readWord("q");
      readChar('=');
      return (float)readNVal();
    }

    /**
     * Read a word or quoted string token optionally followed by a string
     * of the form "; q = 0.42" and initialises the given object.
     *
     * @param result the object to initialise
     * @return the given TokenAndQValue
     * @throws HttpHeaderException if the token is missing or is followed
     *         by anything other than a comma, a qvalue or the end of the
     *         header
     */
    protected TokenAndQValue readTokenAndQValue(TokenAndQValue result)
        throws HttpHeaderException {
      result.token = readSVal();
      switch (ttype) {
        case TT_EOF :
        case ',' :
          // No qvalue given: keep whatever q the object already has.
          break;
        case ';' :
          result.q = readQValue();
          break;
        default:
          throw new HttpHeaderException("Word token: \'" + result.token +
              "\' is followed by something unexpected: " + toString());
      }
      return result;
    }

  }

  /**
   * Thrown when header fields cannot be parsed.
   */
  public static class HttpHeaderException extends MelatiRuntimeException {

    private static final long serialVersionUID = -8870151118057435290L;

    /**
     * Create an instance with message.
     *
     * @param message description of the problem
     */
    public HttpHeaderException(String message) {
      super(message);
    }

    /**
     * Create an instance with message and cause.
     *
     * @param message description of the problem
     * @param e the underlying cause
     */
    public HttpHeaderException(String message, Exception e) {
      super(message, e);
    }

  }

}