HttpHeader.java
/*
* $Source$
* $Revision$
*
* Copyright (C) 2003 Jim Wright
*
* Part of Melati (http://melati.org), a framework for the rapid
* development of clean, maintainable web applications.
*
* Melati is free software; Permission is granted to copy, distribute
* and/or modify this software under the terms either:
*
* a) the GNU General Public License as published by the Free Software
* Foundation; either version 2 of the License, or (at your option)
* any later version,
*
* or
*
* b) any version of the Melati Software License, as published
* at http://melati.org
*
* You should have received a copy of the GNU General Public License and
* the Melati Software License along with this program;
* if not, write to the Free Software Foundation, Inc.,
* 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA to obtain the
* GNU General Public License and visit http://melati.org to obtain the
* Melati Software License.
*
* Feel free to contact the Developers of Melati (http://melati.org),
* if you would like to work out a different arrangement than the options
* outlined here. It is our intention to allow Melati to be used by as
* wide an audience as possible.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* Contact details for copyright holder:
*
* Jim Wright <jimw At paneris.org>
* Bohemian Enterprise
* Predmerice nad Jizerou 77
* 294 74
* Mlada Boleslav
* Czech Republic
*/
package org.melati.util;
import java.io.StreamTokenizer;
import java.io.StringReader;
import java.io.IOException;
import java.util.Iterator;
import java.util.Enumeration;
/**
* Representation of occurrences of an HTTP header field.
* <p>
* These are defined in RFC 2616 and have the same general form as in
* RFC 822 section 3.1.
* <P>
* We generally assume that all continuation lines and occurrences in
* a message are concatenated with comma separators.
*
* @author Jim Wright
*/
public class HttpHeader {
/**
* Instance of inner {@link Tokenizer}.
*/
protected Tokenizer tokenizer;
/**
* Create an instance representing the given comma separated fields.
*/
public HttpHeader(String values) {
if(values == null)
values= "";
tokenizer = new Tokenizer(values);
}
/**
* Abstract enumeration of fields.
* <p>
* Subtypes decide what type of token to return and how
* to represent it.
* <p>
* This class serves to remove doubts about whether we should and can
* implement <code>Iterator</code> or <code>Enumeration</code> and
* proves itself unnecessary ;-). But we can factor stuff out and
* re-use it later.
* <p>
* Actually, it also removes the need to think about exceptions in
* subtypes.
*/
@SuppressWarnings("rawtypes")
public abstract class FieldIterator implements Iterator<Object>, Enumeration{
/**
* {@inheritDoc}
* @see java.util.Enumeration#hasMoreElements()
*/
public final boolean hasMoreElements() {
return hasNext();
}
/**
* {@inheritDoc}
* @see java.util.Enumeration#nextElement()
*/
public final Object nextElement() {
return next();
}
/**
* {@inheritDoc}
* @see java.util.Iterator#hasNext()
* @see #next()
*/
public final boolean hasNext() {
return tokenizer.ttype != StreamTokenizer.TT_EOF;
}
/**
* {@inheritDoc}
* @see java.util.Iterator#remove()
*/
public void remove() throws UnsupportedOperationException {
throw new UnsupportedOperationException("Cannot remove tokens from the HTTP header");
}
/**
* Return the next element or an exception.
*
* @return An exception if an object of the anticipated type cannot be returned
*/
public Object next() {
try {
return nextToken();
}
catch (HttpHeaderException e) {
return e;
}
}
/**
* @return the next token or throws an exception
*/
public abstract Object nextToken() throws HttpHeaderException;
}
/**
* Iteration over {@link HttpHeader.TokenAndQValue}s.
*/
public class WordIterator extends FieldIterator {
/**
* @return the next word
*/
public String nextWord() throws HttpHeaderException {
String result = tokenizer.readWord();
tokenizer.skipAnyCommaSeparator();
return result;
}
/**
* {@inheritDoc}
* @see org.melati.util.HttpHeader.FieldIterator#nextToken()
*/
public Object nextToken() throws HttpHeaderException {
return nextWord();
}
}
/**
* Factory method to create and return an iterator of words.
*
* @return a new WordIterator
*/
public final WordIterator wordIterator() {
return new WordIterator();
}
/**
* Iteration over {@link HttpHeader.TokenAndQValue}s.
*/
public class TokenAndQValueIterator extends FieldIterator {
/**
* @return the next TokenAndQValue
* @throws HttpHeaderException
*/
public TokenAndQValue nextTokenAndQValue() throws HttpHeaderException {
return HttpHeader.this.nextTokenAndQValue();
}
/**
* {@inheritDoc}
* @see org.melati.util.HttpHeader.FieldIterator#nextToken()
*/
public Object nextToken() throws HttpHeaderException {
return nextTokenAndQValue();
}
}
/**
* Factory method to create and return the next
* {@link HttpHeader.TokenAndQValue}.
* @return a new TokenAndQValue
*/
public TokenAndQValue nextTokenAndQValue() throws HttpHeaderException {
return new TokenAndQValue(tokenizer);
}
/**
* Factory method to create and return an iterator of {@link TokenAndQValue}'s.
* @return a new TokenAndQValueIterator
*/
public TokenAndQValueIterator tokenAndQValueIterator() {
return new TokenAndQValueIterator();
}
/**
* A token and associated qvalue.
*/
public static class TokenAndQValue {
/**
* Token followed by a semicolon separator.
*/
public String token;
/**
* Value between zero and one with at most 3 decimal places.
* <p>
* q stands for "quality" but the RFC 2616 says this is not
* completely accurate.
* Values closer to 1.0 are better.
* Zero means completely unfit.
* <p>
* The default is 1.0 if not explicitly initialised and this
* appears to be correct for most possible uses if not all.
*/
public float q = 1.0f;
/**
* Create an uninitialised instance.
*/
public TokenAndQValue() {
}
/**
* Create an instance and initialise it by reading the given
* tokenizer.
*/
public TokenAndQValue(Tokenizer t) throws HttpHeaderException {
this();
t.readTokenAndQValue(this);
t.skipAnyCommaSeparator();
}
}
/**
* Tokenizer for parsing occurences of a field.
* <p>
* Header fields have format defined in RFC 2616 and have the same
* general form as in RFC 822 section 3.1.
* <p>
* This is for fields consisting of tokens, quoted strings and
* separators and not those consisting of an arbitrary sequence of
* octets.
* Tokens are US ASCII characters other than:
* <ul>
* <li> control characters 0000 to 001F and 007E;
* <li> separators defined in RFC 2616;
* </ul>
* <p>
* The convenience methods defined here provide some guidance on how
* to interact with the super-type but you can also use inherited
* methods.
* <p>
* We assume that the next token is always already read when a method
* starts to interpret a sequence of tokens.
* In other words the first token is read by the constructor(s) and then
* each such
* method returns as a result of reading a token or EOF that it cannot
* process but without pushing it back.
* The next token to be interpreted is hence the current token
* described by the inherited instance variables.
* <p>
* Note that whitespace is automatically skipped by the supertype.
*
* @author Jim Wright
*/
public static class Tokenizer extends StreamTokenizer {
/**
* Create an instance from a string formed by concatenation of
* continuation lines and all occurences of a field, with comma
* separators.
* <p>
* In theory a separator can consist of one or more commas and
* spaces and tab.
* Fields are never empty.
* We cope with this but I doubt typical callers ever encounter
* such strings.
* <p>
* The field list should not be empty but null is
* allowed to explicitly indicate that there are no such fields,
* if an instance if required nevertheless to provide other
* functionality.
*
* @param fields A non-null, non-empty String
* @throws HttpHeaderException Error detected in the argument.
*/
Tokenizer(String fields) {
super(new StringReader(fields));
resetSyntax();
// Initially make all non-control characters token
// characters
wordChars('\u0020', '\u007E');
// Now change separators back. Tab is not
// necessary and there are some ranges but let's
// not try and be clever.
String separator = "()<>@,;:\\\"/[]?={} \t";
for (int i = 0; i < separator.length(); i++) {
ordinaryChar(separator.charAt(i));
// System.err.println("Tested 34");
}
// Resetting effectively did this to whitespace chars
// ordinaryChars('\u0000', '\u0020');
// Set space and table characters as whitespace
whitespaceChars(' ', ' ');
whitespaceChars('\t', '\t');
quoteChar('"');
parseNumbers();
// Here are some things we have effectively done by resetting
// ordinaryChar('/');
// ordinaryChar('\'');
// Do not do any other special processing
eolIsSignificant(false);
lowerCaseMode(false);
slashSlashComments(false);
slashStarComments(false);
// Read the first token
nextLToken();
if (ttype == ',') {
// System.err.println("Tested 36");
throw new HttpHeaderException("HTTP header fields starts with comma separator");
}
}
/**
* Same as <code>nextToken()</code> but does not throw an <code>IOException</code>
* and handles erroneous line breaks.
*
* @return int value of next LToken
* @throws HttpHeaderException Error detected in the fields.
*/
public int nextLToken() throws HttpHeaderException {
int result;
try {
result = nextToken();
if (ttype == TT_EOL) {
System.err.println("Not tested 38");
throw new HttpHeaderException("HTTP header fields span unquoted line breaks");
}
// System.err.println("Tested 39");
return result;
}
catch (IOException e) {
//assert false : "We are reading from a string";
return 0;
}
}
/**
* Read up to and including the next token after comma
* separator(s) and whitespace assuming the current token is a comma.
*
* @return Resulting ttype.
*/
public final int skipCommaSeparator() throws HttpHeaderException {
if (ttype != ',') {
throw new IllegalStateException("Not at a comma");
}
while (nextLToken() == ',')
;
return ttype;
}
/**
* Read up to and including the next token after any comma
* separator(s) and whitespace.
* <p>
* This is the same as {@link #skipCommaSeparator()} but it does
* nothing if we are and EOF.
*
* @return Resulting ttype.
*/
public final int skipAnyCommaSeparator() throws HttpHeaderException {
if (ttype != TT_EOF) {
skipCommaSeparator();
}
return ttype;
}
/**
* Convenience method to test for token or quoted string.
* <p>
* If this returns true then the token value is in <code>sval</code>
* with any quotes removed.
* @return whether token is an SVal
*/
public final boolean isSVal() {
return ttype == TT_WORD || ttype == '"';
}
/**
* Read the word token or quoted string that comes next.
*
* @return the SVal
* @throws HttpHeaderException Error detected in the fields.
*/
public final String readSVal() throws HttpHeaderException {
if (! isSVal()) {
throw new HttpHeaderException("Next token is not a (possibly quoted) word: " +
toString());
}
String result = sval;
nextLToken();
return result;
}
/**
* Read the word token that comes next.
*
* @return the word as a String
* @throws HttpHeaderException Error detected in the fields.
*/
public final String readWord() throws HttpHeaderException {
if (ttype != TT_WORD) {
throw new HttpHeaderException("Next token is not a word token: " +
toString());
}
String result = sval;
nextLToken();
// System.err.println("Tested 47");
return result;
}
/**
* Read the given word token that comes next.
*
* @throws HttpHeaderException Error detected in the fields.
*/
public final void readWord(String word) throws HttpHeaderException {
String read = readWord();
if (! read.equals(word)) {
// System.err.println("Tested 48 by temporary hack");
throw new HttpHeaderException("Expecting '" + word +
"' but encountered: " + toString());
}
}
/**
* Read the given character that comes next.
*
* @throws HttpHeaderException Error detected in the fields.
*/
public final void readChar(char c) throws HttpHeaderException {
if (ttype != c) {
// System.err.println("Tested 49");
throw new HttpHeaderException("Expecting '" + c +
"' but encountered: " +
toString());
}
nextLToken();
}
/**
* Read the number token that comes next.
* @return the number's value as a double
* @throws HttpHeaderException Error detected in the fields.
*/
public final double readNVal() throws HttpHeaderException {
if (ttype != TT_NUMBER) {
throw new HttpHeaderException("Next token is not a number: " +
toString());
}
double result = nval;
nextLToken();
return result;
}
/**
* Read a token sequence of the form "; q = 0.42" and return the number.
* @return the number's value as a float
*
* @throws IllegalStateException Current token not semicolon.
* @throws HttpHeaderException Error detected in the fields.
*/
public final float readQValue()
throws IllegalStateException, HttpHeaderException {
if (ttype != ';') {
throw new IllegalStateException("Not at a semicolon");
}
readChar(';');
readWord("q");
readChar('=');
return (float)readNVal();
}
/**
* Read a word or quoted string token optionally followed by a string
* of the form "; q = 0.42" and initialises the given object.
* @return current TokenAndQValue
*/
protected TokenAndQValue readTokenAndQValue(TokenAndQValue result)
throws HttpHeaderException {
result.token = readSVal();
switch (ttype) {
case TT_EOF :
case ',' :
break;
case ';' :
result.q = readQValue();
break;
default:
throw new HttpHeaderException("Word token: \'" + result.token +
"\' is followed by something unexpected: " + toString());
}
return result;
}
}
public static class HttpHeaderException extends MelatiRuntimeException {
private static final long serialVersionUID = -8870151118057435290L;
/**
* Create an instance with message.
*/
public HttpHeaderException(String message) {
super(message);
}
/**
* Create an instance with message and cause.
*/
public HttpHeaderException(String message, Exception e) {
super(message, e);
}
}
}