# Copyright 2011, Google Inc.
# All rights reserved.
#
# Redistribution and use in source and binary forms, with or without
# modification, are permitted provided that the following conditions are
# met:
#
#     * Redistributions of source code must retain the above copyright
# notice, this list of conditions and the following disclaimer.
#     * Redistributions in binary form must reproduce the above
# copyright notice, this list of conditions and the following disclaimer
# in the documentation and/or other materials provided with the
# distribution.
#     * Neither the name of Google Inc. nor the names of its
# contributors may be used to endorse or promote products derived from
# this software without specific prior written permission.
#
# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
# "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
# A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
# OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
# DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.


"""Utilities for parsing and formatting headers that follow the grammar
defined in HTTP RFC http://www.ietf.org/rfc/rfc2616.txt.
"""


try:
    import urlparse
except ImportError:
    # Python 3 moved the urlparse module to urllib.parse. Bind it to the
    # same name so the rest of this module works on both major versions.
    from urllib import parse as urlparse


_SEPARATORS = '()<>@,;:\\"/[]?={} \t'


def _is_char(c):
    """Returns true iff c is in CHAR as specified in HTTP RFC."""

    return ord(c) <= 127


def _is_ctl(c):
    """Returns true iff c is in CTL as specified in HTTP RFC."""

    return ord(c) <= 31 or ord(c) == 127


class ParsingState(object):
    """Cursor over the data being parsed.

    Attributes:
        data: the whole input handed to the constructor.
        head: index into data of the next character that has not been
            consumed yet.
    """

    def __init__(self, data):
        self.data = data
        self.head = 0


def peek(state, pos=0):
    """Peeks the character at pos from the head of data.

    Returns None if that position is past the end of data. The head is not
    moved.
    """

    if state.head + pos >= len(state.data):
        return None

    return state.data[state.head + pos]


def consume(state, amount=1):
    """Consumes specified amount of bytes from the head and returns the
    consumed bytes. If there's not enough bytes to consume, returns None
    and leaves the head untouched.
    """

    if state.head + amount > len(state.data):
        return None

    result = state.data[state.head:state.head + amount]
    state.head = state.head + amount
    return result


def consume_string(state, expected):
    """Given a parsing state and a expected string, consumes the string from
    the head. Returns True if consumed successfully. Otherwise, returns
    False and leaves the head untouched.
    """

    pos = 0

    # Only peek while matching so that a mismatch consumes nothing.
    for c in expected:
        if c != peek(state, pos):
            return False
        pos += 1

    consume(state, pos)
    return True


def consume_lws(state):
    """Consumes a LWS from the head. Returns True if any LWS is consumed.
    Otherwise, returns False and leaves the head untouched.

    LWS = [CRLF] 1*( SP | HT )
    """

    original_head = state.head

    # The leading CRLF is optional, so the result is deliberately ignored.
    consume_string(state, '\r\n')

    pos = 0

    while True:
        c = peek(state, pos)
        if c == ' ' or c == '\t':
            pos += 1
        else:
            if pos == 0:
                # No SP/HT followed; give back the CRLF consumed above, if
                # any.
                state.head = original_head
                return False
            else:
                consume(state, pos)
                return True


def consume_lwses(state):
    """Consumes *LWS from the head."""

    while consume_lws(state):
        pass


def consume_token(state):
    """Consumes a token from the head. Returns the token or None if no token
    was found.
    """

    pos = 0

    while True:
        c = peek(state, pos)
        # A token ends at the end of data, at a separator, or at any
        # character that is a CTL or is not a CHAR.
        if c is None or c in _SEPARATORS or _is_ctl(c) or not _is_char(c):
            if pos == 0:
                return None

            return consume(state, pos)
        else:
            pos += 1


def consume_token_or_quoted_string(state):
    """Consumes a token or a quoted-string, and returns the token or unquoted
    string. If no token or quoted-string was found, returns None.

    For a quoted-string, each LWS inside the quotes is replaced with a
    single space and each quoted-pair is replaced with the character it
    escapes. If the quoted-string is malformed, the head is restored to
    where it was on entry.
    """

    original_head = state.head

    if not consume_string(state, '"'):
        return consume_token(state)

    result = []

    expect_quoted_pair = False

    while True:
        # LWS is not recognized right after a backslash; the next character
        # is taken literally as the second half of the quoted-pair.
        if not expect_quoted_pair and consume_lws(state):
            result.append(' ')
            continue

        c = consume(state)
        if c is None:
            # quoted-string is not enclosed with double quotation
            state.head = original_head
            return None
        elif expect_quoted_pair:
            expect_quoted_pair = False
            if _is_char(c):
                result.append(c)
            else:
                # Non CHAR character found in quoted-pair
                state.head = original_head
                return None
        elif c == '\\':
            expect_quoted_pair = True
        elif c == '"':
            return ''.join(result)
        elif _is_ctl(c):
            # Invalid character %r found in qdtext
            state.head = original_head
            return None
        else:
            result.append(c)


def quote_if_necessary(s):
    """Quotes arbitrary string into quoted-string.

    Returns s unchanged when it contains nothing that needs quoting.
    Otherwise returns s enclosed in double quotes, with every double quote,
    backslash and CTL character escaped as a quoted-pair. The empty string
    becomes '""'.
    """

    quote = False
    if s == '':
        return '""'

    result = []
    for c in s:
        if c == '"' or c in _SEPARATORS or _is_ctl(c) or not _is_char(c):
            quote = True

        # A backslash has to be escaped as well: inside a quoted-string it
        # starts a quoted-pair, so leaving it bare would swallow it (and
        # change the meaning of the next character) when the value is
        # parsed back by consume_token_or_quoted_string.
        if c == '"' or c == '\\' or _is_ctl(c):
            result.append('\\' + c)
        else:
            result.append(c)

    if quote:
        return '"' + ''.join(result) + '"'
    else:
        return ''.join(result)


def parse_uri(uri):
    """Parse absolute URI then return host, port and resource.

    Returns:
        A (host, port, resource) tuple.
        - If the scheme is neither 'ws' nor 'wss', uri is treated as a
          relative URI and (None, None, uri) is returned.
        - If there is no host, or reading the port raises ValueError,
          (None, None, None) is returned.
        - Otherwise port falls back to 80 for 'ws' and 443 for 'wss' when
          not given, and resource is the path ('/' if empty) followed by
          '?query' and '#fragment' when present.
    """

    parsed = urlparse.urlsplit(uri)
    if parsed.scheme != 'wss' and parsed.scheme != 'ws':
        # |uri| must be a relative URI.
        # TODO(toyoshim): Should validate |uri|.
        return None, None, uri

    if parsed.hostname is None:
        return None, None, None

    port = None
    try:
        port = parsed.port
    except ValueError:
        # port property cause ValueError on invalid null port description like
        # 'ws://host:/path'.
        return None, None, None

    if port is None:
        if parsed.scheme == 'ws':
            port = 80
        else:
            port = 443

    path = parsed.path
    if not path:
        path += '/'
    if parsed.query:
        path += '?' + parsed.query
    if parsed.fragment:
        path += '#' + parsed.fragment

    return parsed.hostname, port, path


# urlparse in Python2.5.1 doesn't have 'ws' and 'wss' entries. Register each
# scheme on its own so that one being present never causes the other to be
# skipped or added twice.
for _scheme in ('ws', 'wss'):
    if _scheme not in urlparse.uses_netloc:
        urlparse.uses_netloc.append(_scheme)
del _scheme


# vi:sts=4 sw=4 et