# -*- coding: utf-8 -*- """ markupsafe ~~~~~~~~~~ Implements a Markup string. :copyright: (c) 2010 by Armin Ronacher. :license: BSD, see LICENSE for more details. """ import re from ._compat import text_type, string_types, int_types, \ unichr, PY2 __all__ = ['Markup', 'soft_unicode', 'escape', 'escape_silent'] _striptags_re = re.compile(r'(|<[^>]*>)') _entity_re = re.compile(r'&([^;]+);') class Markup(text_type): r"""Marks a string as being safe for inclusion in HTML/XML output without needing to be escaped. This implements the `__html__` interface a couple of frameworks and web applications use. :class:`Markup` is a direct subclass of `unicode` and provides all the methods of `unicode` just that it escapes arguments passed and always returns `Markup`. The `escape` function returns markup objects so that double escaping can't happen. The constructor of the :class:`Markup` class can be used for three different things: When passed an unicode object it's assumed to be safe, when passed an object with an HTML representation (has an `__html__` method) that representation is used, otherwise the object passed is converted into a unicode string and then assumed to be safe: >>> Markup("Hello World!") Markup(u'Hello World!') >>> class Foo(object): ... def __html__(self): ... return 'foo' ... >>> Markup(Foo()) Markup(u'foo') If you want object passed being always treated as unsafe you can use the :meth:`escape` classmethod to create a :class:`Markup` object: >>> Markup.escape("Hello World!") Markup(u'Hello <em>World</em>!') Operations on a markup string are markup aware which means that all arguments are passed through the :func:`escape` function: >>> em = Markup("%s") >>> em % "foo & bar" Markup(u'foo & bar') >>> strong = Markup("%(text)s") >>> strong % {'text': 'hacker here'} Markup(u'<blink>hacker here</blink>') >>> Markup("Hello ") + "" Markup(u'Hello <foo>') """ __slots__ = () def __new__(cls, base=u'', encoding=None, errors='strict'): if hasattr(base, '__html__'): base = base.__html__() if encoding is None: return text_type.__new__(cls, base) return text_type.__new__(cls, base, encoding, errors) def __html__(self): return self def __add__(self, other): if isinstance(other, string_types) or hasattr(other, '__html__'): return self.__class__(super(Markup, self).__add__(self.escape(other))) return NotImplemented def __radd__(self, other): if hasattr(other, '__html__') or isinstance(other, string_types): return self.escape(other).__add__(self) return NotImplemented def __mul__(self, num): if isinstance(num, int_types): return self.__class__(text_type.__mul__(self, num)) return NotImplemented __rmul__ = __mul__ def __mod__(self, arg): if isinstance(arg, tuple): arg = tuple(_MarkupEscapeHelper(x, self.escape) for x in arg) else: arg = _MarkupEscapeHelper(arg, self.escape) return self.__class__(text_type.__mod__(self, arg)) def __repr__(self): return '%s(%s)' % ( self.__class__.__name__, text_type.__repr__(self) ) def join(self, seq): return self.__class__(text_type.join(self, map(self.escape, seq))) join.__doc__ = text_type.join.__doc__ def split(self, *args, **kwargs): return list(map(self.__class__, text_type.split(self, *args, **kwargs))) split.__doc__ = text_type.split.__doc__ def rsplit(self, *args, **kwargs): return list(map(self.__class__, text_type.rsplit(self, *args, **kwargs))) rsplit.__doc__ = text_type.rsplit.__doc__ def splitlines(self, *args, **kwargs): return list(map(self.__class__, text_type.splitlines(self, *args, **kwargs))) splitlines.__doc__ = text_type.splitlines.__doc__ def unescape(self): r"""Unescape markup again into an text_type string. This also resolves known HTML4 and XHTML entities: >>> Markup("Main » About").unescape() u'Main \xbb About' """ from _constants import HTML_ENTITIES def handle_match(m): name = m.group(1) if name in HTML_ENTITIES: return unichr(HTML_ENTITIES[name]) try: if name[:2] in ('#x', '#X'): return unichr(int(name[2:], 16)) elif name.startswith('#'): return unichr(int(name[1:])) except ValueError: pass return u'' return _entity_re.sub(handle_match, text_type(self)) def striptags(self): r"""Unescape markup into an text_type string and strip all tags. This also resolves known HTML4 and XHTML entities. Whitespace is normalized to one: >>> Markup("Main » About").striptags() u'Main \xbb About' """ stripped = u' '.join(_striptags_re.sub('', self).split()) return Markup(stripped).unescape() @classmethod def escape(cls, s): """Escape the string. Works like :func:`escape` with the difference that for subclasses of :class:`Markup` this function would return the correct subclass. """ rv = escape(s) if rv.__class__ is not cls: return cls(rv) return rv def make_wrapper(name): orig = getattr(text_type, name) def func(self, *args, **kwargs): args = _escape_argspec(list(args), enumerate(args), self.escape) #_escape_argspec(kwargs, kwargs.iteritems(), None) return self.__class__(orig(self, *args, **kwargs)) func.__name__ = orig.__name__ func.__doc__ = orig.__doc__ return func for method in '__getitem__', 'capitalize', \ 'title', 'lower', 'upper', 'replace', 'ljust', \ 'rjust', 'lstrip', 'rstrip', 'center', 'strip', \ 'translate', 'expandtabs', 'swapcase', 'zfill': locals()[method] = make_wrapper(method) # new in python 2.5 if hasattr(text_type, 'partition'): def partition(self, sep): return tuple(map(self.__class__, text_type.partition(self, self.escape(sep)))) def rpartition(self, sep): return tuple(map(self.__class__, text_type.rpartition(self, self.escape(sep)))) # new in python 2.6 if hasattr(text_type, 'format'): format = make_wrapper('format') # not in python 3 if hasattr(text_type, '__getslice__'): __getslice__ = make_wrapper('__getslice__') del method, make_wrapper def _escape_argspec(obj, iterable, escape): """Helper for various string-wrapped functions.""" for key, value in iterable: if hasattr(value, '__html__') or isinstance(value, string_types): obj[key] = escape(value) return obj class _MarkupEscapeHelper(object): """Helper for Markup.__mod__""" def __init__(self, obj, escape): self.obj = obj self.escape = escape __getitem__ = lambda s, x: _MarkupEscapeHelper(s.obj[x], s.escape) __unicode__ = __str__ = lambda s: text_type(s.escape(s.obj)) __repr__ = lambda s: str(s.escape(repr(s.obj))) __int__ = lambda s: int(s.obj) __float__ = lambda s: float(s.obj) # we have to import it down here as the speedups and native # modules imports the markup type which is define above. try: from _speedups import escape, escape_silent, soft_unicode except ImportError: from ._native import escape, escape_silent, soft_unicode if not PY2: soft_str = soft_unicode __all__.append('soft_str')