# Source code for gs.group.messages.text.matcher

# -*- coding: utf-8 -*-
############################################################################
#
# Copyright © 2015 OnlineGroups.net and Contributors.
# All Rights Reserved.
#
# This software is subject to the provisions of the Zope Public License,
# Version 2.1 (ZPL).  A copy of the ZPL should accompany this distribution.
# THIS SOFTWARE IS PROVIDED "AS IS" AND ANY AND ALL EXPRESS OR IMPLIED
# WARRANTIES ARE DISCLAIMED, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
# WARRANTIES OF TITLE, MERCHANTABILITY, AGAINST INFRINGEMENT, AND FITNESS
# FOR A PARTICULAR PURPOSE.
#
############################################################################
from __future__ import absolute_import, unicode_literals, print_function
from re import compile as re_compile, I as re_I, M as re_M, U as re_U
from string import punctuation


class Matcher(object):
    '''Match a word, by a regular expression, and make a substitution

    :param str matchRE: The regular expression used to check if there was a match
                        (see :func:`re.match`)
    :param str subStr: The string specifying the substitution (see :func:`re.sub`)
    :param int weight: A number stored on the matcher as :attr:`weight`. It is not used
                       by this class itself (presumably callers use it to order the
                       matchers -- confirm against the caller).'''

    def __init__(self, matchRE, subStr, weight=10):
        self.matchRE = matchRE
        self.subStr = subStr
        self.weight = weight
        #: The regular expression used to make the match. The flags :const:`re.I`,
        #: :const:`re.M`, and :const:`re.U` are set.
        self.re = re_compile(self.matchRE, re_I | re_M | re_U)

    def match(self, s):
        '''Does the string match the regular expression?

        The expression is anchored at the start of the string (:func:`re.match`
        semantics), so a pattern that only occurs later in ``s`` does not match.

        :param str s: The string to evaluate
        :returns: The match object if the start of the string matches the regular
                  expression, ``None`` otherwise. The result can be used directly in
                  a boolean context (a match object is always true).
        :rtype: match object or ``None``'''
        return self.re.match(s)

    def sub(self, s):
        '''Substitute the substitution string into the string

        Every non-overlapping occurrence of the regular expression in ``s`` is replaced
        by :attr:`self.subStr` (which may refer to named groups of the expression). If
        nothing matches then ``s`` is returned unchanged.

        :param str s: The string to process
        :returns: The new string, with :attr:`self.subStr` substituted in
        :rtype: unicode'''
        return self.re.sub(self.subStr, s)
#: Turn words within ``*asterisk*`` characters into bold-elements. This is as close as
#: GroupServer gets to implementing a wiki.
# A raw string is used so ``\*`` reaches the regular-expression engine without relying
# on Python keeping an unrecognised string escape (which raises a warning).
boldMatcher = Matcher(r"(?P<boldText>\*.*\*)", r'<b>\g<boldText></b>', 10)

#: Turn email addresses (``person@example.com``) into clickable ``mailto:`` links.
#: Surrounding text (such as parentheses) is added to the *link text*, while the address
#: is extracted and used as the *link target*.
emailMatcher = Matcher(
    r"(?P<leading>.*?)(?P<address>[A-Z0-9\._%+-]+@[A-Z0-9.-]+\.[A-Z]+)(?P<trailing>.*)",
    r'<a class="email" href="mailto:\g<address>">\g<leading>\g<address>\g<trailing></a>',
    20)

#: Turn site names that start with *www* (``www.example.com``) into clickable
#: ``http://`` links.
wwwMatcher = Matcher(r"(?P<siteName>www\..+)",
                     r'<a href="http://\g<siteName>">\g<siteName></a>', 30)


class URIMatcher(Matcher):
    '''A horrid hack for a horrid issue: marking up URIs, including very long ones

    Strings of 32 characters or fewer are handled by the normal regular-expression
    substitution. Longer strings are handled by :meth:`long_url_sub`, which adds
    zero-width spaces to the link text and, for strings over 64 characters, marks the
    link with the ``small`` class.'''

    def __init__(self):
        super(URIMatcher, self).__init__(
            r"(?P<leading>\&lt;|\(|\[|\{|\"|\'|^)"
            r"(?P<protocol>http://|https://)"
            # The last label uses ``\d`` (a digit). The pattern strings are raw, so a
            # doubled backslash here would match a literal backslash and no digits.
            r"(?P<host>([a-z\d][-a-z\d]*[a-z\d]\.)*[a-z][-a-z\d]+[a-z])"
            r"(?P<rest>.*?)"
            r"(?P<trailing>\&gt;|\)|\]|\}|\"|\'|$|\s)",
            r'<a href="\g<protocol>\g<host>\g<rest>">\g<leading>\g<protocol><b>\g<host></b>'
            r'\g<rest>\g<trailing></a>', 40)

    def sub(self, s):
        '''Turn the URI in the string into a link

        :param str s: The string to process
        :returns: The string with the URI marked up; long strings (more than 32
                  characters) are processed by :meth:`long_url_sub`.
        :rtype: unicode'''
        if len(s) <= 32:
            retval = super(URIMatcher, self).sub(s)
        else:
            retval = self.long_url_sub(s)
        return retval

    @staticmethod
    def add_zws(s):
        '''Add zero-width spaces to the string

        :param str s: The string to process
        :returns: The string with the ``&#8203;`` entity (a zero-width space) placed
                  before every character that is in :data:`string.punctuation`.
        :rtype: unicode'''
        return ''.join(('&#8203;' + c) if c in punctuation else c for c in s)

    def long_url_sub(self, s):
        '''Turn a long URI into a link whose text contains zero-width spaces

        :param str s: The string to process
        :returns: The link. The link target is the unmodified URI, while the link text
                  has zero-width spaces added to everything after the host name. If
                  ``s`` is longer than 64 characters the link is given the ``small``
                  class. Text following the matched URI is kept after the link, and a
                  string that does not match is returned unchanged (the same as the
                  regular-expression substitution used for short strings).
        :rtype: unicode'''
        m = self.re.match(s)
        if m is None:
            # Nothing to mark up: behave like re.sub, which leaves the text alone
            return s
        gd = m.groupdict()
        brokenRest = self.add_zws(gd['rest'])
        c = '{leading}{protocol}<b>{host}</b>{rest}{trailing}'
        content = c.format(leading=gd['leading'], protocol=gd['protocol'],
                           host=gd['host'], rest=brokenRest, trailing=gd['trailing'])
        if len(s) > 64:
            r = '<a class="small" href="{0}">{1}</a>'
        else:
            r = '<a href="{0}">{1}</a>'
        url = '{0}{1}{2}'.format(gd['protocol'], gd['host'], gd['rest'])
        # Keep whatever follows the match (such as punctuation after a closing
        # bracket) rather than silently dropping it from the message.
        retval = r.format(url, content) + s[m.end():]
        return retval


#: Turn URIs (both ``http`` and ``https``) into clickable
#: links. If the link is particularly long (over 64 characters)
#: then small text will be used (``<a class="small"``). Leading and
#: trailing characters (like parentheses) will be used in the
#: *link text* while just the URL will be used for the *link target*.
uriMatcher = URIMatcher()