# this project is licensed under the WTFPLv2, see COPYING.txt for details
"""Regular expression utilities
"""
import re
import unittest
from PyQt5.QtCore import Qt
__all__ = ('csToQtEnum', 'qtEnumToCs', 'qreToPattern', 'glob2re')
def csToQtEnum(cs):
    """Return a `Qt.CaseSensitivity` flag for bool `cs`

    :param cs: truth value; true means case-sensitive
    :returns: `Qt.CaseSensitive` if `cs` is true, else `Qt.CaseInsensitive`
    """
    return Qt.CaseSensitive if cs else Qt.CaseInsensitive
def qtEnumToCs(enum):
    """Return True if `enum` value equals `Qt.CaseSensitive`

    :param enum: the value to compare against `Qt.CaseSensitive`
    :returns: the result of ``enum == Qt.CaseSensitive``
    """
    return enum == Qt.CaseSensitive
# Accepted values for the `dotslash` argument of `glob2re`:
# "?" and "*" match any character
DOTSLASH_GENERIC = 0
# "?" and "*" never match "/"
DOTSLASH_NO_SLASH = 1
# "?" and "*" never match "/", nor a "." that starts a path component
DOTSLASH_NO_SLASH_AND_HIDDEN = 2


def glob2re(globstr, can_escape=False, dotslash=DOTSLASH_NO_SLASH_AND_HIDDEN,
            exact=False, double_star=False, sets=False):
    """Convert a globbing pattern to a Python regex pattern

    :param globstr: the glob pattern to convert
    :param can_escape: if True, backslashes can be used to escape other metacharacters,
                       else it will be literal
    :param dotslash: one of the ``DOTSLASH_*`` constants, it selects what "?" and "*" may match:
                     any character (``DOTSLASH_GENERIC``), any character but "/"
                     (``DOTSLASH_NO_SLASH``), or additionally no "." at the start of a path
                     component (``DOTSLASH_NO_SLASH_AND_HIDDEN``)
    :param exact: if True, the pattern will match the start and end of string (``^`` and ``$`` are
                  added)
    :param double_star: if True, "**" is interpreted to match a indefinite number of path components
    :param sets: if True, "{foo,bar}" will match "foo" or "bar"
    :returns: the regex pattern, as a string
    :raises ValueError: if `dotslash` is not one of the ``DOTSLASH_*`` constants
    """
    # fnmatch.translate uses python-specific syntax

    if dotslash == DOTSLASH_GENERIC:
        dot = first_dot = '.'
    elif dotslash == DOTSLASH_NO_SLASH:
        dot = first_dot = '[^/]'
    elif dotslash == DOTSLASH_NO_SLASH_AND_HIDDEN:
        dot = '[^/]'
        first_dot = '[^/.]'
    else:
        # fail early instead of hitting an unbound `dot` inside the callback
        raise ValueError('invalid dotslash value: %r' % (dotslash,))

    # every token the double_star alternative of the tokenizer can produce
    double_star_parts = {
        '**': '.*',
        '/**/': '/(?:.*/)?',
        '/**': '(?:/.*)?',
        '**/': '(?:.*/)?',
    }

    def is_first_component(mtc):
        # True if the token is at the start of the glob or right after a "/"
        start = mtc.start()
        return start == 0 or mtc.string[start - 1] == '/'

    def replace(mtc):
        # translate one glob token to its regex equivalent
        s = mtc.group(0)

        if s == '?':
            return first_dot if is_first_component(mtc) else dot

        if s == '*':
            if is_first_component(mtc) and dotslash == DOTSLASH_NO_SLASH_AND_HIDDEN:
                # either nothing, or something that does not start with "."
                return '(?:%s%s*)?' % (first_dot, dot)
            return dot + '*'

        if s.startswith('[') and s.endswith(']'):
            if s == '[]':
                return '(?:$FAIL^)'  # can never match
            if s == '[!]':
                return dot
            if s[1] == '!':
                return '[^%s]' % s[2:-1]
            return s

        if sets and s.startswith('{') and s.endswith('}'):
            parts = [re.escape(p) for p in s[1:-1].split(',')]
            return '(?:%s)' % '|'.join(parts)

        if can_escape and len(s) == 2 and s.startswith('\\'):
            # the escaped character is literal: re-escape it for the regex
            # instead of passing the backslash through, otherwise an escaped
            # letter would become a regex class or an invalid regex escape
            return re.escape(s[1])

        if '**' in s:
            # only the double_star alternative can yield such a token
            return double_star_parts[s]

        # a single ordinary character (this includes a lone trailing backslash):
        # escape it if it is a regex metacharacter, "|" included
        if s in '()[]{}.^$+|\\':
            return '\\' + s
        return s

    # alternatives of the tokenizer, the first one that matches wins
    reparts = []
    if can_escape:
        reparts.append(r'\\\\|\\.')  # warning: headaches
    if double_star:
        reparts.append(r'(?:^|/)\*\*(?:/|$)')
    if sets:
        reparts.append(r'\{[^}]*\}')
    reparts.append(r'\?|\*|\[[^]]*\]|.')

    r = re.sub('|'.join(reparts), replace, globstr)
    if exact:
        r = '^%s$' % r
    return r
def qreToPattern(qre):
    """Return a Python regex pattern for the Qt regular expression `qre`

    The conversion depends on ``qre.patternSyntax()``:

    * ``FixedString``: the pattern text escaped with ``qre.escape``
    * ``RegExp`` and ``RegExp2``: the pattern text, unchanged
    * ``Wildcard``: the pattern text converted by `glob2re`, "?" and "*" matching anything
    * ``WildcardUnix``: the pattern text converted by `glob2re`, with backslash escapes enabled

    :param qre: object with the ``pattern()``, ``patternSyntax()`` and ``escape()`` methods and
                the syntax constants listed above (presumably a `QRegExp`, to be confirmed
                against callers)
    :returns: the regex pattern, as a string
    :raises NotImplementedError: if the pattern syntax is none of the above
    """
    s = qre.pattern()
    syntax = qre.patternSyntax()

    if syntax == qre.FixedString:
        return qre.escape(s)
    if syntax in (qre.RegExp, qre.RegExp2):
        return s
    if syntax == qre.Wildcard:
        return glob2re(s, dotslash=DOTSLASH_GENERIC)
    if syntax == qre.WildcardUnix:
        return glob2re(s, dotslash=DOTSLASH_NO_SLASH_AND_HIDDEN,
                       can_escape=True)
    raise NotImplementedError()
class ReTests(unittest.TestCase):
    """Tests of `glob2re`, each case being (glob, strings to match, strings not to match)"""

    def check_pattern(self, glob, matches, non_matches, **options):
        """Convert `glob` with `options` and check it against the expectations

        The pattern is built with ``exact=True``. Every string of `matches` must match it
        and no string of `non_matches` may match it.
        """
        pattern = glob2re(glob, exact=True, **options)
        compiled = re.compile(pattern)

        for candidate in matches:
            self.assertIsNotNone(
                compiled.match(candidate),
                '%s should match %s (%s) %r' % (glob, candidate, pattern, options))

        for candidate in non_matches:
            self.assertIsNone(
                compiled.match(candidate),
                '%s should not match %s (%s) %r' % (glob, candidate, pattern, options))

    def _check_cases(self, cases, **options):
        """Run `check_pattern` on each case of `cases`, all with the same `options`"""
        for glob, matches, non_matches in cases:
            self.check_pattern(glob, matches, non_matches, **options)

    def test_glob2re_generic(self):
        cases = [
            ('*',
             'foo .foo foo/bar foo.bar [] *'.split(),
             []),
            ('*/*',
             'foo/bar foo/.bar foo/foo/bar foo/bar.baz foo/*'.split(),
             'foo.bar foo'.split()),
            ('foo*',
             'foo foobar foo.bar foo/bar foobar/'.split(),
             'bar barfoo .foo '.split()),
            ('*bar',
             'bar .bar foobar foo.bar foo/bar .foobar'.split(),
             'barfoo'.split()),
            ('*bar*',
             'bar .bar foobar foo/bar barbaz foobarbaz foo.bar bar.baz'.split(),
             []),
            ('???',
             'foo .fo fo/ f/o /fo'.split(),
             'foobar'.split()),
            ('?/?',
             'f/b f/. ./b'.split(),
             'foo'.split()),
        ]
        self._check_cases(cases, dotslash=0)

    def test_glob2re_filename(self):
        cases = [
            ('*',
             'foo foo.bar [] *'.split(),
             '.foo foo/bar'.split()),
            ('*/*',
             'foo/bar foo/bar.baz foo/*'.split(),
             'foo/.bar foo.bar foo/foo/bar foo'.split()),
            ('foo*',
             'foo foobar foo.bar'.split(),
             'bar barfoo .foo foo/bar foobar/'.split()),
            ('*bar',
             'bar foobar foo.bar'.split(),
             'barfoo .bar .foobar foo/bar'.split()),
            ('*bar*',
             'bar foobar barbaz foobarbaz foo.bar bar.baz'.split(),
             '.bar foo/bar'.split()),
            ('???',
             'foo'.split(),
             '.fo fo/ f/o /fo foobar'.split()),
            ('?/?',
             'f/b'.split(),
             'foo f/. ./b'.split()),
        ]
        self._check_cases(cases)

    def test_glob2re_recursive(self):
        cases = [
            ('**/*',
             'foo foo/foo foo/foo/bar'.split(),
             '.foo foo/.foo'.split()),
            ('**/foo',
             'foo foo/foo'.split(),
             '.foo foo/.foo foo/foo/bar bar'.split()),
            ('foo/**/bar',
             'foo/bar foo/baz/bar foo/baz/baz/bar'.split(),
             '.foo foo/.bar bar baz/bar foo/baz'.split()),
            ('foo/**',
             'foo foo/bar foo/baz/bar foo/.bar'.split(),
             '.foo bar baz/bar'.split()),
        ]
        self._check_cases(cases, double_star=True)

    def test_glob2re_escapable(self):
        plain = dict()
        escaping = dict(can_escape=True)
        # each case carries its own options: the same glob is tried with and without escaping
        cases = [
            (r'\*',
             r'\*'.split(),
             r'foo *'.split(),
             plain),
            (r'\*',
             r'*'.split(),
             r'\* foo ?'.split(),
             escaping),
            (r'\?',
             '?'.split(),
             r'* f \?'.split(),
             escaping),
            (r'\[a\]',
             '[a]'.split(),
             'a [] ['.split(),
             escaping),
            (r'foo\\bar',
             r'foo\\bar'.split(),
             r'foobar foo\bar'.split(),
             plain),
            (r'foo\\bar',
             r'foo\bar'.split(),
             r'foo\\bar foobar'.split(),
             escaping),
            (r'\{foo\}',
             '{foo}'.split(),
             'foo'.split(),
             dict(sets=True, **escaping)),
        ]
        for glob, matches, non_matches, options in cases:
            self.check_pattern(glob, matches, non_matches, **options)

    def test_glob2re_charset(self):
        cases = [
            ('[ac]b[d-l]',
             'abd cbl abe cbh'.split(),
             'dbd bbd cbt abdx a'.split()),
            ('[ac-eh-k]',
             'a c d e h i j k'.split(),
             '* b f g l o z [ [ac-eh-k] aa'.split()),
            ('[!a-et]',
             'f x *'.split(),
             'a e c t xx'.split()),
            ('[a-m]*',
             'a bn m moooooo'.split(),
             'x xa *'.split()),
            ('[',
             '['.split(),
             'x xa * ]'.split()),
            (']',
             ']'.split(),
             'x xa * ['.split()),
            ('[]',
             [],
             'x xa *'.split()),
            ('[!]',
             'x ] ! [ *'.split(),
             'xa [!]'.split()),
        ]
        self._check_cases(cases)

    def test_glob2re_sets(self):
        # without the option, braces are literal
        self.check_pattern(
            '{foo}',
            '{foo}'.split(),
            'foo bar {bar}'.split())

        cases = [
            ('{foo}',
             'foo'.split(),
             '{foo} bar {bar}'.split()),
            ('{foo,bar}',
             'foo bar'.split(),
             '{foo} {bar}'.split()),
            ('{f{oo,bar}',
             'f{oo bar'.split(),
             '{foo} {bar} {f{oo,bar} foo'.split()),
            ('{{foo}}',  # ({foo)}
             '{foo}'.split(),
             'foo {foo foo} {bar}'.split()),
        ]
        self._check_cases(cases, sets=True)
# Run the test suite when this module is executed as a script.
if __name__ == '__main__':
    unittest.main()