Difference between revisions of "Python: Regex"

From RHS Wiki
Jump to navigation Jump to search
Tag: visualeditor
m
Tag: visualeditor
Line 1: Line 1:
 +
== Quick Reference ==
 +
 +
=== All ===
 +
{| class="wikitable"
 +
|+
 +
!Token
 +
!Description
 +
|-
 +
|\n
 +
|Newline
 +
|-
 +
|\r
 +
|Carriage return
 +
|-
 +
|\0
 +
|Null character
 +
|-
 +
|A single character of: a, b, or c
 +
|[abc]
 +
|-
 +
|A character except: a, b, c
 +
|[^abc]
 +
|-
 +
|A character in the range a-z
 +
|[a-z]
 +
|-
 +
|A character not in the range a-z
 +
|[^a-z]
 +
|-
 +
|A character in the range a-z or A-Z
 +
|[a-zA-Z]
 +
|-
 +
|Any single character
 +
|.
 +
|-
 +
|Any whitespace character
 +
|\s
 +
|-
 +
|Any non-whitespace character
 +
|\S
 +
|-
 +
|any digit
 +
|\d
 +
|-
 +
|Any non-digit
 +
|\D
 +
|-
 +
|Any word character
 +
|\w
 +
|-
 +
|Any non-word character
 +
|\W
 +
|-
 +
|Vertical whitespace character
 +
|\v
 +
|-
 +
|Match nth subpattern
 +
|\n
 +
|-
 +
|Hex character YY
 +
|\xYY
 +
|-
 +
|Octal character ddd
 +
|\ddd
 +
|-
 +
|Backspace character
 +
|[\b]
 +
|-
 +
|Makes any character literal
 +
|\
 +
|-
 +
|Capture everything enclosed
 +
|(...)
 +
|-
 +
|Match either a or b
 +
|<nowiki>(a|b)</nowiki>
 +
|-
 +
|Match everything enclosed
 +
|(?:...)
 +
|-
 +
|Comment
 +
|(?#...)
 +
|-
 +
|Named Capturing Group
 +
|(?P<name>...)
 +
|-
 +
|Inline modifiers
 +
|(?aiLmsux)
 +
|-
 +
|Conditional statement
 +
|<nowiki>(?(1)yes|no)</nowiki>
 +
|-
 +
|Match subpattern `name`
 +
|(?P=name)
 +
|-
 +
|Positive Lookahead
 +
|(?=...)
 +
|-
 +
|Negative Lookahead
 +
|(?!...)
 +
|-
 +
|Positive Lookbehind
 +
|(?<=...)
 +
|-
 +
|Negative Lookbehind
 +
|(?<!...)
 +
|-
 +
|Zero or one of a
 +
|a?
 +
|-
 +
|Zero or more of a
 +
|a*
 +
|-
 +
|One or more of a
 +
|a+
 +
|-
 +
|Exactly 3 of a
 +
|a{3}
 +
|-
 +
|Between 3 and 6 of a
 +
|a{3,6}
 +
|-
 +
|Greedy quantifier
 +
|a*
 +
|-
 +
|Lazy quantifier
 +
|a*?
 +
|-
 +
|Start of string
 +
|^
 +
|-
 +
|End of string
 +
|$
 +
|-
 +
|Start of string
 +
|\A
 +
|-
 +
|End of string
 +
|\Z
 +
|-
 +
|A word boundary
 +
|\b
 +
|-
 +
|A word boundary with postgres
 +
|\y
 +
|-
 +
|A non word boundary
 +
|\B
 +
|-
 +
|A non word boundary with postgres
 +
|\Y
 +
|-
 +
|Global
 +
|g
 +
|-
 +
|Multiline
 +
|m
 +
|-
 +
|Case insensitive
 +
|i
 +
|-
 +
|Ignore whitespace
 +
|x
 +
|-
 +
|Single line
 +
|s
 +
|-
 +
|Enable unicode support
 +
|u
 +
|-
 +
|Restrict matches to ASCII only
 +
|a
 +
|-
 +
|Complete match contents
 +
|\g<0>
 +
|}
 +
 +
== Examples ==
 +
 
===Port===
 
===Port===
 
<source lang="python">port_regex = re.compile(r'\b('
 
<source lang="python">port_regex = re.compile(r'\b('

Revision as of 12:59, 12 February 2020

Quick Reference

All

Token Description
\n Newline
\r Carriage return
\0 Null character
A single character of: a, b, or c [abc]
A character except: a, b, c [^abc]
A character in the range a-z [a-z]
A character not in the range a-z [^a-z]
A character in the range a-z or A-Z [a-zA-Z]
Any single character .
Any whitespace character \s
Any non-whitespace character \S
any digit \d
Any non-digit \D
Any word character \w
Any non-word character \W
Vertical whitespace character \v
Match nth subpattern \n
Hex character YY \xYY
Octal character ddd \ddd
Backspace character [\b]
Makes any character literal \
Capture everything enclosed (...)
Match either a or b (a|b)
Match everything enclosed (?:...)
Comment (?#...)
Named Capturing Group (?P<name>...)
Inline modifiers (?aiLmsux)
Conditional statement (?(1)yes|no)
Match subpattern `name` (?P=name)
Positive Lookahead (?=...)
Negative Lookahead (?!...)
Positive Lookbehind (?<=...)
Negative Lookbehind (?<!...)
Zero or one of a a?
Zero or more of a a*
One or more of a a+
Exactly 3 of a a{3}
Between 3 and 6 of a a{3,6}
Greedy quantifier a*
Lazy quantifier a*?
Start of string ^
End of string $
Start of string \A
End of string \Z
A word boundary \b
A word boundary with postgres \y
A non word boundary \B
A non word boundary with postgres \Y
Global g
Multiline m
Case insensitive i
Ignore whitespace x
Single line s
Enable unicode support u
Restrict matches to ASCII only a
Complete match contents \g<0>

Examples

Port

# Compiled pattern for a port number in the range 1-65535 (no leading zeros),
# delimited by word boundaries.  Alternatives are ordered from the most
# specific (65530-65535) down to a single digit so the longest valid number
# is tried first.
port_regex = re.compile(r'\b('
                        r'6553[0-5]|'
                        r'655[0-2][0-9]|'
                        r'65[0-4][0-9][0-9]|'
                        r'6[0-4][0-9][0-9][0-9]|'
                        r'[1-5][0-9][0-9][0-9][0-9]|'
                        r'[1-9][0-9][0-9][0-9]|'
                        r'[1-9][0-9][0-9]|'
                        r'[1-9])'
                        r'\b')
# Compact, uncompiled alternative meant for embedding in larger patterns.
# NOTE: this rebinds ``port_regex`` to a plain ``str``.  Unlike the compiled
# version it also accepts 0 and zero-padded values (``[0-9]{1,4}``) and has no
# word boundaries.  The five-digit branches must come before ``[0-9]{1,4}``:
# regex alternation takes the first branch that matches, so with the short
# branch first an unanchored search would split "65535" into "6553" and "5".
port_regex = r'(6553[0-5]|655[0-2][0-9]|65[0-4][0-9]{2}|6[0-4][0-9]{3}|[1-5][0-9]{4}|[0-9]{1,4})'

IP

# Anchored matcher for a dotted-quad IPv4 address; each octet is 0-255
# (one- to three-digit forms with leading zeros such as "01" are accepted).
# Raw string so that ``\.`` reaches the regex engine without relying on
# Python passing through an invalid string escape.
ip = re.compile(r'^(?:(?:25[0-5]|2[0-4][0-9]|[01]?[0-9][0-9]?)\.){3}(?:25[0-5]|2[0-4][0-9]|[01]?[0-9][0-9]?)$')

# Unanchored building blocks, combined into ``target_regex`` below.
ip_regex = r'(?:(?:25[0-5]|2[0-4][0-9]|[01]?[0-9][0-9]?)\.){3}(?:25[0-5]|2[0-4][0-9]|[01]?[0-9][0-9]?)'
# CIDR suffix "/0" .. "/32".  The slash sits outside the inner alternation so
# it applies to every branch; two-digit branches come first so "/24" is not
# cut short to "/2".
cidr_regex = r'(?:/(?:[12][0-9]|3[0-2]|[0-9]))'
# Range suffix "-1" .. "-255" applied to the last octet.
ip_range_regex = r'(?:-(?:25[0-5]|2[0-4][0-9]|1[0-9]{2}|9[0-9]|[1-8][0-9]|[1-9]))'

# A scan target: an IPv4 address optionally followed by a CIDR suffix and/or
# a last-octet range, e.g. "10.0.0.0/24" or "10.0.0.1-254".
target_regex = re.compile(r'{ip_regex}{cidr_regex}?{ip_range_regex}?'.format(
    ip_regex=ip_regex, cidr_regex=cidr_regex, ip_range_regex=ip_range_regex))

NIE

# NIE pattern (per the section heading): a leading X, Y or Z, seven digits,
# then one check letter from the listed set (I, O and U are not in it).
# Anchored at the start only; there is no `$`, so trailing text is allowed.
r'^[XYZ]\d{7}[ABCDEFGHJKLMNPQRSTVWXYZ]'

DNI

# DNI pattern (per the section heading): eight digits followed by one check
# letter from the listed set (I, O and U are not in it).
# Unanchored, so it can match inside a longer run of characters.
r'\d{8}[ABCDEFGHJKLMNPQRSTVWXYZ]'

Spanish License Plates

# Licence plate pattern: up to two word characters, four digits, then one to
# three word characters.  NOTE(review): `\w` also matches digits and "_", so
# this is looser than "letters only" -- confirm that is intended.
r'\w{0,2}\d{4}\w{1,3}'

E-MAIL

# E-mail address pattern; the whole address is captured as group 1.
EMAIL = (
    r"("
    r"[a-zA-Z0-9_.+-]+"   # local part
    r"@"
    r"[a-zA-Z0-9-]+"      # first domain label
    r"\."
    r"[a-zA-Z0-9-.]+"     # remaining labels / TLD (dots and hyphens allowed)
    r")"
)

IBAN

# IBAN pattern: two letters (country code), two check digits, then the
# account part as alphanumerics that may be separated by single spaces.
# Every class is ``[a-zA-Z0-9]``; the range ``A-z`` would also admit the
# ASCII punctuation between "Z" and "a" ("[", "\", "]", "^", "_", "`").
IBAN = r"[a-zA-Z]{2}[0-9]{2} ?[a-zA-Z0-9]{0,4} ?[a-zA-Z0-9]{0,4} ?[a-zA-Z0-9]{0,3}(?:[a-zA-Z0-9] ?[a-zA-Z0-9]{0,4} ?[a-zA-Z0-9]{0,4} ?[a-zA-Z0-9]{0,4} ?[a-zA-Z0-9]{0,3})?"

Payment Account Number

# Payment account (card) number: 13 to 19 digits, each optionally followed by
# spaces or hyphens, delimited by word boundaries.
PAN = (
    r"\b"
    r"(?:\d[ -]*?)"   # one digit plus any (lazily matched) separators
    r"{13,19}"        # repeated 13-19 times
    r"\b"
)

Zip Code

# Spanish postal code: five digits whose first two are 01-52.
# Lookarounds (rather than consuming ``\D``) keep neighbouring digits out
# while still matching at the very start or end of the text, and the match
# itself is exactly the five digits.
ZIP_CODE_SPAIN = r"(?<!\d)(?:0[1-9]|[1-4][0-9]|5[0-2])[0-9]{3}(?!\d)"

Phone Number

# Optional separator between number groups: whitespace, a hyphen or a dot.
# Non-capturing, so the groups of ``phone_re`` are only the number parts.
sep = r'(?:\s+|-|\.)?'
# Phone number in the form AAA-PPP-LLLL (area code optionally in parentheses).
# Groups: 1 = area code, 2 = first three digits, 3 = last four digits.
# The template goes through ``str.format``, so the literal quantifier braces
# are doubled (``{{3}}``); single braces would be read as format fields and
# raise IndexError.
phone_re = re.compile(r'''
  (\d{{3}}|\(\d{{3}}\))  # area code
  {sep}                  # separator
  (\d{{3}})              # first 3
  {sep}                  # separator
  (\d{{4}})              # last 4
'''.format(sep=sep), re.VERBOSE)

Spain Phone

# Spanish phone number: optional "+34"/"34" prefix, a first digit of 9, 6 or
# 7, then eight more digits grouped 3-2-2 or 2-2-2 with an optional single
# separator (space, tab or hyphen) between groups.
# Inside a character class "|" is a literal, not alternation, so the classes
# list only the intended characters.
spainphones = (
    r"(?:"
    r"(?:\+?34[ \t\-]?)?"                             # optional country prefix
    r"[967]"                                          # leading digit
    r"(?:\d[ \t\-]?[0-9]{3}|\d{2}[ \t\-]?[0-9]{2})"   # N NNN  or  NN NN
    r"[ \t\-]?[0-9]{2}"
    r"[ \t\-]?[0-9]{2}"
    r")"
)