Difference between revisions of "Python: Regex"

From RHS Wiki
Jump to navigation Jump to search
Tag: visualeditor
 
(6 intermediate revisions by the same user not shown)
Line 1: Line 1:
=== Port ===
+
==Quick Reference==
<source lang="python">port_regex = re.compile(r'\b('
+
 
 +
===All===
 +
{| class="wikitable"
 +
|+
 +
https://regex101.com/
 +
!Token
 +
!Description
 +
|-
 +
|Newline
 +
|\n
 +
|-
 +
|Carriage return
 +
|\r
 +
|-
 +
|Null character
 +
|\0
 +
|-
 +
|A single character of: a, b, or c
 +
|[abc]
 +
|-
 +
|A character except: a, b, c
 +
|[^abc]
 +
|-
 +
|A character in the range a-z
 +
|[a-z]
 +
|-
 +
|A character not in the range a-z
 +
|[^a-z]
 +
|-
 +
|A character in the range a-z or A-Z
 +
|[a-zA-Z]
 +
|-
 +
|Any single character
 +
|.
 +
|-
 +
|Any whitespace character
 +
|\s
 +
|-
 +
|Any non-whitespace character
 +
|\S
 +
|-
 +
|any digit
 +
|\d
 +
|-
 +
|Any non-digit
 +
|\D
 +
|-
 +
|Any word character
 +
|\w
 +
|-
 +
|Any non-word character
 +
|\W
 +
|-
 +
|Vertical whitespace character
 +
|\v
 +
|-
 +
|Match nth subpattern
 +
|\n
 +
|-
 +
|Hex character YY
 +
|\xYY
 +
|-
 +
|Octal character ddd
 +
|\ddd
 +
|-
 +
|Backspace character
 +
|[\b]
 +
|-
 +
|Makes any character literal
 +
|\
 +
|-
 +
|Capture everything enclosed
 +
|(...)
 +
|-
 +
|Match either a or b
 +
|<nowiki>(a|b)</nowiki>
 +
|-
 +
|Match everything enclosed
 +
|(?:...)
 +
|-
 +
|Comment
 +
|(?#...)
 +
|-
 +
|Named Capturing Group
 +
|(?P<name>...)
 +
|-
 +
|Inline modifiers
 +
|(?imsxXU)
 +
|-
 +
|Conditional statement
 +
|<nowiki>(?(1)yes|no)</nowiki>
 +
|-
 +
|Match subpattern `name`
 +
|(?P=name)
 +
|-
 +
|Positive Lookahead
 +
|(?=...)
 +
|-
 +
|Negative Lookahead
 +
|(?!...)
 +
|-
 +
|Positive Lookbehind
 +
|(?<=...)
 +
|-
 +
|Negative Lookbehind
 +
|(?<!...)
 +
|-
 +
|Zero or one of a
 +
|a?
 +
|-
 +
|Zero or more of a
 +
|a*
 +
|-
 +
|One or more of a
 +
|a+
 +
|-
 +
|Exactly 3 of a
 +
|a{3}
 +
|-
 +
|Between 3 and 6 of a
 +
|a{3,6}
 +
|-
 +
|Greedy quantifier
 +
|a*
 +
|-
 +
|Lazy quantifier
 +
|a*?
 +
|-
 +
|Start of string
 +
|^
 +
|-
 +
|End of string
 +
|$
 +
|-
 +
|Start of string
 +
|\A
 +
|-
 +
|End of string
 +
|\Z
 +
|-
 +
|A word boundary
 +
|\b
 +
|-
 +
|A word boundary with postgres
 +
|\y
 +
|-
 +
|A non word boundary
 +
|\B
 +
|-
 +
|A non word boundary with postgres
 +
|\Y
 +
|-
 +
|Global
 +
|g
 +
|-
 +
|Multiline
 +
|m
 +
|-
 +
|Case insensitive
 +
|i
 +
|-
 +
|Ignore whitespace
 +
|x
 +
|-
 +
|Single line
 +
|s
 +
|-
 +
|Enable unicode support
 +
|u
 +
|-
 +
|Restrict matches to ASCII only
 +
|a
 +
|-
 +
|Complete match contents
 +
|\g<0>
 +
|-
 +
|Complete match contents
 +
|\0
 +
|-
 +
|Contents in capture group 1
 +
|\1
 +
|-
 +
|Contents in capture group 1
 +
|$1
 +
|-
 +
|Contents in capture group `foo`
 +
|${foo}
 +
|-
 +
|Hexadecimal replacement values
 +
|\x20
 +
|-
 +
|Hexadecimal replacement values
 +
|\x{06fa}
 +
|-
 +
|Tab
 +
|\t
 +
|-
 +
|Carriage return
 +
|\r
 +
|-
 +
|Newline
 +
|\n
 +
|-
 +
|Form-feed
 +
|\f
 +
|-
 +
|Uppercase Transformation
 +
|\U
 +
|-
 +
|Lowercase Transformation
 +
|\L
 +
|-
 +
|Terminate any Transformation
 +
|\E
 +
|-
 +
|3 or more of a
 +
|a{3,}
 +
|}
 +
 
 +
==Examples==
 +
 
 +
===Port===
 +
<source lang="python">port_regex = re.compile(r'\b('
 
                         r'6553[0-5]|'
 
                         r'6553[0-5]|'
 
                         r'655[0-2][0-9]|'
 
                         r'655[0-2][0-9]|'
Line 10: Line 232:
 
                         r'[1-9][0-9]|'
 
                         r'[1-9][0-9]|'
 
                         r'[1-9])'
 
                         r'[1-9])'
                         r'\b')</source>
+
                         r'\b')
=== IP ===
+
port_regex = r'([0-9]{1,4}|[1-5][0-9]{4}|6[0-4][0-9]{3}|65[0-4][0-9]{2}|655[0-2][0-9]|6553[0-5])'</source>
<source lang="python">ip = re.compile('^(?:(?:25[0-5]|2[0-4][0-9]|[01]?[0-9][0-9]?)\.){3}(?:25[0-5]|2[0-4][0-9]|[01]?[0-9][0-9]?)$')</source>
+
===IP===
 +
<source lang="python">ip = re.compile('^(?:(?:25[0-5]|2[0-4][0-9]|[01]?[0-9][0-9]?)\.){3}(?:25[0-5]|2[0-4][0-9]|[01]?[0-9][0-9]?)$')
 +
 
 +
ip_regex = r'(?:(?:25[0-5]|2[0-4][0-9]|[01]?[0-9][0-9]?)\.){3}(?:25[0-5]|2[0-4][0-9]|[01]?[0-9][0-9]?)'
 +
cidr_regex = r'(?:/(?:[12][0-9]|3[0-2])|[0-9])'
 +
ip_range_regex = r'(?:-(?:25[0-5]|2[0-4][0-9]|1[0-9]{2}|9[0-9]|[1-8][0-9]|[1-9]))'
  
=== NIE ===
+
target_regex = re.compile(r'{ip_regex}{cidr_regex}?{ip_range_regex}?'.format(
 +
    ip_regex=ip_regex, cidr_regex=cidr_regex, ip_range_regex=ip_range_regex))</source>
 +
 
 +
===NIE===
 
  r'^[XYZ]\d{7}[ABCDEFGHJKLMNPQRSTVWXYZ]'
 
  r'^[XYZ]\d{7}[ABCDEFGHJKLMNPQRSTVWXYZ]'
  
=== DNI ===
+
===DNI===
 
  r'\d{8}[ABCDEFGHJKLMNPQRSTVWXYZ]'
 
  r'\d{8}[ABCDEFGHJKLMNPQRSTVWXYZ]'
  
=== Spanish License Plates ===
+
===Spanish License Plates===
 
  r'\w{0,2}\d{4}\w{1,3}'
 
  r'\w{0,2}\d{4}\w{1,3}'
 +
 +
===E-MAIL===
 +
<syntaxhighlight lang="python3">
 +
EMAIL = r"([a-zA-Z0-9_.+-]+@[a-zA-Z0-9-]+\.[a-zA-Z0-9-.]+)"
 +
</syntaxhighlight>
 +
 +
===IBAN===
 +
<syntaxhighlight lang="python">
 +
IBAN = r"[a-zA-Z]{2}[0-9]{2} ?[a-zA-z0-9]{0,4} ?[a-zA-z0-9]{0,4} ?[a-zA-z0-9]{0,3}(?:[a-zA-z0-9] ?[a-zA-z0-9]{0,4} ?[a-zA-z0-9]{0,4} ?[a-zA-z0-9]{0,4} ?[a-zA-z0-9]{0,3})?"
 +
</syntaxhighlight>
 +
 +
===Payment Account Number===
 +
<syntaxhighlight lang="python">
 +
PAN = r"\b(?:\d[ -]*?){13,19}\b"
 +
</syntaxhighlight>
 +
 +
===Zip Code===
 +
<syntaxhighlight lang="python">
 +
ZIP_CODE_SPAIN = r"0[1-9][0-9]{3}|\D[1-4][0-9]{4}|\D5[0-2][0-9]{3}\D"
 +
</syntaxhighlight>
 +
 +
===Phone Number===
 +
<syntaxhighlight lang="python">
 +
sep = '(:?\s+|-|\.)?' # separator
 +
phone_re = re.compile(r'''
 +
  (\d{3}|\(\d{3}\))  # area code
 +
  {sep}              # separator
 +
  (\d{3})            # first 3
 +
  {sep}              # separator
 +
  (\d{4})            # last 4
 +
'''.format(sep=sep), re.VERBOSE)
 +
</syntaxhighlight>
 +
 +
====Spain Phone====
 +
<syntaxhighlight lang="python">
 +
spainphones = r"(?:(?:\+?34(?:[ \t|\-])?)?[9|6|7](?:(?:\d{1}(?:[ \t|\-])?[0-9]{3})|(?:\d{2}(?:[ \t|\-])?[0-9]{2}))(?:[ \t|\-])?[0-9]{2}(?:[ \t|\-])?[0-9]{2})"
 +
</syntaxhighlight>

Latest revision as of 13:45, 12 February 2020

Quick Reference[edit]

All[edit]

https://regex101.com/
Token Description
Newline \n
Carriage return \r
Null character \0
A single character of: a, b, or c [abc]
A character except: a, b, c [^abc]
A character in the range a-z [a-z]
A character not in the range a-z [^a-z]
A character in the range a-z or A-Z [a-zA-Z]
Any single character .
Any whitespace character \s
Any non-whitespace character \S
Any digit \d
Any non-digit \D
Any word character \w
Any non-word character \W
Vertical whitespace character \v
Match nth subpattern \n
Hex character YY \xYY
Octal character ddd \ddd
Backspace character [\b]
Makes any character literal \
Capture everything enclosed (...)
Match either a or b (a|b)
Match everything enclosed (?:...)
Comment (?#...)
Named Capturing Group (?P<name>...)
Inline modifiers (?imsxXU)
Conditional statement (?(1)yes|no)
Match subpattern `name` (?P=name)
Positive Lookahead (?=...)
Negative Lookahead (?!...)
Positive Lookbehind (?<=...)
Negative Lookbehind (?<!...)
Zero or one of a a?
Zero or more of a a*
One or more of a a+
Exactly 3 of a a{3}
Between 3 and 6 of a a{3,6}
Greedy quantifier a*
Lazy quantifier a*?
Start of string ^
End of string $
Start of string \A
End of string \Z
A word boundary \b
A word boundary with postgres \y
A non word boundary \B
A non word boundary with postgres \Y
Global g
Multiline m
Case insensitive i
Ignore whitespace x
Single line s
Enable unicode support u
Restrict matches to ASCII only a
Complete match contents \g<0>
Complete match contents \0
Contents in capture group 1 \1
Contents in capture group 1 $1
Contents in capture group `foo` ${foo}
Hexadecimal replacement values \x20
Hexadecimal replacement values \x{06fa}
Tab \t
Carriage return \r
Newline \n
Form-feed \f
Uppercase Transformation \U
Lowercase Transformation \L
Terminate any Transformation \E
3 or more of a a{3,}

Examples[edit]

Port[edit]

# Decimal port numbers 1-65535 with no leading zeros, listed from the
# highest numeric range down to single digits.
_port_alternatives = (
    r'6553[0-5]',                   # 65530-65535
    r'655[0-2][0-9]',               # 65500-65529
    r'65[0-4][0-9][0-9]',           # 65000-65499
    r'6[0-4][0-9][0-9][0-9]',       # 60000-64999
    r'[1-5][0-9][0-9][0-9][0-9]',   # 10000-59999
    r'[1-9][0-9][0-9][0-9]',        # 1000-9999
    r'[1-9][0-9][0-9]',             # 100-999
    r'[1-9][0-9]',                  # 10-99
    r'[1-9]',                       # 1-9
)
# The word boundaries on both sides keep the pattern from matching a slice
# of a longer run of digits; the port itself is capture group 1.
port_regex = re.compile(r'\b(' + '|'.join(_port_alternatives) + r')\b')
# Port number (0-65535) as a plain pattern string, for embedding in larger
# expressions; the port is capture group 1.
# The five-digit alternatives come first: regex alternation takes the first
# branch that matches, so with the 1-4 digit branch in front an unanchored
# match on '65535' would stop at '6553'.
port_regex = r'(6553[0-5]|655[0-2][0-9]|65[0-4][0-9]{2}|6[0-4][0-9]{3}|[1-5][0-9]{4}|[0-9]{1,4})'

IP[edit]

# Anchored matcher for a dotted-quad IPv4 address: four octets, each 0-255.
# Written as a raw string so that `\.` reaches the regex engine as an escaped
# dot instead of being an invalid Python string escape.
ip = re.compile(r'^(?:(?:25[0-5]|2[0-4][0-9]|[01]?[0-9][0-9]?)\.){3}(?:25[0-5]|2[0-4][0-9]|[01]?[0-9][0-9]?)$')

# Building blocks for a scan-target expression: an IPv4 address optionally
# followed by a CIDR suffix ("/24") and/or a last-octet range ("-254").

# Dotted-quad IPv4 address, unanchored so it can be embedded.
ip_regex = r'(?:(?:25[0-5]|2[0-4][0-9]|[01]?[0-9][0-9]?)\.){3}(?:25[0-5]|2[0-4][0-9]|[01]?[0-9][0-9]?)'
# CIDR suffix "/0" .. "/32". Every alternative sits inside the group that
# follows the slash; with the single-digit branch outside that group, "/8"
# could not match and a bare digit with no slash was accepted instead.
# Two-digit alternatives come first so "/24" is not cut short at "/2".
cidr_regex = r'(?:/(?:3[0-2]|[12][0-9]|[0-9]))'
# Range suffix "-1" .. "-255".
ip_range_regex = r'(?:-(?:25[0-5]|2[0-4][0-9]|1[0-9]{2}|9[0-9]|[1-8][0-9]|[1-9]))'

# Only the template string goes through str.format, so the `{3}` and `{2}`
# quantifiers inside the substituted fragments are left untouched.
target_regex = re.compile(r'{ip_regex}{cidr_regex}?{ip_range_regex}?'.format(
    ip_regex=ip_regex, cidr_regex=cidr_regex, ip_range_regex=ip_range_regex))

NIE[edit]

# NIE pattern: anchored at the start, one of X/Y/Z, seven digits, then one
# letter from the listed set (A-Z without I, O and U). There is no end
# anchor, so trailing text after the letter is not rejected.
r'^[XYZ]\d{7}[ABCDEFGHJKLMNPQRSTVWXYZ]'

DNI[edit]

# DNI pattern: eight digits followed by one letter from the listed set
# (A-Z without I, O and U). Unanchored, so it can match inside longer text.
r'\d{8}[ABCDEFGHJKLMNPQRSTVWXYZ]'

Spanish License Plates[edit]

# Licence-plate pattern: up to two word characters, four digits, then one to
# three word characters.
# NOTE(review): `\w` also matches digits and underscore, so this is looser
# than "letters only" - confirm whether stricter letter classes are wanted.
r'\w{0,2}\d{4}\w{1,3}'

E-MAIL[edit]

# E-mail address pattern; the whole address is capture group 1.
EMAIL = (
    r"("
    r"[a-zA-Z0-9_.+-]+"   # local part
    r"@"
    r"[a-zA-Z0-9-]+"      # first domain label (no dots)
    r"\."
    r"[a-zA-Z0-9-.]+"     # remainder of the domain; dots and hyphens allowed
    r")"
)

IBAN[edit]

# IBAN pattern: two letters, two digits, then alphanumeric groups that may be
# separated by single spaces.
# Every class uses `A-Z`: the range `A-z` also covers the ASCII characters
# between 'Z' and 'a' ([ \ ] ^ _ `), which would let punctuation through.
IBAN = (
    r"[a-zA-Z]{2}[0-9]{2}"
    r" ?[a-zA-Z0-9]{0,4} ?[a-zA-Z0-9]{0,4} ?[a-zA-Z0-9]{0,3}"
    r"(?:[a-zA-Z0-9] ?[a-zA-Z0-9]{0,4} ?[a-zA-Z0-9]{0,4} ?[a-zA-Z0-9]{0,4} ?[a-zA-Z0-9]{0,3})?"
)

Payment Account Number[edit]

# Payment card number: 13 to 19 digits, each optionally followed by spaces or
# hyphens, delimited by word boundaries on both sides.
PAN = (
    r"\b"
    r"(?:\d[ -]*?){13,19}"
    r"\b"
)

Zip Code[edit]

# Spanish postal code: five digits whose first two are in the range 01-52.
# Digit lookarounds replace the uneven `\D` guards, so every prefix range is
# delimited the same way, codes at the very start or end of the text match,
# and the match is exactly the five digits with no neighbouring character.
ZIP_CODE_SPAIN = r"(?<!\d)(?:0[1-9]|[1-4][0-9]|5[0-2])[0-9]{3}(?!\d)"

Phone Number[edit]

# Optional separator between phone-number groups: whitespace, hyphen or dot.
# `(?:` makes it a non-capturing group (the earlier `(:?` was a capturing
# group beginning with an optional colon), and the raw string keeps `\s` and
# `\.` from being treated as Python string escapes.
sep = r'(?:\s+|-|\.)?'

# Ten-digit phone number in three groups: area code, first 3, last 4.
# The `{3}` / `{4}` quantifiers are written with doubled braces because the
# template goes through str.format; single braces would be read as
# positional replacement fields and raise IndexError.
phone_re = re.compile(r'''
  (\d{{3}}|\(\d{{3}}\))  # area code, optionally in parentheses
  {sep}                  # separator
  (\d{{3}})              # first 3
  {sep}                  # separator
  (\d{{4}})              # last 4
'''.format(sep=sep), re.VERBOSE)

Spain Phone[edit]

# Spanish phone number: optional "+34"/"34" prefix, a leading 9, 6 or 7, and
# eight more digits, with an optional space, tab or hyphen between groups.
# Inside a character class `|` is a literal pipe rather than alternation, so
# the classes are written as `[967]` and `[ \t-]` to stop '|' being accepted
# as a leading digit or as a separator.
spainphones = r"(?:(?:\+?34[ \t-]?)?[967](?:\d[ \t-]?[0-9]{3}|\d{2}[ \t-]?[0-9]{2})[ \t-]?[0-9]{2}[ \t-]?[0-9]{2})"