Difference between revisions of "Python: Regex"
Jump to navigation
Jump to search
Rafahsolis (talk | contribs) |
Rafahsolis (talk | contribs) m (→All) Tag: visualeditor |
||
| (7 intermediate revisions by the same user not shown) | |||
| Line 1: | Line 1: | ||
| − | === Port === | + | ==Quick Reference== |
| − | + | ||
| + | ===All=== | ||
| + | {| class="wikitable" | ||
| + | |+ | ||
| + | https://regex101.com/ | ||
| + | !Token | ||
| + | !Description | ||
| + | |- | ||
| + | |Newline | ||
| + | |\n | ||
| + | |- | ||
| + | |Carriage return | ||
| + | |\r | ||
| + | |- | ||
| + | |Null character | ||
| + | |\0 | ||
| + | |- | ||
| + | |A single character of: a, b, or c | ||
| + | |[abc] | ||
| + | |- | ||
| + | |A character except: a, b, c | ||
| + | |[^abc] | ||
| + | |- | ||
| + | |A character in the range a-z | ||
| + | |[a-z] | ||
| + | |- | ||
| + | |A character not in the range a-z | ||
| + | |[^a-z] | ||
| + | |- | ||
| + | |A character in the range a-z or A-Z | ||
| + | |[a-zA-Z] | ||
| + | |- | ||
| + | |Any single character | ||
| + | |. | ||
| + | |- | ||
| + | |Any whitespace character | ||
| + | |\s | ||
| + | |- | ||
| + | |Any non-whitespace character | ||
| + | |\S | ||
| + | |- | ||
| + | |any digit | ||
| + | |\d | ||
| + | |- | ||
| + | |Any non-digit | ||
| + | |\D | ||
| + | |- | ||
| + | |Any word character | ||
| + | |\w | ||
| + | |- | ||
| + | |Any non-word character | ||
| + | |\W | ||
| + | |- | ||
| + | |Vertical whitespace character | ||
| + | |\v | ||
| + | |- | ||
| + | |Match nth subpattern | ||
| + | |\n | ||
| + | |- | ||
| + | |Hex character YY | ||
| + | |\xYY | ||
| + | |- | ||
| + | |Octal character ddd | ||
| + | |\ddd | ||
| + | |- | ||
| + | |Backspace character | ||
| + | |[\b] | ||
| + | |- | ||
| + | |Makes any character literal | ||
| + | |\ | ||
| + | |- | ||
| + | |Capture everything enclosed | ||
| + | |(...) | ||
| + | |- | ||
| + | |Match either a or b | ||
| + | |<nowiki>(a|b)</nowiki> | ||
| + | |- | ||
| + | |Match everything enclosed | ||
| + | |(?:...) | ||
| + | |- | ||
| + | |Comment | ||
| + | |(?#...) | ||
| + | |- | ||
| + | |Named Capturing Group | ||
| + | |(?P<name>...) | ||
| + | |- | ||
| + | |Inline modifiers | ||
| + | |(?imsxXU) | ||
| + | |- | ||
| + | |Conditional statement | ||
| + | |<nowiki>(?(1)yes|no)</nowiki> | ||
| + | |- | ||
| + | |Match subpattern `name` | ||
| + | |(?P=name) | ||
| + | |- | ||
| + | |Positive Lookahead | ||
| + | |(?=...) | ||
| + | |- | ||
| + | |Negative Lookahead | ||
| + | |(?!...) | ||
| + | |- | ||
| + | |Positive Lookbehind | ||
| + | |(?<=...) | ||
| + | |- | ||
| + | |Negative Lookbehind | ||
| + | |(?<!...) | ||
| + | |- | ||
| + | |Zero or one of a | ||
| + | |a? | ||
| + | |- | ||
| + | |Zero or more of a | ||
| + | |a* | ||
| + | |- | ||
| + | |One or more of a | ||
| + | |a+ | ||
| + | |- | ||
| + | |Exactly 3 of a | ||
| + | |a{3} | ||
| + | |- | ||
| + | |Between 3 and 6 of a | ||
| + | |a{3,6} | ||
| + | |- | ||
| + | |Greedy quantifier | ||
| + | |a* | ||
| + | |- | ||
| + | |Lazy quantifier | ||
| + | |a*? | ||
| + | |- | ||
| + | |Start of string | ||
| + | |^ | ||
| + | |- | ||
| + | |End of string | ||
| + | |$ | ||
| + | |- | ||
| + | |Start of string | ||
| + | |\A | ||
| + | |- | ||
| + | |End of string | ||
| + | |\Z | ||
| + | |- | ||
| + | |A word boundary | ||
| + | |\b | ||
| + | |- | ||
| + | |A word boundary with postgres | ||
| + | |\y | ||
| + | |- | ||
| + | |A non word boundary | ||
| + | |\B | ||
| + | |- | ||
| + | |A non word boundary with postgres | ||
| + | |\Y | ||
| + | |- | ||
| + | |Global | ||
| + | |g | ||
| + | |- | ||
| + | |Multiline | ||
| + | |m | ||
| + | |- | ||
| + | |Case insensitive | ||
| + | |i | ||
| + | |- | ||
| + | |Ignore whitespace | ||
| + | |x | ||
| + | |- | ||
| + | |Single line | ||
| + | |s | ||
| + | |- | ||
| + | |Enable unicode support | ||
| + | |u | ||
| + | |- | ||
| + | |Restrict matches to ASCII only | ||
| + | |a | ||
| + | |- | ||
| + | |Complete match contents | ||
| + | |\g<0> | ||
| + | |- | ||
| + | |Complete match contents | ||
| + | |\0 | ||
| + | |- | ||
| + | |Contents in capture group 1 | ||
| + | |\1 | ||
| + | |- | ||
| + | |Contents in capture group 1 | ||
| + | |$1 | ||
| + | |- | ||
| + | |Contents in capture group `foo` | ||
| + | |${foo} | ||
| + | |- | ||
| + | |Hexadecimal replacement values | ||
| + | |\x20 | ||
| + | |- | ||
| + | |Hexadecimal replacement values | ||
| + | |\x{06fa} | ||
| + | |- | ||
| + | |Tab | ||
| + | |\t | ||
| + | |- | ||
| + | |Carriage return | ||
| + | |\r | ||
| + | |- | ||
| + | |Newline | ||
| + | |\n | ||
| + | |- | ||
| + | |Form-feed | ||
| + | |\f | ||
| + | |- | ||
| + | |Uppercase Transformation | ||
| + | |\U | ||
| + | |- | ||
| + | |Lowercase Transformation | ||
| + | |\L | ||
| + | |- | ||
| + | |Terminate any Transformation | ||
| + | |\E | ||
| + | |- | ||
| + | |3 or more of a | ||
| + | |a{3,} | ||
| + | |} | ||
| + | |||
| + | ==Examples== | ||
| + | |||
| + | ===Port=== | ||
| + | <source lang="python">port_regex = re.compile(r'\b(' | ||
r'6553[0-5]|' | r'6553[0-5]|' | ||
r'655[0-2][0-9]|' | r'655[0-2][0-9]|' | ||
| Line 10: | Line 232: | ||
r'[1-9][0-9]|' | r'[1-9][0-9]|' | ||
r'[1-9])' | r'[1-9])' | ||
| − | r'\b')</source> | + | r'\b') |
| − | === IP === | + | port_regex = r'([0-9]{1,4}|[1-5][0-9]{4}|6[0-4][0-9]{3}|65[0-4][0-9]{2}|655[0-2][0-9]|6553[0-5])'</source> |
| − | + | ===IP=== | |
| + | <source lang="python">ip = re.compile('^(?:(?:25[0-5]|2[0-4][0-9]|[01]?[0-9][0-9]?)\.){3}(?:25[0-5]|2[0-4][0-9]|[01]?[0-9][0-9]?)$') | ||
| + | |||
| + | ip_regex = r'(?:(?:25[0-5]|2[0-4][0-9]|[01]?[0-9][0-9]?)\.){3}(?:25[0-5]|2[0-4][0-9]|[01]?[0-9][0-9]?)' | ||
| + | cidr_regex = r'(?:/(?:[12][0-9]|3[0-2])|[0-9])' | ||
| + | ip_range_regex = r'(?:-(?:25[0-5]|2[0-4][0-9]|1[0-9]{2}|9[0-9]|[1-8][0-9]|[1-9]))' | ||
| − | === NIE === | + | target_regex = re.compile(r'{ip_regex}{cidr_regex}?{ip_range_regex}?'.format( |
| + | ip_regex=ip_regex, cidr_regex=cidr_regex, ip_range_regex=ip_range_regex))</source> | ||
| + | |||
| + | ===NIE=== | ||
r'^[XYZ]\d{7}[ABCDEFGHJKLMNPQRSTVWXYZ]' | r'^[XYZ]\d{7}[ABCDEFGHJKLMNPQRSTVWXYZ]' | ||
| − | === DNI === | + | ===DNI=== |
r'\d{8}[ABCDEFGHJKLMNPQRSTVWXYZ]' | r'\d{8}[ABCDEFGHJKLMNPQRSTVWXYZ]' | ||
| + | |||
| + | ===Spanish License Plates=== | ||
| + | r'\w{0,2}\d{4}\w{1,3}' | ||
| + | |||
| + | ===E-MAIL=== | ||
| + | <syntaxhighlight lang="python3"> | ||
| + | EMAIL = r"([a-zA-Z0-9_.+-]+@[a-zA-Z0-9-]+\.[a-zA-Z0-9-.]+)" | ||
| + | </syntaxhighlight> | ||
| + | |||
| + | ===IBAN=== | ||
| + | <syntaxhighlight lang="python"> | ||
| + | IBAN = r"[a-zA-Z]{2}[0-9]{2} ?[a-zA-z0-9]{0,4} ?[a-zA-z0-9]{0,4} ?[a-zA-z0-9]{0,3}(?:[a-zA-z0-9] ?[a-zA-z0-9]{0,4} ?[a-zA-z0-9]{0,4} ?[a-zA-z0-9]{0,4} ?[a-zA-z0-9]{0,3})?" | ||
| + | </syntaxhighlight> | ||
| + | |||
| + | ===Payment Account Number=== | ||
| + | <syntaxhighlight lang="python"> | ||
| + | PAN = r"\b(?:\d[ -]*?){13,19}\b" | ||
| + | </syntaxhighlight> | ||
| + | |||
| + | ===Zip Code=== | ||
| + | <syntaxhighlight lang="python"> | ||
| + | ZIP_CODE_SPAIN = r"0[1-9][0-9]{3}|\D[1-4][0-9]{4}|\D5[0-2][0-9]{3}\D" | ||
| + | </syntaxhighlight> | ||
| + | |||
| + | ===Phone Number=== | ||
| + | <syntaxhighlight lang="python"> | ||
| + | sep = '(:?\s+|-|\.)?' # separator | ||
| + | phone_re = re.compile(r''' | ||
| + | (\d{3}|\(\d{3}\)) # area code | ||
| + | {sep} # separator | ||
| + | (\d{3}) # first 3 | ||
| + | {sep} # separator | ||
| + | (\d{4}) # last 4 | ||
| + | '''.format(sep=sep), re.VERBOSE) | ||
| + | </syntaxhighlight> | ||
| + | |||
| + | ====Spain Phone==== | ||
| + | <syntaxhighlight lang="python"> | ||
| + | spainphones = r"(?:(?:\+?34(?:[ \t|\-])?)?[9|6|7](?:(?:\d{1}(?:[ \t|\-])?[0-9]{3})|(?:\d{2}(?:[ \t|\-])?[0-9]{2}))(?:[ \t|\-])?[0-9]{2}(?:[ \t|\-])?[0-9]{2})" | ||
| + | </syntaxhighlight> | ||
Latest revision as of 13:45, 12 February 2020
Quick Reference[edit]
All[edit]
| Description | Token |
|---|---|
| Newline | \n |
| Carriage return | \r |
| Null character | \0 |
| A single character of: a, b, or c | [abc] |
| A character except: a, b, c | [^abc] |
| A character in the range a-z | [a-z] |
| A character not in the range a-z | [^a-z] |
| A character in the range a-z or A-Z | [a-zA-Z] |
| Any single character | . |
| Any whitespace character | \s |
| Any non-whitespace character | \S |
| Any digit | \d |
| Any non-digit | \D |
| Any word character | \w |
| Any non-word character | \W |
| Vertical whitespace character | \v |
| Match nth subpattern | \n |
| Hex character YY | \xYY |
| Octal character ddd | \ddd |
| Backspace character | [\b] |
| Makes any character literal | \ |
| Capture everything enclosed | (...) |
| Match either a or b | (a|b) |
| Match everything enclosed without capturing it (non-capturing group) | (?:...) |
| Comment | (?#...) |
| Named Capturing Group | (?P<name>...) |
| Inline modifiers | (?imsxXU) |
| Conditional statement | (?(1)yes|no) |
| Match subpattern `name` | (?P=name) |
| Positive Lookahead | (?=...) |
| Negative Lookahead | (?!...) |
| Positive Lookbehind | (?<=...) |
| Negative Lookbehind | (?<!...) |
| Zero or one of a | a? |
| Zero or more of a | a* |
| One or more of a | a+ |
| Exactly 3 of a | a{3} |
| Between 3 and 6 of a | a{3,6} |
| Greedy quantifier | a* |
| Lazy quantifier | a*? |
| Start of string | ^ |
| End of string | $ |
| Start of string | \A |
| End of string | \Z |
| A word boundary | \b |
| A word boundary with postgres | \y |
| A non word boundary | \B |
| A non word boundary with postgres | \Y |
| Global | g |
| Multiline | m |
| Case insensitive | i |
| Ignore whitespace | x |
| Single line | s |
| Enable unicode support | u |
| Restrict matches to ASCII only | a |
| Complete match contents | \g<0> |
| Complete match contents | \0 |
| Contents in capture group 1 | \1 |
| Contents in capture group 1 | $1 |
| Contents in capture group `foo` | ${foo} |
| Hexadecimal replacement values | \x20 |
| Hexadecimal replacement values | \x{06fa} |
| Tab | \t |
| Carriage return | \r |
| Newline | \n |
| Form-feed | \f |
| Uppercase Transformation | \U |
| Lowercase Transformation | \L |
| Terminate any Transformation | \E |
| 3 or more of a | a{3,} |
Examples[edit]
Port[edit]
# Compiled pattern for a port number in the range 1-65535 (no leading zeros),
# delimited by word boundaries.  The alternatives run from the highest range
# down so the five-digit forms are tried before the shorter ones.
port_regex = re.compile(
    r'\b('
    r'6553[0-5]|'
    r'655[0-2][0-9]|'
    r'65[0-4][0-9][0-9]|'
    r'6[0-4][0-9][0-9][0-9]|'
    r'[1-5][0-9][0-9][0-9][0-9]|'
    r'[1-9][0-9][0-9][0-9]|'
    r'[1-9][0-9][0-9]|'
    r'[1-9])'
    r'\b'
)

# Uncompiled variant meant to be embedded in a larger pattern.
# NOTE: this rebinds ``port_regex`` from a compiled pattern to a plain string.
# Unlike the pattern above, it accepts 0 and leading zeros in the 1-4 digit
# branch and has no word boundaries.
# The longest alternatives come first: regex alternation takes the first
# branch that matches, so with ``[0-9]{1,4}`` in front an unanchored search on
# "65535" would stop at "6553".
port_regex = (
    r'('
    r'6553[0-5]|'
    r'655[0-2][0-9]|'
    r'65[0-4][0-9]{2}|'
    r'6[0-4][0-9]{3}|'
    r'[1-5][0-9]{4}|'
    r'[0-9]{1,4}'
    r')'
)
IP[edit]
# Anchored validator: the whole string must be a dotted-quad IPv4 address with
# every octet in 0-255.  A raw string is used so that ``\.`` reaches the regex
# engine untouched instead of being an invalid string escape.
ip = re.compile(
    r'^(?:(?:25[0-5]|2[0-4][0-9]|[01]?[0-9][0-9]?)\.){3}'
    r'(?:25[0-5]|2[0-4][0-9]|[01]?[0-9][0-9]?)$'
)

# Unanchored building blocks for a scan target:
#   <ip>            e.g. 192.168.1.1
#   <ip>/<prefix>   e.g. 10.0.0.0/8      (prefix 0-32)
#   <ip>-<last>     e.g. 192.168.1.1-254 (last 1-255)
ip_regex = (
    r'(?:(?:25[0-5]|2[0-4][0-9]|[01]?[0-9][0-9]?)\.){3}'
    r'(?:25[0-5]|2[0-4][0-9]|[01]?[0-9][0-9]?)'
)
# The slash must apply to every prefix alternative.  With the single-digit
# branch outside the "/" group, "/8" could never match while a stray trailing
# digit (e.g. the "6" in "1.2.3.256") was accepted as a "prefix".
cidr_regex = r'(?:/(?:[12][0-9]|3[0-2]|[0-9]))'
ip_range_regex = r'(?:-(?:25[0-5]|2[0-4][0-9]|1[0-9]{2}|9[0-9]|[1-8][0-9]|[1-9]))'

# Address followed by an optional prefix length and an optional range suffix.
target_regex = re.compile(ip_regex + cidr_regex + r'?' + ip_range_regex + r'?')
NIE[edit]
# NIE pattern: a leading X, Y or Z, seven digits, then one uppercase letter
# from A-Z excluding I, O and U.  Anchored at the start only (no trailing `$`),
# so extra characters after the final letter are not rejected.
# Bare expression: bind it to a name or pass it to re.compile() before use.
r'^[XYZ]\d{7}[ABCDEFGHJKLMNPQRSTVWXYZ]'
DNI[edit]
# DNI pattern: eight digits followed by one uppercase letter from A-Z
# excluding I, O and U.  Unanchored, so it also matches inside longer text.
# Bare expression: bind it to a name or pass it to re.compile() before use.
r'\d{8}[ABCDEFGHJKLMNPQRSTVWXYZ]'
Spanish License Plates[edit]
# Licence plate pattern: up to two word characters, four digits, then one to
# three word characters.  Note that `\w` also matches digits and underscore,
# so this is a loose filter rather than a strict validator.
# Bare expression: bind it to a name or pass it to re.compile() before use.
r'\w{0,2}\d{4}\w{1,3}'
E-MAIL[edit]
# E-mail address pattern (single capturing group around the whole address),
# assembled from adjacent raw-string pieces so each part can be labelled.
EMAIL = (
    r"("
    r"[a-zA-Z0-9_.+-]+"    # local part
    r"@"
    r"[a-zA-Z0-9-]+"       # first domain label
    r"\.[a-zA-Z0-9-.]+"    # dot plus the remaining labels
    r")"
)
IBAN[edit]
# IBAN pattern: two letters, two digits, then alphanumeric characters in
# groups that may be separated by single spaces.  The trailing non-capturing
# group is optional, so shorter account numbers are accepted as well.
# The character classes use ``A-Z``; the range ``A-z`` would also admit the
# punctuation between "Z" and "a" in ASCII ([ \ ] ^ _ `).
IBAN = (
    r"[a-zA-Z]{2}[0-9]{2}"
    r" ?[a-zA-Z0-9]{0,4}"
    r" ?[a-zA-Z0-9]{0,4}"
    r" ?[a-zA-Z0-9]{0,3}"
    r"(?:"
    r"[a-zA-Z0-9]"
    r" ?[a-zA-Z0-9]{0,4}"
    r" ?[a-zA-Z0-9]{0,4}"
    r" ?[a-zA-Z0-9]{0,4}"
    r" ?[a-zA-Z0-9]{0,3}"
    r")?"
)
Payment Account Number[edit]
# Payment account number pattern: 13 to 19 digits, each optionally followed
# by spaces or hyphens, delimited by word boundaries.
PAN = (
    r"\b"
    r"(?:\d[ -]*?)"    # one digit plus any separators after it (lazy)
    r"{13,19}"
    r"\b"
)
Zip Code[edit]
# Spanish postal code: five digits whose first two are in the range 01-52.
# Look-arounds replace the consumed ``\D`` guards so that:
#   * a code at the very start or end of the text still matches,
#   * the match contains only the five digits, not the neighbouring characters,
#   * every alternative is protected against being cut out of a longer number.
ZIP_CODE_SPAIN = r"(?<!\d)(?:0[1-9]|[1-4][0-9]|5[0-2])[0-9]{3}(?!\d)"
Phone Number[edit]
# Optional separator between the digit groups: whitespace, a hyphen or a dot.
# Written as a raw string, with a proper non-capturing group ``(?:...)`` so it
# does not add groups to the match.
sep = r'(?:\s+|-|\.)?'

# Ten-digit phone number split into three capturing groups:
#   1. area code (three digits, optionally wrapped in parentheses)
#   2. first three digits
#   3. last four digits
# The quantifier braces are doubled because the template goes through
# str.format(): a bare ``{3}`` would be read as positional field 3 and raise
# IndexError before the pattern is ever compiled.
phone_re = re.compile(r'''
    (\d{{3}}|\(\d{{3}}\))   # area code
    {sep}                   # separator
    (\d{{3}})               # first 3
    {sep}                   # separator
    (\d{{4}})               # last 4
    '''.format(sep=sep), re.VERBOSE)
Spain Phone[edit]
# Spanish phone number: optional "+34"/"34" prefix, then nine digits starting
# with 9, 6 or 7, grouped either as 2-3-2-2 or 3-2-2-2 (or with no separators
# at all).  A separator is a single space, tab or hyphen.
# Inside a character class "|" is a literal pipe, not alternation, so the
# classes list only the intended characters.
spainphones = (
    r"(?:"
    r"(?:\+?34[ \t\-]?)?"                             # optional country code
    r"[967]"                                          # leading digit
    r"(?:\d[ \t\-]?[0-9]{3}|\d{2}[ \t\-]?[0-9]{2})"   # X-XXX or XX-XX
    r"[ \t\-]?[0-9]{2}"
    r"[ \t\-]?[0-9]{2}"
    r")"
)