Difference between revisions of "Python: Regex"
Jump to navigation
Jump to search
E-MAIL
Rafahsolis (talk | contribs) m (→IP) Tag: visualeditor |
Rafahsolis (talk | contribs) m Tag: visualeditor |
||
| Line 1: | Line 1: | ||
| + | == Quick Reference == | ||
| + | |||
| + | === All === | ||
| + | {| class="wikitable" | ||
| + | |+ | ||
| + | !Description | ||
| + | !Token | ||
| + | |- | ||
| + | |Newline | ||
| + | |\n | ||
| + | |- | ||
| + | |Carriage return | ||
| + | |\r | ||
| + | |- | ||
| + | |Null character | ||
| + | |\0 | ||
| + | |- | ||
| + | |A single character of: a, b, or c | ||
| + | |[abc] | ||
| + | |- | ||
| + | |A character except: a, b, c | ||
| + | |[^abc] | ||
| + | |- | ||
| + | |A character in the range a-z | ||
| + | |[a-z] | ||
| + | |- | ||
| + | |A character not in the range a-z | ||
| + | |[^a-z] | ||
| + | |- | ||
| + | |A character in the range a-z or A-Z | ||
| + | |[a-zA-Z] | ||
| + | |- | ||
| + | |Any single character | ||
| + | |. | ||
| + | |- | ||
| + | |Any whitespace character | ||
| + | |\s | ||
| + | |- | ||
| + | |Any non-whitespace character | ||
| + | |\S | ||
| + | |- | ||
| + | |any digit | ||
| + | |\d | ||
| + | |- | ||
| + | |Any non-digit | ||
| + | |\D | ||
| + | |- | ||
| + | |Any word character | ||
| + | |\w | ||
| + | |- | ||
| + | |Any non-word character | ||
| + | |\W | ||
| + | |- | ||
| + | |Vertical whitespace character | ||
| + | |\v | ||
| + | |- | ||
| + | |Match nth subpattern | ||
| + | |\n | ||
| + | |- | ||
| + | |Hex character YY | ||
| + | |\xYY | ||
| + | |- | ||
| + | |Octal character ddd | ||
| + | |\ddd | ||
| + | |- | ||
| + | |Backspace character | ||
| + | |[\b] | ||
| + | |- | ||
| + | |Makes any character literal | ||
| + | |\ | ||
| + | |- | ||
| + | |Capture everything enclosed | ||
| + | |(...) | ||
| + | |- | ||
| + | |Match either a or b | ||
| + | |<nowiki>(a|b)</nowiki> | ||
| + | |- | ||
| + | |Match everything enclosed | ||
| + | |(?:...) | ||
| + | |- | ||
| + | |Comment | ||
| + | |(?#...) | ||
| + | |- | ||
| + | |Named Capturing Group | ||
| + | |(?P<name>...) | ||
| + | |- | ||
| + | |Inline modifiers | ||
| + | |(?imsxXU) | ||
| + | |- | ||
| + | |Conditional statement | ||
| + | |<nowiki>(?(1)yes|no)</nowiki> | ||
| + | |- | ||
| + | |Match subpattern `name` | ||
| + | |(?P=name) | ||
| + | |- | ||
| + | |Positive Lookahead | ||
| + | |(?=...) | ||
| + | |- | ||
| + | |Negative Lookahead | ||
| + | |(?!...) | ||
| + | |- | ||
| + | |Positive Lookbehind | ||
| + | |(?<=...) | ||
| + | |- | ||
| + | |Negative Lookbehind | ||
| + | |(?<!...) | ||
| + | |- | ||
| + | |Zero or one of a | ||
| + | |a? | ||
| + | |- | ||
| + | |Zero or more of a | ||
| + | |a* | ||
| + | |- | ||
| + | |One or more of a | ||
| + | |a+ | ||
| + | |- | ||
| + | |Exactly 3 of a | ||
| + | |a{3} | ||
| + | |- | ||
| + | |Between 3 and 6 of a | ||
| + | |a{3,6} | ||
| + | |- | ||
| + | |Greedy quantifier | ||
| + | |a* | ||
| + | |- | ||
| + | |Lazy quantifier | ||
| + | |a*? | ||
| + | |- | ||
| + | |Start of string | ||
| + | |^ | ||
| + | |- | ||
| + | |End of string | ||
| + | |$ | ||
| + | |- | ||
| + | |Start of string | ||
| + | |\A | ||
| + | |- | ||
| + | |End of string | ||
| + | |\Z | ||
| + | |- | ||
| + | |A word boundary | ||
| + | |\b | ||
| + | |- | ||
| + | |A word boundary with postgres | ||
| + | |\y | ||
| + | |- | ||
| + | |A non word boundary | ||
| + | |\B | ||
| + | |- | ||
| + | |A non word boundary with postgres | ||
| + | |\Y | ||
| + | |- | ||
| + | |Global | ||
| + | |g | ||
| + | |- | ||
| + | |Multiline | ||
| + | |m | ||
| + | |- | ||
| + | |Case insensitive | ||
| + | |i | ||
| + | |- | ||
| + | |Ignore whitespace | ||
| + | |x | ||
| + | |- | ||
| + | |Single line | ||
| + | |s | ||
| + | |- | ||
| + | |Enable unicode support | ||
| + | |u | ||
| + | |- | ||
| + | |Restrict matches to ASCII only | ||
| + | |a | ||
| + | |- | ||
| + | |Complete match contents | ||
| + | |\g<0> | ||
| + | |} | ||
| + | |||
| + | == Examples == | ||
| + | |||
===Port=== | ===Port=== | ||
<source lang="python">port_regex = re.compile(r'\b(' | <source lang="python">port_regex = re.compile(r'\b(' | ||
Revision as of 12:59, 12 February 2020
Quick Reference
All
| Description | Token |
|---|---|
| Newline | \n |
| Carriage return | \r |
| Null character | \0 |
| A single character of: a, b, or c | [abc] |
| A character except: a, b, c | [^abc] |
| A character in the range a-z | [a-z] |
| A character not in the range a-z | [^a-z] |
| A character in the range a-z or A-Z | [a-zA-Z] |
| Any single character | . |
| Any whitespace character | \s |
| Any non-whitespace character | \S |
| Any digit | \d |
| Any non-digit | \D |
| Any word character | \w |
| Any non-word character | \W |
| Vertical whitespace character | \v |
| Match nth subpattern (backreference; n is the group number, e.g. \1) | \n |
| Hex character YY | \xYY |
| Octal character ddd | \ddd |
| Backspace character | [\b] |
| Makes any character literal | \ |
| Capture everything enclosed | (...) |
| Match either a or b | (a|b) |
| Match everything enclosed without capturing it (non-capturing group) | (?:...) |
| Comment | (?#...) |
| Named Capturing Group | (?P<name>...) |
| Inline modifiers | (?imsxXU) |
| Conditional statement | (?(1)yes|no) |
| Match subpattern `name` | (?P=name) |
| Positive Lookahead | (?=...) |
| Negative Lookahead | (?!...) |
| Positive Lookbehind | (?<=...) |
| Negative Lookbehind | (?<!...) |
| Zero or one of a | a? |
| Zero or more of a | a* |
| One or more of a | a+ |
| Exactly 3 of a | a{3} |
| Between 3 and 6 of a | a{3,6} |
| Greedy quantifier | a* |
| Lazy quantifier | a*? |
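| Start of string (or of each line in multiline mode) | ^ |
| End of string (or of each line in multiline mode) | $ |
| Start of string (regardless of multiline mode) | \A |
| End of string (regardless of multiline mode) | \Z |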
| A word boundary | \b |
| A word boundary with postgres | \y |
| A non word boundary | \B |
| A non word boundary with postgres | \Y |
| Global | g |
| Multiline | m |
| Case insensitive | i |
| Ignore whitespace | x |
| Single line | s |
| Enable unicode support | u |
| Restrict matches to ASCII only | a |
| Complete match contents | \g<0> |
Examples
Port
# Compiled matcher for a port number written as a standalone word.
# The alternatives cover 1-65535 without leading zeros and are listed from the
# highest range down, so the longest applicable branch is always tried first.
# Group 1 holds the port digits.
port_regex = re.compile(
    r'\b(%s)\b' % '|'.join((
        r'6553[0-5]',                   # 65530-65535
        r'655[0-2][0-9]',               # 65500-65529
        r'65[0-4][0-9][0-9]',           # 65000-65499
        r'6[0-4][0-9][0-9][0-9]',       # 60000-64999
        r'[1-5][0-9][0-9][0-9][0-9]',   # 10000-59999
        r'[1-9][0-9][0-9][0-9]',        # 1000-9999
        r'[1-9][0-9][0-9]',             # 100-999
        r'[1-9][0-9]',                  # 10-99
        r'[1-9]',                       # 1-9
    ))
)
# Port number as an unanchored, composable pattern fragment (group 1 = the port).
# The five-digit ranges are listed before the generic 1-4 digit branch: regex
# alternation takes the first branch that matches, so with the short branch
# first an unanchored search on "65535" would stop at "6553".
# The final branch still accepts 0 and leading zeros, as the original did.
port_regex = r'(6553[0-5]|655[0-2][0-9]|65[0-4][0-9]{2}|6[0-4][0-9]{3}|[1-5][0-9]{4}|[0-9]{1,4})'
IP
# Whole-string IPv4 dotted-quad validator: four octets in 0-255 separated by dots.
# A raw string is used so that `\.` reaches the regex engine untouched (in a
# plain string literal it is an invalid escape sequence and triggers a warning).
# `\Z` is used instead of `$` because `$` also matches just before a trailing
# newline, which would let "1.2.3.4\n" pass validation.
ip = re.compile(r'^(?:(?:25[0-5]|2[0-4][0-9]|[01]?[0-9][0-9]?)\.){3}(?:25[0-5]|2[0-4][0-9]|[01]?[0-9][0-9]?)\Z')
# Building blocks for a scan-target pattern: an IPv4 address optionally followed
# by a CIDR suffix ("/24") and/or a last-octet range suffix ("-254").

# IPv4 dotted quad, each octet 0-255; unanchored so it can be embedded.
ip_regex = r'(?:(?:25[0-5]|2[0-4][0-9]|[01]?[0-9][0-9]?)\.){3}(?:25[0-5]|2[0-4][0-9]|[01]?[0-9][0-9]?)'
# "/" followed by a prefix length 0-32. The single-digit branch must sit INSIDE
# the group that follows the slash; outside it, "/8" never matched and a bare
# trailing digit (no slash at all) was accepted as a "CIDR" suffix.
cidr_regex = r'(?:/(?:[12][0-9]|3[0-2]|[0-9]))'
# "-" followed by an end value 1-255 (no leading zeros).
ip_range_regex = r'(?:-(?:25[0-5]|2[0-4][0-9]|1[0-9]{2}|9[0-9]|[1-8][0-9]|[1-9]))'
# Full target: address, then optional CIDR suffix, then optional range suffix.
target_regex = re.compile(r'{ip_regex}{cidr_regex}?{ip_range_regex}?'.format(
    ip_regex=ip_regex, cidr_regex=cidr_regex, ip_range_regex=ip_range_regex))
NIE
# NIE pattern: a leading X, Y or Z, seven digits, then one letter from the
# listed set (the alphabet without I, O and U).
# Anchored at the start only; there is no end anchor, so trailing text is allowed.
r'^[XYZ]\d{7}[ABCDEFGHJKLMNPQRSTVWXYZ]'
DNI
# DNI pattern: eight digits followed by one letter from the listed set
# (the alphabet without I, O and U). Unanchored, so it also matches inside
# longer text.
r'\d{8}[ABCDEFGHJKLMNPQRSTVWXYZ]'
Spanish License Plates
# License plate pattern: up to two word characters, four digits, then one to
# three word characters.
# NOTE(review): `\w` also matches digits and "_", so this is looser than
# "letters only" — confirm whether `[A-Z]` was intended.
r'\w{0,2}\d{4}\w{1,3}'
# E-mail address pattern, captured as group 1: a local part made of letters,
# digits and "_.+-", an "@", a domain label of letters/digits/hyphens, a
# literal dot, then a tail of letters, digits, hyphens and dots.
# Unanchored, so it also matches inside larger text.
EMAIL = r"([a-zA-Z0-9_.+-]+@[a-zA-Z0-9-]+\.[a-zA-Z0-9-.]+)"
IBAN
# IBAN pattern: two letters, two digits, then alphanumeric characters that may
# be separated by single spaces into groups of up to four.
# Every character class uses `A-Z`; the earlier `A-z` range also covered the
# ASCII punctuation between "Z" and "a" ([ \ ] ^ _ `), which let strings such
# as "ES91[]^_" match.
IBAN = r"[a-zA-Z]{2}[0-9]{2} ?[a-zA-Z0-9]{0,4} ?[a-zA-Z0-9]{0,4} ?[a-zA-Z0-9]{0,3}(?:[a-zA-Z0-9] ?[a-zA-Z0-9]{0,4} ?[a-zA-Z0-9]{0,4} ?[a-zA-Z0-9]{0,4} ?[a-zA-Z0-9]{0,3})?"
Payment Account Number
# Payment account number pattern: 13 to 19 digits, each optionally followed by
# spaces or hyphens (matched lazily), with a word boundary on both sides.
PAN = r"\b(?:\d[ -]*?){13,19}\b"
Zip Code
# Spanish postal code: exactly five digits in the range 01000-52999.
# Digit lookarounds replace the previous `\D` guards, which were applied
# unevenly across the three branches, consumed the neighbouring character into
# the match, and could not match a code at the very start or end of the string.
ZIP_CODE_SPAIN = r"(?<!\d)(?:0[1-9][0-9]{3}|[1-4][0-9]{4}|5[0-2][0-9]{3})(?!\d)"
Phone Number
# Optional separator between digit groups: whitespace, a hyphen or a dot.
# Raw string, and a non-capturing group "(?:" — the earlier "(:?" was a
# capturing group that optionally matched a literal colon.
sep = r'(?:\s+|-|\.)?'
# Ten-digit phone number split into area code / first 3 / last 4
# (groups 1, 2 and 3). The template goes through str.format() to insert `sep`,
# so the regex quantifier braces are doubled ("{{3}}" -> "{3}"); left single,
# format() reads "{3}" as a positional field and raises IndexError.
phone_re = re.compile(r'''
    (\d{{3}}|\(\d{{3}}\))   # area code, optionally in parentheses
    {sep}                   # separator
    (\d{{3}})               # first 3
    {sep}                   # separator
    (\d{{4}})               # last 4
'''.format(sep=sep), re.VERBOSE)
Spain Phone
# Spanish phone number: optional "34" country code (with optional "+"), a
# first digit of 9, 6 or 7, then eight more digits grouped either 1+3+2+2 or
# 2+2+2+2, with an optional space, tab or hyphen between groups.
# Inside a character class "|" is a literal pipe, not alternation, so the
# classes are written as `[967]` and `[ \t-]`; the earlier `[9|6|7]` and
# `[ \t|\-]` accepted "|" as a leading digit and as a separator.
spainphones = r"(?:(?:\+?34[ \t-]?)?[967](?:\d[ \t-]?[0-9]{3}|\d{2}[ \t-]?[0-9]{2})[ \t-]?[0-9]{2}[ \t-]?[0-9]{2})"