Skip to main content
The re module provides regular expression matching operations.

Module Import

import re

Basic Matching

search() - Find First Match

import re

text = "The quick brown fox"

# re.search() scans the whole string and returns the first match (or None)
match = re.search(r"quick", text)
if match is not None:
    print(match.group())  # 'quick' -- the matched substring
    print(match.start())  # 4       -- index where the match begins
    print(match.end())    # 9       -- index just past the match

match() - Match at Beginning

import re

text = "Hello, World!"

# re.match() only tries the pattern at index 0; it never scans forward
match = re.match(r"Hello", text)
if match is not None:
    print("Found at start")

findall() - Find All Matches

import re

# NOTE: the addresses below are placeholders. The published page had its
# e-mail addresses obfuscated, which left strings without an "@" that the
# pattern could never match.
text = "Contact: alice@example.com or bob@example.org"

# Find all email addresses.
# \w+ does not match "." or "-", so this simple pattern only suits plain
# addresses such as the ones above (no dotted local parts or sub-domains).
emails = re.findall(r'\w+@\w+\.\w+', text)
print(emails)  # ['alice@example.com', 'bob@example.org']

finditer() - Iterator of Matches

import re

text = "The price is $10.50 and $25.00"

# re.finditer() yields one Match object per hit, so both the captured
# amount (group 1, without the "$") and its position are available.
for match in re.finditer(r"\$([0-9.]+)", text):
    amount = match.group(1)
    position = match.start()
    print(f"Found ${amount} at position {position}")

Pattern Syntax

Common Patterns

import re

# \d matches a digit (same as [0-9] for ASCII text)
re.findall(r"\d+", "abc123def456")  # ['123', '456']

# \w matches a word character: letters, digits and underscore
re.findall(r"\w+", "hello_world 123")  # ['hello_world', '123']

# \s matches whitespace; \s+ collapses runs of it into one separator
re.split(r"\s+", "split  by   spaces")  # ['split', 'by', 'spaces']

# . matches any single character except a newline
re.findall(r"h.t", "hat hit hot")  # ['hat', 'hit', 'hot']

# ^ anchors at the start of the string, $ at the end
re.match(r"^Hello", "Hello, World!")  # Matches
re.search(r"World!$", "Hello, World!")  # Matches

Quantifiers

import re

# *  -> the preceding item may repeat zero or more times
re.findall(r"ab*", "a ab abb abbb")  # ['a', 'ab', 'abb', 'abbb']

# +  -> the preceding item must appear at least once
re.findall(r"ab+", "a ab abb abbb")  # ['ab', 'abb', 'abbb']

# ?  -> the preceding item is optional (zero or one)
re.findall(r"ab?", "a ab abb")  # ['a', 'ab', 'ab']

# {n} -> exactly n repetitions; '45' is too short, '6789' yields '678'
re.findall(r"\d{3}", "123 45 6789")  # ['123', '678']

# {m,n} -> between m and n repetitions, as many as possible
re.findall(r"\d{2,4}", "1 12 123 1234 12345")  # ['12', '123', '1234', '1234']

Substitution

sub() - Replace Pattern

import re

text = "Contact: 123-456-7890"

# Mask every digit in the string
result = re.sub(r"\d", "X", text)
print(result)  # 'Contact: XXX-XXX-XXXX'

# count= limits how many replacements are made (left to right)
result = re.sub(r"\d", "X", text, count=3)
print(result)  # 'Contact: XXX-456-7890'

sub() with Function

import re

def replace_func(match):
    """Return the text of *match* converted to upper case.

    Intended as the ``repl`` callable for ``re.sub()``, which calls it once
    per match and substitutes the returned string.
    """
    matched_text = match.group(0)
    return matched_text.upper()


text = "hello world"
# Passing a function instead of a string lets each match be transformed
result = re.sub(r"\w+", replace_func, text)
print(result)  # 'HELLO WORLD'

Groups and Capturing

import re

text = "John Doe, age: 30"

# Parentheses create numbered capture groups, counted from 1
match = re.search(r"(\w+) (\w+), age: (\d+)", text)
if match is not None:
    print(match.group(0))  # 'John Doe, age: 30' (group 0 is the whole match)
    print(match.group(1))  # 'John' (first group)
    print(match.group(2))  # 'Doe' (second group)
    print(match.group(3))  # '30' (third group)
    print(match.groups())  # ('John', 'Doe', '30') -- all groups as a tuple

# (?P<name>...) gives a group a name that can be used instead of its number
match = re.search(r"(?P<first>\w+) (?P<last>\w+)", text)
if match is not None:
    print(match.group("first"))  # 'John'
    print(match.group("last"))   # 'Doe'
    print(match.groupdict())     # {'first': 'John', 'last': 'Doe'}

Compiled Patterns

import re

# A compiled pattern object exposes the same methods as the re module
pattern = re.compile(r"\d+")

result1 = pattern.findall("123 abc 456")
result2 = pattern.search("abc 789 def")

# Flags are supplied once, at compile time
pattern = re.compile(r"hello", flags=re.IGNORECASE)
match = pattern.search("HELLO World")  # Matches

Flags

import re

# IGNORECASE: letters match regardless of case
re.search(r"hello", "HELLO", re.IGNORECASE)

# MULTILINE: ^ and $ also match at the start/end of every line
text = "line 1\nline 2\nline 3"
re.findall(r"^line", text, re.MULTILINE)  # ['line', 'line', 'line']

# DOTALL: . matches newline characters as well
re.search(r"a.*b", "a\nb", re.DOTALL)

# VERBOSE: whitespace in the pattern is ignored and # starts a comment
pattern = re.compile(r"""
    \d{3}  # Area code
    -      # Separator
    \d{4}  # Number
""", re.VERBOSE)

Practical Examples

Email Validation

import re

def is_valid_email(email):
    """Return True if *email* has the basic shape ``local@domain.tld``.

    This is a pragmatic syntax check, not full RFC 5322 validation: the
    local part may contain letters, digits and ``._%+-``; the domain may
    contain letters, digits, dots and hyphens; the final label must be at
    least two letters.

    Args:
        email: The string to check.

    Returns:
        bool: True when the whole string matches the pattern, else False.
    """
    pattern = r'[a-zA-Z0-9._%+-]+@[a-zA-Z0-9.-]+\.[a-zA-Z]{2,}'
    # fullmatch() requires the entire string to match. Anchoring with
    # re.match() and a trailing "$" is weaker: "$" also matches just before
    # a final newline, so 'user@example.com\n' would be accepted.
    return re.fullmatch(pattern, email) is not None

print(is_valid_email('user@example.com'))  # True
print(is_valid_email('invalid.email'))     # False

Extract URLs

import re

text = "Visit https://example.com or http://test.org"

# Scheme (http or https), then a host made of at least two dot-separated
# labels. The pattern has no capturing group, so each hit is the full match.
url_pattern = r'https?://[\w.-]+(?:\.[\w.-]+)+'
urls = [hit.group(0) for hit in re.finditer(url_pattern, text)]
print(urls)  # ['https://example.com', 'http://test.org']

Phone Number Formatting

import re

def format_phone(phone):
    """Rewrite the digits of *phone* in ``(XXX) XXX-XXXX`` form.

    All non-digit characters are discarded first. Each run of ten
    consecutive digits in what remains is then reformatted; any digits that
    do not belong to such a run are left in place unchanged.
    """
    # Keep only the digit characters, in their original order
    digits_only = "".join(re.findall(r'\d', phone))

    def _as_phone(hit):
        # Groups: 3-digit area code, 3-digit prefix, 4-digit line number
        area, prefix, line = hit.groups()
        return f"({area}) {prefix}-{line}"

    return re.sub(r'(\d{3})(\d{3})(\d{4})', _as_phone, digits_only)


print(format_phone('1234567890'))  # '(123) 456-7890'
print(format_phone('123.456.7890'))  # '(123) 456-7890'
Compile patterns you use repeatedly for better performance.
Be careful with greedy quantifiers: by default *, + and ? match as much text as possible. Append ? to make a quantifier non-greedy (lazy), e.g. use .*? instead of .*

string

String operations

Built-in Types

String type

Build docs developers (and LLMs) love