import re
# Regular-expression cheat sheet: each pattern is printed on its own line,
# followed by a plain-English description of what it matches.
# The patterns are raw strings so that sequences such as \b and \d reach the
# output verbatim; in a normal string literal "\b" is a backspace character.
print("\nRegular Expressions\n")
print(r"[a-zA-Z]+")
print("matches one or more ascii letters")
print(r"[A-Z][a-z]*")
print("matches an uppercase ascii letter followed by zero or more lowercase ascii letters")
print(r"\bp[aeiou]{,2}t\b")
print("matches a word consisting of p followed by up to 2 vowels, followed by t")
print(r"\d+(\.\d+)?")
print("matches one or more digits followed possibly by a full stop and more digits")
print(r"([^aeiou][aeiou][^aeiou])*")
print("Matches zero or more CVC combinations")
print(r"\w+|[^\w\s]+")
print("matches a run of word characters, or a run of characters that are neither word characters nor whitespace")
def extract_dates(text):
    """
    Return every dd/mm/yyyy-shaped date found in ``text``, in order of appearance.

    A date is two digits, a slash, two digits, a slash and four digits,
    delimited by word boundaries. Only the shape is checked; the day and
    month values themselves are not validated.

    >>> extract_dates("My birthday is on 25/12/1990 and my friend's is on 01/01/2000.")
    ['25/12/1990', '01/01/2000']
    >>> extract_dates("No dates here.")
    []
    """
    date_pattern = r'\b\d{2}/\d{2}/\d{4}\b'
    return [hit.group(0) for hit in re.finditer(date_pattern, text)]
# Demo: run extract_dates on a sentence that contains two dates.
print(
    '\nExtract Dates\n',
    'Input: "My birthday is on 25/12/1990 and my friend\'s is on 01/01/2000."',
    sep='\n',
)
print(
    'Output:',
    extract_dates(text="My birthday is on 25/12/1990 and my friend's is on 01/01/2000."),
)
def normalize_spaces(text):
    """
    Collapse each run of whitespace in ``text`` into one space and trim both ends.

    Any whitespace matched by the regex class ``\\s`` counts (spaces, tabs,
    line breaks), so multi-line input comes back as a single line.

    >>> normalize_spaces("This is an example sentence.")
    'This is an example sentence.'
    >>> normalize_spaces(" Hello world! ")
    'Hello world!'
    """
    collapsed = re.sub(r'\s+', ' ', text)
    # After collapsing, at most one space can remain at either end.
    return collapsed.strip()
# Demo: run normalize_spaces on a single sentence.
print(
    '\nNormalize Spaces\n',
    'Input: "This is an example sentence."',
    sep='\n',
)
print('Output:', normalize_spaces(text="This is an example sentence."))
# Demo: normalize_spaces also collapses line breaks, so this multi-line input
# is printed back as one line. The heading names the function that is
# actually called here.
print('\nNormalize Spaces (multi-line input)\n')
print('Input:', """My phone number is 123-456-7890.
Year: 2023
About four o'clock.
""")
print('Output:', normalize_spaces("""My phone number is 123-456-7890.
Year: 2023
About four o'clock.
"""))
def mask_digits(text):
    """
    Return ``text`` with each digit character swapped for '#'.

    Every other character is left untouched, so the length and layout of
    the text are preserved.

    >>> mask_digits("My phone number is 123-456-7890.")
    'My phone number is ###-###-####.'
    >>> mask_digits("Year: 2023")
    'Year: ####'
    >>> mask_digits("No digits here.")
    'No digits here.'
    """
    single_digit = re.compile(r'\d')
    return single_digit.sub('#', text)
# Demo: run mask_digits on a sentence that contains two dates.
print(
    '\nMask Digits\n',
    'Input: "My birthday is on 25/12/1990 and my friend\'s is on 01/01/2000."',
    sep='\n',
)
print(
    'Output:',
    mask_digits(text="My birthday is on 25/12/1990 and my friend's is on 01/01/2000."),
)
def count_word_occurrences(text, word):
    """
    Count how often ``word`` appears as a standalone word in ``text``.

    Matching is case-insensitive. An occurrence is standalone when it is not
    directly preceded or followed by a word character (letter, digit or
    underscore). This also holds for search terms that begin or end with
    punctuation, such as "c++". An empty ``word`` is never counted.

    Parameters:
        text: the string to search.
        word: the literal word to look for; regex metacharacters in it are
            matched literally.

    Returns:
        The number of non-overlapping standalone occurrences, as an int.

    >>> count_word_occurrences("This is an example. This is fun!", "this")
    2
    >>> count_word_occurrences("apple pie and pineapple", "apple")
    1
    >>> count_word_occurrences("Nothing to see here.", "banana")
    0
    >>> count_word_occurrences("Cat, caterpillar, and the cat.", "cat")
    2
    >>> count_word_occurrences("I like C++ and c++.", "c++")
    2
    """
    if not word:
        # An empty pattern would match at positions between characters,
        # which is not a meaningful word count.
        return 0
    # Lookarounds are used instead of \b: \b needs a word character on one
    # side, so it can never match next to a term edge that is punctuation.
    pattern = r'(?<!\w)' + re.escape(word) + r'(?!\w)'
    return len(re.findall(pattern, text, flags=re.IGNORECASE))
# Demo: only the standalone "apple" in "apple-pen" counts; the "apple"
# inside "pineapple" is part of a longer word.
print('\nCount Word Occurrences\n')
print('Input: "Pen, pineapple, apple-pen and pineapple-pen", "apple"')
print('Output:',
      count_word_occurrences("Pen, pineapple, apple-pen and pineapple-pen",
                             'apple'))
# Longer sample text for the word-count demo. The value starts with a line
# break and every lyric line ends with one.
lyrics = (
    "\n"
    "I have a pen\n"
    "I have an apple\n"
    "Ah\n"
    "Apple pen\n"
    "I have a pen\n"
    "I have pineapple\n"
    "Ah\n"
    "Pineapple pen\n"
    "Apple pen\n"
    "Pineapple pen\n"
    "Ah\n"
    "Pen Pie Pineapple Apple Pen\n"
    "Pen Pie Pineapple Apple Pen\n"
)
# Demo: count standalone "apple" in the lyrics, then echo the lyrics.
print('Input: PPAP, "apple"')
print('Output:', count_word_occurrences(text=lyrics, word='apple'))
# The default separator puts a single space between "In:\n" and the lyrics;
# it is spelled out here so the output stays exactly the same.
print("In:\n", lyrics, sep=' ')
if __name__ == "__main__":
    # When run as a script, execute the docstring examples in this module.
    import doctest

    doctest.testmod(
        verbose=True,  # normally use default verbose=False
    )