import pytest
from wk07b import analyze_text, swear_filter

def test_analyze_text_czech():
    """Check the character statistics reported for a Czech phrase.

    The sample has 13 non-space characters, six of which carry
    diacritics (Ž, ý, ů, ň, á, č).
    """
    stats = analyze_text("Žlutý kůň skáče")
    expected = {
        'total': 13,
        'ascii': 7,
        'accented': 6,
        # NOTE(review): 4 vowels implies 'ý' is not counted as a vowel
        # by analyze_text -- confirm against the implementation.
        'vowels': 4,
        'consonants': 9,
    }
    assert stats == expected

def test_analyze_text_pinyin():
    """Check the character statistics reported for a pinyin word.

    The sample has eight characters, two of which carry tone marks
    (ō, ó).
    """
    stats = analyze_text("Zhōngguó")
    expected = {
        'total': 8,
        'ascii': 6,
        'accented': 2,
        'vowels': 3,
        'consonants': 5,
    }
    assert stats == expected

def test_analyze_text_vietnamese():
    """Check the character statistics reported for a Vietnamese sentence.

    The sample has 14 non-space characters; six of them (ô, ó, ỗ, ở, đ, â)
    are expected to be reported as accented.
    """
    stats = analyze_text("không có lỗi ở đây")
    expected = {
        'total': 14,
        'ascii': 8,
        'accented': 6,
        # NOTE(review): 6 vowels / 8 consonants implies 'y' and 'đ' are
        # both counted as consonants -- confirm against the implementation.
        'vowels': 6,
        'consonants': 8,
    }
    assert stats == expected



def test_swear_filter_full():
    """With no censor_type given, each swear word becomes all asterisks."""
    censored = swear_filter("You are a shit and a fuck")
    expected = "You are a **** and a ****"
    assert censored == expected

def test_swear_filter_partial():
    """'partial' censoring keeps the first and last letter of each swear word."""
    censored = swear_filter(
        "You are a shit and a fuck",
        censor_type='partial',
    )
    expected = "You are a s**t and a f**k"
    assert censored == expected

def test_swear_filter_bleep():
    """'bleep' censoring substitutes the word "bleep" for swear words.

    NOTE(review): the compound word in the sample is expected to yield
    "bleepbleep"; presumably it is matched as two separate swear words --
    confirm against the implementation.
    """
    censored = swear_filter(
        "You are a shit and a cocksucker",
        censor_type='bleep',
    )
    expected = "You are a bleep and a bleepbleep"
    assert censored == expected


@pytest.mark.xfail(reason="we don't know regular expressions")
def test_swear_filter_in_words():
    """Swear words embedded inside longer, innocent words must be left alone.

    Marked as an expected failure: this documents a known limitation
    rather than behavior the filter currently has.
    """
    # Every word here is harmless, so the filter should return the
    # sentence unchanged.
    harmless = "People in Scunthorpe like petits fours with shittake mushrooms"
    assert swear_filter(harmless) == harmless