# test with python -m doctest -v example.py


def IsReduplicated(word):
    """
    Return True if a word is reduplicated, i.e. it consists of two
    identical, non-empty halves joined by exactly one hyphen.

    The comparison ignores case.  A word with no hyphen, with more than
    one hyphen, or with an empty half (such as a lone "-") is not
    reduplicated.

    >>> IsReduplicated("buku-buku")
    True
    >>> IsReduplicated("Buku-buku")
    True
    >>> IsReduplicated("buku")
    False
    >>> IsReduplicated("itu-buku")
    False
    >>> IsReduplicated("-")
    False
    >>> IsReduplicated("buku-buku-buku")
    False
    """
    parts = word.split("-")
    # Exactly one hyphen yields exactly two parts; the emptiness check
    # rejects "-" (which splits into two equal empty strings).
    return (
        len(parts) == 2
        and parts[0] != ""
        and parts[0].lower() == parts[1].lower()
    )

def find_redup(words):
    """
    Count how often each reduplicated word occurs in words.

    Words are tested with IsReduplicated and counted under their
    lower-cased form, so "Agak-agak" and "agak-agak" share one entry.
    Returns a dict mapping each lower-cased reduplicated word to its
    count, in order of first appearance.

    >>> find_redup(["buku-buku", "kucing", "Agak-agak", "agak-agak"])
    {'buku-buku': 1, 'agak-agak': 2}
    """
    counts = {}
    for candidate in words:
        if not IsReduplicated(candidate):
            continue
        key = candidate.lower()
        counts[key] = counts.get(key, 0) + 1
    return counts


### Note: it is hard to put multiline text in a doctest, so newlines are escaped as \\n
def strip_gutenberg(raw):
    """
    Return the body of a Project Gutenberg text without its header and footer.

    Everything up to and including the line '*** START' ... ' ***' is
    dropped, as is everything from the line
    '*** END OF THE PROJECT GUTENBERG EBOOK' onwards.  Leading and
    trailing whitespace is stripped from what remains.

    Raises ValueError if the start marker, its closing ' ***', or the
    end marker cannot be found.

    >>> strip_gutenberg('The Project Gutenberg eBook of All cats are gray\\n...\\n*** START OF THE PROJECT GUTENBERG EBOOK ALL CATS ARE GRAY ***\\n_An odd story, made up of oddly assorted elements that include a man, \\n    a woman, a black cat, a treasure--and an invisible being that had to\\n  be seen to be believed._\\n*** END OF THE PROJECT GUTENBERG EBOOK ALL CATS ARE GRAY ***\\nMost people start at our website which has the main PG search\\nfacility: www.gutenberg.org.')
    '_An odd story, made up of oddly assorted elements that include a man, \\n    a woman, a black cat, a treasure--and an invisible being that had to\\n  be seen to be believed._'
    >>> strip_gutenberg('*** START OF X ***body*** END OF THE PROJECT GUTENBERG EBOOK X ***')
    'body'
    """
    start_marker = '*** START'
    start_close = ' ***'
    end_marker = '*** END OF THE PROJECT GUTENBERG EBOOK'

    ### go to the start of the start message
    header_at = raw.index(start_marker)
    ### go to the end of the start message; skip exactly the closing
    ### marker and leave any line break after it to the final strip()
    body_at = raw.index(start_close, header_at) + len(start_close)

    ### go to the start of the end message
    end_at = raw.index(end_marker, body_at)
    return raw[body_at:end_at].strip()



def process_line(line, lesson):
    """
    Parse one vocabulary line from the duolingo wiki into a dict.

    The line has the form '<czech> = <english>', where the English side
    lists alternatives separated by ', ' and/or '/'.  The result holds
    the lesson number under 'lesson', the Czech text under 'cs' and the
    list of English alternatives under 'en'.

    >>> process_line('ne = no/not', 1)
    {'lesson': 1, 'cs': 'ne', 'en': ['no', 'not']}
    >>> process_line('na shledanou = goodbye/bye, see you soon', 3)
    {'lesson': 3, 'cs': 'na shledanou', 'en': ['goodbye', 'bye', 'see you soon']}
    """
    czech, english = line.split(' = ')
    # Flatten both separator levels: comma-separated groups first, then
    # the slash-separated alternatives inside each group.
    glosses = [
        alternative
        for group in english.split(', ')
        for alternative in group.split('/')
    ]
    return {'lesson': lesson, 'cs': czech, 'en': glosses}


if __name__ == "__main__":
    # Running the file directly executes every doctest in this module.
    from doctest import testmod

    testmod(verbose=True)  # normally use default verbose=False