import nltk
# Each cmudict entry is a (word, pronunciation) pair; the pronunciation is a
# sequence of phone codes, so l[0] below is the first phone of the word.
cmu = nltk.corpus.cmudict.entries()

# Collect, in a single pass, every entry whose first letter differs from its
# lower-cased first phone code.  Everything below reuses this list instead of
# re-filtering the whole corpus each time.
mismatches = [(word, phones) for (word, phones) in cmu
              if word[0] != phones[0].lower()]

# Entries that are not mismatches are exactly the ones where the first letter
# equals the first phone code.
total = len(cmu)
same_start = total - len(mismatches)
print(f"{same_start / total:f} of the words start with the same code")

## find irregular patterns
## Easy: print them all and eyeball them
for (word, phones) in mismatches:
    print(word, phones)

## Could count them
# print([(word[0], phones[0]) for (word, phones) in mismatches][:10])
print(nltk.FreqDist((word[0], phones[0])
                    for (word, phones) in mismatches).most_common(20))
###
### I like the new wn module
###
### you may have to install it
### pip install wn
###
# Section banner for the part of the script that queries WordNet through the
# standalone `wn` package.
print("\n\nLook up wordnet using the wn module\n\n")
import wn

# The same lexicon specifier is used both to download the data and to open
# it, so name it once.
_lexicon_spec = 'omw-en:1.4'
wn.download(_lexicon_spec)
wnen = wn.Wordnet(_lexicon_spec)
def show(ss):
    """Print one line for synset *ss*: its id followed by its lemmas."""
    synset_id = ss.id
    lemma_list = ss.lemmas()
    print(synset_id, lemma_list)
def _show_related(word, relation, heading):
    """Show every synset of *word*, each followed by its related synsets.

    word     -- the form to look up in the `wnen` lexicon
    relation -- name of the synset method to call ('hyponyms' or 'hypernyms')
    heading  -- line printed between a synset and its related synsets

    A blank line is printed after each synset and one more after the last.
    NOTE(review): the placement of those two blank-line prints was
    reconstructed from source whose indentation was lost -- confirm.
    """
    for synset in wnen.synsets(word):
        show(synset)
        print(heading)
        for related in getattr(synset, relation)():
            show(related)
        print()
    print()


print("Synset for student and its hyponyms")
_show_related('student', 'hyponyms', "Hyponyms: ")

print("Synset for student and its hypernyms")
_show_related('student', 'hypernyms', "Hypernyms: ")

print("Synset for professor and its hypernyms")
_show_related('professor', 'hypernyms', "Hypernyms: ")

print("Some size adjectives and their antonyms")
for adjective in ['big', 'large', 'great']:
    for synset in wnen.synsets(adjective):
        show(synset)
        # Antonymy is looked up per sense (word-in-synset), not per synset.
        for sense in synset.senses():
            print(sense.id, sense.word().forms())
            # Distinct name: the loop variable here must not reuse the
            # adjective loop's name.
            for antonym in sense.get_related('antonym'):
                print('<=> ', antonym.id, antonym.word().forms())
        print()
print()

# Try every adjective + noun pair as a two-word phrase and show the ones the
# lexicon actually contains.
for adjective in "big/large/great".split('/'):
    for noun in "sister/uncle/toe".split('/'):
        phrase = f"{adjective} {noun}"
        matches = wnen.synsets(phrase)  # query once, reuse for test and display
        if matches:
            print(phrase)
            for synset in matches:
                show(synset)
###
### Or you can use nltk
###
print("\n\nLook up wordnet using nltk\n\n")
# NOTE: this import rebinds the name `wn`.  From here on `wn` is NLTK's
# WordNet corpus reader, not the standalone `wn` package imported earlier.
from nltk.corpus import wordnet as wn


def _print_related(word, relation, heading):
    """Print every NLTK synset of *word*, each followed by its related synsets.

    word     -- the word to look up
    relation -- name of the synset method to call ('hyponyms' or 'hypernyms')
    heading  -- line printed between a synset and its related synsets

    A blank line is printed after each synset and one more after the last.
    NOTE(review): the placement of those two blank-line prints was
    reconstructed from source whose indentation was lost -- confirm.
    """
    for synset in wn.synsets(word):
        print(synset)
        print(heading)
        for related in getattr(synset, relation)():
            print(related)
        print()
    print()


_print_related('student', 'hyponyms', "Hyponyms: ")
_print_related('student', 'hypernyms', "Hypernyms: ")
_print_related('professor', 'hypernyms', "Hypernyms: ")

# Size adjectives: antonyms are a property of lemmas, so list each lemma of
# each synset together with its antonyms.
for adjective in ['big', 'large', 'great']:
    for synset in wn.synsets(adjective):
        print(synset)
        for lemma in synset.lemmas():
            print(lemma, lemma.antonyms())
        print()
print()

# Try every adjective + noun pair as a compound and print the ones found.
for adjective in "big/large/great".split('/'):
    for noun in "sister/uncle/toe".split('/'):
        compound = f"{adjective}_{noun}"  ## wordnet uses '_' as word separator
        matches = wn.synsets(compound)  # query once, reuse for test and output
        if matches:
            print(compound, matches)