Skip to content

Commit 261d492

Browse files
authored
Merge pull request #22 from flowerwallpaper/master
Update _dictionary.py to use Merriam-Webster
2 parents 1e19950 + c3c6ed4 commit 261d492

File tree

1 file changed

+36
-22
lines changed

1 file changed

+36
-22
lines changed

PyMultiDictionary/_dictionary.py

Lines changed: 36 additions & 22 deletions
Original file line number | Diff line number | Diff line change
@@ -10,7 +10,7 @@
1010
'DICT_EDUCALINGO',
1111
'DICT_SYNONYMCOM',
1212
'DICT_THESAURUS',
13-
'DICT_WORDNET',
13+
'DICT_MW',
1414
'MultiDictionary'
1515
]
1616

@@ -26,14 +26,16 @@
2626
from typing import Dict, Tuple, Optional, List, Union
2727
from warnings import warn
2828

29+
import requests
30+
2931
# Dicts
3032
_EDUCALINGO_LANGS = ('bn', 'de', 'en', 'es', 'fr', 'hi', 'it', 'ja', 'jv', 'ko', 'mr',
3133
'ms', 'pl', 'pt', 'ro', 'ru', 'ta', 'tr', 'uk', 'zh')
3234

3335
DICT_EDUCALINGO = 'educalingo'
3436
DICT_SYNONYMCOM = 'synonym'
3537
DICT_THESAURUS = 'thesaurus'
36-
DICT_WORDNET = 'wordnet'
38+
DICT_MW = 'Merriam-Webster'
3739

3840
# Cache
3941
_CACHED_SOUPS: Dict[str, 'BeautifulSoup'] = {} # Stores cached web
@@ -362,7 +364,7 @@ def meaning(self, lang: str, word: str, dictionary: str = DICT_EDUCALINGO) -> Me
362364
types, words, wiki = [], '', ''
363365
word = self._process(word)
364366

365-
assert dictionary in (DICT_EDUCALINGO, DICT_WORDNET), 'Unsupported dictionary'
367+
assert dictionary in (DICT_EDUCALINGO, DICT_MW), 'Unsupported dictionary'
366368
if lang not in self._langs.keys() or not self._langs[lang][1]:
367369
raise InvalidLangCode(f'{lang} code is not supported for meanings')
368370
elif word == '':
@@ -397,26 +399,38 @@ def meaning(self, lang: str, word: str, dictionary: str = DICT_EDUCALINGO) -> Me
397399

398400
return types, words, wiki
399401

400-
elif dictionary == DICT_WORDNET and lang == 'en':
401-
if word == '':
402+
elif dictionary == DICT_MW and lang == 'en':
403+
if not word.strip():
402404
return {}
403-
word = word.replace(' ', '+')
404-
# noinspection HttpUrlsUsage
405-
html = self._bsoup(f'http://wordnetweb.princeton.edu/perl/webwn?s={word}')
406-
types = html.findAll('h3')
407-
lists = html.findAll('ul')
408-
out = {}
409-
for a in types:
410-
reg = str(lists[types.index(a)])
411-
meanings = []
412-
for x in re.findall(r'\((.*?)\)', reg):
413-
if 'often followed by' in x:
414-
pass
415-
elif len(x) > 5 or ' ' in str(x):
416-
meanings.append(x.strip())
417-
name = a.text.strip()
418-
out[name] = meanings
419-
return out
405+
406+
url = f'https://www.merriam-webster.com/dictionary/{word}'
407+
response = requests.get(url)
408+
soup = BeautifulSoup(response.text, 'html.parser')
409+
410+
definitions = {}
411+
412+
pos_entries = soup.find_all('h2', class_='parts-of-speech')
413+
414+
for pos_tag in pos_entries:
415+
part_of_speech = pos_tag.get_text(strip=True)
416+
417+
if part_of_speech in definitions:
418+
continue
419+
420+
definitions[part_of_speech] = []
421+
422+
definition_section = pos_tag.find_next('div', class_='vg')
423+
if not definition_section:
424+
continue
425+
426+
for sense in definition_section.find_all('div', class_='sb'):
427+
definition_texts = sense.find_all('span', class_='dtText')
428+
for def_text in definition_texts:
429+
definition = def_text.get_text().lstrip(": ")
430+
if definition:
431+
definitions[part_of_speech].append(definition)
432+
433+
return definitions
420434

421435
else:
422436
raise InvalidDictionary(f'Dictionary {dictionary} cannot handle language {lang}')

0 commit comments

Comments
 (0)