|
10 | 10 | 'DICT_EDUCALINGO',
|
11 | 11 | 'DICT_SYNONYMCOM',
|
12 | 12 | 'DICT_THESAURUS',
|
13 |
| - 'DICT_WORDNET', |
| 13 | + 'DICT_MW', |
14 | 14 | 'MultiDictionary'
|
15 | 15 | ]
|
16 | 16 |
|
|
26 | 26 | from typing import Dict, Tuple, Optional, List, Union
|
27 | 27 | from warnings import warn
|
28 | 28 |
|
| 29 | +import requests |
| 30 | + |
29 | 31 | # Dicts
|
30 | 32 | _EDUCALINGO_LANGS = ('bn', 'de', 'en', 'es', 'fr', 'hi', 'it', 'ja', 'jv', 'ko', 'mr',
|
31 | 33 | 'ms', 'pl', 'pt', 'ro', 'ru', 'ta', 'tr', 'uk', 'zh')
|
32 | 34 |
|
33 | 35 | DICT_EDUCALINGO = 'educalingo'
|
34 | 36 | DICT_SYNONYMCOM = 'synonym'
|
35 | 37 | DICT_THESAURUS = 'thesaurus'
|
36 |
| -DICT_WORDNET = 'wordnet' |
| 38 | +DICT_MW = 'Merriam-Webster' |
37 | 39 |
|
38 | 40 | # Cache
|
39 | 41 | _CACHED_SOUPS: Dict[str, 'BeautifulSoup'] = {} # Stores cached web
|
@@ -362,7 +364,7 @@ def meaning(self, lang: str, word: str, dictionary: str = DICT_EDUCALINGO) -> Me
|
362 | 364 | types, words, wiki = [], '', ''
|
363 | 365 | word = self._process(word)
|
364 | 366 |
|
365 |
| - assert dictionary in (DICT_EDUCALINGO, DICT_WORDNET), 'Unsupported dictionary' |
| 367 | + assert dictionary in (DICT_EDUCALINGO, DICT_MW), 'Unsupported dictionary' |
366 | 368 | if lang not in self._langs.keys() or not self._langs[lang][1]:
|
367 | 369 | raise InvalidLangCode(f'{lang} code is not supported for meanings')
|
368 | 370 | elif word == '':
|
@@ -397,26 +399,38 @@ def meaning(self, lang: str, word: str, dictionary: str = DICT_EDUCALINGO) -> Me
|
397 | 399 |
|
398 | 400 | return types, words, wiki
|
399 | 401 |
|
400 |
| - elif dictionary == DICT_WORDNET and lang == 'en': |
401 |
| - if word == '': |
| 402 | + elif dictionary == DICT_MW and lang == 'en': |
| 403 | + if not word.strip(): |
402 | 404 | return {}
|
403 |
| - word = word.replace(' ', '+') |
404 |
| - # noinspection HttpUrlsUsage |
405 |
| - html = self._bsoup(f'http://wordnetweb.princeton.edu/perl/webwn?s={word}') |
406 |
| - types = html.findAll('h3') |
407 |
| - lists = html.findAll('ul') |
408 |
| - out = {} |
409 |
| - for a in types: |
410 |
| - reg = str(lists[types.index(a)]) |
411 |
| - meanings = [] |
412 |
| - for x in re.findall(r'\((.*?)\)', reg): |
413 |
| - if 'often followed by' in x: |
414 |
| - pass |
415 |
| - elif len(x) > 5 or ' ' in str(x): |
416 |
| - meanings.append(x.strip()) |
417 |
| - name = a.text.strip() |
418 |
| - out[name] = meanings |
419 |
| - return out |
| 405 | + |
| 406 | + url = f'https://www.merriam-webster.com/dictionary/{word}' |
| 407 | + response = requests.get(url) |
| 408 | + soup = BeautifulSoup(response.text, 'html.parser') |
| 409 | + |
| 410 | + definitions = {} |
| 411 | + |
| 412 | + pos_entries = soup.find_all('h2', class_='parts-of-speech') |
| 413 | + |
| 414 | + for pos_tag in pos_entries: |
| 415 | + part_of_speech = pos_tag.get_text(strip=True) |
| 416 | + |
| 417 | + if part_of_speech in definitions: |
| 418 | + continue |
| 419 | + |
| 420 | + definitions[part_of_speech] = [] |
| 421 | + |
| 422 | + definition_section = pos_tag.find_next('div', class_='vg') |
| 423 | + if not definition_section: |
| 424 | + continue |
| 425 | + |
| 426 | + for sense in definition_section.find_all('div', class_='sb'): |
| 427 | + definition_texts = sense.find_all('span', class_='dtText') |
| 428 | + for def_text in definition_texts: |
| 429 | + definition = def_text.get_text().lstrip(": ") |
| 430 | + if definition: |
| 431 | + definitions[part_of_speech].append(definition) |
| 432 | + |
| 433 | + return definitions |
420 | 434 |
|
421 | 435 | else:
|
422 | 436 | raise InvalidDictionary(f'Dictionary {dictionary} cannot handle language {lang}')
|
|
0 commit comments