# Usage

```python
->>> from pprint import pprint
->>> import scrapedict as sd
->>> from urllib.request import urlopen
+import scrapedict as sd
+from urllib.request import urlopen

# Fetch the content from the Urban Dictionary page for "larping"
->>> url = "https://www.urbandictionary.com/define.php?term=larping"
->>> content = urlopen(url).read().decode()
+url = "https://www.urbandictionary.com/define.php?term=larping"
+content = urlopen(url).read().decode()

# Define the fields to be extracted
->>> fields = {
-...     "word": sd.text(".word"),
-...     "meaning": sd.text(".meaning"),
-...     "example": sd.text(".example"),
-... }
+fields = {
+    "word": sd.text(".word"),
+    "meaning": sd.text(".meaning"),
+    "example": sd.text(".example"),
+}

# Extract the data using scrapedict
->>> item = sd.extract(fields, content)
+item = sd.extract(fields, content)

# The result is a dictionary with the word, its meaning, and an example usage.
# Here, we perform a couple of assertions to demonstrate the expected structure and content.
->>> assert isinstance(item, dict)
->>> assert item["word"] == "Larping"
-
+assert isinstance(item, dict)
+assert item["word"] == "Larping"
```
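The removed doctest imported `pprint` to display results; the same inspection works with the plain code above. A minimal sketch (the dictionary values depend on the live page):

```python
from pprint import pprint

# Pretty-print the extracted item; the keys come from the fields defined above.
pprint(item)
# Expected shape (values depend on the live page):
# {'example': '...', 'meaning': '...', 'word': 'Larping'}
```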


# The orange site example

```python
->>> import scrapedict as sd
->>> from urllib.request import urlopen
+import scrapedict as sd
+from urllib.request import urlopen

# Fetch the content from the Hacker News homepage
->>> url = "https://news.ycombinator.com/"
->>> content = urlopen(url).read().decode()
+url = "https://news.ycombinator.com/"
+content = urlopen(url).read().decode()

# Define the fields to extract: title and URL for each news item
->>> fields = {
-...     "title": sd.text(".titleline a"),
-...     "url": sd.attr(".titleline a", "href"),
-... }
+fields = {
+    "title": sd.text(".titleline a"),
+    "url": sd.attr(".titleline a", "href"),
+}

# Use scrapedict to extract all news items as a list of dictionaries
->>> items = sd.extract_all(".athing", fields, content)
+items = sd.extract_all(".athing", fields, content)

# The result is a list of dictionaries, each containing the title and URL of a news item.
# Here, we assert that 30 items are extracted, which is the typical number of news items on the Hacker News homepage.
->>> assert len(items) == 30
-
+assert len(items) == 30
```
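A quick usage sketch for the extracted list (an assumed iteration, not part of the diff; titles and hrefs come from the live page, and story URLs may be relative, e.g. `item?id=...`):

```python
# Print each extracted story as "title -> url".
for story in items:
    print(f"{story['title']} -> {story['url']}")
```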