+{"name": "douzone-crawl", "metadata": {"name": "Douzone-crawl", "version": "0.1.0", "classifiers": ["License :: OSI Approved :: MIT License", "Operating System :: OS Independent", "Programming Language :: Python :: 3"], "home_page": "https://github.com/zozni-douzone/Douzone-crawl.git", "license": null, "documentation_url": null, "dependencies": [], "summary": "A simple web crawling library", "upload_time": 1740616641}, "documentation": ["# Douzone-crawl\n\n\n\n\n\n\ud83d\udd0d \ube60\ub974\uace0 \uc26c\uc6b4 \uc6f9 \ud06c\ub864\ub9c1\uc744 \uc704\ud55c \ud30c\uc774\uc36c \ub77c\uc774\ube0c\ub7ec\ub9ac\uc785\ub2c8\ub2e4. Google \uac80\uc0c9 \uacb0\uacfc\uc640 \uc6f9 \ud398\uc774\uc9c0 \ucf58\ud150\uce20\ub97c \uc190\uc27d\uac8c \ucd94\ucd9c\ud558\uc138\uc694.\n\n## \uc8fc\uc694 \uae30\ub2a5\n\n- Google \uac80\uc0c9 \uacb0\uacfc \uc218\uc9d1\n- \uac80\uc0c9 \uacb0\uacfc \ud398\uc774\uc9c0 \ub0b4\uc6a9 \ucd94\ucd9c\n- \uac80\uc0c9 \uacb0\uacfc\uc758 \uc81c\ubaa9, URL, \ub0a0\uc9dc, \uc124\uba85 \uc815\ubcf4 \uc81c\uacf5\n- \uacb0\uacfc \ud30c\uc77c \uc800\uc7a5 \uae30\ub2a5\n\n## \uc124\uce58 \ubc29\ubc95\n\n```bash\npip install Douzone-crawl\n```\n\n## \uc0ac\uc6a9 \ubc29\ubc95\n\n### \uae30\ubcf8 \uac80\uc0c9\n\n```python\nfrom Douzone_crawl import search\n\n# \uac80\uc0c9 \uc218\ud589 (\uae30\ubcf8 \ucd5c\ub300 \uacb0\uacfc \uc218: 5)\nresults = search(\"\ub354\uc874\ube44\uc988\uc628\", max_results=3)\n\n# \uacb0\uacfc \ucd9c\ub825\nfor title, link, date, description in results:\n print(f\"\uc81c\ubaa9: {title}\")\n print(f\"\ub9c1\ud06c: {link}\")\n print(f\"\ub0a0\uc9dc: {date}\")\n print(f\"\uc124\uba85: {description}\")\n print(\"-\" * 50)\n```\n\n### \uc6f9 \ud398\uc774\uc9c0 \ub0b4\uc6a9 \ucd94\ucd9c\n\n```python\nfrom Douzone_crawl import get_content\n\n# \ud2b9\uc815 URL\uc5d0\uc11c \ucf58\ud150\uce20 \ucd94\ucd9c\ncontent = get_content(\"https://example.com\")\nprint(content)\n```\n\n### \uac80\uc0c9 \ubc0f \ub0b4\uc6a9 \ucd94\ucd9c \uacb0\ud569\n\n```python\nfrom Douzone_crawl import search_and_extract\n\n# \uac80\uc0c9 \uacb0\uacfc\uc640 \ud398\uc774\uc9c0 \ub0b4\uc6a9\uc744 \ud568\uaed8 \ucd94\ucd9c\nreport = search_and_extract(\n \"\ub354\uc874\ube44\uc988\uc628\", \n max_results=3,\n save_to_file=\"results.txt\" # \uc120\ud0dd\uc801 \ud30c\uc77c \uc800\uc7a5\n)\n\n# \uc0dd\uc131\ub41c \ubcf4\uace0\uc11c \ucd9c\ub825\nprint(report)\n```\n\n## \uc694\uad6c\uc0ac\ud56d\n\n- Python 3.6+\n- Selenium\n- Chrome WebDriver\n- python-dotenv\n\n## \ub77c\uc774\uc120\uc2a4\n\n\uc774 \ud504\ub85c\uc81d\ud2b8\ub294 MIT \ub77c\uc774\uc120\uc2a4\ub97c \ub530\ub985\ub2c8\ub2e4. \uc790\uc138\ud55c \ub0b4\uc6a9\uc740 [LICENSE](LICENSE) \ud30c\uc77c\uc744 \ucc38\uc870\ud558\uc138\uc694.\n\n## \uae30\uc5ec\ud558\uae30\n\n\ubc84\uadf8 \uc2e0\uace0\ub098 \uae30\ub2a5 \uc81c\uc548\uc740 \uc774\uc288 \ud2b8\ub798\ucee4\ub97c \uc774\uc6a9\ud574 \uc8fc\uc138\uc694. \ud480 \ub9ac\ud018\uc2a4\ud2b8\ub3c4 \ud658\uc601\ud569\ub2c8\ub2e4!\n\n---\n\n\u2b50 \uc774 \ud504\ub85c\uc81d\ud2b8\uac00 \uc720\uc6a9\ud558\ub2e4\uba74 GitHub \uc800\uc7a5\uc18c\uc5d0 \ubcc4\ud45c\ub97c \ub20c\ub7ec\uc8fc\uc138\uc694!\n"], "modules": [{"name": "Douzone-crawl.utils", "documentation": [], "functions": [], "variables": [], "classes": [], "exports": []}, {"name": "Douzone-crawl", "documentation": ["__init__.py"], "functions": [{"name": "Douzone-crawl.search", "asynchronous": false, "params": [{"name": "query", "type": null, "default": null}, {"name": "max_results", "type": null, "default": "..."}], "returns": null, "documentation": ["Google \uac80\uc0c9\uc744 \uc218\ud589\ud558\uace0 \uacb0\uacfc\ub97c \ubc18\ud658\ud558\ub294 \ud3b8\uc758 \ud568\uc218\n\nArgs:\n query (str): \uac80\uc0c9\uc5b4\n max_results (int, optional): \uac80\uc0c9 \uacb0\uacfc \uc218. \uae30\ubcf8\uac12\uc740 5.\n \nReturns:\n list: (\uc81c\ubaa9, \ub9c1\ud06c, \ub0a0\uc9dc, \uc124\uba85) \ud29c\ud50c\uc758 \ub9ac\uc2a4\ud2b8", "\ud3b8\uc758 \ud568\uc218 - \ubaa8\ub4c8 \uc774\ub984\uc73c\ub85c \ubc14\ub85c \ud638\ucd9c\ud560 \uc218 \uc788\ub294 \uae30\ub2a5"]}, {"name": "Douzone-crawl.get_content", "asynchronous": false, "params": [{"name": "url", "type": null, "default": null}], "returns": null, "documentation": ["\uc8fc\uc5b4\uc9c4 URL\uc5d0\uc11c \uc6f9 \ud398\uc774\uc9c0 \ub0b4\uc6a9\uc744 \ucd94\ucd9c\ud558\ub294 \ud3b8\uc758 \ud568\uc218\n\nArgs:\n url (str): \uc6f9 \ud398\uc774\uc9c0 URL\n \nReturns:\n str: \ucd94\ucd9c\ub41c \uc6f9 \ud398\uc774\uc9c0 \ub0b4\uc6a9"]}, {"name": "Douzone-crawl.search_and_extract", "asynchronous": false, "params": [{"name": "query", "type": null, "default": null}, {"name": "max_results", "type": null, "default": "..."}, {"name": "save_to_file", "type": null, "default": "..."}], "returns": null, "documentation": ["\uac80\uc0c9\uc744 \uc218\ud589\ud558\uace0 \uac01 \uacb0\uacfc\uc758 \ub0b4\uc6a9\uc744 \ucd94\ucd9c\ud558\ub294 \ud3b8\uc758 \ud568\uc218\n\nArgs:\n query (str): \uac80\uc0c9\uc5b4\n max_results (int, optional): \uac80\uc0c9 \uacb0\uacfc \uc218. \uae30\ubcf8\uac12\uc740 5.\n save_to_file (str, optional): \uacb0\uacfc\ub97c \uc800\uc7a5\ud560 \ud30c\uc77c \uacbd\ub85c. \uae30\ubcf8\uac12\uc740 None.\n \nReturns:\n str: \uac80\uc0c9 \uacb0\uacfc \ubcf4\uace0\uc11c"]}], "variables": [{"name": "Douzone-crawl.__version__", "type": null, "documentation": []}, {"name": "Douzone-crawl.__author__", "type": null, "documentation": []}, {"name": "Douzone-crawl.__email__", "type": null, "documentation": []}, {"name": "Douzone-crawl.__description__", "type": null, "documentation": []}, {"name": "Douzone-crawl.MAX_RESULTS", "type": null, "documentation": []}], "classes": [], "exports": [{"name": "Douzone-crawl.create_search_results", "xref": {"fqname": "Douzone-crawl.crawler.create_search_results", "project": null}}, {"name": "Douzone-crawl.create_title_link_list", "xref": {"fqname": "Douzone-crawl.crawler.create_title_link_list", "project": null}}, {"name": "Douzone-crawl.extract_date_info", "xref": {"fqname": "Douzone-crawl.crawler.extract_date_info", "project": null}}, {"name": "Douzone-crawl.extract_description", "xref": {"fqname": "Douzone-crawl.crawler.extract_description", "project": null}}, {"name": "Douzone-crawl.extract_page_content", "xref": {"fqname": "Douzone-crawl.crawler.extract_page_content", "project": null}}, {"name": "Douzone-crawl.extract_text_with_selectors", "xref": {"fqname": "Douzone-crawl.crawler.extract_text_with_selectors", "project": null}}, {"name": "Douzone-crawl.find_element_with_selectors", "xref": {"fqname": "Douzone-crawl.crawler.find_element_with_selectors", "project": null}}, {"name": "Douzone-crawl.find_search_items", "xref": {"fqname": "Douzone-crawl.crawler.find_search_items", "project": null}}, {"name": "Douzone-crawl.save_results", "xref": {"fqname": "Douzone-crawl.crawler.save_results", "project": null}}, {"name": "Douzone-crawl.setup_chrome_options", "xref": {"fqname": "Douzone-crawl.crawler.setup_chrome_options", "project": null}}]}, {"name": "Douzone-crawl.crawl", "documentation": [], "functions": [{"name": "Douzone-crawl.crawl.setup_chrome_options", "asynchronous": false, "params": [], "returns": null, "documentation": ["Chrome \ube0c\ub77c\uc6b0\uc800 \uc635\uc158\uc744 \uc124\uc815\ud558\ub294 \ud568\uc218", "======================================================================"]}, {"name": "Douzone-crawl.crawl.create_title_link_list", "asynchronous": false, "params": [{"name": "search_query", "type": null, "default": null}, {"name": "num_results", "type": null, "default": null}], "returns": null, "documentation": ["Google \uac80\uc0c9\uc744 \ud1b5\ud574 \uc804\uccb4 \uac80\uc0c9 \uacb0\uacfc\uc758 \uc81c\ubaa9\uacfc \ub9c1\ud06c\ub97c \uc218\uc9d1\ud558\ub294 \ud568\uc218\n\nArgs:\n search_query (str): \uac80\uc0c9\uc5b4\n num_results (int): \uac00\uc838\uc62c \uacb0\uacfc \uac1c\uc218\n \nReturns:\n list: (\uc81c\ubaa9, \ub9c1\ud06c, \ub0a0\uc9dc, \uc124\uba85) \ud29c\ud50c\uc758 \ub9ac\uc2a4\ud2b8"]}, {"name": "Douzone-crawl.crawl.find_search_items", "asynchronous": false, "params": [{"name": "driver", "type": null, "default": null}], "returns": null, "documentation": ["\uc5ec\ub7ec CSS \uc120\ud0dd\uc790\ub97c \uc2dc\ub3c4\ud558\uc5ec \uac80\uc0c9 \uacb0\uacfc \ud56d\ubaa9\uc744 \ucc3e\ub294 \ud568\uc218"]}, {"name": "Douzone-crawl.crawl.find_element_with_selectors", "asynchronous": false, "params": [{"name": "parent", "type": null, "default": null}, {"name": "selectors", "type": null, "default": null}], "returns": null, "documentation": ["\uc5ec\ub7ec CSS \uc120\ud0dd\uc790\ub97c \uc2dc\ub3c4\ud558\uc5ec \uc694\uc18c\ub97c \ucc3e\ub294 \ud568\uc218"]}, {"name": "Douzone-crawl.crawl.extract_date_info", "asynchronous": false, "params": [{"name": "item", "type": null, "default": null}], "returns": null, "documentation": ["\uac80\uc0c9 \uacb0\uacfc\uc5d0\uc11c \ub0a0\uc9dc \uc815\ubcf4\ub97c \ucd94\ucd9c\ud558\ub294 \ud568\uc218"]}, {"name": "Douzone-crawl.crawl.extract_description", "asynchronous": false, "params": [{"name": "item", "type": null, "default": null}], "returns": null, "documentation": ["\uac80\uc0c9 \uacb0\uacfc\uc5d0\uc11c \uc124\uba85 \uc815\ubcf4\ub97c \ucd94\ucd9c\ud558\ub294 \ud568\uc218"]}, {"name": "Douzone-crawl.crawl.extract_page_content", "asynchronous": false, "params": [{"name": "url", "type": null, "default": null}], "returns": null, "documentation": ["\uc8fc\uc5b4\uc9c4 URL\uc5d0\uc11c \uc6f9 \ud398\uc774\uc9c0\uc758 \uc8fc\uc694 \ucf58\ud150\uce20\ub97c \ucd94\ucd9c\ud558\ub294 \ud568\uc218\n\nArgs:\n url (str): \uc6f9 \ud398\uc774\uc9c0 URL\n \nReturns:\n str: \ucd94\ucd9c\ub41c \ud398\uc774\uc9c0 \ub0b4\uc6a9"]}, {"name": "Douzone-crawl.crawl.extract_text_with_selectors", "asynchronous": false, "params": [{"name": "driver", "type": null, "default": null}, {"name": "selectors", "type": null, "default": null}], "returns": null, "documentation": ["\uc5ec\ub7ec \uc120\ud0dd\uc790\ub97c \uc2dc\ub3c4\ud558\uc5ec \ud14d\uc2a4\ud2b8\ub97c \ucd94\ucd9c\ud558\ub294 \ud568\uc218"]}, {"name": "Douzone-crawl.crawl.create_search_results", "asynchronous": false, "params": [{"name": "search_query", "type": null, "default": null}], "returns": null, "documentation": ["\uac80\uc0c9\uc5b4\ub97c \uae30\ubc18\uc73c\ub85c \uac80\uc0c9 \uacb0\uacfc\ub97c \uc218\uc9d1\ud558\uace0 \ubcf4\uace0\uc11c\ub97c \uc0dd\uc131\ud558\ub294 \ud568\uc218\n\nArgs:\n search_query (str): \uac80\uc0c9\uc5b4\n \nReturns:\n str: \uac80\uc0c9 \uacb0\uacfc \ubcf4\uace0\uc11c"]}, {"name": "Douzone-crawl.crawl.save_results", "asynchronous": false, "params": [{"name": "content", "type": null, "default": null}, {"name": "filename", "type": null, "default": null}], "returns": null, "documentation": ["\uac80\uc0c9 \uacb0\uacfc\ub97c \ud30c\uc77c\ub85c \uc800\uc7a5\ud558\ub294 \ud568\uc218"]}, {"name": "Douzone-crawl.crawl.main", "asynchronous": false, "params": [], "returns": null, "documentation": []}], "variables": [{"name": "Douzone-crawl.crawl.MAX_RESULTS", "type": null, "documentation": []}], "classes": [], "exports": []}]}
0 commit comments