Description
Description
AttributeError: 'BossSpider' object has no attribute 'settings'
scrapy v2.11.2
scrapy-redis v0.9.1
python v3.12
Step to Reproduce
`from scrapy import signals
import scrapy
from DrissionPage._pages.chromium_page import ChromiumPage
from scrapy import cmdline
from scrapy_redis.spiders import RedisSpider
class BossSpider(RedisSpider):
name = "boss"
# start_urls = [
# "https://www.zhipin.com/web/geek/job?city=101250100&position=100309,100301,100302,100303,100305,100304,100703&page=1",
# ]
redis_key = 'boss:start_urls' # Redis 中的请求队列键
def __init__(self, *args, **kwargs):
super().__init__(*args, **kwargs)
self.edge = ChromiumPage() # 实例化浏览器
def __del__(self):
self.edge.quit()
@classmethod
def from_crawler(cls, crawler):
# This method is used by Scrapy to create your spiders.
s = cls()
crawler.signals.connect(s.spider_opened, signal=signals.spider_opened)
crawler.signals.connect(s.spider_closed, signal=signals.spider_closed)
return s
def spider_opened(self, spider):
spider.logger.info("启用DrissionPage浏览器.")
def spider_closed(self, spider):
self.edge.quit()
spider.logger.info("关闭DrissionPage浏览器.")
def parse(self, response):
job_list = self.edge.eles("x://li[@class='job-card-wrapper']")
print("job元素数:", len(job_list))
for job in job_list:
job_area = job.ele("x://span[contains(@class, 'job-area')]").text
salary = job.ele("x://span[@class='salary']").text
tags = job.ele("x://ul[contains(@class, 'tag-list')]/li").text
info_public = job.ele("x://div[@class='info-public']").text
company_name = job.ele("x://h3[@class='company-name']/a").text
info_desc = job.ele("x://div[contains(@class, 'info-desc')]").text
job_name_ele = job.ele("x://span[@class='job-name']")
job_name = job_name_ele.text
job_detail_page = job_name_ele.click.for_new_tab()
job_detail_page.wait.load_start()
job_detail = job_detail_page.ele("x://div[@class='job-sec-text']").text
login_ico = job_detail_page.ele("x://i[@class='icon-close']")
if login_ico:
login_ico.click()
job_detail_page.close()
yield {
'job_name': job_name,
'job_detail': job_detail,
'job_area': job_area,
'salary': salary,
'tags': tags,
'info_public': info_public,
'company_name': company_name,
'info_desc': info_desc,
}
pageNum = int(self.edge.ele("x://div[@class='options-pages']/a[last()-1]").text) - 1
# 提取当前页面的页码
current_page = int(response.url.split('page=')[-1])
# 构造下一页的 URL,增加页码
next_page = current_page + 1
print(f"正在爬取第{next_page}页数据")
if next_page <= pageNum:
next_page_url = f"{next_page}"
yield scrapy.Request(url=next_page_url, callback=self.parse)
cmdline.execute('scrapy crawl boss'.split())
`
Error log
`2024-09-29 20:28:22 [scrapy.core.engine] INFO: Spider opened
2024-09-29 20:28:22 [scrapy.core.engine] INFO: Closing spider (shutdown)
2024-09-29 20:28:22 [scrapy.core.engine] ERROR: Scraper close failure
Traceback (most recent call last):
File "D:\pyspider\venv\Lib\site-packages\scrapy\crawler.py", line 160, in crawl
yield self.engine.open_spider(self.spider, start_requests)
AttributeError: 'BossSpider' object has no attribute 'settings'
During handling of the above exception, another exception occurred:
Traceback (most recent call last):
File "D:\pyspider\venv\Lib\site-packages\twisted\internet\defer.py", line 1074, in _runCallbacks
current.result = callback( # type: ignore[misc]
File "D:\pyspider\venv\Lib\site-packages\scrapy\core\engine.py", line 439, in
dfd.addBoth(lambda _: self.scraper.close_spider(spider))
File "D:\pyspider\venv\Lib\site-packages\scrapy\core\scraper.py", line 125, in close_spider
raise RuntimeError("Scraper slot not assigned")
RuntimeError: Scraper slot not assigned
2024-09-29 20:28:23 [boss] INFO: 关闭DrissionPage浏览器.
2024-09-29 20:28:23 [scrapy.utils.signal] ERROR: Error caught on signal handler: <bound method CoreStats.spider_closed of <scrapy.extensions.corestats.CoreStats object at 0x0000018CFA7E0980>>
Traceback (most recent call last):
File "D:\pyspider\venv\Lib\site-packages\scrapy\crawler.py", line 160, in crawl
yield self.engine.open_spider(self.spider, start_requests)
AttributeError: 'BossSpider' object has no attribute 'settings'
During handling of the above exception, another exception occurred:
Traceback (most recent call last):
File "D:\pyspider\venv\Lib\site-packages\scrapy\utils\defer.py", line 348, in maybeDeferred_coro
result = f(*args, **kw)
File "D:\pyspider\venv\Lib\site-packages\pydispatch\robustapply.py", line 55, in robustApply
return receiver(*arguments, **named)
File "D:\pyspider\venv\Lib\site-packages\scrapy\extensions\corestats.py", line 30, in spider_closed
elapsed_time = finish_time - self.start_time
TypeError: unsupported operand type(s) for -: 'datetime.datetime' and 'NoneType'
2024-09-29 20:28:23 [scrapy.statscollectors] INFO: Dumping Scrapy stats:
{'log_count/DEBUG': 3, 'log_count/ERROR': 2, 'log_count/INFO': 8}
2024-09-29 20:28:23 [scrapy.core.engine] INFO: Spider closed (shutdown)
Unhandled error in Deferred:
2024-09-29 20:28:23 [twisted] CRITICAL: Unhandled error in Deferred:
Traceback (most recent call last):
File "D:\pyspider\venv\Lib\site-packages\scrapy\crawler.py", line 265, in crawl
return self._crawl(crawler, *args, **kwargs)
File "D:\pyspider\venv\Lib\site-packages\scrapy\crawler.py", line 269, in _crawl
d = crawler.crawl(*args, **kwargs)
File "D:\pyspider\venv\Lib\site-packages\twisted\internet\defer.py", line 2287, in unwindGenerator
return _cancellableInlineCallbacks(gen)
File "D:\pyspider\venv\Lib\site-packages\twisted\internet\defer.py", line 2197, in _cancellableInlineCallbacks
_inlineCallbacks(None, gen, status, _copy_context())
--- ---
File "D:\pyspider\venv\Lib\site-packages\twisted\internet\defer.py", line 2014, in _inlineCallbacks
result = context.run(gen.send, result)
File "D:\pyspider\venv\Lib\site-packages\scrapy\crawler.py", line 160, in crawl
yield self.engine.open_spider(self.spider, start_requests)
builtins.AttributeError: 'BossSpider' object has no attribute 'settings'
2024-09-29 20:28:23 [twisted] CRITICAL:
Traceback (most recent call last):
File "D:\pyspider\venv\Lib\site-packages\twisted\internet\defer.py", line 2014, in _inlineCallbacks
result = context.run(gen.send, result)
File "D:\pyspider\venv\Lib\site-packages\scrapy\crawler.py", line 160, in crawl
yield self.engine.open_spider(self.spider, start_requests)
AttributeError: 'BossSpider' object has no attribute 'settings'`