2
나는 치료법을 사용하고 있으며, 거미에서 긁어 낸 데이터를 MySql 데이터베이스에 저장하려고합니다. 나는 파이프 라인을 사용하여 이것을 얻었지만 운이 없다.Scrapy mysql 파이프 라인 오류
from scrapy import log
from scrapy.core.exceptions import DropItem
from twisted.enterprise import adbapi
import time
import MySQLdb.cursors
class FilterWordsPipeline(object):
"""A pipeline for filtering out items which contain certain words in their
description"""
# put all words in lowercase
words_to_filter = ['politics', 'religion']
def process_item(self, spider, item):
print spider
for word in self.words_to_filter:
if word in unicode(item['description']).lower():
raise DropItem("Contains forbidden word: %s" % word)
else:
return item
class MySQLStorePipeline(object):
def __init__(self):
# @@@ hardcoded db settings
# TODO: make settings configurable through settings
self.dbpool = adbapi.ConnectionPool('adress_to_db',
db='my_db',
user='my_user',
passwd='my_pw',
cursorclass=MySQLdb.cursors.DictCursor,
charset='utf8',
use_unicode=True
)
def process_item(self, spider, item):
# run db query in thread pool
query = self.dbpool.runInteraction(self._conditional_insert, item)
query.addErrback(self.handle_error)
return item
def _conditional_insert(self, tx, item):
# create record if doesn't exist.
# all this block run on it's own thread
tx.execute("select * from scrapytest where link = %s", (item['link'][0],))
result = tx.fetchone()
if result:
log.msg("Item already stored in db: %s" % item, level=log.DEBUG)
else:
tx.execute(\
"insert into scrapytest (title, link, desc) "
"values (%s, %s, %s)",
(item['title'][0],
item['link'][0],
item['desc'][0]
)
log.msg("Item stored in db: %s" % item, level=log.DEBUG)
def handle_error(self, e):
log.err(e)
을 그리고 여기에 내가 오류 메시지입니다 : 여기 내 코드는 파이프 라인의
SyntaxError: invalid syntax
PS C:\Python27\testscrapy\tutorial> scrapy crawl dmoz
2012-05-03 16:03:11+0200 [scrapy] INFO: Scrapy 0.14.3 started (bot: tutorial)
2012-05-03 16:03:12+0200 [scrapy] DEBUG: Enabled extensions: LogStats, TelnetConsole,
CloseSpider, WebService, CoreStats
, SpiderState
2012-05-03 16:03:12+0200 [scrapy] DEBUG: Enabled downloader middlewares: HttpAuthMiddleware,
DownloadTimeoutMiddleware,
UserAgentMiddleware, RetryMiddleware, DefaultHeadersMiddleware, RedirectMiddleware,
CookiesMiddleware, HttpCompressionMi
ddleware, ChunkedTransferMiddleware, DownloaderStats
2012-05-03 16:03:12+0200 [scrapy] DEBUG: Enabled spider middlewares: HttpErrorMiddleware,
OffsiteMiddleware, RefererMidd
leware, UrlLengthMiddleware, DepthMiddleware
Traceback (most recent call last):
File "C:\Python27\Scripts\scrapy", line 5, in <module>
pkg_resources.run_script('Scrapy==0.14.3', 'scrapy')
File "C:\Python27\lib\site-packages\pkg_resources.py", line 489, in run_script
self.require(requires)[0].run_script(script_name, ns)
File "C:\Python27\lib\site-packages\pkg_resources.py", line 1207, in run_script
execfile(script_filename, namespace, namespace)
File "c:\python27\lib\site-packages\scrapy-0.14.3-py2.7-win32.egg\EGG-INFO\scripts\scrapy", line
4, in <module>
execute()
File "C:\Python27\lib\site-packages\scrapy-0.14.3-py2.7-win32.egg\scrapy\cmdline.py", line 132,
in execute
run_print_help(parser, _run_command, cmd, args, opts)
File "C:\Python27\lib\site-packages\scrapy-0.14.3-py2.7-win32.egg\scrapy\cmdline.py", line 97, in
_run_print_help
func(*a, **kw)
File "C:\Python27\lib\site-packages\scrapy-0.14.3-py2.7-win32.egg\scrapy\cmdline.py", line 139,
in _run_command
cmd.run(args, opts)
File "C:\Python27\lib\site-packages\scrapy-0.14.3-py2.7-win32.egg\scrapy\commands\crawl.py", line
43, in run
spider = self.crawler.spiders.create(spname, **opts.spargs)
File "C:\Python27\lib\site-packages\scrapy-0.14.3-py2.7-win32.egg\scrapy\command.py", line 34,
in crawler
self._crawler.configure()
File "C:\Python27\lib\site-packages\scrapy-0.14.3-py2.7-win32.egg\scrapy\crawler.py", line 37, in
configure
self.engine = ExecutionEngine(self, self._spider_closed)
File "C:\Python27\lib\site-packages\scrapy-0.14.3-py2.7-win32.egg\scrapy\core\engine.py", line
62, in __init__
self.scraper = Scraper(crawler)
File "C:\Python27\lib\site-packages\scrapy-0.14.3-py2.7-win32.egg\scrapy\core\scraper.py", line
68, in __init__
self.itemproc = itemproc_cls.from_crawler(crawler)
File "C:\Python27\lib\site-packages\scrapy-0.14.3-py2.7-win32.egg\scrapy\middleware.py", line 48,
in from_crawler
return cls.from_settings(crawler.settings, crawler)
File "C:\Python27\lib\site-packages\scrapy-0.14.3-py2.7-win32.egg\scrapy\middleware.py", line 29,
in from_settings
mwcls = load_object(clspath)
File "C:\Python27\lib\site-packages\scrapy-0.14.3-py2.7-win32.egg\scrapy\utils\misc.py", line 37,
in load_object
mod = __import__(module, {}, {}, [''])
File "C:\Python27\testscrapy\tutorial\tutorial\pipelines.py", line 64
log.msg("Item stored in db: %s" % item, level=log.DEBUG)
^
SyntaxError: invalid syntax
내가 여기 시작하는 아무 생각이 없다, 그래서 어떤 도움이 정말 감사합니다!
시작하기에 좋은 장소가 보통 라인 괄호 ^^ 닫는
감사의 팁을위한 이전에 오류 포인트 나 라인! Unfourtunately 나는 문제를 해결하지 않았다. 이제이 오류가 발생합니다 : 파일 "C : \ Python27 \ testscrapy \ tutorial \ tutorial \ pipelines.py", 줄 64 message = "항목이 이미 db : % s"% (항목) ^ 에 저장 됨 SyntaxError : invalid 구문 – user1009453
@ user1009453 잘못된 log.msg를 보았습니다. 줄 번호가 없습니다. 당신은 닫는 괄호가 없습니다 – dm03514
당신은 절대적으로 옳았습니다, 나는 괄호를 놓치고있었습니다. 이제 다음 오류가 나타납니다. "ImportError : 'tutorial.pipelines.MySQLStorePipeline'개체를로드하는 중 오류가 발생했습니다 : 예외 모듈이 없습니다." 체크하고 파이프 라인의 이름은 settings.py에서 같습니다. 그렇다면 예외라는 모듈을 가져와야합니까? 나를 도와 주셔서 고마워요! – user1009453