
Scrapy MySQL pipeline error

I am using Scrapy and I want to store the data scraped by my spider in a MySQL database. I am trying to do this with a pipeline, but so far without luck. Here is my pipeline code:

from scrapy import log
from scrapy.core.exceptions import DropItem
from twisted.enterprise import adbapi

import time
import MySQLdb.cursors

class FilterWordsPipeline(object):
    """A pipeline for filtering out items which contain certain words in their
    description"""

    # put all words in lowercase
    words_to_filter = ['politics', 'religion']

    def process_item(self, spider, item):
        print spider
        for word in self.words_to_filter:
            if word in unicode(item['description']).lower():
                raise DropItem("Contains forbidden word: %s" % word)
        else:
            return item

class MySQLStorePipeline(object):

    def __init__(self):
        # @@@ hardcoded db settings
        # TODO: make settings configurable through settings
        self.dbpool = adbapi.ConnectionPool('adress_to_db',
                                            db='my_db',
                                            user='my_user',
                                            passwd='my_pw',
                                            cursorclass=MySQLdb.cursors.DictCursor,
                                            charset='utf8',
                                            use_unicode=True
                                            )

    def process_item(self, spider, item):
        # run db query in thread pool
        query = self.dbpool.runInteraction(self._conditional_insert, item)
        query.addErrback(self.handle_error)

        return item

    def _conditional_insert(self, tx, item):
        # create the record if it doesn't exist
        # this whole block runs in its own thread
        tx.execute("select * from scrapytest where link = %s", (item['link'][0],))
        result = tx.fetchone()
        if result:
            log.msg("Item already stored in db: %s" % item, level=log.DEBUG)
        else:
            tx.execute(\
                "insert into scrapytest (title, link, desc) "
                "values (%s, %s, %s)",
                (item['title'][0],
                 item['link'][0],
                 item['desc'][0]
            )
            log.msg("Item stored in db: %s" % item, level=log.DEBUG)

    def handle_error(self, e):
        log.err(e)

And here is the error message:

PS C:\Python27\testscrapy\tutorial> scrapy crawl dmoz
2012-05-03 16:03:11+0200 [scrapy] INFO: Scrapy 0.14.3 started (bot: tutorial)
2012-05-03 16:03:12+0200 [scrapy] DEBUG: Enabled extensions: LogStats, TelnetConsole, CloseSpider, WebService, CoreStats, SpiderState
2012-05-03 16:03:12+0200 [scrapy] DEBUG: Enabled downloader middlewares: HttpAuthMiddleware, DownloadTimeoutMiddleware, UserAgentMiddleware, RetryMiddleware, DefaultHeadersMiddleware, RedirectMiddleware, CookiesMiddleware, HttpCompressionMiddleware, ChunkedTransferMiddleware, DownloaderStats
2012-05-03 16:03:12+0200 [scrapy] DEBUG: Enabled spider middlewares: HttpErrorMiddleware, OffsiteMiddleware, RefererMiddleware, UrlLengthMiddleware, DepthMiddleware
Traceback (most recent call last):
  File "C:\Python27\Scripts\scrapy", line 5, in <module>
    pkg_resources.run_script('Scrapy==0.14.3', 'scrapy')
  File "C:\Python27\lib\site-packages\pkg_resources.py", line 489, in run_script
    self.require(requires)[0].run_script(script_name, ns)
  File "C:\Python27\lib\site-packages\pkg_resources.py", line 1207, in run_script
    execfile(script_filename, namespace, namespace)
  File "c:\python27\lib\site-packages\scrapy-0.14.3-py2.7-win32.egg\EGG-INFO\scripts\scrapy", line 4, in <module>
    execute()
  File "C:\Python27\lib\site-packages\scrapy-0.14.3-py2.7-win32.egg\scrapy\cmdline.py", line 132, in execute
    _run_print_help(parser, _run_command, cmd, args, opts)
  File "C:\Python27\lib\site-packages\scrapy-0.14.3-py2.7-win32.egg\scrapy\cmdline.py", line 97, in _run_print_help
    func(*a, **kw)
  File "C:\Python27\lib\site-packages\scrapy-0.14.3-py2.7-win32.egg\scrapy\cmdline.py", line 139, in _run_command
    cmd.run(args, opts)
  File "C:\Python27\lib\site-packages\scrapy-0.14.3-py2.7-win32.egg\scrapy\commands\crawl.py", line 43, in run
    spider = self.crawler.spiders.create(spname, **opts.spargs)
  File "C:\Python27\lib\site-packages\scrapy-0.14.3-py2.7-win32.egg\scrapy\command.py", line 34, in crawler
    self._crawler.configure()
  File "C:\Python27\lib\site-packages\scrapy-0.14.3-py2.7-win32.egg\scrapy\crawler.py", line 37, in configure
    self.engine = ExecutionEngine(self, self._spider_closed)
  File "C:\Python27\lib\site-packages\scrapy-0.14.3-py2.7-win32.egg\scrapy\core\engine.py", line 62, in __init__
    self.scraper = Scraper(crawler)
  File "C:\Python27\lib\site-packages\scrapy-0.14.3-py2.7-win32.egg\scrapy\core\scraper.py", line 68, in __init__
    self.itemproc = itemproc_cls.from_crawler(crawler)
  File "C:\Python27\lib\site-packages\scrapy-0.14.3-py2.7-win32.egg\scrapy\middleware.py", line 48, in from_crawler
    return cls.from_settings(crawler.settings, crawler)
  File "C:\Python27\lib\site-packages\scrapy-0.14.3-py2.7-win32.egg\scrapy\middleware.py", line 29, in from_settings
    mwcls = load_object(clspath)
  File "C:\Python27\lib\site-packages\scrapy-0.14.3-py2.7-win32.egg\scrapy\utils\misc.py", line 37, in load_object
    mod = __import__(module, {}, {}, [''])
  File "C:\Python27\testscrapy\tutorial\tutorial\pipelines.py", line 64
    log.msg("Item stored in db: %s" % item, level=log.DEBUG)
      ^
SyntaxError: invalid syntax

I have no idea where to start here, so any help would be greatly appreciated!

A good place to start is usually the line the caret (^) is pointing at; you are probably missing a closing parenthesis.

Answer

tx.execute(\
    "insert into scrapytest (title, link, desc) "
    "values (%s, %s, %s)",
    (item['title'][0],
     item['link'][0],
     item['desc'][0])
)
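The only change is the extra closing parenthesis at the end: the original code closed the tuple of values but never closed the tx.execute(...) call itself, which is why Python reported the SyntaxError on the log.msg line that follows (line 64). One caveat worth adding: desc is a reserved word in MySQL, so even with the parenthesis fixed the insert may fail at runtime unless the column name is quoted. A sketch of the same branch with that change applied (assuming the same scrapytest table):

else:
    tx.execute(
        "insert into scrapytest (title, link, `desc`) "  # backquote the reserved word desc
        "values (%s, %s, %s)",
        (item['title'][0],
         item['link'][0],
         item['desc'][0])
    )
    log.msg("Item stored in db: %s" % item, level=log.DEBUG)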



Thanks for the tip that the error can point at the line before! Unfortunately it didn't solve the problem. Now I get this error: File "C:\Python27\testscrapy\tutorial\tutorial\pipelines.py", line 64, message = "Item already stored in db: %s" % (item) ^ SyntaxError: invalid syntax – user1009453


@user1009453 I was looking at the wrong log.msg, the one without the line number. You are missing a closing parenthesis – dm03514


You were absolutely right, I was missing a parenthesis. Now I get the following error: "ImportError: Error loading object 'tutorial.pipelines.MySQLStorePipeline': No module named exceptions." I checked, and the pipeline name is the same in settings.py. So do I need to import a module called exceptions? Thanks for your help! – user1009453
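For readers who hit the same follow-up error: the ImportError most likely comes from the first import in pipelines.py rather than from settings.py. There is no scrapy.core.exceptions module in Scrapy 0.14; DropItem lives in scrapy.exceptions, which is why Python fails with "No module named exceptions" while loading the pipeline. A minimal sketch of the corrected imports and pool setup (the host/db/user values are placeholders; note that the first argument to adbapi.ConnectionPool must be the DB-API module name, not the database address):

from scrapy import log
from scrapy.exceptions import DropItem  # not scrapy.core.exceptions in Scrapy 0.14
from twisted.enterprise import adbapi

import MySQLdb.cursors

# 'MySQLdb' names the DB-API module; the server address goes in host=
dbpool = adbapi.ConnectionPool('MySQLdb',
                               host='my_host',  # placeholder
                               db='my_db',
                               user='my_user',
                               passwd='my_pw',
                               cursorclass=MySQLdb.cursors.DictCursor,
                               charset='utf8',
                               use_unicode=True)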