python-3.x
  • csv
  • 2017-03-12 2 views 0 likes 
    0
    import requests 
    from bs4 import BeautifulSoup 
    base_url = "https://www.yelp.com/search?find_desc=&find_loc=" 
    loc = "Newport+Beach,+CA" 
    page = 10 
    url = base_url + loc + '&start='+ str(page) 
    yelp_r = requests.get(url) 
    yelp_soup = BeautifulSoup(yelp_r.text, 'html.parser') 
    businesses = yelp_soup.findAll('div',{'class':'biz-listing-large'}) 
    file_path = 'yelp-{loc}.txt'.format(loc=loc) 
    
    with open(file_path,"a") as textfile: 
        businesses = yelp_soup.findAll('div',{'class':'biz-listing-large'}) 
        for biz in businesses: 
         title = biz.findAll('a',{'class':'biz-name'})[0].text 
         print(title) 
         address = biz.findAll('address')[0].text 
         print(address) 
         phone= biz.findAll('span',{'class':'biz-phone'})[0].text 
         print(phone) 
         page_line="{title}\n{address}\{phone}".format(
          title=title, 
          address=address, 
          phone=phone 
         ) 
         textfile.write(page_line) 
    

    현재 데이터가 txt 파일로 내보내지고 있는데, 이를 CSV 파일로 내보내고 싶습니다. csv.writer를 사용해 보았지만 작동하지 않았습니다. (웹 스크래핑 후 데이터를 CSV 파일로 내보내기)

    +0

    `with open("data.csv", "w") as csvfile:` 형태로 파일을 연 뒤 `writer.writerow(["데이터1", "데이터2"])`를 시도해 보았습니다. –

    답변

    0

    저는 파이썬 전문가는 아니지만, 제가 당신이라면 기본적으로 다음과 같이 하겠습니다. 예: `writer = csv.writer(csvfile)` 후 `writer.writerow(["SrNo", "이름"])`:

    import urllib2 
    
    listOfStocks = ["AAPL", "MSFT", "GOOG", "FB", "AMZN"] 
    
    urls = [] 
    
    for company in listOfStocks: 
        urls.append('http://real-chart.finance.yahoo.com/table.csv?s=' + company + '&d=6&e=28&f=2015&g=m&a=11&b=12&c=1980&ignore=.csv') 
    
    # Create a CSV on your desktop to hold that data that id scraped from the web . . . 
    Output_File = open('C:/Users/Excel/Desktop/Historical_Prices.csv','w') 
    
    New_Format_Data = '' 
    
    for counter in range(0, len(urls)): 
    
        Original_Data = urllib2.urlopen(urls[counter]).read() 
    
        if counter == 0: 
         New_Format_Data = "Company," + urllib2.urlopen(urls[counter]).readline() 
    
        rows = Original_Data.splitlines(1) 
    
        for row in range(1, len(rows)): 
    
         New_Format_Data = New_Format_Data + listOfStocks[counter] + ',' + rows[row] 
    
    Output_File.write(New_Format_Data) 
    Output_File.close() 
    
    관련 문제