File tree Expand file tree Collapse file tree 4 files changed +1221
-0
lines changed Expand file tree Collapse file tree 4 files changed +1221
-0
lines changed Original file line number Diff line number Diff line change 1+ /Library/Frameworks/Python.framework/Versions/3.5/Python
Original file line number Diff line number Diff line change 1+ import requests
2+ from bs4 import BeautifulSoup
3+
# Scrape Yelp restaurant search results for one location and append each
# listing's name, address lines and phone number to a text file.

# Search endpoint and the (URL-encoded) location appended to it.
base_url = 'https://www.yelp.com/search?find_desc=Restaurants&find_loc='
loc = 'Newport+Beach,+CA,+United+States'

# Result offsets sent in the "start" query parameter: 0, 10, ..., 200.
PAGE_STEP = 10
LAST_PAGE_START = 200

# Seconds to wait on the HTTP request. requests applies no timeout unless one
# is passed, so without this a stalled connection would hang the script.
REQUEST_TIMEOUT = 30

# Characters trimmed from both ends of scraped text fragments.
STRIP_CHARS = " \n\t\r"


def _split_address(address_tag):
    """Return ``(first_line, second_line)`` for an ``<address>`` element.

    Text found before the first ``<br>`` is the first line (the last
    non-empty fragment wins); all text from the first ``<br>`` onwards is
    concatenated into the second line.  Both parse shapes are handled: the
    text following ``<br>`` may be nested inside the ``br`` tag or may be a
    sibling that comes after it.
    """
    first_line = ""
    second_parts = []
    seen_break = False
    for node in address_tag.contents:
        if isinstance(node, str):
            # Text node: bs4 string objects subclass ``str`` on Python 3.
            text = node.strip(STRIP_CHARS)
        else:
            # Element node: decide by tag name instead of searching the
            # markup for the substring "br", which also matches ordinary
            # address text.
            if node.name == "br" or node.find("br") is not None:
                seen_break = True
            text = node.get_text().strip(STRIP_CHARS)
        if not text:
            continue
        if seen_break:
            second_parts.append(text)
        else:
            first_line = text
    return first_line, "".join(second_parts)


def _extract_phone(biz):
    """Return the listing's stripped phone text, or ``None`` if it has none."""
    phone_tag = biz.find('span', {'class': 'biz-phone'})
    if phone_tag is None:
        return None
    return phone_tag.get_text().strip(STRIP_CHARS)


# The output path does not depend on the page, so build it and open the file
# once.  Append mode keeps results from earlier runs; the explicit encoding
# avoids failures on non-ASCII business names under a non-UTF-8 locale.
file_path = 'yelp-{loc}-2.txt'.format(loc=loc)

with open(file_path, "a", encoding="utf-8") as textfile:
    for current_page in range(0, LAST_PAGE_START + 1, PAGE_STEP):
        print(current_page)
        url = base_url + loc + "&start=" + str(current_page)
        yelp_r = requests.get(url, timeout=REQUEST_TIMEOUT)
        yelp_soup = BeautifulSoup(yelp_r.text, 'html.parser')
        businesses = yelp_soup.find_all('div', {'class': 'biz-listing-large'})
        for biz in businesses:
            name_tag = biz.find('a', {'class': 'biz-name'})
            if name_tag is None:
                # A listing without a name link cannot be recorded
                # meaningfully; skip it rather than abort the whole run.
                continue
            title = name_tag.get_text()
            print(title)

            first_line = ""
            second_line = ""
            address_tag = biz.find('address')
            if address_tag is not None:
                first_line, second_line = _split_address(address_tag)
                print(first_line)
                print(second_line)
            print('\n ')

            phone = _extract_phone(biz)
            print(phone)

            # A missing phone is written as the text "None", as before.
            page_line = "{title}\n {address_1}\n {address_2}\n {phone}\n \n ".format(
                title=title,
                address_1=first_line,
                address_2=second_line,
                phone=phone,
            )
            textfile.write(page_line)
50+
51+
52+
53+ '''
54+ Sketch: storing a scraped listing with a Django model (SomeModel is a placeholder name)
55+ obj = SomeModel()
56+ obj.title = title
57+ obj.line_1 = first_line
58+ obj.save()
59+ '''
60+
61+
62+
63+
Original file line number Diff line number Diff line change 1+ beautifulsoup4 == 4.5.1
2+ requests == 2.11.1
You can’t perform that action at this time.
0 commit comments