Added Scrape.py
This commit is contained in:
parent
1326f723b9
commit
7f5b50ff62
|
|
@ -0,0 +1,45 @@
|
||||||
|
import urllib2
|
||||||
|
from bs4 import BeautifulSoup
|
||||||
|
import re
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
class ScrapeGranpark(object):
    """Scrape today's lunch alternatives from the Granpark menu page.

    The page is fetched with ``urllib2`` and parsed with BeautifulSoup.
    Today's menu is located through the ``div`` carrying the
    ``day-current`` class; every ``section`` with the ``day-alternative``
    class under that div's parent is treated as one meal alternative.
    """

    # Page read when no link is passed to the constructor: a local snapshot.
    # The live menu URL, disabled in the original code, is
    # https://eurest.mashie.com/public/menu/granpark/b4c46dcd?country=se
    _DEFAULT_LINK = r'file:///home/simon/Downloads/index.html'

    # (result key, number of leading tokens to drop), one entry per
    # ``day-alternative`` section in page order.
    # NOTE(review): the dropped tokens are presumably the section heading
    # (plus the empty token produced by leading whitespace) -- confirm
    # against the page markup.
    _MEAL_SLOTS = (
        ('A la Minute', 4),
        ('Dagens Husman', 3),
        ('Vegitariskt', 4),
        ('Halsobalans', 2),
        ('Snabbt och Gott', 4),
    )

    # Compiled once; used to collapse every whitespace run to one space.
    _WHITESPACE = re.compile(r'\s+')

    def __init__(self, granpark_link=None):
        """Create a scraper.

        Args:
            granpark_link: URL of the menu page to read. When omitted the
                local snapshot in ``_DEFAULT_LINK`` is used.
        """
        if granpark_link is None:
            granpark_link = self._DEFAULT_LINK
        self._granpark_link = granpark_link

    def scrape_page(self):
        """Fetch the menu page and extract today's meals.

        Returns:
            dict: maps a meal category name (the keys listed in
            ``_MEAL_SLOTS``) to that section's whitespace-normalised text
            with its leading tokens removed. Sections beyond the known
            slots are ignored. The dict is empty when the page contains
            no ``day-current`` div.
        """
        page = urllib2.urlopen(self._granpark_link)
        try:
            # BeautifulSoup consumes the response while constructing the
            # tree, so the handle can be released right afterwards.
            soup = BeautifulSoup(page, 'html.parser')
        finally:
            page.close()

        dagens_div = soup.find('div', attrs={'class': 'day-current'})
        if dagens_div is None:
            # find() yields None when nothing matches; report "no meals"
            # instead of failing on the ``.parent`` lookup below.
            return {}

        sections = dagens_div.parent.findAll(
            'section', attrs={'class': 'day-alternative'})

        meals = {}
        # zip() stops at the shorter sequence, so both a page with fewer
        # sections and one with extra sections are handled.
        for (name, skip), section in zip(self._MEAL_SLOTS, sections):
            tokens = self._WHITESPACE.sub(' ', section.text).split(' ')
            meals[name] = ' '.join(tokens[skip:])
        return meals
|
||||||
|
|
||||||
|
if __name__ == "__main__":
    # Script entry point: scrape the configured page and print the name of
    # every meal category found (iterating the dict yields its keys).
    for name in ScrapeGranpark().scrape_page():
        print(name)
|
||||||
Loading…
Reference in New Issue