# util/scrape.py -- scrape the daily lunch menu from the Granpark Eurest page.
import urllib2
from bs4 import BeautifulSoup
import re
class ScrapeGranpark(object):
def __init__(self):
# self._granpark_link ='https://eurest.mashie.com/public/menu/granpark/b4c46dcd?country=se'
self._granpark_link = r'file:///home/simon/Downloads/index.html'
def scrape_page(self):
page = urllib2.urlopen(self._granpark_link)
soup = BeautifulSoup(page, 'html.parser')
dagens_div = soup.find('div', attrs={'class': 'day-current'})
dagens_parent = dagens_div.parent
dagens_mat_list = dagens_parent.findAll('section',
attrs={'class':'day-alternative'})
index = 0
meals = {}
for dagens in dagens_mat_list:
dagens_text = re.sub('\s+',' ', dagens.text).split(' ')
if index == 0:
meals['A la Minute'] = ' '.join(dagens_text[4:])
elif index == 1:
meals['Dagens Husman'] = ' '.join(dagens_text[3:])
elif index == 2:
meals['Vegitariskt'] = ' '.join(dagens_text[4:])
elif index == 3:
meals['Halsobalans'] = ' '.join(dagens_text[2:])
elif index == 4:
meals['Snabbt och Gott'] = ' '.join(dagens_text[4:])
index += 1
return meals
if __name__ == "__main__":
    # Manual smoke test: scrape the configured page and print the menu.
    granpark = ScrapeGranpark()
    meals = granpark.scrape_page()
    for name in meals:
        # Print the dish text as well as the meal name; the old
        # `print meal` emitted only the dict keys.  The parenthesized
        # single-argument form is valid in both Python 2 and 3.
        print('%s: %s' % (name, meals[name]))