"""Scrape the meals of the current day from the Granpark lunch menu page."""

import re
from contextlib import closing

try:
    from urllib2 import urlopen  # Python 2
except ImportError:
    from urllib.request import urlopen  # Python 3

from bs4 import BeautifulSoup


class ScrapeGranpark(object):
    """Fetch the Granpark menu page and extract the current day's meals."""

    # (category label, number of leading tokens to drop) for the first five
    # 'day-alternative' sections, in page order.  The offsets are applied to
    # the section text after whitespace has been collapsed and split on a
    # single space; presumably they skip the section's own heading words --
    # verify against the live markup if the page layout changes.
    _MEAL_SLOTS = (
        ('A la Minute', 4),
        ('Dagens Husman', 3),
        ('Vegitariskt', 4),
        ('Halsobalans', 2),
        ('Snabbt och Gott', 4),
    )

    # Compiled once instead of re-parsing the pattern for every section.
    _WHITESPACE = re.compile(r'\s+')

    def __init__(self):
        # Remote menu URL, currently disabled in favour of a local copy:
        # self._granpark_link ='https://eurest.mashie.com/public/menu/granpark/b4c46dcd?country=se'
        self._granpark_link = r'file:///home/simon/Downloads/index.html'

    def scrape_page(self):
        """Return the current day's meals keyed by category label.

        Opens ``self._granpark_link``, locates the ``div`` with class
        ``day-current`` and reads the ``section`` elements with class
        ``day-alternative`` found under that div's parent.  Only the first
        five sections are mapped to labels; any further sections are ignored,
        and fewer sections simply yield fewer entries.

        Returns:
            dict mapping category label to the meal description text.

        Raises:
            AttributeError: if the page has no ``day-current`` div
                (``soup.find`` returns ``None`` in that case).
        """
        # BeautifulSoup reads the whole response while constructing the tree,
        # so the connection can be closed as soon as parsing is done.
        with closing(urlopen(self._granpark_link)) as page:
            soup = BeautifulSoup(page, 'html.parser')

        current_day = soup.find('div', attrs={'class': 'day-current'})
        sections = current_day.parent.findAll(
            'section', attrs={'class': 'day-alternative'})

        meals = {}
        # zip stops at the shorter input, matching the original behaviour of
        # ignoring sections beyond the fifth.
        for (label, skip), section in zip(self._MEAL_SLOTS, sections):
            words = self._WHITESPACE.sub(' ', section.text).split(' ')
            meals[label] = ' '.join(words[skip:])
        return meals


if __name__ == "__main__":
    granpark = ScrapeGranpark()
    meals = granpark.scrape_page()
    # NOTE(review): iterating the dict yields its keys, so only the category
    # labels are printed, not the meal descriptions -- confirm this is intended.
    for meal in meals:
        print(meal)