import re
import urllib.request

from bs4 import BeautifulSoup


class ScrapeGranpark(object):
    """Scrapes today's lunch alternatives from the Granpark menu page."""

    def __init__(self):
        # self._granpark_link = 'https://eurest.mashie.com/public/menu/granpark/b4c46dcd?country=se'
        self._granpark_link = r'file:///home/simon/Downloads/index.html'

    def scrape_page(self):
        page = urllib.request.urlopen(self._granpark_link)
        soup = BeautifulSoup(page, 'html.parser')

        # Today's menu sits in the parent element of the 'day-current' marker div;
        # each alternative is its own 'day-alternative' section.
        dagens_div = soup.find('div', attrs={'class': 'day-current'})
        dagens_parent = dagens_div.parent
        dagens_mat_list = dagens_parent.find_all(
            'section', attrs={'class': 'day-alternative'})

        # Each section's text starts with its category label; slice off the
        # label words and keep only the dish description.
        meals = {}
        for index, dagens in enumerate(dagens_mat_list):
            dagens_text = re.sub(r'\s+', ' ', dagens.text).split(' ')
            if index == 0:
                meals['A la Minute'] = ' '.join(dagens_text[4:])
            elif index == 1:
                meals['Dagens Husman'] = ' '.join(dagens_text[3:])
            elif index == 2:
                meals['Vegetariskt'] = ' '.join(dagens_text[4:])
            elif index == 3:
                meals['Halsobalans'] = ' '.join(dagens_text[2:])
            elif index == 4:
                meals['Snabbt och Gott'] = ' '.join(dagens_text[4:])
        return meals


if __name__ == "__main__":
    granpark = ScrapeGranpark()
    meals = granpark.scrape_page()
    for name, dish in meals.items():
        print('{}: {}'.format(name, dish))