Add webscrape bytbil

2019-02-16 20:14:17 +01:00 · 2019-02-16 20:14:17 +01:00 · 9b463367d2
parent 7f5b50ff62
commit 9b463367d2
1 changed files with 27 additions and 0 deletions
--- a/bytpil.py
+++ b/bytpil.py
@ -0,0 +1,27 @@
+ # coding=utf-8
+import urllib2
+from bs4 import BeautifulSoup
+import xlsxwriter
+
+
+class ScrapeBytBil(object):
+    """Fetch bytbil.com pages and locate the search-result lists in them.
+
+    An ``xlsxwriter.Workbook`` object is created on construction. Nothing in
+    the code visible here writes to it or closes it, so a caller that wants
+    the file finalised should call ``self.workbook.close()`` itself.
+    """
+
+    # Output location used when the caller does not supply one.
+    DEFAULT_WORKBOOK_PATH = r'/home/simon/repo/bytbil/kakel.xlsx'
+
+    def __init__(self, workbook_path=None):
+        """Store the site base URL and create the output workbook object.
+
+        :param workbook_path: path handed to ``xlsxwriter.Workbook``; falls
+            back to ``DEFAULT_WORKBOOK_PATH`` when omitted.
+        """
+        if workbook_path is None:
+            workbook_path = self.DEFAULT_WORKBOOK_PATH
+        self._bytbil_page = "https://bytbil.com"
+        self.workbook = xlsxwriter.Workbook(workbook_path)
+
+    def readPage(self, urlName):
+        """Download one page of the site and find its result-list elements.
+
+        :param urlName: path plus query string that is appended verbatim to
+            the base URL, e.g. ``'/bil?Makes=Toyota'``.
+        :returns: the ``find_all`` result for ``<ul>`` elements whose class
+            attribute is ``result-list uk-padding-remove`` (empty when
+            nothing matches).
+        :raises urllib2.URLError: if the page cannot be fetched.
+        """
+        link = self._bytbil_page + urlName
+        page = urllib2.urlopen(link)
+        try:
+            soup = BeautifulSoup(page, 'html.parser')
+        finally:
+            # The response is fully parsed (or parsing failed) by now, so
+            # release the connection instead of leaking it.
+            page.close()
+
+        car_ul = soup.find_all('ul', {'class': 'result-list uk-padding-remove'})
+        # Diagnostic output kept from the original implementation.
+        print(type(car_ul))
+        # Hand the matches back so the caller's assignment receives them.
+        return car_ul
+
+
+if __name__ == "__main__":
+    # Search path for Toyota Yaris cars: the remaining filter fields are
+    # blank or false, and results are sorted by relevance.
+    yaris_search = '/bil?VehicleType=bil&Makes=Toyota&Models=Yaris&FreeText=&PriceRange.From=&PriceRange.To=&ModelYearRange.From=&ModelYearRange.To=&MilageRange.From=&MilageRange.To=&BodyTypes=&Gearboxes=&EnginePowerRange.From=&EnginePowerRange.To=&ShowLeasingOffers=&SortParams.SortField=relevance&SortParams.IsAscending=False&OnlyEnvironmentFriendly=false&OnlyHasCarfaxReport=false&OnlyFourWheelDrive=false&OnlyNew=false&OnlyDeductibleVAT=false&OnlyIsAuction=false&OnlyAuthorizedDealers=false&OnlyWarrantyProgram=false'
+
+    # Build the scraper, then fetch and parse the search page.
+    scraper = ScrapeBytBil()
+    search_result = scraper.readPage(yaris_search)