Add webscrape bytbil
This commit is contained in:
parent
7f5b50ff62
commit
9b463367d2
|
|
@ -0,0 +1,27 @@
|
|||
# coding=utf-8
|
||||
import urllib2
|
||||
from bs4 import BeautifulSoup
|
||||
import xlsxwriter
|
||||
|
||||
|
||||
class ScrapeBytBil(object):
    """Fetch search-result pages from bytbil.com and pull out the result lists.

    An xlsxwriter workbook is created on construction and exposed as
    ``self.workbook``. Nothing in this class writes to or closes it, so the
    caller is responsible for calling ``self.workbook.close()`` when done.
    """

    def __init__(self, workbook_path=r'/home/simon/repo/bytbil/kakel.xlsx'):
        """Set the site root and create the output workbook.

        :param workbook_path: location of the xlsx workbook. Defaults to the
            path that was previously hard-coded, so existing callers that
            pass no argument behave as before.
        """
        self._bytbil_page = "https://bytbil.com"
        self.workbook = xlsxwriter.Workbook(workbook_path)

    def readPage(self, urlName):
        """Download one page and return its result-list ``<ul>`` elements.

        :param urlName: path and query string appended verbatim to the site
            root (e.g. ``'/bil?VehicleType=bil&...'``).
        :returns: the result of ``soup.find_all`` for ``<ul>`` tags whose
            class is ``result-list uk-padding-remove``.
        :raises: whatever ``urllib2.urlopen`` raises for an unreachable or
            failing URL; errors are not caught here.
        """
        link = self._bytbil_page + urlName
        page = urllib2.urlopen(link)
        try:
            # BeautifulSoup consumes the response while constructing the
            # tree, so the connection can be released right afterwards.
            soup = BeautifulSoup(page, 'html.parser')
        finally:
            page.close()

        car_ul = soup.find_all('ul', {'class': 'result-list uk-padding-remove'})
        print(type(car_ul))
        # Hand the matches back: the caller stores this method's return
        # value, which used to be None.
        return car_ul
|
||||
|
||||
|
||||
if __name__ == "__main__":
    # Script entry point: scrape the bytbil.com search results for Toyota Yaris.
    scraper = ScrapeBytBil()

    # Search path + query string; adjacent literals are joined by the parser
    # into the exact same single string, just split for readability.
    yaris_query = (
        '/bil?VehicleType=bil&Makes=Toyota&Models=Yaris&FreeText='
        '&PriceRange.From=&PriceRange.To='
        '&ModelYearRange.From=&ModelYearRange.To='
        '&MilageRange.From=&MilageRange.To='
        '&BodyTypes=&Gearboxes='
        '&EnginePowerRange.From=&EnginePowerRange.To='
        '&ShowLeasingOffers='
        '&SortParams.SortField=relevance&SortParams.IsAscending=False'
        '&OnlyEnvironmentFriendly=false&OnlyHasCarfaxReport=false'
        '&OnlyFourWheelDrive=false&OnlyNew=false&OnlyDeductibleVAT=false'
        '&OnlyIsAuction=false&OnlyAuthorizedDealers=false'
        '&OnlyWarrantyProgram=false'
    )

    page_result = scraper.readPage(yaris_query)
|
||||
Loading…
Reference in New Issue