From 824e2b5462f4789dccf39def7a694c66f649097d Mon Sep 17 00:00:00 2001
From: Simon Milvert <simon@milvert.com>
Date: Sat, 16 Feb 2019 22:04:57 +0100
Subject: [PATCH] Add scrape konradsson

---
 konradsson.py | 124 ++++++++++++++++++++++++++++++++++++++++++++++++++
 1 file changed, 124 insertions(+)
 create mode 100644 konradsson.py

diff --git a/konradsson.py b/konradsson.py
new file mode 100644
index 0000000..9b3b6ae
--- /dev/null
+++ b/konradsson.py
@@ -0,0 +1,124 @@
+ # coding=utf-8
+import urllib2
+from bs4 import BeautifulSoup
+import os
+import xlsxwriter
+
+
+class ScrapeKakel(object):
+
+    def __init__(self):
+        self._konradsson_page = "https://konradssons.com"
+        self._proxy_support = urllib2.ProxyHandler({"https": "smipse:hm7gRHmj1986@proxyseso.scania.com:8080"})
+        self._auth = urllib2.HTTPBasicAuthHandler()
+        self._opener = urllib2.build_opener(self._proxy_support, self._auth, urllib2.HTTPSHandler)
+        urllib2.install_opener(self._opener)
+        self.workbook = xlsxwriter.Workbook('C:\\Users\\smipse\\kakel.xlsx')
+
+    def readPage(self, urlName):
+        link = self._konradsson_page + urlName
+        page = urllib2.urlopen(link)
+        soup = BeautifulSoup(page, 'html.parser')
+
+        kakel_box = soup.find_all('div', attrs={'class': 'product-item'})
+
+        konrads_link = []
+        for item in kakel_box:
+            kakel_a= item.find('a')
+            kakel_href = kakel_a.get('href')
+            konrads_link.append(kakel_href)
+
+        return konrads_link
+
+    def getKakel(self, hrefFromPage):
+
+        kakelList = []
+
+        for linkKakel in hrefFromPage:
+            link = self._konradsson_page + linkKakel
+            if link == u'sortiment/kakel-och-klinker/golvvagg/granito-arkansas-gra-pavé-8165/':
+                print link
+            page = urllib2.urlopen(link.encode('latin1'))
+            soup = BeautifulSoup(page, 'html.parser')
+            productInfo = soup.find('div', attrs={'class': 'col-md-12'})
+            left = productInfo.find_all(attrs={"class": 'productFeaturesLeft'})
+            right = productInfo.find_all(attrs={"class": 'productFeaturesRight'})
+
+            if len(left) == len(right):
+                tmpLeft = []
+                tmpRight = []
+                for item in left:
+                    leftText = item.text.strip()
+                    leftText = leftText.replace(u'ä','a')
+                    leftText = leftText.replace(':', '')
+                    if leftText.startswith('Anta'):
+                        leftText = 'Antal'
+
+                    tmpLeft.append(leftText)
+                for item in right:
+                    tmpRight.append(item.text.strip())
+                tmpLeft.append('link')
+                tmpRight.append(link)
+                kakelInfo = dict(zip(tmpLeft,tmpRight))
+                kakelList.append(kakelInfo)
+            else:
+                print productInfo
+
+        return kakelList
+
+    def writeToExcel(self, kakelList, name):
+
+        worksheet = self.workbook.add_worksheet(name=name)
+        row = 0
+        colPlace = {
+            'Serie': 0,
+            'Priskod': 1,
+            'Storlek': 2,
+            'Antal': 3,
+            'Yta': 4,
+            'Tjocklek': 5,
+            'Frostsaker': 6,
+            'Placering': 7,
+            'link': 8,
+            'Farg': 9,
+            'PEI': 10,
+            'Plats': 11,
+            'Art nr': 12,
+            'Arkstorlek': 13,
+        }
+
+        for item in kakelList:
+            col = 0
+            for key,value in item.iteritems():
+                if row == 0:
+                    worksheet.write(row, colPlace[key], key)
+                    worksheet.write(row + 1, colPlace[key], value)
+                else:
+                    worksheet.write(row, colPlace[key], value)
+                col +=1
+            if row == 0:
+                row += 2
+            else:
+                row += 1
+
+if __name__ == "__main__":
+
+
+    kakel = ScrapeKakel()
+    wall = '/sortiment/kakel-och-klinker/vagg'
+    floorWall = '/sortiment/kakel-och-klinker/golvvagg/'
+    natursten = '/sortiment/natursten/'
+
+    kakelWallHref = kakel.readPage(wall)
+    wallKakel = kakel.getKakel(kakelWallHref)
+    kakel.writeToExcel(wallKakel, 'wall')
+
+    kakelfloorWallHref = kakel.readPage(floorWall)
+    floorWallKakel = kakel.getKakel(kakelfloorWallHref)
+    kakel.writeToExcel(floorWallKakel, 'floorWall')
+
+    kakelnaturstenHref = kakel.readPage(natursten)
+    naturstenKakel = kakel.getKakel(kakelnaturstenHref)
+    kakel.writeToExcel(naturstenKakel, 'natursten')
+
+    kakel.workbook.close()