#!/usr/bin/python

from urllib.request import urlopen
import re
from html import unescape

# Root of the site; every scraped page address is built by appending to it.
base_url = 'http://vvv.chmi.cz/'
# Listing page that is scanned line by line for links to station pages.
list_url = ''.join((base_url, 'pocasi-na-stanici.html'))

# Matches a relative page name that starts with "pocasi-na-stanici" and ends
# with ".html". The ".*" is greedy, so on a line holding several such names
# the match runs from the first name through the last ".html" on that line.
page_re = re.compile(
    r"(pocasi-na-stanici.*\.html)"
)

def get_url_content(url):
    """Fetch *url* and return its body decoded as UTF-8 text.

    Args:
        url: Address to open; passed straight to ``urlopen``.

    Returns:
        The full response body as a ``str``.

    Raises:
        urllib.error.URLError: If the resource cannot be opened.
        UnicodeDecodeError: If the body is not valid UTF-8.
    """
    # The context manager closes the handle even when read() or decode()
    # raises, which the previous explicit close() call did not guarantee.
    with urlopen(url) as handle:
        return handle.read().decode('utf-8')

def get_substring_idx(needle, haystack):
    """Return the index of the first item in *haystack* containing *needle*.

    Args:
        needle: Substring to look for.
        haystack: Iterable of strings, searched in order.

    Returns:
        The zero-based position of the first item that contains *needle*,
        or ``None`` when no item does. Because 0 is a valid result, callers
        must compare against ``None`` instead of testing truthiness.
    """
    # next() stops at the first hit instead of collecting every match.
    return next(
        (idx for idx, item in enumerate(haystack) if needle in item),
        None,
    )

# NOTE(review): this dict is never read or filled in the visible code; it is
# kept in case something outside this view relies on the name.
stanice = dict()

# Walk the listing page line by line; every line that links to a station page
# triggers a fetch of that page and prints "<station> <temperature>".
for line in get_url_content(list_url).split('\n'):
    page = page_re.search(line)
    if not page:
        continue

    stanice_url = base_url + page.group(0)
    # Decode HTML entities and trim whitespace so that labels and values can
    # be compared and printed as plain text.
    stanice_content_list = [unescape(s).strip() for s in get_url_content(stanice_url).split('\n')]
    stanice_idx = get_substring_idx('Stanice:', stanice_content_list)
    teplota_idx = get_substring_idx('Teplota:', stanice_content_list)

    # Compare with None explicitly: index 0 is a valid match but is falsy,
    # and a missing temperature label would otherwise raise a TypeError below.
    if stanice_idx is None or teplota_idx is None:
        continue

    # Each value is read three lines below its label; skip pages that end
    # before that line instead of failing with an IndexError.
    if max(stanice_idx, teplota_idx) + 3 >= len(stanice_content_list):
        continue

    print("%s %s" % (stanice_content_list[stanice_idx+3], stanice_content_list[teplota_idx+3]))
