diff --git a/epg2xml.py b/epg2xml.py index f27eaa0..e57176d 100644 --- a/epg2xml.py +++ b/epg2xml.py @@ -44,7 +44,7 @@ if not sys.version_info[:2] == (2, 7): sys.exit() # Set variable -__version__ = '1.2.5' +__version__ = '1.2.5p1' debug = False today = datetime.date.today() ua = {'User-Agent': 'Mozilla/5.0 (Windows NT 6.3; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/53.0.2785.116 Safari/537.36', 'accept': '*/*'} @@ -381,17 +381,16 @@ def GetEPGFromSKB(ChannelInfo): html_data = response.content data = unicode(html_data, 'euc-kr', 'ignore').encode('utf-8', 'ignore') data = re.sub('', '', data, 0, re.I|re.S) - data = re.sub('', '', data) - data = re.sub('', '', data) - data = re.sub('화면해설','',data) - data = re.sub('자막방송','',data) - data = re.sub('Full HD','',data) - data = re.sub('UHD','',data) - data = re.sub('now on','',data) - data = re.sub('','',data) - data = re.sub('프로그램 안내', '',data) - pattern = '(.*)<\/span>' - data = re.sub(pattern, partial(replacement, tag='span'), data) + data = re.sub('(.*?)', '', data) + data = re.sub('(.*?)', '', data) + data = re.sub('(.*?)', '', data) + data = re.sub('(.*?)', '', data) + data = re.sub('(.*?)', '', data) + data = re.sub('(.*?)', '', data) + data = re.sub('(.*?)', '', data) + data = re.sub('프로그램 안내', '', data) + #pattern = '(.*)<\/span>' + #data = re.sub(pattern, partial(replacement, tag='span'), data) strainer = SoupStrainer('div', {'id':'uiScheduleTabContent'}) soup = BeautifulSoup(data, htmlparser, parse_only=strainer, from_encoding='utf-8') html = soup.find_all('li',{'class':'list'}) if soup.find_all('li') else '' @@ -405,6 +404,8 @@ def GetEPGFromSKB(ChannelInfo): startTime = startTime.strftime('%Y%m%d%H%M%S') cell = row.find('p', {'class':'cont'}) if(cell): + if cell.find('span'): + cell.span.decompose() cell = cell.text.decode('string_escape').strip() pattern = "^(.*?)(\(([\d,]+)회\))?(<(.*)>)?(\((재)\))?$" matches = re.match(pattern, cell) @@ -413,9 +414,10 @@ def GetEPGFromSKB(ChannelInfo): subprogramName = matches.group(5) if matches.group(5) else '' rebroadcast = True if matches.group(7) else False episode = matches.group(3) if matches.group(3) else '' - rating = row.find('span', {'class':re.compile('^watch.*$')}) + rating = row.find('i', {'class':'hide'}) if not(rating is None) : - rating = int(rating.text.decode('string_escape').replace('세','').strip()) + rating = int(rating.text.decode('string_escape').replace('세 이상','').strip()) + #ChannelId, startTime, programName, subprogramName, desc, actors, producers, category, episode, rebroadcast, rating epginfo.append([ChannelId, startTime, programName, subprogramName, desc, actors, producers, category, episode, rebroadcast, rating]) time.sleep(0.001)