From a4c8593ffe65a2fd819e464a197c67e7a63ea158 Mon Sep 17 00:00:00 2001 From: wonipapa Date: Wed, 9 Nov 2016 18:44:53 +0900 Subject: [PATCH] =?UTF-8?q?urllib=EB=A5=BC=20urllib2=EB=A1=9C=20=EB=B3=80?= =?UTF-8?q?=EA=B2=BD=20User=20Agent=20=EC=B6=94=EA=B0=80=20=EC=B1=84?= =?UTF-8?q?=EB=84=90=20=EC=86=8C=EC=8A=A4=20=EB=B3=80=EA=B2=BD?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- epg2xml.py | 126 ++++++++++++++++++++++++++++++----------------------- 1 file changed, 71 insertions(+), 55 deletions(-) diff --git a/epg2xml.py b/epg2xml.py index e9f0e9a..e890e2f 100644 --- a/epg2xml.py +++ b/epg2xml.py @@ -3,8 +3,7 @@ import os import sys -import httplib -import urllib +import urllib2 import json import datetime from bs4 import BeautifulSoup, SoupStrainer @@ -16,7 +15,7 @@ import argparse reload(sys) sys.setdefaultencoding('utf-8') -__version__ = '1.0.5' +__version__ = '1.0.6' # Set My Configuration default_icon_url = '' # TV channel icon url (ex : http://www.example.com/Channels) @@ -27,6 +26,7 @@ default_xml_socket = 'xmltv.sock' # External XMLTV 사용시 기본 소켓 이 # Set date today = datetime.date.today() +ua = {'User-Agent': 'Mozilla/5.0 (Windows NT 6.3; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/53.0.2785.116 Safari/537.36', 'accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,*/*;q=0.8', 'accept-language': 'en-US,en;q=0.8,ko;q=0.6'} # Get epg data def getEpg(): @@ -35,28 +35,25 @@ def getEpg(): SiteEPG = [] #For epg.co.kr with open(Channelfile) as f: # Read Channel Information file Channeldata = json.load(f) - for chinfo in Channeldata: - if chinfo['Enabled'] == 1 : - if MyISP == 'KT' and not( chinfo['KTCh'] is None) : - ChannelInfos.append([chinfo['Id'], chinfo['Name'], chinfo['Source'], chinfo['ServiceId']]) - elif MyISP == 'LG' and not( chinfo['LGCh'] is None) : - ChannelInfos.append([chinfo['Id'], chinfo['Name'], chinfo['Source'], chinfo['ServiceId']]) - elif MyISP == 'SK' and not( chinfo['SKCh'] is None) : - ChannelInfos.append([chinfo['Id'], chinfo['Name'], chinfo['Source'], chinfo['ServiceId']]) - # Print Channel information - for ChannelInfo in ChannelInfos: - ChannelId = ChannelInfo[0] - ChannelName = escape(ChannelInfo[1]) - ChannelSource = ChannelInfo[2] - ChannelServiceId = ChannelInfo[3] - writeXML(' ' % (ChannelId)) - writeXML(' %s' % (ChannelName)) - if IconUrl: - writeXML(' ' % (IconUrl, ChannelId)) - writeXML(' ') + for ChannelInfo in Channeldata: #Get Channel & Print Channel info + if ChannelInfo['Enabled'] == 1: + ChannelId = ChannelInfo['Id'] + ChannelName = escape(ChannelInfo['Name']) + ChannelSource = ChannelInfo['Source'] + ChannelServiceId = ChannelInfo['ServiceId'] + ChannelNumber = ChannelInfo[MyISP+'Ch'] + if not (ChannelInfo[MyISP+'Ch'] is None): + ChannelInfos.append([ChannelId, ChannelName, ChannelSource, ChannelServiceId]) + writeXML(' ' % (ChannelId)) + writeXML(' %s' % (ChannelName)) + writeXML(' %s' % (ChannelNumber)) + if IconUrl: + writeXML(' ' % (IconUrl, ChannelId)) + writeXML(' ') # Print Program Information +# Print Program Information for ChannelInfo in ChannelInfos: ChannelId = ChannelInfo[0] ChannelName = ChannelInfo[1] @@ -86,7 +83,8 @@ def GetEPGFromEPG(ChannelInfos): for k in range(period): day = today + datetime.timedelta(days=k) url = 'http://schedule.epg.co.kr/php/guide/schedule_day_on.php?%snext=&old_sub_channel_group=110&old_sub_channel_group=110&old_top_channel_group=2&search_sub_category=&search_sub_channel_group=110&search_top_category=&search_top_channel_group=2&selectday=%s&selectday2=%s&weekchannel=&ymd=%s' % (churl, day, day, day) - u = urllib.urlopen(url).read() + request = urllib2.Request(url,headers=ua) + u = urllib2.urlopen(request).read() data = unicode(u, 'euc-kr', 'ignore').encode('utf-8', 'ignore') strainer = SoupStrainer('table', {"bgcolor" : "#D6D6D6"}) soup = BeautifulSoup(data, 'lxml', parse_only=strainer, from_encoding='utf-8') @@ -105,20 +103,24 @@ def GetEPGFromEPG(ChannelInfos): endTime = str(today.year) + '/' + endTime endTime = datetime.datetime.strptime(endTime, '%Y/%m/%d %p %I:%M') endTime = endTime.strftime('%Y%m%d%H%M%S') + desc = '' category = epgdata[5].split('-')[0].strip() actors = epgdata[6] producers = epgdata[7] + category = epgdata[5].split('-')[0].strip() matches = re.match('^(.*?)\s*(<(.*)>)?(\(([\d,]+)회\))?$', programName) if not (matches is None): programName = matches.group(1) if matches.group(1) else '' subprogramName = matches.group(3) if matches.group(3) else '' episode = matches.group(5) if matches.group(5) else '' + rebroadcast = False rating = 0 for image in td.findAll('img'): - if 'rebroadcast' in image.get('src') : programName = programName + ' (재방송)' + if 'rebroadcast' in image.get('src') : + programName = programName + ' (재방송)' + rebroadcast = True if 'grade' in image.get('src') : rating = int(image.get('src')[22:].replace('.gif','')) - desc = '' - programdata = {'channelId':channelId, 'startTime':startTime, 'endTime':endTime, 'programName':programName, 'subprogramName':subprogramName, 'desc':desc, 'actors':actors, 'producers':producers, 'category':category, 'episode':episode, 'rating':rating} + programdata = {'channelId':channelId, 'startTime':startTime, 'endTime':endTime, 'programName':programName, 'subprogramName':subprogramName, 'desc':desc, 'actors':actors, 'producers':producers, 'category':category, 'episode':episode, 'rebroadcast':rebroadcast, 'rating':rating} writeProgram(programdata) # Get EPG data from KT @@ -129,7 +131,8 @@ def GetEPGFromKT(ChannelInfo): for k in range(period): day = today + datetime.timedelta(days=k) url = 'http://tv.olleh.com/renewal_sub/liveTv/pop_schedule_week.asp?ch_name=&ch_no=%s&nowdate=%s&seldate=%s&tab_no=1' % (ServiceId, day, day) - u = urllib.urlopen(url).read() + request = urllib2.Request(url,headers=ua) + u = urllib2.urlopen(request).read() data = unicode(u, 'euc-kr', 'ignore').encode('utf-8', 'ignore') strainer = SoupStrainer('table', {'id':'pop_day'}) soup = BeautifulSoup(data, 'lxml', parse_only=strainer, from_encoding='utf-8') @@ -149,15 +152,18 @@ def GetEPGFromKT(ChannelInfo): endTime = datetime.datetime.strptime(epg2[1], '%Y-%m-%d %H:%M') endTime = endTime.strftime('%Y%m%d%H%M%S') category = epg1[2] - rating = 0 - matches = re.match('(\d+)', epg1[3]) - if not(matches is None): rating = int(matches.group()) desc = '' actors = '' producers = '' episode = '' - programdata = {'channelId':channelId, 'startTime':startTime, 'endTime':endTime, 'programName':programName, 'subprogramName':subprogramName, 'desc':desc, 'actors':actors, 'producers':producers, 'category':category, 'episode':episode, 'rating':rating} + rebroadcast = False + rating = 0 + matches = re.match('(\d+)', epg1[3]) + if not(matches is None): rating = int(matches.group()) + programdata = {'channelId':channelId, 'startTime':startTime, 'endTime':endTime, 'programName':programName, 'subprogramName':subprogramName, 'desc':desc, 'actors':actors, 'producers':producers, 'category':category, 'episode':episode, 'rebroadcast':rebroadcast, 'rating':rating} writeProgram(programdata) + + # Get EPG data from LG def GetEPGFromLG(ChannelInfo): channelId = ChannelInfo[0] @@ -166,7 +172,8 @@ def GetEPGFromLG(ChannelInfo): for k in range(period): day = today + datetime.timedelta(days=k) url = 'http://www.uplus.co.kr/css/chgi/chgi/RetrieveTvSchedule.hpi?chnlCd=%s&evntCmpYmd=%s' % (ServiceId, day.strftime('%Y%m%d')) - u = urllib.urlopen(url).read() + request = urllib2.Request(url,headers=ua) + u = urllib2.urlopen(request).read() data = unicode(u, 'euc-kr', 'ignore').encode('utf-8', 'ignore') strainer = SoupStrainer('table') soup = BeautifulSoup(data, 'lxml', parse_only=strainer, from_encoding='utf-8') @@ -187,47 +194,53 @@ def GetEPGFromLG(ChannelInfo): endTime = datetime.datetime.strptime(epg2[1], "%Y-%m-%d %H:%M") endTime = endTime.strftime("%Y%m%d%H%M%S") category = epg1[2] - rating = 0 - matches = re.match('(\d+)세이상 관람가', epg1[3].encode('utf-8')) - if not(matches is None): rating = int(matches.group(1)) desc = '' actors = '' producers = '' - programdata = {'channelId':channelId, 'startTime':startTime, 'endTime':endTime, 'programName':programName, 'subprogramName':subprogramName, 'desc':desc, 'actors':actors, 'producers':producers, 'category':category, 'episode':episode, 'rating':rating} + category = epg1[2] + rebroadcast = False + category = epg1[2] + rating = 0 + matches = re.match('(\d+)세이상 관람가', epg1[3].encode('utf-8')) + if not(matches is None): rating = int(matches.group(1)) + programdata = {'channelId':channelId, 'startTime':startTime, 'endTime':endTime, 'programName':programName, 'subprogramName':subprogramName, 'desc':desc, 'actors':actors, 'producers':producers, 'category':category, 'episode':episode, 'rebroadcast':rebroadcast, 'rating':rating} writeProgram(programdata) + # Get EPG data from SK def GetEPGFromSK(ChannelInfo): channelId = ChannelInfo[0] ServiceId = ChannelInfo[3] lastday = today + datetime.timedelta(days=period-1) url = 'http://m.btvplus.co.kr/Common/Inc/IFGetData.asp?variable=IF_LIVECHART_DETAIL&pcode=|^|start_time=%s00|^|end_time=%s24|^|svc_id=%s' % (today.strftime("%Y%m%d"), lastday.strftime("%Y%m%d"), ServiceId) - u = urllib.urlopen(url).read() + request = urllib2.Request(url,headers=ua) + u = urllib2.urlopen(request).read() data = json.loads(u, encoding='utf-8') programs = data['channel']['programs'] for program in programs: programName = '' subprogramName = '' episode = '' - rebroadcast = '' + rebroadcast = False matches = re.match('^(.*?)(?:\s*[\(<]([\d,회]+)[\)>])?(?:\s*<([^<]*?)>)?(\((재)\))?$', program['programName'].replace('...', '>').encode('utf-8')) if not (matches is None): programName = matches.group(1).strip() if matches.group(1) else '' subprogramName = matches.group(3).strip() if matches.group(3) else '' episode = matches.group(2).replace('회', '') if matches.group(2) else '' - rebroadcast = 'Y' if matches.group(5) else 'N' - if rebroadcast == 'Y': programName = programName + ' (재방송)' + rebroadcast = True if matches.group(5) else False + if rebroadcast == True: programName = programName + ' (재방송)' actors = program['actorName'].replace('...','').strip(', ') if program['actorName'] else '' producers = program['directorName'].replace('...','').strip(', ') if program['directorName'] else '' startTime = datetime.datetime.fromtimestamp(int(program['startTime'])/1000) startTime = startTime.strftime('%Y%m%d%H%M%S') endTime = datetime.datetime.fromtimestamp(int(program['endTime'])/1000) endTime = endTime.strftime('%Y%m%d%H%M%S') + desc = program['synopsis'] if program['synopsis'] else '' category = program['mainGenreName'] rating = int(program['ratingCd']) if program['programName'] else 0 desc = '' if program['synopsis'] : desc = program['synopsis'] - programdata = {'channelId':channelId, 'startTime':startTime, 'endTime':endTime, 'programName':programName, 'subprogramName':subprogramName, 'desc':desc, 'actors':actors, 'producers':producers, 'category':category, 'episode':episode, 'rating':rating} + programdata = {'channelId':channelId, 'startTime':startTime, 'endTime':endTime, 'programName':programName, 'subprogramName':subprogramName, 'desc':desc, 'actors':actors, 'producers':producers, 'category':category, 'episode':episode, 'rebroadcast':rebroadcast, 'rating':rating} writeProgram(programdata) # Get EPG data from SKY @@ -237,30 +250,31 @@ def GetEPGFromSKY(ChannelInfo): for k in range(period): day = today + datetime.timedelta(days=k) url = 'http://www.skylife.co.kr/channel/epg/channelScheduleList.do?area=in&inFd_channel_id=%s&inairdate=%s&indate_type=now' % (ServiceId, day) - u = urllib.urlopen(url).read() + request = urllib2.Request(url,headers=ua) + u = urllib2.urlopen(request).read() data = json.loads(u, encoding='utf-8') programs = data['scheduleListIn'] for program in {v['starttime']:v for v in programs}.values(): programName = unescape(program['program_name']).replace('lt;','<').replace('gt;','>').replace('amp;','&') if program['program_name'] else '' subprogramName = unescape(program['program_subname']).replace('lt;','<').replace('gt;','>').replace('amp;','&') if program['program_subname'] else '' - rebroadcast = program['rebroad'] if program['rebroad'] else '' - if rebroadcast == 'Y': programName = programName + ' (재방송)' actors = program['cast'].replace('...','').strip(', ') if program['cast'] else '' producers = program['dirt'].replace('...','').strip(', ') if program['dirt'] else '' startTime = program['starttime'] endTime = program['endtime'] - category = program['program_category1'] - rating = int(program['grade']) if program['grade'] else '' - episode = program['episode_id'] if program['episode_id'] else '' - if episode : episode = int(episode) description = unescape(program['description']).replace('lt;','<').replace('gt;','>').replace('amp;','&') if program['description'] else '' if description: description = unescape(description).replace('lt;','<').replace('gt;','>').replace('amp;','&') summary = unescape(program['summary']).replace('lt;','<').replace('gt;','>').replace('amp;','&') if program['summary'] else '' - desc = '' - if description: desc = description + desc = description if description else '' if summary : desc = desc + '\n' + summary - programdata = {'channelId':channelId, 'startTime':startTime, 'endTime':endTime, 'programName':programName, 'subprogramName':subprogramName, 'desc':desc, 'actors':actors, 'producers':producers, 'category':category, 'episode':episode, 'rating':rating} + category = program['program_category1'] + episode = program['episode_id'] if program['episode_id'] else '' + if episode : episode = int(episode) + rebroadcast = True if program['rebroad']== 'Y' else False + if rebroadcast == True: programName = programName + ' (재방송)' + rating = int(program['grade']) if program['grade'] else 0 + programdata = {'channelId':channelId, 'startTime':startTime, 'endTime':endTime, 'programName':programName, 'subprogramName':subprogramName, 'desc':desc, 'actors':actors, 'producers':producers, 'category':category, 'episode':episode, 'rebroadcast':rebroadcast, 'rating':rating} writeProgram(programdata) + # Write Program def writeProgram(programdata): @@ -273,11 +287,11 @@ def writeProgram(programdata): producers = escape(programdata['producers']) category = escape(programdata['category']) episode = programdata['episode'] + rebroadcast = programdata['rebroadcast'] if programdata['rating'] == 0 : rating = '전체 관람가' else : rating = '%s세 이상 관람가' % (programdata['rating']) - desc = programName if subprogramName : desc = desc + '\n부제 : ' + subprogramName if episode : desc = desc + '\n회차 : ' + str(episode) + '회' @@ -286,6 +300,7 @@ def writeProgram(programdata): if producers : desc = desc + '\n제작 : ' + producers desc = desc + '\n등급 : ' + rating if programdata['desc'] : desc = desc + '\n' + escape(programdata['desc']) + rebroadcast = programdata['rebroadcast'] contentTypeDict={'교양':'Arts / Culture (without music)', '만화':'Cartoons / Puppets', '교육':'Education / Science / Factual topics', '취미':'Leisure hobbies', '드라마':'Movie / Drama', '영화':'Movie / Drama', '음악':'Music / Ballet / Dance', '뉴스':'News / Current affairs', '다큐':'Documentary', '시사/다큐':'Documentary', '연예':'Show / Game show', '스포츠':'Sports', '홈쇼핑':'Advertisement / Shopping'} contentType = '' for key, value in contentTypeDict.iteritems(): @@ -307,14 +322,15 @@ def writeProgram(programdata): print ' ' if category: print ' %s' % (category) if contentType: print ' %s' % (contentType) - if episode: - print ' %s' % (episode) + if episode: print ' %s' % (episode) + if rebroadcast: print ' ' + if rating: print ' ' print ' %s' % (rating) print ' ' print ' ' -# Write XML + def writeXML(data): print data