urllib2를 requests로 변경

User Agent 변경
채널 소스 변경
Naver 함수 추가
This commit is contained in:
wonipapa 2016-11-09 18:47:50 +09:00
parent f26e148ea7
commit 5c69d5a214

View File

@ -1,9 +1,10 @@
#!/usr/bin/env python #!/usr/bin/env python
# -*- coding: utf-8 -*- # -*- coding: utf-8 -*-
from __future__ import print_function
import os import os
import sys import sys
import urllib2 import requests
import json import json
import datetime import datetime
from bs4 import BeautifulSoup, SoupStrainer from bs4 import BeautifulSoup, SoupStrainer
@ -15,270 +16,415 @@ import argparse
# Python 2 only: make UTF-8 the default codec for implicit str/unicode conversion.
reload(sys)
sys.setdefaultencoding('utf-8')

__version__ = '1.0.7'

# ---- User configuration ----
# TV channel icon url (ex : http://www.example.com/Channels)
default_icon_url = ''
# Detailed EPG data output
default_verbose = 'n'
# Period of EPG data to fetch
default_fetch_limit = 2
# Default file name used when saving the EPG (ex: /home/tvheadend/xmltv.xml)
default_xml_filename = 'xmltv.xml'
# Default socket name used with External XMLTV (ex: /home/tvheadend/xmltv.sock)
default_xml_socket = 'xmltv.sock'
# ---- End of user configuration ----

# Shared state and constants
debug = False
today = datetime.date.today()
# HTTP headers sent with every request
ua = {
    'User-Agent': 'Mozilla/5.0 (Windows NT 6.3; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/53.0.2785.116 Safari/537.36',
    'accept': '*/*',
}
# Message suffixes; callers prepend the channel name
CHANNEL_ERROR = ' 존재하지 않는 채널입니다.'
CONTENT_ERROR = ' EPG 정보가 없습니다.'
HTTP_ERROR = ' EPG 정보를 가져오는데 문제가 있습니다.'
# Get epg data
def getEpg():
    """Print the whole XMLTV document to standard output.

    Reads ``Channel.json`` from the directory of this script, prints the
    XML header, prints one ``<channel>`` element for every enabled channel
    that has a channel number for the selected ISP (``MyISP``), then calls
    the fetcher matching each channel's ``Source`` so that it emits the
    channel's programmes, and finally closes the ``<tv>`` element.

    Channels whose ``Source`` has no fetcher are listed but get no
    programmes.  If ``Channel.json`` cannot be read or parsed an error is
    reported and the process exits with status 1.
    """
    Channelfile = os.path.dirname(os.path.abspath(__file__)) + '/Channel.json'
    ChannelInfos = []
    try:
        with open(Channelfile) as f: # Read Channel Information file
            Channeldatas = json.load(f)
    except EnvironmentError:
        printError('Channel.json 파일을 읽을 수 없습니다.')
        sys.exit(1)  # non-zero so callers can detect the failure
    except ValueError:
        printError('Channel.json 파일 형식이 잘못되었습니다.')
        sys.exit(1)

    print('<?xml version="1.0" encoding="UTF-8"?>')
    print('<!DOCTYPE tv SYSTEM "xmltv.dtd">\n')
    print('<tv generator-info-name="epg2xml.py">')

    # Get Channel & Print Channel info
    for Channeldata in Channeldatas:
        if Channeldata['Enabled'] != 1:
            continue
        ChannelNumber = Channeldata[MyISP+'Ch']
        if ChannelNumber is None:
            # Channel is not carried by the selected ISP.
            continue
        ChannelId = Channeldata['Id']
        ChannelName = escape(Channeldata['Name'])
        ChannelSource = Channeldata['Source']
        ChannelServiceId = Channeldata['ServiceId']
        ChannelInfos.append([ChannelId, ChannelName, ChannelSource, ChannelServiceId])
        print(' <channel id="%s">' % (ChannelId))
        print(' <display-name>%s</display-name>' % (ChannelName))
        print(' <display-name>%s</display-name>' % (ChannelNumber))
        if IconUrl:
            print(' <icon src="%s/%s.png" />' % (IconUrl, ChannelId))
        print(' </channel>')

    # Print Program Information
    fetchers = {
        'EPG': GetEPGFromEPG,
        'KT': GetEPGFromKT,
        'LG': GetEPGFromLG,
        'SK': GetEPGFromSK,
        'SKY': GetEPGFromSKY,
        'NAVER': GetEPGFromNaver,
    }
    for ChannelInfo in ChannelInfos:
        ChannelName = ChannelInfo[1]
        ChannelSource = ChannelInfo[2]
        if(debug) : printLog(ChannelName + ' 채널 EPG 데이터 가져오고 있습니다')
        fetch = fetchers.get(ChannelSource)
        if fetch is not None:
            fetch(ChannelInfo)

    print('</tv>')
# Get EPG data from epg.co.kr
def GetEPGFromEPG(ChannelInfo):
    """Fetch one channel's schedule from epg.co.kr and emit its programmes.

    ChannelInfo is the list ``[ChannelId, ChannelName, Source, ServiceId]``.
    For each of the ``period`` days starting at ``today`` the day guide is
    requested, the tables at index 1..3 of the result are walked hour by
    hour, and every recognised programme row is passed to ``writeProgram``.

    Only programmes in the last hour cell of the third table get an end
    time (looked up on the programme's detail page); all others are emitted
    with an empty end time.  Request failures are reported with HTTP_ERROR
    and a reply without the expected tables with CONTENT_ERROR; in both
    cases the remaining days are still processed.
    """
    ChannelId = ChannelInfo[0]
    ChannelName = ChannelInfo[1]
    ServiceId = ChannelInfo[3]
    url = 'http://www.epg.co.kr/epg-cgi/extern/cnm_guide_type_v070530.cgi'
    contenturl = 'http://www.epg.co.kr/epg-cgi/guide_schedule_content.cgi'
    # Groups: 1 minute, 2 programme id, 3 title, 5 subtitle, 6 rerun mark,
    # 8 episode number, 9 trailing <img> markup (carries the rating icon).
    rowPattern = re.compile(r"<tr>.*\[(.*)\]<\/td>\s.*ViewContent\('(.*)'\)\">(.*?)\s*(&lt;(.*)&gt;)?\s*(\(재\))?\s*(\(([\d,]+)회\))?(<img.*)?<\/a> <\/td><\/tr>")
    for k in range(period):
        day = today + datetime.timedelta(days=k)
        params = {'beforegroup':'100', 'checkchannel':ServiceId, 'select_group':'100', 'start_date':day.strftime('%Y%m%d')}
        try:
            response = requests.post(url, data=params, headers=ua)
            response.raise_for_status()
        except requests.exceptions.RequestException:
            # Also covers connection failures, not only bad status codes.
            printError(ChannelName + HTTP_ERROR)
            continue
        data = unicode(response.content, 'euc-kr', 'ignore').encode('utf-8', 'ignore')
        strainer = SoupStrainer('table', {'style':'margin-bottom:30'})
        soup = BeautifulSoup(data, 'lxml', parse_only=strainer, from_encoding='utf-8')
        table = soup.find_all('table', {'style':'margin-bottom:30'})
        if len(table) < 4:
            # Tables 1..3 are indexed below; fewer means there is no guide.
            printError(ChannelName + CONTENT_ERROR)
            continue

        for i in range(1,4):
            thisday = day
            row = table[i].find_all('td', {'colspan':'2'})
            for j, cell in enumerate(row):
                # Keep only the digits so a unit suffix after the hour
                # cannot break int().
                digits = re.sub(r'\D', '', cell.text)
                if not digits:
                    continue
                hour = int(digits)
                if i == 1:
                    meridiem = 'AM'
                elif i == 2 or hour > 5:
                    meridiem = 'PM'
                else:
                    # NOTE(review): hours up to 5 in the third table are
                    # treated as early morning of the following day; the
                    # boundary hour 5 is assumed to belong here - confirm.
                    meridiem = 'AM'
                    thisday = day + datetime.timedelta(days=1)
                for celldata in cell.parent.find_all('tr'):
                    matches = rowPattern.match(str(celldata))
                    if matches is None:
                        continue
                    minute = matches.group(1) or ''
                    try:
                        startTime = datetime.datetime.strptime('%s %s %d:%s' % (thisday, meridiem, hour, minute), '%Y-%m-%d %p %I:%M')
                    except ValueError:
                        # Unparseable time: skip the row instead of aborting.
                        continue
                    startTime = startTime.strftime('%Y%m%d%H%M%S')
                    pid = matches.group(2) or ''
                    programName = matches.group(3) or ''
                    subprogramName = matches.group(5) or ''
                    desc = ''
                    actors = ''
                    producers = ''
                    category = ''
                    rebroadcast = True if matches.group(6) else False
                    episode = matches.group(8) or ''
                    image = matches.group(9) or ''
                    grade = re.match(r'.*schedule_([\d,]+)?.*', image)
                    gradeDigits = grade.group(1) if grade is not None else None
                    # The digit group is optional, so it may be missing.
                    rating = int(gradeDigits) if gradeDigits and gradeDigits.isdigit() else 0
                    endTime = ''
                    if i == 3 and len(row) - 1 == j and pid:
                        endTime = _getEPGEndTime(contenturl, pid, thisday, ChannelName)
                    programdata = {'channelId':ChannelId, 'startTime':startTime, 'endTime':endTime, 'programName':programName, 'subprogramName':subprogramName, 'desc':desc, 'actors':actors, 'producers':producers, 'category':category, 'episode':episode, 'rebroadcast':rebroadcast, 'rating':rating}
                    writeProgram(programdata)


def _getEPGEndTime(contenturl, pid, thisday, ChannelName):
    """Return a programme's end time as ``%Y%m%d%H%M%S``, or '' if unknown.

    Requests the detail page of programme ``pid`` and reads the text after
    '~' in the third cell of the third table row.  The time is interpreted
    as an AM time on ``thisday``.
    """
    try:
        response = requests.get(contenturl, params={'pid':pid}, headers=ua)
        response.raise_for_status()
    except requests.exceptions.RequestException:
        printError(ChannelName + HTTP_ERROR)
        return ''
    data = unicode(response.content, 'euc-kr', 'ignore').encode('utf-8', 'ignore')
    strainer = SoupStrainer('table', {'border':'0', 'cellpadding':'3'})
    soup = BeautifulSoup(data, 'lxml', parse_only=strainer, from_encoding='utf-8')
    td = soup.select('tr:nth-of-type(3) > td:nth-of-type(3)')
    if not td or '~' not in td[0].text:
        return ''
    # NOTE(review): assumes the text after '~' holds the hour followed by
    # the optional minute - confirm against the live page.
    numbers = re.findall(r'\d+', td[0].text.split('~')[1])
    if not numbers:
        return ''
    hour = int(numbers[0])
    minute = int(numbers[1]) if len(numbers) > 1 else 0
    if hour == 0:
        hour = 12  # '%I' has no hour 0; 12 AM is midnight
    try:
        endTime = datetime.datetime.strptime('%s AM %d:%02d' % (thisday, hour, minute), '%Y-%m-%d %p %I:%M')
    except ValueError:
        return ''
    return endTime.strftime('%Y%m%d%H%M%S')
# Get EPG data from KT
def GetEPGFromKT(ChannelInfo):
    """Fetch one channel's schedule from tv.olleh.com and emit its programmes.

    ChannelInfo is the list ``[ChannelId, ChannelName, Source, ServiceId]``.
    The schedule rows of all ``period`` days starting at ``today`` are
    collected first and emitted once afterwards, so no programme is written
    twice.  A programme's end time is the start time of the row that
    follows it, which means the last collected row is never emitted.

    Request failures are reported with HTTP_ERROR and days without a
    schedule table with CONTENT_ERROR; the remaining days are still
    processed.
    """
    ChannelId = ChannelInfo[0]
    ChannelName = ChannelInfo[1]
    ServiceId = ChannelInfo[3]
    epginfo = []
    url = 'http://tv.olleh.com/renewal_sub/liveTv/pop_schedule_week.asp'
    for k in range(period):
        day = today + datetime.timedelta(days=k)
        params = {'ch_name':'', 'ch_no':ServiceId, 'nowdate':day.strftime('%Y%m%d'), 'seldate':day.strftime('%Y%m%d'), 'tab_no':'1'}
        try:
            response = requests.get(url, params=params, headers=ua)
            response.raise_for_status()
        except requests.exceptions.RequestException:
            # Also covers connection failures, not only bad status codes.
            printError(ChannelName + HTTP_ERROR)
            continue
        data = unicode(response.content, 'euc-kr', 'ignore').encode('utf-8', 'ignore')
        strainer = SoupStrainer('table', {'id':'pop_day'})
        soup = BeautifulSoup(data, 'lxml', parse_only=strainer, from_encoding='utf-8')
        table = soup.find('table', {'id':'pop_day'})
        rows = table.tbody.find_all('tr') if table is not None and table.tbody is not None else []
        if not rows:
            printError(ChannelName + CONTENT_ERROR)
            continue
        for row in rows:
            cell = row.find_all('td')
            if len(cell) < 5:
                # Cells 0, 1, 2 and 4 are read below.
                continue
            epginfo.append([cell[1].text, str(day) + ' ' + cell[0].text, cell[4].text, cell[2].text])

    # Each entry is [title, 'YYYY-MM-DD HH:MM', category, rating text].
    for epg1, epg2 in zip(epginfo, epginfo[1:]):
        programName = ''
        subprogramName = ''
        matches = re.match(r'^(.*?)( <(.*)>)?$', epg1[0].decode('string_escape'))
        if matches is not None:
            programName = matches.group(1) or ''
            subprogramName = matches.group(3) or ''
        startTime = datetime.datetime.strptime(epg1[1], '%Y-%m-%d %H:%M')
        startTime = startTime.strftime('%Y%m%d%H%M%S')
        endTime = datetime.datetime.strptime(epg2[1], '%Y-%m-%d %H:%M')
        endTime = endTime.strftime('%Y%m%d%H%M%S')
        category = epg1[2]
        desc = ''
        actors = ''
        producers = ''
        episode = ''
        rebroadcast = False
        rating = 0
        matches = re.match(r'(\d+)', epg1[3])
        if matches is not None: rating = int(matches.group())
        programdata = {'channelId':ChannelId, 'startTime':startTime, 'endTime':endTime, 'programName':programName, 'subprogramName':subprogramName, 'desc':desc, 'actors':actors, 'producers':producers, 'category':category, 'episode':episode, 'rebroadcast':rebroadcast, 'rating':rating}
        writeProgram(programdata)
# Get EPG data from LG
def GetEPGFromLG(ChannelInfo):
    """Fetch one channel's schedule from uplus.co.kr and emit its programmes.

    ChannelInfo is the list ``[ChannelId, ChannelName, Source, ServiceId]``.
    The schedule rows of all ``period`` days starting at ``today`` are
    collected first and emitted once afterwards, so no programme is written
    twice.  A programme's end time is the start time of the row that
    follows it, which means the last collected row is never emitted.

    The rows collected here carry no description, cast or staff, so those
    fields are always empty.  Request failures are reported with HTTP_ERROR
    and days without a schedule table with CONTENT_ERROR; the remaining
    days are still processed.
    """
    ChannelId = ChannelInfo[0]
    ChannelName = ChannelInfo[1]
    ServiceId = ChannelInfo[3]
    epginfo = []
    url = 'http://www.uplus.co.kr/css/chgi/chgi/RetrieveTvSchedule.hpi'
    for k in range(period):
        day = today + datetime.timedelta(days=k)
        params = {'chnlCd': ServiceId, 'evntCmpYmd': day.strftime('%Y%m%d')}
        try:
            response = requests.get(url, params=params, headers=ua)
            response.raise_for_status()
        except requests.exceptions.RequestException:
            # Also covers connection failures, not only bad status codes.
            printError(ChannelName + HTTP_ERROR)
            continue
        data = unicode(response.content, 'euc-kr', 'ignore').encode('utf-8', 'ignore')
        strainer = SoupStrainer('table')
        soup = BeautifulSoup(data, 'lxml', parse_only=strainer, from_encoding='utf-8')
        table = soup.find('table', {'class':'datatable06'})
        rows = table.tbody.find_all('tr') if table is not None and table.tbody is not None else []
        if not rows:
            printError(ChannelName + CONTENT_ERROR)
            continue
        for row in rows:
            cell = row.find_all('td')
            if len(cell) < 3:
                # Cells 0, 1 and 2 are read below.
                continue
            gradeImage = cell[1].find('img', alt=True)
            grade = gradeImage['alt'].strip() if gradeImage is not None else ''
            epginfo.append([cell[1].text.strip(), str(day) + ' ' + cell[0].text, cell[2].text.strip(), grade])

    # Each entry is [title, 'YYYY-MM-DD HH:MM', category, rating text].
    for epg1, epg2 in zip(epginfo, epginfo[1:]):
        programName = ''
        subprogramName = ''
        episode = ''
        matches = re.match(r'^(.*?)(\(([\d,]+)회\))?$', epg1[0].decode('string_escape'))
        if matches is not None:
            programName = matches.group(1) or ''
            episode = matches.group(3) or ''
            # The group may contain commas; only a plain number becomes int.
            if episode.isdigit(): episode = int(episode)
        startTime = datetime.datetime.strptime(epg1[1], '%Y-%m-%d %H:%M')
        startTime = startTime.strftime('%Y%m%d%H%M%S')
        endTime = datetime.datetime.strptime(epg2[1], '%Y-%m-%d %H:%M')
        endTime = endTime.strftime('%Y%m%d%H%M%S')
        category = epg1[2]
        desc = ''
        actors = ''
        producers = ''
        rebroadcast = False
        rating = 0
        matches = re.match(r'(\d+)세이상 관람가', epg1[3].encode('utf-8'))
        if matches is not None: rating = int(matches.group(1))
        programdata = {'channelId':ChannelId, 'startTime':startTime, 'endTime':endTime, 'programName':programName, 'subprogramName':subprogramName, 'desc':desc, 'actors':actors, 'producers':producers, 'category':category, 'episode':episode, 'rebroadcast':rebroadcast, 'rating':rating}
        writeProgram(programdata)
# Get EPG data from SK
def GetEPGFromSK(ChannelInfo):
    """Fetch one channel's schedule from m.btvplus.co.kr and emit its programmes.

    ChannelInfo is the list ``[ChannelId, ChannelName, Source, ServiceId]``.
    A single request covers ``today`` through the last of the ``period``
    days.  Every programme of the reply is passed to ``writeProgram``;
    synopsis, cast and staff are only filled in when ``verbose`` is 'y'.

    Reports HTTP_ERROR when the request fails, CONTENT_ERROR when the reply
    is not usable JSON or has no programmes, and CHANNEL_ERROR when the
    reply carries no channel.
    """
    ChannelId = ChannelInfo[0]
    ChannelName = ChannelInfo[1]
    ServiceId = ChannelInfo[3]
    lastday = today + datetime.timedelta(days=period-1)
    url = 'http://m.btvplus.co.kr/Common/Inc/IFGetData.asp'
    params = {'variable': 'IF_LIVECHART_DETAIL', 'pcode':'|^|start_time=' + today.strftime('%Y%m%d') + '00|^|end_time='+ lastday.strftime('%Y%m%d') + '24|^|svc_id=' + str(ServiceId)}
    try:
        response = requests.get(url, params=params, headers=ua)
        response.raise_for_status()
    except requests.exceptions.RequestException:
        # Also covers connection failures, not only bad status codes.
        printError(ChannelName + HTTP_ERROR)
        return
    try:
        data = json.loads(response.text)
    except ValueError:
        printError(ChannelName + CONTENT_ERROR)
        return
    channel = data.get('channel') if isinstance(data, dict) else None
    if channel is None:
        printError(ChannelName + CHANNEL_ERROR)
        return
    programs = channel.get('programs')
    if not programs:
        printError(ChannelName + CONTENT_ERROR)
        return

    for program in programs:
        programName = ''
        subprogramName = ''
        episode = ''
        rebroadcast = False
        title = program['programName'] or ''
        matches = re.match(r'^(.*?)(?:\s*[\(<]([\d,회]+)[\)>])?(?:\s*<([^<]*?)>)?(\((재)\))?$', title.replace('...', '>').encode('utf-8'))
        if matches is not None:
            programName = matches.group(1).strip() if matches.group(1) else ''
            subprogramName = matches.group(3).strip() if matches.group(3) else ''
            # The episode group may capture the counter suffix; drop it.
            episode = matches.group(2).replace('회', '') if matches.group(2) else ''
            rebroadcast = True if matches.group(5) else False
        # Timestamps are divided by 1000 before use (milliseconds assumed).
        startTime = datetime.datetime.fromtimestamp(int(program['startTime']) // 1000)
        startTime = startTime.strftime('%Y%m%d%H%M%S')
        endTime = datetime.datetime.fromtimestamp(int(program['endTime']) // 1000)
        endTime = endTime.strftime('%Y%m%d%H%M%S')
        if verbose == 'y':
            desc = program['synopsis'] if program['synopsis'] else ''
            actors = program['actorName'].replace('...','').strip(', ') if program['actorName'] else ''
            producers = program['directorName'].replace('...','').strip(', ') if program['directorName'] else ''
        else:
            desc = ''
            actors = ''
            producers = ''
        category = program['mainGenreName']
        try:
            rating = int(program['ratingCd']) if program['ratingCd'] else 0
        except (TypeError, ValueError):
            rating = 0
        programdata = {'channelId':ChannelId, 'startTime':startTime, 'endTime':endTime, 'programName':programName, 'subprogramName':subprogramName, 'desc':desc, 'actors':actors, 'producers':producers, 'category':category, 'episode':episode, 'rebroadcast':rebroadcast, 'rating':rating}
        writeProgram(programdata)
# Get EPG data from SKY
def GetEPGFromSKY(ChannelInfo):
    """Fetch one channel's schedule from skylife.co.kr and emit its programmes.

    ChannelInfo is the list ``[ChannelId, ChannelName, Source, ServiceId]``.
    One request is made for each of the ``period`` days starting at
    ``today``.  Entries sharing a start time are collapsed to one (the last
    wins) and passed to ``writeProgram`` in start-time order.  Description,
    cast and staff are only filled in when ``verbose`` is 'y'.

    Request failures are reported with HTTP_ERROR and replies without a
    schedule with CONTENT_ERROR; the remaining days are still processed.
    """
    ChannelId = ChannelInfo[0]
    ChannelName = ChannelInfo[1]
    ServiceId = ChannelInfo[3]
    url = 'http://www.skylife.co.kr/channel/epg/channelScheduleList.do'
    for k in range(period):
        day = today + datetime.timedelta(days=k)
        params = {'area': 'in', 'inFd_channel_id': ServiceId, 'inairdate': day.strftime('%Y-%m-%d'), 'indate_type': 'now'}
        try:
            response = requests.get(url, params=params, headers=ua)
            response.raise_for_status()
        except requests.exceptions.RequestException:
            # Also covers connection failures, not only bad status codes.
            printError(ChannelName + HTTP_ERROR)
            continue
        try:
            programs = json.loads(response.text)['scheduleListIn']
        except (ValueError, KeyError, TypeError):
            programs = None
        if not programs:
            printError(ChannelName + CONTENT_ERROR)
            continue

        unique = {v['starttime']: v for v in programs}
        for starttime in sorted(unique):
            program = unique[starttime]
            programName = _skyText(program['program_name'])
            subprogramName = _skyText(program['program_subname'])
            startTime = program['starttime']
            endTime = program['endtime']
            if verbose == 'y':
                actors = program['cast'].replace('...','').strip(', ') if program['cast'] else ''
                producers = program['dirt'].replace('...','').strip(', ') if program['dirt'] else ''
                # The description is decoded twice, as before.
                description = _skyText(_skyText(program['description']))
                summary = _skyText(program['summary'])
                desc = description if description else ''
                if summary : desc = desc + '\n' + summary
            else:
                desc = ''
                actors = ''
                producers = ''
            category = program['program_category1']
            episode = program['episode_id'] if program['episode_id'] else ''
            if episode:
                try:
                    episode = int(episode)
                except (TypeError, ValueError):
                    # Keep the raw value; one odd episode id must not drop
                    # the rest of the day.
                    pass
            rebroadcast = True if program['rebroad']== 'Y' else False
            try:
                rating = int(program['grade']) if program['grade'] else 0
            except (TypeError, ValueError):
                rating = 0
            programdata = {'channelId':ChannelId, 'startTime':startTime, 'endTime':endTime, 'programName':programName, 'subprogramName':subprogramName, 'desc':desc, 'actors':actors, 'producers':producers, 'category':category, 'episode':episode, 'rebroadcast':rebroadcast, 'rating':rating}
            writeProgram(programdata)


def _skyText(value):
    """Return a SKY text field with entities decoded, or '' when it is empty."""
    if not value:
        return ''
    return unescape(value).replace('lt;','<').replace('gt;','>').replace('amp;','&')
# Get EPG data from Naver
def GetEPGFromNaver(ChannelInfo):
    """Fetch one channel's schedule from Naver search and emit its programmes.

    ChannelInfo is the list ``[ChannelId, ChannelName, Source, ServiceId]``.
    A single request covers the ``period`` days starting at ``today``.  The
    reply is a ``epg(...)`` JSONP wrapper; its programmes are collected per
    displayed date and hour slot and then passed to ``writeProgram``.  A
    programme's end time is the start time of the entry that follows it,
    which means the last collected entry is never emitted.

    Reports HTTP_ERROR when the request fails and CONTENT_ERROR when the
    reply cannot be parsed or contains no programmes.
    """
    ChannelId = ChannelInfo[0]
    ChannelName = ChannelInfo[1]
    ServiceId = ChannelInfo[3]
    epginfo = []
    url = 'https://search.naver.com/p/csearch/content/batchrender_ssl.nhn'
    totaldate = [(today + datetime.timedelta(days=k)).strftime('%Y%m%d') for k in range(period)]
    params = {'_callback': 'epg', 'fileKey': 'single_schedule_channel_day', 'pkid': '66', 'u1': 'single_schedule_channel_day', 'u2': ','.join(totaldate), 'u3': today.strftime('%Y%m%d'), 'u4': period, 'u5': ServiceId, 'u6': '1', 'u7': ChannelName + '편성표', 'u8': ChannelName + '편성표', 'where': 'nexearch'}
    try:
        response = requests.get(url, params=params, headers=ua)
        response.raise_for_status()
    except requests.exceptions.RequestException:
        # Also covers connection failures, not only bad status codes.
        printError(ChannelName + HTTP_ERROR)
        return

    try:
        # Unwrap epg( ... ); and drop /* ... */ comments before parsing.
        payload = response.text.split("epg(")[1].strip(");").strip()
        data = json.loads(re.sub(re.compile(r"/\*.*?\*/", re.DOTALL), "", payload))
        for i, date in enumerate(data['displayDates']):
            for j in range(0,24):
                for program in data['schedules'][j][i]:
                    epginfo.append([program['title'], date['date'] + ' ' + program['startTime'], program['episode'], program['isRerun'], program['grade']])
    except (ValueError, KeyError, IndexError, TypeError):
        # Missing wrapper, invalid JSON or an unexpected structure.
        printError(ChannelName + CONTENT_ERROR)
        return
    if not epginfo:
        printError(ChannelName + CONTENT_ERROR)
        return

    # Each entry is [title, 'YYYYMMDD HH:MM', episode, rerun flag, grade].
    for epg1, epg2 in zip(epginfo, epginfo[1:]):
        programName = unescape(epg1[0]) if epg1[0] else ''
        subprogramName = ''
        try:
            startTime = datetime.datetime.strptime(epg1[1], '%Y%m%d %H:%M')
            endTime = datetime.datetime.strptime(epg2[1], '%Y%m%d %H:%M')
        except ValueError:
            # Unparseable time: skip this entry instead of the whole channel.
            continue
        startTime = startTime.strftime('%Y%m%d%H%M%S')
        endTime = endTime.strftime('%Y%m%d%H%M%S')
        desc = ''
        actors = ''
        producers = ''
        category = ''
        # Take the first run of digits so a suffix after the number (or a
        # missing value) cannot break int().
        episodeMatch = re.search(r'\d+', epg1[2] or '')
        episode = int(episodeMatch.group()) if episodeMatch else ''
        rebroadcast = epg1[3]
        # NOTE(review): the grade is coerced to int because the rating is
        # later compared with 0; the feed's value type is not visible
        # here - confirm.
        try:
            rating = int(epg1[4])
        except (TypeError, ValueError):
            rating = 0
        programdata = {'channelId':ChannelId, 'startTime':startTime, 'endTime':endTime, 'programName':programName, 'subprogramName':subprogramName, 'desc':desc, 'actors':actors, 'producers':producers, 'category':category, 'episode':episode, 'rebroadcast':rebroadcast, 'rating':rating}
        writeProgram(programdata)
# Write Program # Write Program
def writeProgram(programdata): def writeProgram(programdata):
channelId = programdata['channelId'] ChannelId = programdata['channelId']
startTime = programdata['startTime'] startTime = programdata['startTime']
endTime = programdata['endTime'] endTime = programdata['endTime']
programName = escape(programdata['programName']) programName = escape(programdata['programName'])
@ -288,54 +434,65 @@ def writeProgram(programdata):
category = escape(programdata['category']) category = escape(programdata['category'])
episode = programdata['episode'] episode = programdata['episode']
rebroadcast = programdata['rebroadcast'] rebroadcast = programdata['rebroadcast']
if rebroadcast == True: programName = programName + ' (재방송)'
if programdata['rating'] == 0 : if programdata['rating'] == 0 :
rating = '전체 관람가' rating = '전체 관람가'
else : else :
rating = '%s세 이상 관람가' % (programdata['rating']) rating = '%s세 이상 관람가' % (programdata['rating'])
desc = programName if verbose == 'y':
if subprogramName : desc = desc + '\n부제 : ' + subprogramName desc = programName
if episode : desc = desc + '\n회차 : ' + str(episode) + '' if subprogramName : desc = desc + '\n부제 : ' + subprogramName
desc = desc + '\n장르 : ' + category if episode : desc = desc + '\n회차 : ' + str(episode) + ''
if actors : desc = desc + '\n출연 : ' + actors if category : desc = desc + '\n장르 : ' + category
if producers : desc = desc + '\n제작 : ' + producers if actors : desc = desc + '\n출연 : ' + actors
desc = desc + '\n등급 : ' + rating if producers : desc = desc + '\n제작 : ' + producers
desc = desc + '\n등급 : ' + rating
else:
desc =''
if programdata['desc'] : desc = desc + '\n' + escape(programdata['desc']) if programdata['desc'] : desc = desc + '\n' + escape(programdata['desc'])
rebroadcast = programdata['rebroadcast']
contentTypeDict={'교양':'Arts / Culture (without music)', '만화':'Cartoons / Puppets', '교육':'Education / Science / Factual topics', '취미':'Leisure hobbies', '드라마':'Movie / Drama', '영화':'Movie / Drama', '음악':'Music / Ballet / Dance', '뉴스':'News / Current affairs', '다큐':'Documentary', '시사/다큐':'Documentary', '연예':'Show / Game show', '스포츠':'Sports', '홈쇼핑':'Advertisement / Shopping'} contentTypeDict={'교양':'Arts / Culture (without music)', '만화':'Cartoons / Puppets', '교육':'Education / Science / Factual topics', '취미':'Leisure hobbies', '드라마':'Movie / Drama', '영화':'Movie / Drama', '음악':'Music / Ballet / Dance', '뉴스':'News / Current affairs', '다큐':'Documentary', '시사/다큐':'Documentary', '연예':'Show / Game show', '스포츠':'Sports', '홈쇼핑':'Advertisement / Shopping'}
contentType = '' contentType = ''
for key, value in contentTypeDict.iteritems(): for key, value in contentTypeDict.iteritems():
if category.startswith(key): if category.startswith(key):
contentType = value contentType = value
print ' <programme start="%s +0900" stop="%s +0900" channel="%s">' % (startTime, endTime,channelId) if(endTime) :
print ' <title lang="kr">%s</title>' % (programName) print(' <programme start="%s +0900" stop="%s +0900" channel="%s">' % (startTime, endTime, ChannelId))
else :
print(' <programme start="%s +0900" channel="%s">' % (startTime, ChannelId))
print(' <title lang="kr">%s</title>' % (programName))
if subprogramName : if subprogramName :
print ' <sub-title lang="kr">%s</sub-title>' % (subprogramName) print(' <sub-title lang="kr">%s</sub-title>' % (subprogramName))
print ' <desc lang="kr">%s</desc>' % (desc) if verbose=='y' :
if actors or producers: print(' <desc lang="kr">%s</desc>' % (desc))
print ' <credits>' if actors or producers:
if actors: print(' <credits>')
for actor in actors.split(','): if actors:
if actor: print ' <actor>%s</actor>' % (actor) for actor in actors.split(','):
if producers: if actor: print(' <actor>%s</actor>' % (actor))
for producer in producers.split(','): if producers:
if producer: print ' <producer>%s</producer>' % (producer) for producer in producers.split(','):
print ' </credits>' if producer: print(' <producer>%s</producer>' % (producer))
if category: print ' <category lang="kr">%s</category>' % (category) print(' </credits>')
if contentType: print ' <category lang="en">%s</category>' % (contentType)
if episode: print ' <episode-num system="onscreen">%s</episode-num>' % (episode) if category: print(' <category lang="kr">%s</category>' % (category))
if rebroadcast: print ' <previously-shown />' if contentType: print(' <category lang="en">%s</category>' % (contentType))
if episode: print(' <episode-num system="onscreen">%s</episode-num>' % (episode))
if rebroadcast: print(' <previously-shown />')
if rating: if rating:
print ' <rating system="KMRB">' print(' <rating system="KMRB">')
print ' <value>%s</value>' % (rating) print(' <value>%s</value>' % (rating))
print ' </rating>' print(' </rating>')
print ' </programme>' print(' </programme>')
def writeXML(data): def printLog(*args):
print data print(*args, file=sys.stderr)
parser = argparse.ArgumentParser(description='EPG 정보를 출력하는 방법을 선택한다') def printError(*args):
argu1 = parser.add_argument_group(description='IPTV 선택') print("Error:", *args, file=sys.stderr)
parser = argparse.ArgumentParser(description = 'EPG 정보를 출력하는 방법을 선택한다')
argu1 = parser.add_argument_group(description = 'IPTV 선택')
argu1.add_argument('-i', dest = 'iptv', choices = ['KT', 'LG', 'SK'], help = '사용하는 IPTV : KT, LG, SK', required = True) argu1.add_argument('-i', dest = 'iptv', choices = ['KT', 'LG', 'SK'], help = '사용하는 IPTV : KT, LG, SK', required = True)
argu2 = parser.add_mutually_exclusive_group(required = True) argu2 = parser.add_mutually_exclusive_group(required = True)
argu2.add_argument('-v', '--version', action = 'version', version = '%(prog)s version : ' + __version__) argu2.add_argument('-v', '--version', action = 'version', version = '%(prog)s version : ' + __version__)
@ -343,8 +500,9 @@ argu2.add_argument('-d', '--display', action = 'store_true', help = 'EPG 정보
argu2.add_argument('-o', '--outfile', metavar = default_xml_filename, nargs = '?', const = default_xml_filename, help = 'EPG 정보 저장') argu2.add_argument('-o', '--outfile', metavar = default_xml_filename, nargs = '?', const = default_xml_filename, help = 'EPG 정보 저장')
argu2.add_argument('-s', '--socket', metavar = default_xml_socket, nargs = '?', const = default_xml_socket, help = 'xmltv.sock(External: XMLTV)로 EPG정보 전송') argu2.add_argument('-s', '--socket', metavar = default_xml_socket, nargs = '?', const = default_xml_socket, help = 'xmltv.sock(External: XMLTV)로 EPG정보 전송')
argu3 = parser.add_argument_group('추가옵션') argu3 = parser.add_argument_group('추가옵션')
argu3.add_argument('-l', '--limit', dest='limit', type = int, metavar = "1-7", choices = range(1,8), help = 'EPG 정보를 가져올 기간, 기본값: '+ str(default_fetch_limit), default = default_fetch_limit) argu3.add_argument('-l', '--limit', dest = 'limit', type = int, metavar = "1-7", choices = range(1,8), help = 'EPG 정보를 가져올 기간, 기본값: '+ str(default_fetch_limit), default = default_fetch_limit)
argu3.add_argument('--icon', dest='icon', metavar = "http://www.example.com/icon", help = '채널 아이콘 URL, 기본값: '+ default_icon_url, default = default_icon_url) argu3.add_argument('--icon', dest = 'icon', metavar = "http://www.example.com/icon", help = '채널 아이콘 URL, 기본값: '+ default_icon_url, default = default_icon_url)
# choices must be a real list: with the string 'yn', argparse's membership
# test is a substring check, so '' and 'yn' would also be accepted.
argu3.add_argument('--verbose', dest = 'verbose', metavar = 'y, n', choices = ['y', 'n'], help = 'EPG 정보 추가 출력', default = default_verbose)
args = parser.parse_args() args = parser.parse_args()
@ -361,8 +519,14 @@ else:
if args.icon: if args.icon:
IconUrl = args.icon IconUrl = args.icon
else : else:
IconUrl = default_icon_url IconUrl = default_icon_url
# Resolve the verbose flag ('y' or 'n'): use the --verbose value when it is
# set, otherwise fall back to the configured default_verbose.
if args.verbose:
    verbose = args.verbose
else:
    verbose = default_verbose
if args.outfile: if args.outfile:
sys.stdout = codecs.open(args.outfile, 'w+', encoding='utf-8') sys.stdout = codecs.open(args.outfile, 'w+', encoding='utf-8')
elif args.socket: elif args.socket:
@ -370,8 +534,5 @@ elif args.socket:
sock.connect(args.socket) sock.connect(args.socket)
sockfile = sock.makefile('w+') sockfile = sock.makefile('w+')
sys.stdout = sockfile sys.stdout = sockfile
writeXML('<?xml version="1.0" encoding="UTF-8"?>')
writeXML('<!DOCTYPE tv SYSTEM "xmltv.dtd">\n')
writeXML('<tv generator-info-name="epg2xml.py">')
getEpg() getEpg()
writeXML('</tv>')