diff --git a/epg2xml.py b/epg2xml.py
index 3e0a7e3..7557e00 100644
--- a/epg2xml.py
+++ b/epg2xml.py
@@ -16,7 +16,7 @@ import argparse
reload(sys)
sys.setdefaultencoding('utf-8')
-__version__ = '1.0.7'
+__version__ = '1.0.8'
# Set My Configuration
default_icon_url = '' # TV channel icon url (ex : http://www.example.com/Channels)
@@ -97,6 +97,7 @@ def GetEPGFromEPG(ChannelInfo):
ChannelId = ChannelInfo[0]
ChannelName = ChannelInfo[1]
ServiceId = ChannelInfo[3]
+ epginfo = []
url = 'http://www.epg.co.kr/epg-cgi/extern/cnm_guide_type_v070530.cgi'
contenturl = 'http://www.epg.co.kr/epg-cgi/guide_schedule_content.cgi'
for k in range(period):
@@ -113,8 +114,7 @@ def GetEPGFromEPG(ChannelInfo):
for i in range(1,4):
thisday = day
- pid = ''
- row = table[i].find_all('td', {'colspan':'2'})
+ row = table[i].find_all('td', {'colspan':'2'})
for j, cell in enumerate(row):
hour = int(cell.text.strip().strip('시'))
if(i == 1) : hour = 'AM ' + str(hour)
@@ -124,50 +124,37 @@ def GetEPGFromEPG(ChannelInfo):
hour = 'AM ' + str(hour)
thisday = day + datetime.timedelta(days=1)
for celldata in cell.parent.find_all('tr'):
- matches = re.match("
.*\[(.*)\]<\/td>\s.*ViewContent\('(.*)'\)\">(.*?)\s*(<(.*)>)?\s*(\(재\))?\s*(\(([\d,]+)회\))?( <\/td><\/tr>", str(celldata))
+ pattern = ".*\[(.*)\]<\/td>\s.*\">(.*?)\s*(<(.*)>)?\s*(\(재\))?\s*(\(([\d,]+)회\))?()?\s*<\/td><\/tr>"
+ matches = re.match(pattern, str(celldata))
if not (matches is None):
minute = matches.group(1) if matches.group(1) else ''
startTime = str(thisday) + ' ' + hour + ':' + minute
startTime = datetime.datetime.strptime(startTime, '%Y-%m-%d %p %I:%M')
startTime = startTime.strftime('%Y%m%d%H%M%S')
- endTime = ''
- pid = matches.group(2) if matches.group(2) else ''
- programName = matches.group(3) if matches.group(3) else ''
- subprogramName = matches.group(5) if matches.group(5) else ''
- desc = ''
- actors = ''
- producers = ''
- category = ''
- rebroadcast = True if matches.group(6) else False
- episode = matches.group(8) if matches.group(8) else ''
- image = matches.group(9) if matches.group(9) else ''
+ image = matches.group(8) if matches.group(8) else ''
grade = re.match('.*schedule_([\d,]+)?.*',image)
- if not (grade is None):
- rating = int(grade.group(1))
- else :
- rating = 0
- if(i == 3 and len(row) - 1 == j and pid) :
- params = {'pid':pid}
- try:
- response = requests.get(contenturl, params=params, headers=ua)
- response.raise_for_status()
- html_data = response.content
- data = unicode(html_data, 'euc-kr', 'ignore').encode('utf-8', 'ignore')
- strainer = SoupStrainer('table', {'border':'0', 'cellpadding':'3'})
- soup = BeautifulSoup(data, 'lxml', parse_only=strainer, from_encoding='utf-8')
- td = soup.select('tr:nth-of-type(3) > td:nth-of-type(3)')
- endTime = td[0].text.split('~')[1].replace('시',':').replace('분','').replace(': ', ':').strip()
- if(endTime.startswith('0')): endTime = endTime.replace('0:','12:')
- endTime = str(thisday) + ' ' + 'AM ' + endTime
- if(endTime.endswith(':')) : endTime = endTime + '00'
- endTime = datetime.datetime.strptime(endTime, '%Y-%m-%d %p %I:%M')
- endTime = endTime.strftime('%Y%m%d%H%M%S')
- except requests.exceptions.HTTPError:
- printError(ChannelName + HTTP_ERROR)
- programdata = {'channelId':ChannelId, 'startTime':startTime, 'endTime':endTime, 'programName':programName, 'subprogramName':subprogramName, 'desc':desc, 'actors':actors, 'producers':producers, 'category':category, 'episode':episode, 'rebroadcast':rebroadcast, 'rating':rating}
- writeProgram(programdata)
+ if not (grade is None): rating = int(grade.group(1))
+ else : rating = 0
+ #programName, startTime, rating, subprogramName, rebroadcast, episode
+ epginfo.append([matches.group(2), startTime, rating, matches.group(4), matches.group(5), matches.group(7)])
+
+ for epg1, epg2 in zip(epginfo, epginfo[1:]):
+ programName = epg1[0] if epg1[0] else ''
+ subprogramName = epg1[3] if epg1[3] else ''
+ startTime = epg1[1] if epg1[1] else ''
+ endTime = epg2[1] if epg2[1] else ''
+ desc = ''
+ actors = ''
+ producers = ''
+ category = ''
+ rebroadcast = True if epg1[4] else False
+ episode = epg1[5] if epg1[5] else ''
+ rating = int(epg1[2]) if epg1[2] else 0
+ programdata = {'channelId':ChannelId, 'startTime':startTime, 'endTime':endTime, 'programName':programName, 'subprogramName':subprogramName, 'desc':desc, 'actors':actors, 'producers':producers, 'category':category, 'episode':episode, 'rebroadcast':rebroadcast, 'rating':rating}
+ writeProgram(programdata)
except requests.exceptions.HTTPError:
- printError(ChannelName + HTTP_ERROR)
+ if(debug): printError(ChannelName + HTTP_ERROR)
+ else: pass
# Get EPG data from KT
def GetEPGFromKT(ChannelInfo):
@@ -215,9 +202,12 @@ def GetEPGFromKT(ChannelInfo):
if not(matches is None): rating = int(matches.group())
programdata = {'channelId':ChannelId, 'startTime':startTime, 'endTime':endTime, 'programName':programName, 'subprogramName':subprogramName, 'desc':desc, 'actors':actors, 'producers':producers, 'category':category, 'episode':episode, 'rebroadcast':rebroadcast, 'rating':rating}
writeProgram(programdata)
- else: printError(ChannelName + CONTENT_ERROR)
+ else:
+ if(debug): printError(ChannelName + CONTENT_ERROR)
+ else: pass
except requests.exceptions.HTTPError:
- printError(ChannelName + HTTP_ERROR)
+ if(debug): printError(ChannelName + HTTP_ERROR)
+ else: pass
# Get EPG data from LG
def GetEPGFromLG(ChannelInfo):
@@ -249,29 +239,27 @@ def GetEPGFromLG(ChannelInfo):
matches = re.match('^(.*?)(\(([\d,]+)회\))?$', epg1[0].decode('string_escape'))
if not (matches is None):
programName = matches.group(1) if matches.group(1) else ''
- episode = int(matches.group(3)) if matches.group(3) else ''
+ episode = matches.group(3) if matches.group(3) else ''
startTime = datetime.datetime.strptime(epg1[1], '%Y-%m-%d %H:%M')
startTime = startTime.strftime('%Y%m%d%H%M%S')
endTime = datetime.datetime.strptime(epg2[1], '%Y-%m-%d %H:%M')
endTime = endTime.strftime('%Y%m%d%H%M%S')
category = epg1[2]
- if verbose=='y' :
- desc = ''
- actors = epgdata[6]
- producers = epgdata[7]
- else:
- desc = ''
- actors = ''
- producers = ''
+ desc = ''
+ actors = ''
+ producers = ''
rebroadcast = False
rating = 0
matches = re.match('(\d+)세이상 관람가', epg1[3].encode('utf-8'))
if not(matches is None): rating = int(matches.group(1))
programdata = {'channelId':ChannelId, 'startTime':startTime, 'endTime':endTime, 'programName':programName, 'subprogramName':subprogramName, 'desc':desc, 'actors':actors, 'producers':producers, 'category':category, 'episode':episode, 'rebroadcast':rebroadcast, 'rating':rating}
writeProgram(programdata)
- else: printError(ChannelName + CONTENT_ERROR)
+ else:
+ if(debug): printError(ChannelName + CONTENT_ERROR)
+ else: pass
except requests.exceptions.HTTPError:
- printError(ChannelName + HTTP_ERROR)
+ if(debug): printError(ChannelName + HTTP_ERROR)
+ else: pass
# Get EPG data from SK
def GetEPGFromSK(ChannelInfo):
@@ -321,9 +309,11 @@ def GetEPGFromSK(ChannelInfo):
programdata = {'channelId':ChannelId, 'startTime':startTime, 'endTime':endTime, 'programName':programName, 'subprogramName':subprogramName, 'desc':desc, 'actors':actors, 'producers':producers, 'category':category, 'episode':episode, 'rebroadcast':rebroadcast, 'rating':rating}
writeProgram(programdata)
except ValueError:
- printError(ChannelName + CONTENT_ERROR)
+ if(debug): printError(ChannelName + CONTENT_ERROR)
+ else: pass
except requests.exceptions.HTTPError:
- printError(ChannelName + HTTP_ERROR)
+ if(debug): printError(ChannelName + HTTP_ERROR)
+ else: pass
# Get EPG data from SKY
def GetEPGFromSKY(ChannelInfo):
@@ -341,7 +331,8 @@ def GetEPGFromSKY(ChannelInfo):
try:
data = json.loads(json_data, encoding='utf-8')
if (len(data['scheduleListIn']) == 0) :
- printError(ChannelName + CONTENT_ERROR)
+ if(debug): printError(ChannelName + CONTENT_ERROR)
+ else: pass
else :
programs = data['scheduleListIn']
for program in {v['starttime']:v for v in programs}.values():
@@ -370,9 +361,11 @@ def GetEPGFromSKY(ChannelInfo):
programdata = {'channelId':ChannelId, 'startTime':startTime, 'endTime':endTime, 'programName':programName, 'subprogramName':subprogramName, 'desc':desc, 'actors':actors, 'producers':producers, 'category':category, 'episode':episode, 'rebroadcast':rebroadcast, 'rating':rating}
writeProgram(programdata)
except ValueError:
- printError(ChannelName + CONTENT_ERROR)
+ if(debug): printError(ChannelName + CONTENT_ERROR)
+ else: pass
except requests.exceptions.HTTPError:
- printError(ChannelName + HTTP_ERROR)
+ if(debug): printError(ChannelName + HTTP_ERROR)
+ else: pass
# Get EPG data from Naver
def GetEPGFromNaver(ChannelInfo):
@@ -417,9 +410,11 @@ def GetEPGFromNaver(ChannelInfo):
programdata = {'channelId':ChannelId, 'startTime':startTime, 'endTime':endTime, 'programName':programName, 'subprogramName':subprogramName, 'desc':desc, 'actors':actors, 'producers':producers, 'category':category, 'episode':episode, 'rebroadcast':rebroadcast, 'rating':rating}
writeProgram(programdata)
except ValueError:
- printError(ChannelName + CONTENT_ERROR)
+ if(debug): printError(ChannelName + CONTENT_ERROR)
+ else: pass
except requests.exceptions.HTTPError:
- printError(ChannelName + HTTP_ERROR)
+ if(debug): printError(ChannelName + HTTP_ERROR)
+ else: pass
# Write Program
@@ -536,3 +531,4 @@ elif args.socket:
sys.stdout = sockfile
getEpg()
+