From 9238214f5887621ad57fcf97596dea53401c8f78 Mon Sep 17 00:00:00 2001 From: wonipapa Date: Tue, 19 Sep 2017 14:40:34 +0900 Subject: [PATCH] Update 1.2.3 --- Channel.json | 2 +- README.md | 13 ++--- epg2xml-web.php | 128 +++++++++++++++++++++++++++++++----------------- epg2xml.json | 12 ++--- epg2xml.py | 101 +++++++++++++++++++++++++++++--------- 5 files changed, 171 insertions(+), 85 deletions(-) diff --git a/Channel.json b/Channel.json index b82ee13..a427ad4 100644 --- a/Channel.json +++ b/Channel.json @@ -31,7 +31,7 @@ { "Id": 40, "Name": "CJ오쇼핑", "KT Name": "CJ오쇼핑", "KTCh": 4, "LG Name": "CJ오쇼핑", "LGCh": 8, "SK Name": "CJ오쇼핑", "SKCh": 6, "Radio Name": "", "RadioCh": null, "Icon_url": "http://i.imgur.com/uBClUx6.png", "Source": "EPG", "ServiceId": "250"}, { "Id": 41, "Name": "CJ오쇼핑 플러스", "KT Name": "CJ오쇼핑플러스", "KTCh": 28, "LG Name": "CJ오쇼핑+", "LGCh": 32, "SK Name": "CJ오쇼핑 플러스", "SKCh": 33, "Radio Name": "", "RadioCh": null, "Icon_url": "http://i.imgur.com/WlK2YDk.png", "Source": "KT", "ServiceId": "28"}, { "Id": 42, "Name": "CLASSICA", "KT Name": "CLASSICA", "KTCh": 90, "LG Name": "클래시카", "LGCh": 146, "SK Name": "Classica HD", "SKCh": 235, "Radio Name": "", "RadioCh": null, "Icon_url": "http://i.imgur.com/FEfMS0W.png", "Source": "EPG", "ServiceId": "365"}, -{ "Id": 43, "Name": "CMC가족오락TV", "KT Name": "CMC가족오락TV", "KTCh": 126, "LG Name": "", "LGCh": null, "SK Name": "CMC 가족오락TV", "SKCh": 93, "Radio Name": "", "RadioCh": null, "Icon_url": "http://i.imgur.com/fNuqGzR.png", "Source": "NAVER", "ServiceId": "814797"}, +{ "Id": 43, "Name": "CMC가족오락TV", "KT Name": "CMC가족오락TV", "KTCh": 126, "LG Name": "", "LGCh": null, "SK Name": "CMC 가족오락TV", "SKCh": 93, "Radio Name": "", "RadioCh": null, "Icon_url": "http://i.imgur.com/fNuqGzR.png", "Source": "EVERYON", "ServiceId": "1"}, { "Id": 44, "Name": "CMTV", "KT Name": "CMTV", "KTCh": 262, "LG Name": "", "LGCh": null, "SK Name": "", "SKCh": null, "Radio Name": "", "RadioCh": null, "Icon_url": "http://i.imgur.com/ITweLdv.png", 
"Source": "KT", "ServiceId": "44"}, { "Id": 45, "Name": "CNBC", "KT Name": "CNBC", "KTCh": 197, "LG Name": "CNBC", "LGCh": 118, "SK Name": "", "SKCh": null, "Radio Name": "", "RadioCh": null, "Icon_url": "http://i.imgur.com/Mx8ZXqk.png", "Source": "NAVER", "ServiceId": "815128"}, { "Id": 46, "Name": "CNN International", "KT Name": "CNN International", "KTCh": 191, "LG Name": "CNN International", "LGCh": 117, "SK Name": "CNN International", "SKCh": 158, "Radio Name": "", "RadioCh": null, "Icon_url": "http://i.imgur.com/RxsYny9.png", "Source": "EPG", "ServiceId": "117"}, diff --git a/README.md b/README.md index 339eb71..a25fd41 100644 --- a/README.md +++ b/README.md @@ -34,7 +34,7 @@ default_icon_url : 채널별 아이콘이 있는 url을 설정할 수 있다. default_rebroadcast : 제목에 재방송 정보 출력 default_episode : 제목에 회차정보 출력 default_verbose : EPG 정보 상세 출력 -default_fetch_limit : EPG 데이터 가져오는 기간이다. +default_fetch_limit : EPG 데이터 가져오는 기간. default_xml_filename : EPG 저장시 기본 저장 이름으로 tvheadend 서버가 쓰기가 가능한 경로로 설정해야 한다. default_xml_socket : External XMLTV 사용시 xmltv.sock가 있는 경로로 설정해준다. @@ -42,10 +42,6 @@ default_xml_socket : External XMLTV 사용시 xmltv.sock가 있는 경로로 ### Channel.json Channel.json 파일의 최신버전은 https://github.com/wonipapa/Channel.json 에서 다운받을 수 있다. Channel.json 파일을 텍스트 편집기로 열어보면 각채널별 정보가 들어 있다. -이중 Enabled:1로 되어 있는 부분을 Enabled:0으로 바꾸면 EPG정보를 가져오지 않는다. -필요없는 채널정보를 가져오지 않게 하는 것으로 EPG 정보 수집시 시간을 단축할 수 있다. -삭제된 채널등으로 인해서 오류 발생시에도 Enabled:0으로 변경하면 오류 발생을 차단할 수 있다. -1.2.3 버전부터 Enabled 항목은 없어질 예정이다. ## 옵션 소개 ### epg2xml.py, epg2xml.php 옵션 @@ -90,8 +86,8 @@ python 경로와 php의 경로는 /usr/bin에 있고, epg2xml 파일은 /home/ht #### PHP WEB의 경우
-wget -O - http://domain/epg2xml.php 또는
-wget -O - http://domain/epg2xml.php?i=ALL&l=2
+wget -O - http://domain/epg2xml-web.php 또는
+wget -O - "http://domain/epg2xml-web.php?i=ALL&l=2"

 
### XMLTV SOCKET 사용시 @@ -123,7 +119,8 @@ https://github.com/wonipapa/epg2xml/wiki/FAQ ## 변경사항 ### Version 1.2.3 - - PHP 통합 + - PHP 버전통합 + - PYTHON 버전 html Parser 변수 추가 - 소스 추가 - Channel.json Enabled 항목 제거 - 에피소드 넘버 xmltv_ns 옵션 항목 추가 diff --git a/epg2xml-web.php b/epg2xml-web.php index 8e82184..b587d6f 100644 --- a/epg2xml-web.php +++ b/epg2xml-web.php @@ -1,8 +1,8 @@ ', '&'),$program['program_subname']) ?: ""; + preg_match('/(.*) \(?(\d+부)\)?/', $programName, $matches); + if ($matches != NULL) : + if(isset($matches[1])) $programName = trim($matches[1]) ?: ""; + if(isset($matches[2])) $subprogramName = trim($matches[2]." ".$subprogramName) ?: ""; + endif; $startTime = $program['starttime']; $endTime = $program['endtime']; $actors = trim(str_replace('...', '',$program['cast']), ', ') ?: ""; @@ -993,7 +995,6 @@ function GetEPGFromIscs($ChannelInfo) { $rating = 0; if(startsWith($program['Time'], '1') || startsWith($program['Time'], '2')) $istomorrow = True; if(startsWith($program['Time'], '0') && $istomorrow == True) : -// $thisday = date("Ymd", strtotime($day." +1 days")); $startTime = date("YmdHis", strtotime($day." +1 days"." ".$program['Time'])); else : $startTime = date("YmdHis", strtotime($day." ".$program['Time'])); @@ -1025,7 +1026,6 @@ function GetEPGFromIscs($ChannelInfo) { endforeach; $epginfo= array_map("unserialize", array_unique(array_map("serialize", $epginfo))); epgzip($epginfo); - } // Get EPG data from Hcn @@ -1156,6 +1156,59 @@ function GetEPGFromPooq($ChannelInfo) { epgzip($epginfo); } +# Get EPG data from EVERYON +function GetEPGFromEveryon($ChannelInfo) { + $ChannelId = $ChannelInfo[0]; + $ChannelName = $ChannelInfo[1]; + $ServiceId = $ChannelInfo[3]; + $epginfo = array(); + foreach(range(1, $GLOBALS['period']) as $k) : + $url = "http://www.everyon.tv/mobile/schedule_ch.ptv"; + $day = date("Ymd", strtotime("+".($k - 1)." 
days")); + $params = array( + 'chid' => $ServiceId, + 'date' => $day + ); + $params = http_build_query($params); + $method = "GET"; + try { + $response = getWeb($url, $params, $method); + if ($response === False && $GLOBALS['debug']) : + printError($ChannelName.HTTP_ERROR); + else : + $response = mb_convert_encoding($response, "HTML-ENTITIES", "UTF-8"); + $dom = new DomDocument; + libxml_use_internal_errors(True); + if($dom->loadHTML($response)): + $xpath = new DomXPath($dom); + $query = "//ul[@class='lt2']"; + $rows = $xpath->query($query); + foreach($rows as $row) : + $startTime = $endTime = $programName = $subprogramName = $desc = $actors = $producers = $category = $episode = ""; + $rebroadcast = False; + $rating = 0; + $startTime = trim($xpath->query("li[@class='pr_time']", $row)->item(0)->nodeValue) ?: ""; + + $startTime = date("YmdHis", strtotime($day." ".$startTime)); + $programName = trim($xpath->query("li[@class='pr_name']", $row)->item(0)->nodeValue) ?: ""; + if(in_array($programName, array("편성표가 곧 등록될 예정입니다.", "편성 정보가 없습니다."))) continue; + $grade = trim($xpath->query("li[contains(@class,'img')]", $row)->item(0)->getAttribute('class')); + $rating = str_replace(array("img ","c", "all"), array("", "", "0"), $grade); + //ChannelId, startTime, programName, subprogramName, desc, actors, producers, category, episode, rebroadcast, rating + $epginfo[] = array($ChannelId, $startTime, $programName, $subprogramName, $desc, $actors, $producers, $category, $episode, $rebroadcast, $rating); + usleep(1000); + endforeach; + else : + if($GLOBALS['debug']) printError($ChannelName.CONTENT_ERROR); + endif; + endif; + } catch (Exception $e) { + if($GLOBALS['debug']) printError($e->getMessage()); + } + endforeach; + epgzip($epginfo); +} + // Get EPG data from MBC function GetEPGFromMbc($ChannelInfo) { $ChannelId = $ChannelInfo[0]; @@ -1500,22 +1553,21 @@ function GetEPGFromArirang($ChannelInfo) { # Zip epginfo function epgzip($epginfo) { - if($epginfo == NULL) $epginfo = 
array(); - #ChannelId, startTime, programName, subprogramName, desc, actors, producers, category, episode, rebroadcast, rating - $zipped = array_slice(array_map(NULL, $epginfo, array_slice($epginfo,1)),0,-1); - foreach($zipped as $epg) : - $ChannelId = $epg[0][0] ?: ""; - $startTime = $epg[0][1] ?: ""; - $endTime = $epg[1][1] ?: ""; - $programName = $epg[0][2] ?: ""; - $subprogramName = $epg[0][3] ?: ""; - $desc = $epg[0][4] ?: ""; - $actors = $epg[0][5] ?: ""; - $producers = $epg[0][6] ?: ""; - $category = $epg[0][7] ?: ""; - $episode = $epg[0][8] ?: ""; - $rebroadcast = $rebroadcast = $epg[0][9] ? True: False; - $rating = $epg[0][10] ?: 0; + $epg1 = current($epginfo); + array_shift($epginfo); + foreach($epginfo as $epg2): + $ChannelId = $epg1[0] ?: ""; + $startTime = $epg1[1] ?: ""; + $endTime = $epg2[1] ?: ""; + $programName = $epg1[2] ?: ""; + $subprogramName = $epg1[3] ?: ""; + $desc = $epg1[4] ?: ""; + $actors = $epg1[5] ?: ""; + $producers = $epg1[6] ?: ""; + $category = $epg1[7] ?: ""; + $episode = $epg1[8] ?: ""; + $rebroadcast = $rebroadcast = $epg1[9] ? True: False; + $rating = $epg1[10] ?: 0; $programdata = array( 'channelId'=> $ChannelId, 'startTime' => $startTime, @@ -1531,7 +1583,8 @@ function epgzip($epginfo) { 'rating' => $rating ); writeProgram($programdata); - endforeach; + $epg1 = $epg2; + endforeach; } function writeProgram($programdata) { @@ -1545,7 +1598,7 @@ function writeProgram($programdata) { if ($matches != NULL) : if(isset($matches[1])) $programName = trim($matches[1]) ?: ""; if(isset($matches[2])) $subprogramName = trim($matches[2]." 
".$subprogramName) ?: ""; - endif; + endif;// if($programName == NULL): $programName = $subprogramName; endif; @@ -1567,7 +1620,7 @@ function writeProgram($programdata) { $rating = sprintf("%s세 이상 관람가", $programdata['rating']); endif; if($GLOBALS['addverbose'] == 'y') : - $desc = trim(htmlspecialchars($programdata['programName'], ENT_XML1)); + $desc = $programName; if($subprogramName) $desc = $desc."\n부제 : ".$subprogramName; if($rebroadcast == True && $GLOBALS['addrebroadcast'] == 'y') $desc = $desc."\n방송 : 재방송"; if($episode) $desc = $desc."\n회차 : ".$episode."회"; @@ -1684,27 +1737,11 @@ function startsWith($haystack, $needle) { return !strncmp($haystack, $needle, strlen($needle)); } -function pairs($it) { - $prev = current($it); - array_shift($it); - foreach($it as $v): - yield array($prev,$v); - $prev = $v; - endforeach; - /* - it = iter(it) - prev = next(it) - for v in it: - yield prev, v - prev = v - */ -} //사용방법 $usage = <<', '<재>').replace(' [..','').replace(' (..', '') strainer = SoupStrainer('table') - soup = BeautifulSoup(data, 'lxml', parse_only=strainer, from_encoding='utf-8') + soup = BeautifulSoup(data, htmlparser, parse_only=strainer, from_encoding='utf-8') html = soup.find('table').tbody.find_all('tr') if soup.find('table') else '' if(html): for row in html: @@ -384,7 +388,7 @@ def GetEPGFromSKB(ChannelInfo): data = re.sub(pattern, partial(replacement, tag='span'), data) #print(data) strainer = SoupStrainer('div', {'id':'dawn'}) - soup = BeautifulSoup(data, 'lxml', parse_only=strainer, from_encoding='utf-8') + soup = BeautifulSoup(data, htmlparser, parse_only=strainer, from_encoding='utf-8') html = soup.find_all('li') if soup.find_all('li') else '' if(html): for row in html: @@ -575,7 +579,6 @@ def GetEPGFromHcn(ChannelInfo): url = 'http://m.hcn.co.kr/sch_ScheduleList.action' for k in range(period): day = today + datetime.timedelta(days=k) - params = {'method': 'ajax_00', 'pageType': 'sheetList', 'ch_id': ServiceId, 'onairdate': day} params = 
{'ch_id': ServiceId, 'onairdate': day, '_': int(time.time()*1000)} try: response = requests.get(url, params=params, headers=ua, timeout=timeout) @@ -583,7 +586,7 @@ def GetEPGFromHcn(ChannelInfo): html_data = response.content data = html_data strainer = SoupStrainer('li') - soup = BeautifulSoup(data, 'lxml', parse_only=strainer, from_encoding='utf-8') + soup = BeautifulSoup(data, htmlparser, parse_only=strainer, from_encoding='utf-8') html = soup.find_all('li') if soup.find_all('li') else '' if(html) : for row in html: @@ -662,6 +665,48 @@ def GetEPGFromPooq(ChannelInfo): else: pass epgzip(epginfo) +# Get EPG data from EVERYON +def GetEPGFromEveryon(ChannelInfo): + ChannelId = ChannelInfo[0] + ChannelName = ChannelInfo[1] + ServiceId = ChannelInfo[3] + epginfo = [] + url = 'http://www.everyon.tv/mobile/schedule_ch.ptv' + for k in range(period): + day = today + datetime.timedelta(days=k) + params = {'chid': ServiceId, 'date': day.strftime('%Y%m%d')} + try: + response = requests.get(url, params=params, headers=ua, timeout=timeout) + response.raise_for_status() + html_data = response.content + data = html_data + strainer = SoupStrainer('ul') + soup = BeautifulSoup(data, htmlparser, parse_only=strainer, from_encoding='utf-8') + html = soup.find_all('ul',{'class':'lt2'}) if soup.find_all('ul') else '' + if(html) : + for row in html: + startTime = endTime = programName = subprogramName = desc = actors = producers = category = episode = '' + rebroadcast = False + rating = 0 + startTime = str(day) + ' ' + row.find('li', {'class':'pr_time'}).text.strip() + startTime = datetime.datetime.strptime(startTime, '%Y-%m-%d %H:%M') + startTime = startTime.strftime('%Y%m%d%H%M%S') + programName = row.find('li', {'class':'pr_name'}).text.decode('string_escape').strip() + grade = row.find('li', {'class':'img'})['class'][1] + rating = grade.replace('c','').replace('all','0') + #ChannelId, startTime, programName, subprogramName, desc, actors, producers, category, episode, rebroadcast, 
rating + epginfo.append([ChannelId, startTime, programName, subprogramName, desc, actors, producers, category, episode, rebroadcast, rating]) + time.sleep(0.001) + except ValueError: + if(debug): printError(ChannelName + CONTENT_ERROR) + else: pass + except (requests.exceptions.RequestException) as e: + if(debug): printError(ChannelName + str(e)) + else: pass + a = epgzip(epginfo) + for i, j in a: + print(i[1], j[1]) + print(i[2], j[2]) # Get EPG data from MBC def GetEPGFromMbc(ChannelInfo): ChannelId = ChannelInfo[0] @@ -814,7 +859,7 @@ def GetEPGFromKbs(ChannelInfo): json_data = response.text try: data = json.loads(json_data, encoding='utf-8') - soup = BeautifulSoup(data['schedule'], 'lxml') + soup = BeautifulSoup(data['schedule'], htmlparser) for row in soup.find_all('li'): startTime = endTime = programName = subprogramName = desc = actors = producers = category = episode = '' rebroadcast = False @@ -857,7 +902,7 @@ def GetEPGFromArirang(ChannelInfo): strainer = SoupStrainer('table', {'id':'aIRSW_sat'}) elif day.weekday() == 6: strainer = SoupStrainer('table', {'id':'aIRSW_sun'}) - soup = BeautifulSoup(data, 'lxml', parse_only=strainer, from_encoding='utf-8') + soup = BeautifulSoup(data, htmlparser, parse_only=strainer, from_encoding='utf-8') html = soup.find_all('tr') if soup.find_all('tr') else '' if(html): for row in html: @@ -895,7 +940,7 @@ def GetEPGFromArirang(ChannelInfo): else: pass # Zip epginfo -def epgzip(epginfo): +def epgzip1(epginfo): #ChannelId, startTime, programName, subprogramName, desc, actors, producers, category, episode, rebroadcast, rating for epg1, epg2 in zip(epginfo, epginfo[1:]): programdata = {} @@ -914,6 +959,27 @@ def epgzip(epginfo): programdata = {'channelId':ChannelId, 'startTime':startTime, 'endTime':endTime, 'programName':programName, 'subprogramName':subprogramName, 'desc':desc, 'actors':actors, 'producers':producers, 'category':category, 'episode':episode, 'rebroadcast':rebroadcast, 'rating':rating} writeProgram(programdata) 
+def epgzip(epginfo): + epginfo = iter(epginfo) + epg1 = next(epginfo) + for epg2 in epginfo: + programdata = {} + ChannelId = epg1[0] + startTime = epg1[1] if epg1[1] else '' + endTime = epg2[1] if epg2[1] else '' + programName = epg1[2] if epg1[2] else '' + subprogramName = epg1[3] if epg1[3] else '' + desc = epg1[4] if epg1[4] else '' + actors = epg1[5] if epg1[5] else '' + producers = epg1[6] if epg1[6] else '' + category = epg1[7] if epg1[7] else '' + episode = epg1[8] if epg1[8] else '' + rebroadcast = True if epg1[9] else False + rating = int(epg1[10]) if epg1[10] else 0 + programdata = {'channelId':ChannelId, 'startTime':startTime, 'endTime':endTime, 'programName':programName, 'subprogramName':subprogramName, 'desc':desc, 'actors':actors, 'producers':producers, 'category':category, 'episode':episode, 'rebroadcast':rebroadcast, 'rating':rating} + writeProgram(programdata) + epg1 = epg2 + # Write Program def writeProgram(programdata): ChannelId = programdata['channelId'] @@ -948,7 +1014,7 @@ def writeProgram(programdata): else : rating = '%s세 이상 관람가' % (programdata['rating']) if addverbose == 'y': - desc = escape(programdata['programName']).strip() + desc = programName if subprogramName : desc = desc + '\n부제 : ' + subprogramName if rebroadcast == True and addrebroadcast == 'y' : desc = desc + '\n방송 : 재방송' if episode : desc = desc + '\n회차 : ' + str(episode) + '회' @@ -1006,13 +1072,6 @@ def replacement(match, tag): else: return ''; -def pairs(it): - it = iter(it) - prev = next(it) - for v in it: - yield prev, v - prev = v - Settingfile = os.path.dirname(os.path.abspath(__file__)) + '/epg2xml.json' ChannelInfos = [] try: @@ -1050,7 +1109,6 @@ argu3.add_argument('-l', '--limit', dest = 'limit', type=int, metavar = "1-7", c argu3.add_argument('--rebroadcast', dest = 'rebroadcast', metavar = 'y, n', choices = 'yn', help = '제목에 재방송 정보 출력', default = default_rebroadcast) argu3.add_argument('--episode', dest = 'episode', metavar = 'y, n', choices = 'yn', help = '제목에 
회차 정보 출력', default = default_episode) argu3.add_argument('--verbose', dest = 'verbose', metavar = 'y, n', choices = 'yn', help = 'EPG 정보 추가 출력', default = default_verbose) -argu3.add_argument('--xmltvns', dest = 'xmltvns', metavar = 'y, n', choices = 'yn', help = '회차정보 xmltv_ns 출력', default = default_xmltvns) args = parser.parse_args() if args.MyISP : MyISP = args.MyISP @@ -1067,7 +1125,6 @@ if args.limit : default_fetch_limit = args.limit if args.rebroadcast : default_rebroadcast = args.rebroadcast if args.episode : default_episode = args.episode if args.verbose : default_verbose = args.verbose -if args.xmltvns : default_xmltvns = args.xmltvns if MyISP: if not any(MyISP in s for s in ['ALL', 'KT', 'LG', 'SK']): @@ -1163,8 +1220,4 @@ elif output == "socket" : else : printError("epg2xml.json 파일의 default_xml_socket항목이 없습니다."); sys.exit() -#getEpg() -it= [1,2,3,4,5] - -for cur, next in pairs(it): - print (cur, next) +getEpg() \ No newline at end of file