From 763ca1714f6e2703feef29aa9f326a5c11ed2efc Mon Sep 17 00:00:00 2001 From: wonipapa Date: Mon, 5 Mar 2018 17:46:42 +0900 Subject: [PATCH 1/5] =?UTF-8?q?SKB=20=ED=95=A8=EC=88=98=20=EC=88=98?= =?UTF-8?q?=EC=A0=95?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- epg2xml.py | 30 ++++++++++++++++-------------- 1 file changed, 16 insertions(+), 14 deletions(-) diff --git a/epg2xml.py b/epg2xml.py index f27eaa0..e57176d 100644 --- a/epg2xml.py +++ b/epg2xml.py @@ -44,7 +44,7 @@ if not sys.version_info[:2] == (2, 7): sys.exit() # Set variable -__version__ = '1.2.5' +__version__ = '1.2.5p1' debug = False today = datetime.date.today() ua = {'User-Agent': 'Mozilla/5.0 (Windows NT 6.3; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/53.0.2785.116 Safari/537.36', 'accept': '*/*'} @@ -381,17 +381,16 @@ def GetEPGFromSKB(ChannelInfo): html_data = response.content data = unicode(html_data, 'euc-kr', 'ignore').encode('utf-8', 'ignore') data = re.sub('', '', data, 0, re.I|re.S) - data = re.sub('', '', data) - data = re.sub('', '', data) - data = re.sub('화면해설','',data) - data = re.sub('자막방송','',data) - data = re.sub('Full HD','',data) - data = re.sub('UHD','',data) - data = re.sub('now on','',data) - data = re.sub('','',data) - data = re.sub('프로그램 안내', '',data) - pattern = '(.*)<\/span>' - data = re.sub(pattern, partial(replacement, tag='span'), data) + data = re.sub('(.*?)', '', data) + data = re.sub('(.*?)', '', data) + data = re.sub('(.*?)', '', data) + data = re.sub('(.*?)', '', data) + data = re.sub('(.*?)', '', data) + data = re.sub('(.*?)', '', data) + data = re.sub('(.*?)', '', data) + data = re.sub('프로그램 안내', '', data) + #pattern = '(.*)<\/span>' + #data = re.sub(pattern, partial(replacement, tag='span'), data) strainer = SoupStrainer('div', {'id':'uiScheduleTabContent'}) soup = BeautifulSoup(data, htmlparser, parse_only=strainer, from_encoding='utf-8') html = soup.find_all('li',{'class':'list'}) if soup.find_all('li') else '' @@ -405,6 +404,8 @@ def GetEPGFromSKB(ChannelInfo): startTime = startTime.strftime('%Y%m%d%H%M%S') cell = row.find('p', {'class':'cont'}) if(cell): + if cell.find('span'): + cell.span.decompose() cell = cell.text.decode('string_escape').strip() pattern = "^(.*?)(\(([\d,]+)회\))?(<(.*)>)?(\((재)\))?$" matches = re.match(pattern, cell) @@ -413,9 +414,10 @@ def GetEPGFromSKB(ChannelInfo): subprogramName = matches.group(5) if matches.group(5) else '' rebroadcast = True if matches.group(7) else False episode = matches.group(3) if matches.group(3) else '' - rating = row.find('span', {'class':re.compile('^watch.*$')}) + rating = row.find('i', {'class':'hide'}) if not(rating is None) : - rating = int(rating.text.decode('string_escape').replace('세','').strip()) + rating = int(rating.text.decode('string_escape').replace('세 이상','').strip()) + #ChannelId, startTime, programName, subprogramName, desc, actors, producers, category, episode, rebroadcast, rating epginfo.append([ChannelId, startTime, programName, subprogramName, desc, actors, producers, category, episode, rebroadcast, rating]) time.sleep(0.001) From 1d997feb615a60c6bfc17ffc2e63e08c3a06d10b Mon Sep 17 00:00:00 2001 From: wonipapa Date: Mon, 5 Mar 2018 17:59:58 +0900 Subject: [PATCH 2/5] =?UTF-8?q?SKB=20=ED=95=A8=EC=88=98=20=EC=88=98?= =?UTF-8?q?=EC=A0=95(p1)?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- epg2xml.py | 2 -- 1 file changed, 2 deletions(-) diff --git a/epg2xml.py b/epg2xml.py index e57176d..dcdafa5 100644 --- a/epg2xml.py +++ b/epg2xml.py @@ -389,8 +389,6 @@ def GetEPGFromSKB(ChannelInfo): data = re.sub('(.*?)', '', data) data = re.sub('(.*?)', '', data) data = re.sub('프로그램 안내', '', data) - #pattern = '(.*)<\/span>' - #data = re.sub(pattern, partial(replacement, tag='span'), data) strainer = SoupStrainer('div', {'id':'uiScheduleTabContent'}) soup = BeautifulSoup(data, htmlparser, parse_only=strainer, from_encoding='utf-8') html = soup.find_all('li',{'class':'list'}) if soup.find_all('li') else '' From f3c1a9067e4b9ba94adef43e656735fdabfdb45f Mon Sep 17 00:00:00 2001 From: wonipapa Date: Tue, 6 Mar 2018 14:57:09 +0900 Subject: [PATCH 3/5] =?UTF-8?q?=EC=B1=84=EB=84=90=20=EB=B3=80=EA=B2=BD=20?= =?UTF-8?q?=EB=B0=98=EC=98=81=202018=EB=85=84=203=EC=9B=94=206=EC=9D=BC?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- Channel.json | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/Channel.json b/Channel.json index aa6b0d9..4186a66 100644 --- a/Channel.json +++ b/Channel.json @@ -232,7 +232,7 @@ { "Id": 294, "Name": "쿠키건강TV", "KT Name": "쿠키건강TV", "KTCh": 220, "LG Name": "쿠키건강TV", "LGCh": 144, "SK Name": "쿠키건강TV", "SKCh": 269, "Radio Name": "", "RadioCh": null, "Icon_url": "http://i.imgur.com/4gl92D1.png", "Source": "EPG", "ServiceId": "735" }, { "Id": 296, "Name": "키즈원", "KT Name": "키즈원", "KTCh": 148, "LG Name": "키즈원", "LGCh": 157, "SK Name": "KIDS1", "SKCh": 193, "Radio Name": "", "RadioCh": null, "Icon_url": "http://i.imgur.com/f8T1Sw4.png", "Source": "NAVER", "ServiceId": "815020" }, { "Id": 297, "Name": "토마토TV", "KT Name": "토마토TV", "KTCh": 185, "LG Name": "토마토TV", "LGCh": 111, "SK Name": "토마토TV", "SKCh": 150, "Radio Name": "", "RadioCh": null, "Icon_url": "http://i.imgur.com/dVWy3Ex.png", "Source": "SK", "ServiceId": "620" }, -{ "Id": 299, "Name": "핑크하우스", "KT Name": "핑크하우스", "KTCh": 208, "LG Name": "", "LGCh": null, "SK Name": "핑크하우스", "SKCh": 324, "Radio Name": "", "RadioCh": null, "Icon_url": "http://i.imgur.com/03U2Ges.png", "Source": "EPG", "ServiceId": "599" }, +{ "Id": 299, "Name": "핑크하우스", "KT Name": "핑크하우스", "KTCh": 208, "LG Name": "", "LGCh": null, "SK Name": "핑크하우스", "SKCh": 324, "Radio Name": "", "RadioCh": null, "Icon_url": "https://i.imgur.com/RnEFpd6.png", "Source": "EPG", "ServiceId": "599" }, { "Id": 301, "Name": "가톨릭평화방송", "KT Name": "가톨릭평화방송", "KTCh": 231, "LG Name": "가톨릭평화방송", "LGCh": 184, "SK Name": "가톨릭평화방송", "SKCh": 307, "Radio Name": "", "RadioCh": null, "Icon_url": "http://i.imgur.com/G5fTCL3.png", "Source": "HCN", "ServiceId": "33" }, { "Id": 302, "Name": "폴라리스TV", "KT Name": "폴라리스TV", "KTCh": 129, "LG Name": "폴라리스 TV", "LGCh": 67, "SK Name": "폴라리스TV", "SKCh": 249, "Radio Name": "", "RadioCh": null, "Icon_url": "http://i.imgur.com/aghufJ7.png", "Source": "SK", "ServiceId": "252" }, { "Id": 303, "Name": "한국경제TV", "KT Name": "한국경제TV", "KTCh": 180, "LG Name": "한국경제TV", "LGCh": 121, "SK Name": "한국경제TV", "SKCh": 151, "Radio Name": "", "RadioCh": null, "Icon_url": "http://i.imgur.com/ChnD0FT.png", "Source": "NAVER", "ServiceId": "814929" }, @@ -244,8 +244,8 @@ { "Id": 313, "Name": "현대홈쇼핑+샵", "KT Name": "현대홈쇼핑+샵", "KTCh": 36, "LG Name": "현대홈쇼핑+샵", "LGCh": 34, "SK Name": "현대홈쇼핑+Shop", "SKCh": 39, "Radio Name": "", "RadioCh": null, "Icon_url": "http://i.imgur.com/COo8Bcm.png", "Source": "SK", "ServiceId": "337" }, { "Id": 314, "Name": "홈&쇼핑", "KT Name": "홈&쇼핑", "KTCh": 14, "LG Name": "홈앤쇼핑", "LGCh": 4, "SK Name": "홈&쇼핑", "SKCh": 4, "Radio Name": "", "RadioCh": null, "Icon_url": "http://i.imgur.com/nLxw0LW.png", "Source": "NAVER", "ServiceId": "815524" }, { "Id": 315, "Name": "환경TV", "KT Name": "환경TV", "KTCh": 166, "LG Name": "", "LGCh": null, "SK Name": "환경TV", "SKCh": 276, "Radio Name": "", "RadioCh": null, "Icon_url": "http://i.imgur.com/eITOr2Y.png", "Source": "HCN", "ServiceId": "29" }, -{ "Id": 316, "Name": "Life N", "KT Name": "", "KTCh": null, "LG Name": "", "LGCh": null, "SK Name": "Life N", "SKCh": 215, "Radio Name": "", "RadioCh": null, "Icon_url": "http://i.imgur.com/qNde2j7.png", "Source": "SKB", "ServiceId": "277" }, -{ "Id": 317, "Name": "Discovery HD", "KT Name": "", "KTCh": null, "LG Name": "디스커버리 아시아", "LGCh": 133, "SK Name": "", "SKCh": null, "Radio Name": "", "RadioCh": null, "Icon_url": "http://i.imgur.com/bH9BY7E.png", "Source": "SKY", "ServiceId": "929" }, +{ "Id": 316, "Name": "Life U", "KT Name": "", "KTCh": null, "LG Name": "", "LGCh": null, "SK Name": "Life U", "SKCh": 215, "Radio Name": "", "RadioCh": null, "Icon_url": "https://i.imgur.com/3VJOGoI.png", "Source": "SKB", "ServiceId": "277" }, +{ "Id": 317, "Name": "디스커버리 아시아", "KT Name": "", "KTCh": null, "LG Name": "디스커버리 아시아", "LGCh": 133, "SK Name": "", "SKCh": null, "Radio Name": "", "RadioCh": null, "Icon_url": "https://i.imgur.com/6NdyDW5.png", "Source": "LG", "ServiceId": "610" }, { "Id": 318, "Name": "Celestial Movies", "KT Name": "", "KTCh": null, "LG Name": "", "LGCh": null, "SK Name": "Celestial Movies", "SKCh": 62, "Radio Name": "", "RadioCh": null, "Icon_url": "http://i.imgur.com/xDXM13Q.png", "Source": "SKB", "ServiceId": "877" }, { "Id": 319, "Name": "UHD Dream TV", "KT Name": "", "KTCh": null, "LG Name": "", "LGCh": null, "SK Name": "UHD Dream TV", "SKCh": 71, "Radio Name": "", "RadioCh": null, "Icon_url": "http://i.imgur.com/aLG2rKa.png", "Source": "SKY", "ServiceId": "689" }, { "Id": 320, "Name": "UMAX", "KT Name": "", "KTCh": null, "LG Name": "", "LGCh": null, "SK Name": "UMAX", "SKCh": 73, "Radio Name": "", "RadioCh": null, "Icon_url": "http://i.imgur.com/EgVuybQ.png", "Source": "SKB", "ServiceId": "69" }, From 586b67ad5d229116214a6b2bbd2b9f83b88648dd Mon Sep 17 00:00:00 2001 From: wonipapa Date: Tue, 6 Mar 2018 14:57:40 +0900 Subject: [PATCH 4/5] =?UTF-8?q?SKB=ED=95=A8=EC=88=98=20=EC=88=98=EC=A0=95(?= =?UTF-8?q?p1)?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- epg2xml-web.php | 26 +++++++++++++++----------- 1 file changed, 15 insertions(+), 11 deletions(-) diff --git a/epg2xml-web.php b/epg2xml-web.php index 9ef02cc..3180a09 100644 --- a/epg2xml-web.php +++ b/epg2xml-web.php @@ -3,7 +3,7 @@ @date_default_timezone_set('Asia/Seoul'); error_reporting(E_ALL ^ E_NOTICE); @set_time_limit(0); -define("VERSION", "1.2.5"); +define("VERSION", "1.2.5p1"); $debug = False; $ua = "'Mozilla/5.0 (Windows NT 6.3; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/53.0.2785.116 Safari/537.36'"; $timeout = 5; @@ -751,18 +751,22 @@ function GetEPGFromSKB($ChannelInfo) { if ($response === False && $GLOBALS['debug']) : printError($ChannelName.HTTP_ERROR); else : - $response = str_replace('charset="euc-kr"', 'charset="utf-8"', $response); + $response = str_replace('charset="EUC-KR"', 'charset="UTF-8"', $response); + $response = mb_convert_encoding($response, "UTF-8", "EUC-KR"); $response = preg_replace('//is', '', $response); - $response = preg_replace('/<\/span>/is', '', $response); - //$response = preg_replace('/프로그램 안내<\/strong>/is', '', $response); - //$response = preg_replace('//is', '', $response); - $pattern = '/(.*)<\/span>/'; - $response = preg_replace_callback($pattern, function($matches) { return ''.htmlspecialchars($matches[1], ENT_NOQUOTES).'';}, $response); + $response = preg_replace('/(.*?)<\/span>/', '', $response); + $response = preg_replace('/(.*?)<\/span>/', '', $response); + $response = preg_replace('/(.*?)<\/span>/', '', $response); + $response = preg_replace('/(.*?)<\/span>/', '', $response); + $response = preg_replace('/(.*?)<\/span>/', '', $response); + $response = preg_replace('/(.*?)<\/span>/', '', $response); + $response = preg_replace('/(.*?)<\/span>/', '', $response); + $response = preg_replace('/프로그램 안내<\/strong>/', '', $response); $dom = new DomDocument; libxml_use_internal_errors(True); if($dom->loadHTML(''.$response)): $xpath = new DomXPath($dom); - $query = "//span[@class='caption' or @class='explan' or @class='fullHD' or @class='UHD' or @class='nowon' or @class='flag_box']"; + $query = "//span[@class='caption' or @class='explan' or @class='fullHD' or @class='UHD' or @class='nowon']"; $spans = $xpath->query($query); foreach($spans as $span) : $span->parentNode->removeChild( $span); @@ -776,7 +780,7 @@ function GetEPGFromSKB($ChannelInfo) { $cells = $row->getElementsByTagName('p'); $startTime = $cells->item(0)->nodeValue ?: ""; $startTime = date("YmdHis", strtotime($day." ".$startTime)); - $programName = trim($cells->item(1)->nodeValue) ?: ""; + $programName = trim($cells->item(1)->childNodes->item(0)->nodeValue) ?: ""; $pattern = '/^(.*?)(\(([\d,]+)회\))?(<(.*)>)?(\((재)\))?$/'; preg_match($pattern, $programName, $matches); if ($matches != NULL) : @@ -784,8 +788,8 @@ function GetEPGFromSKB($ChannelInfo) { if(isset($matches[5])) $subprogramName = trim($matches[5]) ?: ""; if(isset($matches[3])) $episode = $matches[3] ?: ""; if(isset($matches[7])) $rebroadcast = $matches[7] ? True : False; - endif; - if($cells->length > 3) $rating = str_replace('세', '', $cells->item(3)->nodeValue) ?: 0; + endif; + if(trim($cells->item(1)->childNodes->item(1)->nodeValue)) $rating = str_replace('세 이상', '', trim($cells->item(1)->childNodes->item(1)->nodeValue)) ?: 0; //ChannelId, startTime, programName, subprogramName, desc, actors, producers, category, episode, rebroadcast, rating $epginfo[] = array($ChannelId, $startTime, $programName, $subprogramName, $desc, $actors, $producers, $category, $episode, $rebroadcast, $rating); usleep(1000); From 2e5c00d20f107b268890eaa50c90b9d54f96885f Mon Sep 17 00:00:00 2001 From: wonipapa Date: Tue, 6 Mar 2018 15:07:07 +0900 Subject: [PATCH 5/5] update README.md --- README.md | 1 + 1 file changed, 1 insertion(+) diff --git a/README.md b/README.md index 1b59f8f..8411f9d 100644 --- a/README.md +++ b/README.md @@ -120,6 +120,7 @@ https://github.com/wonipapa/epg2xml/wiki/FAQ ## 변경사항 ### Version 1.2.5 - SKB 함수 수정 + - SKB 함수 수정(p1) ### Version 1.2.4 - ISCS 함수 수정 - SKB 함수 수정(p1)