From e0027ab3a645247ce0d8f0c0e341b2b9c1defc40 Mon Sep 17 00:00:00 2001 From: wonipapa Date: Wed, 12 Jul 2017 16:29:36 +0900 Subject: [PATCH] =?UTF-8?q?SKB=20=ED=95=A8=EC=88=98=20=EC=B6=94=EA=B0=80?= =?UTF-8?q?=20=EA=B0=80=EC=A0=B8=EC=98=A4=EB=8A=94=20=EB=82=A0=EC=A7=9C=20?= =?UTF-8?q?2=EC=9D=BC=EB=A1=9C=20=EC=A0=9C=ED=95=9C=20=ED=83=80=EC=9D=B4?= =?UTF-8?q?=ED=8B=80=20=EC=A0=9C=EB=AA=A9=201=EB=B6=80,=202=EB=B6=80=20?= =?UTF-8?q?=EC=84=9C=EB=B8=8C=ED=83=80=EC=9D=B4=ED=8B=80=EB=A1=9C=20?= =?UTF-8?q?=EC=9D=B4=EB=8F=99?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- epg2xml-web.php | 98 ++++++++++++++++++++++++++++++++++++++-- epg2xml.php | 118 ++++++++++++++++++++++++++++++++++++++++++++---- epg2xml.py | 26 ++++++----- 3 files changed, 219 insertions(+), 23 deletions(-) diff --git a/epg2xml-web.php b/epg2xml-web.php index 9e195e4..7e4e514 100644 --- a/epg2xml-web.php +++ b/epg2xml-web.php @@ -167,6 +167,7 @@ else : else : if(in_array($default_fetch_limit, array(1, 2, 3, 4, 5, 6, 7))) : $period = $default_fetch_limit; + $period = $period > 2 ? 2 : $period; else : printError("default_fetch_limit는 1, 2, 3, 4, 5, 6, 7만 가능합니다."); exit; @@ -358,6 +359,8 @@ function getEPG() { GetEPGFromLG($ChannelInfo); elseif($ChannelSource == 'SK') : GetEPGFromSK($ChannelInfo); + elseif($ChannelSource == 'SKB') : + GetEPGFromSKB($ChannelInfo); elseif($ChannelSource == 'SKY') : GetEPGFromSKY($ChannelInfo); elseif($ChannelSource == 'NAVER') : @@ -726,6 +729,88 @@ function GetEPGFromSK($ChannelInfo) { } } +// Get EPG data from SKB +function GetEPGFromSKB($ChannelInfo) { + $ChannelId = $ChannelInfo[0]; + $ChannelName = $ChannelInfo[1]; + $ServiceId = $ChannelInfo[3]; + $epginfo = array(); + foreach(range(1, $GLOBALS['period']) as $k) : + $url = "http://www.skbroadband.com/content/realtime/Channel_List.do"; + $day = date("Ymd", strtotime("+".($k - 1)." days")); + $params = array( + 'key_depth2' => $ServiceId, + 'key_depth3' => $day, + 'tab_gubun' => 'lst' + ); + $params = http_build_query($params); + $method = "POST"; + try { + $response = getWeb($url, $params, $method); + if ($response === False && $GLOBALS['debug']) : + printError($ChannelName.HTTP_ERROR); + else : + $response = str_replace('charset="euc-kr"', 'charset="utf-8"', $response); + $dom = new DomDocument; + libxml_use_internal_errors(True); + $response = mb_convert_encoding($response, "UTF-8", "EUC-KR"); + $dom->loadHTML($response); + $xpath = new DomXPath($dom); + $query = "//tr[@class='".$day."']"; + $rows = $xpath->query($query); + foreach($rows as $row) : + $cells = $row->getElementsByTagName('td'); + $pattern = '/^(.*?)(\(([\d,]+)회\))?(<(.*)>)?(\((재)\))?$/'; + preg_match($pattern, trim($cells->item(0)->nodeValue), $matches); + if ($matches != NULL) : + if(isset($matches[1])) $programName = trim($matches[1]) ?: ""; + if(isset($matches[5])) $subprogramName = trim($matches[5]) ?: ""; + if(isset($matches[3])) $episode = $matches[3] ?: ""; + if(isset($matches[7])) $rebroadcast = $matches[7] ? True : False; + endif; + preg_match('/.*\s*([\d,]+)\s*.*/', $cells->item(1)->nodeValue, $rating); + $startTime = $row->getElementsByTagName('th')->item(0)->nodeValue; + $startTime = date("YmdHis", strtotime($day." ".$startTime)); + $rating = $rating[1]; + //programName, startTime, rating, subprogramName, rebroadcast, episode + $epginfo[]= array($programName, $startTime, $rating, $subprogramName, $rebroadcast, $episode); + endforeach; + endif; + } catch (Exception $e) { + if($GLOBALS['debug']) printError($e->getMessage()); + } + endforeach; + $zipped = array_slice(array_map(NULL, $epginfo, array_slice($epginfo,1)),0,-1); + foreach($zipped as $epg) : + $programName = trim($epg[0][0]) ?: ""; + $subprogramName = trim($epg[0][3]) ?: ""; + $episode = $epg[0][5] ?: ""; + $rebroadcast = $epg[0][4] ? True: False; + $startTime = $epg[0][1] ?: ""; + $endTime = $epg[1][1] ?: ""; + $desc = ""; + $actors = ""; + $producers = ""; + $category = ""; + $rating = $epg[0][2] ?: 0; + $programdata = array( + 'channelId'=> $ChannelId, + 'startTime' => $startTime, + 'endTime' => $endTime, + 'programName' => $programName, + 'subprogramName'=> $subprogramName, + 'desc' => $desc, + 'actors' => $actors, + 'producers' => $producers, + 'category' => $category, + 'episode' => $episode, + 'rebroadcast' => $rebroadcast, + 'rating' => $rating + ); + writeProgram($programdata); + endforeach; +} + // Get EPG data from SKY function GetEPGFromSKY($ChannelInfo) { $ChannelId = $ChannelInfo[0]; @@ -1213,9 +1298,16 @@ function writeProgram($programdata) { $ChannelId = $programdata['channelId']; $startTime = $programdata['startTime']; $endTime = $programdata['endTime']; - $programName = htmlspecialchars($programdata['programName'], ENT_XML1); - $subprogramName = htmlspecialchars($programdata['subprogramName'], ENT_XML1); - $actors = htmlspecialchars($programdata['actors'], ENT_XML1); + $programName = trim(htmlspecialchars($programdata['programName'], ENT_XML1)); + $subprogramName = trim(htmlspecialchars($programdata['subprogramName'], ENT_XML1)); + preg_match('/(.*) \(?(\d+부)\)?/', $programName, $matches); + if ($matches != NULL) : + if(isset($matches[1])) $programName = trim($matches[1]) ?: ""; + if(isset($matches[2])) $subprogramName = trim($matches[2]." ".$subprogramName) ?: ""; + endif; + if($programName == NULL): + $programName = $subprogramName; + endif; $actors = htmlspecialchars($programdata['actors'], ENT_XML1); $producers = htmlspecialchars($programdata['producers'], ENT_XML1); $category = htmlspecialchars($programdata['category'], ENT_XML1); $episode = $programdata['episode']; diff --git a/epg2xml.php b/epg2xml.php index 9a2764f..1c29e44 100644 --- a/epg2xml.php +++ b/epg2xml.php @@ -187,6 +187,8 @@ else : else : if(in_array($default_fetch_limit, array(1, 2, 3, 4, 5, 6, 7))) : $period = $default_fetch_limit; + $period = $period > 2 ? 2 : $period; + else : printError("default_fetch_limit는 1, 2, 3, 4, 5, 6, 7만 가능합니다."); exit; @@ -377,6 +379,8 @@ function getEPG() { GetEPGFromLG($ChannelInfo); elseif($ChannelSource == 'SK') : GetEPGFromSK($ChannelInfo); + elseif($ChannelSource == 'SKB') : + GetEPGFromSKB($ChannelInfo); elseif($ChannelSource == 'SKY') : GetEPGFromSKY($ChannelInfo); elseif($ChannelSource == 'NAVER') : @@ -455,8 +459,9 @@ function GetEPGFromEPG($ChannelInfo) { $subprogramName = ""; $rating = 0; $episode = ""; - $rebroadcast = False; - preg_match('/?(.*)?(.*?)\s*(<(.*)>)?\s*(\(재\))?\s*(\(([\d,]+)회\))?()?\s*<\/td>/', trim($dom->saveHTML($program)), $matches); + $rebroadcast = False; + $pattern = '/?(.*)?(.*?)\s*(<(.*)>)?\s*(\(재\))?\s*(\(([\d,]+)회\))?()?\s*<\/td>/'; + preg_match($pattern, trim($dom->saveHTML($program)), $matches); if ($matches != NULL) : if(isset($matches[2])) $programName = trim($matches[2]) ?: ""; if(isset($matches[4])) $subprogramName = trim($matches[4]) ?: ""; @@ -554,7 +559,8 @@ function GetEPGFromKT($ChannelInfo) { foreach($zipped as $epg) : $programName = ""; $subprogramName = ""; - preg_match('/^(.*?)( <(.*)>)?$/', $epg[0][0], $matches); + $pattern = '/^(.*?)( <(.*)>)?$/'; + preg_match($pattern, $epg[0][0], $matches); if ($matches != NULL) : if(isset($matches[1])) $programName = $matches[1] ?: ""; if(isset($matches[3])) $subprogramName = $matches[3] ?: ""; @@ -631,7 +637,8 @@ function GetEPGFromLG($ChannelInfo) { endforeach; $zipped = array_slice(array_map(NULL, $epginfo, array_slice($epginfo,1)),0,-1); foreach($zipped as $epg) : - preg_match('/(<재>?)?(.*?)(\[(.*)\])?\s?(\(([\d,]+)회\))?$/', $epg[0][0], $matches); + $pattern = '/(<재>?)?(.*?)(\[(.*)\])?\s?(\(([\d,]+)회\))?$/'; + preg_match($pattern, $epg[0][0], $matches); $programName = ""; $subprogramName = ""; $episode = ""; @@ -701,7 +708,8 @@ function GetEPGFromSK($ChannelInfo) { $subprogramName = ""; $episode = ""; $rebroadcast = False; - preg_match('/^(.*?)(?:\s*[\(<]([\d,회]+)[\)>])?(?:\s*<([^<]*?)>)?(\((재)\))?$/', str_replace('...', '>', $program['programName']), $matches); + $pattern = '/^(.*?)(?:\s*[\(<]([\d,회]+)[\)>])?(?:\s*<([^<]*?)>)?(\((재)\))?$/'; + preg_match($pattern, str_replace('...', '>', $program['programName']), $matches); if ($matches != NULL) : if(isset($matches[1])) $programName = trim($matches[1]) ?: ""; if(isset($matches[3])) $subprogramName = trim($matches[3]) ?: ""; @@ -745,6 +753,88 @@ function GetEPGFromSK($ChannelInfo) { } } +// Get EPG data from SKB +function GetEPGFromSKB($ChannelInfo) { + $ChannelId = $ChannelInfo[0]; + $ChannelName = $ChannelInfo[1]; + $ServiceId = $ChannelInfo[3]; + $epginfo = array(); + foreach(range(1, $GLOBALS['period']) as $k) : + $url = "http://www.skbroadband.com/content/realtime/Channel_List.do"; + $day = date("Ymd", strtotime("+".($k - 1)." days")); + $params = array( + 'key_depth2' => $ServiceId, + 'key_depth3' => $day, + 'tab_gubun' => 'lst' + ); + $params = http_build_query($params); + $method = "POST"; + try { + $response = getWeb($url, $params, $method); + if ($response === False && $GLOBALS['debug']) : + printError($ChannelName.HTTP_ERROR); + else : + $response = str_replace('charset="euc-kr"', 'charset="utf-8"', $response); + $dom = new DomDocument; + libxml_use_internal_errors(True); + $response = mb_convert_encoding($response, "UTF-8", "EUC-KR"); + $dom->loadHTML($response); + $xpath = new DomXPath($dom); + $query = "//tr[@class='".$day."']"; + $rows = $xpath->query($query); + foreach($rows as $row) : + $cells = $row->getElementsByTagName('td'); + $pattern = '/^(.*?)(\(([\d,]+)회\))?(<(.*)>)?(\((재)\))?$/'; + preg_match($pattern, trim($cells->item(0)->nodeValue), $matches); + if ($matches != NULL) : + if(isset($matches[1])) $programName = trim($matches[1]) ?: ""; + if(isset($matches[5])) $subprogramName = trim($matches[5]) ?: ""; + if(isset($matches[3])) $episode = $matches[3] ?: ""; + if(isset($matches[7])) $rebroadcast = $matches[7] ? True : False; + endif; + preg_match('/.*\s*([\d,]+)\s*.*/', $cells->item(1)->nodeValue, $rating); + $startTime = $row->getElementsByTagName('th')->item(0)->nodeValue; + $startTime = date("YmdHis", strtotime($day." ".$startTime)); + $rating = $rating[1]; + //programName, startTime, rating, subprogramName, rebroadcast, episode + $epginfo[]= array($programName, $startTime, $rating, $subprogramName, $rebroadcast, $episode); + endforeach; + endif; + } catch (Exception $e) { + if($GLOBALS['debug']) printError($e->getMessage()); + } + endforeach; + $zipped = array_slice(array_map(NULL, $epginfo, array_slice($epginfo,1)),0,-1); + foreach($zipped as $epg) : + $programName = trim($epg[0][0]) ?: ""; + $subprogramName = trim($epg[0][3]) ?: ""; + $episode = $epg[0][5] ?: ""; + $rebroadcast = $epg[0][4] ? True: False; + $startTime = $epg[0][1] ?: ""; + $endTime = $epg[1][1] ?: ""; + $desc = ""; + $actors = ""; + $producers = ""; + $category = ""; + $rating = $epg[0][2] ?: 0; + $programdata = array( + 'channelId'=> $ChannelId, + 'startTime' => $startTime, + 'endTime' => $endTime, + 'programName' => $programName, + 'subprogramName'=> $subprogramName, + 'desc' => $desc, + 'actors' => $actors, + 'producers' => $producers, + 'category' => $category, + 'episode' => $episode, + 'rebroadcast' => $rebroadcast, + 'rating' => $rating + ); + writeProgram($programdata); + endforeach; +} + // Get EPG data from SKY function GetEPGFromSKY($ChannelInfo) { $ChannelId = $ChannelInfo[0]; @@ -958,7 +1048,8 @@ function GetEPGFromMbc($ChannelInfo) { if($program['Channel'] == "CHAM" && $program['LiveDays'] == $dayofweek[date("w", strtotime($day))]) : $programName = ""; $rebroadcast = False; - preg_match('/^(.*?)(\(재\))?$/', htmlspecialchars_decode($program['ProgramTitle']), $matches); + $pattern = '/^(.*?)(\(재\))?$/'; + preg_match($pattern, htmlspecialchars_decode($program['ProgramTitle']), $matches); if ($matches != NULL) : $programName = $matches[1]; $rebroadcast = $matches[2] ? True : False; @@ -1031,7 +1122,8 @@ function GetEPGFromMil($ChannelInfo) { foreach($programs as $program) : $programName = ""; $rebroadcast = False; - preg_match('/^(.*?)(\(재\))?$/', htmlspecialchars_decode($program['program_title']), $matches); + $pattern = '/^(.*?)(\(재\))?$/'; + preg_match($pattern, htmlspecialchars_decode($program['program_title']), $matches); if ($matches != NULL) : $programName = $matches[1]; $rebroadcast = $matches[2] ? True : False; @@ -1232,8 +1324,16 @@ function writeProgram($programdata) { $ChannelId = $programdata['channelId']; $startTime = $programdata['startTime']; $endTime = $programdata['endTime']; - $programName = htmlspecialchars($programdata['programName'], ENT_XML1); - $subprogramName = htmlspecialchars($programdata['subprogramName'], ENT_XML1); + $programName = trim(htmlspecialchars($programdata['programName'], ENT_XML1)); + $subprogramName = trim(htmlspecialchars($programdata['subprogramName'], ENT_XML1)); + preg_match('/(.*) \(?(\d+부)\)?/', $programName, $matches); + if ($matches != NULL) : + if(isset($matches[1])) $programName = trim($matches[1]) ?: ""; + if(isset($matches[2])) $subprogramName = trim($matches[2]." ".$subprogramName) ?: ""; + endif; + if($programName == NULL): + $programName = $subprogramName; + endif; $actors = htmlspecialchars($programdata['actors'], ENT_XML1); $producers = htmlspecialchars($programdata['producers'], ENT_XML1); $category = htmlspecialchars($programdata['category'], ENT_XML1); diff --git a/epg2xml.py b/epg2xml.py index e2bb556..a64f377 100644 --- a/epg2xml.py +++ b/epg2xml.py @@ -157,8 +157,7 @@ def GetEPGFromEPG(ChannelInfo): thisday = day row = tables[i].find_all('td', {'colspan':'2'}) for cell in row: - hour = int(cell.text.strip().strip('시')) - + hour = int(cell.text.strip().strip('시')) if(i == 1) : hour = 'AM ' + str(hour) elif(i == 2) : hour = 'PM ' + str(hour) elif(i == 3 and hour > 5 and hour < 12 ) : hour = 'PM ' + str(hour) @@ -230,7 +229,8 @@ def GetEPGFromKT(ChannelInfo): for epg1, epg2 in zip(epginfo, epginfo[1:]): programName = '' subprogrmaName = '' - matches = re.match('^(.*?)( <(.*)>)?$', epg1[0].decode('string_escape')) + pattern = '^(.*?)( <(.*)>)?$' + matches = re.match(pattern, epg1[0].decode('string_escape')) if not (matches is None): programName = matches.group(1) if matches.group(1) else '' subprogramName = matches.group(3) if matches.group(3) else '' @@ -285,7 +285,8 @@ def GetEPGFromLG(ChannelInfo): programName = '' subprogramName = '' episode = '' - matches = re.match('(<재>?)?(.*?)(\[(.*)\])?\s?(\(([\d,]+)회\))?$', epg1[0].decode('string_escape')) + pattern = '(<재>?)?(.*?)(\[(.*)\])?\s?(\(([\d,]+)회\))?$' + matches = re.match(pattern, epg1[0].decode('string_escape')) rebroadcast = False if not (matches is None): programName = matches.group(2) if matches.group(2) else '' @@ -328,7 +329,8 @@ def GetEPGFromSK(ChannelInfo): subprogramName = '' episode = '' rebroadcast = False - matches = re.match('^(.*?)(?:\s*[\(<]([\d,회]+)[\)>])?(?:\s*<([^<]*?)>)?(\((재)\))?$', program['programName'].replace('...', '>').encode('utf-8')) + pattern = '^(.*?)(?:\s*[\(<]([\d,회]+)[\)>])?(?:\s*<([^<]*?)>)?(\((재)\))?$' + matches = re.match(pattern, program['programName'].replace('...', '>').encode('utf-8')) if not (matches is None): programName = matches.group(1).strip() if matches.group(1) else '' subprogramName = matches.group(3).strip() if matches.group(3) else '' @@ -378,7 +380,6 @@ def GetEPGFromSKB(ChannelInfo): for row in html: startTime = str(day) + ' ' + row.find('th').text for cell in [row.find_all('td')]: - pattern = "(.*?)\s*(\(([\d,]+)회\))?\s*(<(.*)>)?\s*(\(재\)?)" pattern = "^(.*?)(\(([\d,]+)회\))?(<(.*)>)?(\((재)\))?$" matches = re.match(pattern, cell[0].text.decode('string_escape')) if not(matches is None) : @@ -538,7 +539,8 @@ def GetEPGFromMbc(ChannelInfo): if program['Channel'] == "CHAM" and program['LiveDays'] == dayofweek[day.weekday()]: programName = '' rebroadcast = True - matches = re.match('^(.*?)(\(재\))?$', unescape(program['ProgramTitle'].encode('utf-8', 'ignore'))) + pattern = '^(.*?)(\(재\))?$' + matches = re.match(pattern, unescape(program['ProgramTitle'].encode('utf-8', 'ignore'))) if not(matches is None): programName = matches.group(1) rebroadcast = True if matches.group(2) else False @@ -581,7 +583,8 @@ def GetEPGFromMil(ChannelInfo): for program in data['resultList']: programName = '' rebroadcast = False - matches = re.match('^(.*?)(\(재\))?$', unescape(program['program_title'].encode('utf-8', 'ignore'))) + pattern = '^(.*?)(\(재\))?$' + matches = re.match(pattern, unescape(program['program_title'].encode('utf-8', 'ignore'))) if not(matches is None): programName = matches.group(1) rebroadcast = True if matches.group(2) else False @@ -680,7 +683,8 @@ def GetEPGFromKbs(ChannelInfo): for row in soup.find_all('li'): programName = '' startTime = '' - matches = re.match('([0-2][0-9]:[0-5][0-9])[0-2][0-9]:[0-5][0-9]\[(.*)\] Broadcast', unescape(row.text.encode('utf-8', 'ignore'))) + pattern = '([0-2][0-9]:[0-5][0-9])[0-2][0-9]:[0-5][0-9]\[(.*)\] Broadcast' + matches = re.match(pattern, unescape(row.text.encode('utf-8', 'ignore'))) if not(matches is None): programName = unescape(matches.group(2)) startTime = str(day) + ' ' + matches.group(1) @@ -716,8 +720,8 @@ def writeProgram(programdata): ChannelId = programdata['channelId'] startTime = programdata['startTime'] endTime = programdata['endTime'] - programName = escape(programdata['programName']) - subprogramName = escape(programdata['subprogramName']) + programName = escape(programdata['programName']).strip() + subprogramName = escape(programdata['subprogramName']).strip() matches = re.match('(.*) \(?(\d+부)\)?', unescape(programName.encode('utf-8', 'ignore'))) if not(matches is None):