From ec3ce6f12723c96d2844145113506a5e8c3dbaa1 Mon Sep 17 00:00:00 2001 From: wonipapa Date: Wed, 6 Sep 2017 14:22:02 +0900 Subject: [PATCH] =?UTF-8?q?HCN,=20ISCS=20=ED=95=A8=EC=88=98=20=EC=88=98?= =?UTF-8?q?=EC=A0=95,=20=EC=9B=B9=ED=8E=98=EC=9D=B4=EC=A7=80=20=ED=8C=A8?= =?UTF-8?q?=EC=B9=98=EC=8B=9C=20=EC=8B=9C=EA=B0=84=20=EA=B0=84=EA=B2=A9=20?= =?UTF-8?q?=EC=B6=94=EA=B0=80?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- epg2xml.php | 115 +++++++++++++++++++++------------------ epg2xml.py | 153 +++++++++++++++++++++------------------------------- 2 files changed, 124 insertions(+), 144 deletions(-) diff --git a/epg2xml.php b/epg2xml.php index aa45f70..af9eaba 100644 --- a/epg2xml.php +++ b/epg2xml.php @@ -490,6 +490,7 @@ function GetEPGFromEPG($ChannelInfo) { endforeach; //ChannelId, startTime, programName, subprogramName, desc, actors, producers, category, episode, rebroadcast, rating $epginfo[] = array($ChannelId, $startTime, $programName, $subprogramName, $desc, $actors, $producers, $category, $episode, $rebroadcast, $rating); + usleep(1000); endforeach; endfor; else: @@ -552,6 +553,7 @@ function GetEPGFromKT($ChannelInfo) { $rating = str_replace("all", 0, str_replace("세 이상", "", trim($cells->item(2)->nodeValue))); //ChannelId, startTime, programName, subprogramName, desc, actors, producers, category, episode, rebroadcast, rating $epginfo[] = array($ChannelId, $startTime, $programName, $subprogramName, $desc, $actors, $producers, $category, $episode, $rebroadcast, $rating); + usleep(1000); endforeach; else : if($GLOBALS['debug']) printError($ChannelName.CONTENT_ERROR); @@ -613,6 +615,7 @@ function GetEPGFromLG($ChannelInfo) { $rating = trim($spans->item(1)->nodeValue)=="All" ? 0 : trim($spans->item(1)->nodeValue); //ChannelId, startTime, programName, subprogramName, desc, actors, producers, category, episode, rebroadcast, rating $epginfo[] = array($ChannelId, $startTime, $programName, $subprogramName, $desc, $actors, $producers, $category, $episode, $rebroadcast, $rating); + usleep(1000); endforeach; else : if($GLOBALS['debug']) printError($ChannelName.CONTENT_ERROR); @@ -691,6 +694,7 @@ function GetEPGFromSK($ChannelInfo) { 'rating' => $rating ); writeProgram($programdata); + usleep(1000); endforeach; endif; } catch(Exception $e) { @@ -758,6 +762,7 @@ function GetEPGFromSKB($ChannelInfo) { if($cells->length > 3) $rating = str_replace('세', '', $cells->item(3)->nodeValue) ?: 0; //ChannelId, startTime, programName, subprogramName, desc, actors, producers, category, episode, rebroadcast, rating $epginfo[] = array($ChannelId, $startTime, $programName, $subprogramName, $desc, $actors, $producers, $category, $episode, $rebroadcast, $rating); + usleep(1000); endforeach; else : if($GLOBALS['debug']) printError($ChannelName.CONTENT_ERROR); @@ -838,7 +843,8 @@ function GetEPGFromSKY($ChannelInfo) { 'rebroadcast' => $rebroadcast, 'rating' => $rating ); - writeProgram($programdata); + writeProgram($programdata); + usleep(1000); endforeach; endif; } catch(Exception $e) { @@ -908,6 +914,7 @@ function GetEPGFromNaver($ChannelInfo) { $rating = $program['grade']; //ChannelId, startTime, programName, subprogramName, desc, actors, producers, category, episode, rebroadcast, rating $epginfo[] = array($ChannelId, $startTime, $programName, $subprogramName, $desc, $actors, $producers, $category, $episode, $rebroadcast, $rating); + usleep(1000); endforeach; endfor; endfor; @@ -929,11 +936,12 @@ function GetEPGFromIscs($ChannelInfo) { $ServiceId = $ChannelInfo[3]; $epginfo = array(); foreach(range(1, $GLOBALS['period']) as $k) : - $url = "https://www.iscs.co.kr/service/sub/ajax_channel_view.asp"; + $url = "http://m.iscs.co.kr/sub/02/data.asp"; $day = date("Y-m-d", strtotime("+".($k - 1)." days")); $params = array( - 's_idx' => $ServiceId, - 'C_date' => $day + 'Exec_Mode' => 'view', + 'Source_Id' => $ServiceId, + 'Ch_Day' => $day ); $params = http_build_query($params); $method = "POST"; @@ -945,43 +953,33 @@ function GetEPGFromIscs($ChannelInfo) { try { $data = json_decode($response, TRUE); if(json_last_error() != JSON_ERROR_NONE) throw new Exception(JSON_SYNTAX_ERROR); - if(count($data['html']) == 0) : + if(count($data['total']) == 0) : if($GLOBALS['debug']) : printError($ChannelName.CHANNEL_ERROR); endif; else : - $response = $data['html']; - $pattern = '/(.*)<\/td>/'; - $response = preg_replace_callback($pattern, function($matches) { return ''.htmlspecialchars($matches[1]).'';}, $response); - $response = mb_convert_encoding($response, "HTML-ENTITIES", "UTF-8"); - $dom = new DomDocument; - libxml_use_internal_errors(True); - if($dom->loadHTML($response)): - $xpath = new DomXPath($dom); - $query = "//div[@class='pp_tbl']/table/tbody/tr"; - $rows = $xpath->query($query); - foreach($rows as $row) : - $startTime = $endTime = $programName = $subprogramName = $desc = $actors = $producers = $category = $episode = ""; - $rebroadcast = False; + $programs = $data['list']; + foreach($programs as $program) : + $startTime = $endTime = $programName = $subprogramName = $desc = $actors = $producers = $category = $episode = ""; + $rebroadcast = False; + $rating = 0; + $startTime = date("YmdHis", strtotime($day." ".$program['Time'])); + $pattern = '/^(.*?)(?:\(([\d,]+)회\))?(?:\((재)\))?$/'; + preg_match($pattern, trim($program['Pg_Name']), $matches); + if ($matches != NULL) : + if(isset($matches[1])) $programName = trim($matches[1]) ?: ""; + if(isset($matches[2])) $episode = $matches[2] ?: ""; + if(isset($matches[3])) $rebroadcast = $matches[3] ? True : False; + endif; + if($program['Rating'] == '모든연령'): $rating = 0; - $cells = $row->getElementsByTagName('td'); - $startTime = $cells->item(0)->nodeValue ?: ""; - $startTime = date("YmdHis", strtotime($day." ".$startTime)); - $programName = trim($cells->item(1)->nodeValue) ?: ""; - $pattern = '/^(.*?)(?:\(([\d,]+)회\))?(?:\((재)\))?$/'; - preg_match($pattern, $programName, $matches); - if ($matches != NULL) : - if(isset($matches[1])) $programName = trim($matches[1]) ?: ""; - if(isset($matches[2])) $episode = $matches[2] ?: ""; - if(isset($matches[3])) $rebroadcast = $matches[3] ? True : False; - endif; - $rating = $cells->item(2)->nodeValue=='전체관람' ? 0 : str_replace('세이상', '', $cells->item(2)->nodeValue); - //ChannelId, startTime, programName, subprogramName, desc, actors, producers, category, episode, rebroadcast, rating - $epginfo[] = array($ChannelId, $startTime, $programName, $subprogramName, $desc, $actors, $producers, $category, $episode, $rebroadcast, $rating); - endforeach; - else : - if($GLOBALS['debug']) printError($ChannelName.CONTENT_ERROR); - endif; + else: + $rating = str_replace("세이상","", $program['Rating']); + endif; + //ChannelId, startTime, programName, subprogramName, desc, actors, producers, category, episode, rebroadcast, rating + $epginfo[] = array($ChannelId, $startTime, $programName, $subprogramName, $desc, $actors, $producers, $category, $episode, $rebroadcast, $rating); + usleep(1000); + endforeach; endif; } catch(Exception $e) { if($GLOBALS['debug']) printError($e->getMessage()); @@ -1001,38 +999,36 @@ function GetEPGFromHcn($ChannelInfo) { $ServiceId = $ChannelInfo[3]; $epginfo = array(); foreach(range(1, $GLOBALS['period']) as $k) : - $url = "https://www.hcn.co.kr/ur/bs/ch/channelInfo.hcn"; + $url = "http://m.hcn.co.kr/sch_ScheduleList.action"; $day = date("Y-m-d", strtotime("+".($k - 1)." days")); $params = array( - 'method' => 'ajax_00', - 'pageType' => 'sheetList', 'ch_id' => $ServiceId, - 'onairdate' => $day + 'onairdate' => $day, + '_' => _microtime() ); $params = http_build_query($params); - $method = "POST"; + $method = "GET"; try { $response = getWeb($url, $params, $method); if ($response === False && $GLOBALS['debug']) : printError($ChannelName.HTTP_ERROR); else : - $response = mb_convert_encoding($response, "HTML-ENTITIES", "EUC-KR"); + $response = mb_convert_encoding($response, "HTML-ENTITIES", "UTF-8"); $dom = new DomDocument; libxml_use_internal_errors(True); if($dom->loadHTML($response)): $xpath = new DomXPath($dom); - $query = "//tr[@class='']"; + $query = "//li"; $rows = $xpath->query($query); foreach($rows as $row) : $startTime = $endTime = $programName = $subprogramName = $desc = $actors = $producers = $category = $episode = ""; $rebroadcast = False; $rating = 0; - $cells = $row->getElementsByTagName('td'); - $startTime = $cells->item(0)->nodeValue ?: ""; + $startTime = trim($xpath->query("span[@class='progTime']", $row)->item(0)->nodeValue) ?: ""; $startTime = date("YmdHis", strtotime($day." ".$startTime)); - $programName = trim($cells->item(1)->nodeValue) ?: ""; - $category = trim($cells->item(2)->nodeValue) ?: ""; - $category = preg_replace('/\(.*\)/', '', $category); + $programName = trim($xpath->query("span[@class='progTitle']", $row)->item(0)->nodeValue) ?: ""; + //$category = trim($cells->item(2)->nodeValue) ?: ""; + //$category = preg_replace('/\(.*\)/', '', $category); $images = $row->getElementsByTagName('img'); foreach($images as $image): preg_match('/re\.png/', $image->getAttribute('src'), $rebroad); @@ -1042,6 +1038,7 @@ function GetEPGFromHcn($ChannelInfo) { endforeach; //ChannelId, startTime, programName, subprogramName, desc, actors, producers, category, episode, rebroadcast, rating $epginfo[] = array($ChannelId, $startTime, $programName, $subprogramName, $desc, $actors, $producers, $category, $episode, $rebroadcast, $rating); + usleep(1000); endforeach; else : if($GLOBALS['debug']) printError($ChannelName.CONTENT_ERROR); @@ -1126,6 +1123,7 @@ function GetEPGFromPooq($ChannelInfo) { 'rating' => $rating ); writeProgram($programdata); + usleep(1000); endif; endforeach; endif; @@ -1196,6 +1194,7 @@ function GetEPGFromMbc($ChannelInfo) { 'rating' => $rating ); writeProgram($programdata); + usleep(1000); endif; endforeach; endif; @@ -1267,7 +1266,8 @@ function GetEPGFromMil($ChannelInfo) { 'rebroadcast' => $rebroadcast, 'rating' => $rating ); - writeProgram($programdata); + writeProgram($programdata); + usleep(1000); endforeach; endif; } catch(Exception $e) { @@ -1334,7 +1334,8 @@ function GetEPGFromIfm($ChannelInfo) { 'rebroadcast' => $rebroadcast, 'rating' => $rating ); - writeProgram($programdata); + writeProgram($programdata); + usleep(1000); endforeach; endif; } catch(Exception $e) { @@ -1390,7 +1391,8 @@ function GetEPGFromKbs($ChannelInfo) { $programName = trim($cells->item(2)->childNodes->item(0)->nodeValue); $programName = str_replace(array("[","]", " Broadcast"), array("", "", ""), $programName); //ChannelId, startTime, programName, subprogramName, desc, actors, producers, category, episode, rebroadcast, rating - $epginfo[] = array($ChannelId, $startTime, $programName, $subprogramName, $desc, $actors, $producers, $category, $episode, $rebroadcast, $rating); + $epginfo[] = array($ChannelId, $startTime, $programName, $subprogramName, $desc, $actors, $producers, $category, $episode, $rebroadcast, $rating); + usleep(1000); endforeach; endif; } catch(Exception $e) { @@ -1465,7 +1467,8 @@ function GetEPGFromArirang($ChannelInfo) { 'rebroadcast' => $rebroadcast, 'rating' => $rating ); - writeProgram($programdata); + writeProgram($programdata); + usleep(1000); endforeach; else : if($GLOBALS['debug']) printError($ChannelName.CONTENT_ERROR); @@ -1640,4 +1643,10 @@ function printLog($string) { function printError($string) { fwrite(STDERR, "Error : ".$string."\n"); } -?> \ No newline at end of file + +function _microtime() +{ + list($usec, $sec) = explode(" ", microtime()); + return ($sec.(int)($usec*1000)); +} +?> diff --git a/epg2xml.py b/epg2xml.py index 55df001..020ea62 100644 --- a/epg2xml.py +++ b/epg2xml.py @@ -204,13 +204,12 @@ def GetEPGFromEPG(ChannelInfo): episode = matches.group(7) if matches.group(7) else '' #ChannelId, startTime, programName, subprogramName, desc, actors, producers, category, episode, rebroadcast, rating epginfo.append([ChannelId, startTime, programName, subprogramName, desc, actors, producers, category, episode, rebroadcast, rating]) - time.sleep(0.01) + time.sleep(0.001) else: if(debug): printError(ChannelName + CONTENT_ERROR) else: pass except (requests.exceptions.RequestException) as e: if(debug): printError(ChannelName + str(e)) - time.sleep(0.01) epgzip(epginfo) # Get EPG data from KT @@ -250,14 +249,13 @@ def GetEPGFromKT(ChannelInfo): if not(matches is None): rating = int(matches.group()) #ChannelId, startTime, programName, subprogramName, desc, actors, producers, category, episode, rebroadcast, rating epginfo.append([ChannelId, startTime, programName, subprogramName, desc, actors, producers, category, episode, rebroadcast, rating]) - time.sleep(0.01) + time.sleep(0.001) else: if(debug): printError(ChannelName + CONTENT_ERROR) else: pass except (requests.exceptions.RequestException) as e: if(debug): printError(ChannelName + str(e)) else: pass - time.sleep(0.01) epgzip(epginfo) # Get EPG data from LG @@ -300,14 +298,13 @@ def GetEPGFromLG(ChannelInfo): category = cell[2].text.strip() #ChannelId, startTime, programName, subprogramName, desc, actors, producers, category, episode, rebroadcast, rating epginfo.append([ChannelId, startTime, programName, subprogramName, desc, actors, producers, category, episode, rebroadcast, rating]) - time.sleep(0.01) + time.sleep(0.001) else: if(debug): printError(ChannelName + CONTENT_ERROR) else: pass except (requests.exceptions.RequestException) as e: if(debug): printError(ChannelName + str(e)) else: pass - time.sleep(0.01) epgzip(epginfo) # Get EPG data from SK @@ -333,8 +330,9 @@ def GetEPGFromSK(ChannelInfo): startTime = endTime = programName = subprogramName = desc = actors = producers = category = episode = '' rebroadcast = False rating = 0 + programName = program['programName'].replace('...', '>').encode('utf-8') pattern = '^(.*?)(?:\s*[\(<]([\d,회]+)[\)>])?(?:\s*<([^<]*?)>)?(\((재)\))?$' - matches = re.match(pattern, program['programName'].replace('...', '>').encode('utf-8')) + matches = re.match(pattern, programName) if not (matches is None): programName = matches.group(1).strip() if matches.group(1) else '' subprogramName = matches.group(3).strip() if matches.group(3) else '' @@ -353,7 +351,7 @@ def GetEPGFromSK(ChannelInfo): rating = int(program['ratingCd']) if program['programName'] else 0 programdata = {'channelId':ChannelId, 'startTime':startTime, 'endTime':endTime, 'programName':programName, 'subprogramName':subprogramName, 'desc':desc, 'actors':actors, 'producers':producers, 'category':category, 'episode':episode, 'rebroadcast':rebroadcast, 'rating':rating} writeProgram(programdata) - time.sleep(0.01) + time.sleep(0.001) except ValueError: if(debug): printError(ChannelName + CONTENT_ERROR) else: pass @@ -411,14 +409,13 @@ def GetEPGFromSKB(ChannelInfo): rating = int(rating.text.decode('string_escape').replace('세','').strip()) #ChannelId, startTime, programName, subprogramName, desc, actors, producers, category, episode, rebroadcast, rating epginfo.append([ChannelId, startTime, programName, subprogramName, desc, actors, producers, category, episode, rebroadcast, rating]) - time.sleep(0.01) + time.sleep(0.001) else: if(debug): printError(ChannelName + CONTENT_ERROR) else: pass except (requests.exceptions.RequestException) as e: if(debug): printError(ChannelName + str(e)) else: pass - time.sleep(0.01) epgzip(epginfo) # Get EPG data from SKY @@ -465,7 +462,7 @@ def GetEPGFromSKY(ChannelInfo): rating = int(program['grade']) if program['grade'] else 0 programdata = {'channelId':ChannelId, 'startTime':startTime, 'endTime':endTime, 'programName':programName, 'subprogramName':subprogramName, 'desc':desc, 'actors':actors, 'producers':producers, 'category':category, 'episode':episode, 'rebroadcast':rebroadcast, 'rating':rating} writeProgram(programdata) - time.sleep(0.01) + time.sleep(0.001) except ValueError: if(debug): printError(ChannelName + CONTENT_ERROR) else: pass @@ -506,7 +503,7 @@ def GetEPGFromNaver(ChannelInfo): rating = program['grade'] #ChannelId, startTime, programName, subprogramName, desc, actors, producers, category, episode, rebroadcast, rating epginfo.append([ChannelId, startTime, programName, subprogramName, desc, actors, producers, category, episode, rebroadcast, rating]) - time.sleep(0.01) + time.sleep(0.001) except ValueError: if(debug): printError(ChannelName + CONTENT_ERROR) else: pass @@ -521,48 +518,43 @@ def GetEPGFromIscs(ChannelInfo): ChannelName = ChannelInfo[1] ServiceId = ChannelInfo[3] epginfo = [] - url='http://www.iscs.co.kr/service/sub/ajax_channel_view.asp' + url='http://m.iscs.co.kr/sub/02/data.asp' for k in range(period): day = today + datetime.timedelta(days=k) - params = {'s_idx': ServiceId, 'C_date': day} + params = {'Exec_Mode': 'view', 'Source_Id': ServiceId, 'Ch_Day': day} response = requests.post(url, data=params, headers=ua, timeout=timeout) response.raise_for_status() json_data = response.text try: data = json.loads(json_data, encoding='utf-8') - pattern = '(.*)<\/td>' - data['html'] = re.sub(pattern, partial(replacement, tag='td'), data['html']) - strainer = SoupStrainer('tbody') - soup = BeautifulSoup(data['html'], 'lxml', parse_only=strainer) - html = soup.find_all('tr') if soup.find_all('tr') else '' - if(html) : - for row in html: + if(data['total'] > 0 ): + programs = data['list'] + for program in programs: startTime = endTime = programName = subprogramName = desc = actors = producers = category = episode = '' rebroadcast = False rating = 0 - startTime = str(day) + ' ' + row.find('td', {'class':'time'}).text.strip() + startTime = str(day) + ' ' + program['Time'] startTime = datetime.datetime.strptime(startTime, '%Y-%m-%d %H:%M') startTime = startTime.strftime('%Y%m%d%H%M%S') - programName = row.find('td', {'class':'title'}).text.decode('string_escape').strip() - rating = row.find('span', {'class':'year'}).text.decode('string_escape').strip() - if rating == '전체관람' : rating = 0 - else : rating = rating.replace('세이상', ' ') - pattern = '^(.*?)(?:\(([\d,]+)회\))?(?:\((재)\))?$' - matches = re.match(pattern, programName) + pattern = '^(.*?)(?:\(([\d,]+)회\))?(?:\((재)\))?$'; + matches = re.match(pattern, program['Pg_Name'].decode('string_escape').strip()) if not(matches is None) : programName = matches.group(1) if matches.group(1) else '' episode = matches.group(2) if matches.group(2) else '' rebroadcast = True if matches.group(3) else False + if program['Rating'].decode('string_escape').strip() == '모든연령': + rating = 0 + else: + rating = program['Rating'].replace('세이상','') #ChannelId, startTime, programName, subprogramName, desc, actors, producers, category, episode, rebroadcast, rating epginfo.append([ChannelId, startTime, programName, subprogramName, desc, actors, producers, category, episode, rebroadcast, rating]) - time.sleep(0.01) + time.sleep(0.001) except ValueError: if(debug): printError(ChannelName + CONTENT_ERROR) else: pass except (requests.RequestException) as e: if(debug): printError(ChannelName + str(e)) else: pass - time.sleep(0.01) epgzip(epginfo) # Get EPG data from HCN @@ -571,48 +563,43 @@ def GetEPGFromHcn(ChannelInfo): ChannelName = ChannelInfo[1] ServiceId = ChannelInfo[3] epginfo = [] - html = [] - url = 'http://www.hcn.co.kr/ur/bs/ch/channelInfo.hcn' + url = 'http://m.hcn.co.kr/sch_ScheduleList.action' for k in range(period): day = today + datetime.timedelta(days=k) params = {'method': 'ajax_00', 'pageType': 'sheetList', 'ch_id': ServiceId, 'onairdate': day} + params = {'ch_id': ServiceId, 'onairdate': day, '_': int(time.time()*1000)} try: response = requests.get(url, params=params, headers=ua, timeout=timeout) response.raise_for_status() html_data = response.content - data = unicode(html_data, 'euc-kr', 'ignore').encode('utf-8', 'ignore') - strainer = SoupStrainer('tr', {'class':''}) + data = html_data + strainer = SoupStrainer('li') soup = BeautifulSoup(data, 'lxml', parse_only=strainer, from_encoding='utf-8') - for td in soup.findAll('td', {'class': 'f'}): - td.insert(0, str(day) + ' ') - html = html + soup.find_all('tr') if soup.find_all('tr') else '' + html = soup.find_all('li') if soup.find_all('li') else '' + if(html) : + for row in html: + startTime = endTime = programName = subprogramName = desc = actors = producers = category = episode = '' + rebroadcast = False + rating = 0 + startTime = str(day) + ' ' + row.find('span', {'class':'progTime'}).text.strip() + startTime = datetime.datetime.strptime(startTime, '%Y-%m-%d %H:%M') + startTime = startTime.strftime('%Y%m%d%H%M%S') + programName = row.find('span', {'class':'progTitle'}).text.decode('string_escape').strip() + for image in row.find_all('img', {'class':'vM'}, alt=True): + rebroad = re.match('(재방송)',image['alt'].decode('string_escape').strip()) + if not (rebroad is None): rebroadcast = True + grade = re.match('([\d,]+)',image['alt']) + if not (grade is None): rating = int(grade.group(1)) + #ChannelId, startTime, programName, subprogramName, desc, actors, producers, category, episode, rebroadcast, rating + epginfo.append([ChannelId, startTime, programName, subprogramName, desc, actors, producers, category, episode, rebroadcast, rating]) + time.sleep(0.001) + except ValueError: + if(debug): printError(ChannelName + CONTENT_ERROR) + else: pass except (requests.exceptions.RequestException) as e: if(debug): printError(ChannelName + str(e)) else: pass - if(html): - for cur, next in pairs(html): - startTime = endTime = programName = subprogramName = desc = actors = producers = category = episode = '' - rebroadcast = False - rating = 0 - startTime = cur.find('td', {'class':'f'}).text.strip() - startTime = datetime.datetime.strptime(startTime, '%Y-%m-%d %H:%M') - startTime = startTime.strftime('%Y%m%d%H%M%S') - endTime = next.find('td', {'class':'f'}).text.strip() - endTime = datetime.datetime.strptime(endTime, '%Y-%m-%d %H:%M') - endTime = endTime.strftime('%Y%m%d%H%M%S') - programName = cur.find('td', {'class':'left'}).text.decode('string_escape').strip() - category = cur.find('td', {'class':'l'}).text.decode('string_escape').strip() - category = re.sub('\(.*\)', '', category) - for image in cur.find_all('img', {'class':'vM'}, alt=True): - rebroad = re.match('(재방송)',image['alt'].decode('string_escape').strip()) - if not (rebroad is None): rebroadcast = True - grade = re.match('([\d,]+)',image['alt']) - if not (grade is None): rating = int(grade.group(1)) - programdata = {'channelId':ChannelId, 'startTime':startTime, 'endTime':endTime, 'programName':programName, 'subprogramName':subprogramName, 'desc':desc, 'actors':actors, 'producers':producers, 'category':category, 'episode':episode, 'rebroadcast':rebroadcast, 'rating':rating} - writeProgram(programdata) - else: - if(debug): printError(ChannelName + CONTENT_ERROR) - else: pass + epgzip(epginfo) # Get EPG data from POOQ def GetEPGFromPooq(ChannelInfo): @@ -657,7 +644,7 @@ def GetEPGFromPooq(ChannelInfo): rating = int(program['age']) programdata = {'channelId':ChannelId, 'startTime':startTime, 'endTime':endTime, 'programName':programName, 'subprogramName':subprogramName, 'desc':desc, 'actors':actors, 'producers':producers, 'category':category, 'episode':episode, 'rebroadcast':rebroadcast, 'rating':rating} writeProgram(programdata) - time.sleep(0.01) + time.sleep(0.001) except ValueError: if(debug): printError(ChannelName + CONTENT_ERROR) else: pass @@ -699,14 +686,13 @@ def GetEPGFromMbc(ChannelInfo): category = '음악' programdata = {'channelId':ChannelId, 'startTime':startTime, 'endTime':endTime, 'programName':programName, 'subprogramName':subprogramName, 'desc':desc, 'actors':actors, 'producers':producers, 'category':category, 'episode':episode, 'rebroadcast':rebroadcast, 'rating':rating} writeProgram(programdata) - time.sleep(0.01) + time.sleep(0.001) except ValueError: if(debug): printError(ChannelName + CONTENT_ERROR) else: pass except (requests.exceptions.RequestException) as e: if(debug): printError(ChannelName + str(e)) else: pass - time.sleep(0.01) # Get EPG data from MIL def GetEPGFromMil(ChannelInfo): @@ -749,14 +735,13 @@ def GetEPGFromMil(ChannelInfo): producers = unescape(program['movie_director']) programdata = {'channelId':ChannelId, 'startTime':startTime, 'endTime':endTime, 'programName':programName, 'subprogramName':subprogramName, 'desc':desc, 'actors':actors, 'producers':producers, 'category':category, 'episode':episode, 'rebroadcast':rebroadcast, 'rating':rating} writeProgram(programdata) - time.sleep(0.01) + time.sleep(0.001) except ValueError: if(debug): printError(ChannelName + CONTENT_ERROR) else: pass except (requests.exceptions.RequestException) as e: if(debug): printError(ChannelName + str(e)) else: pass - time.sleep(0.01) # Get EPG data from IFM def GetEPGFromIfm(ChannelInfo): @@ -795,14 +780,13 @@ def GetEPGFromIfm(ChannelInfo): producers = program['pgmPd'] programdata = {'channelId':ChannelId, 'startTime':startTime, 'endTime':endTime, 'programName':programName, 'subprogramName':subprogramName, 'desc':desc, 'actors':actors, 'producers':producers, 'category':category, 'episode':episode, 'rebroadcast':rebroadcast, 'rating':rating} writeProgram(programdata) - time.sleep(0.01) + time.sleep(0.001) except ValueError: if(debug): printError(ChannelName + CONTENT_ERROR) else: pass except (requests.exceptions.RequestException) as e: if(debug): printError(ChannelName + str(e)) else: pass - time.sleep(0.01) # Get EPG data from KBS def GetEPGFromKbs(ChannelInfo): @@ -834,14 +818,13 @@ def GetEPGFromKbs(ChannelInfo): startTime = startTime.strftime('%Y%m%d%H%M%S') #ChannelId, startTime, programName, subprogramName, desc, actors, producers, category, episode, rebroadcast, rating epginfo.append([ChannelId, startTime, programName, subprogramName, desc, actors, producers, category, episode, rebroadcast, rating]) - time.sleep(0.01) + time.sleep(0.001) except ValueError: if(debug): printError(ChannelName + CONTENT_ERROR) else: pass except (requests.exceptions.RequestException) as e: if(debug): printError(ChannelName + str(e)) else: pass - time.sleep(0.01) epgzip(epginfo) # Get EPG data from ARIRANG @@ -893,14 +876,13 @@ def GetEPGFromArirang(ChannelInfo): rebroadcast = True if matches.group(2) else False programdata = {'channelId':ChannelId, 'startTime':startTime, 'endTime':endTime, 'programName':programName, 'subprogramName':subprogramName, 'desc':desc, 'actors':actors, 'producers':producers, 'category':category, 'episode':episode, 'rebroadcast':rebroadcast, 'rating':rating} writeProgram(programdata) - time.sleep(0.01) + time.sleep(0.001) else: if(debug): printError(ChannelName + CONTENT_ERROR) else: pass except (requests.exceptions.RequestException) as e: if(debug): printError(ChannelName + str(e)) else: pass - time.sleep(0.01) # Zip epginfo def epgzip(epginfo): @@ -940,9 +922,13 @@ def writeProgram(programdata): producers = escape(programdata['producers']) category = escape(programdata['category']) episode = programdata['episode'] - if episode : - episode_ns = int(episode) - 1 - episode_ns = '0'+ '.' + str(episode_ns) + '.' + '0' + '/' + '0' + if episode: + try: + episode_ns = int(episode) - 1 + episode_ns = '0'+ '.' + str(episode_ns) + '.' + '0' + '/' + '0' + except ValueError as ex: + episode_ns = int(episode.split(',', 1)[0]) - 1 + episode_ns = '0'+ '.' + str(episode_ns) + '.' + '0' + '/' + '0' episode_on = episode rebroadcast = programdata['rebroadcast'] if episode and addepisode == 'y': programName = programName + ' ('+ str(episode) + '회)' @@ -1154,19 +1140,4 @@ elif output == "socket" : else : printError("epg2xml.json 파일의 default_xml_socket항목이 없습니다."); sys.exit() -getEpg() - - - -#a = [0, 1,2,3,4,5] -#it = iter(a) - -#print(next(it)) -#print(next(it)) -#for i in it : -# print(i) - - -#import itertools as it -#for prev, cur in pairs(it.cycle([1,2,3,4])): -# print( cur , prev ) \ No newline at end of file +getEpg() \ No newline at end of file