SKB 함수까지 체크
This commit is contained in:
parent
5059499479
commit
f40c20e977
16
epg2xml.php
16
epg2xml.php
@ -588,10 +588,10 @@ function GetEPGFromLG($ChannelInfo) {
|
||||
printError($ChannelName.HTTP_ERROR);
|
||||
else :
|
||||
$response = '<meta http-equiv="Content-Type" content="text/html; charset=UTF-8">'.$response;
|
||||
$dom = new DomDocument;
|
||||
libxml_use_internal_errors(True);
|
||||
$response = mb_convert_encoding($response, "UTF-8", "EUC-KR");
|
||||
$response = str_replace(array('<재>', ' [..', ' (..'), array('&lt;재&gt;', '', ''), $response);
|
||||
$dom = new DomDocument;
|
||||
libxml_use_internal_errors(True);
|
||||
if($dom->loadHTML($response)):
|
||||
$xpath = new DomXPath($dom);
|
||||
$query = "//div[@class='tblType list']/table/tbody/tr";
|
||||
@ -603,7 +603,7 @@ function GetEPGFromLG($ChannelInfo) {
|
||||
$cells = $row->getElementsByTagName('td');
|
||||
$startTime = date("YmdHis", strtotime($day." ".trim($cells->item(0)->nodeValue)));
|
||||
$programName = trim($cells->item(1)->childNodes->item(0)->nodeValue);
|
||||
$pattern = '/(<재>?)?(.*?)(?:\[(.*)\])?\s?(?:\(([\d,]+)회\))?$/';
|
||||
$pattern = '/(<재>)?\s?(?:\[.*?\])?(.*?)(?:\[(.*)\])?\s?(?:\(([\d,]+)회\))?$/';
|
||||
preg_match($pattern, $programName, $matches);
|
||||
if ($matches != NULL) :
|
||||
if(isset($matches[2])) $programName = trim($matches[2]) ?: "";
|
||||
@ -726,9 +726,13 @@ function GetEPGFromSKB($ChannelInfo) {
|
||||
printError($ChannelName.HTTP_ERROR);
|
||||
else :
|
||||
$response = str_replace('charset="euc-kr"', 'charset="utf-8"', $response);
|
||||
$response = mb_convert_encoding($response, "UTF-8", "EUC-KR");
|
||||
$response = preg_replace('/<!--(.*?)-->/is', '', $response);
|
||||
$response = preg_replace('/<span><\/span>/is', '', $response);
|
||||
$pattern = '/<span>(.*)<\/span>/';
|
||||
$response = preg_replace_callback($pattern, function($matches) { return '<span class="title">'.htmlspecialchars($matches[1], ENT_NOQUOTES).'</span>';}, $response);
|
||||
$dom = new DomDocument;
|
||||
libxml_use_internal_errors(True);
|
||||
$response = mb_convert_encoding($response, "UTF-8", "EUC-KR");
|
||||
if($dom->loadHTML($response)):
|
||||
$xpath = new DomXPath($dom);
|
||||
$query = "//span[@class='caption' or @class='explan' or @class='fullHD' or @class='UHD' or @class='nowon']";
|
||||
@ -758,7 +762,6 @@ function GetEPGFromSKB($ChannelInfo) {
|
||||
//ChannelId, startTime, programName, subprogramName, desc, actors, producers, category, episode, rebroadcast, rating
|
||||
$epginfo[] = array($ChannelId, $startTime, $programName, $subprogramName, $desc, $actors, $producers, $category, $episode, $rebroadcast, $rating);
|
||||
endforeach;
|
||||
epgzip($epginfo);
|
||||
else :
|
||||
if($GLOBALS['debug']) printError($ChannelName.CONTENT_ERROR);
|
||||
endif;
|
||||
@ -767,6 +770,7 @@ function GetEPGFromSKB($ChannelInfo) {
|
||||
if($GLOBALS['debug']) printError($e->getMessage());
|
||||
}
|
||||
endforeach;
|
||||
epgzip($epginfo);
|
||||
}
|
||||
|
||||
// Get EPG data from SKY
|
||||
@ -1535,7 +1539,7 @@ function writeProgram($programdata) {
|
||||
$rating = sprintf("%s세 이상 관람가", $programdata['rating']);
|
||||
endif;
|
||||
if($GLOBALS['addverbose'] == 'y') :
|
||||
$desc = htmlspecialchars($programdata['programName'], ENT_XML1);
|
||||
$desc = trim(htmlspecialchars($programdata['programName'], ENT_XML1));
|
||||
if($subprogramName) $desc = $desc."\n부제 : ".$subprogramName;
|
||||
if($rebroadcast == True && $GLOBALS['addrebroadcast'] == 'y') $desc = $desc."\n방송 : 재방송";
|
||||
if($episode) $desc = $desc."\n회차 : ".$episode."회";
|
||||
|
39
epg2xml.py
39
epg2xml.py
@ -163,7 +163,7 @@ def GetEPGFromEPG(ChannelInfo):
|
||||
html_data = response.content
|
||||
data = unicode(html_data, 'euc-kr', 'ignore').encode('utf-8', 'ignore')
|
||||
pattern = '<td height="25" valign=top >(.*)<\/td>'
|
||||
data = re.sub(pattern, partial(replacement,txt='title'), data)
|
||||
data = re.sub(pattern, partial(replacement, tag='td'), data)
|
||||
strainer = SoupStrainer('table', {'style':'margin-bottom:30'})
|
||||
soup = BeautifulSoup(data, 'lxml', parse_only=strainer, from_encoding='utf-8')
|
||||
html = soup.find_all('table', {'style':'margin-bottom:30'})
|
||||
@ -270,7 +270,7 @@ def GetEPGFromLG(ChannelInfo):
|
||||
response.raise_for_status()
|
||||
html_data = response.content
|
||||
data = unicode(html_data, 'euc-kr', 'ignore').encode('utf-8', 'ignore')
|
||||
data = data.replace('<재>', '&lt;재&gt;')
|
||||
data = data.replace('<재>', '&lt;재&gt;').replace(' [..','').replace(' (..', '')
|
||||
strainer = SoupStrainer('table')
|
||||
soup = BeautifulSoup(data, 'lxml', parse_only=strainer, from_encoding='utf-8')
|
||||
html = soup.find('table').tbody.find_all('tr') if soup.find('table') else ''
|
||||
@ -285,7 +285,7 @@ def GetEPGFromLG(ChannelInfo):
|
||||
startTime = startTime.strftime('%Y%m%d%H%M%S')
|
||||
rating = 0 if cell[1].find('span', {'class': 'tag cte_all'}).text.strip()=="All" else int(cell[1].find('span', {'class': 'tag cte_all'}).text.strip())
|
||||
cell[1].find('span', {'class': 'tagGroup'}).decompose()
|
||||
pattern = '(<재>?)?(.*?)(?:\[(.*)\])?\s?(?:\(([\d,]+)회\))?$'
|
||||
pattern = '(<재>)?\s?(?:\[.*?\])?(.*?)(?:\[(.*)\])?\s?(?:\(([\d,]+)회\))?$'
|
||||
matches = re.match(pattern, cell[1].text.strip().decode('string_escape'))
|
||||
if not (matches is None):
|
||||
programName = matches.group(2).strip() if matches.group(2) else ''
|
||||
@ -368,6 +368,17 @@ def GetEPGFromSKB(ChannelInfo):
|
||||
response.raise_for_status()
|
||||
html_data = response.content
|
||||
data = unicode(html_data, 'euc-kr', 'ignore').encode('utf-8', 'ignore')
|
||||
data = re.sub('<!--(.*?)-->', '', data, 0, re.I|re.S)
|
||||
data = re.sub('<span></span>', '', data)
|
||||
data = re.sub('<span class="title">', '<span>', data)
|
||||
data = re.sub('<span class="explan">화면해설</span>','',data)
|
||||
data = re.sub('<span class="caption">자막방송</span>','',data)
|
||||
data = re.sub('<span class="fullHD">Full HD</span>','',data)
|
||||
data = re.sub('<span class="UHD">UHD</span>','',data)
|
||||
data = re.sub('<span class="nowon">now on</span>','',data)
|
||||
pattern = '<span>(.*)<\/span>'
|
||||
data = re.sub(pattern, partial(replacement, tag='span'), data)
|
||||
#print(data)
|
||||
strainer = SoupStrainer('div', {'id':'dawn'})
|
||||
soup = BeautifulSoup(data, 'lxml', parse_only=strainer, from_encoding='utf-8')
|
||||
html = soup.find_all('li') if soup.find_all('li') else ''
|
||||
@ -379,9 +390,7 @@ def GetEPGFromSKB(ChannelInfo):
|
||||
startTime = str(day) + ' ' + row.find('span', {'class':'time'}).text
|
||||
startTime = datetime.datetime.strptime(startTime, '%Y-%m-%d %H:%M')
|
||||
startTime = startTime.strftime('%Y%m%d%H%M%S')
|
||||
if row.find('span', {'class':['caption', 'explan', 'fullHD', 'UHD', 'nowon']}) :
|
||||
row.find('span', {'class':['caption', 'explan', 'fullHD', 'UHD', 'nowon']}).decompose()
|
||||
cell = row.find('span', {'class':None}).text.decode('string_escape').strip()
|
||||
cell = row.find('span', {'class':'title'}).text.decode('string_escape').strip()
|
||||
pattern = "^(.*?)(\(([\d,]+)회\))?(<(.*)>)?(\((재)\))?$"
|
||||
matches = re.match(pattern, cell)
|
||||
if not(matches is None) :
|
||||
@ -394,13 +403,13 @@ def GetEPGFromSKB(ChannelInfo):
|
||||
rating = int(rating.text.decode('string_escape').replace('세','').strip())
|
||||
#ChannelId, startTime, programName, subprogramName, desc, actors, producers, category, episode, rebroadcast, rating
|
||||
epginfo.append([ChannelId, startTime, programName, subprogramName, desc, actors, producers, category, episode, rebroadcast, rating])
|
||||
epgzip(epginfo)
|
||||
else:
|
||||
if(debug): printError(ChannelName + CONTENT_ERROR)
|
||||
else: pass
|
||||
except (requests.exceptions.RequestException) as e:
|
||||
if(debug): printError(ChannelName + str(e))
|
||||
else: pass
|
||||
epgzip(epginfo)
|
||||
|
||||
# Get EPG data from SKY
|
||||
def GetEPGFromSKY(ChannelInfo):
|
||||
@ -510,7 +519,7 @@ def GetEPGFromIscs(ChannelInfo):
|
||||
try:
|
||||
data = json.loads(json_data, encoding='utf-8')
|
||||
pattern = '<td class="name">(.*)<\/td>'
|
||||
data['html'] = re.sub(pattern, partial(replacement, txt='name'), data['html'])
|
||||
data['html'] = re.sub(pattern, partial(replacement, tag='td'), data['html'])
|
||||
strainer = SoupStrainer('tbody')
|
||||
soup = BeautifulSoup(data['html'], 'lxml', parse_only=strainer)
|
||||
html = soup.find_all('tr') if soup.find_all('tr') else ''
|
||||
@ -522,7 +531,7 @@ def GetEPGFromIscs(ChannelInfo):
|
||||
startTime = str(day) + ' ' + row.find('td', {'class':'time'}).text
|
||||
startTime = datetime.datetime.strptime(startTime, '%Y-%m-%d %H:%M')
|
||||
startTime = startTime.strftime('%Y%m%d%H%M%S')
|
||||
programName = row.find('td', {'class':'name'}).text.decode('string_escape').strip()
|
||||
programName = row.find('td', {'class':'title'}).text.decode('string_escape').strip()
|
||||
rating = row.find('span', {'class':'year'}).text.decode('string_escape').strip()
|
||||
if rating == '전체관람' : rating = 0
|
||||
else : rating = rating.replace('세이상', ' ')
|
||||
@ -888,7 +897,7 @@ def writeProgram(programdata):
|
||||
subprogramName = escape(programdata['subprogramName']).strip()
|
||||
matches = re.match('(.*) \(?(\d+부)\)?', unescape(programName.encode('utf-8', 'ignore')))
|
||||
if not(matches is None):
|
||||
programName = escape(matches.group(1));
|
||||
programName = escape(matches.group(1)).strip();
|
||||
subprogramName = escape(matches.group(2)) + ' ' + subprogramName
|
||||
subprogramName = subprogramName.strip()
|
||||
if programName is None:
|
||||
@ -905,7 +914,7 @@ def writeProgram(programdata):
|
||||
else :
|
||||
rating = '%s세 이상 관람가' % (programdata['rating'])
|
||||
if addverbose == 'y':
|
||||
desc = escape(programdata['programName'])
|
||||
desc = escape(programdata['programName']).strip()
|
||||
if subprogramName : desc = desc + '\n부제 : ' + subprogramName
|
||||
if rebroadcast == True and addrebroadcast == 'y' : desc = desc + '\n방송 : 재방송'
|
||||
if episode : desc = desc + '\n회차 : ' + str(episode) + '회'
|
||||
@ -917,11 +926,10 @@ def writeProgram(programdata):
|
||||
desc =''
|
||||
if programdata['desc'] : desc = desc + '\n' + escape(programdata['desc'])
|
||||
desc = re.sub(' +',' ', desc)
|
||||
#desc = re.sub('\s+','\s', desc)
|
||||
contentTypeDict={'교양':'Arts / Culture (without music)', '만화':'Cartoons / Puppets', '교육':'Education / Science / Factual topics', '취미':'Leisure hobbies', '드라마':'Movie / Drama', '영화':'Movie / Drama', '음악':'Music / Ballet / Dance', '뉴스':'News / Current affairs', '다큐':'Documentary', '라이프':'Documentary', '시사/다큐':'Documentary', '연예':'Show / Game show', '스포츠':'Sports', '홈쇼핑':'Advertisement / Shopping'}
|
||||
contentType = ''
|
||||
for key, value in contentTypeDict.iteritems():
|
||||
if category.startswith(key):
|
||||
if key in category:
|
||||
contentType = value
|
||||
print(' <programme start="%s +0900" stop="%s +0900" channel="%s">' % (startTime, endTime, ChannelId))
|
||||
print(' <title lang="kr">%s</title>' % (programName))
|
||||
@ -956,10 +964,11 @@ def printLog(*args):
|
||||
def printError(*args):
|
||||
print("Error : ", *args, file=sys.stderr)
|
||||
|
||||
def replacement(match, txt):
|
||||
def replacement(match, tag):
|
||||
if not(match is None):
|
||||
tag = tag.strip()
|
||||
programName = unescape(match.group(1)).replace('<','&lt;').replace('>','&gt;').strip()
|
||||
programName = '<td class="'+ txt.strip() + '">' + programName + '</td>'
|
||||
programName = '<'+ tag + ' class="title">' + programName + '</' + tag + '>'
|
||||
return programName
|
||||
else:
|
||||
return '';
|
||||
|
Loading…
x
Reference in New Issue
Block a user