Skip to content

Commit

Permalink
3.93
Browse files Browse the repository at this point in the history
  • Loading branch information
moyy996 committed Mar 30, 2020
1 parent 62dcbfb commit 2de2e29
Show file tree
Hide file tree
Showing 19 changed files with 2,192 additions and 1,266 deletions.
840 changes: 506 additions & 334 deletions AVDC_Main.py

Large diffs are not rendered by default.

97 changes: 65 additions & 32 deletions Function/Function.py
Original file line number Diff line number Diff line change
@@ -1,13 +1,10 @@
#!/usr/bin/env python3
# -*- coding: utf-8 -*-

import re
import os
import json
from configparser import ConfigParser
import requests
from lxml import etree
from Getter import avsox, javbus, javdb, fc2fans_club, mgstage, dmm
from Getter import avsox, javbus, javdb, fc2fans_club, mgstage, dmm, jav321


# ========================================================================获取config
Expand All @@ -22,6 +19,35 @@ def get_config():
return config


# ======================================================================== uncensored check (是否为无码)
def is_uncensored(number):
    """Return True if *number* looks like an uncensored movie ID.

    Built-in patterns: all-digit IDs (e.g. 111111-111), n-prefixed IDs
    (n1111), and anything containing HEYZO.  Additionally, any prefix
    listed in the config's [uncensored] uncensored_prefix entry
    ('|'-separated) marks the number as uncensored.
    """
    upper = number.upper()  # hoisted: compared several times below
    # Raw strings avoid invalid-escape warnings for \d in the patterns.
    if re.match(r'^\d{4,}', number) or re.match(r'n\d{4}', number) or 'HEYZO' in upper:
        return True
    # Fall back to user-configured prefixes from config.ini.
    config = get_config()
    prefix_list = str(config['uncensored']['uncensored_prefix']).split('|')
    return any(pre.upper() in upper for pre in prefix_list)


# ======================================================================== metadata fetch failure check (元数据获取失败检测)
def getDataState(json_data):
    """Return 0 when scraped metadata is invalid, 1 when it looks usable.

    A scrape is considered failed when the 'title' field carries no real
    content: the empty string, or the literal strings 'None'/'null' that
    the scrapers emit on failure.
    """
    # Membership test replaces three chained == comparisons on the same key.
    return 0 if json_data['title'] in ('', 'None', 'null') else 1


# ======================================================================== strip escape literals (去掉异常字符)
def escapePath(path, Config):
    """Remove every configured backslash-escape literal from *path*.

    Each character in the [escape] literals config entry is treated as
    the second half of a two-character sequence '\\<char>' to delete.
    """
    for char in Config['escape']['literals']:
        path = path.replace('\\' + char, '')
    return path


# ========================================================================获取视频列表
def movie_lists(escape_folder, movie_type, movie_path):
if escape_folder != '':
Expand Down Expand Up @@ -99,31 +125,27 @@ def getNumber(filepath, escape_string):
return os.path.splitext(filepath.split('/')[-1])[0]


# ========================================================================去掉异常字符
def escapePath(path, Config): # Remove escape literals
escapeLiterals = Config['escape']['literals']
backslash = '\\'
for literal in escapeLiterals:
path = path.replace(backslash + literal, '')
return path


# ========================================================================根据番号获取数据
def getDataFromJSON(file_number, config, mode): # 从JSON返回元数据
# ================================================网站规则添加开始================================================
isuncensored = is_uncensored(file_number)
json_data = {}
if mode == 1: # 从全部网站刮削
# =======================================================================无码抓取:111111-111,n1111,HEYZO-1111
if re.match('^\d{4,}', file_number) or re.match('n\d{4}', file_number) or 'HEYZO' in file_number.upper():
# =======================================================================无码抓取:111111-111,n1111,HEYZO-1111,SMD-115
if isuncensored:
json_data = json.loads(javbus.main_uncensored(file_number))
if getDataState(json_data) == 0:
json_data = json.loads(javdb.main(file_number))
json_data = json.loads(javdb.main(file_number, True))
if getDataState(json_data) == 0 and 'HEYZO' in file_number.upper():
json_data = json.loads(jav321.main(file_number, True))
if getDataState(json_data) == 0:
json_data = json.loads(avsox.main(file_number))
# =======================================================================259LUXU-1111
elif re.match('\d+[a-zA-Z]+-\d+', file_number) or 'SIRO' in file_number.upper():
json_data = json.loads(mgstage.main(file_number))
file_number = re.search('[a-zA-Z]+-\d+', file_number).group()
if getDataState(json_data) == 0:
json_data = json.loads(jav321.main(file_number))
if getDataState(json_data) == 0:
json_data = json.loads(javdb.main(file_number))
if getDataState(json_data) == 0:
Expand All @@ -144,6 +166,8 @@ def getDataFromJSON(file_number, config, mode): # 从JSON返回元数据
# =======================================================================MIDE-139
else:
json_data = json.loads(javbus.main(file_number))
if getDataState(json_data) == 0:
json_data = json.loads(jav321.main(file_number))
if getDataState(json_data) == 0:
json_data = json.loads(javdb.main(file_number))
if getDataState(json_data) == 0:
Expand All @@ -159,20 +183,22 @@ def getDataFromJSON(file_number, config, mode): # 从JSON返回元数据
elif mode == 3: # 仅从fc2club
json_data = json.loads(fc2fans_club.main(file_number))
elif mode == 4: # 仅从javbus
if re.match('^\d{5,}', file_number) or re.match('n\d{4}', file_number) or 'HEYZO' in file_number.upper():
if isuncensored:
json_data = json.loads(javbus.main_uncensored(file_number))
elif re.search('\D+.\d{2}.\d{2}.\d{2}', file_number):
json_data = json.loads(javbus.main_us(file_number))
else:
json_data = json.loads(javbus.main(file_number))
elif mode == 5: # 仅从javdb
elif mode == 5: # 仅从jav321
json_data = json.loads(jav321.main(file_number, isuncensored))
elif mode == 6: # 仅从javdb
if re.search('\D+.\d{2}.\d{2}.\d{2}', file_number):
json_data = json.loads(javdb.main_us(file_number))
else:
json_data = json.loads(javdb.main(file_number))
elif mode == 6: # 仅从avsox
json_data = json.loads(javdb.main(file_number, isuncensored))
elif mode == 7: # 仅从avsox
json_data = json.loads(avsox.main(file_number))
elif mode == 7: # 仅从dmm
elif mode == 8: # 仅从dmm
json_data = json.loads(dmm.main(file_number))

# ================================================网站规则添加结束================================================
Expand Down Expand Up @@ -273,8 +299,9 @@ def save_config(json_config):
print("success_output_folder = " + json_config['success_output_folder'], file=code)
print("failed_file_move = " + str(json_config['failed_file_move']), file=code)
print("soft_link = " + str(json_config['soft_link']), file=code)
print("show_poster = " + str(json_config['show_poster']), file=code)
print("website = " + json_config['website'], file=code)
print("# all or mgstage or fc2club or javbus or javdb or avsox or dmm", file=code)
print("# all or mgstage or fc2club or javbus or jav321 or javdb or avsox or dmm", file=code)
print("", file=code)
print("[proxy]", file=code)
print("proxy = " + json_config['proxy'], file=code)
Expand All @@ -296,8 +323,6 @@ def save_config(json_config):
print("media_type = " + json_config['media_type'], file=code)
print("sub_type = " + json_config['sub_type'], file=code)
print("media_path = " + json_config['media_path'], file=code)
print("media_warehouse = " + json_config['media_warehouse'], file=code)
print("# emby or plex or kodi ,emby = jellyfin", file=code)
print("", file=code)
print("[escape]", file=code)
print("literals = " + json_config['literals'], file=code)
Expand All @@ -310,12 +335,20 @@ def save_config(json_config):
print("[emby]", file=code)
print("emby_url = " + json_config['emby_url'], file=code)
print("api_key = " + json_config['api_key'], file=code)
code.close()

print("", file=code)
print("[mark]", file=code)
print("poster_mark = " + str(json_config['poster_mark']), file=code)
print("thumb_mark = " + str(json_config['thumb_mark']), file=code)
print("mark_size = " + str(json_config['mark_size']), file=code)
print("mark_type = " + json_config['mark_type'], file=code)
print("mark_pos = " + json_config['mark_pos'], file=code)
print("# mark_size : range 1-5", file=code)
print("# mark_type : sub, leak, uncensored", file=code)
print("# mark_pos : bottom_right or bottom_left or top_right or top_left", file=code)
print("", file=code)
print("[uncensored]", file=code)
print("uncensored_prefix = " + str(json_config['uncensored_prefix']), file=code)
print("uncensored_poster = " + str(json_config['uncensored_poster']), file=code)
print("# 0 : official, 1 : cut", file=code)

# ========================================================================元数据获取失败检测
def getDataState(json_data):
if json_data['title'] == '' or json_data['title'] == 'None' or json_data['title'] == 'null':
return 0
else:
return 1
code.close()
45 changes: 41 additions & 4 deletions Function/getHtml.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,23 +3,30 @@
from configparser import ConfigParser


# ========================================================================网页请求
def get_html(url, cookies=None):
# ======================================================================== load config (获取config)
def get_config():
    """Locate config.ini (parent directory first, then cwd) and parse it.

    When neither location exists, an empty ConfigParser is returned
    (reading '' is a no-op).
    """
    if os.path.exists('../config.ini'):
        found = '../config.ini'
    elif os.path.exists('config.ini'):
        found = 'config.ini'
    else:
        found = ''
    parser = ConfigParser()
    parser.read(found, encoding='UTF-8')
    return parser


# ========================================================================网页请求
def get_html(url, cookies=None):
config = get_config()
retry_count = 0
proxy = ''
timeout = 0
try:
proxy = str(config['proxy']['proxy'])
timeout = int(config['proxy']['timeout'])
retry_count = int(config['proxy']['retry'])
except:
except Exception as error_info:
print('Error in get_html :' + str(error_info))
print('[-]Proxy config error! Please check the config.')
i = 0
while i < retry_count:
Expand All @@ -39,10 +46,40 @@ def get_html(url, cookies=None):
getweb = requests.get(str(url), headers=headers, timeout=timeout, cookies=cookies)
getweb.encoding = 'utf-8'
return getweb.text
except:
except Exception as error_info:
i += 1
print('Error in get_html :' + str(error_info))
print('[-]Connect retry ' + str(i) + '/' + str(retry_count))
print('[-]Connect Failed! Please check your Proxy or Network!')
return 'ProxyError'


def post_html(url: str, query: dict):
    """POST *query* as form data to *url*, using proxy/retry settings from config.

    Returns the response body text on success, or the sentinel string
    'ProxyError' once every retry has failed.
    """
    config = get_config()
    # Defaults used when the [proxy] section is missing or malformed.
    retry_count = 3
    proxy = ''
    timeout = 10
    try:
        proxy = str(config['proxy']['proxy'])
        timeout = int(config['proxy']['timeout'])
        retry_count = int(config['proxy']['retry'])
    except Exception as error_info:
        print('Error in post_html :' + str(error_info))
        print('[-]Proxy config error! Please check the config.')
    proxies = {"http": "http://" + proxy, "https": "https://" + proxy} if proxy else {}
    for attempt in range(retry_count):
        try:
            response = requests.post(url, data=query, proxies=proxies, timeout=timeout)
            response.encoding = 'utf-8'
            return response.text
        except Exception as error_info:
            print('Error in post_html :' + str(error_info))
            print("[-]Connect retry {}/{}".format(attempt + 1, retry_count))
    print("[-]Connect Failed! Please check your Proxy or Network!")
    return 'ProxyError'


96 changes: 49 additions & 47 deletions Getter/avsox.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,5 @@
import json
import re

from bs4 import BeautifulSoup
from lxml import etree
from Function.getHtml import get_html
Expand All @@ -22,7 +21,7 @@ def getTitle(a):
try:
html = etree.fromstring(a, etree.HTMLParser())
result = str(html.xpath('/html/body/div[2]/h3/text()')).strip(" ['']") # [0]
return result.replace('/', '').replace('_', '-')
return result.replace('/', '')
except:
return ''

Expand Down Expand Up @@ -57,7 +56,7 @@ def getSeries(a):

def getNum(a):
html = etree.fromstring(a, etree.HTMLParser()) # //table/tr[1]/td[1]/text()
result1 = str(html.xpath('//span[contains(text(),"识别码:")]/../span[2]/text()')).strip(" ['']").replace('_', '-')
result1 = str(html.xpath('//span[contains(text(),"识别码:")]/../span[2]/text()')).strip(" ['']")
return result1


Expand All @@ -81,17 +80,10 @@ def getCover(htmlcode):
return result


def getCover_small(htmlcode, number):
def getCover_small(htmlcode, count):
html = etree.fromstring(htmlcode, etree.HTMLParser())
counts = len(html.xpath("//div[@id='waterfall']/div/a/div"))
if counts == 0:
return ''
for count in range(1, counts + 1): # 遍历搜索结果,找到需要的番号
number_get = html.xpath("//div[@id='waterfall']/div[" + str(count) + "]/a/div[@class='photo-info']/span/date[1]/text()")
if len(number_get) > 0 and number_get[0] == number:
cover_small = html.xpath("//div[@id='waterfall']/div[" + str(count) + "]/a/div[@class='photo-frame']/img/@src")[0]
return cover_small
return ''
cover_small = html.xpath("//div[@id='waterfall']/div[" + str(count) + "]/a/div[@class='photo-frame']/img/@src")[0]
return cover_small


def getTag(a): # 获取演员
Expand All @@ -103,54 +95,64 @@ def getTag(a): # 获取演员
return d


def getUrl(number):
    """Search avsox for *number* and pick the exactly matching result.

    Returns (index, search_page_html, detail_url) where index is the
    1-based position of the matching search result.  On no match it
    returns (0, search_page_html, '') so the caller's three-way unpack
    and `url == ''` check both work.
    """
    response = get_html('https://avsox.host/cn/search/' + number)
    html = etree.fromstring(response, etree.HTMLParser())
    url_list = html.xpath('//*[@id="waterfall"]/div/a/@href')
    # Walk the result tiles and compare each entry's number, case-insensitively.
    for i in range(1, len(url_list) + 1):
        number_get = str(html.xpath('//*[@id="waterfall"]/div[' + str(i) + ']/a/div[@class="photo-info"]/span/date[1]/text()')).strip(" ['']")
        if number.upper() == number_get.upper():
            return i, response, str(html.xpath('//*[@id="waterfall"]/div[' + str(i) + ']/a/@href')).strip(" ['']")
    # BUG FIX: the original returned a 2-tuple (response, '') here, so the
    # caller's `count, response, url = getUrl(number)` raised ValueError on a
    # miss and its `if url == ''` branch was unreachable.  Return a uniform
    # 3-tuple instead.
    return 0, response, ''


def main(number):
a = get_html('https://avsox.host/cn/search/' + number)
html = etree.fromstring(a, etree.HTMLParser()) # //table/tr[1]/td[1]/text()
result1 = str(html.xpath('//*[@id="waterfall"]/div/a/@href')).strip(" ['']")
if result1 == '' or result1 == 'null' or result1 == 'None':
a = get_html('https://avsox.host/cn/search/' + number.replace('-', '_'))
html = etree.fromstring(a, etree.HTMLParser()) # //table/tr[1]/td[1]/text()
result1 = str(html.xpath('//*[@id="waterfall"]/div/a/@href')).strip(" ['']")
if result1 == '' or result1 == 'null' or result1 == 'None':
a = get_html('https://avsox.host/cn/search/' + number.replace('_', ''))
html = etree.fromstring(a, etree.HTMLParser()) # //table/tr[1]/td[1]/text()
result1 = str(html.xpath('//*[@id="waterfall"]/div/a/@href')).strip(" ['']")
web = get_html(result1)
soup = BeautifulSoup(web, 'lxml')
info = str(soup.find(attrs={'class': 'row movie'}))
try:
count, response, url = getUrl(number)
if str(response) == 'ProxyError':
raise TimeoutError
if url == '':
raise Exception('Movie Data not found in avsox!')
web = get_html(url)
soup = BeautifulSoup(web, 'lxml')
info = str(soup.find(attrs={'class': 'row movie'}))
number = getNum(web)
print(1)
dic = {
'actor': getActor(web),
'title': getTitle(web).strip(getNum(web)).strip().replace(' ', '-'),
'title': getTitle(web).strip(number).strip().replace(' ', '-'),
'studio': getStudio(info),
'publisher': '',
'outline': '', #
'runtime': getRuntime(info),
'director': '', #
'release': getRelease(info),
'number': getNum(info),
'cover': getCover(web),
'cover_small': getCover_small(a, number),
'imagecut': 3,
'tag': getTag(web),
'series': getSeries(info),
'year': getYear(getRelease(info)), # str(re.search('\d{4}',getRelease(a)).group()),
'year': getYear(getRelease(info)),
'actor_photo': getActorPhoto(web),
'website': result1,
'cover': getCover(web),
'cover_small': getCover_small(response, count),
'imagecut': 3,
'director': '',
'publisher': '',
'outline': '',
'score': '',
'website': url,
'source': 'avsox.py',
}
except:
if a == 'ProxyError':
dic = {
'title': '',
'website': 'timeout',
}
else:
dic = {
'title': '',
'website': '',
}
except TimeoutError:
dic = {
'title': '',
'website': 'timeout',
}
except Exception as error_info:
print('Error in avsox.main : ' + str(error_info))
dic = {
'title': '',
'website': '',
}
js = json.dumps(dic, ensure_ascii=False, sort_keys=True, indent=4, separators=(',', ':'), ) # .encode('UTF-8')
return js

# print(main('051119-917'))
# print(main('032620_001'))
Loading

0 comments on commit 2de2e29

Please sign in to comment.