Skip to content

Commit

Permalink
3.93
Browse files Browse the repository at this point in the history
  • Loading branch information
moyy996 committed Mar 30, 2020
1 parent 62dcbfb commit 2de2e29
Show file tree
Hide file tree
Showing 19 changed files with 2,192 additions and 1,266 deletions.
840 changes: 506 additions & 334 deletions AVDC_Main.py

Large diffs are not rendered by default.

97 changes: 65 additions & 32 deletions Function/Function.py
Original file line number Diff line number Diff line change
@@ -1,13 +1,10 @@
#!/usr/bin/env python3
# -*- coding: utf-8 -*-

import re
import os
import json
from configparser import ConfigParser
import requests
from lxml import etree
from Getter import avsox, javbus, javdb, fc2fans_club, mgstage, dmm
from Getter import avsox, javbus, javdb, fc2fans_club, mgstage, dmm, jav321


# ========================================================================获取config
Expand All @@ -22,6 +19,35 @@ def get_config():
return config


# ======================================================================== uncensored check (是否为无码)
def is_uncensored(number):
    """Return True if *number* looks like an uncensored movie ID.

    Built-in patterns: all-digit IDs (e.g. 111111-111), n-prefixed IDs
    (n1111), and anything containing HEYZO.  Additionally, any prefix
    listed in the config's [uncensored] uncensored_prefix entry
    ('|'-separated) marks the number as uncensored.
    """
    upper = number.upper()  # hoisted: compared several times below
    # Raw strings avoid invalid-escape warnings for \d in the patterns.
    if re.match(r'^\d{4,}', number) or re.match(r'n\d{4}', number) or 'HEYZO' in upper:
        return True
    # Fall back to user-configured prefixes from config.ini.
    config = get_config()
    prefix_list = str(config['uncensored']['uncensored_prefix']).split('|')
    return any(pre.upper() in upper for pre in prefix_list)


# ======================================================================== metadata fetch failure check (元数据获取失败检测)
def getDataState(json_data):
    """Return 0 when scraped metadata is invalid, 1 when it looks usable.

    A scrape is considered failed when the 'title' field carries no real
    content: the empty string, or the literal strings 'None'/'null' that
    the scrapers emit on failure.
    """
    # Membership test replaces three chained == comparisons on the same key.
    return 0 if json_data['title'] in ('', 'None', 'null') else 1


# ======================================================================== strip escape literals (去掉异常字符)
def escapePath(path, Config):
    """Remove every configured backslash-escape literal from *path*.

    Each character in the [escape] literals config entry is treated as
    the second half of a two-character sequence '\\<char>' to delete.
    """
    for char in Config['escape']['literals']:
        path = path.replace('\\' + char, '')
    return path


# ========================================================================获取视频列表
def movie_lists(escape_folder, movie_type, movie_path):
if escape_folder != '':
Expand Down Expand Up @@ -99,31 +125,27 @@ def getNumber(filepath, escape_string):
return os.path.splitext(filepath.split('/')[-1])[0]


# ========================================================================去掉异常字符
def escapePath(path, Config): # Remove escape literals
escapeLiterals = Config['escape']['literals']
backslash = '\\'
for literal in escapeLiterals:
path = path.replace(backslash + literal, '')
return path


# ========================================================================根据番号获取数据
def getDataFromJSON(file_number, config, mode): # 从JSON返回元数据
# ================================================网站规则添加开始================================================
isuncensored = is_uncensored(file_number)
json_data = {}
if mode == 1: # 从全部网站刮削
# =======================================================================无码抓取:111111-111,n1111,HEYZO-1111
if re.match('^\d{4,}', file_number) or re.match('n\d{4}', file_number) or 'HEYZO' in file_number.upper():
# =======================================================================无码抓取:111111-111,n1111,HEYZO-1111,SMD-115
if isuncensored:
json_data = json.loads(javbus.main_uncensored(file_number))
if getDataState(json_data) == 0:
json_data = json.loads(javdb.main(file_number))
json_data = json.loads(javdb.main(file_number, True))
if getDataState(json_data) == 0 and 'HEYZO' in file_number.upper():
json_data = json.loads(jav321.main(file_number, True))
if getDataState(json_data) == 0:
json_data = json.loads(avsox.main(file_number))
# =======================================================================259LUXU-1111
elif re.match('\d+[a-zA-Z]+-\d+', file_number) or 'SIRO' in file_number.upper():
json_data = json.loads(mgstage.main(file_number))
file_number = re.search('[a-zA-Z]+-\d+', file_number).group()
if getDataState(json_data) == 0:
json_data = json.loads(jav321.main(file_number))
if getDataState(json_data) == 0:
json_data = json.loads(javdb.main(file_number))
if getDataState(json_data) == 0:
Expand All @@ -144,6 +166,8 @@ def getDataFromJSON(file_number, config, mode): # 从JSON返回元数据
# =======================================================================MIDE-139
else:
json_data = json.loads(javbus.main(file_number))
if getDataState(json_data) == 0:
json_data = json.loads(jav321.main(file_number))
if getDataState(json_data) == 0:
json_data = json.loads(javdb.main(file_number))
if getDataState(json_data) == 0:
Expand All @@ -159,20 +183,22 @@ def getDataFromJSON(file_number, config, mode): # 从JSON返回元数据
elif mode == 3: # 仅从fc2club
json_data = json.loads(fc2fans_club.main(file_number))
elif mode == 4: # 仅从javbus
if re.match('^\d{5,}', file_number) or re.match('n\d{4}', file_number) or 'HEYZO' in file_number.upper():
if isuncensored:
json_data = json.loads(javbus.main_uncensored(file_number))
elif re.search('\D+.\d{2}.\d{2}.\d{2}', file_number):
json_data = json.loads(javbus.main_us(file_number))
else:
json_data = json.loads(javbus.main(file_number))
elif mode == 5: # 仅从javdb
elif mode == 5: # 仅从jav321
json_data = json.loads(jav321.main(file_number, isuncensored))
elif mode == 6: # 仅从javdb
if re.search('\D+.\d{2}.\d{2}.\d{2}', file_number):
json_data = json.loads(javdb.main_us(file_number))
else:
json_data = json.loads(javdb.main(file_number))
elif mode == 6: # 仅从avsox
json_data = json.loads(javdb.main(file_number, isuncensored))
elif mode == 7: # 仅从avsox
json_data = json.loads(avsox.main(file_number))
elif mode == 7: # 仅从dmm
elif mode == 8: # 仅从dmm
json_data = json.loads(dmm.main(file_number))

# ================================================网站规则添加结束================================================
Expand Down Expand Up @@ -273,8 +299,9 @@ def save_config(json_config):
print("success_output_folder = " + json_config['success_output_folder'], file=code)
print("failed_file_move = " + str(json_config['failed_file_move']), file=code)
print("soft_link = " + str(json_config['soft_link']), file=code)
print("show_poster = " + str(json_config['show_poster']), file=code)
print("website = " + json_config['website'], file=code)
print("# all or mgstage or fc2club or javbus or javdb or avsox or dmm", file=code)
print("# all or mgstage or fc2club or javbus or jav321 or javdb or avsox or dmm", file=code)
print("", file=code)
print("[proxy]", file=code)
print("proxy = " + json_config['proxy'], file=code)
Expand All @@ -296,8 +323,6 @@ def save_config(json_config):
print("media_type = " + json_config['media_type'], file=code)
print("sub_type = " + json_config['sub_type'], file=code)
print("media_path = " + json_config['media_path'], file=code)
print("media_warehouse = " + json_config['media_warehouse'], file=code)
print("# emby or plex or kodi ,emby = jellyfin", file=code)
print("", file=code)
print("[escape]", file=code)
print("literals = " + json_config['literals'], file=code)
Expand All @@ -310,12 +335,20 @@ def save_config(json_config):
print("[emby]", file=code)
print("emby_url = " + json_config['emby_url'], file=code)
print("api_key = " + json_config['api_key'], file=code)
code.close()

print("", file=code)
print("[mark]", file=code)
print("poster_mark = " + str(json_config['poster_mark']), file=code)
print("thumb_mark = " + str(json_config['thumb_mark']), file=code)
print("mark_size = " + str(json_config['mark_size']), file=code)
print("mark_type = " + json_config['mark_type'], file=code)
print("mark_pos = " + json_config['mark_pos'], file=code)
print("# mark_size : range 1-5", file=code)
print("# mark_type : sub, leak, uncensored", file=code)
print("# mark_pos : bottom_right or bottom_left or top_right or top_left", file=code)
print("", file=code)
print("[uncensored]", file=code)
print("uncensored_prefix = " + str(json_config['uncensored_prefix']), file=code)
print("uncensored_poster = " + str(json_config['uncensored_poster']), file=code)
print("# 0 : official, 1 : cut", file=code)

# ========================================================================元数据获取失败检测
def getDataState(json_data):
if json_data['title'] == '' or json_data['title'] == 'None' or json_data['title'] == 'null':
return 0
else:
return 1
code.close()
45 changes: 41 additions & 4 deletions Function/getHtml.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,23 +3,30 @@
from configparser import ConfigParser


# ========================================================================网页请求
def get_html(url, cookies=None):
# ======================================================================== load config (获取config)
def get_config():
    """Locate config.ini (parent directory first, then cwd) and parse it.

    When neither location exists, an empty ConfigParser is returned
    (reading '' is a no-op).
    """
    if os.path.exists('../config.ini'):
        found = '../config.ini'
    elif os.path.exists('config.ini'):
        found = 'config.ini'
    else:
        found = ''
    parser = ConfigParser()
    parser.read(found, encoding='UTF-8')
    return parser


# ========================================================================网页请求
def get_html(url, cookies=None):
config = get_config()
retry_count = 0
proxy = ''
timeout = 0
try:
proxy = str(config['proxy']['proxy'])
timeout = int(config['proxy']['timeout'])
retry_count = int(config['proxy']['retry'])
except:
except Exception as error_info:
print('Error in get_html :' + str(error_info))
print('[-]Proxy config error! Please check the config.')
i = 0
while i < retry_count:
Expand All @@ -39,10 +46,40 @@ def get_html(url, cookies=None):
getweb = requests.get(str(url), headers=headers, timeout=timeout, cookies=cookies)
getweb.encoding = 'utf-8'
return getweb.text
except:
except Exception as error_info:
i += 1
print('Error in get_html :' + str(error_info))
print('[-]Connect retry ' + str(i) + '/' + str(retry_count))
print('[-]Connect Failed! Please check your Proxy or Network!')
return 'ProxyError'


def post_html(url: str, query: dict):
    """POST *query* as form data to *url*, using proxy/retry settings from config.

    Returns the response body text on success, or the sentinel string
    'ProxyError' once every retry has failed.
    """
    config = get_config()
    # Defaults used when the [proxy] section is missing or malformed.
    retry_count = 3
    proxy = ''
    timeout = 10
    try:
        proxy = str(config['proxy']['proxy'])
        timeout = int(config['proxy']['timeout'])
        retry_count = int(config['proxy']['retry'])
    except Exception as error_info:
        print('Error in post_html :' + str(error_info))
        print('[-]Proxy config error! Please check the config.')
    proxies = {"http": "http://" + proxy, "https": "https://" + proxy} if proxy else {}
    for attempt in range(retry_count):
        try:
            response = requests.post(url, data=query, proxies=proxies, timeout=timeout)
            response.encoding = 'utf-8'
            return response.text
        except Exception as error_info:
            print('Error in post_html :' + str(error_info))
            print("[-]Connect retry {}/{}".format(attempt + 1, retry_count))
    print("[-]Connect Failed! Please check your Proxy or Network!")
    return 'ProxyError'


96 changes: 49 additions & 47 deletions Getter/avsox.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,5 @@
import json
import re

from bs4 import BeautifulSoup
from lxml import etree
from Function.getHtml import get_html
Expand All @@ -22,7 +21,7 @@ def getTitle(a):
try:
html = etree.fromstring(a, etree.HTMLParser())
result = str(html.xpath('/html/body/div[2]/h3/text()')).strip(" ['']") # [0]
return result.replace('/', '').replace('_', '-')
return result.replace('/', '')
except:
return ''

Expand Down Expand Up @@ -57,7 +56,7 @@ def getSeries(a):

def getNum(a):
html = etree.fromstring(a, etree.HTMLParser()) # //table/tr[1]/td[1]/text()
result1 = str(html.xpath('//span[contains(text(),"识别码:")]/../span[2]/text()')).strip(" ['']").replace('_', '-')
result1 = str(html.xpath('//span[contains(text(),"识别码:")]/../span[2]/text()')).strip(" ['']")
return result1


Expand All @@ -81,17 +80,10 @@ def getCover(htmlcode):
return result


def getCover_small(htmlcode, number):
def getCover_small(htmlcode, count):
html = etree.fromstring(htmlcode, etree.HTMLParser())
counts = len(html.xpath("//div[@id='waterfall']/div/a/div"))
if counts == 0:
return ''
for count in range(1, counts + 1): # 遍历搜索结果,找到需要的番号
number_get = html.xpath("//div[@id='waterfall']/div[" + str(count) + "]/a/div[@class='photo-info']/span/date[1]/text()")
if len(number_get) > 0 and number_get[0] == number:
cover_small = html.xpath("//div[@id='waterfall']/div[" + str(count) + "]/a/div[@class='photo-frame']/img/@src")[0]
return cover_small
return ''
cover_small = html.xpath("//div[@id='waterfall']/div[" + str(count) + "]/a/div[@class='photo-frame']/img/@src")[0]
return cover_small


def getTag(a): # 获取演员
Expand All @@ -103,54 +95,64 @@ def getTag(a): # 获取演员
return d


def getUrl(number):
    """Search avsox for *number* and pick the exactly matching result.

    Returns (index, search_page_html, detail_url) where index is the
    1-based position of the matching search result.  On no match it
    returns (0, search_page_html, '') so the caller's three-way unpack
    and `url == ''` check both work.
    """
    response = get_html('https://avsox.host/cn/search/' + number)
    html = etree.fromstring(response, etree.HTMLParser())
    url_list = html.xpath('//*[@id="waterfall"]/div/a/@href')
    # Walk the result tiles and compare each entry's number, case-insensitively.
    for i in range(1, len(url_list) + 1):
        number_get = str(html.xpath('//*[@id="waterfall"]/div[' + str(i) + ']/a/div[@class="photo-info"]/span/date[1]/text()')).strip(" ['']")
        if number.upper() == number_get.upper():
            return i, response, str(html.xpath('//*[@id="waterfall"]/div[' + str(i) + ']/a/@href')).strip(" ['']")
    # BUG FIX: the original returned a 2-tuple (response, '') here, so the
    # caller's `count, response, url = getUrl(number)` raised ValueError on a
    # miss and its `if url == ''` branch was unreachable.  Return a uniform
    # 3-tuple instead.
    return 0, response, ''


def main(number):
a = get_html('https://avsox.host/cn/search/' + number)
html = etree.fromstring(a, etree.HTMLParser()) # //table/tr[1]/td[1]/text()
result1 = str(html.xpath('//*[@id="waterfall"]/div/a/@href')).strip(" ['']")
if result1 == '' or result1 == 'null' or result1 == 'None':
a = get_html('https://avsox.host/cn/search/' + number.replace('-', '_'))
html = etree.fromstring(a, etree.HTMLParser()) # //table/tr[1]/td[1]/text()
result1 = str(html.xpath('//*[@id="waterfall"]/div/a/@href')).strip(" ['']")
if result1 == '' or result1 == 'null' or result1 == 'None':
a = get_html('https://avsox.host/cn/search/' + number.replace('_', ''))
html = etree.fromstring(a, etree.HTMLParser()) # //table/tr[1]/td[1]/text()
result1 = str(html.xpath('//*[@id="waterfall"]/div/a/@href')).strip(" ['']")
web = get_html(result1)
soup = BeautifulSoup(web, 'lxml')
info = str(soup.find(attrs={'class': 'row movie'}))
try:
count, response, url = getUrl(number)
if str(response) == 'ProxyError':
raise TimeoutError
if url == '':
raise Exception('Movie Data not found in avsox!')
web = get_html(url)
soup = BeautifulSoup(web, 'lxml')
info = str(soup.find(attrs={'class': 'row movie'}))
number = getNum(web)
print(1)
dic = {
'actor': getActor(web),
'title': getTitle(web).strip(getNum(web)).strip().replace(' ', '-'),
'title': getTitle(web).strip(number).strip().replace(' ', '-'),
'studio': getStudio(info),
'publisher': '',
'outline': '', #
'runtime': getRuntime(info),
'director': '', #
'release': getRelease(info),
'number': getNum(info),
'cover': getCover(web),
'cover_small': getCover_small(a, number),
'imagecut': 3,
'tag': getTag(web),
'series': getSeries(info),
'year': getYear(getRelease(info)), # str(re.search('\d{4}',getRelease(a)).group()),
'year': getYear(getRelease(info)),
'actor_photo': getActorPhoto(web),
'website': result1,
'cover': getCover(web),
'cover_small': getCover_small(response, count),
'imagecut': 3,
'director': '',
'publisher': '',
'outline': '',
'score': '',
'website': url,
'source': 'avsox.py',
}
except:
if a == 'ProxyError':
dic = {
'title': '',
'website': 'timeout',
}
else:
dic = {
'title': '',
'website': '',
}
except TimeoutError:
dic = {
'title': '',
'website': 'timeout',
}
except Exception as error_info:
print('Error in avsox.main : ' + str(error_info))
dic = {
'title': '',
'website': '',
}
js = json.dumps(dic, ensure_ascii=False, sort_keys=True, indent=4, separators=(',', ':'), ) # .encode('UTF-8')
return js

# print(main('051119-917'))
# print(main('032620_001'))
Loading

0 comments on commit 2de2e29

Please sign in to comment.