Skip to content

Commit

Permalink
修复部分数据获取不全
Browse files: browse the repository at this point in the history
  • Loading branch information
myhhub committed Feb 19, 2025
1 parent 90c9fcd commit 98e3f76
Show file tree
Hide file tree
Showing 3 changed files with 66 additions and 22 deletions.
4 changes: 2 additions & 2 deletions instock/core/crawling/fund_etf_em.py
Original file line number Diff line number Diff line change
Expand Up @@ -19,7 +19,7 @@ def fund_etf_spot_em() -> pd.DataFrame:
:rtype: pandas.DataFrame
"""
url = "http://88.push2.eastmoney.com/api/qt/clist/get"
page_size = 100
page_size = 200
page_current = 1
params = {
"pn": page_current,
Expand All @@ -30,7 +30,7 @@ def fund_etf_spot_em() -> pd.DataFrame:
"fltt": "2",
"invt": "2",
"wbp2u": "|0|0|0|web",
"fid": "f3",
"fid": "f12",
"fs": "b:MK0021,b:MK0022,b:MK0023,b:MK0024",
"fields": "f1,f2,f3,f4,f5,f6,f7,f8,f9,f10,f12,f13,f14,f15,f16,f17,f18,f20,f21,f23,f24,f25,f22,f11,f62,f128,f136,f115,f152",
"_": "1672806290972",
Expand Down
28 changes: 22 additions & 6 deletions instock/core/crawling/stock_fund_em.py
Original file line number Diff line number Diff line change
Expand Up @@ -45,7 +45,7 @@ def stock_individual_fund_flow_rank(indicator: str = "5日") -> pd.DataFrame:
],
}
url = "http://push2.eastmoney.com/api/qt/clist/get"
page_size = 100
page_size = 200
page_current = 1
params = {
"fid": indicator_map[indicator][0],
Expand Down Expand Up @@ -270,11 +270,11 @@ def stock_sector_fund_flow_rank(
headers = {
"User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/81.0.4044.138 Safari/537.36",
}
page_size = 100
page_size = 200
page_current = 1
params = {
"pn": "1",
"pz": "5000",
"pn": page_current,
"pz": page_size,
"po": "1",
"np": "1",
"ut": "b2884a393a59ad64002292a3e90d46a5",
Expand All @@ -290,8 +290,24 @@ def stock_sector_fund_flow_rank(
}
r = requests.get(url, params=params, headers=headers)
text_data = r.text
json_data = json.loads(text_data[text_data.find("{") : -2])
temp_df = pd.DataFrame(json_data["data"]["diff"])
data_json = json.loads(text_data[text_data.find("{") : -2])
data = data_json["data"]["diff"]

data_count = data_json["data"]["total"]
page_count = math.ceil(data_count/page_size)
while page_count > 1:
page_current = page_current + 1
params["pn"] = page_current
r = requests.get(url, params=params, headers=headers)
text_data = r.text
json_data = json.loads(text_data[text_data.find("{"): -2])
_data = json_data["data"]["diff"]
data.extend(_data)
page_count =page_count - 1

temp_df = pd.DataFrame(data)


temp_df = temp_df[~temp_df["f2"].isin(["-"])]
if indicator == "今日":
temp_df.columns = [
Expand Down
56 changes: 42 additions & 14 deletions instock/core/crawling/stock_hist_em.py
Original file line number Diff line number Diff line change
Expand Up @@ -18,7 +18,7 @@ def stock_zh_a_spot_em() -> pd.DataFrame:
:rtype: pandas.DataFrame
"""
url = "http://82.push2.eastmoney.com/api/qt/clist/get"
page_size = 100
page_size =200
page_current = 1
params = {
"pn": page_current,
Expand All @@ -28,7 +28,7 @@ def stock_zh_a_spot_em() -> pd.DataFrame:
"ut": "bd1d9ddb04089700cf9c27f6f7426281",
"fltt": "2",
"invt": "2",
"fid": "f3",
"fid": "f12",
"fs": "m:0 t:6,m:0 t:80,m:1 t:2,m:1 t:23,m:0 t:81 s:2048",
"fields": "f2,f3,f4,f5,f6,f7,f8,f9,f10,f11,f12,f14,f15,f16,f17,f18,f20,f21,f22,f23,f24,f25,f26,f37,f38,f39,f40,f41,f45,f46,f48,f49,f57,f61,f100,f112,f113,f114,f115,f221",
"_": "1623833739532",
Expand Down Expand Up @@ -187,7 +187,7 @@ def code_id_map_em() -> dict:
:rtype: dict
"""
url = "http://80.push2.eastmoney.com/api/qt/clist/get"
page_size = 100
page_size = 200
page_current = 1
params = {
"pn": page_current,
Expand All @@ -197,7 +197,7 @@ def code_id_map_em() -> dict:
"ut": "bd1d9ddb04089700cf9c27f6f7426281",
"fltt": "2",
"invt": "2",
"fid": "f3",
"fid": "f12",
"fs": "m:1 t:2,m:1 t:23",
"fields": "f12",
"_": "1623833739532",
Expand All @@ -223,44 +223,72 @@ def code_id_map_em() -> dict:
temp_df["market_id"] = 1
temp_df.columns = ["sh_code", "sh_id"]
code_id_dict = dict(zip(temp_df["sh_code"], temp_df["sh_id"]))
page_current = 1
params = {
"pn": "1",
"pz": "50000",
"pn": page_current,
"pz": page_size,
"po": "1",
"np": "1",
"ut": "bd1d9ddb04089700cf9c27f6f7426281",
"fltt": "2",
"invt": "2",
"fid": "f3",
"fid": "f12",
"fs": "m:0 t:6,m:0 t:80",
"fields": "f12",
"_": "1623833739532",
}
r = requests.get(url, params=params)
data_json = r.json()
if not data_json["data"]["diff"]:
data = data_json["data"]["diff"]
if not data:
return dict()
temp_df_sz = pd.DataFrame(data_json["data"]["diff"])

data_count = data_json["data"]["total"]
page_count = math.ceil(data_count/page_size)
while page_count > 1:
page_current = page_current + 1
params["pn"] = page_current
r = requests.get(url, params=params)
data_json = r.json()
_data = data_json["data"]["diff"]
data.extend(_data)
page_count =page_count - 1

temp_df_sz = pd.DataFrame(data)
temp_df_sz["sz_id"] = 0
code_id_dict.update(dict(zip(temp_df_sz["f12"], temp_df_sz["sz_id"])))
page_current = 1
params = {
"pn": "1",
"pz": "50000",
"pn": page_current,
"pz": page_size,
"po": "1",
"np": "1",
"ut": "bd1d9ddb04089700cf9c27f6f7426281",
"fltt": "2",
"invt": "2",
"fid": "f3",
"fid": "f12",
"fs": "m:0 t:81 s:2048",
"fields": "f12",
"_": "1623833739532",
}
r = requests.get(url, params=params)
data_json = r.json()
if not data_json["data"]["diff"]:
data = data_json["data"]["diff"]
if not data:
return dict()
temp_df_sz = pd.DataFrame(data_json["data"]["diff"])

data_count = data_json["data"]["total"]
page_count = math.ceil(data_count/page_size)
while page_count > 1:
page_current = page_current + 1
params["pn"] = page_current
r = requests.get(url, params=params)
data_json = r.json()
_data = data_json["data"]["diff"]
data.extend(_data)
page_count =page_count - 1

temp_df_sz = pd.DataFrame(data)
temp_df_sz["bj_id"] = 0
code_id_dict.update(dict(zip(temp_df_sz["f12"], temp_df_sz["bj_id"])))
return code_id_dict
Expand Down

0 comments on commit 98e3f76

Please sign in to comment.