Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

feat: add GSAU(甘肃农业大学) router of news index page. #18027

Closed
wants to merge 19 commits into from
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
6 changes: 6 additions & 0 deletions lib/routes/gsau/namespace.ts
Original file line number Diff line number Diff line change
@@ -0,0 +1,6 @@
import type { Namespace } from '@/types';

/** Namespace metadata for the Gansu Agricultural University (甘肃农业大学) routes. */
export const namespace: Namespace = { name: '甘肃农业大学', url: 'www.gsau.edu.cn' };
156 changes: 156 additions & 0 deletions lib/routes/gsau/news.ts
Original file line number Diff line number Diff line change
@@ -0,0 +1,156 @@
import { DataItem, Route } from '@/types';
import cache from '@/utils/cache';
import got from '@/utils/got';
import { parseDate } from '@/utils/parse-date';
import { load } from 'cheerio';
import { generateRssItemForUnsupportedLink } from './utils/content';
import { isSubdomainOfGsau } from './utils/domain';

/** Display metadata for a single news column. */
interface NewsCategory {
    /** Column name; used as the feed title. */
    title: string;
    /** Longer summary; used as the feed description. */
    description: string;
}

/**
 * Supported news columns, keyed by the column code that appears in the
 * `/xwzx/<code>.htm` list-page URL.
 */
const NEWS_TYPES: Record<string, NewsCategory> = {
    xxyw: { title: '学校要闻', description: '甘肃农业大学学校要闻' },
    xykx: { title: '校园快讯', description: '甘肃农业大学校园快讯' },
    tzgg: { title: '通知公告', description: '甘肃农业大学校内通知公告' },
    jzbg: { title: '讲座报告', description: '甘肃农业大学讲座报告信息' },
    jqgz: { title: '近期关注', description: '甘肃农业大学近期关注' },
    jyjx: { title: '教育教学', description: '甘肃农业大学教育教学新闻' },
    xsky: { title: '学术科研', description: '甘肃农业大学学术科研信息' },
    hzjl: { title: '合作交流', description: '甘肃农业大学合作交流信息' },
    mzgn: { title: '每周甘农', description: '甘肃农业大学周记总结' },
    mtnd: { title: '媒体农大', description: '相关对甘肃农业大学的媒体报道' },
};

/**
 * Build the feed for one GSAU news column.
 *
 * Reads the `category` route parameter, fetches the matching list page
 * (`/xwzx/<category>.htm`), extracts title / date / link from every list row,
 * then resolves each entry to a full item (cached by link). Entries whose link
 * is outside the official domain are replaced by a placeholder item instead of
 * being fetched.
 *
 * @throws Error when `category` is not one of the keys of `NEWS_TYPES`.
 */
const handler: Route['handler'] = async (context) => {
    const { category } = context.req.param();

    // Validate before any network access. An own-property check is used so that
    // inherited keys such as "constructor" are not mistaken for a category.
    const newsType = Object.prototype.hasOwnProperty.call(NEWS_TYPES, category) ? NEWS_TYPES[category] : undefined;
    if (!newsType) {
        throw new Error(`Unknown GSAU news category "${category}". Supported categories: ${Object.keys(NEWS_TYPES).join(', ')}`);
    }

    const BASE_URL = `https://www.gsau.edu.cn/xwzx/${category}.htm`;

    const { data: listResponse } = await got(BASE_URL);
    const $ = load(listResponse);

    // Select all list items containing news information
    const ITEM_SELECTOR = '#warp > div.nyleft > div.infolist > ul > li';

    // Extract title, date and link from each row; incomplete rows are dropped.
    const contentLinkList = $(ITEM_SELECTOR)
        .toArray()
        .flatMap((element) => {
            const anchor = $(element).find('a');
            const title = anchor.attr('title')?.trim();
            const relativeLink = anchor.attr('href') || '';
            const publishedAt = parseDate($(element).find('a > span').text().trim());

            // `toISOString()` throws a RangeError on an invalid Date, so a row with
            // an unparseable date must be skipped rather than abort the whole feed.
            if (!title || !relativeLink || Number.isNaN(publishedAt.getTime())) {
                return [];
            }

            return [
                {
                    title,
                    date: publishedAt.toISOString(),
                    link: new URL(relativeLink, BASE_URL).href,
                },
            ];
        });

    const items = await Promise.all(
        contentLinkList.map((item) =>
            cache.tryGet(item.link, async () => {
                // Pages outside the official domain are not fetched; emit a placeholder.
                if (!isSubdomainOfGsau(item.link)) {
                    return generateRssItemForUnsupportedLink(item.title, item.date, item.link);
                }
                const { data: contentResponse } = await got(item.link);
                const CONTENT_SELECTOR = '#warp > div.nyleft > form > div > div.infoarea > div';
                const contentPage = load(contentResponse);
                const contentNode = contentPage(CONTENT_SELECTOR);
                const content = contentNode.html() || '';
                return {
                    title: item.title,
                    pubDate: item.date,
                    link: item.link,
                    description: content,
                    category: ['university'],
                    guid: item.link,
                    id: item.link,
                    image: 'https://www.gsau.edu.cn/images/foot_03.jpg',
                    // Same `{ text, html }` shape that generateRssItemForUnsupportedLink
                    // returns, so both kinds of item are structurally consistent.
                    content: {
                        text: contentNode.text().trim(),
                        html: content,
                    },
                    updated: item.date,
                    language: 'zh-cn',
                };
            })
        )
    );

    return {
        title: newsType.title,
        description: newsType.description,
        link: BASE_URL,
        image: 'https://www.gsau.edu.cn/images/foot_03.jpg',
        item: items as DataItem[],
        allowEmpty: true,
        language: 'zh-cn',
        feedLink: `https://rsshub.app/gsau/news/${category}`,
        id: BASE_URL,
    };
};

/**
 * Route registration for the GSAU homepage news columns.
 *
 * `:category` is the column code; the accepted codes are listed in the
 * `description` table below.
 */
export const route: Route = {
    path: '/news/:category',
    name: '主页新闻',
    parameters: {
        category: '新闻栏目代码,取值可见描述中的列表。',
    },
    description: `
| category | 标题 |
| -------- | ---------- |
| xxyw | 学校要闻 |
| xykx | 校园快讯 |
| tzgg | 通知公告 |
| jzbg | 讲座报告 |
| jqgz | 近期关注 |
| jyjx | 教育教学 |
| xsky | 学术科研 |
| hzjl | 合作交流 |
| mzgn | 每周甘农 |
| mtnd | 媒体农大 |
`,
    maintainers: ['PrinOrange'],
    url: 'www.gsau.edu.cn/xwzx/xxyw.htm',
    handler,
    categories: ['university'],
    features: {
        requireConfig: false,
        requirePuppeteer: false,
        antiCrawler: false,
        supportBT: false,
        supportPodcast: false,
        supportScihub: false,
    },
    example: '/gsau/news/tzgg',
};
37 changes: 37 additions & 0 deletions lib/routes/gsau/utils/content.ts
Original file line number Diff line number Diff line change
@@ -0,0 +1,37 @@
import type { DataItem } from '@/types';

/** Escape the characters that are significant in HTML text and attribute values. */
const escapeHtml = (value: string): string =>
    value
        .replace(/&/g, '&amp;')
        .replace(/</g, '&lt;')
        .replace(/>/g, '&gt;')
        .replace(/"/g, '&quot;')
        .replace(/'/g, '&#39;');

/**
 * Build a placeholder feed item for an article that is not parsed.
 *
 * The item carries a bilingual notice that points the reader to the original
 * page, in an HTML variant (`description`, `content.html`) and a plain-text
 * variant (`content.text`).
 *
 * @param title Article title as scraped from the list page.
 * @param date  Publication date string; used for both `pubDate` and `updated`.
 * @param url   Absolute link to the original article.
 * @returns A feed item whose body is the notice rather than the article text.
 */
export const generateRssItemForUnsupportedLink = (title: string, date: string, url: string): DataItem => {
    // Title and URL come from a scraped page, so they are escaped before being
    // placed into markup. The plain-text variant and the item fields keep the raw values.
    const safeTitle = escapeHtml(title);
    const safeUrl = escapeHtml(url);
    const htmlContent = `
<p>
抱歉,本文章 <u>${safeTitle}</u> 来源非甘肃农业大学官方网站,不支持解析。<br/>
请通过链接查看原文:<a href="${safeUrl}">${safeUrl}</a>
</p>
<p>
Sorry, the provenance of article <u>${safeTitle}</u> is not from official website of Gansu Agriculture University,
and it's not supported to parse. <br/>
Please read the origin website by link: <a href="${safeUrl}">${safeUrl}</a>
</p>
`;
    const textContent = `
抱歉,本文章 ${title} 来源非甘肃农业大学官方网站,不支持解析。
请通过链接查看原文:${url}
Sorry, the provenance of article ${title} is not from official website of Gansu Agriculture University,
and it's not supported to parse. Please read the origin website by link: ${url}
`;
    return {
        title,
        pubDate: date,
        link: url,
        description: htmlContent,
        category: ['university'],
        guid: url,
        id: url,
        image: 'https://www.gsau.edu.cn/images/foot_03.jpg',
        content: {
            text: textContent,
            html: htmlContent,
        },
        updated: date,
        language: 'zh-cn',
    };
};
18 changes: 18 additions & 0 deletions lib/routes/gsau/utils/domain.ts
Original file line number Diff line number Diff line change
@@ -0,0 +1,18 @@
const OFFICIAL_DOMAIN = 'gsau.edu.cn';

/** Matches an explicit `http:` / `https:` scheme prefix, in any letter case. */
const HTTP_SCHEME = /^https?:/i;

/**
 * Check whether a URL points at the official domain or one of its subdomains.
 *
 * List pages may link to pages outside the official domain, and those pages
 * may have anti-crawler or login-requirement measures, so callers use this
 * check to decide whether a link should be fetched.
 *
 * @param url Absolute URL, or a bare host/path without a scheme (treated as https).
 * @returns `true` when the hostname equals the official domain or ends with
 *          `.<official domain>`; `false` otherwise, including for unparseable input.
 */
export const isSubdomainOfGsau = (url: string): boolean => {
    try {
        const trimmed = url.trim();
        // Only a real scheme prefix counts: a bare host such as "httpd.gsau.edu.cn"
        // merely starts with the letters "http" and still needs a scheme added.
        const normalizedUrl = HTTP_SCHEME.test(trimmed) ? trimmed : `https://${trimmed}`;
        const { hostname } = new URL(normalizedUrl);
        return hostname === OFFICIAL_DOMAIN || hostname.endsWith(`.${OFFICIAL_DOMAIN}`);
    } catch {
        // Unparseable input cannot be attributed to the official domain.
        return false;
    }
};
Loading