DIYgod · PrinOrange · Oct 13, 2024 · Oct 13, 2024 · Oct 13, 2024 · Oct 14, 2024
diff --git a/lib/routes/gsau/namespace.ts b/lib/routes/gsau/namespace.ts
@@ -0,0 +1,6 @@
+import type { Namespace } from '@/types';
+
+export const namespace: Namespace = {
+    name: '甘肃农业大学', // Chinese name of Gansu Agricultural University
+    url: 'www.gsau.edu.cn', // Host of the university website, written without a scheme
+};
diff --git a/lib/routes/gsau/news.ts b/lib/routes/gsau/news.ts
@@ -0,0 +1,156 @@
+import { DataItem, Route } from '@/types';
+import cache from '@/utils/cache';
+import got from '@/utils/got';
+import { parseDate } from '@/utils/parse-date';
+import { load } from 'cheerio';
+import { generateRssItemForUnsupportedLink } from './utils/content';
+import { isSubdomainOfGsau } from './utils/domain';
+
+type NewsCategory = {
+    title: string; // Returned by the handler as the feed title for this news column
+    description: string; // Returned by the handler as the feed description for this news column
+};
+
+const NEWS_TYPES: Record<string, NewsCategory> = {
+    xxyw: {
+        title: '学校要闻', // University headlines
+        description: '甘肃农业大学学校要闻',
+    },
+    xykx: {
+        title: '校园快讯', // Campus news briefs
+        description: '甘肃农业大学校园快讯',
+    },
+    tzgg: {
+        title: '通知公告', // Notices and announcements
+        description: '甘肃农业大学校内通知公告',
+    },
+    jzbg: {
+        title: '讲座报告', // Lectures and talks
+        description: '甘肃农业大学讲座报告信息',
+    },
+    jqgz: {
+        title: '近期关注', // Recent highlights
+        description: '甘肃农业大学近期关注',
+    },
+    jyjx: {
+        title: '教育教学', // Education and teaching
+        description: '甘肃农业大学教育教学新闻',
+    },
+    xsky: {
+        title: '学术科研', // Academic research
+        description: '甘肃农业大学学术科研信息',
+    },
+    hzjl: {
+        title: '合作交流', // Cooperation and exchange
+        description: '甘肃农业大学合作交流信息',
+    },
+    mzgn: {
+        title: '每周甘农', // GSAU weekly
+        description: '甘肃农业大学周记总结',
+    },
+    mtnd: {
+        title: '媒体农大', // GSAU in the media
+        description: '相关对甘肃农业大学的媒体报道',
+    },
+}; // Keys are the `category` route parameter values; the handler also uses them as the page name under /xwzx/
+
+/**
+ * Build the feed for one news column: scrape its list page, then load each article (cached by link) and expose its body as `description` and as the `{ html, text }` content object.
+ * List rows lacking a title, a link or a parseable date are skipped; articles outside gsau.edu.cn get a placeholder item instead of being fetched.
+ * @throws Error if `category` is not a key of NEWS_TYPES; this is checked before any request is sent because the value is interpolated into the request URL.
+ */
+const handler: Route['handler'] = async (context) => {
+    const { category } = context.req.param();
+    if (!Object.keys(NEWS_TYPES).includes(category)) {
+        throw new Error(`Unknown category "${category}", expected one of: ${Object.keys(NEWS_TYPES).join(', ')}`);
+    }
+    const BASE_URL = `https://www.gsau.edu.cn/xwzx/${category}.htm`;
+    const { data: listResponse } = await got(BASE_URL);
+    const $ = load(listResponse);
+    const ITEM_SELECTOR = '#warp > div.nyleft > div.infolist > ul > li';
+    const contentLinkList = $(ITEM_SELECTOR)
+        .toArray()
+        .flatMap((element) => {
+            const anchor = $(element).find('a');
+            const title = anchor.attr('title')?.trim();
+            const relativeLink = anchor.attr('href');
+            const parsedDate = parseDate($(element).find('a > span').text().trim());
+            // Unparseable text gives an Invalid Date, and toISOString() throws a RangeError on those.
+            if (!title || !relativeLink || Number.isNaN(parsedDate.getTime())) {
+                return [];
+            }
+            return [{ title, date: parsedDate.toISOString(), link: new URL(relativeLink, BASE_URL).href }];
+        });
+
+    return {
+        title: NEWS_TYPES[category].title,
+        description: NEWS_TYPES[category].description,
+        link: BASE_URL,
+        image: 'https://www.gsau.edu.cn/images/foot_03.jpg',
+        item: (await Promise.all(
+            contentLinkList.map((item) =>
+                cache.tryGet(item.link, async () => {
+                    if (!isSubdomainOfGsau(item.link)) {
+                        return generateRssItemForUnsupportedLink(item.title, item.date, item.link);
+                    }
+                    const { data: contentResponse } = await got(item.link);
+                    const CONTENT_SELECTOR = '#warp > div.nyleft > form > div > div.infoarea > div';
+                    const body = load(contentResponse)(CONTENT_SELECTOR);
+                    const html = body.html() || '';
+                    return {
+                        title: item.title,
+                        pubDate: item.date,
+                        link: item.link,
+                        description: html,
+                        category: ['university'],
+                        guid: item.link,
+                        id: item.link,
+                        image: 'https://www.gsau.edu.cn/images/foot_03.jpg',
+                        content: { html, text: body.text() },
+                        updated: item.date,
+                        language: 'zh-cn',
+                    };
+                })
+            )
+        )) as DataItem[],
+        allowEmpty: true,
+        language: 'zh-cn',
+        feedLink: `https://rsshub.app/gsau/news/${category}`,
+        id: BASE_URL,
+    };
+};
+
+export const route: Route = {
+    path: '/news/:category', // `category` is one of the column codes tabulated in `description`
+    name: '主页新闻', // "Homepage news"
+    parameters: {
+        category: '新闻栏目代码，取值可见描述中的列表。', // "News column code; see the list in the description."
+    },
+    description: `
+| category | 标题       |
+| -------- | ---------- |
+| xxyw     | 学校要闻   |
+| xykx     | 校园快讯   |
+| tzgg     | 通知公告   |
+| jzbg     | 讲座报告   |
+| jqgz     | 近期关注   |
+| jyjx     | 教育教学   |
+| xsky     | 学术科研   |
+| hzjl     | 合作交流   |
+| mzgn     | 每周甘农   |
+| mtnd     | 媒体农大   |
+    `,
+    maintainers: ['PrinOrange'],
+    url: 'www.gsau.edu.cn/xwzx/xxyw.htm', // One of the source list pages (the xxyw column), without a scheme
+    handler,
+    categories: ['university'],
+    features: {
+        requireConfig: false,
+        requirePuppeteer: false,
+        antiCrawler: false,
+        supportBT: false,
+        supportPodcast: false,
+        supportScihub: false,
+    },
+    example: '/gsau/news/tzgg', // Full route path for the tzgg (notices and announcements) column
+};
diff --git a/lib/routes/gsau/utils/content.ts b/lib/routes/gsau/utils/content.ts
@@ -0,0 +1,52 @@
+import type { DataItem } from '@/types';
+
+/** Replace the characters that are significant in HTML text and attribute values (& < > " ') with numeric character references. */
+const escapeHtml = (text: string): string => text.replace(/[&<>"']/g, (char) => `&#${char.charCodeAt(0)};`);
+
+/**
+ * Build a placeholder feed item for an article that is hosted outside the official GSAU website and is therefore not parsed.
+ *
+ * `title` and `url` are HTML-escaped before they are placed in the HTML notice, since they may originate from scraped markup.
+ * The plain-text notice and the item's own fields (`title`, `link`, `guid`, `id`) keep the raw values.
+ *
+ * @param title Article title.
+ * @param date Date string, used for both `pubDate` and `updated`.
+ * @param url Link to the external article.
+ * @returns A feed item whose description asks the reader to open the original link.
+ */
+export const generateRssItemForUnsupportedLink = (title: string, date: string, url: string): DataItem => {
+    const safeTitle = escapeHtml(title);
+    const safeUrl = escapeHtml(url);
+    const htmlContent = `
+<p>
+    抱歉，本文章 <u>${safeTitle}</u> 来源非甘肃农业大学官方网站，不支持解析。<br/>
+    请通过链接查看原文：<a href="${safeUrl}">${safeUrl}</a>
+</p>
+<p>
+    Sorry, the provenance of article <u>${safeTitle}</u> is not from official website of Gansu Agriculture University,
+    and it's not supported to parse. <br/>
+    Please read the origin website by link: <a href="${safeUrl}">${safeUrl}</a>
+</p>
+`;
+    const textContent = `
+抱歉，本文章 ${title} 来源非甘肃农业大学官方网站，不支持解析。
+请通过链接查看原文：${url}
+Sorry, the provenance of article ${title} is not from official website of Gansu Agriculture University,
+and it's not supported to parse. Please read the origin website by link: ${url}
+`;
+    return {
+        title,
+        pubDate: date,
+        link: url,
+        description: htmlContent,
+        category: ['university'],
+        guid: url,
+        id: url,
+        image: 'https://www.gsau.edu.cn/images/foot_03.jpg',
+        content: {
+            text: textContent,
+            html: htmlContent,
+        },
+        updated: date,
+        language: 'zh-cn',
+    };
+};
diff --git a/lib/routes/gsau/utils/domain.ts b/lib/routes/gsau/utils/domain.ts
@@ -0,0 +1,24 @@
+const OFFICIAL_DOMAIN = 'gsau.edu.cn';
+
+/**
+ * Check whether a URL points at the official domain or one of its subdomains.
+ *
+ * News lists may contain links outside the official domain, and such pages may have anti-crawler or login-requirement measures, so a link is checked before it is fetched.
+ *
+ * A missing scheme defaults to `https://`; only `http:`/`https:` URLs can match. Surrounding whitespace and a trailing dot on the host name are ignored.
+ *
+ * @param url Absolute URL, or a bare `host/path` string.
+ * @returns `true` when the host is `gsau.edu.cn` or ends with `.gsau.edu.cn`; `false` otherwise, including for input that cannot be parsed.
+ */
+export const isSubdomainOfGsau = (url: string): boolean => {
+    try {
+        const candidate = url.trim();
+        // Test for an actual `http:`/`https:` scheme, case-insensitively; a bare "http" prefix would also match hosts such as `httpd.gsau.edu.cn`.
+        const normalizedUrl = /^https?:/i.test(candidate) ? candidate : `https://${candidate}`;
+        // The URL parser lower-cases the host but keeps the trailing dot of a fully-qualified name, so drop it before comparing.
+        const hostname = new URL(normalizedUrl).hostname.replace(/\.$/, '');
+        return hostname === OFFICIAL_DOMAIN || hostname.endsWith(`.${OFFICIAL_DOMAIN}`);
+    } catch {
+        return false;
+    }
+};