Skip to content

Commit

Permalink
refactor(ExhentaiController): download
Browse files Browse the repository at this point in the history
  • Loading branch information
orzyyyy committed Jul 2, 2019
1 parent 9e72b57 commit b27ad63
Show file tree
Hide file tree
Showing 2 changed files with 154 additions and 148 deletions.
165 changes: 23 additions & 142 deletions server/controller/ExhentaiController.ts
Original file line number Diff line number Diff line change
@@ -1,8 +1,4 @@
// import fs from 'fs-extra';
// import path from 'path';
// import { format } from 'date-fns';
// import request from 'request-promise';
import { success } from '../utils/log';
import { success, info } from '../utils/log';
import { Controller, Request } from '../utils/decorator';
import { writeIntoJsonFile, getTimeStamp } from '../utils/common';
import ExhentaiService from '../service/ExhentaiService';
Expand All @@ -15,38 +11,6 @@ export interface ExHentaiInfoItem {
thumbnailUrl: string;
}

// const getAllThumbnaiUrls = async (page: any) =>
// await page.$$eval(
// exHentai.thumbnailClass,
// (wrappers: any[]) =>
// new Promise(resolve => {
// const result: any[] = [];
// for (const item of wrappers) {
// result.push(item.href);
// }
// resolve(result);
// }),
// );

// const getUrlFromPaginationInfo = async (page: any) =>
// await page.$$eval(
// 'table.ptt a',
// (wrappers: any[]) =>
// new Promise(resolve => {
// if (wrappers.length !== 1) {
// const result: string[] = [];
// wrappers.pop();
// wrappers.shift();
// for (const item of wrappers) {
// result.push(item.href);
// }
// resolve(result);
// } else {
// resolve([]);
// }
// }),
// );

@Controller('/exhentai')
export default class ExhentaiController {
@Request({ url: '/', method: 'get' })
Expand All @@ -66,114 +30,31 @@ export default class ExhentaiController {
ctx.response.body = `./assets/exhentai/${getLastestListFileName()}.json`;
}

/**
 * Handler registered for `post` requests on `/download`.
 *
 * Reads `url` from the request body and drives an `ExhentaiService` through
 * these steps, in order:
 *   1. start the browser and open `url`;
 *   2. make sure the save folder exists;
 *   3. collect the per-image page urls and hand them to `writeIntoJsonFile`
 *      under `restDetailUrls`;
 *   4. resolve the actual image urls and hand them to `writeIntoJsonFile`
 *      under `detailImageUrls`;
 *   5. download the images into the save folder.
 *
 * The response body is set to `true` after `downImages` has returned.
 *
 * @param ctx Request context; `ctx.request.body.url` is the page to download from.
 */
@Request({ url: '/download', method: 'post' })
async downloadImages(ctx: any) {
  const targetUrl = ctx.request.body.url;
  const exhentai = new ExhentaiService();

  await exhentai.initBrowser();
  await exhentai.gotoTargetPage(targetUrl);
  const saveDir = await exhentai.ensureFolderForSave();

  info(`start fetching thumbnai urls from: ${targetUrl}`);
  const detailPageUrls = await exhentai.getThumbnaiUrlFromDetailPage();
  // Persisted before the slow per-image crawl starts.
  writeIntoJsonFile(`${saveDir}/restDetailUrls`, detailPageUrls);

  info('start fetching target images');
  const imageUrls = await exhentai.fetchTargetImageUrls(detailPageUrls);
  success('fetch all image urls');
  writeIntoJsonFile(`${saveDir}/detailImageUrls`, imageUrls);

  await exhentai.downImages(imageUrls, saveDir);
  success('download completed');

  ctx.response.body = true;
}
}
137 changes: 131 additions & 6 deletions server/service/ExhentaiService.ts
Original file line number Diff line number Diff line change
@@ -1,7 +1,10 @@
import puppeteer from 'puppeteer-core';
import { success, info } from '../utils/log';
import fs from 'fs-extra';
import request from 'request-promise';
import { success, info, error, trace } from '../utils/log';
import { getTargetResource } from '../utils/resource';
import { ExHentaiInfoItem } from '../controller/ExhentaiController';
import { joinWithRootPath, getDateStamp } from '../utils/common';

export default class ExhentaiService {
cookie: any[];
Expand Down Expand Up @@ -37,15 +40,41 @@ export default class ExhentaiService {
this.setExHentaiCookie();

success('set cookie');

return { page, browser };
};

getListInfo = async ({ pageIndex }: { pageIndex: number }) => {
/**
 * Navigates the service's page to `url`, loading Google's home page first.
 *
 * Both navigations resolve as soon as `domcontentloaded` fires.
 *
 * @param url Address of the page to open.
 */
gotoTargetPage = async (url: string): Promise<void> => {
  // NOTE(review): the intermediate hop presumably resets page state/referrer
  // between two target pages — confirm before removing it.
  await this.page.goto('https://www.google.com/', {
    waitUntil: 'domcontentloaded',
  });
  // A leftover `goto(this.config.href + pageIndex, {` line used to sit here: it
  // opened a call that was never closed and referenced `pageIndex`, which does
  // not exist in this scope. Only the caller-supplied url is visited.
  await this.page.goto(url, {
    waitUntil: 'domcontentloaded',
  });
};

/**
 * Reads the `innerText` of the `#gj` element on the current page and removes
 * every character matched by the punctuation pattern below.
 *
 * @returns The element text with those characters stripped.
 */
getComicName = async () => {
  // The page function may return the value directly; $eval resolves with it.
  const rawTitle: any = await this.page.$eval('#gj', (node: any) => node.innerText);
  return rawTitle.replace(/[·!#¥(——):;“”‘、,|《。》?、【】[\]]/gim, '');
};

/**
 * Builds `<config.downloadPath>/<date stamp>/<comic name>` for the comic on the
 * current page and makes sure that directory exists.
 *
 * @returns The path as built above, i.e. before `joinWithRootPath` is applied.
 */
ensureFolderForSave = async () => {
  const comicName = await this.getComicName();
  const dateFolder = getDateStamp();
  const prefixPath = `${this.config.downloadPath}/${dateFolder}/${comicName}`;

  // Only the directory creation uses the root-joined form; callers get the
  // un-joined prefix back.
  fs.ensureDirSync(joinWithRootPath(prefixPath));
  success(`check ${prefixPath} for saving images`);

  return prefixPath;
};

getListInfo = async ({ pageIndex }: { pageIndex: number }) => {
await this.gotoTargetPage(this.config.href + pageIndex);
const exHentaiInfo: ExHentaiInfoItem[] = await this.page.$$eval(
'div.gl1t',
(wrappers: any[]) =>
Expand Down Expand Up @@ -102,9 +131,105 @@ export default class ExhentaiService {
return results;
};

/** Placeholder with an empty body; calling it does nothing. */
saveListInfo = (): void => {};
/**
 * Collects the `href` of every `table.ptt a` anchor on the current page except
 * the first and the last one.
 *
 * @returns The inner anchors' hrefs; an empty list when the page has two or
 *          fewer such anchors.
 */
getUrlFromPaginationInfo = async () => {
  // slice(1, -1) drops the first and last entry and is already empty for
  // 0, 1 or 2 anchors, so no separate single-anchor branch is needed.
  const innerLinks = await this.page.$$eval('table.ptt a', (anchors: any[]) =>
    anchors.slice(1, -1).map(anchor => anchor.href),
  );
  return innerLinks;
};

/** Returns the page instance held by this service. */
getPage = () => {
  return this.page;
};
/**
 * Collects the `href` of every element on the current page that matches the
 * `config.thumbnailClass` selector.
 *
 * @returns The hrefs in document order.
 */
getAllThumbnaiUrls = async () => {
  const hrefs = await this.page.$$eval(this.config.thumbnailClass, (links: any[]) =>
    links.map(link => link.href),
  );
  return hrefs;
};

/**
 * Downloads every url in `imageUrl` through `config.proxy` and stores the
 * files as `<prefixPath>/1.jpg`, `<prefixPath>/2.jpg`, ... (1-based, in input
 * order). The browser is closed when the run ends.
 *
 * Each download is waited for until its file has been flushed (or has failed)
 * before the next one starts. Previously `await` was applied to the return
 * value of `.pipe()`, which is the write stream and not a promise, so the
 * method returned — and closed the browser — while files were still being
 * written.
 *
 * Failures stay best-effort: a request or write error is logged and the loop
 * moves on to the next image.
 *
 * @param imageUrl   Direct image urls to fetch.
 * @param prefixPath Target folder, relative to the project root.
 */
downImages = async (imageUrl: string[], prefixPath: string) => {
  const { page, browser, config } = this;
  try {
    for (let i = 0; i < imageUrl.length; i++) {
      const item = imageUrl[i];
      const pageIndex = i + 1;

      trace('download begin: ' + item);

      await new Promise<void>(resolve => {
        const target = fs.createWriteStream(
          joinWithRootPath(`${prefixPath}/${pageIndex}.jpg`),
        );
        target
          .on('finish', () => {
            success(`${pageIndex}.jpg`);
            resolve();
          })
          .on('error', (err: any) => {
            error(`${pageIndex}.jpg failed, ${err}`);
            resolve();
          });

        request
          .get({ url: item, proxy: config.proxy } as {
            url: string;
            proxy: string;
          })
          .on('error', (err: any) => {
            error(err + ' => ' + item);
            // pipe() does not end the destination when the source errors, so
            // release the file handle explicitly and move on.
            target.destroy();
            resolve();
          })
          .pipe(target);
      });

      // Pause after the 1st, 5th, 9th, ... image.
      if (i % 4 === 0) {
        await page.waitFor(config.waitTime);
      }
    }
  } finally {
    // Close the browser even when an iteration throws.
    await browser.close();
  }
};

/**
 * Gathers the thumbnail links of the page that is currently open plus those of
 * every page returned by `getUrlFromPaginationInfo`.
 *
 * The pagination urls are read first, from the page that is already open; each
 * of them is then visited in turn, with a `config.waitTime` pause after every
 * page.
 *
 * @returns All collected links: current page first, then the paginated pages in order.
 */
getThumbnaiUrlFromDetailPage = async () => {
  const restDetailUrls = (await this.getUrlFromPaginationInfo()) as string[];
  const thumbnailUrls = (await this.getAllThumbnaiUrls()) as string[];
  await this.page.waitFor(this.config.waitTime);

  for (const item of restDetailUrls) {
    await this.gotoTargetPage(item);
    const fromNextPage = (await this.getAllThumbnaiUrls()) as string[];
    thumbnailUrls.push(...fromNextPage);

    // A stray `getBrowser = () => this.browser;` statement used to sit here; it
    // assigned to an undeclared identifier on every iteration and was removed.
    info('image length: ' + thumbnailUrls.length);

    await this.page.waitFor(this.config.waitTime);
  }
  return thumbnailUrls;
};

/**
 * Visits each url in `thumbnailUrls` and reads the `src` of the element matched
 * by `[id=i3] img` on that page.
 *
 * Pages are visited one after another, with a `config.waitTime` pause after
 * each.
 *
 * @param thumbnailUrls Urls of the pages to visit.
 * @returns One `src` per input url, in input order.
 */
fetchTargetImageUrls = async (thumbnailUrls: string[]) => {
  const collected: string[] = [];
  for (const detailUrl of thumbnailUrls) {
    await this.gotoTargetPage(detailUrl);

    info(`fetching image url => ${detailUrl}`);

    const src: string = await this.page.$eval('[id=i3] img', (img: any) => img.src);
    collected.push(src);
    await this.page.waitFor(this.config.waitTime);
  }
  return collected;
};
}

0 comments on commit b27ad63

Please sign in to comment.