Skip to content

Commit

Permalink
Merge pull request #609 from eai04191/fix-603
Browse files Browse the repository at this point in the history
NarouResolverの修正と改善
  • Loading branch information
shibafu528 authored Feb 14, 2021
2 parents db34e36 + 360e107 commit b013435
Show file tree
Hide file tree
Showing 5 changed files with 899 additions and 14 deletions.
2 changes: 1 addition & 1 deletion app/MetadataResolver/MetadataResolver.php
Original file line number Diff line number Diff line change
Expand Up @@ -26,7 +26,7 @@ class MetadataResolver implements Resolver
'~dmm\.co\.jp/~' => FanzaResolver::class,
'~www\.patreon\.com/~' => PatreonResolver::class,
'~www\.deviantart\.com/.*/art/.*~' => DeviantArtResolver::class,
'~\.syosetu\.com/n\d+[a-z]+~' => NarouResolver::class,
'~\.syosetu\.com/(novelview/infotop/ncode/)?n\d+[a-z]+~' => NarouResolver::class,
'~ci-en\.(jp|net|dlsite\.com)/creator/\d+/article/\d+~' => CienResolver::class,
'~www\.plurk\.com\/p\/.*~' => PlurkResolver::class,
'~store\.steampowered\.com/app/\d+~' => SteamResolver::class,
Expand Down
50 changes: 37 additions & 13 deletions app/MetadataResolver/NarouResolver.php
Original file line number Diff line number Diff line change
Expand Up @@ -26,30 +26,54 @@ public function resolve(string $url): Metadata
{
$cookieJar = CookieJar::fromArray(['over18' => 'yes'], '.syosetu.com');

$res = $this->client->get($url, ['cookies' => $cookieJar]);
$metadata = $this->ogpResolver->parse($res->getBody());
$metadata->description = '';

preg_match('~\.syosetu\.com/(novelview/infotop/ncode/)?(?P<ncode>n\d+[a-z]+)~', $url, $matches);
$ncode = $matches['ncode'];

$res = $this->client->get("https://novel18.syosetu.com/novelview/infotop/ncode/$ncode/", ['cookies' => $cookieJar]);
$html = $res->getBody()->getContents();

// 一見旧式のDOMDocumentを使っているように見えるがこれは罠で、なろうのHTMLはDOMCrawlerだとパースに失敗する
$dom = new \DOMDocument();
@$dom->loadHTML(mb_convert_encoding($res->getBody(), 'HTML-ENTITIES', 'ASCII,JIS,UTF-8,eucJP-win,SJIS-win'));
@$dom->loadHTML(mb_convert_encoding($html, 'HTML-ENTITIES', 'ASCII,JIS,UTF-8,eucJP-win,SJIS-win'));
$xpath = new \DOMXPath($dom);

$metadata = $this->ogpResolver->parse($html);
$description = [];

// タイトル
$titleNodeList = $xpath->query('//h1/a');
if ($titleNodeList->length !== 0) {
$metadata->title = $titleNodeList->item(0)->textContent;
}

// タグ
$keywordNodeList = $xpath->query('//th[contains(text(), "キーワード")]/following-sibling::td[1]');
if ($keywordNodeList->length !== 0) {
$keyword = trim($keywordNodeList->item(0)->textContent);
$metadata->tags = explode(' ', $keyword);
}

// 作者名
$writerNodes = $xpath->query('//*[contains(@class, "novel_writername")]');
if ($writerNodes->length !== 0 && !empty($writerNodes->item(0)->textContent)) {
$description[] = trim($writerNodes->item(0)->textContent);
$authorNodeList = $xpath->query('//a[contains(@href,"mypage.syosetu.com")]');
if ($authorNodeList->length !== 0) {
$description[] = '作者: ' . trim($authorNodeList->item(0)->textContent);
}

// あらすじ
$exNodes = $xpath->query('//*[@id="novel_ex"]');
if ($exNodes->length !== 0 && !empty($exNodes->item(0)->textContent)) {
$summary = trim($exNodes->item(0)->textContent);
$description[] = mb_strimwidth($summary, 0, 101, ''); // 100 + '…'(1)
// あらすじがあれば先頭150文字を取得する
$exNodeList = $xpath->query('//td[@class="ex"]');
if ($exNodeList->length !== 0) {
$summary = trim($exNodeList->item(0)->textContent);
// 長過ぎたら150文字に切って「……」をつける
if (mb_strlen($summary) >= 148) {
$description[] = mb_substr($summary, 0, 150) . '……';
} else {
$description[] = $summary;
}
}

$metadata->description = implode(' / ', $description);
// 作者名とあらすじをくっつける
$metadata->description = implode("\n", $description);

return $metadata;
}
Expand Down
65 changes: 65 additions & 0 deletions tests/Unit/MetadataResolver/NarouResolverTest.php
Original file line number Diff line number Diff line change
@@ -0,0 +1,65 @@
<?php

namespace Tests\Unit\MetadataResolver;

use App\MetadataResolver\NarouResolver;
use Tests\TestCase;

/**
 * Tests for NarouResolver metadata extraction.
 *
 * Each test feeds a stored HTML snapshot to the resolver and checks the
 * resulting title, description and tags. When the mock is in use, it also
 * checks that the last request went to the novel's "infotop" page, whichever
 * URL form was passed in.
 *
 * NOTE(review): fetchSnapshot(), createResolver(), shouldUseMock(), $resolver
 * and $handler are not defined in this class; presumably they come from the
 * CreateMockedResolver trait — confirm.
 */
class NarouResolverTest extends TestCase
{
    use CreateMockedResolver;

    public function setUp(): void
    {
        parent::setUp();

        // When the mock is not in use, pause for one second before each test.
        // NOTE(review): presumably this throttles requests to the real site — confirm.
        if (!$this->shouldUseMock()) {
            sleep(1);
        }
    }

    /**
     * A plain novel URL (".../n2978fx/") resolves via the infotop page.
     */
    public function testNovel(): void
    {
        $this->assertResolvesNovelFixture('https://novel18.syosetu.com/n2978fx/');
    }

    /**
     * A URL that already points at the infotop page resolves to the same metadata.
     */
    public function testNovelViewURL(): void
    {
        $this->assertResolvesNovelFixture('https://novel18.syosetu.com/novelview/infotop/ncode/n2978fx/');
    }

    /**
     * A long summary is cut off and terminated with "……" in the description.
     */
    public function testLongDescriptionNovel(): void
    {
        $this->assertResolvesTo(
            'novel_longdescription.html',
            'https://novel18.syosetu.com/n0477gn/',
            'https://novel18.syosetu.com/novelview/infotop/ncode/n0477gn/',
            '俺のことが好き過ぎる双子の天才妹が、「妹と結婚して子作りすること」を合法化して二人がかりで精液を搾りとってくる話',
            "作者: 伍式\n巨大財閥の跡取りである高校生「菊川笙(きくかわしょう)」。その双子の妹、貧乳美尻丁寧語の「菊川琴(こと)」と巨乳巨尻甘えん坊の「菊川鈴(すず)」。\n巨大財閥を実質的に動かしている天才双子妹は、しかし兄を盲愛して兄の精液で孕みたいと固く決意しているダブルブラコン妹だった。\nある時妹たち二人と一緒に高級……",
            // The empty third element is part of the expected value.
            // NOTE(review): presumably caused by consecutive spaces in the
            // fixture's keyword cell — confirm whether empty tags are intended.
            ['ほのぼの', '男主人公', '', 'いちゃらぶ', '近親相姦', 'オナニー', '見せつけ', '尻合わせ', '孕ませっくす', '自慰', '尻ズリ', '兄妹', '双子', 'だいしゅきホールド', '膝立ちバック']
        );
    }

    /**
     * Asserts the metadata expected from the "novel.html" snapshot for the given URL.
     *
     * Shared by the tests that resolve the same novel through different URL forms.
     *
     * @param string $url URL handed to the resolver
     */
    private function assertResolvesNovelFixture(string $url): void
    {
        $this->assertResolvesTo(
            'novel.html',
            $url,
            'https://novel18.syosetu.com/novelview/infotop/ncode/n2978fx/',
            'ぱんつ売りの少女',
            "作者: 飴宮 地下\n冴えない男「ハルノ・ヒトキ」が仕事からの帰宅途中で美少女に声を掛けられる。\n驚いた事に少女の目的は自分の下着を売る事だった。\nお金に困っていた少女は徐々にヒトキに下着を売っていたが段々とその内容がエスカレートして行き……。",
            ['ギャグ', '男主人公', '現代', '下着', '匂いフェチ', 'ブルセラ']
        );
    }

    /**
     * Resolves $url against a snapshot and asserts the resulting metadata.
     *
     * @param string   $fixture             snapshot file name under tests/fixture/Narou/
     * @param string   $url                 URL handed to the resolver
     * @param string   $expectedRequestUri  URI the last request must have targeted (checked only when mocked)
     * @param string   $expectedTitle       expected Metadata title
     * @param string   $expectedDescription expected Metadata description
     * @param string[] $expectedTags        expected Metadata tags
     */
    private function assertResolvesTo(
        string $fixture,
        string $url,
        string $expectedRequestUri,
        string $expectedTitle,
        string $expectedDescription,
        array $expectedTags
    ): void {
        $responseText = $this->fetchSnapshot(__DIR__ . '/../../fixture/Narou/' . $fixture);

        $this->createResolver(NarouResolver::class, $responseText);

        $metadata = $this->resolver->resolve($url);

        // Strings are compared strictly; the tag list keeps structural equality.
        $this->assertSame($expectedTitle, $metadata->title);
        $this->assertSame($expectedDescription, $metadata->description);
        $this->assertEquals($expectedTags, $metadata->tags);

        // The request history is inspected only when the mock is in use.
        if ($this->shouldUseMock()) {
            $this->assertSame($expectedRequestUri, (string) $this->handler->getLastRequest()->getUri());
        }
    }
}
Loading

0 comments on commit b013435

Please sign in to comment.