Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Add UrlTransformerService and integrate in LinkifyExtension #588

Merged
merged 8 commits into from
Nov 13, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
153 changes: 153 additions & 0 deletions src/Service/UrlTransformerService.php
Original file line number Diff line number Diff line change
@@ -0,0 +1,153 @@
<?php

namespace App\Service;

class UrlTransformerService
{
    /**
     * Known host formats.
     *
     * The key is a regex matched against the host portion of a URL (as returned by parse_url()).
     * The value is "<type>[,parameters...]" where <type> names the affiliate program that selects
     * the transformation logic, and the optional parameters are specific to that program.
     *
     * @var array<string, string>
     */
    private array $hostFormats = [
        '/^(www\.)?amazon\.(com|co\.(jp|uk|za)|com\.(au|be|br|mx|tr)|ae|ca|cn|de|eg|es|fr|ie|in|it|nl|pl|sa|se|sg)$/' => 'amazon',
        '/^(www\.)?bol\.com$/' => 'bol',
        '/^(www\.)?coolblue\.be$/' => 'awin,85165',
    ];

    /**
     * Partner ids per affiliate program, keyed by lower-cased program name.
     *
     * @var array<string, list<string>>
     */
    private array $partnerIds = [];

    /**
     * Collects all partner ids from the environment.
     *
     * Every $_ENV entry named PARTNER_<PROGRAM> configures the program <program> (lower-cased).
     * Multiple ids are supported by separating them with spaces.
     */
    public function __construct()
    {
        foreach ($_ENV as $key => $value) {
            $name = (string) $key;
            if (!is_scalar($value) || !str_starts_with($name, 'PARTNER_')) {
                continue;
            }

            // array_filter() preserves keys; re-index so the ids always form a list starting at 0,
            // even when the value has leading or repeated spaces.
            $this->partnerIds[strtolower(substr($name, 8))] = array_values(
                array_filter(explode(' ', (string) $value))
            );
        }
    }

    /**
     * Extracts all distinct http(s) URLs from a text.
     *
     * @return list<string> the URLs in order of first appearance, without duplicates
     */
    public function extractUrls(string $text): array
    {
        $pattern = '#\bhttps?://[^,\s()<>]+(?:\([\w\d]+\)|([^,[:punct:]\s]|/))#';
        if (!preg_match_all($pattern, $text, $matches)) {
            return [];
        }

        // array_unique() keeps the original keys; re-index so callers get a plain list.
        return array_values(array_unique($matches[0]));
    }

    /**
     * Replace all urls in input array (as key) with its replacements (values).
     *
     * Custom function because str_replace could replace occurrences from previous replacements
     * or urls that are child/parent urls of other urls that need to be replaced.
     *
     * Longer urls take precedence: an occurrence of a shorter url that overlaps an occurrence of a
     * longer url (same start, or embedded anywhere inside it) is left untouched.
     *
     * @param array<string, string> $urls original url => replacement
     */
    public function replaceUrls(string $text, array $urls): string
    {
        // An empty needle matches at every offset and would never advance the search, so drop it.
        $originals = array_filter(
            array_map(static fn ($url): string => (string) $url, array_keys($urls)),
            static fn (string $url): bool => '' !== $url
        );

        // sort by length longest to shortest
        usort($originals, static fn (string $a, string $b): int => strlen($b) <=> strlen($a));

        // index of claimed occurrences: start offset => url found there
        $claimed = [];
        foreach ($originals as $url) {
            $length = strlen($url);
            $position = strpos($text, $url);
            while (false !== $position) {
                if (!$this->overlapsClaimedRange($claimed, $position, $length)) {
                    $claimed[$position] = $url;
                }
                // find next occurrence
                $position = strpos($text, $url, $position + $length);
            }
        }

        // Rebuild the text front to back from the untouched segments and the replacements.
        // All offsets refer to the original text, so no replacement can shift another one.
        ksort($claimed, SORT_NUMERIC);
        $result = '';
        $cursor = 0;
        foreach ($claimed as $position => $url) {
            $result .= substr($text, $cursor, $position - $cursor).$urls[$url];
            $cursor = $position + strlen($url);
        }

        return $result.substr($text, $cursor);
    }

    /**
     * Rewrites a URL into its affiliate variant when its host belongs to a known program
     * and a partner id is configured for that program. Any other URL is returned unchanged.
     *
     * When several partner ids are configured for a program, one is picked at random.
     */
    public function transformUrl(string $url): string
    {
        // parse URL into parts; unparsable or host-less input cannot be matched
        $urlParts = parse_url($url);
        if (!is_array($urlParts) || !isset($urlParts['host'])) {
            return $url;
        }

        $format = $this->findHostFormat($urlParts['host']);
        if (null === $format) {
            return $url;
        }

        // split by comma. Shift first element off and use as key to identify type of link.
        $params = explode(',', $format);
        $program = array_shift($params);

        // have we configured a partner id for this program?
        $partnerIds = $this->partnerIds[$program] ?? [];
        if ([] === $partnerIds) {
            return $url;
        }
        $partnerId = $partnerIds[array_rand($partnerIds)];

        // first program specific parameter: campaign id (tradetracker) or merchant id (awin)
        $programParam = $params[0] ?? null;

        return match ($program) {
            // append id as tag parameter
            'amazon' => $this->appendQueryParameter($url, 'tag', $partnerId),
            // generate text link to partner program and append original URL encoded
            'bol' => 'https://partner.bol.com/click/click?p=1&t=url&s='.urlencode($partnerId)
                .'&f=TXL&url='.urlencode($url),
            // without a campaign id no valid tracking link can be built; keep the original URL
            'tradetracker' => null === $programParam
                ? $url
                : $this->buildTradetrackerUrl($programParam, $partnerId, $urlParts),
            // without a merchant id no valid tracking link can be built; keep the original URL
            'awin' => null === $programParam
                ? $url
                : 'https://www.awin1.com/cread.php?awinmid='.urlencode($programParam)
                    .'&awinaffid='.urlencode($partnerId).'&ued='.urlencode($url),
            // No matching format
            default => $url,
        };
    }

    /**
     * Returns the format string of the first host pattern matching $host, or null when none matches.
     */
    private function findHostFormat(string $host): ?string
    {
        // host names are case-insensitive, the patterns are written in lower case
        $host = strtolower($host);
        foreach ($this->hostFormats as $hostPattern => $format) {
            if (preg_match($hostPattern, $host)) {
                return $format;
            }
        }

        return null;
    }

    /**
     * Tells whether the range [$start, $start + $length) overlaps any already claimed occurrence.
     *
     * @param array<int, string> $claimed start offset => url claimed at that offset
     */
    private function overlapsClaimedRange(array $claimed, int $start, int $length): bool
    {
        foreach ($claimed as $claimedStart => $claimedUrl) {
            if ($start < $claimedStart + strlen($claimedUrl) && $claimedStart < $start + $length) {
                return true;
            }
        }

        return false;
    }

    /**
     * Appends a query parameter to a URL, in front of the fragment (if any) so it stays part of the query.
     */
    private function appendQueryParameter(string $url, string $name, string $value): string
    {
        $fragment = '';
        $hashPosition = strpos($url, '#');
        if (false !== $hashPosition) {
            $fragment = substr($url, $hashPosition);
            $url = substr($url, 0, $hashPosition);
        }

        $separator = str_contains($url, '?') ? '&' : '?';

        return $url.$separator.$name.'='.urlencode($value).$fragment;
    }

    /**
     * Builds a TradeTracker link; the target is passed as the encoded path (plus query) of the original URL.
     *
     * @param array<string, mixed> $urlParts result of parse_url() for the original URL
     */
    private function buildTradetrackerUrl(string $campaignId, string $partnerId, array $urlParts): string
    {
        $target = $urlParts['path'] ?? '';
        if (isset($urlParts['query'])) {
            $target .= '?'.$urlParts['query'];
        }

        return 'https://tc.tradetracker.net/?c='.urlencode($campaignId).'&m=12&a='.urlencode($partnerId)
            .'&r=&u='.urlencode($target);
    }
}
26 changes: 13 additions & 13 deletions src/Twig/LinkifyExtension.php
Original file line number Diff line number Diff line change
Expand Up @@ -2,11 +2,16 @@

namespace App\Twig;

use App\Service\UrlTransformerService;
use Twig\Extension\AbstractExtension;
use Twig\TwigFilter;

class LinkifyExtension extends AbstractExtension
{
/**
 * Receives the URL transformer service and, through constructor promotion,
 * stores it as the private property $urlTransformerService.
 */
public function __construct(private UrlTransformerService $urlTransformerService)
{
}

public function getFilters(): array
{
return [
Expand All @@ -21,19 +26,14 @@ public function linkifyFilter($html)
return $html;
}

// Selects all urls starting with ://
$html = preg_replace(
'~[[:alpha:]]+://[^<>[:space:]]+[[:alnum:]/=!\?,]~',
'<a href="\\0" target="_blank" rel="noopener noreferrer">\\0</a>',
$html
);

// Selects all urls starting with www. but do not start with ://
$html = preg_replace(
'~(?<!://)www.[^<>[:space:]]+[[:alnum:]/=!\?,]~',
'<a href="http://\\0" target="_blank" rel="noopener noreferrer">\\0</a>',
$html
);
// extract, transform, create links and replace
$urls = $this->urlTransformerService->extractUrls($html);
$replacements = [];
foreach ($urls as $url) {
$replacement = $this->urlTransformerService->transformUrl($url);
$replacements[$url] = '<a href="'.$replacement.'" target="_blank" rel="noopener noreferrer">'.$url.'</a>';
}
$html = $this->urlTransformerService->replaceUrls($html, $replacements);

return $html;
}
Expand Down
49 changes: 49 additions & 0 deletions tests/php/unit/Service/UrlTransformerTest.php
Original file line number Diff line number Diff line change
@@ -0,0 +1,49 @@
<?php

namespace App\Tests\Unit\Service;

use App\Service\UrlTransformerService;
use PHPUnit\Framework\TestCase;

class UrlTransformerTest extends TestCase
{
    /**
     * Snapshot of $_ENV taken before each test.
     *
     * UrlTransformerService reads its partner ids from $_ENV in its constructor, so any
     * PARTNER_* value set by one test must not leak into the next one.
     *
     * @var array<string, mixed>
     */
    private array $originalEnv = [];

    protected function setUp(): void
    {
        $this->originalEnv = $_ENV;
    }

    protected function tearDown(): void
    {
        $_ENV = $this->originalEnv;
    }

    // A single plain URL is extracted as-is
    public function testUrlExtraction()
    {
        $urlTransformer = new UrlTransformerService();

        $urls = $urlTransformer->extractUrls('https://www.test.com');
        $this->assertCount(1, $urls);
        $this->assertSame('https://www.test.com', $urls[0]);
    }

    // The Amazon partner id is appended as "tag" parameter, with "?" or "&" depending on an existing query
    public function testAmazonUrlExtraction()
    {
        $_ENV['PARTNER_AMAZON'] = 'abc-123';
        $urlTransformer = new UrlTransformerService();

        $url = $urlTransformer->transformUrl('https://www.amazon.com/Zmart-Funny-Christmas-Coworkers-Secret/dp/B0CC1S12S3');
        $this->assertSame('https://www.amazon.com/Zmart-Funny-Christmas-Coworkers-Secret/dp/B0CC1S12S3?tag=abc-123', $url);

        $url = $urlTransformer->transformUrl('https://www.amazon.com/Zmart-Funny-Christmas-Coworkers-Secret/dp/B0CC1S12S3?crid=123456789');
        $this->assertSame('https://www.amazon.com/Zmart-Funny-Christmas-Coworkers-Secret/dp/B0CC1S12S3?crid=123456789&tag=abc-123', $url);
    }

    // Replacements are applied per occurrence, longest URL first, without re-replacing earlier results
    public function testurlReplacement()
    {
        $urlTransformer = new UrlTransformerService();

        $html = 'Multiple links to https://www.tom.be, again https://www.tom.be and a child https://www.tom.be/zeb and a child https://www.tom.be/arne';
        $replacements = [
            'https://www.tom.be' => 'https://www.tom.be/zeb',
            'https://www.tom.be/zeb' => 'https://www.tom.be/Zeb',
            'https://www.tom.be/arne' => 'https://www.tom.be/Arne',
        ];
        // It should not replace the /zeb instances that are the result of the replacements of https://www.tom.be.
        // It should also not replace https://www.tom.be in any of the other child urls, but instead replace them with the capital names.
        $expectedHtml = 'Multiple links to https://www.tom.be/zeb, again https://www.tom.be/zeb and a child https://www.tom.be/Zeb and a child https://www.tom.be/Arne';

        $actual = $urlTransformer->replaceUrls($html, $replacements);
        $this->assertSame($expectedHtml, $actual);
    }
}
5 changes: 3 additions & 2 deletions tests/php/unit/Twig/LinkifyExtensionTest.php
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,7 @@

namespace App\Tests\Unit\Twig;

use App\Service\UrlTransformerService;
use App\Twig\LinkifyExtension;
use PHPUnit\Framework\TestCase;

Expand All @@ -14,7 +15,7 @@ class LinkifyExtensionTest extends TestCase
*/
public function itShouldWrapAnHtmlLinkInAnAnchorTag($rawHtml, $anchorHtml)
{
$filter = new LinkifyExtension();
$filter = new LinkifyExtension(new UrlTransformerService());

$this->assertEquals($anchorHtml, $filter->linkifyFilter($rawHtml));
}
Expand All @@ -36,7 +37,7 @@ public function htmlProvider()
],
[
'www.test.com',
'<a href="http://www.test.com" target="_blank" rel="noopener noreferrer">www.test.com</a>',
'www.test.com',
],
[
'http://test',
Expand Down
Loading