Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Moved cookies to temp request dir for parallel workers #193 #194

Merged
merged 2 commits into from
Feb 4, 2025
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
10 changes: 8 additions & 2 deletions classes/robot/crawler.php
Original file line number Diff line number Diff line change
Expand Up @@ -861,8 +861,9 @@ public function parse_html($node, $external, $verbose = false) {
} while ($walk);

$text = self::clean_html_node_content($e);
$text = trim($text);
if ($verbose > 1) {
printf (" - Found link to: %-20s / %-50s => %-50s\n", $text, $e->href, $href);
printf (" - Found link to: %-30s -> %s\n", "'$text'", $href);
}
$this->link_from_node_to_url($node, $href, $text, $idattr);
}
Expand Down Expand Up @@ -1089,7 +1090,12 @@ private static function determine_filesize($curlhandle, $method, $success, $body
public function scrape($url) {

global $CFG;
$cookiefilelocation = $CFG->dataroot . '/tool_crawler_cookies.txt';

// Cache the cookie jar path in a static so that every scrape() call made by
// this PHP process reuses the same cookie file. The file lives in a temporary
// request directory so that parallel workers do not share one cookie file.
// The static must carry exactly the same name as the variable tested and
// assigned below, otherwise the cache is never hit and a new path is built
// on every call.
static $cookiefilelocation = '';
if (!$cookiefilelocation) {
    $cookiefilelocation = make_request_directory() . '/tool_crawler_cookies.txt';
}

$config = self::get_config();

$version = moodle_major_version();
Expand Down
5 changes: 5 additions & 0 deletions lang/en/tool_crawler.php
Original file line number Diff line number Diff line change
Expand Up @@ -75,6 +75,11 @@
$string['crawlend'] = 'Crawl end';
$string['crawlstart'] = 'Crawl start';
$string['cronticks'] = 'Cron ticks';
// Debug level setting: title, description and the three selectable levels.
// Kept in alphabetical key order, in line with the rest of this language file.
$string['debugging'] = 'Verbose debugging';
$string['debuggingdesc'] = 'This turns on debugging in the task output';
$string['debugnormal'] = 'Normal debugging';
$string['debugoff'] = 'Debugging off';
$string['debugverbose'] = 'Verbose debugging';
$string['disablebot'] = 'Disable the link crawler robot';
$string['disablebotdesc'] = 'Make the crawler do nothing when a scheduled task is executed. This effectively prevents crawling of links and running of bot cleanup functions. Intended to deactivate or temporarily pause the crawler without having to disable all its scheduled tasks.';
$string['duration'] = 'Duration';
Expand Down
5 changes: 5 additions & 0 deletions lib.php
Original file line number Diff line number Diff line change
Expand Up @@ -45,6 +45,11 @@ function tool_crawler_crawl($verbose = false) {
$robot = new crawler();
$url = new url();
$config = $robot::get_config();

// A non-empty plugin debugging setting overrides the verbosity the caller
// passed in; otherwise the caller's value is kept untouched.
$verbose = $config->debugging ?: $verbose;

brendanheywood marked this conversation as resolved.
Show resolved Hide resolved
$crawlstart = $config->crawlstart;
$crawlend = $config->crawlend;

Expand Down
11 changes: 11 additions & 0 deletions settings.php
Original file line number Diff line number Diff line change
Expand Up @@ -209,5 +209,16 @@
new lang_string('disablebot', 'tool_crawler'),
new lang_string('disablebotdesc', 'tool_crawler'),
'0' ));

// Selectable debugging levels, keyed by the value that gets stored in the
// plugin config (0 = off, 1 = normal, 2 = verbose).
$options = [];
$options[0] = new lang_string('debugoff', 'tool_crawler');
$options[1] = new lang_string('debugnormal', 'tool_crawler');
$options[2] = new lang_string('debugverbose', 'tool_crawler');

// Drop-down admin setting for the debugging level; defaults to 0 (off).
$debugsetting = new admin_setting_configselect(
    'tool_crawler/debugging',
    new lang_string('debugging', 'tool_crawler'),
    new lang_string('debuggingdesc', 'tool_crawler'),
    0,
    $options
);
$settings->add($debugsetting);
}
}
4 changes: 2 additions & 2 deletions version.php
Original file line number Diff line number Diff line change
Expand Up @@ -27,8 +27,8 @@
defined('MOODLE_INTERNAL') || die();


$plugin->version = 2024031401; // The current plugin version (Date: YYYYMMDDXX)
$plugin->release = 2024031401; // The current plugin version (Date: YYYYMMDDXX)
$plugin->version = 2025020401; // The current plugin version (Date: YYYYMMDDXX)
$plugin->release = 2025020401; // The current plugin version (Date: YYYYMMDDXX)
$plugin->requires = 2016021800; // Requires this Moodle version.
$plugin->supported = [34, 405];
$plugin->component = 'tool_crawler'; // To check on upgrade, that module sits in correct place.
Expand Down
Loading