Skip to content

Commit

Permalink
Merge pull request #430 from sbatururimi/NUTCH-2676
Browse files Browse the repository at this point in the history
  • Loading branch information
sebastian-nagel committed Feb 23, 2019
2 parents f7fdca3 + 8f421a4 commit dfd8602
Show file tree
Hide file tree
Showing 7 changed files with 286 additions and 239 deletions.
1 change: 1 addition & 0 deletions .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -13,3 +13,4 @@ ivy/ivy-2.3.0.jar
ivy/ivy-2.4.0.jar
ivy/ivy-2.5.0-rc1.jar
naivebayes-model
.gitconfig
26 changes: 22 additions & 4 deletions conf/nutch-default.xml
Original file line number Diff line number Diff line change
Expand Up @@ -2525,10 +2525,11 @@ visit https://wiki.apache.org/nutch/SimilarityScoringFilter-->
<description>
A String value representing the flavour of Selenium
WebDriver() to use. Currently the following options
exist - 'firefox', 'chrome', 'safari', 'opera', 'phantomjs' and 'remote'.
exist - 'firefox', 'chrome', 'safari', 'opera' and 'remote'.
If 'remote' is used it is essential to also set correct properties for
'selenium.hub.port', 'selenium.hub.path', 'selenium.hub.host',
'selenium.hub.protocol', 'selenium.grid.driver' and 'selenium.grid.binary'.
'selenium.hub.protocol', 'selenium.grid.driver', 'selenium.grid.binary'
and 'selenium.enable.headless'.
</description>
</property>

Expand Down Expand Up @@ -2560,8 +2561,9 @@ visit https://wiki.apache.org/nutch/SimilarityScoringFilter-->
<name>selenium.grid.driver</name>
<value>firefox</value>
<description>A String value representing the flavour of Selenium
WebDriver() used on the selenium grid. Currently the following options
exist - 'firefox', 'phantomjs' </description>
WebDriver() used on the selenium grid. We must set `selenium.driver` to `remote` first.
Currently the following options
exist - 'firefox', 'chrome', 'random' </description>
</property>

<property>
Expand All @@ -2572,6 +2574,14 @@ visit https://wiki.apache.org/nutch/SimilarityScoringFilter-->
</description>
</property>

<!-- headless options for Firefox and Chrome-->
<property>
<name>selenium.enable.headless</name>
<value>false</value>
<description>A Boolean value representing the headless option
for Firefox and Chrome drivers
</description>
</property>
<!-- selenium firefox configuration;
applies to protocol-selenium and protocol-interactiveselenium plugins -->
<property>
Expand Down Expand Up @@ -2622,6 +2632,14 @@ visit https://wiki.apache.org/nutch/SimilarityScoringFilter-->
Currently this option exists for - 'firefox' </description>
</property>

<!-- selenium chrome configurations -->
<property>
<name>webdriver.chrome.driver</name>
<value>/root/chromedriver</value>
<description>The path to the ChromeDriver binary</description>
</property>
<!-- end of selenium chrome configurations -->

<!-- protocol-interactiveselenium configuration -->
<property>
<name>interactiveselenium.handlers</name>
Expand Down
13 changes: 13 additions & 0 deletions src/plugin/lib-selenium/README.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,13 @@
# Updates
* The use of phantomjs has been deprecated. Check [Wikipedia](https://en.wikipedia.org/wiki/PhantomJS) for more info.
* The updated code for the Safari WebDriver is under development because, starting with Safari 10 on OS X El Capitan and macOS Sierra, Safari comes bundled with a new driver implementation.
* The Opera driver is now based on ChromeDriver, adapted by Opera to enable programmatic automation of Chromium-based Opera products, but it hasn't been updated since April 5, 2017. We have suspended its support and removed it from the code ([link](https://github.com/operasoftware/operachromiumdriver)).
* Headless mode has been added for Chrome and Firefox. Set `selenium.enable.headless` to `true` in nutch-default.xml or nutch-site.xml to use it.


You can run Nutch in Docker. Check some examples at https://github.com/sbatururimi/nutch-test.
Don't forget to update the Dockerfile to point to the original Nutch repository when updated.

# Contributors
Stas Batururimi [s.batururimi@gmail.com]

2 changes: 1 addition & 1 deletion src/plugin/lib-selenium/build-ivy.xml
Original file line number Diff line number Diff line change
Expand Up @@ -17,7 +17,7 @@
-->
<project name="lib-selenium" default="deps-jar" xmlns:ivy="antlib:org.apache.ivy.ant">

<property name="ivy.install.version" value="2.1.0" />
<property name="ivy.install.version" value="2.4.0" />
<condition property="ivy.home" value="${env.IVY_HOME}">
<isset property="env.IVY_HOME" />
</condition>
Expand Down
11 changes: 4 additions & 7 deletions src/plugin/lib-selenium/ivy.xml
Original file line number Diff line number Diff line change
Expand Up @@ -37,16 +37,13 @@

<dependencies>
<!-- begin selenium dependencies -->
<dependency org="org.seleniumhq.selenium" name="selenium-java" rev="2.48.2" />

<dependency org="org.seleniumhq.selenium" name="selenium-java" rev="3.141.5" />
<!--
<dependency org="com.opera" name="operadriver" rev="1.5">
<exclude org="org.seleniumhq.selenium" name="selenium-remote-driver" />
</dependency>
<dependency org="com.codeborne" name="phantomjsdriver" rev="1.2.1" >
<exclude org="org.seleniumhq.selenium" name="selenium-remote-driver" />
<exclude org="org.seleniumhq.selenium" name="selenium-java" />
</dependency>
-->
<!-- end selenium dependencies -->
</dependencies>

</ivy-module>
120 changes: 19 additions & 101 deletions src/plugin/lib-selenium/plugin.xml
Original file line number Diff line number Diff line change
Expand Up @@ -29,147 +29,65 @@
<export name="*"/>
</library>
<!-- all classes from dependent libraries are exported -->
<library name="cglib-nodep-2.1_3.jar">
<library name="animal-sniffer-annotations-1.14.jar">
<export name="*"/>
</library>
<library name="commons-codec-1.10.jar">
<library name="byte-buddy-1.8.15.jar">
<export name="*"/>
</library>
<library name="commons-collections-3.2.1.jar">
<library name="checker-compat-qual-2.0.0.jar">
<export name="*"/>
</library>
<library name="commons-exec-1.3.jar">
<export name="*"/>
</library>
<library name="commons-io-2.4.jar">
<library name="error_prone_annotations-2.1.3.jar">
<export name="*"/>
</library>
<library name="commons-jxpath-1.3.jar">
<library name="guava-25.0-jre.jar">
<export name="*"/>
</library>
<library name="commons-lang3-3.4.jar">
<library name="j2objc-annotations-1.1.jar">
<export name="*"/>
</library>
<library name="commons-logging-1.2.jar">
<library name="jsr305-1.3.9.jar">
<export name="*"/>
</library>
<library name="cssparser-0.9.16.jar">
<library name="okhttp-3.11.0.jar">
<export name="*"/>
</library>
<library name="gson-2.3.1.jar">
<library name="okio-1.14.0.jar">
<export name="*"/>
</library>
<library name="guava-18.0.jar">
<library name="selenium-api-3.141.5.jar">
<export name="*"/>
</library>
<library name="htmlunit-2.18.jar">
<library name="selenium-chrome-driver-3.141.5.jar">
<export name="*"/>
</library>
<library name="htmlunit-core-js-2.17.jar">
<library name="selenium-edge-driver-3.141.5.jar">
<export name="*"/>
</library>
<library name="httpclient-4.5.1.jar">
<library name="selenium-firefox-driver-3.141.5.jar">
<export name="*"/>
</library>
<library name="httpcore-4.4.3.jar">
<library name="selenium-ie-driver-3.141.5.jar">
<export name="*"/>
</library>
<library name="httpmime-4.5.jar">
<library name="selenium-java-3.141.5.jar">
<export name="*"/>
</library>
<library name="ini4j-0.5.2.jar">
<library name="selenium-opera-driver-3.141.5.jar">
<export name="*"/>
</library>
<library name="jetty-io-9.2.12.v20150709.jar">
<library name="selenium-remote-driver-3.141.5.jar">
<export name="*"/>
</library>
<library name="jetty-util-9.2.12.v20150709.jar">
<library name="selenium-safari-driver-3.141.5.jar">
<export name="*"/>
</library>
<library name="jna-4.1.0.jar">
<export name="*"/>
</library>
<library name="jna-platform-4.1.0.jar">
<export name="*"/>
</library>
<library name="nekohtml-1.9.22.jar">
<export name="*"/>
</library>
<library name="netty-3.5.2.Final.jar">
<export name="*"/>
</library>
<library name="operadriver-1.5.jar">
<export name="*"/>
</library>
<library name="operalaunchers-1.1.jar">
<export name="*"/>
</library>
<library name="phantomjsdriver-1.2.1.jar">
<export name="*"/>
</library>
<library name="protobuf-java-2.4.1.jar">
<export name="*"/>
</library>
<library name="sac-1.3.jar">
<export name="*"/>
</library>
<library name="selenium-api-2.48.2.jar">
<export name="*"/>
</library>
<library name="selenium-chrome-driver-2.48.2.jar">
<export name="*"/>
</library>
<library name="selenium-edge-driver-2.48.2.jar">
<export name="*"/>
</library>
<library name="selenium-firefox-driver-2.48.2.jar">
<export name="*"/>
</library>
<library name="selenium-htmlunit-driver-2.48.2.jar">
<export name="*"/>
</library>
<library name="selenium-ie-driver-2.48.2.jar">
<export name="*"/>
</library>
<library name="selenium-java-2.48.2.jar">
<export name="*"/>
</library>
<library name="selenium-leg-rc-2.48.2.jar">
<export name="*"/>
</library>
<library name="selenium-remote-driver-2.48.2.jar">
<export name="*"/>
</library>
<library name="selenium-safari-driver-2.48.2.jar">
<export name="*"/>
</library>
<library name="selenium-support-2.48.2.jar">
<export name="*"/>
</library>
<library name="serializer-2.7.2.jar">
<export name="*"/>
</library>
<library name="webbit-0.4.14.jar">
<export name="*"/>
</library>
<library name="websocket-api-9.2.12.v20150709.jar">
<export name="*"/>
</library>
<library name="websocket-client-9.2.12.v20150709.jar">
<export name="*"/>
</library>
<library name="websocket-common-9.2.12.v20150709.jar">
<export name="*"/>
</library>
<library name="xalan-2.7.2.jar">
<export name="*"/>
</library>
<library name="xercesImpl-2.11.0.jar">
<export name="*"/>
</library>
<library name="xml-apis-1.4.01.jar">
<library name="selenium-support-3.141.5.jar">
<export name="*"/>
</library>
</runtime>

</plugin>
Loading

0 comments on commit dfd8602

Please sign in to comment.