forked from databricks/containers
-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathDockerfile
56 lines (48 loc) · 2.71 KB
/
Dockerfile
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
# Base image: the Databricks minimal runtime, "experimental" tag.
FROM databricksruntime/minimal:experimental
# Suppress interactive configuration prompts during package installation.
# Because this is set with ENV (not ARG), the value also persists in the
# environment of containers started from the resulting image.
ENV DEBIAN_FRONTEND=noninteractive
# Install Python 3.10 and virtualenv for Spark and notebooks.
# The apt package index is deleted in the same layer that created it, so it
# does not add dead weight to the image; every later apt step in this file
# runs its own `apt-get update` first.
RUN apt-get update \
  && apt-get install -y \
    python3.10 \
    virtualenv \
  && rm -rf /var/lib/apt/lists/*
# Temporarily add the CRAN apt source for Ubuntu to install a current r-base
# (the "cran40" suite, i.e. the R 4.x series).
# The signing key of marutter@gmail.com is referenced by its full fingerprint
# (more secure than a short key ID) and fetched via port 80 to avoid firewall
# issues for some users, following:
# https://cran.rstudio.com/bin/linux/ubuntu/#secure-apt
# The build already runs with enough privilege for apt-get, so apt-key is
# invoked directly instead of through sudo.
# After the install, the extra apt source and key are removed again and the
# apt caches and temp directories are cleaned within this same layer.
RUN apt-get update \
  && apt-get install --yes software-properties-common apt-transport-https \
  && gpg --keyserver hkp://keyserver.ubuntu.com:80 --recv-keys E298A3A825C0D65DFD57CBB651716619E084DAB9 \
  && gpg -a --export E298A3A825C0D65DFD57CBB651716619E084DAB9 | apt-key add - \
  && add-apt-repository -y "deb [arch=amd64,i386] https://cran.rstudio.com/bin/linux/ubuntu $(lsb_release -cs)-cran40/" \
  && apt-get update \
  && apt-get install --yes \
    libssl-dev \
    r-base \
    r-base-dev \
  && add-apt-repository -r "deb [arch=amd64,i386] https://cran.rstudio.com/bin/linux/ubuntu $(lsb_release -cs)-cran40/" \
  && apt-key del E298A3A825C0D65DFD57CBB651716619E084DAB9 \
  && apt-get clean \
  && rm -rf /var/lib/apt/lists/* /tmp/* /var/tmp/*
# hwriterPlus is used by Databricks to display output in notebook cells.
# hwriterPlus has been removed from CRAN for newer versions of R, so it is
# installed from a pinned tarball in the CRAN archive; its dependencies are
# installed first from the Posit package manager.
# Rserve allows Spark to communicate with a local R process to run R code.
# install.packages() only emits a warning when an installation fails, which
# would let the image build "successfully" without the package. Each step
# therefore verifies the package is present and aborts the build otherwise.
# NOTE(review): Rserve is fetched over plain HTTP; consider switching to an
# HTTPS repository URL once one is confirmed to serve the package.
RUN R -e "options(repos = list(CRAN = 'https://packagemanager.posit.co/cran/latest')); pkgs <- c('hwriter', 'TeachingDemos', 'htmltools'); install.packages(pkgs); stopifnot(all(pkgs %in% rownames(installed.packages())))" \
  && R -e "install.packages('https://cran.r-project.org/src/contrib/Archive/hwriterPlus/hwriterPlus_1.0-3.tar.gz', repos=NULL, type='source'); stopifnot('hwriterPlus' %in% rownames(installed.packages()))" \
  && R -e "install.packages('Rserve', repos='http://rforge.net/'); stopifnot('Rserve' %in% rownames(installed.packages()))"
# Additional instructions to set up RStudio. If you don't need RStudio, you can
# omit the commands below from your Dockerfile. Even with them, you still need
# an init script to start the RStudio daemon (see README.md for details).
# Databricks configuration for RStudio sessions: copies Rprofile.site from the
# build context into R's etc directory.
COPY Rprofile.site /usr/lib/R/etc/Rprofile.site
# RStudio Server installation.
# gdebi-core installs the downloaded .deb together with its dependencies, and
# wget fetches the pinned RStudio Server release; both are installed in a
# single apt-get call (the original installed gdebi-core twice).
# `rstudio-server version` is a smoke test that the install succeeded.
# The downloaded .deb and the apt package index are deleted in this same layer
# so they do not persist in the image.
RUN apt-get update \
  && apt-get install -y \
    gdebi-core \
    wget \
  && wget https://download2.rstudio.org/server/jammy/amd64/rstudio-server-2022.12.0-353-amd64.deb \
  && gdebi -n rstudio-server-2022.12.0-353-amd64.deb \
  && rm -f rstudio-server-2022.12.0-353-amd64.deb \
  && rstudio-server version \
  && rm -rf /var/lib/apt/lists/*
# Create the default Python environment used by Spark and notebooks at
# /databricks/python3, built on python3.10 and with access to the
# system-wide site-packages.
RUN virtualenv \
    --python=python3.10 \
    --system-site-packages \
    /databricks/python3