Skip to content

Commit

Permalink
Extend config for external commands (eikek#2536)
Browse files Browse the repository at this point in the history
Allows to configure external commands and provide different arguments
based on runtime values, like language. It extends the current config
of a command to allow a `arg-mappings` section. An example for
ocrmypdf:

```conf
ocrmypdf = {
  enabled = true
  command = {
    program = "ocrmypdf"
### new arg-mappings
    arg-mappings = {
      "mylang" = {
        value = "{{lang}}"
        mappings = [
          {
            matches = "deu"
            args = [ "-l", "deu", "--pdf-renderer", "sandwich" ]
          },
          {
            matches = ".*"
            args = [ "-l", "{{lang}}" ]
          }
        ]
      }
    }
#### end new arg-mappings
    args = [
      ### will be replaced with corresponding args from "mylang" mapping
      "{{mylang}}", 
      "--skip-text",
      "--deskew",
      "-j", "1",
      "{{infile}}",
      "{{outfile}}"
    ]
    timeout = "5 minutes"
  }
  working-dir = ${java.io.tmpdir}"/docspell-convert"
}
```

The whole section will be first processed to replace all `{{…}}`
patterns with corresponding values. Then `arg-mappings` will be looked
at and the first match (value == matches) in its `mappings` array is
used to replace its name in the arguments to the command.
  • Loading branch information
eikek authored and madduck committed May 14, 2024
1 parent 87a7178 commit 6767dee
Show file tree
Hide file tree
Showing 21 changed files with 368 additions and 357 deletions.
212 changes: 0 additions & 212 deletions modules/common/src/main/scala/docspell/common/SystemCommand.scala

This file was deleted.

3 changes: 3 additions & 0 deletions modules/common/src/main/scala/docspell/common/exec/Env.scala
Original file line number Diff line number Diff line change
Expand Up @@ -17,6 +17,9 @@ case class Env(values: Map[String, String]) {
def addAll(e: Env): Env =
Env(values ++ e.values)

def modifyValue(f: String => String): Env =
Env(values.view.mapValues(f).toMap)

def ++(e: Env) = addAll(e)

def foreach(f: (String, String) => Unit): Unit =
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,89 @@
/*
* Copyright 2020 Eike K. & Contributors
*
* SPDX-License-Identifier: AGPL-3.0-or-later
*/

package docspell.common.exec

import docspell.common.Duration
import docspell.common.Ident
import docspell.common.exec.Env
import docspell.common.exec.ExternalCommand.ArgMapping
import docspell.common.exec.SysCmd

final case class ExternalCommand(
program: String,
args: Seq[String],
timeout: Duration,
env: Map[String, String] = Map.empty,
argMappings: Map[Ident, ArgMapping] = Map.empty
) {
def withVars(vars: Map[String, String]): ExternalCommand.WithVars =
ExternalCommand.WithVars(this, vars)

import ExternalCommand.pattern

def resolve(vars: Map[String, String]): SysCmd = {
val replace = ExternalCommand.replaceString(vars) _
val resolvedArgMappings =
argMappings.view.mapValues(_.resolve(replace).firstMatch).toMap
val resolvedArgs = args.map(replace).flatMap { arg =>
resolvedArgMappings
.find(e => pattern(e._1.id) == arg)
.map(_._2)
.getOrElse(List(arg))
}

SysCmd(replace(program), resolvedArgs: _*)
.withTimeout(timeout)
.withEnv(_ => Env(env).modifyValue(replace))
}
}

object ExternalCommand {
private val openPattern = "{{"
private val closePattern = "}}"

private def pattern(s: String): String = s"${openPattern}${s}${closePattern}"

def apply(program: String, args: Seq[String], timeout: Duration): ExternalCommand =
ExternalCommand(program, args, timeout, Map.empty, Map.empty)

final case class ArgMapping(
value: String,
mappings: List[ArgMatch]
) {
private[exec] def resolve(replace: String => String): ArgMapping =
ArgMapping(replace(value), mappings.map(_.resolve(replace)))

def firstMatch: List[String] =
mappings.find(am => value.matches(am.matches)).map(_.args).getOrElse(Nil)
}

final case class ArgMatch(
matches: String,
args: List[String]
) {
private[exec] def resolve(replace: String => String): ArgMatch =
ArgMatch(replace(matches), args.map(replace))
}

private def replaceString(vars: Map[String, String])(in: String): String =
vars.foldLeft(in) { case (result, (name, value)) =>
val key = s"{{$name}}"
result.replace(key, value)
}

final case class WithVars(cmd: ExternalCommand, vars: Map[String, String]) {
def resolved: SysCmd = cmd.resolve(vars)
def append(more: (String, String)*): WithVars =
WithVars(cmd, vars ++ more.toMap)

def withVar(key: String, value: String): WithVars =
WithVars(cmd, vars.updated(key, value))

def withVarOption(key: String, value: Option[String]): WithVars =
value.map(withVar(key, _)).getOrElse(this)
}
}
53 changes: 53 additions & 0 deletions modules/common/src/main/scala/docspell/common/exec/SysExec.scala
Original file line number Diff line number Diff line change
Expand Up @@ -38,6 +38,20 @@ trait SysExec[F[_]] {

def waitFor(timeout: Option[Duration] = None): F[Int]

/** Uses `waitFor` and throws when return code is non-zero. Logs stderr and stdout while
* waiting.
*/
def runToSuccess(logger: Logger[F], timeout: Option[Duration] = None)(implicit
F: Async[F]
): F[Int]

/** Uses `waitFor` and throws when return code is non-zero. Logs stderr while waiting
* and collects stdout once finished successfully.
*/
def runToSuccessStdout(logger: Logger[F], timeout: Option[Duration] = None)(implicit
F: Async[F]
): F[String]

/** Sends a signal to the process to terminate it immediately */
def cancel: F[Unit]

Expand Down Expand Up @@ -75,6 +89,12 @@ object SysExec {
proc <- startProcess(logger, cmd, workdir, stdin)
fibers <- Resource.eval(Ref.of[F, List[F[Unit]]](Nil))
} yield new SysExec[F] {
private lazy val basicName: String =
cmd.program.lastIndexOf(java.io.File.separatorChar.toInt) match {
case n if n > 0 => cmd.program.drop(n + 1)
case _ => cmd.program.takeRight(16)
}

def stdout: Stream[F, Byte] =
fs2.io.readInputStream(
Sync[F].blocking(proc.getInputStream),
Expand Down Expand Up @@ -107,6 +127,39 @@ object SysExec {
)
}

def runToSuccess(logger: Logger[F], timeout: Option[Duration])(implicit
F: Async[F]
): F[Int] =
logOutputs(logger, basicName).use(_.waitFor(timeout).flatMap {
case rc if rc == 0 => Sync[F].pure(0)
case rc =>
Sync[F].raiseError(
new Exception(s"Command `${cmd.program}` returned non-zero exit code ${rc}")
)
})

def runToSuccessStdout(logger: Logger[F], timeout: Option[Duration])(implicit
F: Async[F]
): F[String] =
F.background(
stderrLines
.through(line => Stream.eval(logger.debug(s"[$basicName (err)]: $line")))
.compile
.drain
).use { f1 =>
waitFor(timeout)
.flatMap {
case rc if rc == 0 => stdout.through(fs2.text.utf8.decode).compile.string
case rc =>
Sync[F].raiseError[String](
new Exception(
s"Command `${cmd.program}` returned non-zero exit code ${rc}"
)
)
}
.flatTap(_ => f1)
}

def consumeOutputs(out: Pipe[F, String, Unit], err: Pipe[F, String, Unit])(implicit
F: Async[F]
): Resource[F, SysExec[F]] =
Expand Down
Loading

0 comments on commit 6767dee

Please sign in to comment.