new function text_extract

davidgohel · Feb 3, 2015 · 377be1e · 377be1e
1 parent accb9d8
commit 377be1e
Show file tree

Hide file tree

Showing 5 changed files with 73 additions and 0 deletions.
diff --git a/NAMESPACE b/NAMESPACE
@@ -149,6 +149,7 @@ export(textBoldItalic)
 export(textItalic)
 export(textNormal)
 export(textProperties)
+export(text_extract)
 export(toc.options)
 export(triggerPostCommand)
 export(vanilla.table)

diff --git a/R/text_extract.R b/R/text_extract.R
@@ -0,0 +1,32 @@
+#' @title Simple Text Extraction From a Word Document
+#'
+#' @description
+#' Provides a simple method to get text from a docx document. 
+#' It returns a \code{character} vector containing all 
+#' chunks of text found in the document.
+#' @details
+#' When \code{bookmark} is supplied, only that bookmark is scanned and 
+#' arguments \code{body}, \code{header} and \code{footer} are ignored. 
+#' The bookmark id is converted to lower case before it is looked up.
+#' @param x \code{\link{docx}} object
+#' @param body a single logical value ; specifies to scan document body 
+#' @param header a single logical value ; specifies to scan document header 
+#' @param footer a single logical value ; specifies to scan document footer 
+#' @param bookmark a character value ; id of the Word bookmark to scan. 
+#' If missing, the document parts selected by \code{body}, \code{header} 
+#' and \code{footer} are scanned instead.
+#' @return a character vector
+#' @examples 
+#' #START_TAG_TEST
+#' doc = docx( title = "My example", template = file.path( 
+#'   find.package("ReporteRs"), "templates/bookmark_example.docx") )
+#' text_extract( doc )
+#' text_extract( doc, header = FALSE, footer = FALSE )
+#' text_extract( doc, bookmark = "author" )
+#' @example examples/STOP_TAG_TEST.R
+#' @seealso \code{\link{docx}}
+#' @export
+text_extract = function( x, body = TRUE, header = TRUE, footer = TRUE, bookmark){
+	if( missing( bookmark ) ){
+		# The flags are handed to the Java side as-is; validate them here so
+		# that a bad value yields a readable R error rather than a Java one.
+		flags = list( body = body, header = header, footer = footer )
+		for( flag_name in names( flags ) ){
+			flag = flags[[flag_name]]
+			if( length( flag ) != 1 || !is.logical( flag ) || is.na( flag ) )
+				stop( flag_name, " must be a single TRUE or FALSE value." )
+		}
+		out = .jcall(x$obj, "[S", "getWords", body, header, footer)
+	} else {
+		if( length( bookmark ) != 1 || !is.character(bookmark))
+			stop("bookmark must be an atomic character.")
+		# NA_character_ passes the check above but is not a usable bookmark id.
+		if( is.na( bookmark ) )
+			stop("bookmark must not be NA.")
+		# Bookmark ids are looked up in lower case.
+		out = .jcall(x$obj, "[S", "getWords", casefold( bookmark, upper = FALSE ) )
+	}
+	out
+}
diff --git a/inst/java/ReporteRs-0.7.jar b/inst/java/ReporteRs-0.7.jar
diff --git a/java/ReporteRs-0.7-src.jar b/java/ReporteRs-0.7-src.jar
diff --git a/man/text_extract.Rd b/man/text_extract.Rd
@@ -0,0 +1,40 @@
+% Generated by roxygen2 (4.1.0): do not edit by hand
+% Please edit documentation in R/text_extract.R
+\name{text_extract}
+\alias{text_extract}
+\title{Simple Text Extraction From a Word Document}
+\usage{
+text_extract(x, body = TRUE, header = TRUE, footer = TRUE, bookmark)
+}
+\arguments{
+\item{x}{\code{\link{docx}} object}
+
+\item{body}{specifies to scan document body}
+
+\item{header}{specifies to scan document header}
+
+\item{footer}{specifies to scan document footer}
+
+\item{bookmark}{a character value ; id of the Word bookmark to scan.}
+}
+\value{
+a character vector
+}
+\description{
+Provides a simple method to get text from a docx document.
+It returns a \code{character} vector containing all
+chunks of text found in the document.
+}
+\examples{
+#START_TAG_TEST
+doc = docx( title = "My example", template = file.path(
+  find.package("ReporteRs"), "templates/bookmark_example.docx") )
+text_extract( doc )
+text_extract( doc, header = FALSE, footer = FALSE )
+text_extract( doc, bookmark = "author" )
+#STOP_TAG_TEST
+}
+\seealso{
+\code{\link{docx}}
+}
+