Skip to content

Commit

Permalink
Support all in-cell formats (#5)
Browse files Browse the repository at this point in the history
  • Loading branch information
nacnudus committed Oct 20, 2017
1 parent 6a2593d commit fbd752b
Show file tree
Hide file tree
Showing 3 changed files with 79 additions and 51 deletions.
Binary file modified inst/extdata/examples.xlsx
Binary file not shown.
130 changes: 79 additions & 51 deletions src/utils.h
Original file line number Diff line number Diff line change
Expand Up @@ -150,117 +150,145 @@ inline bool parseString(const rapidxml::xml_node<>* string, std::string& out) {
inline Rcpp::List parseFormattedString(
const rapidxml::xml_node<>* string,
xlsxstyles& styles) {

int n(0); // number of substrings
for (rapidxml::xml_node<>* node = string->first_node();
node != NULL;
node = node->next_sibling()) {
++n;
}

std::vector<int> bold;
std::vector<int> italic;
std::vector<int> underline;
std::vector<double> size;
Rcpp::CharacterVector character(n, NA_STRING);
Rcpp::LogicalVector bold(n, NA_LOGICAL);
Rcpp::LogicalVector italic(n, NA_LOGICAL);
Rcpp::CharacterVector underline(n, NA_STRING);
Rcpp::LogicalVector strike(n, NA_LOGICAL);
Rcpp::CharacterVector vertAlign(n, NA_STRING);
Rcpp::NumericVector size(n, NA_REAL);
Rcpp::CharacterVector color_rgb(n, NA_STRING);
std::vector<int> color_theme;
std::vector<int> color_indexed;
Rcpp::IntegerVector color_theme(n, NA_INTEGER);
Rcpp::NumericVector color_tint(n, NA_REAL);
Rcpp::IntegerVector color_indexed(n, NA_INTEGER);
Rcpp::CharacterVector font(n, NA_STRING);
std::vector<int> family;
Rcpp::IntegerVector family(n, NA_INTEGER);
Rcpp::CharacterVector scheme(n, NA_STRING);

int i(0); // ith substring
for (rapidxml::xml_node<>* node = string->first_node();
node != NULL;
node = node->next_sibling()) {

std::string node_name = node->name();
if (node_name == "t") {
bold.push_back(NA_LOGICAL);
italic.push_back(NA_LOGICAL);
underline.push_back(NA_LOGICAL);
size.push_back(NA_REAL);
color_theme.push_back(NA_INTEGER);
color_indexed.push_back(NA_INTEGER);
family.push_back(NA_INTEGER);

SET_STRING_ELT(character, i, Rf_mkCharCE(node->value(), CE_UTF8));

} else {

SET_STRING_ELT(character, i, Rf_mkCharCE(node->first_node("t")->value(), CE_UTF8));
rapidxml::xml_node<>* rPr = node->first_node("rPr");

if (rPr != NULL) {
bold.push_back(rPr->first_node("b") != NULL);
italic.push_back(rPr->first_node("i") != NULL);
underline.push_back(rPr->first_node("u") != NULL);

bold[i] = rPr->first_node("b") != NULL;
italic[i] = rPr->first_node("i") != NULL;

rapidxml::xml_node<>* u = rPr->first_node("u");
if (u != NULL) {
rapidxml::xml_attribute<>* u_val = u->first_attribute("val");
if (u_val != NULL) {
underline[i] = u_val->value();
} else {
underline[i] = "single";
}
}

strike[i] = rPr->first_node("strike") != NULL;

rapidxml::xml_node<>* vertAlign_node = rPr->first_node("vertAlign");
if (vertAlign_node != NULL) {
vertAlign[i] = vertAlign_node->first_attribute("val")->value();
}

rapidxml::xml_node<>* sz = rPr->first_node("sz");
if (sz != NULL) {
size.push_back(strtod(sz->value(), NULL));
} else {
size.push_back(NA_REAL);
size[i] = strtod(sz->value(), NULL);
}

rapidxml::xml_node<>* color = rPr->first_node("color");
if (color != NULL) {
rapidxml::xml_attribute<>* color_attr = color->first_attribute();
std::string attr_name = color_attr->name();
if (attr_name == "rgb") {
color_rgb[i] = color_attr->value();
color_theme.push_back(NA_INTEGER);
color_indexed.push_back(NA_INTEGER);
} else if (attr_name == "theme") {
int theme_int = strtol(color_attr->value(), NULL, 10) + 1;
color_rgb[i] = styles.theme_[theme_int - 1];
color_theme.push_back(theme_int);
color_indexed.push_back(NA_INTEGER);
} else if (attr_name == "indexed") {
// I haven't been able to force this in real life
int indexed_int = strtol(color_attr->value(), NULL, 10) + 1;
color_rgb[i] = styles.indexed_[indexed_int - 1];
color_theme.push_back(NA_INTEGER);
color_indexed.push_back(indexed_int);

rapidxml::xml_attribute<>* rgb_attr = color->first_attribute("rgb");
if (rgb_attr != NULL) {

color_rgb[i] = rgb_attr->value();

} else {

rapidxml::xml_attribute<>* theme_attr = color->first_attribute("theme");
if (theme_attr != NULL) {

int theme_int = strtol(theme_attr->value(), NULL, 10) + 1;
color_rgb[i] = styles.theme_[theme_int - 1];
color_theme[i] = theme_int;

rapidxml::xml_attribute<>* tint_attr = color->first_attribute("tint");
if (tint_attr != NULL) {
color_tint[i] = strtod(tint_attr->value(), NULL);
}

} else {

rapidxml::xml_attribute<>* indexed_attr = color->first_attribute("indexed");
if (indexed_attr != NULL) {
int indexed_int = strtol(indexed_attr->value(), NULL, 10) + 1;
color_rgb[i] = styles.indexed_[indexed_int - 1];
color_indexed[i] = indexed_int;
}

}
}
} else {
color_theme.push_back(NA_INTEGER);
color_indexed.push_back(NA_INTEGER);
}

rapidxml::xml_node<>* rFont = rPr->first_node("rFont");
if (rFont != NULL) {
font[i] = rFont->first_attribute("val")->value();
}

rapidxml::xml_node<>* family_node = rPr->first_node("family");
if (family_node != NULL) {
family.push_back(strtol(family_node->first_attribute("val")->value(), NULL, 10));
} else {
family.push_back(NA_INTEGER);
family[i] = strtol(family_node->first_attribute("val")->value(), NULL, 10);
}

rapidxml::xml_node<>* scheme_node = rPr->first_node("scheme");
if (scheme_node != NULL) {
scheme[i] = scheme_node->first_attribute("val")->value();
}
} else {
bold.push_back(NA_LOGICAL);
italic.push_back(NA_LOGICAL);
underline.push_back(NA_LOGICAL);
size.push_back(NA_REAL);
color_theme.push_back(NA_INTEGER);
color_indexed.push_back(NA_INTEGER);
family.push_back(NA_INTEGER);

}
}
++i;
}

Rcpp::List out = Rcpp::List::create(
Rcpp::_["character"] = character,
Rcpp::_["bold"] = bold,
Rcpp::_["italic"] = italic,
Rcpp::_["underline"] = underline,
Rcpp::_["strike"] = strike,
Rcpp::_["vertAlign"] = vertAlign,
Rcpp::_["size"] = size,
Rcpp::_["color_rgb"] = color_rgb,
Rcpp::_["color_theme"] = color_theme,
Rcpp::_["color_indexed"] = color_indexed,
Rcpp::_["font"] = font,
Rcpp::_["family"] = family,
Rcpp::_["scheme"] = scheme);

// Turn list of vectors into a data frame without checking anything
out.attr("class") = Rcpp::CharacterVector::create("tbl_df", "tbl", "data.frame");
out.attr("row.names") = Rcpp::IntegerVector::create(NA_INTEGER, -n); // Dunno how this works (the -n part)

return out;
}

Expand Down
Binary file modified tests/testthat/examples.xlsx
Binary file not shown.

0 comments on commit fbd752b

Please sign in to comment.