Skip to content

Commit

Permalink
Use Python to make text report from html (#441)
Browse files Browse the repository at this point in the history
* Use Python to make text from html

---------

Co-authored-by: Tom Aldcroft <taldcroft@gmail.com>
  • Loading branch information
jeanconn and taldcroft authored May 13, 2024
1 parent dfc9947 commit 144485d
Show file tree
Hide file tree
Showing 2 changed files with 15 additions and 35 deletions.
38 changes: 3 additions & 35 deletions starcheck/src/starcheck.pl
Original file line number Diff line number Diff line change
Expand Up @@ -33,7 +33,6 @@

use Cwd qw( abs_path );

use HTML::TableExtract;
use Carp 'verbose';
$SIG{__DIE__} = sub { Carp::confess(@_) };

Expand Down Expand Up @@ -965,41 +964,10 @@ sub json_obsids {

# Write a plain-text version of the report to "${STARCHECK}.txt" when the
# "text" parameter is set.
#
# NOTE(review): this span looks like a diff rendering without +/- markers. The
# HTML::TableExtract section and the call_python section appear to be the
# removed and added versions of the same step (note the second
# "my $textout") -- confirm against the actual file before editing.
if ($par{text}) {

my $textout = io("${STARCHECK}.txt");

# Parse the HTML held in $out and walk every table found in it.
my $te = HTML::TableExtract->new();
$te->parse($out);

# Text of the retained tables, keyed by the two values returned by coords.
my %table;
foreach my $ts ($te->table_states) {

# print "Table (", join(',', $ts->coords), "):\n";
my $table_text = qq{};
my ($depth, $count) = $ts->coords;
# Only the first cell of each row is used; undefined cells are skipped.
foreach my $row ($ts->rows) {
$table_text .= $row->[0] . "\n" if defined $row->[0];
}
# Keep only tables whose collected text contains "OBSID".
if ($table_text =~ /OBSID/s) {
$table{$depth}{$count} = $table_text;
}
}

# use Data::Dumper;
# print Dumper %table;
# Emit the retained tables in numeric depth, then count, order.
for my $depth (sort { $a <=> $b } (keys %table)) {
for my $count (sort { $a <=> $b } (keys %{ $table{$depth} })) {

# print " $depth $count \n";
my $chunk = $table{$depth}{$count};
chomp($chunk);
# Collapse any trailing whitespace into a single newline.
$chunk =~ s/\s+$/\n/;
$textout->print("$chunk");
# Separator line written after each chunk.
$textout->print(
"==================================================================================== \n"
);
}
}

# Pass the HTML in $out to "utils.prehtml2text" via call_python and write the
# result to the text file. (call_python is defined outside this span;
# presumably it invokes the named Python function -- verify.)
my $textout = io("${STARCHECK}.txt");
$textout->print(call_python("utils.prehtml2text", [ $out ]));
$textout->close;
print STDERR "Wrote text report to $STARCHECK.txt\n";

}
Expand Down
12 changes: 12 additions & 0 deletions starcheck/utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,7 @@
import os
from pathlib import Path
import warnings
from bs4 import BeautifulSoup

import agasc
import cxotime
Expand Down Expand Up @@ -45,6 +46,17 @@
message=r"\nModel .* computed between .* clipping input mag\(s\) outside that range\.",
)

def prehtml2text(html_text):
    """
    Produce a plain-text rendering of the starcheck HTML report.

    The text of every ``<pre>`` element in ``html_text`` is extracted in
    document order. Each piece is followed by a newline and a rule of 84
    ``=`` characters, and the resulting sections are joined with newlines.

    :param html_text: HTML of the report
    :returns: str with the concatenated ``<pre>`` contents and separator
        rules (empty string if the HTML has no ``<pre>`` elements)
    """
    rule = "=" * 84
    parsed = BeautifulSoup(html_text, "lxml")

    # Only the <pre> elements are carried over into the text output.
    sections = []
    for pre_tag in parsed.find_all("pre"):
        sections.append(pre_tag.get_text() + "\n" + rule)

    return "\n".join(sections)


def date2secs(val):
"""Convert date to seconds since 1998.0"""
Expand Down

0 comments on commit 144485d

Please sign in to comment.