Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Topic/fix load trait props script #4703

Merged
merged 13 commits into from
Oct 20, 2023
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
111 changes: 56 additions & 55 deletions bin/load_trait_props.pl
Original file line number Diff line number Diff line change
Expand Up @@ -65,13 +65,13 @@ =head2 TODO


# Print the usage message for this script to STDERR.
# Takes no arguments. The message is emitted exactly once per call.
sub print_help {
    print STDERR "A script to load trait properties\nUsage: load_trait_props.pl -D [database name] -H [database host, e.g., localhost] -I [input file] -o [ontology namespace, e.g., CO] -w\n\t-w\toverwrite existing trait properties if they exist (optional)\n\t-t\ttest run. roll back at the end\n";
}


# -D (database name), -H (database host), -I (input file) and -o (ontology
# namespace) are all mandatory: show the usage text, then abort.
if (!$opt_D || !$opt_H || !$opt_I || !$opt_o) {
    print_help();
    die("Exiting: options missing\n");
}

# Match a dot, extension .xls / .xlsx
Expand All @@ -89,92 +89,93 @@ sub print_help {
# Parse the spreadsheet given with -I; a false result means the parser failed
# and its own error text is reported.
my $excel_obj = $parser->parse($opt_I);

if ( !$excel_obj ) {
    die "Input file error: ".$parser->error()."\n";
}

my $worksheet = ( $excel_obj->worksheets() )[0]; #support only one worksheet
my ( $row_min, $row_max ) = $worksheet->row_range();
my ( $col_min, $col_max ) = $worksheet->col_range();

#must have header and at least one row of phenotypes
if ( ($col_max - $col_min) < 1 || ($row_max - $row_min) < 1 ) {
    die "Input file error: spreadsheet is missing header\n";
}

# The first header cell (row 0, column 0) must be the literal "trait_name".
my $trait_name_head;

if ($worksheet->get_cell(0,0)) {
    $trait_name_head = $worksheet->get_cell(0,0)->value();
}

if (!$trait_name_head || $trait_name_head ne 'trait_name') {
    die "Input file error: no \"trait_name\" in header\n";
}

# Property columns, in the exact order they must appear after "trait_name".
my @trait_property_names = qw(
    trait_format
    trait_default_value
    trait_minimum
    trait_maximum
    trait_categories
    trait_details
);

#check header for property names
for my $property_index ( 0 .. $#trait_property_names ) {
    my $property_name = $trait_property_names[$property_index];

    # Column 0 holds trait_name, so property N lives in column N + 1.
    my $header_cell = $worksheet->get_cell(0, $property_index + 1);

    if ( !$header_cell || $header_cell->value() ne $property_name ) {
        die "Input file error: no \"$property_name\" in header\n";
    }
}

# One hash ref per data row: trait_name plus every non-empty property.
my @trait_props_data;

for my $row ( 1 .. $row_max ) {
    my $name_cell = $worksheet->get_cell($row,0);
    next if !$name_cell; #skip blank lines

    my %trait_props;
    my $trait_name = $name_cell->value();
    $trait_props{'trait_name'} = $trait_name;

    my $prop_column = 1;
    foreach my $property_name (@trait_property_names) {
        my $cell = $worksheet->get_cell($row,$prop_column);
        $prop_column++;
        next if !$cell;

        # Test definedness rather than truth so that a legitimate "0" value
        # is kept; only undef and the empty string are skipped.
        my $value = $cell->value();
        if (defined($value) && ($value ne '')) {
            $trait_props{$property_name} = $value;
        }
    }

    push @trait_props_data, \%trait_props;
}

# NOTE(review): AutoCommit is enabled here while the -t help text promises a
# rollback at the end of a test run -- confirm where that rollback happens.
my $dbh = CXGN::DB::InsertDBH
    ->new({
        dbname => $opt_D,
        dbhost => $opt_H,
        dbargs => {AutoCommit => 1,
                   RaiseError => 1},
    });

# Normalise the -w and -t switches to plain 0/1 flags.
my $overwrite_existing_props = $opt_w ? 1 : 0;

my $is_test_run = $opt_t ? 1 : 0;

my $chado_schema = Bio::Chado::Schema->connect( sub { $dbh->get_actual_dbh() } );
Expand All @@ -187,20 +188,20 @@ sub print_help {
# Validate the parsed rows first; nothing is stored unless validation passes.
my $validate=$trait_props->validate();

if (!$validate) {
    die("input data is not valid\n");
}
print STDERR "input data is valid\n";

print STDERR "Storing data...\t\t";
my $store = $trait_props->store();

if ($store) {
    print STDERR "successfully stored data\n";
}
elsif (!$is_test_run) {
    # A false result from store() is fatal only outside a test run (-t);
    # during a test run it is passed over without a message.
    die("\n\nerror storing data\n");
}


Expand Down
50 changes: 50 additions & 0 deletions lib/CXGN/Cvterm.pm
Original file line number Diff line number Diff line change
Expand Up @@ -31,58 +31,108 @@ use base qw / CXGN::DB::Object / ;

use Try::Tiny;

=head2 accessor schema

Read-write Bio::Chado::Schema object. Required at construction time.

=cut

has 'schema' => (
    is       => 'rw',
    isa      => 'Bio::Chado::Schema',
    required => 1,
);

=head2 accessor cvterm

Returns: Cv::Cvterm DBIx::Class object

=cut

has 'cvterm' => (
    is  => 'rw',
    isa => 'Bio::Chado::Schema::Result::Cv::Cvterm',
);

=head2 accessor cvterm_id

Read-write integer attribute.

=cut

has 'cvterm_id' => (
    is  => 'rw',
    isa => 'Int',
);

=head2 accessor cv

Read-write Bio::Chado::Schema::Result::Cv::Cv object.

=cut

has 'cv' => (
    is  => 'rw',
    isa => 'Bio::Chado::Schema::Result::Cv::Cv',
);

=head2 accessor cv_id

Read-write integer attribute.

=cut

has 'cv_id' => (
    is  => 'rw',
    isa => 'Int',
);

=head2 accessor dbxref

Read-write Bio::Chado::Schema::Result::General::Dbxref object.

=cut

has 'dbxref' => (
    is  => 'rw',
    isa => 'Bio::Chado::Schema::Result::General::Dbxref',
);

=head2 accessor db

Read-write Bio::Chado::Schema::Result::General::Db object.

=cut

has 'db' => (
    is  => 'rw',
    isa => 'Bio::Chado::Schema::Result::General::Db',
);

=head2 accessor name

Read-write string attribute.

=cut

has 'name' => (
    is  => 'rw',
    isa => 'Str',
);

=head2 accessor definition

Read-write string attribute.

=cut

has 'definition' => (
    is  => 'rw',
    isa => 'Str',
);

=head2 accessor is_obsolete

Read-write boolean attribute. Defaults to 0.

=cut


has 'is_obsolete' => (
    is      => 'rw',
    isa     => 'Bool',
    default => 0,
);


=head2 accessor accession

refers to dbxref.accession column

=cut

has 'accession' => (
isa => 'Maybe[Str]',
is => 'rw',
Expand Down
Loading