Skip to content

Commit

Permalink
created parser regression test for nested sublocations. (#385)
Browse files Browse the repository at this point in the history
* created parser regression test for nested sublocations.

* integrating @abondrn's fix to genbank parser.

* fixed lint issues.

* added data for regression test.

Co-authored-by: Alex <abondarenko101@gmail.com>
Co-authored-by: Alex <abondrn@users.noreply.github.com>
  • Loading branch information
3 people authored Oct 11, 2023
1 parent 5bd6a5d commit f2310db
Show file tree
Hide file tree
Showing 3 changed files with 50 additions and 14 deletions.
1 change: 1 addition & 0 deletions data/parseLocationRegressionTest.json
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
{"start":0,"end":0,"complement":false,"join":true,"five_prime_partial":false,"three_prime_partial":false,"gbk_location_string":"join(complement(5306942..5307394),complement(5304401..5305029),complement(5303328..5303393),complement(5301928..5302004))","sub_locations":[{"start":5306941,"end":5307394,"complement":true,"join":false,"five_prime_partial":false,"three_prime_partial":false,"gbk_location_string":"join(complement(5306942..5307394),complement(5304401..5305029),complement(5303328..5303393),complement(5301928..5302004))","sub_locations":null},{"start":5304400,"end":5305029,"complement":true,"join":false,"five_prime_partial":false,"three_prime_partial":false,"gbk_location_string":"join(complement(5306942..5307394),complement(5304401..5305029),complement(5303328..5303393),complement(5301928..5302004))","sub_locations":null},{"start":5303327,"end":5303393,"complement":true,"join":false,"five_prime_partial":false,"three_prime_partial":false,"gbk_location_string":"join(complement(5306942..5307394),complement(5304401..5305029),complement(5303328..5303393),complement(5301928..5302004))","sub_locations":null},{"start":5301927,"end":5302004,"complement":true,"join":false,"five_prime_partial":false,"three_prime_partial":false,"gbk_location_string":"join(complement(5306942..5307394),complement(5304401..5305029),complement(5303328..5303393),complement(5301928..5302004))","sub_locations":null}]}
36 changes: 22 additions & 14 deletions io/genbank/genbank.go
Original file line number Diff line number Diff line change
Expand Up @@ -811,8 +811,8 @@ func getSourceOrganism(metadataData []string) (string, string, []string) {
func parseLocation(locationString string) (Location, error) {
var location Location
location.GbkLocationString = locationString
if !(strings.ContainsAny(locationString, "(")) { // Case checks for simple expression of x..x
if !(strings.ContainsAny(locationString, ".")) { //Case checks for simple expression x
if !strings.ContainsAny(locationString, "(") { // Case checks for simple expression of x..x
if !strings.ContainsAny(locationString, ".") { //Case checks for simple expression x
position, err := strconv.Atoi(locationString)
if err != nil {
return Location{}, err
Expand Down Expand Up @@ -841,26 +841,34 @@ func parseLocation(locationString string) (Location, error) {
if strings.ContainsAny(expression, "(") {
firstInnerParentheses := strings.Index(expression, "(")
ParenthesesCount := 1
comma := 0
for i := 1; ParenthesesCount > 0; i++ { // "(" is at 0, so we start at 1
comma = i
switch expression[firstInnerParentheses+i] {
case []byte("(")[0]:
prevSubLocationStart := 0
for i := firstInnerParentheses + 1; i < len(expression); i++ { // "(" is at 0, so we start at 1
switch expression[i] {
case '(':
ParenthesesCount++
case []byte(")")[0]:
case ')':
ParenthesesCount--
case ',':
if ParenthesesCount == 0 {
parsedSubLocation, err := parseLocation(expression[prevSubLocationStart:i])
if err != nil {
return Location{}, err
}
parsedSubLocation.GbkLocationString = locationString
location.SubLocations = append(location.SubLocations, parsedSubLocation)
prevSubLocationStart = i + 1
}
}
}
parseLeftLocation, err := parseLocation(expression[:firstInnerParentheses+comma+1])
if err != nil {
return Location{}, err
if ParenthesesCount != 0 {
return Location{}, fmt.Errorf("Unbalanced parentheses")
}
parseRightLocation, err := parseLocation(expression[2+firstInnerParentheses+comma:])
parsedSubLocation, err := parseLocation(expression[prevSubLocationStart:])
if err != nil {
return Location{}, err
}

location.SubLocations = append(location.SubLocations, parseLeftLocation, parseRightLocation)
parsedSubLocation.GbkLocationString = locationString
location.SubLocations = append(location.SubLocations, parsedSubLocation)
} else { // This is the default join(x..x,x..x)
for _, numberRange := range strings.Split(expression, ",") {
joinLocation, err := parseLocation(numberRange)
Expand Down
27 changes: 27 additions & 0 deletions io/genbank/genbank_test.go
Original file line number Diff line number Diff line change
@@ -1,7 +1,9 @@
package genbank

import (
"encoding/json"
"errors"
"fmt"
"io"
"os"
"path/filepath"
Expand Down Expand Up @@ -160,6 +162,31 @@ func TestPartialLocationParseRegression(t *testing.T) {
}
}

// TestSubLocationStringParseRegression is a regression test for parseLocation
// on a join() expression whose members are each wrapped in complement().
// The parsed result is compared against the expected Location stored as JSON
// in data/parseLocationRegressionTest.json.
func TestSubLocationStringParseRegression(t *testing.T) {
	location := "join(complement(5306942..5307394),complement(5304401..5305029),complement(5303328..5303393),complement(5301928..5302004))"
	parsedLocation, err := parseLocation(location)
	if err != nil {
		// Nothing meaningful can be compared once parsing has failed.
		t.Fatalf("Failed to parse location string. Got err: %s", err)
	}

	fixturePath := "../../data/parseLocationRegressionTest.json"
	jsonFile, err := os.Open(fixturePath)
	if err != nil {
		// Abort instead of printing and continuing: with no fixture, the
		// later steps would only fail with an unrelated JSON error.
		t.Fatal(fmt.Errorf("opening regression fixture %q: %w", fixturePath, err))
	}
	defer jsonFile.Close()

	byteValue, err := io.ReadAll(jsonFile)
	if err != nil {
		t.Fatal(fmt.Errorf("reading regression fixture %q: %w", fixturePath, err))
	}

	var testParsedLocation Location
	if err := json.Unmarshal(byteValue, &testParsedLocation); err != nil {
		// A fixture that does not decode would make the diff below meaningless.
		t.Fatalf("Failed to unmarshal json. Got err: %s", err)
	}

	if diff := cmp.Diff(parsedLocation, testParsedLocation); diff != "" {
		t.Errorf("Failed to parse sublocation string. Got this diff:\n%s", diff)
	}
}

func TestSnapgeneGenbankRegression(t *testing.T) {
snapgene, err := Read("../../data/puc19_snapgene.gb")

Expand Down

0 comments on commit f2310db

Please sign in to comment.