Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Pivotal # 182332696: TSV File List Serialization #571

Merged
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
Expand Up @@ -15,7 +15,7 @@ import java.io.OutputStream
internal class FileListTsvStreamDeserializer {
fun serializeFileList(files: Sequence<BioFile>, fileList: OutputStream) {
val writer = fileList.bufferedWriter()
writeHeaderNames(files.first(), writer)
processFirstFile(files.first(), writer)
Copy link
Contributor

@Juan-EBI Juan-EBI May 31, 2022

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Something does not look correct, if you notice we are writing two times now the attributes values of the first file. Once in line 35 and another in line 19. That probablythe reason unit test are failing.

This does not look correct. Do I am missing something?

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I think the problem was coming from the .first() method taking out the first element from the sequence. Then, when it starts iterating, it'll miss it since it's already processed

files.forEach { file -> writeAttributesValues(file, writer) }
writer.close()
}
Expand All @@ -28,10 +28,11 @@ internal class FileListTsvStreamDeserializer {
return reader.lineSequence().mapIndexed { index, row -> deserializeRow(index + 1, row.split(TAB), headers) }
}

private fun writeHeaderNames(firstFile: BioFile, writer: BufferedWriter) {
private fun processFirstFile(firstFile: BioFile, writer: BufferedWriter) {
val attrsNames = firstFile.attributes.map { it.name }
writer.write("Files".plus(TAB).plus(attrsNames.joinToString(TAB.toString())))
writer.newLine()
writeAttributesValues(firstFile, writer)
}

private fun writeAttributesValues(file: BioFile, writer: BufferedWriter) {
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -25,7 +25,7 @@ class FileListTsvStreamDeserializerTest(
) {
private val testInstance = FileListTsvStreamDeserializer()
private val tsvFile = tempFolder.createFile(TSV_FILE_LIST)
private val fileSystem = tempFolder.createFile("testFile.txt")
private val fileSystem = tempFolder.createFile("testFile.tsv")

@BeforeEach
fun beforeEach() {
Expand Down Expand Up @@ -55,16 +55,17 @@ class FileListTsvStreamDeserializerTest(

@Test
fun `serialize - deserialize FileList`() {
val files = (1..20_000).map {
var idx = 0
fun bioFile(it: Int): BioFile =
BioFile("folder/file$it.txt", attributes = listOf(Attribute("Attr1", "A$it"), Attribute("Attr2", "B$it")))
}.asSequence()
val iterator = files.iterator()

val files = sequence { yield(bioFile(idx++)) }

fileSystem.outputStream().use { testInstance.serializeFileList(files, it) }

fileSystem.inputStream().use {
testInstance.deserializeFileList(it).forEach { file ->
assertThat(file).isEqualToComparingFieldByField(iterator.next())
testInstance.deserializeFileList(it).forEachIndexed { idx, file ->
assertThat(file).isEqualToComparingFieldByField(bioFile(idx))
}
}
}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,12 @@ import ac.uk.ebi.biostd.itest.factory.allInOneSubmission
import ac.uk.ebi.biostd.itest.factory.assertAllInOneSubmissionJson
import ac.uk.ebi.biostd.itest.factory.assertAllInOneSubmissionTsv
import ac.uk.ebi.biostd.itest.factory.assertAllInOneSubmissionXml
import ac.uk.ebi.biostd.itest.factory.expectedAllInOneJsonFileList
import ac.uk.ebi.biostd.itest.factory.expectedAllInOneJsonInnerFileList
import ac.uk.ebi.biostd.itest.factory.expectedAllInOneTsvFileList
import ac.uk.ebi.biostd.itest.factory.expectedAllInOneTsvInnerFileList
import ac.uk.ebi.biostd.itest.factory.expectedAllInOneXmlFileList
import ac.uk.ebi.biostd.itest.factory.expectedAllInOneXmlInnerFileList
import ac.uk.ebi.biostd.persistence.common.service.SubmissionQueryService
import arrow.core.Either
import ebi.ac.uk.extended.mapping.to.ToSubmissionMapper
Expand All @@ -18,6 +24,8 @@ import ebi.ac.uk.io.ext.size
import ebi.ac.uk.util.collections.second
import ebi.ac.uk.util.collections.third
import org.assertj.core.api.Assertions.assertThat
import org.custommonkey.xmlunit.XMLAssert.assertXMLEqual
import org.custommonkey.xmlunit.XMLUnit
import java.io.File
import java.nio.file.Paths

Expand Down Expand Up @@ -48,7 +56,7 @@ internal class AllInOneSubmissionHelper(
assertAllInOneSubmissionTsv(getSubFileContent("$submissionFolderPath/$accNo.tsv"), accNo)
}

fun assertSubmissionFilesRecordsNfs(accNo: String) {
fun assertNfsPagetabFiles(accNo: String) {
val submission = submissionRepository.getExtByAccNo(accNo)
val subFolder = "$submissionPath/${submission.relPath}"

Expand All @@ -64,18 +72,21 @@ internal class AllInOneSubmissionHelper(
(submission.section.sections.first() as Either.Left).a.fileList!!.pageTabFiles as List<NfsFile>
assertThat(subFileListTabFiles).hasSize(3)
assertThat(subFileListTabFiles).isEqualTo(nfsTabFiles(subFolder, "sub-folder/file-list2"))

assertFileListsPagetabFiles(subFolder)
}

fun assertSubmissionFilesRecordsFire(accNo: String) {
fun assertFirePagetabFiles(accNo: String) {
val submission = submissionRepository.getExtByAccNo(accNo)
val subFolder = "$submissionPath/${submission.relPath}"

assertFireTabFiles(submission, accNo, subFolder)
assertFireFileListTabFiles(submission, subFolder)
assertFireSubFileListTabFiles(submission, subFolder)
assertFileListsPagetabFiles(subFolder)
}

private fun `assertFireTabFiles`(submission: ExtSubmission, accNo: String, subFolder: String) {
private fun assertFireTabFiles(submission: ExtSubmission, accNo: String, subFolder: String) {
val submissionTabFiles = submission.pageTabFiles as List<FireFile>
assertThat(submissionTabFiles).hasSize(3)

Expand Down Expand Up @@ -104,7 +115,7 @@ internal class AllInOneSubmissionHelper(
assertThat(tsvTabFile.size).isEqualTo(tsvFile.size())
}

private fun `assertFireFileListTabFiles`(submission: ExtSubmission, subFolder: String) {
private fun assertFireFileListTabFiles(submission: ExtSubmission, subFolder: String) {
val fileListTabFiles = submission.section.fileList!!.pageTabFiles as List<FireFile>
assertThat(fileListTabFiles).hasSize(3)

Expand Down Expand Up @@ -133,7 +144,7 @@ internal class AllInOneSubmissionHelper(
assertThat(tsvTabFile.size).isEqualTo(tsvFile.size())
}

private fun `assertFireSubFileListTabFiles`(submission: ExtSubmission, subFolder: String) {
private fun assertFireSubFileListTabFiles(submission: ExtSubmission, subFolder: String) {
val subFileListTabFiles =
(submission.section.sections.first() as Either.Left).a.fileList!!.pageTabFiles as List<FireFile>
assertThat(subFileListTabFiles).hasSize(3)
Expand Down Expand Up @@ -163,6 +174,24 @@ internal class AllInOneSubmissionHelper(
assertThat(tsvTabFile.size).isEqualTo(tsvFile.size())
}

private fun assertFileListsPagetabFiles(subFolder: String) {
val tsvFile = getSubFileContent("$subFolder/Files/file-list.tsv")
val tsvInnerFile = getSubFileContent("$subFolder/Files/sub-folder/file-list2.tsv")
assertThat(tsvFile).isEqualToIgnoringWhitespace(expectedAllInOneTsvFileList.toString())
assertThat(tsvInnerFile).isEqualToIgnoringWhitespace(expectedAllInOneTsvInnerFileList.toString())

val jsonFile = getSubFileContent("$subFolder/Files/file-list.json")
val jsonInnerFile = getSubFileContent("$subFolder/Files/sub-folder/file-list2.json")
assertThat(jsonFile).isEqualToIgnoringWhitespace(expectedAllInOneJsonFileList.toString())
assertThat(jsonInnerFile).isEqualToIgnoringWhitespace(expectedAllInOneJsonInnerFileList.toString())

val xmlFile = getSubFileContent("$subFolder/Files/file-list.xml")
val xmlInnerFile = getSubFileContent("$subFolder/Files/sub-folder/file-list2.xml")
XMLUnit.setIgnoreWhitespace(true)
assertXMLEqual(xmlFile, expectedAllInOneXmlFileList.toString())
assertXMLEqual(xmlInnerFile, expectedAllInOneXmlInnerFileList.toString())
}

private fun submissionNfsTabFiles(accNo: String, submissionFolderPath: String): List<NfsFile> {
val jsonFile = File("$submissionFolderPath/$accNo.json")
val xmlFile = File("$submissionFolderPath/$accNo.xml")
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,7 @@ package ac.uk.ebi.biostd.itest.factory

import com.jayway.jsonpath.matchers.JsonPathMatchers.isJson
import com.jayway.jsonpath.matchers.JsonPathMatchers.withJsonPath
import ebi.ac.uk.dsl.json.jsonArray
import ebi.ac.uk.model.Attribute
import ebi.ac.uk.model.BioFile
import ebi.ac.uk.model.FilesTable
Expand All @@ -12,6 +13,68 @@ import ebi.ac.uk.model.SectionsTable
import org.hamcrest.CoreMatchers.equalTo
import org.hamcrest.MatcherAssert.assertThat

internal val expectedAllInOneJsonFileList = jsonArray(
{
"path" to "DataFile5.txt"
"size" to 9
"attributes" to jsonArray(
{
"name" to "Type"
"value" to "referenced"
}, {
"name" to "md5"
"value" to "3F57CF2A5D7C2E6E46B52D26EA72621C"
}
)
"type" to "file"
},
{
"path" to "Folder1/DataFile6.txt"
"size" to 9
"attributes" to jsonArray(
{
"name" to "Type"
"value" to "referenced"
}, {
"name" to "md5"
"value" to "838559E92C5A52DEF29B9484C32DDCBB"
}
)
"type" to "file"
}
)

internal val expectedAllInOneJsonInnerFileList = jsonArray(
{
"path" to "DataFile7.txt"
"size" to 9
"attributes" to jsonArray(
{
"name" to "Type"
"value" to "referenced"
}, {
"name" to "md5"
"value" to "8723FD7A2E31D56966F94616ADF799B1"
}
)
"type" to "file"
},
{
"path" to "Folder1/DataFile8.txt"
"size" to 9
"attributes" to jsonArray(
{
"name" to "Type"
"value" to "referenced"
}, {
"name" to "md5"
"value" to "51F996F04CF87844A8BBFCD9E440AAEC"
}
)
"type" to "file"
}
)

fun assertAllInOneSubmissionJson(json: String, accNo: String) {
assertThat(json, isJson(withJsonPath("$.accno", equalTo(accNo))))
assertJsonAttributes(
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -93,3 +93,17 @@ fun assertAllInOneSubmissionTsv(tsv: String, accNo: String) {
private fun assertTsvBlock(lines: List<String>, from: Int, to: Int, expected: Tsv) {
assertThat(lines.subList(from - 1, to).joinToString("\n")).isEqualTo(expected.toString())
}

internal val expectedAllInOneTsvFileList = tsv {
line("Files", "Type", "md5")
line("DataFile5.txt", "referenced", "3F57CF2A5D7C2E6E46B52D26EA72621C")
line("Folder1/DataFile6.txt", "referenced", "838559E92C5A52DEF29B9484C32DDCBB")
line()
}

internal val expectedAllInOneTsvInnerFileList = tsv {
line("Files", "Type", "md5")
line("DataFile7.txt", "referenced", "8723FD7A2E31D56966F94616ADF799B1")
line("Folder1/DataFile8.txt", "referenced", "51F996F04CF87844A8BBFCD9E440AAEC")
line()
}
Original file line number Diff line number Diff line change
Expand Up @@ -9,8 +9,75 @@ import ebi.ac.uk.model.Section
import ebi.ac.uk.model.SectionsTable
import org.hamcrest.CoreMatchers.equalTo
import org.hamcrest.MatcherAssert.assertThat
import org.redundent.kotlin.xml.xml
import org.xmlunit.matchers.EvaluateXPathMatcher.hasXPath

internal val expectedAllInOneXmlFileList = xml("table") {
"file" {
attribute("size", 9)
"path" { -"DataFile5.txt" }
"type" { -"file" }
"attributes" {
"attribute" {
"name" { -"Type" }
"value" { -"referenced" }
}
"attribute" {
"name" { -"md5" }
"value" { -"3F57CF2A5D7C2E6E46B52D26EA72621C" }
}
}
}
"file" {
attribute("size", 9)
"path" { -"Folder1/DataFile6.txt" }
"type" { -"file" }
"attributes" {
"attribute" {
"name" { -"Type" }
"value" { -"referenced" }
}
"attribute" {
"name" { -"md5" }
"value" { -"838559E92C5A52DEF29B9484C32DDCBB" }
}
}
}
}

internal val expectedAllInOneXmlInnerFileList = xml("table") {
"file" {
attribute("size", 9)
"path" { -"DataFile7.txt" }
"type" { -"file" }
"attributes" {
"attribute" {
"name" { -"Type" }
"value" { -"referenced" }
}
"attribute" {
"name" { -"md5" }
"value" { -"8723FD7A2E31D56966F94616ADF799B1" }
}
}
}
"file" {
attribute("size", 9)
"path" { -"Folder1/DataFile8.txt" }
"type" { -"file" }
"attributes" {
"attribute" {
"name" { -"Type" }
"value" { -"referenced" }
}
"attribute" {
"name" { -"md5" }
"value" { -"51F996F04CF87844A8BBFCD9E440AAEC" }
}
}
}
}

fun assertAllInOneSubmissionXml(xml: String, accNo: String) {
assertThat(xml, hasXPath("//submission/@accno", equalTo(accNo)))
assertXmlAttributes(
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -55,8 +55,8 @@ class AllInOneSubmissionTest(
webClient.submitSingle(submission.readText(), TSV)

allInOneSubmissionHelper.assertSavedSubmission("S-EPMC124")
if (enableFire) allInOneSubmissionHelper.assertSubmissionFilesRecordsFire("S-EPMC124")
else allInOneSubmissionHelper.assertSubmissionFilesRecordsNfs("S-EPMC124")
if (enableFire) allInOneSubmissionHelper.assertFirePagetabFiles("S-EPMC124")
else allInOneSubmissionHelper.assertNfsPagetabFiles("S-EPMC124")
}

@Test
Expand All @@ -69,8 +69,8 @@ class AllInOneSubmissionTest(
webClient.submitSingle(submission.readText(), JSON)

allInOneSubmissionHelper.assertSavedSubmission("S-EPMC125")
if (enableFire) allInOneSubmissionHelper.assertSubmissionFilesRecordsFire("S-EPMC125")
else allInOneSubmissionHelper.assertSubmissionFilesRecordsNfs("S-EPMC125")
if (enableFire) allInOneSubmissionHelper.assertFirePagetabFiles("S-EPMC125")
else allInOneSubmissionHelper.assertNfsPagetabFiles("S-EPMC125")
}

@Test
Expand All @@ -83,7 +83,7 @@ class AllInOneSubmissionTest(
webClient.submitSingle(submission.readText(), XML)

allInOneSubmissionHelper.assertSavedSubmission("S-EPMC126")
if (enableFire) allInOneSubmissionHelper.assertSubmissionFilesRecordsFire("S-EPMC126")
else allInOneSubmissionHelper.assertSubmissionFilesRecordsNfs("S-EPMC126")
if (enableFire) allInOneSubmissionHelper.assertFirePagetabFiles("S-EPMC126")
else allInOneSubmissionHelper.assertNfsPagetabFiles("S-EPMC126")
}
}