Skip to content

Commit

Permalink
Pack.java: Recover more often in Pack.copyAsIs2()
Browse files Browse the repository at this point in the history
The Pack class is designed to throw
StoredObjectRepresentationNotAvailableException at times when it cannot
find an object which previously was believed to be in its packfile and
it is still possible for the caller, PackWriter.writeObjectImpl(), to
retry copying the object from another file and potentially avoid
causing a user facing error for this fairly common expected situation.
This retry helps handle when repacking causes a packfile to be replaced
by new files with the same objects. Improve copyAsIs2() so that
recovery is possible in many more situations.

Once any data for a specific object has been sent, it is very difficult
to recover from that object being relocated to another pack. But if a
read error is detected in copyAsIs2() before sending the object header
(and thus any data), then it should still be recoverable. Fix three
places where we could have recovered because we hadn't sent the header
yet, and adjust another place to send the header a bit later, after
having read some data from the object successfully. Basically, if the
header has not been written yet, throw
StoredObjectRepresentationNotAvailableException to signal that this is
still recoverable.

These fixes should drastically improve the chances of recovery since, due
to Unix file semantics, if the partial read succeeds, then the full read
will very likely succeed. This is because while the file may no longer
be open when the first read is attempted (the WindowCache may have evicted
it), once the first read completes it will likely still be open, and even
if the file is deleted, the WindowCache will continue to be able to read
from it until it closes it.

Change-Id: Ib87e294e0dbacf71b10db55be511e91963b4a84a
Signed-off-by: Martin Fick <mfick@nvidia.com>
  • Loading branch information
mfick-nvidia committed Nov 11, 2024
1 parent d34f8b5 commit e682a02
Showing 1 changed file with 153 additions and 142 deletions.
295 changes: 153 additions & 142 deletions org.eclipse.jgit/src/org/eclipse/jgit/internal/storage/file/Pack.java
Original file line number Diff line number Diff line change
Expand Up @@ -416,185 +416,196 @@ private void copyAsIs2(PackOutputStream out, LocalObjectToPack src,
final CRC32 crc2 = validate ? new CRC32() : null;
final byte[] buf = out.getCopyBuffer();

boolean isHeaderWritten = false;
// Rip apart the header so we can discover the size.
//
readFully(src.offset, buf, 0, 20, curs);
int c = buf[0] & 0xff;
final int typeCode = (c >> 4) & 7;
long inflatedLength = c & 15;
int shift = 4;
int headerCnt = 1;
while ((c & 0x80) != 0) {
c = buf[headerCnt++] & 0xff;
inflatedLength += ((long) (c & 0x7f)) << shift;
shift += 7;
}

if (typeCode == Constants.OBJ_OFS_DELTA) {
do {
try {
readFully(src.offset, buf, 0, 20, curs);

int c = buf[0] & 0xff;
final int typeCode = (c >> 4) & 7;
long inflatedLength = c & 15;
int shift = 4;
int headerCnt = 1;
while ((c & 0x80) != 0) {
c = buf[headerCnt++] & 0xff;
} while ((c & 128) != 0);
if (validate) {
assert(crc1 != null && crc2 != null);
crc1.update(buf, 0, headerCnt);
crc2.update(buf, 0, headerCnt);
inflatedLength += ((long) (c & 0x7f)) << shift;
shift += 7;
}
} else if (typeCode == Constants.OBJ_REF_DELTA) {
if (validate) {

if (typeCode == Constants.OBJ_OFS_DELTA) {
do {
c = buf[headerCnt++] & 0xff;
} while ((c & 128) != 0);
if (validate) {
assert(crc1 != null && crc2 != null);
crc1.update(buf, 0, headerCnt);
crc2.update(buf, 0, headerCnt);
}
} else if (typeCode == Constants.OBJ_REF_DELTA) {
if (validate) {
assert(crc1 != null && crc2 != null);
crc1.update(buf, 0, headerCnt);
crc2.update(buf, 0, headerCnt);
}

readFully(src.offset + headerCnt, buf, 0, 20, curs);
if (validate) {
assert(crc1 != null && crc2 != null);
crc1.update(buf, 0, 20);
crc2.update(buf, 0, 20);
}
headerCnt += 20;
} else if (validate) {
assert(crc1 != null && crc2 != null);
crc1.update(buf, 0, headerCnt);
crc2.update(buf, 0, headerCnt);
}

readFully(src.offset + headerCnt, buf, 0, 20, curs);
if (validate) {
assert(crc1 != null && crc2 != null);
crc1.update(buf, 0, 20);
crc2.update(buf, 0, 20);
}
headerCnt += 20;
} else if (validate) {
assert(crc1 != null && crc2 != null);
crc1.update(buf, 0, headerCnt);
crc2.update(buf, 0, headerCnt);
}
final long dataOffset = src.offset + headerCnt;
final long dataLength = src.length;
final long expectedCRC;
final ByteArrayWindow quickCopy;

final long dataOffset = src.offset + headerCnt;
final long dataLength = src.length;
final long expectedCRC;
final ByteArrayWindow quickCopy;

// Verify the object isn't corrupt before sending. If it is,
// we report it missing instead.
//
try {
quickCopy = curs.quickCopy(this, dataOffset, dataLength);
// Verify the object isn't corrupt before sending. If it is,
// we report it missing instead.
//
try {
quickCopy = curs.quickCopy(this, dataOffset, dataLength);

if (validate && idx().hasCRC32Support()) {
assert(crc1 != null);
// Index has the CRC32 code cached, validate the object.
//
expectedCRC = idx().findCRC32(src);
if (quickCopy != null) {
quickCopy.crc32(crc1, dataOffset, (int) dataLength);
} else {
long pos = dataOffset;
long cnt = dataLength;
while (cnt > 0) {
final int n = (int) Math.min(cnt, buf.length);
readFully(pos, buf, 0, n, curs);
crc1.update(buf, 0, n);
pos += n;
cnt -= n;
if (validate && idx().hasCRC32Support()) {
assert(crc1 != null);
// Index has the CRC32 code cached, validate the object.
//
expectedCRC = idx().findCRC32(src);
if (quickCopy != null) {
quickCopy.crc32(crc1, dataOffset, (int) dataLength);
} else {
long pos = dataOffset;
long cnt = dataLength;
while (cnt > 0) {
final int n = (int) Math.min(cnt, buf.length);
readFully(pos, buf, 0, n, curs);
crc1.update(buf, 0, n);
pos += n;
cnt -= n;
}
}
if (crc1.getValue() != expectedCRC) {
setCorrupt(src.offset);
throw new CorruptObjectException(MessageFormat.format(
JGitText.get().objectAtHasBadZlibStream,
Long.valueOf(src.offset), getPackFile()));
}
} else if (validate) {
// We don't have a CRC32 code in the index, so compute it
// now while inflating the raw data to get zlib to tell us
// whether or not the data is safe.
//
Inflater inf = curs.inflater();
byte[] tmp = new byte[1024];
if (quickCopy != null) {
quickCopy.check(inf, tmp, dataOffset, (int) dataLength);
} else {
assert(crc1 != null);
long pos = dataOffset;
long cnt = dataLength;
while (cnt > 0) {
final int n = (int) Math.min(cnt, buf.length);
readFully(pos, buf, 0, n, curs);
crc1.update(buf, 0, n);
inf.setInput(buf, 0, n);
while (inf.inflate(tmp, 0, tmp.length) > 0)
continue;
pos += n;
cnt -= n;
}
}
if (!inf.finished() || inf.getBytesRead() != dataLength) {
setCorrupt(src.offset);
throw new EOFException(MessageFormat.format(
JGitText.get().shortCompressedStreamAt,
Long.valueOf(src.offset)));
}
assert(crc1 != null);
expectedCRC = crc1.getValue();
} else {
expectedCRC = -1;
}
if (crc1.getValue() != expectedCRC) {
setCorrupt(src.offset);
throw new CorruptObjectException(MessageFormat.format(
JGitText.get().objectAtHasBadZlibStream,
Long.valueOf(src.offset), getPackFile()));
}
} else if (validate) {
// We don't have a CRC32 code in the index, so compute it
// now while inflating the raw data to get zlib to tell us
// whether or not the data is safe.
} catch (DataFormatException dataFormat) {
setCorrupt(src.offset);

CorruptObjectException corruptObject = new CorruptObjectException(
MessageFormat.format(
JGitText.get().objectAtHasBadZlibStream,
Long.valueOf(src.offset), getPackFile()),
dataFormat);

throw new StoredObjectRepresentationNotAvailableException(
corruptObject);
}

if (quickCopy != null) {
// The entire object fits into a single byte array window slice,
// and we have it pinned. Write this out without copying.
//
Inflater inf = curs.inflater();
byte[] tmp = new byte[1024];
if (quickCopy != null) {
quickCopy.check(inf, tmp, dataOffset, (int) dataLength);
} else {
assert(crc1 != null);
out.writeHeader(src, inflatedLength);
isHeaderWritten = true;
quickCopy.write(out, dataOffset, (int) dataLength);

} else if (dataLength <= buf.length) {
// Tiny optimization: Lots of objects are very small deltas or
// deflated commits that are likely to fit in the copy buffer.
//
if (!validate) {
long pos = dataOffset;
long cnt = dataLength;
while (cnt > 0) {
final int n = (int) Math.min(cnt, buf.length);
readFully(pos, buf, 0, n, curs);
crc1.update(buf, 0, n);
inf.setInput(buf, 0, n);
while (inf.inflate(tmp, 0, tmp.length) > 0)
continue;
pos += n;
cnt -= n;
}
}
if (!inf.finished() || inf.getBytesRead() != dataLength) {
setCorrupt(src.offset);
throw new EOFException(MessageFormat.format(
JGitText.get().shortCompressedStreamAt,
Long.valueOf(src.offset)));
}
assert(crc1 != null);
expectedCRC = crc1.getValue();
out.writeHeader(src, inflatedLength);
isHeaderWritten = true;
out.write(buf, 0, (int) dataLength);
} else {
expectedCRC = -1;
}
} catch (DataFormatException dataFormat) {
setCorrupt(src.offset);

CorruptObjectException corruptObject = new CorruptObjectException(
MessageFormat.format(
JGitText.get().objectAtHasBadZlibStream,
Long.valueOf(src.offset), getPackFile()),
dataFormat);

throw new StoredObjectRepresentationNotAvailableException(
corruptObject);

} catch (IOException ioError) {
throw new StoredObjectRepresentationNotAvailableException(ioError);
}

if (quickCopy != null) {
// The entire object fits into a single byte array window slice,
// and we have it pinned. Write this out without copying.
//
out.writeHeader(src, inflatedLength);
quickCopy.write(out, dataOffset, (int) dataLength);

} else if (dataLength <= buf.length) {
// Tiny optimization: Lots of objects are very small deltas or
// deflated commits that are likely to fit in the copy buffer.
//
if (!validate) {
// Now we are committed to sending the object. As we spool it out,
// check its CRC32 code to make sure there wasn't corruption between
// the verification we did above, and us actually outputting it.
//
long pos = dataOffset;
long cnt = dataLength;
while (cnt > 0) {
final int n = (int) Math.min(cnt, buf.length);
readFully(pos, buf, 0, n, curs);
if (validate) {
assert(crc2 != null);
crc2.update(buf, 0, n);
}
if (!isHeaderWritten) {
out.writeHeader(src, inflatedLength);
isHeaderWritten = true;
}
out.write(buf, 0, n);
pos += n;
cnt -= n;
}
}
out.writeHeader(src, inflatedLength);
out.write(buf, 0, (int) dataLength);
} else {
// Now we are committed to sending the object. As we spool it out,
// check its CRC32 code to make sure there wasn't corruption between
// the verification we did above, and us actually outputting it.
//
out.writeHeader(src, inflatedLength);
long pos = dataOffset;
long cnt = dataLength;
while (cnt > 0) {
final int n = (int) Math.min(cnt, buf.length);
readFully(pos, buf, 0, n, curs);
if (validate) {
assert(crc2 != null);
crc2.update(buf, 0, n);
if (crc2.getValue() != expectedCRC) {
throw new CorruptObjectException(MessageFormat.format(
JGitText.get().objectAtHasBadZlibStream,
Long.valueOf(src.offset), getPackFile()));
}
}
out.write(buf, 0, n);
pos += n;
cnt -= n;
}
if (validate) {
assert(crc2 != null);
if (crc2.getValue() != expectedCRC) {
throw new CorruptObjectException(MessageFormat.format(
JGitText.get().objectAtHasBadZlibStream,
Long.valueOf(src.offset), getPackFile()));
}
} catch (IOException ioError) {
if (!isHeaderWritten) {
throw new StoredObjectRepresentationNotAvailableException(ioError);
}
throw ioError;
}
}

Expand Down

0 comments on commit e682a02

Please sign in to comment.