chore: added conditional check to prevent IndexOutOfBoundsException (#343)

* chore: added conditional check to prevent IndexOutOfBoundsException * nit * removed first lang part from batchTable sample
GoogleCloudPlatform · Nov 15, 2022 · edcb284 · edcb284
1 parent f4be66b
commit edcb284
Show file tree

Hide file tree

Showing 4 changed files with 70 additions and 58 deletions.
diff --git a/document-ai/snippets/src/main/java/documentai/v1beta2/BatchParseFormBeta.java b/document-ai/snippets/src/main/java/documentai/v1beta2/BatchParseFormBeta.java
@@ -68,8 +68,7 @@ public static void batchParseFormGcs(
     // Initialize client that will be used to send requests. This client only needs to be created
     // once, and can be reused for multiple requests. After completing all of your requests, call
     // the "close" method on the client to safely clean up any remaining background resources.
-    try (DocumentUnderstandingServiceClient client =
-        DocumentUnderstandingServiceClient.create()) {
+    try (DocumentUnderstandingServiceClient client = DocumentUnderstandingServiceClient.create()) {
 
       // Configure the request for processing the PDF
       String parent = String.format("projects/%s/locations/%s", projectId, location);
@@ -103,17 +102,16 @@ public static void batchParseFormGcs(
       // mime_type can be application/pdf, image/tiff,
       // and image/gif, or application/json
       InputConfig config =
-          InputConfig.newBuilder().setGcsSource(inputUri)
-                  .setMimeType("application/pdf").build();
+          InputConfig.newBuilder().setGcsSource(inputUri).setMimeType("application/pdf").build();
 
-      GcsDestination gcsDestination = GcsDestination.newBuilder()
-              .setUri(String.format("gs://%s/%s", outputGcsBucketName, outputGcsPrefix)).build();
-
-      OutputConfig outputConfig =  OutputConfig.newBuilder()
-              .setGcsDestination(gcsDestination)
-              .setPagesPerShard(1)
+      GcsDestination gcsDestination =
+          GcsDestination.newBuilder()
+              .setUri(String.format("gs://%s/%s", outputGcsBucketName, outputGcsPrefix))
               .build();
 
+      OutputConfig outputConfig =
+          OutputConfig.newBuilder().setGcsDestination(gcsDestination).setPagesPerShard(1).build();
+
       ProcessDocumentRequest request =
           ProcessDocumentRequest.newBuilder()
               .setFormExtractionParams(params)
@@ -165,13 +163,15 @@ public static void batchParseFormGcs(
           String text = document.getText();
 
           // Process the output.
-          Document.Page page1 = document.getPages(0);
-          for (Document.Page.FormField field : page1.getFormFieldsList()) {
-            String fieldName = getText(field.getFieldName(), text);
-            String fieldValue = getText(field.getFieldValue(), text);
-
-            System.out.println("Extracted form fields pair:");
-            System.out.printf("\t(%s, %s))", fieldName, fieldValue);
+          if (document.getPagesCount() > 0) {
+            Document.Page page1 = document.getPages(0);
+            for (Document.Page.FormField field : page1.getFormFieldsList()) {
+              String fieldName = getText(field.getFieldName(), text);
+              String fieldValue = getText(field.getFieldValue(), text);
+
+              System.out.println("Extracted form fields pair:");
+              System.out.printf("\t(%s, %s))", fieldName, fieldValue);
+            }
           }
 
           // Clean up temp file.

diff --git a/document-ai/snippets/src/main/java/documentai/v1beta2/BatchParseTableBeta.java b/document-ai/snippets/src/main/java/documentai/v1beta2/BatchParseTableBeta.java
@@ -165,24 +165,30 @@ public static void batchParseTableGcs(
           String text = document.getText();
 
           // Process the output.
-          Document.Page page1 = document.getPages(0);
-          Document.Page.Table table = page1.getTables(0);
-
-          System.out.println("Results from first table processed:");
-          System.out.println("Header row:");
-
-          Document.Page.Table.TableRow headerRow = table.getHeaderRows(0);
-
-          for (Document.Page.Table.TableCell tableCell : headerRow.getCellsList()) {
-            if (!tableCell.getLayout().getTextAnchor().getTextSegmentsList().isEmpty()) {
-              // Extract shards from the text field
-              // First shard in document doesn't have startIndex property
-              List<Document.TextAnchor.TextSegment> textSegments =
-                  tableCell.getLayout().getTextAnchor().getTextSegmentsList();
-              int startIdx =
-                  textSegments.size() > 0 ? (int) textSegments.get(0).getStartIndex() : 0;
-              int endIdx = (int) textSegments.get(0).getEndIndex();
-              System.out.printf("\t%s", text.substring(startIdx, endIdx));
+          if (document.getPagesCount() > 0) {
+            Document.Page page1 = document.getPages(0);
+            if (page1.getTablesCount() > 0) {
+              Document.Page.Table table = page1.getTables(0);
+
+              System.out.println("Results from first table processed:");
+              System.out.println("Header row:");
+
+              if (table.getHeaderRowsCount() > 0) {
+                Document.Page.Table.TableRow headerRow = table.getHeaderRows(0);
+
+                for (Document.Page.Table.TableCell tableCell : headerRow.getCellsList()) {
+                  if (!tableCell.getLayout().getTextAnchor().getTextSegmentsList().isEmpty()) {
+                    // Extract shards from the text field
+                    // First shard in document doesn't have startIndex property
+                    List<Document.TextAnchor.TextSegment> textSegments =
+                        tableCell.getLayout().getTextAnchor().getTextSegmentsList();
+                    int startIdx =
+                        textSegments.size() > 0 ? (int) textSegments.get(0).getStartIndex() : 0;
+                    int endIdx = (int) textSegments.get(0).getEndIndex();
+                    System.out.printf("\t%s", text.substring(startIdx, endIdx));
+                  }
+                }
+              }
             }
           }
 

diff --git a/document-ai/snippets/src/main/java/documentai/v1beta2/ParseFormBeta.java b/document-ai/snippets/src/main/java/documentai/v1beta2/ParseFormBeta.java
@@ -90,13 +90,15 @@ public static void parseForm(String projectId, String location, String inputGcsU
       String text = response.getText();
 
       // Process the output
-      Document.Page page1 = response.getPages(0);
-      for (Document.Page.FormField field : page1.getFormFieldsList()) {
-        String fieldName = getText(field.getFieldName(), text);
-        String fieldValue = getText(field.getFieldValue(), text);
+      if (response.getPagesCount() > 0) {
+        Document.Page page1 = response.getPages(0);
+        for (Document.Page.FormField field : page1.getFormFieldsList()) {
+          String fieldName = getText(field.getFieldName(), text);
+          String fieldValue = getText(field.getFieldValue(), text);
 
-        System.out.println("Extracted form fields pair:");
-        System.out.printf("\t(%s, %s))", fieldName, fieldValue);
+          System.out.println("Extracted form fields pair:");
+          System.out.printf("\t(%s, %s))", fieldName, fieldValue);
+        }
       }
     }
   }

diff --git a/document-ai/snippets/src/main/java/documentai/v1beta2/ParseTableBeta.java b/document-ai/snippets/src/main/java/documentai/v1beta2/ParseTableBeta.java
@@ -94,23 +94,27 @@ public static void parseTable(String projectId, String location, String inputGcs
       String text = response.getText();
 
       // Get the first table in the document
-      Document.Page page1 = response.getPages(0);
-      Document.Page.Table table = page1.getTables(0);
-
-      System.out.println("Results from first table processed:");
-      List<Document.Page.DetectedLanguage> detectedLangs = page1.getDetectedLanguagesList();
-      String langCode =
-          detectedLangs.size() > 0 ? detectedLangs.get(0).getLanguageCode() : "NOT_FOUND";
-      System.out.printf("First detected language: : %s", langCode);
-
-      Document.Page.Table.TableRow headerRow = table.getHeaderRows(0);
-      System.out.println("Header row:");
-
-      for (Document.Page.Table.TableCell tableCell : headerRow.getCellsList()) {
-        if (tableCell.getLayout().getTextAnchor().getTextSegmentsList() != null) {
-          // Extract shards from the text field
-          // First shard in document doesn't have startIndex property
-          System.out.printf("\t%s", getText(tableCell.getLayout(), text));
+      if (response.getPagesCount() > 0) {
+        Document.Page page1 = response.getPages(0);
+        if (page1.getTablesCount() > 0) {
+          Document.Page.Table table = page1.getTables(0);
+
+          System.out.println("Results from first table processed:");
+          List<Document.Page.DetectedLanguage> detectedLangs = page1.getDetectedLanguagesList();
+          String langCode =
+              detectedLangs.size() > 0 ? detectedLangs.get(0).getLanguageCode() : "NOT_FOUND";
+          System.out.printf("First detected language: : %s", langCode);
+
+          Document.Page.Table.TableRow headerRow = table.getHeaderRows(0);
+          System.out.println("Header row:");
+
+          for (Document.Page.Table.TableCell tableCell : headerRow.getCellsList()) {
+            if (tableCell.getLayout().getTextAnchor().getTextSegmentsList() != null) {
+              // Extract shards from the text field
+              // First shard in document doesn't have startIndex property
+              System.out.printf("\t%s", getText(tableCell.getLayout(), text));
+            }
+          }
         }
       }
     }