test_ps2org.org
To convert a Pandas dataframe to tabular format using tabulate()

import pandas as pd
from tabulate import tabulate
df = pd.DataFrame({
    "a": [1,2,3],
    "b": [4,5,6]
})
df_str = tabulate(df, headers=df.columns, tablefmt="orgtbl", showindex=False)
print(df_str)
|   a |   b |
|-----+-----|
|   1 |   4 |
|   2 |   5 |
|   3 |   6 |

To convert a Pandas dataframe to tabular format using pd2org()

import pandas as pd
df = pd.DataFrame({
    "a": [1,2,3],
    "b": [4,5,6]
})
<<pd2org("df")>>
|   a |   b |
|-----+-----|
|   1 |   4 |
|   2 |   5 |
|   3 |   6 |
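
For reference, pd2org is a noweb macro defined elsewhere in the repository. Judging from the tabulate example above, its expansion is presumably something like the sketch below; the exact definition, and that of its PySpark counterpart ps2org used in the next example, is an assumption:

# Hypothetical pd2org("df") expansion (assumed, modeled on the tabulate example):
print(tabulate(df, headers=df.columns, tablefmt="orgtbl", showindex=False))
# Hypothetical ps2org("df") expansion for a PySpark dataframe (assumed):
print(tabulate(df.toPandas(), headers=df.columns, tablefmt="orgtbl", showindex=False))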

To convert a PySpark dataframe to tabular format using ps2org()

import pyspark.sql.functions as F
import pyspark.sql.types as T
from pyspark.sql.window import Window
from pyspark.sql import SparkSession
spark = SparkSession.builder.master("local").appName("test-app").getOrCreate()
schema = T.StructType(
    [
        T.StructField("a", T.IntegerType(), True),
        T.StructField("b", T.IntegerType(), True),
    ]
)
data = [(1, 4), (2, 5), (3, 6)]
df = spark.createDataFrame(schema=schema, data=data)
<<ps2org("df")>>
|   a |   b |
|-----+-----|
|   1 |   4 |
|   2 |   5 |
|   3 |   6 |

To convert a PySpark dataframe to tabular format using separate actual and shown code

import pyspark.sql.functions as F
import pyspark.sql.types as T
from pyspark.sql.window import Window
from pyspark.sql import SparkSession
spark = SparkSession.builder.config("spark.log.level", "OFF").master("local").appName("test-app").getOrCreate()
<<nostderr("spark")>>
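# <<nostderr("spark")>> is a noweb macro defined elsewhere in the repository;
# presumably it suppresses Spark's console chatter, e.g. something like
# spark.sparkContext.setLogLevel("OFF") (an assumption, not the actual definition).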

schema = T.StructType(
    [
        T.StructField("a", T.IntegerType(), True),
        T.StructField("b", T.IntegerType(), True),
    ]
)
data = [(1, 4), (2, 5), (3, 6)]
df = spark.createDataFrame(schema=schema, data=data)
print("Dataframe df:")
<<show2org("df")>>df.show()
|   a |   b |
|-----+-----|
|   1 |   4 |
|   2 |   5 |
|   3 |   6 |

During evaluation, this is converted into the following code block; note that show2org() injects the table-printing call and leaves the original df.show() commented out at the end of the line:

from tabulate import tabulate
import pyspark.sql.functions as F
import pyspark.sql.types as T
from pyspark.sql.window import Window
from pyspark.sql import SparkSession
spark = SparkSession.builder.master("local").appName("test-app").getOrCreate()
schema = T.StructType(
    [
        T.StructField("a", T.IntegerType(), True),
        T.StructField("b", T.IntegerType(), True),
    ]
)
data = [(1, 4), (2, 5), (3, 6)]
df = spark.createDataFrame(schema=schema, data=data)
print(df.toPandas().to_markdown(index=False, tablefmt='orgtbl'))#df.show()

To convert a PySpark dataframe to tabular format using a returned value and noweb

# Built-in namespace
import pyspark.sql.functions as F
import pyspark.sql.types as T
from pyspark.sql import SparkSession

spark = SparkSession.builder.master("local").appName("test-app").getOrCreate()
schema = T.StructType(
    [
        T.StructField("a", T.IntegerType(), True),
        T.StructField("b", T.IntegerType(), True),
    ]
)
data = [(1, 4), (2, 5), (3, 6)]
df = spark.createDataFrame(schema=schema, data=data)
df<<litps2org>>.show()
|   a |   b |
|-----+-----|
|   1 |   4 |
|   2 |   5 |
|   3 |   6 |
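
litps2org is an inline noweb macro defined elsewhere in the repository; whatever it expands to must turn df<<litps2org>>.show() into valid Python that prints the table above. One way this could work is sketched below; the expansion text itself is an assumption, not the actual definition:

# Hypothetical result of expanding df<<litps2org>>.show(): the spliced-in
# text converts to Pandas, prints an org table, and ends with `#` so that
# the original `.show()` call is commented out.
df.toPandas().pipe(lambda d: print(d.to_markdown(index=False, tablefmt="orgtbl")))  # .show()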

To convert a PySpark dataframe to tabular format using post-processing with AWK (GNU Awk, for gensub())

echo "$data"  | awk 'BEGIN{state_prev=""; prev_line=""}{                          \
              if ($0 ~ /^\+[-+]+\+$/){                                            \
                     state_curr = "hline"                                         \
              } else {                                                            \
                     if ($0 ~ /^\|.*\|$/) {                                       \
                          state_curr = "tblbody"                                  \
                      }                                                           \
                      else {                                                      \
                          state_curr = "txt"                                      \
                      }                                                           \
               }                                                                  \
                                                                                  \
              if ((state_curr == "hline") && (state_prev == "txt")) {             \
                     printf("%s", prev_line);                                     \
                     prev_line = "";                                              \
              } else if ((state_curr == "txt") && (state_prev == "hline")) {      \
                     prev_line = $0;                                              \
              } else if ((state_curr == "hline") && (state_prev == "")) {         \
                     prev_line = "";                                              \
              } else if ((state_curr == "txt") && (state_prev == "")) {           \
                     printf("%s", prev_line);                                     \
                     prev_line = gensub(/^\+([-+]+)\+$/, "|\\1|", "g", $0);       \
              } else {                                                            \
                     if (NR > 2) {                                                \
                          printf("%s\n", prev_line);                              \
                     }                                                            \
                     prev_line = gensub(/^\+([-+]+)\+$/, "|\\1|", "g", $0);       \
              }                                                                   \
              state_prev = state_curr;                                            \
              }END{if (prev_line !~ /^\|.*\|$/) {print prev_line}}'
import pyspark.sql.functions as F
import pyspark.sql.types as T
from pyspark.sql import SparkSession
from tabulate import tabulate
spark = SparkSession.builder.master("local[1]").appName("test-app").getOrCreate()
schema = T.StructType(
    [
        T.StructField("A", T.ArrayType(T.StringType()), True),
        T.StructField("B", T.ArrayType(T.StringType()), True),
    ]
)
data = [(["b", "a", "c"], ["c", "d", "a", "f"])]
df = spark.createDataFrame(schema=schema, data=data)

dft = df.select("A", "B",
          F.array_except("A", "B").alias("A\\B"),
          F.array_except("B", "A").alias("B\\A"))
print("Table 1:")
dft.show()

print("Table 2:")
dft.show()

print("Two tables are the same.")
|        A|           B|A\B|   B\A|
|---------+------------+---+------|
|[b, a, c]|[c, d, a, f]|[b]|[d, f]|

Table 2:

|        A|           B|A\B|   B\A|
|---------+------------+---+------|
|[b, a, c]|[c, d, a, f]|[b]|[d, f]|

Two tables are the same.

To convert a PySpark dataframe to tabular format using post-processing with SED

echo "$data" | sed -E "s/^\+([-+]+)\+$/|\1|/g"
|        A|           B|A\B|   B\A|
|---------+------------+---+------|
|[b, a, c]|[c, d, a, f]|[b]|[d, f]|

Table 2:

|        A|           B|A\B|   B\A|
|---------+------------+---+------|
|[b, a, c]|[c, d, a, f]|[b]|[d, f]|

Two tables are the same.

To convert a PySpark dataframe to tabular format using post-processing with Python

The formatting of a PySpark dataframe's show() output is done in its showString() method.
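
A minimal sketch of how the raw show() text could end up in the data variable used below; it calls the JVM-side showString() through the internal _jdf attribute, so treat the exact call as an assumption about PySpark internals:

# Capture what df.show() would print, instead of letting it go to stdout.
# Arguments: number of rows, truncate width, vertical layout (internal API).
data = df._jdf.showString(20, 20, False)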

import re

# `data` holds the captured text of the df.show() calls (supplied to this
# block externally, e.g. via the Org source block's :var header argument).
state_prev = ""
prev_line = ""

for j, line in enumerate(data.split("\n")):
    # Classify each line: an ASCII rule (+---+), a table row (|...|), or text.
    if re.match(r"^\+[-+]+\+$", line):
        state_curr = "hline"
    elif re.match(r"^\|.*\|$", line):
        state_curr = "tblbody"
    else:
        state_curr = "txt"

    if (state_curr == "hline") and (state_prev == "txt"):
        print(prev_line, end="")
        prev_line = ""
    elif (state_curr == "txt") and (state_prev == "hline"):
        # The buffered rule that closed a table is dropped here.
        print("", end="")
        prev_line = line
    elif (state_curr == "txt") and (state_prev == ""):
        print(prev_line, end="")
        prev_line = re.sub(r"^\+([-+]+)\+$", r"|\1|", line)
    else:
        if j > 0:
            print(prev_line, end="\n")
        # Convert a rule line (if any) to an org separator and buffer it.
        prev_line = re.sub(r"^\+([-+]+)\+$", r"|\1|", line)

    state_prev = state_curr

if not re.match(r"^\|[-+]+\|$", prev_line):
    print(prev_line)
|        A|           B|A\B|   B\A|
|---------+------------+---+------|
|[b, a, c]|[c, d, a, f]|[b]|[d, f]|

Table 2:

|        A|           B|A\B|   B\A|
|---------+------------+---+------|
|[b, a, c]|[c, d, a, f]|[b]|[d, f]|

Two tables are the same.

To convert a PySpark dataframe to HTML format using a built-in function

import pyspark.sql.functions as F
import pyspark.sql.types as T
from pyspark.sql import SparkSession
from pyspark import SparkConf

# This configuration is needed to enable HTML rendering
conf = SparkConf().set("spark.sql.repl.eagerEval.enabled", "true")

spark = SparkSession.builder.master("local[1]").appName("test-app").config(conf=conf).getOrCreate()
schema = T.StructType(
    [
        T.StructField("a", T.IntegerType(), True),
        T.StructField("b", T.IntegerType(), True),
    ]
)
data = [(1, 4), (2, 5), (3, 6)]
df = spark.createDataFrame(schema=schema, data=data)
print(df._repr_html_())
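
When the eagerEval flag is not set, _repr_html_() returns None rather than an HTML table, so a small guard helps (a sketch):

# _repr_html_() yields an HTML <table> string only when eager evaluation is
# enabled; otherwise it returns None, so fall back to the plain-text output.
html = df._repr_html_()
if html is None:
    df.show()
else:
    print(html)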