diff --git a/test_pd2org.org b/test_pd2org.org new file mode 100644 index 0000000..e7b3940 --- /dev/null +++ b/test_pd2org.org @@ -0,0 +1,90 @@ +#+AUTHOR: Altynbek Isabekov +#+EMAIL: aisabekov@ku.edu.tr +#+LANGUAGE: en +#+PROPERTY: header-args:emacs-lisp :results silent +#+OPTIONS: ^:nil +#+OPTIONS: html-style:nil +#+HTML_HEAD: +#+HTML_HEAD: +#+HTML_HEAD: +#+HTML_HEAD: +#+HTML_HEAD: +#+HTML_HEAD: +* To convert Pandas dataframe to tabular format using tabulate() +#+header: :prologue from tabulate import tabulate +#+header: :noweb strip-export +#+begin_src python :results output raw + import pandas as pd + df = pd.DataFrame({ + "a": [1,2,3], + "b": [4,5,6] + }) + df_str = tabulate(df, headers=df.columns, tablefmt="orgtbl", showindex=False) + print(df_str) +#+end_src + +#+RESULTS: +| a | b | +|---+---| +| 1 | 4 | +| 2 | 5 | +| 3 | 6 | + +* To convert Pandas dataframe to tabular format using pd2org() +#+name: pd2org +#+begin_src python :var df="df" :exports none :results value raw + return f"return tabulate({df}, headers={df}.columns, tablefmt='orgtbl', showindex=False)" +#+end_src + +#+header: :prologue from tabulate import tabulate +#+header: :noweb strip-export +#+begin_src python :results value raw + import pandas as pd + df = pd.DataFrame({ + "a": [1,2,3], + "b": [4,5,6] + }) + <<pd2org(df="df")>> + df.show() +#+end_src + +#+RESULTS: +| a | b | +|---+---| +| 1 | 4 | +| 2 | 5 | +| 3 | 6 | + +* To convert PySpark dataframe to tabular format using ps2org() +#+name: ps2org +#+header: :noweb strip-export +#+begin_src python :var df_in="df_in" :exports none :results value raw + return f"return {df_in}.toPandas().to_markdown(index=False, tablefmt='orgtbl')" +#+end_src + +#+header: :prologue from tabulate import tabulate +#+header: :noweb strip-export +#+BEGIN_SRC python :var df="df" :results value raw + import pyspark.sql.functions as F + import pyspark.sql.types as T + from pyspark.sql.window import Window + from pyspark.sql import SparkSession + spark = 
SparkSession.builder.master("local").appName("test-app").getOrCreate() + schema = T.StructType( + [ + T.StructField("a", T.IntegerType(), True), + T.StructField("b", T.IntegerType(), True), + ] + ) + data = [(1, 4), (2, 5), (3, 6)] + df = spark.createDataFrame(schema=schema, data=data) + <<ps2org(df_in="df")>> + df.show() +#+END_SRC + +#+RESULTS: +| a | b | +|---+---| +| 1 | 4 | +| 2 | 5 | +| 3 | 6 |