Code
import os
Code
# Show which JDK this kernel sees (None when the variable is unset).
os.getenv("JAVA_HOME")
# os.getenv("SPARK_HOME")
'/opt/homebrew/opt/openjdk@11/'
Code
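# NOTE: `!export` runs in a throwaway subshell, so it would not change this kernel's
# environment; set os.environ["JAVA_HOME"] (or use %env) before starting Spark instead.
# !export JAVA_HOME=/opt/homebrew/opt/openjdk@11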
Code
# !export SPARK_HOME=~/Developer/personal/hypershotgun-blog/.venv/lib/python3.11/site-packages/pyspark
Code
from pyspark.sql import DataFrame as df, functions as F, types as T, Window as W  # noqa


from pyspark.sql import SparkSession

# Obtain the session for this notebook: getOrCreate() returns the already
# running SparkSession if there is one, otherwise it starts a new one
# registered under the application name "MyApp".
spark = (
    SparkSession.builder
    .appName("MyApp")
    .getOrCreate()
)
25/01/29 08:53:54 WARN Utils: Your hostname, Joosts-MacBook-Pro.local resolves to a loopback address: 127.0.0.1; using 10.0.180.159 instead (on interface en0)
25/01/29 08:53:54 WARN Utils: Set SPARK_LOCAL_IP if you need to bind to another address
Setting default log level to "WARN".
To adjust logging level use sc.setLogLevel(newLevel). For SparkR, use setLogLevel(newLevel).
25/01/29 08:53:54 WARN NativeCodeLoader: Unable to load native-hadoop library for your platform... using builtin-java classes where applicable
Code
# Small fixture: four rows that all share key "a"; the "num" column is
# null in the first and last rows and holds 1 and 2 in between.
df_nulls = spark.createDataFrame(
    data=[
        ("a", None),
        ("a", 1),
        ("a", 2),
        ("a", None),
    ],
    schema=["key", "num"],
)
Code
---------------------------------------------------------------------------
NameError                                 Traceback (most recent call last)
Cell In[10], line 1
----> 1 display_html(df_nulls)

NameError: name 'display_html' is not defined