_best_ — Kfp Movie
# Component definition for the ingestion step, written as YAML text.
#
# Layout notes:
# * The container command is `sh -c <script>`; the script is a YAML literal
#   block scalar (`|`), so its line breaks are preserved exactly.
# * The shell script runs `pip install pandas` and then feeds an inline
#   Python program to `python -` through a quoted heredoc (`<<'PY'`).
#   The closing `PY` must sit at the same indentation as the line that
#   opens the heredoc so that it lands in column 0 once YAML strips the
#   block indentation; otherwise the shell never sees the terminator.
# * The inline program downloads the MovieLens "ml-latest-small" archive
#   into /mnt/data and extracts it there.
#
# NOTE(review): the inline program imports only os, urllib.request and
# zipfile; the `pip install pandas` looks unnecessary -- confirm before
# removing it.
# NOTE(review): `volumeMounts` is kept as it appeared in the original text,
# but it is not one of the container fields I know the component spec to
# document (image, command, args, env, fileOutputs) -- confirm the loader
# accepts it, or mount the volume on the task in the pipeline instead.
INGEST_COMPONENT_SPEC = """
name: Ingest MovieLens
implementation:
  container:
    image: python:3.9-slim
    command:
    - sh
    - -c
    - |
      pip install pandas && python - <<'PY'
      import os, urllib.request, zipfile
      DATA_URL = "https://files.grouplens.org/datasets/movielens/ml-latest-small.zip"
      DEST="/mnt/data"
      os.makedirs(DEST, exist_ok=True)
      zip_path=os.path.join(DEST,"ml-latest-small.zip")
      urllib.request.urlretrieve(DATA_URL, zip_path)
      with zipfile.ZipFile(zip_path) as z:
          z.extractall(DEST)
      print("✅ Ingestion done")
      PY
    args: []
    volumeMounts:
    - mountPath: /mnt/data
      name: data-volume
"""


def ingest_op():
    """Build the "Ingest MovieLens" component from its inline YAML spec.

    Returns:
        Whatever ``components.load_component_from_text`` returns for
        ``INGEST_COMPONENT_SPEC`` (presumably a Kubeflow Pipelines
        component factory -- ``components`` is imported outside this block).
    """
    return components.load_component_from_text(INGEST_COMPONENT_SPEC)
# Dockerfile.ingest FROM python:3.9-slim WORKDIR /app RUN pip install pandas COPY ingest.py . ENTRYPOINT ["python", "ingest.py"] # ingest.py import os, urllib.request, zipfile, pandas as pd kfp movie
train, test = train_test_split(df, test_size=0.2, random_state=42) train.to_parquet(os.path.join(OUT_DIR, "train.parquet")) test.to_parquet(os.path.join(OUT_DIR, "test.parquet")) def ingest_op(): return components
