#! /bin/bash
#
# A tutorial on running Meltano data ELT jobs from inside Dagster using the dagster-meltano library found at https://github.com/quantile-development/dagster-meltano
#
# Fraser Marlow [https://github.com/frasermarlow] - April 2023
#
# Ensure Docker is running locally before executing this script.
#
# ---------------------------------------------------------------

# SET PROJECT VARIABLES BELOW

# GITHUB_TOKEN and POSTGRES_PASSWORD are taken from the environment when they
# are already set there, so real secrets never have to be written into this
# file; the placeholder values below are used only as fallbacks.
PROJECT_FOLDER="dag-melt" # set a name for your project folder
MELTANO_PROJECT="meltano-project"  # name handed to 'meltano init'
MELTANO_JOB="my-meltano-job"  # name handed to 'meltano job add'
GITHUB_TOKEN="${GITHUB_TOKEN:-ghp_abcdefghijklmnopqrstuvwxyz123456}"  # a standard GitHub token with minimum access permissions
REPOS_TO_IMPORT="['quantile-development/dagster-meltano','frasermarlow/tap-bls','dagster-io/fake-star-detector']"  # one or more Github repositories you want to pull data for
START_DATE="2021-12-31"  # the date of the earliest data you want to extract
DOCKER_CONTAINER_NAME="my-pg-container" # a unique name for the docker container
POSTGRES_USER="meltano"  # a new user for your postgres database
POSTGRES_PASSWORD="${POSTGRES_PASSWORD:-password}"  # the postgres user's password
DATABASE="my-db-name"  # an arbitrary name for the database
ENVIRONMENT="dev"  # our Meltano environment, defaulting to 'dev' is fine.

###########################################################

# create project folder and move into it
# The name is quoted so a folder name containing spaces stays one argument.
mkdir -- "$PROJECT_FOLDER"
# Stop here if the folder cannot be entered; otherwise the virtual environment
# below would be created in whatever directory the script was started from.
cd -- "$PROJECT_FOLDER" || exit 1

# create project virtual environment
python3 -m venv venv
source venv/bin/activate

# install packages
pip install --upgrade pip
pip install meltano
meltano --version  # prints the installed version, e.g. 'meltano, version 2.16.1'
meltano init "$MELTANO_PROJECT"
# Stop if the project folder is missing (e.g. 'meltano init' failed) so the
# remaining meltano commands are not run from the wrong directory.
cd -- "$MELTANO_PROJECT" || exit 1

meltano environment add "$ENVIRONMENT"  # add a new environment if needed. Defaulting to 'dev' is fine.
export MELTANO_ENVIRONMENT="$ENVIRONMENT"  # sets the environment as an env var

# install and configure the GitHub data extractor (tap)
meltano add extractor tap-github --variant=meltanolabs
# Values are quoted: the repository list contains '[' and ']', which the shell
# would otherwise treat as a glob pattern, and quoting also keeps each value a
# single argument if it ever contains spaces.
meltano config tap-github set auth_token "$GITHUB_TOKEN"
meltano config tap-github set repositories "$REPOS_TO_IMPORT"
meltano config tap-github set start_date "$START_DATE"
meltano config tap-github # prints out the configuration settings

# select the streams we will import
meltano select tap-github commits url
meltano select tap-github commits sha
meltano select tap-github commits commit_timestamp

# Setup postgres database
# Every value is quoted so that names or passwords containing spaces or glob
# characters reach docker as single arguments.
if docker run -p 5432:5432 --name "$DOCKER_CONTAINER_NAME" \
  -e POSTGRES_USER="$POSTGRES_USER" \
  -e POSTGRES_PASSWORD="$POSTGRES_PASSWORD" \
  -e POSTGRES_DB="$DATABASE" \
  -e POSTGRES_INITDB_ARGS="--auth-host=md5 --auth-local=md5" \
  -d postgres; then
  # 'docker run -d' returns before the server accepts connections, so poll
  # with pg_isready (over TCP, inside the container) for up to ~30 seconds
  # before the script carries on to the steps that load data.
  for _ in {1..30}; do
    if docker exec "$DOCKER_CONTAINER_NAME" \
      pg_isready -h 127.0.0.1 -U "$POSTGRES_USER" -d "$DATABASE" >/dev/null 2>&1; then
      break
    fi
    sleep 1
  done
else
  # Keep going, as the script did before: a container from an earlier run may
  # still be usable.
  echo "warning: 'docker run' failed for container '$DOCKER_CONTAINER_NAME'" >&2
fi
docker container ls  # confirms the docker instance is up and running

## configure target-postgres loader
meltano add loader target-postgres --variant=meltanolabs
# User, password and database are quoted so that values containing spaces or
# glob characters are passed to meltano unchanged as single arguments.
meltano config target-postgres set user "$POSTGRES_USER"
meltano config target-postgres set password "$POSTGRES_PASSWORD"
meltano config target-postgres set database "$DATABASE"
meltano config target-postgres set add_record_metadata True
meltano config target-postgres set host localhost  # the container publishes port 5432 on the local machine

## Run the basic ingest job to check all is OK
meltano run tap-github target-postgres

# create this same task as a job
# The job name is quoted so a name containing spaces stays a single argument.
meltano job add "$MELTANO_JOB" --tasks "tap-github target-postgres"

# shut down the virtual environment
deactivate

## Optional:  add Dagster
#
# mkdir ~/$PROJECT_FOLDER/dagster && cd $_
# python3 -m venv dag-venv
# source dag-venv/bin/activate
#
# pip install --upgrade pip
# pip install dagster dagit
#
# dagster project scaffold --name dag-melt-project
# cd dag-melt-project
# hash -r  # refresh the shell's cached command paths (works in bash and zsh; 'rehash' is zsh/csh only)
# pip install dagster-meltano
# pip install -e ".[dev]"
# dagster dev
