Subsections of Application
Datahub
Preliminary
- Kubernetes has been installed; if not, check the link
- argoCD has been installed; if not, check the link
- Elasticsearch has been installed; if not, check the link
- MariaDB has been installed; if not, check the link
- Kafka has been installed; if not, check the link
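The service and namespace names checked below are the ones referenced later on this page (argoCD in `argocd`, Elasticsearch in `application`, MariaDB and Kafka in `database`); adjust them if your layout differs. A quick sanity check:

```shell
# argoCD is up
kubectl -n argocd get pods
# Elasticsearch service that datahub will use
kubectl -n application get svc elastic-search-elasticsearch
# MariaDB and Kafka services that datahub will use
kubectl -n database get svc mariadb kafka
```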
Steps
1. prepare datahub credentials secret
Without Kafka authentication, the secret only needs the MariaDB root password:

```shell
kubectl -n application \
    create secret generic datahub-credentials \
    --from-literal=mysql-root-password="$(kubectl get secret mariadb-credentials --namespace database -o jsonpath='{.data.mariadb-root-password}' | base64 -d)"
```

If Kafka is secured with SASL/SCRAM, store the Kafka client properties and the JAAS config in the same secret as well:

```shell
kubectl -n application \
    create secret generic datahub-credentials \
    --from-literal=mysql-root-password="$(kubectl get secret mariadb-credentials --namespace database -o jsonpath='{.data.mariadb-root-password}' | base64 -d)" \
    --from-literal=security.protocol="SASL_PLAINTEXT" \
    --from-literal=sasl.mechanism="SCRAM-SHA-256" \
    --from-literal=sasl.jaas.config="org.apache.kafka.common.security.scram.ScramLoginModule required username=\"user1\" password=\"$(kubectl get secret kafka-user-passwords --namespace database -o jsonpath='{.data.client-passwords}' | base64 -d | cut -d , -f 1)\";"
```
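Optionally confirm the secret exists and carries the expected keys before moving on:

```shell
# list the keys stored in the secret (values remain base64-encoded)
kubectl -n application get secret datahub-credentials -o jsonpath='{.data}'; echo
```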
2. prepare deploy-datahub.yaml

Without Kafka authentication, the Application can look like this:

```yaml
apiVersion: argoproj.io/v1alpha1
kind: Application
metadata:
  name: datahub
spec:
  syncPolicy:
    syncOptions:
      - CreateNamespace=true
  project: default
  source:
    repoURL: https://helm.datahubproject.io
    chart: datahub
    targetRevision: 0.4.8
    helm:
      releaseName: datahub
      values: |
        global:
          elasticsearch:
            host: elastic-search-elasticsearch.application.svc.cluster.local
            port: 9200
            skipcheck: "false"
            insecure: "false"
            useSSL: "false"
          kafka:
            bootstrap:
              server: kafka.database.svc.cluster.local:9092
            zookeeper:
              server: kafka-zookeeper.database.svc.cluster.local:2181
          sql:
            datasource:
              host: mariadb.database.svc.cluster.local:3306
              hostForMysqlClient: mariadb.database.svc.cluster.local
              port: 3306
              url: jdbc:mysql://mariadb.database.svc.cluster.local:3306/datahub?verifyServerCertificate=false&useSSL=true&useUnicode=yes&characterEncoding=UTF-8&enabledTLSProtocols=TLSv1.2
              driver: com.mysql.cj.jdbc.Driver
              username: root
              password:
                secretRef: datahub-credentials
                secretKey: mysql-root-password
        datahub-gms:
          enabled: true
          replicaCount: 1
          image:
            repository: m.daocloud.io/docker.io/acryldata/datahub-gms
          service:
            type: ClusterIP
          ingress:
            enabled: false
        datahub-frontend:
          enabled: true
          replicaCount: 1
          image:
            repository: m.daocloud.io/docker.io/acryldata/datahub-frontend-react
          defaultUserCredentials:
            randomAdminPassword: true
          service:
            type: ClusterIP
          ingress:
            enabled: true
            className: nginx
            annotations:
              cert-manager.io/cluster-issuer: self-signed-ca-issuer
            hosts:
              - host: datahub.dev.geekcity.tech
                paths:
                  - /
            tls:
              - secretName: "datahub.dev.geekcity.tech-tls"
                hosts:
                  - datahub.dev.geekcity.tech
        acryl-datahub-actions:
          enabled: true
          replicaCount: 1
          image:
            repository: m.daocloud.io/docker.io/acryldata/datahub-actions
        datahub-mae-consumer:
          replicaCount: 1
          image:
            repository: m.daocloud.io/docker.io/acryldata/datahub-mae-consumer
          ingress:
            enabled: false
        datahub-mce-consumer:
          replicaCount: 1
          image:
            repository: m.daocloud.io/docker.io/acryldata/datahub-mce-consumer
          ingress:
            enabled: false
        datahub-ingestion-cron:
          enabled: false
          image:
            repository: m.daocloud.io/docker.io/acryldata/datahub-ingestion
        elasticsearchSetupJob:
          enabled: true
          image:
            repository: m.daocloud.io/docker.io/acryldata/datahub-elasticsearch-setup
        kafkaSetupJob:
          enabled: true
          image:
            repository: m.daocloud.io/docker.io/acryldata/datahub-kafka-setup
        mysqlSetupJob:
          enabled: true
          image:
            repository: m.daocloud.io/docker.io/acryldata/datahub-mysql-setup
        postgresqlSetupJob:
          enabled: false
          image:
            repository: m.daocloud.io/docker.io/acryldata/datahub-postgres-setup
        datahubUpgrade:
          enabled: true
          image:
            repository: m.daocloud.io/docker.io/acryldata/datahub-upgrade
        datahubSystemUpdate:
          image:
            repository: m.daocloud.io/docker.io/acryldata/datahub-upgrade
  destination:
    server: https://kubernetes.default.svc
    namespace: application
```

If Kafka is secured with SASL/SCRAM (and Neo4j is used as the graph backend), use the following variant instead; it wires the extra keys from the datahub-credentials secret into the chart and also expects a neo4j-password key in that secret:
```yaml
apiVersion: argoproj.io/v1alpha1
kind: Application
metadata:
  name: datahub
spec:
  syncPolicy:
    syncOptions:
      - CreateNamespace=true
  project: default
  source:
    repoURL: https://helm.datahubproject.io
    chart: datahub
    targetRevision: 0.4.8
    helm:
      releaseName: datahub
      values: |
        global:
          springKafkaConfigurationOverrides:
            security.protocol: SASL_PLAINTEXT
            sasl.mechanism: SCRAM-SHA-256
          credentialsAndCertsSecrets:
            name: datahub-credentials
            secureEnv:
              sasl.jaas.config: sasl.jaas.config
          elasticsearch:
            host: elastic-search-elasticsearch.application.svc.cluster.local
            port: 9200
            skipcheck: "false"
            insecure: "false"
            useSSL: "false"
          kafka:
            bootstrap:
              server: kafka.database.svc.cluster.local:9092
            zookeeper:
              server: kafka-zookeeper.database.svc.cluster.local:2181
          neo4j:
            host: neo4j.database.svc.cluster.local:7474
            uri: bolt://neo4j.database.svc.cluster.local
            username: neo4j
            password:
              secretRef: datahub-credentials
              secretKey: neo4j-password
          sql:
            datasource:
              host: mariadb.database.svc.cluster.local:3306
              hostForMysqlClient: mariadb.database.svc.cluster.local
              port: 3306
              url: jdbc:mysql://mariadb.database.svc.cluster.local:3306/datahub?verifyServerCertificate=false&useSSL=true&useUnicode=yes&characterEncoding=UTF-8&enabledTLSProtocols=TLSv1.2
              driver: com.mysql.cj.jdbc.Driver
              username: root
              password:
                secretRef: datahub-credentials
                secretKey: mysql-root-password
        datahub-gms:
          enabled: true
          replicaCount: 1
          image:
            repository: m.daocloud.io/docker.io/acryldata/datahub-gms
          service:
            type: ClusterIP
          ingress:
            enabled: false
        datahub-frontend:
          enabled: true
          replicaCount: 1
          image:
            repository: m.daocloud.io/docker.io/acryldata/datahub-frontend-react
          defaultUserCredentials:
            randomAdminPassword: true
          service:
            type: ClusterIP
          ingress:
            enabled: true
            className: nginx
            annotations:
              cert-manager.io/cluster-issuer: self-signed-ca-issuer
            hosts:
              - host: datahub.dev.geekcity.tech
                paths:
                  - /
            tls:
              - secretName: "datahub.dev.geekcity.tech-tls"
                hosts:
                  - datahub.dev.geekcity.tech
        acryl-datahub-actions:
          enabled: true
          replicaCount: 1
          image:
            repository: m.daocloud.io/docker.io/acryldata/datahub-actions
        datahub-mae-consumer:
          replicaCount: 1
          image:
            repository: m.daocloud.io/docker.io/acryldata/datahub-mae-consumer
          ingress:
            enabled: false
        datahub-mce-consumer:
          replicaCount: 1
          image:
            repository: m.daocloud.io/docker.io/acryldata/datahub-mce-consumer
          ingress:
            enabled: false
        datahub-ingestion-cron:
          enabled: false
          image:
            repository: m.daocloud.io/docker.io/acryldata/datahub-ingestion
        elasticsearchSetupJob:
          enabled: true
          image:
            repository: m.daocloud.io/docker.io/acryldata/datahub-elasticsearch-setup
        kafkaSetupJob:
          enabled: true
          image:
            repository: m.daocloud.io/docker.io/acryldata/datahub-kafka-setup
        mysqlSetupJob:
          enabled: true
          image:
            repository: m.daocloud.io/docker.io/acryldata/datahub-mysql-setup
        postgresqlSetupJob:
          enabled: false
          image:
            repository: m.daocloud.io/docker.io/acryldata/datahub-postgres-setup
        datahubUpgrade:
          enabled: true
          image:
            repository: m.daocloud.io/docker.io/acryldata/datahub-upgrade
        datahubSystemUpdate:
          image:
            repository: m.daocloud.io/docker.io/acryldata/datahub-upgrade
  destination:
    server: https://kubernetes.default.svc
    namespace: application
```
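Because the chart values are embedded as an indented block inside the Application, a client-side dry run is a cheap way to catch YAML mistakes before handing the file to argoCD:

```shell
kubectl -n argocd apply --dry-run=client -f deploy-datahub.yaml
```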
3. apply to k8s

```shell
kubectl -n argocd apply -f deploy-datahub.yaml
```
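Before syncing, it can help to confirm that argoCD has picked up the Application object:

```shell
kubectl -n argocd get applications.argoproj.io datahub
```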
4. sync by argocd

```shell
argocd app sync argocd/datahub
```
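The sync kicks off several setup jobs (Elasticsearch, Kafka, MySQL) before the long-running components start, so it can take a while. Waiting on health is an easy way to know when it is done:

```shell
# block until the application reports Healthy (10 minute timeout)
argocd app wait argocd/datahub --health --timeout 600
# or watch the pods directly
kubectl -n application get pods -w
```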
5. extract credentials

```shell
kubectl -n application get secret datahub-user-secret -o jsonpath='{.data.user\.props}' | base64 -d
```
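user.props holds entries of the form username:password; with randomAdminPassword: true the chart generates the password for the default login (usually the datahub user, though that is an assumption worth verifying against the output above). To pull out only the password:

```shell
# the field after ':' is the generated password
kubectl -n application get secret datahub-user-secret \
    -o jsonpath='{.data.user\.props}' | base64 -d | cut -d : -f 2
```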
[Optional] Visit through browser

Add the following entry to /etc/hosts:

```text
$K8S_MASTER_IP datahub.dev.geekcity.tech
```

- datahub frontend: https://datahub.dev.geekcity.tech:32443
- api: https://datahub.dev.geekcity.tech:32443/openapi/swagger-ui/index.html
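To check the ingress without editing /etc/hosts, curl can pin the hostname to the master IP directly; -k is needed because the certificate comes from the self-signed ClusterIssuer configured above, and 32443 is assumed to be the HTTPS NodePort of the ingress controller:

```shell
# prints the HTTP status code returned by the datahub frontend
curl -k -s -o /dev/null -w '%{http_code}\n' \
    --resolve datahub.dev.geekcity.tech:32443:${K8S_MASTER_IP} \
    https://datahub.dev.geekcity.tech:32443
```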
[Optional] Visit through the DataHub CLI

We recommend Python virtual environments (venvs) to namespace pip modules. Here's an example setup:
```shell
python3 -m venv venv         # create the environment
source venv/bin/activate     # activate the environment
```

NOTE: If you install datahub in a virtual environment, that same virtual environment must be re-activated each time a shell window or session is created.
Once inside the virtual environment, install datahub using the following commands
```shell
# Requires Python 3.8+
python3 -m pip install --upgrade pip wheel setuptools
python3 -m pip install --upgrade acryl-datahub
# validate that the install was successful
datahub version
# If you see "command not found", try running this instead: python3 -m datahub version

# authenticate your datahub CLI with your datahub instance
datahub init
```
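The CLI needs to reach GMS. A minimal sketch, assuming the GMS service is named datahub-datahub-gms (the usual name for release datahub; verify with kubectl -n application get svc):

```shell
# forward GMS to localhost
kubectl -n application port-forward svc/datahub-datahub-gms 8080:8080 &

# point the CLI at it, either interactively via `datahub init`
# (answer with host http://localhost:8080 and an empty token) ...
datahub init
# ... or through the environment
export DATAHUB_GMS_URL=http://localhost:8080

# quick smoke test: fetch an entity that should exist on a fresh instance
datahub get --urn "urn:li:corpuser:datahub"
```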