Error when deploying the spark-submit container

Problem description

I am trying to deploy a project I built for a module in my master's degree. For this project I had to modify several files so that I could use Spark with some Spark workers, Cassandra, Flask and a few other packages.

The problem I have occurs when running my spark-submit container:

This is the docker-compose.yaml I am using:


version: "3"
services:

  zookeeper:
    container_name: zookeeper
    image: wurstmeister/zookeeper
    ports:
      - "2181:2181"
    hostname: zookeeper
    networks:
      - red1

  kafka:
    container_name: kafka
    image: wurstmeister/kafka:2.12-2.3.0
    ports:
      - "9092:9092"
    depends_on:
      - zookeeper
    hostname: kafka
    environment:
      KAFKA_ADVERTISED_LISTENERS: PLAINTEXT://kafka:9092
      KAFKA_LISTENERS: PLAINTEXT://kafka:9092
      KAFKA_ZOOKEEPER_CONNECT: zookeeper:2181
      KAFKA_CREATE_TOPICS: "flight_delay_classification_request:1:1"
    networks:
      - red1

  mongo:
    container_name: mongo
    image: mongo:4.4.2
    ports:
      - "27017:27017"
    depends_on:
      - kafka
    hostname: mongo
    networks:
      - red1

  mongo_seed:
    image: jlmendo11/mongo_seed
    container_name: mongo_seed
    depends_on:
      - mongo
    networks:
      - red1
    environment:
      - MONGO_HOST=mongo
      - MONGO_PORT=27017
    command:
      - "mongoimport --host mongo --port 27017 -d agile_data_science -c origin_dest_distances --mode upsert --type json --file /origin_dest_distances.jsonl"
    restart: on-failure

  spark-master:
    image: bde2020/spark-master:3.3.0-hadoop3.3
    container_name: spark-master
    ports:
      - "7077:7077"
      - "9001:9001"
      - "8080:8080"
    environment:
      - SPARK_LOCAL_IP=spark-master
      - SPARK_WORKLOAD=master
    volumes:
      - ../models:/models
      - ../flight_prediction/target/scala-2.12:/target/scala-2.12
    networks:
      - red1

  spark-worker-1:
    image: bde2020/spark-worker:3.3.0-hadoop3.3
    container_name: spark-worker-1
    depends_on:
      - spark-master
    ports:
      - "8081:8081"
    environment:
      - SPARK_MASTER=spark://spark-master:7077
      - SPARK_WORKLOAD=worker
      - SPARK_LOCAL_IP=spark-worker-1
    networks:
      - red1
    volumes:
      - ../models:/models
      - ../flight_prediction/target/scala-2.12:/target/scala-2.12

  spark-worker-2:
    image: bde2020/spark-worker:3.3.0-hadoop3.3
    container_name: spark-worker-2
    depends_on:
      - spark-master
    ports:
      - "8082:8081"
    environment:
      - SPARK_MASTER=spark://spark-master:7077
      - SPARK_WORKLOAD=worker
      - SPARK_LOCAL_IP=spark-worker-2
    networks:
      - red1
    volumes:
      - ../models:/models
      - ../flight_prediction/target/scala-2.12:/target/scala-2.12

  spark-submit:
    image: bde2020/spark-submit:3.3.0-hadoop3.3
    container_name: spark-submit
    depends_on:
      - spark-master
      - spark-worker-1
      - spark-worker-2
    ports:
      - "4040:4040"
    environment:
      - SPARK_MASTER=spark://spark-master:7077
      - SPARK_WORKLOAD=submitter
      - SPARK_LOCAL_IP=spark-submit
      - CLASSPATH=/scala-2.12
    command: bash -c "sleep 15; /spark/bin/spark-submit --class "es.upm.dit.ging.predictor.MakePrediction" --master spark://spark-master:7077 --packages com.datastax.spark:spark-cassandra-connector_2.12:3.2.0, org.apache.spark:spark-sql-kafka-0-10_2.12:3.3.4, com.datastax.cassandra:cassandra-driver-core:4.0.0 --jars scala-2.12/flight_prediction_2.12-0.1.jar"
   # command: bash -c "sleep 15; /spark/bin/spark-submit --class "es.upm.dit.ging.predictor.MakePrediction" --master spark://spark-master:7077 --packages com.datastax.spark:spark-cassandra-connector_2.12:3.2.0, org.apache.spark:spark-sql-kafka-0-10_2.12:3.3.0, com.datastax.oss:java-driver-core_4.17.0 target/scala-2.12/flight_prediction_2.12-0.1.jar"
    networks:
      - red1
    restart: on-failure
    volumes:
      - ../flight_prediction/target/scala-2.12:/scala-2.12

  flask:
    container_name: flask
    image: jlmendo11/flask2
    ports:
      - "5000:5000"
      - "9200:9200"
    depends_on:
      - mongo_seed
      - spark-master
    hostname: flask
    networks:
      - red1
    restart: on-failure

  cassandra:
    container_name: cassandra
    image: cassandra:3
    ports:
      - "9042:9042"
    networks:
      - red1
    volumes:
      - ./cassandra/init.cql:/scripts/init.cql

  cassandra_init:
    container_name: cassandra_init
    image: nuvo/docker-cqlsh  
    depends_on: 
      - cassandra
    networks:
      - red1
    volumes:
      - ./cassandra/init.cql:/scripts/init.cql
    restart: on-failure

networks:
  red1:
    driver : bridge
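
For reference, the stack can be brought up and the output of the failing container followed with something like the commands below (service names as defined in the file above; with an older Docker Compose the binary is docker-compose rather than docker compose):

# Assumption: run from the directory that contains this docker-compose.yaml.
docker compose up -d

# spark-submit has restart: on-failure, so it keeps retrying; follow its logs
# to see the actual spark-submit output.
docker compose logs -f spark-submit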

This is my build.sbt file:

name := "flight_prediction"

version := "0.1"

scalaVersion := "2.12.10"

val sparkVersion = "3.3.4"

mainClass in Compile := Some("es.upm.dit.ging.predictor.MakePrediction")

resolvers ++= Seq(
  "apache-snapshots" at "https://repository.apache.org/snapshots/"
)

libraryDependencies ++= Seq(
  "org.apache.spark" %% "spark-core" % sparkVersion,
  "org.apache.spark" %% "spark-sql" % sparkVersion,
  "org.apache.spark" %% "spark-mllib" % sparkVersion,
  "org.apache.spark" %% "spark-streaming" % sparkVersion,
  "org.apache.spark" %% "spark-hive" % sparkVersion,
  "org.apache.spark" %% "spark-sql-kafka-0-10" % "3.3.4",
  "org.mongodb.spark" %% "mongo-spark-connector" % "10.1.1",
  "com.datastax.spark" %% "spark-cassandra-connector" % "3.2.0",
  "com.datastax.cassandra" % "cassandra-driver-core" % "4.0.0"
)

I have tried many Spark versions, since I suspected it might be a version mismatch somewhere, but I cannot pin down the error and the error message itself is not helpful.
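
In case a version mismatch is still a suspect, one rough sanity check (my own sketch, not something taken from the error output) is to compare the Spark and Scala versions reported by the cluster with sparkVersion = 3.3.4 and scalaVersion = 2.12.10 in the build.sbt above, and with the _2.12 suffix used in the --packages coordinates and in the jar name:

# Assumes the stack is already running; use docker-compose exec with older Compose.
# /spark/bin is the install path already used in the submit command above; the
# version banner also shows the Scala version the distribution was built with.
docker compose exec spark-master /spark/bin/spark-submit --version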

apache-spark hadoop docker-compose sbt spark-submit

1 Answer

I solved it. Apparently the problem was the whitespace between the commas and the package coordinates in the command field.

This is what I had:

command: bash -c "sleep 15; /spark/bin/spark-submit --class "es.upm.dit.ging.predictor.MakePrediction" --master spark://spark-master:7077 --packages com.datastax.spark:spark-cassandra-connector_2.12:3.2.0, org.apache.spark:spark-sql-kafka-0-10_2.12:3.3.4, com.datastax.cassandra:cassandra-driver-core:4.0.0 --jars scala-2.12/flight_prediction_2.12-0.1.jar"

and it should be like this:

command: bash -c "sleep 15; /spark/bin/spark-submit --class "es.upm.dit.ging.predictor.MakePrediction" --master spark://spark-master:7077 --packages com.datastax.spark:spark-cassandra-connector_2.12:3.2.0,org.apache.spark:spark-sql-kafka-0-10_2.12:3.3.4,com.datastax.cassandra:cassandra-driver-core:4.0.0 --jars scala-2.12/flight_prediction_2.12-0.1.jar"
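
As far as I can tell the reason is plain shell word splitting: --packages expects a single comma-separated value, so the space after each comma ends that argument and the remaining coordinates reach spark-submit as separate, unexpected arguments. A minimal illustration of the splitting itself, outside spark-submit:

# Illustration only: reproduce how the shell splits the broken --packages value.
set -- --packages com.datastax.spark:spark-cassandra-connector_2.12:3.2.0, org.apache.spark:spark-sql-kafka-0-10_2.12:3.3.4, com.datastax.cassandra:cassandra-driver-core:4.0.0
echo "$#"   # 4 words: the flag plus three separate package "arguments"
echo "$2"   # only this fragment (note the trailing comma) is taken as the --packages value

With the spaces removed, the whole list is a single word again and spark-submit resolves all three packages.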