操作MySQL的数据:
spark.read.format("jdbc").option("url", "jdbc:mysql://localhost:3306/sparksql").option("dbtable", "sparksql.TBLS").option("user", "root").option("password", "root").option("driver", "com.mysql.jdbc.Driver").load()
java.sql.SQLException: No suitable driver
import java.util.Properties
val connectionProperties = new Properties()
connectionProperties.put("user", "root")
connectionProperties.put("password", "root")
connectionProperties.put("driver", "com.mysql.jdbc.Driver")
val jdbcDF2 = spark.read.jdbc("jdbc:mysql://localhost:3306", "sparksql.TBLS", connectionProperties)
CREATE TEMPORARY VIEW jdbcTable
USING org.apache.spark.sql.jdbc
OPTIONS (
url "jdbc:mysql://localhost:3306",
dbtable "sparksql.TBLS",
user ‘root‘,
password ‘root‘,
driver ‘com.mysql.jdbc.Driver‘
)
外部数据源综合案例
create database spark;
use spark;
CREATE TABLE DEPT(
DEPTNO int(2) PRIMARY KEY,
DNAME VARCHAR(14) ,
LOC VARCHAR(13) ) ;
INSERT INTO DEPT VALUES(10,‘ACCOUNTING‘,‘NEW YORK‘);
INSERT INTO DEPT VALUES(20,‘RESEARCH‘,‘DALLAS‘);
INSERT INTO DEPT VALUES(30,‘SALES‘,‘CHICAGO‘);
INSERT INTO DEPT VALUES(40,‘OPERATIONS‘,‘BOSTON‘);
1 package com.imooc.spark 2 3 import org.apache.spark.sql.SparkSession 4 5 /** 6 * 使用外部数据源综合查询Hive和MySQL的表数据 7 */ 8 object HiveMySQLApp { 9 10 def main(args: Array[String]) {11 val spark = SparkSession.builder().appName("HiveMySQLApp")12 .master("local[2]").getOrCreate()13 14 // 加载Hive表数据15 val hiveDF = spark.table("emp")16 17 // 加载MySQL表数据18 val mysqlDF = spark.read.format("jdbc").option("url", "jdbc:mysql://localhost:3306").option("dbtable", "spark.DEPT").option("user", "root").option("password", "root").option("driver", "com.mysql.jdbc.Driver").load()19 20 // JOIN21 val resultDF = hiveDF.join(mysqlDF, hiveDF.col("deptno") === mysqlDF.col("DEPTNO"))22 resultDF.show23 24 25 resultDF.select(hiveDF.col("empno"),hiveDF.col("ename"),26 mysqlDF.col("deptno"), mysqlDF.col("dname")).show27 28 spark.stop()29 }30 31 }