import math._
/** Great-circle distance between two points on a sphere, computed with the haversine formula. */
object Haversine {

  /** Sphere radius used by [[haversine]], in kilometres. */
  val R: Double = 6372.8

  /**
   * Distance between two points whose coordinates are given in decimal degrees.
   *
   * @param lat1 latitude of the first point, in degrees
   * @param lon1 longitude of the first point, in degrees
   * @param lat2 latitude of the second point, in degrees
   * @param lon2 longitude of the second point, in degrees
   * @return the distance in the same unit as [[R]] (kilometres)
   */
  def haversine(lat1: Double, lon1: Double, lat2: Double, lon2: Double): Double = {
    val dLat = (lat2 - lat1).toRadians
    val dLon = (lon2 - lon1).toRadians
    val a = pow(sin(dLat / 2), 2) +
      pow(sin(dLon / 2), 2) * cos(lat1.toRadians) * cos(lat2.toRadians)
    // Floating-point rounding can push `a` marginally above 1 for (near-)antipodal
    // points; asin would then return NaN, so clamp before taking the root.
    val c = 2 * asin(sqrt(min(1.0, a)))
    R * c
  }

  /** Prints the distance for one fixed pair of sample coordinates. */
  def main(args: Array[String]): Unit =
    println(haversine(36.12, -86.67, 33.94, -118.40))
}
import org.apache.spark.sql.SparkSession
import Haversine.haversine
/** Spark entry point that loads the taxi GPS and district files and gives their columns names. */
object Position {

  /**
   * Reads both CSV files, renames the positional columns and then releases the session.
   *
   * @param args command-line arguments (unused)
   */
  def main(args: Array[String]): Unit = {
    // Obtain the active SparkSession, creating one if none exists yet.
    val spark = SparkSession.builder().getOrCreate()
    try {
      // Header-less CSV: Spark names the columns _c0, _c1, ... and reads every value as a string.
      val file1 = spark.read.csv("file:///home/aaron/Downloads/taxi_gps.txt")
      val file2 = spark.read.csv("file:///home/aaron/Downloads/district.txt")

      // Give the columns of interest meaningful names.
      val new_file1 = file1
        .withColumnRenamed("_c0", "id")
        .withColumnRenamed("_c4", "lat")
        .withColumnRenamed("_c5", "lon")

      // The original renamed "_1"/"_2", which do not exist (withColumnRenamed is then a
      // silent no-op), so the district frame never got lat/lon columns.
      // NOTE(review): assumes district lat/lon are the 2nd and 3rd columns — confirm against the file.
      val new_file2 = file2
        .withColumnRenamed("_c0", "dis")
        .withColumnRenamed("_c1", "lat")
        .withColumnRenamed("_c2", "lon")
        .withColumnRenamed("_c3", "r")

      // TODO: count the taxis that fall inside each district's radius.
    } finally {
      // Always release the session's resources, even if reading fails.
      spark.stop()
    }
  }
}
我对scala不熟悉,对我来说这是一个棘手的问题。我希望你们能帮助我,谢谢!
在实现之前,您需要在main方法之外、但在同一个类中定义并实现方法cal_distance。
/**
 * Great-circle distance between a reference point and a taxi position, using the
 * haversine formula on a sphere of radius 6372.8 km.
 *
 * Kept self-contained (only `scala.math`) so it can be placed in any class.
 *
 * @param lon     longitude of the reference point, in decimal degrees
 * @param lat     latitude of the reference point, in decimal degrees
 * @param taxiLon longitude of the taxi, in decimal degrees
 * @param taxiLat latitude of the taxi, in decimal degrees
 * @return the distance in kilometres
 *         (NOTE(review): callers compare this to a radius column — confirm that column is in km too)
 */
def cal_distance(lon: Float, lat: Float, taxiLon: Float, taxiLat: Float): Float = {
  val earthRadiusKm = 6372.8
  val dLat = math.toRadians(taxiLat.toDouble - lat.toDouble)
  val dLon = math.toRadians(taxiLon.toDouble - lon.toDouble)
  val a = math.pow(math.sin(dLat / 2), 2) +
    math.pow(math.sin(dLon / 2), 2) *
      math.cos(math.toRadians(lat.toDouble)) * math.cos(math.toRadians(taxiLat.toDouble))
  // Clamp: rounding may push `a` just above 1 for antipodal points, which would yield NaN.
  val c = 2 * math.asin(math.sqrt(math.min(1.0, a)))
  (earthRadiusKm * c).toFloat
}
您在Scala中的代码应类似于以下内容
// For every district, count the taxi positions lying within its radius and print the total.
//
// The district rows are brought to the driver first: a DataFrame (new_file1) cannot be used
// from inside another DataFrame's foreach closure, and a `var` captured by such a closure is
// only updated on the executor's copy. Each district then runs one filter+count job over
// new_file1 (NOTE(review): fine for a small district list — confirm its size).
//
// Both frames come from a schema-less CSV read, so every column is a string; values are
// parsed with toFloat instead of being cast with getAs[Float].
// NOTE(review): a null or non-numeric cell will throw here — confirm the inputs are clean.
new_file2.collect().foreach { row =>
  val district = row.getAs[String]("dis")
  val lon = row.getAs[String]("lon").toFloat
  val lat = row.getAs[String]("lat").toFloat
  val distance = row.getAs[String]("r").toFloat

  // Number of taxi records within `distance` of the district centre.
  val temp = new_file1.filter { taxi =>
    val taxiLon = taxi.getAs[String]("lon").toFloat
    val taxiLat = taxi.getAs[String]("lat").toFloat
    cal_distance(lon, lat, taxiLon, taxiLat) <= distance
  }.count()

  println(s"district:${district} temp=${temp}")
}