How to fix an error in R with a sparklyr model

Problem description

Can someone help me resolve this error I'm getting in R?
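
(For context: iris_tbl is the built-in iris dataset copied into Spark. A minimal setup sketch, assuming a local Spark connection, would look like the following; adjust spark_connect() to your environment.)

library(sparklyr)
library(dplyr)

# sketch: assumes a local Spark master; your connection may differ
sc <- spark_connect(master = "local")

# copy_to() sanitizes the column names, so Petal.Width becomes Petal_Width etc.
iris_tbl <- copy_to(sc, iris, "iris", overwrite = TRUE)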

kmeans_model <- iris_tbl %>%
   select(Petal_Width, Petal_Length) %>%
   ml_kmeans(centers = 3)

Error: java.lang.IllegalArgumentException: Field "features" does not exist. Available fields: Petal_Width, Petal_Length

    at org.apache.spark.sql.types.StructType$$anonfun$apply$1.apply(StructType.scala:274)
    at org.apache.spark.sql.types.StructType$$anonfun$apply$1.apply(StructType.scala:274)
    at scala.collection.MapLike$class.getOrElse(MapLike.scala:128)
    at scala.collection.AbstractMap.getOrElse(Map.scala:59)
    at org.apache.spark.sql.types.StructType.apply(StructType.scala:273)
    at org.apache.spark.ml.util.SchemaUtils$.checkColumnTypes(SchemaUtils.scala:58)
    at org.apache.spark.ml.util.SchemaUtils$.validateVectorCompatibleColumn(SchemaUtils.scala:119)
    at org.apache.spark.ml.clustering.KMeansParams$class.validateAndTransformSchema(KMeans.scala:96)
    at org.apache.spark.ml.clustering.KMeans.validateAndTransformSchema(KMeans.scala:285)
    at org.apache.spark.ml.clustering.KMeans.transformSchema(KMeans.scala:382)
    at org.apache.spark.ml.PipelineStage.transformSchema(Pipeline.scala:74)
    at org.apache.spark.ml.clustering.KMeans$$anonfun$fit$1.apply(KMeans.scala:341)
    at org.apache.spark.ml.clustering.KMeans$$anonfun$fit$1.apply(KMeans.scala:340)
    at org.apache.spark.ml.util.Instrumentation$$anonfun$11.apply(Instrumentation.scala:183)
    at scala.util.Try$.apply(Try.scala:192)
    at org.apache.spark.ml.util.Instrumentation$.instrumented(Instrumentation.scala:183)
    at org.apache.spark.ml.clustering.KMeans.fit(KMeans.scala:340)
    at sun.reflect.NativeMethodAccessorImpl.invoke0(Native Method)
    at sun.reflect.NativeMethodAccessorImpl.invoke(Unknown Source)
    at sun.reflect.DelegatingMethodAccessorImpl.invoke(Unknown Source)
    at java.lang.reflect.Method.invoke(Unknown Source)
    at sparklyr.Invoke.invoke(invoke.scala:139)
    at sparklyr.StreamHandler.handleMethodCall(stream.scala:123)
    at sparklyr.StreamHandler.read(stream.scala:66)
    at sparklyr.BackendHandler.channelRead0(handler.scala:51)
    at sparklyr.BackendHandler.channelRead0(handler.scala:4)
    at io.netty.channel.SimpleChannelInboundHandler.channelRead(SimpleChannelInboundHandler.java:105)
    at io.netty.channel.AbstractChannelHandlerContext.invokeChannelRead(AbstractChannelHandlerContext.java:362)
    at io.netty.channel.AbstractChannelHandlerContext.invokeChannelRead(AbstractChannelHandlerContext.java:348)
    at io.netty.channel.AbstractChannelHandlerContext.fireChannelRead(AbstractChannelHandlerContext.java:340)
    at io.netty.handler.codec.MessageToMessageDecoder.channelRead(MessageToMessageDecoder.java:102)
    at io.netty.channel.AbstractChannelHandlerContext.invokeChannelRead(AbstractChannelHandlerContext.java:362)
    at io.netty.channel.AbstractChannelHandlerContext.invokeChannelRead(AbstractChannelHandlerContext.java:348)
    at io.netty.channel.AbstractChannelHandlerContext.fireChannelRead(AbstractChannelHandlerContext.java:340)
    at io.netty.handler.codec.ByteToMessageDecoder.fireChannelRead(ByteToMessageDecoder.java:310)
    at io.netty.handler.codec.ByteToMessageDecoder.channelRead(ByteToMessageDecoder.java:284)
    at io.netty.channel.AbstractChannelHandlerContext.invokeChannelRead(AbstractChannelHandlerContext.java:362)
    at io.netty.channel.AbstractChannelHandlerContext.invokeChannelRead(AbstractChannelHandlerContext.java:348)
    at io.netty.channel.AbstractChannelHandlerContext.fireChannelRead(AbstractChannelHandlerContext.java:340)
    at io.netty.channel.DefaultChannelPipeline$HeadContext.channelRead(DefaultChannelPipeline.java:1359)
    at io.netty.channel.AbstractChannelHandlerContext.invokeChannelRead(AbstractChannelHandlerContext.java:362)
    at io.netty.channel.AbstractChannelHandlerContext.invokeChannelRead(AbstractChannelHandlerContext.java:348)
    at io.netty.channel.DefaultChannelPipeline.fireChannelRead(DefaultChannelPipeline.java:935)
    at io.netty.channel.nio.AbstractNioByteChannel$NioByteUnsafe.read(AbstractNioByteChannel.java:138)
    at io.netty.channel.nio.NioEventLoop.processSelectedKey(NioEventLoop.java:645)
    at io.netty.channel.nio.NioEventLoop.processSelectedKeysOptimized(NioEventLoop.java:580)
    at io.netty.channel.nio.NioEventLoop.processSelectedKeys(NioEventLoop.java:497)
    at io.netty.channel.nio.NioEventLoop.run(NioEventLoop.java:459)
    at io.netty.util.concurrent.SingleThreadEventExecutor$5.run(SingleThreadEventExecutor.java:858)
    at io.netty.util.concurrent.DefaultThreadFactory$DefaultRunnableDecorator.run(DefaultThreadFactory.java:138)
    at java.lang.Thread.run(Unknown Source)

Warning: Some components of ... were not used: centers

I have already tried a different call, but it does not give me 3 clusters, only 2:

kmeans_model <- iris_tbl %>% 
ml_kmeans(formula= ~ Petal_Width + Petal_Length, centers = 3)

#Warning: Some components of ... were not used: centers

print(kmeans_model)
#K-means clustering with 2 clusters
#
#Cluster centers:
#  Petal_Width Petal_Length
#1   1.6818182     4.925253
#2   0.2627451     1.492157
#
#Within Set Sum of Squared Errors =  86.39022
r sparklyr
1 Answer

The first line of the error is quite straightforward:

Field "features" does not exist.

If you look at the documentation for ?ml_kmeans, you will see that you need to specify either a formula (your second attempt) or a features_col. A quick note on the latter: Spark ML models expect the features to be assembled ("vectorized") into a single column of the data frame.
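
For instance (jumping ahead a little; ft_vector_assembler is shown again below), such a vector column can be built explicitly and pointed at via features_col. This is only a sketch; the column name "petal_features" is an arbitrary choice for illustration:

# Sketch: build the vector column yourself and tell ml_kmeans where it is.
# "petal_features" is an illustrative name, not required by sparklyr.
iris_tbl %>%
  ft_vector_assembler(input_cols = c("Petal_Width", "Petal_Length"), output_col = "petal_features") %>%
  ml_kmeans(k = 3, features_col = "petal_features")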

Your second error/warning message is also straightforward:

Warning: Some components of ... were not used: centers

centers is not an argument of ml_kmeans. What you want to use is k:

kmeans_model <- iris_tbl %>%
  ml_kmeans(formula = ~ Petal_Width + Petal_Length, k = 3)

kmeans_model
# K-means clustering with 3 clusters
# 
# Cluster centers:
#   Petal_Width Petal_Length
# 1    1.359259     4.292593
# 2    0.246000     1.462000
# 3    2.047826     5.626087
# 
# Within Set Sum of Squared Errors =  31.41289

To run it without a formula, you need to use ft_vector_assembler:

kmeans_model <- iris_tbl %>%
  ft_vector_assembler(input_cols = c("Petal_Width", "Petal_Length"), output_col = "features") %>%
  ml_kmeans(k = 3)
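
As a follow-up not in the original answer: once the model is fitted, cluster assignments can be attached with ml_predict() (or ml_transform()). A sketch; the data passed in must carry the same "features" vector column the model was fit on:

# Sketch (my addition): score the data with the fitted model.
features_tbl <- iris_tbl %>%
  ft_vector_assembler(input_cols = c("Petal_Width", "Petal_Length"), output_col = "features")

# ml_predict() adds a "prediction" column holding the assigned cluster index
ml_predict(kmeans_model, features_tbl) %>%
  select(Petal_Width, Petal_Length, prediction) %>%
  head()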