散点图中的颜色区域

Question

我最近发现可以在 Orange 中为散点图创建颜色区域。我知道 Orange 是构建在 Python 之上的，所以我想我应该能够用 Python 重新实现它，但我遇到了困难。我还没弄清楚如何将 pandas 数据框转换为 Orange 的数据格式。更重要的是，我在 Spark 环境中工作，所以如果能直接从 pyspark 转到 Orange 那就更好了。

我已经在seaborn和matplotlib中设置了一个基本的散点图，看看我是否能弄清楚。

import seaborn as sns
import matplotlib.pyplot as plt

# Fetch the bundled Iris sample data through seaborn
iris = sns.load_dataset("iris")

# Draw sepal length against petal width, one colour per species;
# seaborn hands back the Axes it drew on, so annotate that directly
ax = sns.scatterplot(
    data=iris,
    x="sepal_length",
    y="petal_width",
    hue="species",
)

# Axis labels and title
ax.set_xlabel("Sepal Length")
ax.set_ylabel("Petal Width")
ax.set_title("Scatter Plot of Sepal Length vs. Petal Width")

# Render the legend and display the figure
ax.legend()
plt.show()

Answer 1

根据Orange文档：

如果在“颜色”部分中选择了分类变量，则分数计算如下。对于每个数据实例，该方法在投影的 2D 空间（即属性对的组合）中找到 10 个最近邻。然后它检查其中有多少具有相同的颜色。投影的总得分就是相同颜色邻居的平均数量。

使用 scikit-learn 的 k 最近邻分类器可以获得类似的结果。他们的文档中有一个也使用 iris 数据集的示例。

我修改了这个示例，使其与您分享的屏幕截图更加相似：

import matplotlib.pyplot as plt
import seaborn as sns
from matplotlib.colors import ListedColormap

from sklearn import datasets, neighbors
from sklearn.inspection import DecisionBoundaryDisplay

# Number of neighbours consulted for every prediction
n_neighbors = 10

# Load the iris dataset bundled with scikit-learn
iris = datasets.load_iris()

# Keep only two feature columns (indices 2 and 3) so the decision
# regions can be drawn in a 2D plane
features = [2, 3]
X = iris.data[:, features]
y = iris.target

# One colour per class: the same names are used for the translucent
# background regions (as a colormap) and for the opaque points (as a palette)
class_colors = ["blue", "red", "green"]
cmap_light = ListedColormap(class_colors)
cmap_bold = class_colors

# Fit a distance-weighted k-nearest-neighbours classifier on the 2D data
clf = neighbors.KNeighborsClassifier(n_neighbors, weights="distance")
clf.fit(X, y)

# Shade the plane by predicted class
_, ax = plt.subplots()
DecisionBoundaryDisplay.from_estimator(
    clf,
    X,
    cmap=cmap_light,
    ax=ax,
    response_method="predict",
    plot_method="pcolormesh",
    xlabel=iris.feature_names[features[0]],
    ylabel=iris.feature_names[features[1]],
    shading="auto",
    alpha=0.3,
)

# Overlay the training points. The target Axes is passed explicitly so the
# points land on the same plot as the regions instead of whichever Axes
# happens to be "current".
sns.scatterplot(
    x=X[:, 0],
    y=X[:, 1],
    hue=iris.target_names[y],
    palette=cmap_bold,
    alpha=1.0,
    edgecolor="black",
    ax=ax,
)

# Needed for the figure to appear when run as a plain script
plt.show()

这是结果：

Answer 2

下面的代码生成了一个与您发布的图类似的图，它直接使用 matplotlib 进行绘图。

输出：

from sklearn.neighbors import KNeighborsClassifier
from sklearn.datasets import load_iris

import pandas as pd
import numpy as np

import matplotlib.pyplot as plt
import matplotlib

#
# Load data
#
iris = load_iris(as_frame=True)
iris_x = iris.data
iris_y = iris.target

# Capitalise each column name and cut its last five characters
# (the " (cm)" unit suffix), e.g. "petal length (cm)" -> "Petal length"
iris_x.columns = [col.capitalize()[:-5] for col in iris_x.columns]

#
# Choose a color for each class
#

# Choose automatically across all classes
np.random.seed(2)
class_colors = np.random.choice(
    list(matplotlib.colors.CSS4_COLORS),
    size=len(iris_y.unique()),
    replace=False
)

# Alternatively, specify per class (this overrides the random choice above):
class_colors = ['tab:red', 'tab:green', 'tab:blue']

print('Class colors are:', class_colors)
# `display` only exists inside IPython/Jupyter; skip the colormap preview
# when running as a plain script instead of crashing with NameError.
try:
    display(matplotlib.colors.ListedColormap(class_colors))
except NameError:
    pass

# Create a white -> class-colour colormap for each class
class_cmaps = [
    matplotlib.colors.LinearSegmentedColormap.from_list('Custom', ['w', color])
    for color in class_colors
]

#
# Select features and fit KNN classifier
#
feat0 = 'Petal length'
feat1 = 'Petal width'
iris_x = iris_x[[feat0, feat1]]

n_neighbors = 10
knn = KNeighborsClassifier(n_neighbors=n_neighbors, weights='distance').fit(iris_x.values, iris_y)

#
# Plot the training points first so the final axis limits are known
#
f, ax = plt.subplots(figsize=(8, 8))
ax.scatter(iris_x[feat0], iris_x[feat1], c=np.choose(iris_y.values, class_colors), s=60,
           alpha=0.7, linewidth=2)
ax.set_xlabel(feat0)
ax.set_ylabel(feat1)
ax.set_title(f'Scatter plot of {feat0} vs. {feat1}')

#
# Define the feature space over the *visible* axis range.
# The background image is drawn with this same extent, so every pixel sits
# where it was sampled. Sampling only data-min..data-max and then stretching
# the image over the (wider) axis limits would shift the class boundaries
# relative to the points.
#
x_min, x_max, y_min, y_max = ax.axis()
x_grid, y_grid = np.meshgrid(
    np.linspace(x_min, x_max, 100),
    np.linspace(y_min, y_max, 100)
)
grid_flat = np.column_stack([x_grid.ravel(), y_grid.ravel()])

# At each point in the feature space, get the predicted class and the
# indices of its nearest training samples
classes = knn.predict(grid_flat)
neighbors = knn.kneighbors(grid_flat, return_distance=False)

# For each grid point, the proportion of its neighbours whose label matches
# the predicted class (vectorised: one row of neighbours per grid point)
neighbor_labels = iris_y.to_numpy()[neighbors]
prop_per_gridpt = (neighbor_labels == classes[:, np.newaxis]).mean(axis=1)

# Convert proportions to RGBA colours using the colormap of the predicted class
rgb_per_gridpt = np.empty(classes.shape + (4,))
for clas, cmap in enumerate(class_cmaps):
    in_class = classes == clas
    rgb_per_gridpt[in_class] = cmap(prop_per_gridpt[in_class])
rgb_per_gridpt = rgb_per_gridpt.reshape(x_grid.shape + (4,))

# origin='lower' puts the first grid row (smallest y) at the bottom
ax.imshow(rgb_per_gridpt, extent=(x_min, x_max, y_min, y_max), alpha=0.5,
          interpolation='bicubic', origin='lower')

# Needed for the figure to appear when run as a plain script
plt.show()

散点图中的颜色区域

问题描述投票：0回答：2

2个回答

最新问题

散点图中的颜色区域

问题描述 投票：0回答：2

2个回答

最新问题

问题描述投票：0回答：2