如何将字符转换为数字

问题描述 投票:0回答:2

我是 R 语言的新手。我有下面这张表(存放在数据框 df 中),想把其中以字符形式存储的数值转换为数字:

   Saldo.Rotativo            A.Vista        Parcelado          Saque
1  dez/16  2.919.869.506,96   6.364.901.107,55   463.677.827,46     729.185,18 
2  dez/17  2.007.351.784,18   6.831.919.805,09   780.093.428,86   2.817.814,72 
3  dez/18  1.944.752.969,68   7.458.695.484,62   542.074.530,21   1.885.633,29 
4  jan/19  1.991.796.619,57   7.371.837.099,11   540.893.516,33   2.058.371,60 
5  fev/19  2.121.650.035,30   6.967.735.692,25   562.211.487,01   2.132.249,61 
6  mar/19  2.062.475.653,11   6.900.028.117,67   575.861.976,61   2.100.849,74 
7  abr/19  2.089.793.039,24   6.945.593.710,28   583.181.387,89   2.090.951,69 
8  mai/19  2.078.700.800,99   7.146.597.703,16   612.184.578,96   2.132.951,04 
9  jun/19  2.239.390.093,82   6.851.118.033,80   618.844.690,37   1.764.866,10 
10 jul/19  2.121.263.409,38   7.196.087.606,84   629.995.715,52   3.945.650,40 
11 ago/19  2.173.187.280,54   7.089.675.942,22   624.808.459,45   6.341.527,95 
12 set/19  2.285.571.063,90   7.111.228.186,19   617.840.220,61   6.143.505,16 
13 out/19  2.193.401.889,85   7.263.912.266,04   622.821.392,86   7.253.169,67 
14 nov/19  2.281.061.211,60   7.240.713.335,11   611.161.428,40   7.484.398,11 
15 dez/19  2.212.531.321,45   7.892.016.606,72   597.916.084,63   6.464.980,78 
r type-conversion numeric chr
2个回答
0
投票

我们可以先用 str_remove_all 去掉作为千位分隔符的“.”,再用 str_replace 把作为小数点的“,”替换为“.”,最后用 as.numeric 转换:

library(dplyr)
library(stringr)
# Convert every column except the first from text such as
# "2.919.869.506,96" to numeric.
# across() is used instead of the superseded mutate_at(); the selection -1
# means "all columns but the first".
df <- df %>%
  mutate(across(-1, ~ as.numeric(
    # Remove every "." (thousands separator), then turn the "," decimal
    # mark into "." so that as.numeric() can parse the value.
    str_replace(str_remove_all(.x, "\\."), ",", ".")
  )))

df
#     date Saldo.Rotativo    A.Vista Parcelado     Saque
#1  dez/16     2919869507 6364901108 463677827  729185.2
#2  dez/17     2007351784 6831919805 780093429 2817814.7
#3  dez/18     1944752970 7458695485 542074530 1885633.3
#4  jan/19     1991796620 7371837099 540893516 2058371.6
#5  fev/19     2121650035 6967735692 562211487 2132249.6
#6  mar/19     2062475653 6900028118 575861977 2100849.7
#7  abr/19     2089793039 6945593710 583181388 2090951.7
#8  mai/19     2078700801 7146597703 612184579 2132951.0
#9  jun/19     2239390094 6851118034 618844690 1764866.1
#10 jul/19     2121263409 7196087607 629995716 3945650.4
#11 ago/19     2173187281 7089675942 624808459 6341528.0
#12 set/19     2285571064 7111228186 617840221 6143505.2
#13 out/19     2193401890 7263912266 622821393 7253169.7
#14 nov/19     2281061212 7240713335 611161428 7484398.1
#15 dez/19     2212531321 7892016607 597916085 6464980.8   



str(df)
#'data.frame':  15 obs. of  5 variables:
# $ date          : chr  "dez/16" "dez/17" "dez/18" "jan/19" ...
# $ Saldo.Rotativo: num  2.92e+09 2.01e+09 1.94e+09 1.99e+09 2.12e+09 ...
# $ A.Vista       : num  6.36e+09 6.83e+09 7.46e+09 7.37e+09 6.97e+09 ...
# $ Parcelado     : num  4.64e+08 7.80e+08 5.42e+08 5.41e+08 5.62e+08 ...
# $ Saque         : num  729185 2817815 1885633 2058372 2132250 ...

数据

# Example input: a 15-row data frame. `date` holds month labels such as
# "dez/16"; the other four columns hold amounts as character strings that
# use "." as the thousands separator and "," as the decimal mark.
df <- structure(list(date = c("dez/16", "dez/17", "dez/18", "jan/19", 
"fev/19", "mar/19", "abr/19", "mai/19", "jun/19", "jul/19", "ago/19", 
"set/19", "out/19", "nov/19", "dez/19"), Saldo.Rotativo = c("2.919.869.506,96", 
"2.007.351.784,18", "1.944.752.969,68", "1.991.796.619,57", "2.121.650.035,30", 
"2.062.475.653,11", "2.089.793.039,24", "2.078.700.800,99", "2.239.390.093,82", 
"2.121.263.409,38", "2.173.187.280,54", "2.285.571.063,90", "2.193.401.889,85", 
"2.281.061.211,60", "2.212.531.321,45"), A.Vista = c("6.364.901.107,55", 
"6.831.919.805,09", "7.458.695.484,62", "7.371.837.099,11", "6.967.735.692,25", 
"6.900.028.117,67", "6.945.593.710,28", "7.146.597.703,16", "6.851.118.033,80", 
"7.196.087.606,84", "7.089.675.942,22", "7.111.228.186,19", "7.263.912.266,04", 
"7.240.713.335,11", "7.892.016.606,72"), Parcelado = c("463.677.827,46", 
"780.093.428,86", "542.074.530,21", "540.893.516,33", "562.211.487,01", 
"575.861.976,61", "583.181.387,89", "612.184.578,96", "618.844.690,37", 
"629.995.715,52", "624.808.459,45", "617.840.220,61", "622.821.392,86", 
"611.161.428,40", "597.916.084,63"), Saque = c("729.185,18", 
"2.817.814,72", "1.885.633,29", "2.058.371,60", "2.132.249,61", 
"2.100.849,74", "2.090.951,69", "2.132.951,04", "1.764.866,10", 
"3.945.650,40", "6.341.527,95", "6.143.505,16", "7.253.169,67", 
"7.484.398,11", "6.464.980,78")), class = "data.frame", row.names = c("1", 
"2", "3", "4", "5", "6", "7", "8", "9", "10", "11", "12", "13", 
"14", "15"))

0
投票

不幸的是,R 似乎没有能够自动识别不同区域设置(locale)下数字格式的基础函数,因此我们需要组合使用 gsub 和 sub:

# Convert every column except the first from text such as "6.143.505,16"
# to numeric, using base R only.
# dat[-1] (list-style indexing) is used instead of dat[, -1]: the
# matrix-style form drops to a plain vector when only one column remains,
# so lapply() would iterate over cells instead of columns.
dat[-1] <- lapply(dat[-1], function(a) {
  # Strip the "." thousands separators; fixed = TRUE matches a literal dot
  # rather than the regex "any character".
  no_grouping <- gsub(".", "", a, fixed = TRUE)
  # Turn the "," decimal mark into "." so that as.numeric() can parse it.
  as.numeric(sub(",", ".", no_grouping))
})
head(dat)
#   rowname Saldo.Rotativo    A.Vista Parcelado     Saque
# 1  dez/16     2919869507 6364901108 463677827  729185.2
# 2  dez/17     2007351784 6831919805 780093429 2817814.7
# 3  dez/18     1944752970 7458695485 542074530 1885633.3
# 4  jan/19     1991796620 7371837099 540893516 2058371.6
# 5  fev/19     2121650035 6967735692 562211487 2132249.6
# 6  mar/19     2062475653 6900028118 575861977 2100849.7
str(dat)
# 'data.frame': 15 obs. of  5 variables:
#  $ rowname       : chr  "dez/16" "dez/17" "dez/18" "jan/19" ...
#  $ Saldo.Rotativo: num  2.92e+09 2.01e+09 1.94e+09 1.99e+09 2.12e+09 ...
#  $ A.Vista       : num  6.36e+09 6.83e+09 7.46e+09 7.37e+09 6.97e+09 ...
#  $ Parcelado     : num  4.64e+08 7.80e+08 5.42e+08 5.41e+08 5.62e+08 ...
#  $ Saque         : num  729185 2817815 1885633 2058372 2132250 ...

虽然不属于 base R,但 akrun 最初建议(该回答现已删除)的 readr::parse_number 可以“正确”地完成这种转换:

# Print up to 9 significant digits so the decimal part of the parsed value
# is visible in the console output.
options(digits = 9)
# Locale in which "." groups thousands and "," is the decimal mark.
br_locale <- readr::locale(decimal_mark = ",", grouping_mark = ".")
readr::parse_number("6.143.505,16", locale = br_locale)
# [1] 6143505.16

所以,虽然它不一定更快(我尚未对其进行基准测试),但可以认为这是更简洁的声明式(declarative)代码:
# Parse every column except the first with a locale in which "." groups
# thousands and "," is the decimal mark, then show the structure of the
# resulting list.
# dat[-1] is used rather than dat[, -1] so the selection stays a data frame
# (a list of columns) even when only one column remains.
br_locale <- readr::locale(grouping_mark = ".", decimal_mark = ",")
str(lapply(dat[-1], readr::parse_number, locale = br_locale))
# List of 4
#  $ Saldo.Rotativo: num [1:15] 2.92e+09 2.01e+09 1.94e+09 1.99e+09 2.12e+09 ...
#  $ A.Vista       : num [1:15] 6.36e+09 6.83e+09 7.46e+09 7.37e+09 6.97e+09 ...
#  $ Parcelado     : num [1:15] 4.64e+08 7.80e+08 5.42e+08 5.41e+08 5.62e+08 ...
#  $ Saque         : num [1:15] 729185 2817815 1885633 2058372 2132250 ...

(为简洁起见,str 的输出已被删减。)


数据:

# Example input: a 15-row data frame. `rowname` holds month labels such as
# "dez/16"; the other four columns hold amounts as character strings that
# use "." as the thousands separator and "," as the decimal mark.
dat <- structure(list(rowname = c("dez/16", "dez/17", "dez/18", "jan/19", 
"fev/19", "mar/19", "abr/19", "mai/19", "jun/19", "jul/19", "ago/19", 
"set/19", "out/19", "nov/19", "dez/19"), Saldo.Rotativo = c("2.919.869.506,96", 
"2.007.351.784,18", "1.944.752.969,68", "1.991.796.619,57", "2.121.650.035,30", 
"2.062.475.653,11", "2.089.793.039,24", "2.078.700.800,99", "2.239.390.093,82", 
"2.121.263.409,38", "2.173.187.280,54", "2.285.571.063,90", "2.193.401.889,85", 
"2.281.061.211,60", "2.212.531.321,45"), A.Vista = c("6.364.901.107,55", 
"6.831.919.805,09", "7.458.695.484,62", "7.371.837.099,11", "6.967.735.692,25", 
"6.900.028.117,67", "6.945.593.710,28", "7.146.597.703,16", "6.851.118.033,80", 
"7.196.087.606,84", "7.089.675.942,22", "7.111.228.186,19", "7.263.912.266,04", 
"7.240.713.335,11", "7.892.016.606,72"), Parcelado = c("463.677.827,46", 
"780.093.428,86", "542.074.530,21", "540.893.516,33", "562.211.487,01", 
"575.861.976,61", "583.181.387,89", "612.184.578,96", "618.844.690,37", 
"629.995.715,52", "624.808.459,45", "617.840.220,61", "622.821.392,86", 
"611.161.428,40", "597.916.084,63"), Saque = c("729.185,18", 
"2.817.814,72", "1.885.633,29", "2.058.371,60", "2.132.249,61", 
"2.100.849,74", "2.090.951,69", "2.132.951,04", "1.764.866,10", 
"3.945.650,40", "6.341.527,95", "6.143.505,16", "7.253.169,67", 
"7.484.398,11", "6.464.980,78")), class = "data.frame", row.names = c(NA, 
-15L))

(第一列的列名 rowname 是我推断的,因为您的问题中没有明确给出。如果存在歧义,用 dput 提供数据会有所帮助。)

© www.soinside.com 2019 - 2024. All rights reserved.