我有一个表格,按项目ID显示库存值。我需要找出每个项目缺货的日期以及缺货的天数。我的表只在库存值发生变化时才添加一个新行,所以我只有某个项目开始缺货的那一天,而没有它此后仍然缺货的那些日子。下面是数据的一个示例:
ItemID QTY ADate
10007 2 2011-07-22
10007 1 2011-07-27
10007 2 2011-09-01
10007 1 2011-09-23
10007 2 2011-09-29
10007 0 2011-10-29
10007 2 2011-11-03
10007 1 2011-11-21
23028 2 2011-07-23
23028 0 2011-08-03
23028 2 2011-08-10
我想拥有的是:
ItemID QTY ADate
10007 2 2011-07-22
10007 2 2011-07-23
10007 2 2011-07-24
10007 2 2011-07-25
10007 2 2011-07-26
10007 1 2011-07-27
10007 1 2011-07-28
10007 1 2011-07-29
10007 1 2011-07-30
等...`
然后我可以通过商品ID抓住我缺货的日子。我没有任何代码可以显示,因为坦率地说我甚至不知道从哪里开始。知道怎么解决这个问题吗?非常感谢!
如果我理解正确,您希望每个连续的日期都有一行,而不仅仅是发生变化的那些日期。例如,ItemID 10007 在 2011-07-22、2011-07-23、2011-07-24 ……这些日期的 QTY 都是 2,直到 2011-07-27 才变为 1。
在这种情况下,可以试试下面的代码,假设您在R中有一个名为mydata的数据框,并且日期是字符格式:
library(tidyverse)

# Expand the change log to one row per item per calendar day, carrying the
# last known quantity forward until the next recorded change.
mydata %>%
  mutate(ADate = as.Date(ADate)) %>%
  # Work item by item: without grouping, the dates are completed once for
  # the whole table and fill() drags one item's ItemID/QTY onto days that
  # belong to a different item.
  group_by(ItemID) %>%
  complete(ADate = seq.Date(min(ADate), max(ATate <- ADate), by = "day")) %>%
  fill(QTY) %>%
  ungroup() %>%
  # Present the date first and sort chronologically.
  select(ADate, ItemID, QTY) %>%
  arrange(ADate, ItemID)
如果你添加%>% filter(QTY == 0)
到最后,你应该看到:
ADate ItemID QTY
<date> <int> <int>
1 2011-08-03 23028 0
2 2011-08-04 23028 0
3 2011-08-05 23028 0
4 2011-08-06 23028 0
5 2011-08-07 23028 0
6 2011-08-08 23028 0
7 2011-08-09 23028 0
8 2011-10-29 10007 0
9 2011-10-30 10007 0
10 2011-10-31 10007 0
11 2011-11-01 10007 0
12 2011-11-02 10007 0
您似乎想用最近一次的值来填充中间缺失的日期。一种方法是使用递归CTE,但那其实相当麻烦。这里我假设您有一张numbers表:
-- One row per item per day, from the item's first recorded date up to today,
-- each carrying the quantity of the most recent change on or before that day.
-- NOTE(review): assumes numbers(n) holds consecutive integers starting at 0
-- and covers the longest date span needed -- confirm against the real table.
select id.itemid, dateadd(day, n.n, id.min_adate) as dte, s.qty
from (select t.itemid, min(t.adate) as min_adate
      from t
      group by t.itemid
     ) id join
     numbers n
     -- min_adate is exposed by the derived table "id", not by "t"
     on dateadd(day, n.n, id.min_adate) < getdate() outer apply
     -- latest change row for this item at or before the generated day
     (select top 1 t.*
      from t
      where t.itemid = id.itemid and t.adate <= dateadd(day, n.n, id.min_adate)
      order by t.adate desc
     ) s;
请参见这个演示:http://rextester.com/live/CDFT20818 ,它改用CTE和许多SQL Server 2012+的功能来实现。
-- Demo: expand per-item stock changes into one row per item per day of a
-- single month, carrying the latest known quantity forward.
declare @mytable table (itemid int, qty int, adate date);

insert into @mytable (itemid, qty, adate)
values
    (10007, 2, ' 2011-07-22'),
    (10007, 1, ' 2011-07-27'),
    (10008, 1, ' 2011-07-23'),
    (10008, 0, ' 2011-07-16'),
    (10008, 4, ' 2011-07-10');

-- Month to report on: first day and last day.
declare @som date = '2011-07-01';
declare @eom date = eomonth(@som);

-- myCalendar: every day from @som to @eom, built recursively.
with myCalendar as
(
    select @som as startdate
    union all
    select dateadd(day, 1, startdate)
    from myCalendar
    where startdate < @eom
),
-- myDistinctList: every (item, day) combination.
myDistinctList as
(
    select i.itemid, c.startdate
    from (select distinct itemid from @mytable) i
    cross join myCalendar c
),
-- myFinal: quantity recorded on that day, otherwise the most recent earlier one.
myFinal as
(
    select
        l.itemid,
        coalesce(
            t.qty,
            (select top 1 x.qty
             from @mytable x
             where x.itemid = l.itemid
               and x.adate <= l.startdate
             order by x.adate desc)
        ) as [qty],
        l.startdate as [adate]
    from myDistinctList l
    left join @mytable t
        on t.itemid = l.itemid
       and t.adate = l.startdate
)
-- Days before an item's first recorded change have no quantity; drop them.
select *
from myFinal
where qty is not null
order by itemid, adate;
这是另一种方式:
# Partition the stock log into one data frame per item.
tmp <- split(d, f = d[["ItemID"]])
# Expand one item's change log into one row per calendar day.
#
# x: data frame for a single item with columns ItemID, QTY and ADate
#    (Date or POSIXct), one row per stock change.
# Returns a data frame holding the item id, the quantity in effect on each
# day, and the day itself, from the first to the last change date.
f <- function(x) {
  # findInterval() needs the change dates in ascending order.
  x <- x[order(x$ADate), , drop = FALSE]
  b <- seq(min(x$ADate), max(x$ADate), by = "day")
  # For every day pick the most recent change on or before it. Unlike
  # do.call(c, mapply(rep, ...)) this does not break when mapply()
  # simplifies its result to a matrix (e.g. an item with exactly two rows),
  # and it does not depend on the units diff() picks for date-times.
  a <- x$QTY[findInterval(as.numeric(b), as.numeric(x$ADate))]
  data.frame(x$ItemID[1], a, b)
}
# Expand every item, stack the per-item tables, then reset the row labels
# and restore the original column names.
out <- do.call("rbind", lapply(tmp, f))
row.names(out) <- NULL
names(out) <- c("ItemID", "QTY", "ADate")
数据
# Sample data: stock level changes per item, one row per change.
d <- read.table(
  text = "ItemID QTY ADate
10007 2 2011-07-22
10007 1 2011-07-27
10007 2 2011-09-01
10007 1 2011-09-23
10007 2 2011-09-29
10007 0 2011-10-29
10007 2 2011-11-03
10007 1 2011-11-21
23028 2 2011-07-23
23028 0 2011-08-03
23028 2 2011-08-10",
  header = TRUE
)
# Parse the dates as UTC date-times, then sort by item and date.
d$ADate <- as.POSIXct(d$ADate, tz = "UTC")
d <- d[with(d, order(ItemID, ADate)), ]
您可以使用LEAD(需要SQL Server 2012+)和递归CTE来生成这个日期序列,如下所示:
-- Expand each stock-change row into one row per day, up to (but excluding)
-- the day of the same item's next change.
WITH Orders AS
(
    -- Anchor: every recorded change plus the date of the next change.
    -- PARTITION BY keeps LEAD inside one item; without it, the last row of
    -- an item would take the first date of the following item as its end.
    SELECT ItemID, QTY,
           LEAD(cADate, 1, NULL) OVER (PARTITION BY ItemID ORDER BY cADate) AS NExtVal,
           cADate AS Number
    FROM Table1
    -- WHERE QTY > 0 -- remove this if you want
    UNION ALL
    -- Recursive step: advance one day at a time, stopping before NExtVal.
    -- The last change of an item has NExtVal = NULL, so it is not expanded.
    SELECT ItemID, QTY, NExtVal, DATEADD(DAY, 1, Number)
    FROM Orders
    WHERE DATEADD(DAY, 1, Number) < NExtVal
)
SELECT ItemID, QTY, Number AS DateSequence FROM Orders
ORDER BY ItemID, Number
-- The default limit of 100 recursion levels fails on gaps longer than 100 days.
OPTION (MAXRECURSION 0);
这是样本代码的Link。
使用R的tidyverse库的解决方案,采用@Brian的数据:
library(tidyverse)

# Every calendar day between the first and the last change in the table.
date_seq <- seq(from = min(d$ADate), to = max(d$ADate), by = "day")

d %>%
  # Join against the full item x day grid so each item gets a row per day.
  right_join(
    expand.grid(ItemID = unique(.$ItemID), ADate = date_seq),
    by = c("ItemID", "ADate")
  ) %>%
  # fill() copies values downward in row order, so the days must be sorted
  # within each item; the row order produced by the join is not something
  # to rely on (it has differed between dplyr versions).
  arrange(ItemID, ADate) %>%
  group_by(ItemID) %>%
  # Only QTY has gaps; ItemID is the grouping key and is never missing.
  # Days before an item's first recorded change keep QTY = NA.
  fill(QTY)
# # A tibble: 40 x 3
# # Groups: ItemID [2]
# ItemID QTY ADate
# <int> <int> <dttm>
# 1 10007 2 2011-07-22
# 2 10007 2 2011-07-23
# 3 10007 2 2011-07-24
# 4 10007 2 2011-07-25
# 5 10007 2 2011-07-26
# 6 10007 1 2011-07-27
# 7 10007 1 2011-07-28
# 8 10007 1 2011-07-29
# 9 10007 1 2011-07-30
# 10 10007 1 2011-07-31
# # ... with 30 more rows