我正在尝试将查询从 MySQL 移植到 SQL SERVER 2012。
我如何为 MySQL 的 substring_index() 编写一个等价的函数?
MySQL 的 SUBSTRING_INDEX() 返回给定字符串中分隔符第 count 次出现之前的子字符串(count 为负数时,则返回从右边数第 |count| 个分隔符之后的部分)。
SUBSTRING_INDEX(字符串,分隔符,计数)
SELECT SUBSTRING_INDEX('www.somewebsite.com','.',2);
输出:
'www.somewebsite'
尝试这个基于 T-SQL 和 XQuery 的解决方案(核心是 XQuery 谓词 (root/row)[position() <= sql:variable("@count")]):
T-SQL 标量函数:
-- Emulates MySQL SUBSTRING_INDEX() for a single-character delimiter.
--
-- Parameters:
--   @str   : string to cut.
--   @delim : single-character delimiter.
--   @count : > 0 keeps everything before the @count-th delimiter (from the left);
--            < 0 keeps everything after the |@count|-th delimiter (from the right);
--            = 0 yields an empty string.
-- Returns: the requested part, the whole string when there are fewer delimiters
--          than requested, or NULL when any argument is NULL.
--
-- How it works: @str is XML-escaped, every delimiter is turned into a
-- </row><row> boundary, and the first @count <row> elements are glued back
-- together with the delimiter between them.
--
-- NOTE(review): a delimiter that also occurs inside an entity produced by the
-- escaping step (for example ';' when @str contains '&') would still split that
-- entity and make the XML invalid -- confirm whether such inputs matter.
CREATE FUNCTION dbo.SUBSTRING_INDEX
(
    @str   NVARCHAR(4000),
    @delim NVARCHAR(1),
    @count INT
)
RETURNS NVARCHAR(4000)
WITH SCHEMABINDING
AS
BEGIN
    IF @str IS NULL OR @delim IS NULL OR @count IS NULL
        RETURN NULL;

    -- DATALENGTH, not LEN or = N'': both of those treat a single space as empty.
    IF @count = 0 OR DATALENGTH(@delim) = 0
        RETURN N'';

    -- A one-character delimiter reads the same in both directions, so
    -- "last N pieces" is "first N pieces of the reversed string", reversed back.
    DECLARE @FromRight BIT = 0;
    IF @count < 0
    BEGIN
        SET @FromRight = 1;
        SET @str = REVERSE(@str);
        -- 4000 characters hold at most 4001 pieces; clamping first also keeps
        -- the negation from overflowing for the smallest INT.
        SET @count = CASE WHEN @count < -4001 THEN 4001 ELSE -@count END;
    END;

    -- The delimiter is escaped the same way as the text so that '&', '<' and
    -- '>' still match after FOR XML has entitized them.
    DECLARE @XmlSourceString XML;
    SET @XmlSourceString =
    (
        SELECT N'<root><row>'
             + REPLACE((SELECT @str AS '*' FOR XML PATH('')),
                       (SELECT @delim AS '*' FOR XML PATH('')),
                       N'</row><row>')
             + N'</row></root>'
    );

    -- Each kept piece is prefixed with the delimiter; STUFF removes the first
    -- one. ISNULL keeps empty pieces (text() is NULL for <row></row>), and the
    -- intermediate is NVARCHAR(MAX) so the extra leading delimiter cannot push
    -- the last character out.
    DECLARE @Result NVARCHAR(4000);
    SET @Result = STUFF
    (
        ((
            SELECT @delim + ISNULL(x.XmlCol.value(N'(text())[1]', N'NVARCHAR(4000)'), N'') AS '*'
            FROM @XmlSourceString.nodes(N'(root/row)[position() <= sql:variable("@count")]') x(XmlCol)
            FOR XML PATH(N''), TYPE
        ).value(N'.', N'NVARCHAR(MAX)')),
        1, 1, N''
    );

    IF @FromRight = 1
        SET @Result = REVERSE(@Result);

    RETURN @Result;
END
GO
-- Example call: keep everything before the second dot.
SELECT
    dbo.SUBSTRING_INDEX(N'www.somewebsite.com', N'.', 2) AS Result;
输出:
/*
Result
---------------
www.somewebsite
*/
或
TSQL 内联表值函数:
-- Inline table-valued variant of SUBSTRING_INDEX for a single-character
-- delimiter. Returns one row with one column, Result.
--
-- Parameters:
--   @str   : string to cut.
--   @delim : single-character delimiter.
--   @count : number of leading pieces to keep; 0 yields an empty string.
--            Negative counts are not supported by this variant and yield NULL,
--            because the XQuery predicate then selects no rows.
-- Result is NULL when any argument is NULL, and the whole string when @count
-- exceeds the number of pieces.
CREATE FUNCTION dbo.SUBSTRING_INDEX
(
    @str   NVARCHAR(4000),
    @delim NVARCHAR(1),
    @count INT
)
RETURNS TABLE
AS
RETURN
WITH Base
AS
(
    -- Escape the text, then turn each (equally escaped) delimiter into a row
    -- boundary; escaping the delimiter keeps '&', '<' and '>' matchable.
    SELECT XmlSourceString = CONVERT(XML,
               N'<root><row>'
             + REPLACE((SELECT @str AS '*' FOR XML PATH('')),
                       (SELECT @delim AS '*' FOR XML PATH('')),
                       N'</row><row>')
             + N'</row></root>')
)
SELECT CAST(
    CASE
        WHEN @str IS NULL OR @delim IS NULL OR @count IS NULL THEN NULL
        -- DATALENGTH, not = N'': a single space compares equal to N''.
        WHEN @count = 0 OR DATALENGTH(@delim) = 0 THEN N''
        ELSE STUFF
        (
            ((
                -- ISNULL keeps empty pieces: text() is NULL for <row></row>.
                SELECT @delim + ISNULL(x.XmlCol.value(N'(text())[1]', N'NVARCHAR(4000)'), N'') AS '*'
                FROM Base b
                CROSS APPLY b.XmlSourceString.nodes(N'(root/row)[position() <= sql:variable("@count")]') x(XmlCol)
                FOR XML PATH(N''), TYPE
            ).value(N'.', N'NVARCHAR(MAX)')),
            1, 1, N''
        )
    END AS NVARCHAR(4000)) AS Result;
GO
-- Apply the table-valued function to every sample host name.
SELECT
    a.Value,
    b.Result
FROM (
    VALUES
        (N'www.somewebsite.com'),
        (N'www.yahoo.com'),
        (N'www.outlook.com')
) AS a(Value)
CROSS APPLY dbo.SUBSTRING_INDEX(a.Value, N'.', 2) AS b;
输出:
/*
Value Result
------------------- ---------------
www.somewebsite.com www.somewebsite
www.yahoo.com www.yahoo
www.outlook.com www.outlook
*/
我最近需要这个,所以我写了以下存储函数。最后是一系列测试,以确保它的运行与 MySql 函数完全一样(在运行相同的测试后,预期结果是从 MySql 复制的):
-- Function to reproduce the useful functionality of SUBSTRING_INDEX from MySql.
--
-- Parameters:
--   @InString  : string to cut.
--   @Delimiter : delimiter, may be longer than one character.
--   @Count     : > 0 returns everything before the @Count-th delimiter counted
--                from the left; < 0 returns everything after the |@Count|-th
--                delimiter counted from the right; 0 returns ''.
-- Returns: the requested part; the whole string when it contains fewer
--          delimiters than requested; '' for an empty delimiter; NULL when
--          any argument is NULL.
CREATE FUNCTION dbo.SUBSTRING_INDEX(@InString  NVARCHAR(MAX),
                                    @Delimiter NVARCHAR(MAX),
                                    @Count     INT)
RETURNS NVARCHAR(MAX)   -- was NVARCHAR(200), which silently truncated long results
AS
BEGIN
    IF @InString IS NULL OR @Delimiter IS NULL OR @Count IS NULL
    BEGIN
        RETURN NULL;
    END;

    -- DATALENGTH / 2 instead of LEN: LEN ignores trailing spaces, so a ' '
    -- delimiter had length 0 and the scan below never advanced.
    DECLARE @OrigLength      INT = DATALENGTH(@InString) / 2;
    DECLARE @DelimiterLength INT = DATALENGTH(@Delimiter) / 2;

    -- If @Count is zero (or there is nothing to search for) we return ''.
    IF @Count = 0 OR @DelimiterLength = 0
    BEGIN
        RETURN N'';
    END;

    DECLARE @Pos INT;
    DECLARE @DelimiterOffsets TABLE
    (
        i      INT IDENTITY(1, 1) NOT NULL,
        offset INT NOT NULL
    );

    -- Prime the pump.
    SET @Pos = CHARINDEX(@Delimiter, @InString, 1);

    -- Record the 1-based offset of every non-overlapping delimiter, left to
    -- right; the IDENTITY column numbers them automatically.
    WHILE @Pos > 0
    BEGIN
        INSERT INTO @DelimiterOffsets (offset)
        VALUES (@Pos);

        SET @Pos = CHARINDEX(@Delimiter, @InString, @Pos + @DelimiterLength);
    END;

    DECLARE @DelimitersFound INT = (SELECT COUNT(*) FROM @DelimiterOffsets);

    -- If they requested more delimiters than were found (including none at
    -- all), return the whole string. Written without ABS() so the smallest
    -- INT cannot overflow.
    IF @Count > @DelimitersFound OR @Count < -@DelimitersFound
    BEGIN
        RETURN @InString;
    END;

    DECLARE @Result NVARCHAR(MAX);

    IF @Count > 0
    BEGIN
        -- Everything in front of the @Count-th delimiter.
        DECLARE @EndOffset INT = (SELECT offset
                                  FROM @DelimiterOffsets
                                  WHERE i = @Count);
        SET @Result = LEFT(@InString, @EndOffset - 1);
    END
    ELSE
    BEGIN
        -- Everything behind the |@Count|-th delimiter counted from the right.
        DECLARE @StartOffset INT = (SELECT offset + @DelimiterLength
                                    FROM @DelimiterOffsets
                                    WHERE i = (@DelimitersFound + @Count + 1));
        SET @Result = SUBSTRING(@InString, @StartOffset, @OrigLength);
    END;

    RETURN @Result;
END;
GO
GRANT EXECUTE ON [dbo].SUBSTRING_INDEX TO PUBLIC;
-- Tests: each row stores the function's answer next to the expected value
-- for the same call; the final query reports pass/fail per row.
DECLARE @TestResults TABLE (i INT, answer NVARCHAR(MAX), expected NVARCHAR(MAX));

INSERT INTO @TestResults (i, answer, expected)
VALUES
    ( 1, [dbo].SUBSTRING_INDEX(N'www.somewebsite.com', N'.', 2),    'www.somewebsite'),
    ( 2, [dbo].SUBSTRING_INDEX(N'www.yahoo.com', N'.', 2),          'www.yahoo'),
    ( 3, [dbo].SUBSTRING_INDEX(N'www.outlook.com', N'.', 2),        'www.outlook'),
    ( 4, [dbo].SUBSTRING_INDEX(N'www.somewebsite.com', N'.', -2),   'somewebsite.com'),
    ( 5, [dbo].SUBSTRING_INDEX(N'www.yahoo.com', N'.', -2),         'yahoo.com'),
    ( 6, [dbo].SUBSTRING_INDEX(N'www.outlook.com', N'.', -2),       'outlook.com'),
    ( 7, [dbo].SUBSTRING_INDEX('hi.you.com', '.', 2),               'hi.you'),
    ( 8, [dbo].SUBSTRING_INDEX('hi.you.com', '.', -1),              'com'),
    ( 9, [dbo].SUBSTRING_INDEX(N'prueba', 'ue', 1),                 'pr'),
    (10, [dbo].SUBSTRING_INDEX(N'prueba', 'ue', -1),                'ba'),
    (11, [dbo].SUBSTRING_INDEX(N'prueba', 'ue', 0),                 ''),
    (12, [dbo].SUBSTRING_INDEX(N'wwwxxxoutlookxxxcom', N'xxx', 2),  'wwwxxxoutlook'),
    (13, [dbo].SUBSTRING_INDEX(N'wwwxxxoutlookxxxcom', N'xxx', -2), 'outlookxxxcom'),
    (14, [dbo].SUBSTRING_INDEX(N'wwwxxxoutlookxxxcom', N'xxx', 5),  'wwwxxxoutlookxxxcom'),
    (15, [dbo].SUBSTRING_INDEX(N'wwwxxxoutlookxxxcom', N'xxx', -5), 'wwwxxxoutlookxxxcom');

SELECT
    tr.i,
    tr.answer,
    tr.expected,
    CASE
        WHEN tr.answer = tr.expected THEN 'Test Succeeded'
        ELSE 'Test Failed'
    END AS testState
FROM @TestResults AS tr
ORDER BY tr.i;
这是一个受 Bogdan Sahlean 的答案启发的版本,使用 SQL Server 的 XML 功能进行解析和组合:
-- SUBSTRING_INDEX built on SQL Server's XML support: the string is split into
-- <piece> elements, the first or last |@Count| pieces are kept, and they are
-- joined back together with the delimiter.
--
-- Parameters:
--   @InString  : string to cut.
--   @Delimiter : delimiter, may be longer than one character.
--   @Count     : > 0 keeps the first @Count pieces, < 0 keeps the last
--                |@Count| pieces, 0 returns ''.
-- Returns: the requested part (the whole string when fewer pieces exist),
--          '' for an empty delimiter, NULL when any argument is NULL.
CREATE FUNCTION dbo.SUBSTRING_INDEX(@InString  NVARCHAR(MAX),
                                    @Delimiter NVARCHAR(MAX),
                                    @Count     INT)
RETURNS NVARCHAR(MAX)   -- was NVARCHAR(200), which truncated long results
AS
BEGIN
    -- A NULL @Count would otherwise reach TOP (...), which rejects NULL.
    IF @InString IS NULL OR @Delimiter IS NULL OR @Count IS NULL
    BEGIN
        RETURN NULL;
    END;

    -- DATALENGTH / 2 rather than LEN: LEN ignores trailing spaces, so a ' '
    -- delimiter would have length 0 and never be stripped from the result.
    DECLARE @DelimiterLength INT = DATALENGTH(@Delimiter) / 2;

    -- If @Count is zero (or there is nothing to split on) we return ''.
    IF @Count = 0 OR @DelimiterLength = 0
    BEGIN
        RETURN N'';
    END;

    -- First we let the XML parser break up the string by @Delimiter.
    -- Each parsed value will be <piece>[text]</piece>. The delimiter is
    -- escaped exactly like the text so '&', '<' and '>' still match.
    DECLARE @XmlSourceString XML =
    (
        SELECT N'<piece>'
             + REPLACE((SELECT @InString AS '*' FOR XML PATH('')),
                       (SELECT @Delimiter AS '*' FOR XML PATH('')),
                       N'</piece><piece>')
             + N'</piece>'
    );

    -- This will contain the final requested string.
    DECLARE @Results NVARCHAR(MAX);

    ;WITH Pieces(RowNumber, Piece) AS
    (
        -- Number every piece so the order can be restored after picking from
        -- either end. ISNULL keeps empty pieces (text() is NULL for them);
        -- without it two adjacent delimiters collapsed into nothing.
        SELECT ROW_NUMBER() OVER (ORDER BY x.XmlCol),
               @Delimiter + ISNULL(x.XmlCol.value(N'(text())[1]', N'NVARCHAR(MAX)'), N'')
        FROM @XmlSourceString.nodes(N'(piece)') x(XmlCol)
    ), OrderedPieces(RowNumber, Piece) AS
    (
        -- Take the first or the last |@Count| pieces. The BIGINT cast keeps
        -- ABS() from overflowing on the smallest INT.
        SELECT TOP (ABS(CAST(@Count AS BIGINT)))
               RowNumber,
               Piece
        FROM Pieces
        ORDER BY CASE WHEN @Count < 0 THEN RowNumber END DESC,
                 CASE WHEN @Count > 0 THEN RowNumber END ASC
    ), CombinedPieces(result) AS
    (
        -- Concatenate in original order. TYPE + .value() un-escapes the XML
        -- entities again, and no narrowing cast is applied, so long or
        -- non-ASCII pieces survive intact.
        SELECT
        (
            SELECT Piece AS '*'
            FROM OrderedPieces
            ORDER BY RowNumber
            FOR XML PATH(N''), TYPE
        ).value(N'.', N'NVARCHAR(MAX)')
    )
    -- Every piece carries a leading delimiter; strip the first one.
    SELECT @Results = STUFF(result, 1, @DelimiterLength, N'') FROM CombinedPieces;

    RETURN @Results;
END;
运行测试会产生以下结果:
i answer expected testState
1 www.somewebsite www.somewebsite Test Succeeded
2 www.yahoo www.yahoo Test Succeeded
3 www.outlook www.outlook Test Succeeded
4 somewebsite.com somewebsite.com Test Succeeded
5 yahoo.com yahoo.com Test Succeeded
6 outlook.com outlook.com Test Succeeded
7 hi.you hi.you Test Succeeded
8 com com Test Succeeded
9 pr pr Test Succeeded
10 ba ba Test Succeeded
11 Test Succeeded
12 wwwxxxoutlook wwwxxxoutlook Test Succeeded
13 outlookxxxcom outlookxxxcom Test Succeeded
14 wwwxxxoutlookxxxcom wwwxxxoutlookxxxcom Test Succeeded
15 wwwxxxoutlookxxxcom wwwxxxoutlookxxxcom Test Succeeded
我最好的选择是:
-- Everything after the first '@', capped at 100 characters.
SELECT SUBSTRING(email, CHARINDEX('@', email, 1) + 1, 100)
FROM yourtable;
假设 TLD.EXT 最多 100 个字符。可以随意增加。
-- Splits @TEXTO at the FIRST occurrence of @SUBSTRING_INDEX and returns one side.
--
-- Parameters:
--   @TEXTO           : text to split.
--   @SUBSTRING_INDEX : delimiter to look for.
--   @DESPLAZAMIENTO  : direction only -- > 0 returns the text to the right of
--                      the delimiter, anything else the text to its left.
--                      (Note this is the opposite sign convention from MySQL's
--                      SUBSTRING_INDEX count.)
-- Returns: the selected side, or @TEXTO unchanged when the delimiter is not found.
--
-- The previous arithmetic added @DESPLAZAMIENTO to the delimiter position as
-- if it were the delimiter length, so it only produced the right answer for a
-- two-character delimiter with +/-1.
CREATE FUNCTION FN_SUBSTRING_INDEX
(
    @TEXTO NVARCHAR(200),
    @SUBSTRING_INDEX NVARCHAR(10),
    @DESPLAZAMIENTO INT
)
RETURNS NVARCHAR(200)
AS
BEGIN
    DECLARE @indiceSubstring INT = CHARINDEX(@SUBSTRING_INDEX, @TEXTO)
    -- DATALENGTH / 2 counts trailing spaces, which LEN would ignore.
    DECLARE @largoSeparador INT = DATALENGTH(@SUBSTRING_INDEX) / 2
    DECLARE @RESULTADO NVARCHAR(200)

    IF @indiceSubstring = 0
    BEGIN
        -- Delimiter absent (or empty): nothing to cut.
        SELECT @RESULTADO = @TEXTO
    END
    ELSE IF @DESPLAZAMIENTO > 0
    BEGIN
        -- Right side: starts right after the whole delimiter.
        SELECT @RESULTADO = SUBSTRING(@TEXTO, @indiceSubstring + @largoSeparador, DATALENGTH(@TEXTO) / 2)
    END
    ELSE
    BEGIN
        -- Left side: everything in front of the delimiter.
        SELECT @RESULTADO = LEFT(@TEXTO, @indiceSubstring - 1)
    END
    RETURN @RESULTADO
END
GO
向右:SELECT dbo.FN_SUBSTRING_INDEX(N'prueba','ue',1);
向左:SELECT dbo.FN_SUBSTRING_INDEX(N'prueba','ue',-1);
试试这个......
-- Returns the part of @ExistingString in front of the @number-th occurrence
-- of @BreakPoint, counted from the left.
--
-- Parameters:
--   @ExistingString : string to cut.
--   @BreakPoint     : delimiter; may be longer than one character.
--   @number         : how many delimiters to scan past. Zero, negative or NULL
--                     values return '' (this variant only works left-to-right).
-- Returns: the prefix, or the whole string when it holds fewer than @number
--          delimiters; NULL when @ExistingString or @BreakPoint is NULL.
--
-- The previous loop advanced by one character per match (so multi-character
-- delimiters could match overlapping text) and kept accumulating offsets after
-- the delimiter ran out, returning a truncated prefix instead of the string.
CREATE FUNCTION SubString_Index
(
    @ExistingString NVARCHAR(200),
    @BreakPoint NVARCHAR(10),
    @number INT
)
RETURNS NVARCHAR(200)
AS
BEGIN
    IF @ExistingString IS NULL
        RETURN NULL

    IF @number IS NULL OR @number <= 0
        RETURN N''

    IF @BreakPoint IS NULL
        RETURN NULL

    -- DATALENGTH / 2 counts trailing spaces, which LEN would ignore.
    DECLARE @BreakLength INT = DATALENGTH(@BreakPoint) / 2
    IF @BreakLength = 0
        RETURN N''

    DECLARE @Found INT = 0        -- delimiters passed so far
    DECLARE @Pos INT = 0          -- position of the most recent delimiter
    DECLARE @SearchFrom INT = 1   -- where the next scan starts

    WHILE @Found < @number
    BEGIN
        SET @Pos = CHARINDEX(@BreakPoint, @ExistingString, @SearchFrom)

        -- Fewer delimiters than requested: the whole string is the answer.
        IF @Pos = 0
            RETURN @ExistingString

        SET @Found = @Found + 1
        -- Skip the whole delimiter so matches never overlap.
        SET @SearchFrom = @Pos + @BreakLength
    END

    RETURN LEFT(@ExistingString, @Pos - 1)
END
GO
-- Example call; the function is created as SubString_Index (with underscore).
SELECT dbo.SubString_Index('hi.you.com', '.', 1)
这个 Microsoft SQL Server 函数的工作原理与 MySQL 中的 SUBSTRING_INDEX 函数完全相同:
/**
  SQL Server counterpart of MySQL's SUBSTRING_INDEX.

  Parameters:
    @ExistingString : string to cut.
    @BreakPoint     : delimiter; may be longer than one character.
    @number         : > 0 returns everything before the @number-th delimiter
                      counted from the left; < 0 returns everything after the
                      |@number|-th delimiter counted from the right; 0 returns ''.
  Returns: the requested part; the whole string when it contains fewer
           delimiters than requested (including none); '' for an empty
           delimiter; NULL when any argument is NULL.

  Delimiters are located left to right without overlap.
**/
CREATE FUNCTION SubString_Index
(
    @ExistingString NVARCHAR(MAX),
    @BreakPoint NVARCHAR(MAX),
    @number INT
)
RETURNS NVARCHAR(MAX)
AS
BEGIN
    IF @ExistingString IS NULL OR @BreakPoint IS NULL OR @number IS NULL
        RETURN NULL

    -- DATALENGTH / 2 instead of LEN, which ignores trailing spaces.
    DECLARE @BreakLength INT = DATALENGTH(@BreakPoint) / 2
    IF @number = 0 OR @BreakLength = 0
        RETURN N''

    -- Pass 1: count the delimiters. (The old LEN - LEN(REPLACE) arithmetic
    -- counted removed characters, not occurrences, for multi-character
    -- delimiters.)
    DECLARE @Total INT = 0
    DECLARE @Pos BIGINT = CHARINDEX(@BreakPoint, @ExistingString)
    WHILE @Pos > 0
    BEGIN
        SET @Total = @Total + 1
        SET @Pos = CHARINDEX(@BreakPoint, @ExistingString, @Pos + @BreakLength)
    END

    -- Fewer delimiters than requested: the whole string is the answer.
    -- Written without ABS() so the smallest INT cannot overflow.
    IF @number > @Total OR @number < -@Total
        RETURN @ExistingString

    -- Translate the request into "the @Target-th delimiter from the left".
    DECLARE @Target INT = CASE WHEN @number > 0 THEN @number
                               ELSE @Total + @number + 1
                          END

    -- Pass 2: walk to that delimiter.
    DECLARE @Seen INT = 0
    DECLARE @SearchFrom BIGINT = 1
    WHILE @Seen < @Target
    BEGIN
        SET @Pos = CHARINDEX(@BreakPoint, @ExistingString, @SearchFrom)
        SET @SearchFrom = @Pos + @BreakLength
        SET @Seen = @Seen + 1
    END

    IF @number > 0
        RETURN LEFT(@ExistingString, @Pos - 1)

    -- Negative count: everything behind the located delimiter.
    RETURN SUBSTRING(@ExistingString, @Pos + @BreakLength, DATALENGTH(@ExistingString) / 2)
END
GO
使用 Python 获取最后两段(相当于索引 -2):
-- Switch to the database that holds the customers table queried below.
use my_guitar_shop;
go
-- Runs a Python script inside SQL Server. The rows produced by @input_data_1
-- are read by the script as InputDataSet, and the DataFrame the script assigns
-- to OutputDataSet is returned as the result set.
EXEC sp_execute_external_script
@language =N'Python',
-- The script splits every email_address on "." and re-joins the last two
-- pieces with "." (addresses with two or fewer pieces are returned whole).
@script=N'
import pandas as pd
import numpy as np
x = np.array(InputDataSet["email_address"], dtype = str)
broken = np.char.split(x, sep = ".")
OutputDataSet = pd.DataFrame([".".join(e[-2:]) if len(e)>2 else ".".join(e) for e in broken])
',
-- Source query; its single column is the one the script reads.
@input_data_1 = N'SELECT email_address from customers;'
-- Names and types the single output column.
WITH RESULT SETS(([indexed_email] nvarchar(250)));
在 SQL Server 2022、Azure SQL 数据库或 Azure SQL 托管实例中,您可以使用 STRING_SPLIT 新增的 enable_ordinal 参数(启用后会输出 ordinal 列)来大大简化这一过程。
-- Returns the single piece of @input found at 1-based position @index after
-- splitting on @delim. The third STRING_SPLIT argument (1) enables the
-- ordinal output column used for the lookup.
-- Result: zero or one row with one column, str.
CREATE FUNCTION dbo.SubstringIndex
(
    @input nvarchar(max),
    @delim nchar(1),
    @index int
)
RETURNS TABLE
WITH SCHEMABINDING
AS
RETURN
(
    SELECT pieces.value AS str
    FROM STRING_SPLIT(@input, @delim, 1) AS pieces
    WHERE pieces.ordinal = @index
);
-- Plain split: one row per dot-separated piece.
SELECT parts.value
FROM STRING_SPLIT('www.somewebsite.com', '.') AS parts;