T-SQL Parsing Fruit Script


This entry is part 7 of 7 in the series SQL Server Parsing Many Columns

Below is the full script that you can use on your non-production server. This script is based on the fruit example I was using in the previous posts in this series.

This script is essentially the same as the scripts in the previous posts. The previous posts serve as a tutorial for this exercise. You will need to understand the queries and scripts to adapt them to your tables and situation. For example, if you have a string that has more than 12 parts to it, you can follow the pattern of this example to write the code for the fourth Group and further if necessary.

This series of posts seems like a lot of work to just split it out and create a second table for querying. However, when you start to write queries like the ones in the previous post you start to realize the benefits. Not only that, I haven’t discussed performance. I haven’t measured the performance of these queries, but imagine searching through a million records for a string in a column using LIKE.

User-Defined Functions

The large script below requires user-defined functions to exist before you run the script. Here are the user-defined function scripts that you can run in your non-production server. You will need these to successfully run the large script below.

CREATE FUNCTION [dbo].[ufn_CountChar] ( @pInput VARCHAR(1000), @pSearchChar CHAR(1) )
    RETURNS INT
BEGIN
    RETURN (LEN(@pInput) - LEN(REPLACE(@pInput, @pSearchChar, '')))
END
GO
-- ===================================================================================================
/*
Example:
SELECT dbo.CHARINDEX2('a', 'abbabba', 3)
returns the location of the third occurrence of 'a'
which is 7
*/
CREATE FUNCTION [dbo].[CHARINDEX2]
(
    @TargetStr varchar(8000), 
    @SearchedStr varchar(8000), 
    @Occurrence int
)
RETURNS int
AS
BEGIN

	DECLARE @pos int, @counter int, @ret int
	SET @pos = CHARINDEX(@TargetStr, @SearchedStr)
	SET @counter = 1

	IF @Occurrence = 1 SET @ret = @pos

	ELSE
	BEGIN
		WHILE (@counter < @Occurrence)
		BEGIN
			SELECT @ret = CHARINDEX(@TargetStr, @SearchedStr, @pos + 1)
			SET @counter = @counter + 1
			SET @pos = @ret
		END
	END
	RETURN(@ret)
END
GO

The Script

IF OBJECT_ID(N'[dbo].[ParseFruit]', N'U') IS NOT NULL
	DROP TABLE [dbo].[ParseFruit];

CREATE TABLE [dbo].[ParseFruit](
	[Id] [int] IDENTITY(1,1) NOT NULL,
	[Name] [nvarchar](50) NOT NULL,
	[ListFruit] [nvarchar](100) NULL,
 CONSTRAINT [PK_ParseFruit] PRIMARY KEY CLUSTERED 
(
	[Id] ASC
)WITH (PAD_INDEX = OFF, STATISTICS_NORECOMPUTE = OFF, IGNORE_DUP_KEY = OFF, ALLOW_ROW_LOCKS = ON, ALLOW_PAGE_LOCKS = ON, OPTIMIZE_FOR_SEQUENTIAL_KEY = OFF) ON [PRIMARY]
) ON [PRIMARY]
GO
SET IDENTITY_INSERT [dbo].[ParseFruit] ON 
GO
INSERT [dbo].[ParseFruit] ([Id], [Name], [ListFruit]) VALUES (1, N'Bob', N'apple')
GO
INSERT [dbo].[ParseFruit] ([Id], [Name], [ListFruit]) VALUES (2, N'Sally', N'pear,apple,pomegranate')
GO
INSERT [dbo].[ParseFruit] ([Id], [Name], [ListFruit]) VALUES (3, N'Samantha', N'cherry,peach,strawberry,prune')
GO
INSERT [dbo].[ParseFruit] ([Id], [Name], [ListFruit]) VALUES (4, N'William', N'raspberry,mango,fig,cantaloupe,apple,banana')
GO
INSERT [dbo].[ParseFruit] ([Id], [Name], [ListFruit]) VALUES (5, N'Linda', N'blueberry,blackberry,tangarine,watermellon,plantain,persimmon,pineapple')
GO
INSERT [dbo].[ParseFruit] ([Id], [Name], [ListFruit]) VALUES (6, N'Susan', NULL)
GO
INSERT [dbo].[ParseFruit] ([Id], [Name], [ListFruit]) VALUES (7, N'Ron', N'peach,blackberry,orange,blueberry,pear,passion fruit,dragonfruit,tamarind,kiwi')
GO
SET IDENTITY_INSERT [dbo].[ParseFruit] OFF
GO
ALTER TABLE ParseFruit
ADD FruitFreq int
GO
UPDATE ParseFruit SET FruitFreq = dbo.ufn_CountChar(ListFruit, ',') + 1
GO
SELECT * FROM ParseFruit;
GO
ALTER TABLE ParseFruit
ADD Group1 NVARCHAR(50) NULL, Group2 NVARCHAR(50) NULL, Group3 NVARCHAR(50) NULL
GO
-- 1 to 4 parts - move it all
UPDATE ParseFruit
    SET Group1 = ListFruit
    WHERE FruitFreq < 5
-- =======================================================================
-- 5 to 8
-- starting position is one after the 4th comma
-- the length is the total length less the first 4 parts
UPDATE ParseFruit
   SET Group2 = SUBSTRING(ListFruit, dbo.CHARINDEX2(',', ListFruit, 4) + 1, LEN(ListFruit) - dbo.CHARINDEX2(',', ListFruit, 4))
   WHERE (FruitFreq >= 5 AND FruitFreq  < 9)
GO

UPDATE ParseFruit
   SET Group1 = SUBSTRING(ListFruit, 1, dbo.CHARINDEX2(',', ListFruit, 4) - 1)
   WHERE (FruitFreq >= 5 AND FruitFreq  < 9)
GO
-- ======================================================================
-- 9 to 12
-- starting position is one after the 4th comma
-- the length is the total length less the first 4 parts
UPDATE ParseFruit
   SET Group3 = SUBSTRING(ListFruit, dbo.CHARINDEX2(',', ListFruit, 8) + 1, LEN(ListFruit) - dbo.CHARINDEX2(',', ListFruit, 8))
   WHERE (FruitFreq >= 9 AND FruitFreq  <= 12)
GO
-- Group2
UPDATE ParseFruit
    SET Group2 = SUBSTRING(ListFruit, dbo.CHARINDEX2(',', ListFruit, 4) + 1, dbo.CHARINDEX2(',', ListFruit, 8) - dbo.CHARINDEX2(',', 
    ListFruit, 4) - 1)
    WHERE (FruitFreq >= 9 AND FruitFreq  <= 12)
GO
-- Group1
UPDATE ParseFruit
    SET Group1 = SUBSTRING(ListFruit, 1, dbo.CHARINDEX2(',', ListFruit, 4) - 1)
    WHERE (FruitFreq >= 9 AND FruitFreq  <= 12)
GO
ALTER TABLE ParseFruit
ADD fruit1 NVARCHAR(50) NULL, 
    fruit2 NVARCHAR(50) NULL,
    fruit3 NVARCHAR(50) NULL,
    fruit4 NVARCHAR(50) NULL,
    fruit5 NVARCHAR(50) NULL,
    fruit6 NVARCHAR(50) NULL,
    fruit7 NVARCHAR(50) NULL,
    fruit8 NVARCHAR(50) NULL,
    fruit9 NVARCHAR(50) NULL
GO
UPDATE dbo.ParseFruit SET fruit1 = REVERSE(PARSENAME(REPLACE(REVERSE(Group1), ',', '.'), 1));
UPDATE dbo.ParseFruit SET fruit2 = REVERSE(PARSENAME(REPLACE(REVERSE(Group1), ',', '.'), 2));
UPDATE dbo.ParseFruit SET fruit3 = REVERSE(PARSENAME(REPLACE(REVERSE(Group1), ',', '.'), 3));
UPDATE dbo.ParseFruit SET fruit4 = REVERSE(PARSENAME(REPLACE(REVERSE(Group1), ',', '.'), 4));

UPDATE dbo.ParseFruit SET fruit5 = REVERSE(PARSENAME(REPLACE(REVERSE(Group2), ',', '.'), 1));
UPDATE dbo.ParseFruit SET fruit6 = REVERSE(PARSENAME(REPLACE(REVERSE(Group2), ',', '.'), 2));
UPDATE dbo.ParseFruit SET fruit7 = REVERSE(PARSENAME(REPLACE(REVERSE(Group2), ',', '.'), 3));
UPDATE dbo.ParseFruit SET fruit8 = REVERSE(PARSENAME(REPLACE(REVERSE(Group2), ',', '.'), 4));

UPDATE dbo.ParseFruit SET fruit9 = REVERSE(PARSENAME(REPLACE(REVERSE(Group3), ',', '.'), 1));
GO
IF OBJECT_ID(N'[dbo].[NameFruit]', N'U') IS NOT NULL
    DROP TABLE [dbo].[NameFruit];
GO
CREATE TABLE [dbo].[NameFruit](
	[Id] [int] IDENTITY(1,1) NOT NULL,
	[NameId] [int] NOT NULL,
	[Fruit] [nvarchar](50) NULL,
 CONSTRAINT [PK_NameFruit] PRIMARY KEY CLUSTERED 
(
	[Id] ASC
)WITH (PAD_INDEX = OFF, STATISTICS_NORECOMPUTE = OFF, IGNORE_DUP_KEY = OFF, ALLOW_ROW_LOCKS = ON, ALLOW_PAGE_LOCKS = ON, OPTIMIZE_FOR_SEQUENTIAL_KEY = OFF) ON [PRIMARY]
) ON [PRIMARY]
GO
INSERT INTO dbo.NameFruit (NameId, Fruit)
	SELECT Id, fruit1
	FROM dbo.ParseFruit
	WHERE fruit1 IS NOT NULL
GO
INSERT INTO dbo.NameFruit (NameId, Fruit)
	SELECT Id, fruit2
	FROM dbo.ParseFruit
	WHERE fruit2 IS NOT NULL
GO
INSERT INTO dbo.NameFruit (NameId, Fruit)
	SELECT Id, fruit3
	FROM dbo.ParseFruit
	WHERE fruit3 IS NOT NULL
GO	
INSERT INTO dbo.NameFruit (NameId, Fruit)
	SELECT Id, fruit4
	FROM dbo.ParseFruit
	WHERE fruit4 IS NOT NULL
GO
INSERT INTO dbo.NameFruit (NameId, Fruit)
	SELECT Id, fruit5
	FROM dbo.ParseFruit
	WHERE fruit5 IS NOT NULL
GO
INSERT INTO dbo.NameFruit (NameId, Fruit)
	SELECT Id, fruit6
	FROM dbo.ParseFruit
	WHERE fruit6 IS NOT NULL
GO
INSERT INTO dbo.NameFruit (NameId, Fruit)
	SELECT Id, fruit7
	FROM dbo.ParseFruit
	WHERE fruit7 IS NOT NULL
GO
INSERT INTO dbo.NameFruit (NameId, Fruit)
	SELECT Id, fruit8
	FROM dbo.ParseFruit
	WHERE fruit8 IS NOT NULL
GO
INSERT INTO dbo.NameFruit (NameId, Fruit)
	SELECT Id, fruit9
	FROM dbo.ParseFruit
	WHERE fruit9 IS NOT NULL
GO
-- The point to this exercise is to be able to write queries like these:
SELECT NameId, Fruit FROM
    NameFruit WHERE NameId = (SELECT Id FROM ParseFruit WHERE Name = 'Sally')
GO
SELECT ParseFruit.Name, NameFruit.Fruit FROM ParseFruit
    INNER JOIN NameFruit 
    ON ParseFruit.Id = NameFruit.NameId
    WHERE Name = 'Sally'
GO
Series Navigation<< T-SQL Parsing Better Queries