USE DWH;
GO
CREATE PROCEDURE LinearRegression (
@TableName nvarchar(250) = '[dbo].[linear]'
, @XColumnName nvarchar(250) = '[Xvalue]'
, @YColumnName nvarchar(250) = '[Yvalue]'
, @Where nvarchar(250) = '[RecordId] <=11'
)
-----------------------------------------------------------------------
-- Liear regression using T-Sql
-- John Minkjan 2013
-----------------------------------------------------------------------
AS
BEGIN
declare @sqlstatement nvarchar(4000)
declare @MeanX decimal(18,6)
declare @MeanY decimal(18,6)
declare @VarX decimal(18,6)
declare @VarY decimal(18,6)
declare @StdX decimal(18,6)
declare @StdY decimal(18,6)
declare @sumX decimal(18,6)
declare @sumY decimal(18,6)
declare @sumXY decimal(18,6)
declare @X2 decimal(18,6)
declare @Y2 decimal(18,6)
declare @XY decimal(18,6)
declare @X2t decimal(18,6)
declare @Y2t decimal(18,6)
declare @XYt decimal(18,6)
declare @CorXY decimal(18,6)
declare @Slope decimal(18,6)
declare @Intercept decimal(18,6)
declare @CountXY bigint
-- get the basic stats
set @sqlstatement = 'Declare users_cursor CURSOR FOR SELECT AVG('
+@XColumnName +'), AVG(' + @YColumnName
+') , COUNT(*), VAR('
+@XColumnName +'), VAR(' + @YColumnName
+') , STDEV('
+@XColumnName +'), STDEV(' + @YColumnName
+') , SUM('
+@XColumnName +'), SUM(' + @YColumnName
+') , SUM(' + @YColumnName
+' * ' +@XColumnName +') FROM ' + @TableName + case when @Where <> '' then ' WHERE ' + @Where end
exec sp_executesql @sqlstatement
OPEN users_cursor
FETCH NEXT FROM users_cursor
INTO @MeanX ,@MeanY,@CountXY,@VarX ,@VarY,@StdX,@StdY,@sumX,@sumY,@sumXY
CLOSE users_cursor
DEALLOCATE users_cursor
-- Get the Linear regression info
set @X2 = 0
set @y2 = 0
set @xy = 0
set @sqlstatement = 'Declare users_cursor CURSOR FOR SELECT ('
+ @XColumnName +' - ' +cast( @MeanX as varchar(250)) + ') * ('
+ @YColumnName +' - ' + cast( @MeanY as varchar(250)) + ') as XY, (('
+ @XColumnName +' - ' +cast( @MeanX as varchar(250))
+') * (' + @XColumnName +' - ' +cast( @MeanX as varchar(250))+')) as X2 , (('
+ @YColumnName +' - ' + cast( @MeanY as varchar(250)) +') * ('
+ @YColumnName +' - ' + cast( @MeanY as varchar(250))+')) as Y2 '
+ ' FROM ' + @TableName + case when @Where <> '' then ' WHERE ' + @Where end
exec sp_executesql @sqlstatement
OPEN users_cursor
FETCH NEXT FROM users_cursor
INTO @xyt, @x2t, @y2t
WHILE @@FETCH_STATUS = 0
BEGIN
set @X2 = @x2+ @x2t
set @y2 = @y2+ @y2t
set @xy = @xy+ @xyt
FETCH NEXT FROM users_cursor
INTO @xyt, @x2t, @y2t
END
CLOSE users_cursor
DEALLOCATE users_cursor
-- Calculate the correlation
set @CorXY = (@xy / SQRT(@x2 * @y2))
-- Get the slope and intercept
set @Slope = (@xy/ @x2)
set @Intercept = @MeanY-((@xy/ @x2) * @MeanX)
-- Output the result
set @sqlstatement = 'SELECT '
+@XColumnName +' as X '
+' ,'+@YColumnName +' as Y '
+' ,'+cast(@MeanX as varchar(250)) + ' as MeanX '
+' ,'+cast(@MeanY as varchar(250)) + ' as MeanY '
+' ,'+cast(@VarX as varchar(250)) +' as VarianceX '
+' ,'+cast(@VarY as varchar(250)) + ' as VarianceY '
+' ,'+cast(@CorXY as varchar(250)) + ' as CorrelationXY'
+' ,' +cast(@Slope as varchar(250)) + ' as Slope '
+' ,' +cast(@Intercept as varchar(250)) + ' as Intercept '
+' , (' + @XColumnName +' * ' + cast( @slope as varchar(250)) +' ) +' +cast( @Intercept as varchar(250)) + ' as PredictedY'
+' ,' + cast(@StdX as varchar(250)) +' as StandardDeviationX'
+' ,' + cast(@StdY as varchar(250)) +' as StandardDeviationY'
+' ,' + cast(@sumX as varchar(250)) +' as SumX'
+' ,' + cast(@sumY as varchar(250)) +' as SumY'
+' ,' + cast(@sumXY as varchar(250)) +' as SumXY'
+' FROM ' + @TableName + case when @Where <> '' then ' WHERE ' + @Where end
exec sp_executesql @sqlstatement;
end
Till Next Time