-- =====================================================================
-- Idempotent deployment script: create the geographic lookup tables
-- dbo.Cities, dbo.Counties, dbo.States (with seed data), and
-- dbo.CanadianProvinces only when they do not already exist.
-- =====================================================================

IF NOT EXISTS ( SELECT * FROM sys.objects
                WHERE object_id = OBJECT_ID(N'dbo.Cities') AND type IN (N'U') )
BEGIN
    CREATE TABLE dbo.Cities
    (
        CityID   BIGINT IDENTITY(1,1) NOT NULL,
        CityName NVARCHAR(50),
        StateID  INT,   -- FK candidate -> dbo.States.ID (INT)
        MsaID    INT,   -- presumably a metropolitan-statistical-area id — TODO confirm
        CONSTRAINT PK_Cities PRIMARY KEY CLUSTERED (CityID)  -- was missing a primary key
    );
END
GO

IF NOT EXISTS ( SELECT * FROM sys.objects
                WHERE object_id = OBJECT_ID(N'dbo.Counties') AND type IN (N'U') )
BEGIN
    CREATE TABLE dbo.Counties
    (
        CountyID   BIGINT IDENTITY(1,1) NOT NULL,
        CountyName NVARCHAR(50),
        StateID    INT,  -- was BIGINT; dbo.States.ID is INT — keep the key types consistent
        CONSTRAINT PK_Counties PRIMARY KEY CLUSTERED (CountyID)  -- was missing a primary key
    );
END
GO

IF NOT EXISTS ( SELECT * FROM sys.objects
                WHERE object_id = OBJECT_ID(N'dbo.States') AND type IN (N'U') )
BEGIN
    CREATE TABLE dbo.States
    (
        ID           INT IDENTITY(1,1) NOT NULL,
        Name         NVARCHAR(40)      NOT NULL,
        Abbreviation NVARCHAR(2)       NOT NULL,
        CountryID    INT               NOT NULL,
        CONSTRAINT PK_States PRIMARY KEY CLUSTERED (ID ASC),
        CONSTRAINT uc_States_Abbreviation UNIQUE NONCLUSTERED (Abbreviation ASC)
    );

    -- Seed with explicit IDs so lookup values are stable across environments.
    SET IDENTITY_INSERT dbo.States ON;  -- was unqualified "States"; now matches the OFF below

    DECLARE @US_CountryId  INT = 226;
    DECLARE @CAD_CountryId INT = 38;

    -- All string literals carry the N prefix: these are NVARCHAR columns, and
    -- without it 'Québec' can lose its accent under non-Latin collations.
    INSERT INTO dbo.States ( ID, Name, Abbreviation, CountryID )
    VALUES
        (1,  N'Alaska',                    N'AK', @US_CountryId),
        (2,  N'Alabama',                   N'AL', @US_CountryId),
        (3,  N'American Samoa',            N'AS', @US_CountryId),
        (4,  N'Arizona',                   N'AZ', @US_CountryId),
        (5,  N'Arkansas',                  N'AR', @US_CountryId),
        (6,  N'California',                N'CA', @US_CountryId),
        (7,  N'Colorado',                  N'CO', @US_CountryId),
        (8,  N'Connecticut',               N'CT', @US_CountryId),
        (9,  N'Delaware',                  N'DE', @US_CountryId),
        (10, N'District of Columbia',      N'DC', @US_CountryId),
        (11, N'Florida',                   N'FL', @US_CountryId),
        (12, N'Georgia',                   N'GA', @US_CountryId),
        (13, N'Guam',                      N'GU', @US_CountryId),
        (14, N'Hawaii',                    N'HI', @US_CountryId),
        (15, N'Idaho',                     N'ID', @US_CountryId),
        (16, N'Illinois',                  N'IL', @US_CountryId),
        (17, N'Indiana',                   N'IN', @US_CountryId),
        (18, N'Iowa',                      N'IA', @US_CountryId),
        (19, N'Kansas',                    N'KS', @US_CountryId),
        (20, N'Kentucky',                  N'KY', @US_CountryId),
        (21, N'Louisiana',                 N'LA', @US_CountryId),
        (22, N'Maine',                     N'ME', @US_CountryId),
        (23, N'Maryland',                  N'MD', @US_CountryId),
        (24, N'Massachusetts',             N'MA', @US_CountryId),
        (25, N'Michigan',                  N'MI', @US_CountryId),
        (26, N'Minnesota',                 N'MN', @US_CountryId),
        (27, N'Mississippi',               N'MS', @US_CountryId),
        (28, N'Missouri',                  N'MO', @US_CountryId),
        (29, N'Montana',                   N'MT', @US_CountryId),
        (30, N'Nebraska',                  N'NE', @US_CountryId),
        (31, N'Nevada',                    N'NV', @US_CountryId),
        (32, N'New Hampshire',             N'NH', @US_CountryId),
        (33, N'New Jersey',                N'NJ', @US_CountryId),
        (34, N'New Mexico',                N'NM', @US_CountryId),
        (35, N'New York',                  N'NY', @US_CountryId),
        (36, N'North Carolina',            N'NC', @US_CountryId),
        (37, N'North Dakota',              N'ND', @US_CountryId),
        (38, N'Northern Mariana Islands',  N'MP', @US_CountryId),
        (39, N'Ohio',                      N'OH', @US_CountryId),
        (40, N'Oklahoma',                  N'OK', @US_CountryId),
        (41, N'Oregon',                    N'OR', @US_CountryId),
        (42, N'Palau',                     N'PW', @US_CountryId),
        (43, N'Pennsylvania',              N'PA', @US_CountryId),
        (44, N'Puerto Rico',               N'PR', @US_CountryId),
        (45, N'Rhode Island',              N'RI', @US_CountryId),
        (46, N'South Carolina',            N'SC', @US_CountryId),
        (47, N'South Dakota',              N'SD', @US_CountryId),
        (48, N'Tennessee',                 N'TN', @US_CountryId),
        (49, N'Texas',                     N'TX', @US_CountryId),
        (50, N'Utah',                      N'UT', @US_CountryId),
        (51, N'Vermont',                   N'VT', @US_CountryId),
        (52, N'Virgin Islands',            N'VI', @US_CountryId),
        (53, N'Virginia',                  N'VA', @US_CountryId),
        (54, N'Washington',                N'WA', @US_CountryId),
        (55, N'West Virginia',             N'WV', @US_CountryId),
        (56, N'Wisconsin',                 N'WI', @US_CountryId),
        (57, N'Wyoming',                   N'WY', @US_CountryId),
        -- Canadian provinces included here for lookup convenience.
        (58, N'Alberta',                   N'AB', @CAD_CountryId),
        (59, N'British Columbia',          N'BC', @CAD_CountryId),
        (60, N'Manitoba',                  N'MB', @CAD_CountryId),
        (61, N'New Brunswick',             N'NB', @CAD_CountryId),
        (62, N'Newfoundland and Labrador', N'NL', @CAD_CountryId),
        (63, N'Northwest Territories',     N'NT', @CAD_CountryId),
        (64, N'Nova Scotia',               N'NS', @CAD_CountryId),
        (65, N'Nunavut',                   N'NU', @CAD_CountryId),
        (66, N'Ontario',                   N'ON', @CAD_CountryId),
        (67, N'Prince Edward Island',      N'PE', @CAD_CountryId),
        (68, N'Québec',                    N'QC', @CAD_CountryId),
        (69, N'Saskatchewan',              N'SK', @CAD_CountryId),
        (70, N'Yukon Territory',           N'YT', @CAD_CountryId);

    SET IDENTITY_INSERT dbo.States OFF;
END
GO

IF NOT EXISTS ( SELECT * FROM sys.objects
                WHERE object_id = OBJECT_ID(N'dbo.CanadianProvinces') AND type IN (N'U') )
BEGIN
    -- NOTE(review): this table duplicates the Canadian rows already seeded in
    -- dbo.States (with different abbreviations in the original script);
    -- consider consolidating on a single lookup table.
    CREATE TABLE dbo.CanadianProvinces
    (
        ProvinceID   BIGINT PRIMARY KEY IDENTITY(1,1),
        ProvinceName NVARCHAR(50),
        Abbreviation CHAR(2)
    );

    -- Explicit column list: an INSERT without one breaks silently when the
    -- table gains a column later.
    INSERT INTO dbo.CanadianProvinces ( ProvinceName, Abbreviation )
    VALUES (N'British Columbia',        'BC'),
           (N'Manitoba',                'MB'),
           (N'Alberta',                 'AB'),
           (N'Ontario',                 'ON'),
           (N'Saskatchewan',            'SK'),
           (N'Quebec',                  'QC'),  -- was 'QE'; the Canada Post code is QC
           (N'Nunavut',                 'NU'),
           (N'Yukon',                   'YT'),  -- was 'YK'; the Canada Post code is YT
           (N'Northwest Territories',   'NT'),
           (N'Nova Scotia',             'NS'),
           (N'New Brunswick',           'NB'),
           (N'Newfoundland & Labrador', 'NL'),
           (N'Prince Edward Island',    'PE');
END
GO
-- =====================================================================
-- MS_Description (extended property) helper scripts.
-- Each statement below is its own batch; the original collapsed them
-- onto single lines, where the "--" headers commented out the queries
-- that followed them.
-- =====================================================================

-- Get all MS_Description metadata values for ONE DATABASE.
USE <insert_database_name>;
GO

-- Modern catalog views replace the deprecated SQL 2000 compatibility
-- views (sysobjects / sysusers / syscolumns).
SELECT
    s.name + '.' + t.name AS [Schema/Table Names],
    td.value              AS [Table Desc],
    c.name                AS [Column Name],
    cd.value              AS [Column Desc]
FROM sys.tables AS t                         -- sys.tables already filters to type = 'U'
INNER JOIN sys.schemas AS s
    ON s.schema_id = t.schema_id
LEFT OUTER JOIN sys.extended_properties AS td
    ON td.major_id = t.object_id
   AND td.minor_id = 0                       -- minor_id = 0 -> property on the table itself
   AND td.name = 'MS_Description'
INNER JOIN sys.columns AS c
    ON c.object_id = t.object_id
LEFT OUTER JOIN sys.extended_properties AS cd
    ON cd.major_id = c.object_id
   AND cd.minor_id = c.column_id             -- column-level property
   AND cd.name = 'MS_Description'
WHERE cd.value IS NOT NULL                   -- was the awkward "NOT cd.value IS NULL"
ORDER BY t.name, c.column_id;
GO

-- Get the column definitions for ONE TABLE.
SELECT
    c.name AS Field,
    t.name AS Type,
    c.precision,
    c.scale,
    c.is_nullable,
    c.collation_name
FROM sys.columns AS c
INNER JOIN sys.types AS t
    -- was system_type_id: that join returns duplicate rows for alias
    -- types (e.g. sysname also matches nvarchar); user_type_id is 1:1.
    ON t.user_type_id = c.user_type_id
WHERE c.object_id = OBJECT_ID('<insert_table_name>')
ORDER BY c.column_id;
GO

-- Add MS_Description to ONE TABLE.
EXEC sys.sp_addextendedproperty
    @name  = N'MS_Description',
    @value = N'Street address information for customers, employees, and vendors.',
    @level0type = N'SCHEMA', @level0name = 'Person',
    @level1type = N'TABLE',  @level1name = 'Address';
GO

-- Add MS_Description to ONE COLUMN.
EXEC sys.sp_addextendedproperty
    @name  = N'MS_Description',
    @value = N'Postal code is a required column.',
    @level0type = N'SCHEMA', @level0name = 'Person',
    @level1type = N'TABLE',  @level1name = 'Address',
    @level2type = N'COLUMN', @level2name = 'PostalCode';
GO

-- Update MS_Description on ONE COLUMN (property must already exist).
EXEC sys.sp_updateextendedproperty
    @name  = N'MS_Description',
    @value = N'Employee ID must be unique.',
    @level0type = N'SCHEMA', @level0name = 'dbo',
    @level1type = N'TABLE',  @level1name = 'T1',
    @level2type = N'COLUMN', @level2name = 'id';
GO

-- Add MS_Description to a SCALAR FUNCTION (function name is just an example).
-- Was labelled "Update", but it calls sp_addextendedproperty, which fails if
-- the property already exists; use sp_updateextendedproperty to change one.
EXEC sys.sp_addextendedproperty
    @name  = N'MS_Description',
    @value = N'Performs LTRIM(RTRIM()) on input string. Optionally loops to convert all double-spaces to single-spaces.',
    @level0type = N'SCHEMA',   @level0name = 'dbo',
    @level1type = N'FUNCTION', @level1name = 'fn_Trim';
GO

-- Add MS_Description to a PARAMETER of a scalar function.
EXEC sys.sp_addextendedproperty
    @name  = N'MS_Description',
    @value = N'Should all extra (double) spaces be removed?',
    @level0type = N'SCHEMA',    @level0name = 'dbo',
    @level1type = N'FUNCTION',  @level1name = 'fn_Trim',
    @level2type = N'PARAMETER', @level2name = '@StripExtraSpaces';
GO
Use the following small script to find all hidden triggers in a SQL Server database. Many experienced SQL developers consider triggers a risky practice, but few know how to find them up front. Teams often waste development and testing time puzzling over why things aren’t working the way they expect, when the cause is triggers quietly doing undesirable work behind the scenes.
-- Finds all "hidden" triggers in a database. Using triggers is not a best
-- practice; inventory them before debugging unexpected DML behavior.
USE <database_name>;
GO

SELECT o.name AS [TriggerName]
FROM sys.objects AS o
WHERE OBJECTPROPERTY(o.object_id, 'IsTrigger') <> 0;
Here’s some information on SQL Prompt from RedGate, a kickin’ plug-in for SQL Server Management Studio.
This tool is not free – but it’s certainly a productivity enhancer in the long-run for writing clean and easy-to-read SQL with reusable and shared script components (snippets) and sharing team best-practices for SQL. Cost is $369 per seat-license (last I checked) including perpetual upgrades (one year time-limit?); site-licensing is certainly more cost-effective per seat. Its little brother SQL Search is free, and it’s excellent also. I have used RedGate products including this one for five years and highly recommend this one in particular.
- Reusable common script components or shared snippets that an entire team can edit and share from a common network-share location.
- On one project we had 50+ script components that several coders shared to increase both code quality and time-to-completion.
- Having pre-written blocks or snippets of code really speeds things up
- Meta-data discovery for database objects “Intellisense” style, as you type, including MS_Description (extended-property) values along with object type-characteristics
- More robust Table/View/Column auto-completion, Visual Studio style
- Better script parsing for errors (red-squigglies under errors)
- Supports script refactoring with context-menu (right-click) support
- Supports script formatting with default or custom rule-sets, which helps to improve readability of the average SQL code-base
- Search the database for all scripts using “<search term>”, right within SSMS
- When you install this product, it installs as both a SSMS and Visual Studio plug-in, so you have the functionality for editing SQL in both editors
I can say without hesitation that this product greatly enhances coding productivity and release-script quality for SQL scripting.
They offer a free 28-day evaluation version for those new to the product.
What is metadata, and why does it matter? Research shows that knowledge workers spend at least 30% of their work time searching for the right information, let alone analyzing or manipulating it.
I have seen this scenario repeatedly play out: “A developer who used to work here developed a database which is still in production use daily, but nobody 100% understands some of the more vague columns/fields in a lot of those tables. It never got documented and now that we are a bigger company, we’re trying to make sense of both the data structure and the code that depends on it.”
It’s so common, it’s cliché. The structure of the tables was put together by a developer with good intentions, but the column names might be very vague within the context in which they are used, like ‘Price’ or ‘Name’. That developer knew what she meant, but no one else does! Perhaps the company’s evolving business model is outgrowing and out-pacing the original data-model, or the column names were just poorly chosen. Should ‘Price’ have been named RetailPrice or WholesalePrice? ‘Name’ – LastName, FirstName, MiddleName, FullName, TechnicianName, or CustomerName?
Sometimes the name of the table containing the column gives enough context, more often it doesn’t.
With ‘Name’ you might just figure it out by looking at the actual data stored or the stored procedures that manipulate it. With ‘Price’, it might take a lot longer as you look at the calculations in various stored procedures, and ask business users their understanding of the intent of how the data was stored. Sometimes you never will get a straight answer. For ‘Percentage’, was that a discount percentage (retail, wholesale?), an upcharge percentage, commission percentage? You’re spec’ing or writing a tax report — is that ‘decTax’ a decimal-based column for use-tax or sales tax?
It’s the business’ accountability to give semantics or meaning to data, but it’s the responsibility of information workers to apply that meaning through metadata. Somewhere in the middle is usually a business analyst working to ensure the requirements for software creation or change are written correctly, and that meanings are captured and translated correctly for maximum understanding by IT. Sometimes there is no analyst and it all falls on a developer’s shoulders. The importance of this semantic layer in application development cannot be overestimated.
Fortunately, the solutions are not hard to implement
Fortunately, these time-consuming problems of reverse-engineering old code are very avoidable at the start with a little discipline, and if not avoided in the first-place, can be mitigated over time with tools that already exist and are sometimes free, provided enough of the right people have correct knowledge of the data use-context. Data-architecture reviews during development can solve these conundrums, but more often than not, a developer working closest to the data at the time it’s developed can take just a little more time to get this setup in a better manner and “future-proof” the code to the greatest extent possible at the time of creation.
From a SQL Server and .NET perspective, I have seen scores of database tables/columns, C#, and VB.NET classes with no metadata to describe the creator’s intent and use-cases. This information about the data stored is crucial to using these data structures correctly – in-turn, allowing software architects, business analysts, database administrators and developers to ensure a correct and mutual understanding of the data they are storing and manipulating.
For C# code, XML doc (documentation) is applied to classes, class-properties, and class-methods via the /// auto-completion trigger. For VB.NET, use the ”’ auto-completion trigger. In SQL Server, similar metadata is typically stored in the MS_Description field, but more metadata fields can be created and customized beyond that, per column, table, or other database object.
Data-dictionaries & Intellisense
Consistently applied, this metadata can be used to drive third-party tools that will pick it up and turn it into data-dictionaries or automatic code documentation that can be periodically refreshed via a scheduled job, for use on a Confluence server, Sharepoint, or other internal website. Think about it – a single-source of truth for what each table and column represents in your SQL Server, after the many developers and DBAs that have modified the database over the years. A single source of truth for the meaning and proper use-cases of every class, property, and method of every namespace in your company’s .NET code. No more having to do extensive code research to figure out by use-context what that field in your table (added ten years ago) really stands for, then not writing it down, and then someone else having to go through that process all over again five years from now when a vendor implementation is underway.
Applied at the database level, plug-ins into SQL Server Management Studio like SQL Prompt will override the very weak Intellisense provided natively and pick up this metadata and display it as you create and modify stored procedures, views, functions, etc. When you are dealing with anything more than a trivial database, this is a huge boon to productivity and coding accuracy. It’s the difference between knowing whether the column named ‘Price’ in the ‘Product’ table refers to a MSRP price, discount retail price, or wholesale price, and whether the internal applications use this column and/or the public-facing website. Knowing that information about the data is absolutely crucial to a software project’s success and a reduction in labor cost.
I think this graphic from eRSA in Australia sums up the need for metadata well, though it comes at it from the perspective of a large research university:
(see below the large graphic for links to my posts where I describe where to view and how to update the metadata in SQL Server)
On a recent enterprise ASP.NET MVC client project, I was introduced to using Microsoft’s Entity Framework for the first time. I was skeptical at first, thinking that an abstraction layer with LINQ over the database would slow things down performance-wise from the standard .NET calls to SQL procedures (there is a performance cost to using LINQ). I have written and modified thousands of stored procedures, which work reliably well, but don’t have the same development, design, and maintenance advantages with LINQ, Entity SQL, and so forth. Alternatively, using stored procedures requires a thorough understanding of SQL and the “impedance mismatch” between .NET object/procedural code and the set-based operations of a SQL engine.
I was blown away by the speed of the Entity Framework – honestly. I’d never seen database transactions happen that fast when stored procedures were not part of the solution, and with Entity Framework, there is no need for a library of stored procedures. Now part of it could have been the latest and greatest virtual SQL Server 2016 instances in use.
Setting that aside for a moment, I don’t know how one could go wrong with Entity Framework as a starting point for getting away from the standard repertoire (or jungle) of hundreds or even thousands of stored procedures, views, user-defined scalar and table-valued functions, all called from (usually) hand-coded DAL abstraction layers or form-classes in ASP.NET, WinForms, etc. It’s typically quite a bit to manage on its own, and change-analysis on a database server is usually a very time-consuming and tedious undertaking, with or without the benefit of script source-control. Entity Framework makes data-persistence much more manageable for everyone.
Industry was slow to adopt the Entity Framework due to performance bugs in the first versions. As a result, Microsoft was forced to update it quickly, moving from v1 to v4 in fairly short order. By the time the framework had evolved to version 4, industry adoption rates picked up pace and a few performance kinks were worked out. As of early 2017, version 6 has been out for a while, and adoption is even better, for those enterprises that understand the benefit of adoption, and have the time and resources to implement it.
- One queries strongly-typed domain entities like: Customer, Order, LineItem, TimePeriod, etc.
- It completely replaces a typical DAL layer.
- If any, the learning curve of using the many flavors of LINQ.
I have a very high regard for the Entity Framework 4 and higher (current iteration is 6) and look forward to using it in any client project where I can.
Here’s a link to a great article I read recently that summarized in a concise package things not to do when using SQL Server as a data-persistence solution, which is almost more important than what to do.
Note: SQL cursors especially – please never use cursors of any flavor in your stored procedures. There is almost always a better set-based method to accomplish your end goal.
As it’s not mentioned in the following article, I will add that not making use of SQL Server metadata is a mistake I see very frequently. What is metadata? It’s data that describes other data or data structures. It’s particularly easy to implement with SQL Server once you know what to do and how to use it. There is probably no easier way to document a database for all users or stakeholders involved than to implement/update SQL Server metadata, and there are no good reasons not to utilize it. It’s easy and freely available – just waiting to be used. I have a set of stored procedures developed that make the process absolutely painless for keeping SQL Server database metadata updated at the table, column, view, and function/param levels (all the places that really count!), and across all environments (production, pilot, test).