-- Assuming we have already got the data in [Orignal] table!! -- prepare tables CREATE TABLE ProductType ( ID int PRIMARY KEY IDENTITY(1,1), description varchar(MAX) ); CREATE TABLE Product ( ID INT PRIMARY KEY IDENTITY(1,1), typeID INT, servingSize VARCHAR(MAX), description VARCHAR(MAX) ); CREATE TABLE NutritionFact ( ID INT PRIMARY KEY IDENTITY(1,1), description VARCHAR(MAX), unit VARCHAR(MAX) ); CREATE TABLE Product_Nutrition ( productID INT, nutritionID INT, value FLOAT constraint pk PRIMARY KEY( productID, nutritionID ) ); -- Prepare data in ProductType INSERT INTO ProductType SELECT DISTINCT [Product Type] FROM Original; -- Prepare data in Product INSERT INTO Product (typeID, servingSize, description) SELECT ProductType.ID, [Serving Size], [Nutrition Facts] FROM ProductType, Original WHERE ProductType.description = Original.[Product Type] -- Prepare data in NutritionFact INSERT INTO NutritionFact (description, unit) VALUES ('Calories', '(kCal)'); INSERT INTO NutritionFact (description, unit) VALUES ('Calories from fat', '(kCal)'); INSERT INTO NutritionFact (description, unit) VALUES ('Total Fat', '(g)'); INSERT INTO NutritionFact (description, unit) VALUES ('Saturated Fat', '(g)'); INSERT INTO NutritionFact (description, unit) VALUES ('Trans Fat', '(g)'); INSERT INTO NutritionFact (description, unit) VALUES ('Cholesterol', '(mg)'); INSERT INTO NutritionFact (description, unit) VALUES ('Sodium', '(mg)'); INSERT INTO NutritionFact (description, unit) VALUES ('Carbohydrates', '(g)'); -- Populate some Product-Nutrition Facts INSERT INTO Product_Nutrition SELECT Product.ID, NutritionFact.ID, Original.[Calories] FROM Product, NutritionFact, Original WHERE Original.[Nutrition Facts] = Product.description AND NutritionFact.description = 'Calories' AND Original.Calories IS NOT NULL AND Original.Calories<>0; INSERT INTO Product_Nutrition SELECT Product.ID, NutritionFact.ID, Original.[Calories from fat] FROM Product, NutritionFact, Original WHERE Original.[Nutrition Facts] = Product.description AND NutritionFact.description = 'Calories from fat' AND Original.[Calories from fat] IS NOT NULL AND Original.[Calories from fat]<>0; INSERT INTO Product_Nutrition SELECT Product.ID, NutritionFact.ID, Original.[Total Fat (g)] FROM Product, NutritionFact, Original WHERE Original.[Nutrition Facts] = Product.description AND NutritionFact.description = 'Total Fat' AND Original.[Total Fat (g)] IS NOT NULL AND Original.[Total Fat (g)]<>0; INSERT INTO Product_Nutrition SELECT Product.ID, NutritionFact.ID, Original.[Saturated Fat (g)] FROM Product, NutritionFact, Original WHERE Original.[Nutrition Facts] = Product.description AND NutritionFact.description = 'Saturated Fat' AND Original.[Saturated Fat (g)] IS NOT NULL AND Original.[Saturated Fat (g)]<>0; INSERT INTO Product_Nutrition SELECT Product.ID, NutritionFact.ID, Original.[Trans Fat (g)] FROM Product, NutritionFact, Original WHERE Original.[Nutrition Facts] = Product.description AND NutritionFact.description = 'Trans Fat' AND Original.[Trans Fat (g)] IS NOT NULL AND Original.[Trans Fat (g)]<>0; INSERT INTO Product_Nutrition SELECT Product.ID, NutritionFact.ID, Original.[Cholesterol (mg)] FROM Product, NutritionFact, Original WHERE Original.[Nutrition Facts] = Product.description AND NutritionFact.description = 'Cholesterol' AND Original.[Cholesterol (mg)] IS NOT NULL AND Original.[Cholesterol (mg)]<>0; INSERT INTO Product_Nutrition SELECT Product.ID, NutritionFact.ID, Original.[Sodium (mg)] FROM Product, NutritionFact, Original WHERE Original.[Nutrition Facts] = Product.description AND NutritionFact.description = 'Sodium' AND Original.[Sodium (mg)] IS NOT NULL AND Original.[Sodium (mg)]<>0; INSERT INTO Product_Nutrition SELECT Product.ID, NutritionFact.ID, Original.[Carbohydrates (g)] FROM Product, NutritionFact, Original WHERE Original.[Nutrition Facts] = Product.description AND NutritionFact.description = 'Carbohydrates' AND Original.[Carbohydrates (g)] IS NOT NULL AND Original.[Carbohydrates (g)]<>0; -- -- Now we want to introduce some error -- -- Assuming someone create a new product type called hamburger INSERT INTO ProductType VALUES('Hamburger'); -- Then, he made some products with word "burger" in it to be of type Hamburger UPDATE Product SET typeID=(SELECT ID FROM ProductType WHERE description='Hamburger') WHERE description LIKE '%burger'; -- Then, he created a view just to look at hamburgers... CREATE VIEW hamburger AS SELECT * FROM Product WHERE typeID=(SELECT ID FROM ProductType WHERE description='Hamburger') -- Now, he can easily look at hamburgers... SELECT * FROM hamburger; -- Then, he founds that three products ('Big Mac®', 'Big N' Tasty®,' 'Big N' Tasty® with Cheese') are also Hamburgers, but not on the list... SELECT * FROM product WHERE description LIKE '%big%'; -- So he inserted these products ... INSERT INTO Product (typeID, servingSize, description) VALUES ( 9, '7.5 oz (214 g)', 'Big Mac®'); INSERT INTO Product (typeID, servingSize, description) VALUES ( 9, '7.2 oz (206 g)', 'Big N'' Tasty®'); INSERT INTO Product (typeID, servingSize, description) VALUES ( 9, '7.7 oz (220 g)', 'Big N'' Tasty® with Cheese'); -- -- Now, assuming he was fired because he did some other sloppy things. -- The new guy came, and found hamburgers are really just sandwiches... -- -- So, he removed the view hamburger DROP VIEW hamburger; -- changed those product type 9 into 8... UPDATE Product SET typeID=(SELECT ID FROM ProductType WHERE description='Sandwiches') WHERE typeID=(SELECT ID FROM ProductType WHERE description='Hamburger'); -- And carefully look for duplicates based on descriptions SELECT description, COUNT(*) FROM Product GROUP BY description HAVING COUNT(*)>1 -- And to be extra careful, he want to also look at servingSize SELECT * FROM Product WHERE description in ( SELECT description FROM Product GROUP BY description HAVING COUNT(*)>1 ); -- In this example, there are very few duplicates, and manual removal of them is doable. -- But we are going to assume there are too many duplicates to be handled by hand... -- We can first create a list of distinct products. SELECT DISTINCT typeID, servingSize, description FROM Product; -- Then, we create a temp productTable to store distinct product CREATE TABLE Product_fixed ( ID INT PRIMARY KEY IDENTITY(1,1), typeID INT, servingSize VARCHAR(MAX), description VARCHAR(MAX) ); -- Now, we insert distinct products into this new product table -- It must be noted that productIDs may get changed!!! INSERT INTO Product_fixed SELECT DISTINCT typeID, servingSize, description FROM Product; -- Since product may get new IDs, we need to fix the Product_Nutrition table -- To make this easy, we first create a query to find mappings between IDs. SELECT Product.ID as oldID, Product_fixed.ID as newID FROM Product, Product_fixed WHERE Product.typeID=Product_fixed.typeID AND Product.servingSize=Product_fixed.servingSize AND Product.description = Product_fixed.description; -- Then, we make it into view for convenience CREATE VIEW mapping AS SELECT Product.ID as oldID, Product_fixed.ID as newID FROM Product, Product_fixed WHERE Product.typeID=Product_fixed.typeID AND Product.servingSize=Product_fixed.servingSize AND Product.description = Product_fixed.description; -- Now try to create a list in Product_Nutrition with both new and old ID shown SELECT * FROM Product_Nutrition, mapping WHERE Product_Nutrition.ProductID = mapping.oldID; -- Once we confirm the output is reasonable, we can then update the table UPDATE Product_Nutrition SET ProductID= (SELECT newID FROM mapping WHERE oldID=ProductID); -- Now, we no longer need the old PRoduct Table. Delete it. DROP TABLE Product; -- There is sp_rename Product_fixed, Product; ALTER TABLE Product_fixed rename Product; (MY-SQL) -- Now we finished fixing errors!!!