Last active
March 1, 2018 03:42
-
-
Save sripathikrishnan/5b487b4def977e4e4d7e46073a69a149 to your computer and use it in GitHub Desktop.
Answers to AskUbuntu Data Warehouse Assignment
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| --Top 10 users by reputation | |
| select id, DisplayName, Reputation | |
| from Users order by Reputation desc limit 10; | |
| --Top 10 questions asked by user Mitch | |
| select p.Title, p.Score, u.DisplayName | |
| from Posts p inner join Users u on p.OwnerUserId = u.Id | |
| and p.PostTypeId = 1 | |
| where u.DisplayName = 'Mitch' | |
| order by p.Score desc; | |
| -- Most popular badges, sorted by number of users who have this badge | |
| select b.Name, count(*) | |
| from Badges b inner join UserBadges ub on b.id = ub.BadgeId | |
| group by b.Name | |
| order by 2 desc | |
| limit 10; | |
| -- For users with reputation > 75000, print Userid, DisplayName, | |
| -- Reputation and total number of questions they have asked | |
| select u.Id, u.DisplayName, u.Reputation, count(*) as QuestionsAsked | |
| from Users u inner join Posts p on u.id = p.OwnerUserId and p.PostTypeId = 1 | |
| where u.reputation > 75000 | |
| group by u.Id, u.DisplayName, u.Reputation | |
| order by QuestionsAsked desc | |
| limit 10; | |
| -- Print userid, user name and reputation for users | |
| -- whose reputation is higher than the user 'Takkat' | |
| select u.Id, u.DisplayName, u.Reputation | |
| from Users u, Users u2 | |
| where u2.DisplayName = 'Takkat' | |
| and u.Reputation > u2.Reputation | |
| order by u.Reputation desc; | |
| -- Return a list of users (id, displayname, reputation) that: | |
| -- * Have a reputation greater than 1000 | |
| -- * Have received a score of 10 or more on at least one of their comments | |
| -- * Have at least one of the badges "Nice Answer", "Good Answer", "Great Answer" | |
| -- Sort the users by reputation in descending order | |
| with UsersWithAtleastOnePopularComment as ( | |
| select u.Id as UserId from Users u | |
| where exists ( | |
| select 'x' from comments c1 | |
| where c1.UserId = u.Id | |
| and c1.score > 10 | |
| ) | |
| ), | |
| UsersWithBadges as ( | |
| select u.Id as UserId from Users u | |
| where exists ( | |
| select 'x' from UserBadges ub inner join Badges b on ub.BadgeId = b.Id | |
| where ub.UserId = u.Id and b.Name in ('Nice Answer', 'Good Answer', 'Great Answer') | |
| ) | |
| ) | |
| select u.Id, u.DisplayName, u.Reputation | |
| from Users u inner join UsersWithAtleastOnePopularComment up on u.Id = up.UserId | |
| inner join UsersWithBadges ub on u.Id = ub.UserId | |
| where u.Reputation > 1000 | |
| order by u.Reputation desc; | |
| -- To verify the above query | |
| -- List all comments for a user whose score is > 10 | |
| select c.score, c.CommentText from Comments c | |
| where c.score > 10 and c.UserId = 1059; | |
| -- List all badges for user | |
| select ub.UserId, b.Name from Badges b inner join UserBadges ub on b.Id = ub.BadgeId | |
| where ub.UserId = 1059 and b.Name in ('Nice Answer', 'Good Answer', 'Great Answer'); | |
| -- Find Duplicate Comments | |
| -- Duplicate is defined as same user posting | |
| -- exact same comment on the same post within 1 minute of each other | |
| select c.Id, c.PostId, u.DisplayName, c.CommentText | |
| from Comments c inner join Users u on c.UserId = u.Id | |
| where exists ( | |
| select 'x' from Comments c1 | |
| where c1.Id <> c.Id and c1.UserId = c.UserId and c1.PostId = c.PostId | |
| and c1.CommentText = c.CommentText | |
| and extract(epoch from (c1.CreationDate - c.CreationDate)) < 60 | |
| ); | |
| -- CSV | |
| -- TODO: Not optimum, dislike the distinct | |
| with Answerers as ( | |
| select q.Id as QuestionId, array_agg(au.DisplayName) as Answerers | |
| from Posts q inner join Posts a | |
| on q.Id = a.ParentId and q.PostTypeId = 1 and a.PostTypeId = 2 | |
| inner join Users au on a.OwnerUserId = au.Id | |
| group by q.Id | |
| ), | |
| QuestionCommenters as ( | |
| select q.Id as QuestionId, array_agg(distinct cu.DisplayName) as QuestionCommenters | |
| from Posts q inner join Comments c on q.Id = c.PostId and q.PostTypeId = 1 | |
| inner join Users cu on c.Userid = cu.id | |
| group by q.Id | |
| ), | |
| AnswerCommenters as ( | |
| select q.Id as QuestionId, array_agg(distinct u.DisplayName) as AnswerCommenters | |
| from Posts q inner join Posts a | |
| on q.Id = a.ParentId and q.PostTypeId = 1 and a.PostTypeId = 2 | |
| inner join Comments c on a.Id = c.PostId | |
| inner join Users u on c.UserId = u.Id | |
| group by q.Id | |
| ) | |
| select p.Id, p.Title, a.Answerers, qc.QuestionCommenters, ac.AnswerCommenters | |
| from Posts p inner join Answerers a on p.Id = a.QuestionId | |
| inner join QuestionCommenters qc on p.Id = qc.QuestionId | |
| inner join AnswerCommenters ac on p.Id = ac.QuestionId | |
| where p.PostTypeId = 1 and p.OwnerUserId = 10; | |
| --- Corelated update | |
| update Users | |
| set QuestionsAsked = t.QuestionsAsked | |
| from | |
| (select p.OwnerUserId as UserId, count(*) as QuestionsAsked from Posts p where p.PostTypeId = 1 and p.OwnerUserId is not null group by p.OwnerUserId) t | |
| where id = t.UserId; | |
| -- Alternative way to write the same query | |
| with qa as ( | |
| select p.OwnerUserId as UserId, count(*) as QuestionsAsked from Posts p where p.PostTypeId = 1 and p.OwnerUserId is not null group by p.OwnerUserId) | |
| update Users | |
| set QuestionsAsked = qa.QuestionsAsked | |
| from qa | |
| where id = qa.UserId; | |
| -- New Tag Score Algorithm for User | |
| with AnswerTags as ( | |
| select a.Id as AnswerId, t.TagName as TagName | |
| from Posts a inner join Posts q | |
| on a.ParentId = q.Id and a.PostTypeId = 2 and q.PostTypeId = 1 | |
| inner join PostTags qt on q.id = qt.PostId | |
| inner join Tags t on qt.tagid = t.id | |
| ), | |
| AnswerNormalizedScore as ( | |
| select a.Id as AnswerId, a.score / count(*) as Score | |
| from Posts a inner join Posts q | |
| on a.ParentId = q.Id and a.PostTypeId = 2 and q.PostTypeId = 1 | |
| inner join PostTags qt on q.id = qt.PostId | |
| group by a.Id, a.score | |
| ) | |
| select u.DisplayName, at.TagName, sum(norm.Score) | |
| from Posts p inner join Users u on p.OwnerUserId = u.Id and p.PostTypeId = 2 | |
| inner join AnswerTags at on at.AnswerId = p.Id | |
| inner join AnswerNormalizedScore norm on norm.AnswerId = p.Id | |
| where u.Id = 10 | |
| group by u.DisplayName, at.TagName | |
| order by 3 desc; | |
| -- People who have updated the title of their posts more than 5 times | |
| -- To verify your output, replace postid and then visit | |
| -- https://askubuntu.com/posts/<postid>/revisions | |
| with IndecisiveUsers as ( | |
| select ph.PostId as PostId, ph.UserId as UserId, count(*) NumberOfEdits | |
| from PostHistory ph | |
| where ph.PostHistoryTypeId = 4 | |
| group by ph.PostId, ph.UserId | |
| having count(*) > 5 | |
| ) | |
| select p.Id as PostId, p.OwnerUserId, u.DisplayName, p.Title | |
| from Posts p | |
| inner join Users u on p.OwnerUserId = u.Id | |
| inner join IndecisiveUsers iu on p.OwnerUserId = iu.Userid and p.Id = iu.PostId; | |
| -- Indexes | |
| -- TODO: Haven't reviewed the performance of queries wrt these indexes | |
| create index indx_users_reputation on users(reputation); | |
| create index indx_users_displayname on users(displayname); | |
| create index indx_user_badge on userbadges(userid, badgeid); | |
| create index indx_post_owner_user on Posts(owneruserid, posttypeid); | |
| create index indx_comment_userid on Comments(userid); | |
| create index indx_badge_name on Badges(name); | |
| create index indx_comment_user_post on Comments(postid, userid); | |
| create index indx_posthistory on Posthistory(postid); |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment