SQL常用语句学习与汇总
本文SQL语句主要来源于SQLZOO和数据库SQL实战_牛客网,结合自己在dataquest学习sql数据库的经历,汇总了sql的常用语句。本篇文章仅记录语句,不提供数据表,可供sql使用者查询。强烈推荐数据库SQL实战_牛客网,虽然系统有一些不足,但难度适中,且较全面,主要是讨论区有一群数据库学习者的热情分享总结,可快速查询到答案以及经验教训。 本人在dataquest学习过程中,被灌输了代码可读性的思想,SQL语句有时候会非常复杂,应该活用大写和空格(capitalization and whitespace)增强代码可读性。甚至可读性readability比一致性consistency更重要,所以任何时候你写sql语句都应该考虑可读性的写法。查询语句可读性的几点建议如下,更多的建议详见SQL Style Guide。
- If a select statement has more than one column, put each on a new line, indented from the select statement.
- Always capitalize SQL function names and keywords
- Put each clause of your query on a new line.
- Use indenting to make subqueries appear logically separate.
“Even if you don’t intend anybody else to read your code, there’s still a very good chance that somebody will have to stare at your code and figure out what it does: That person is probably going to be you, twelve months from now.” —Raymond Chen
1.基本条件查找(数据表见SQLZOO)
/*一般性where语句*/
SELECT population FROM world WHERE name = 'France';
SELECT name, population FROM world WHERE name IN ('Brazil', 'Russia', 'India', 'China');
SELECT name, area FROM world WHERE area BETWEEN 250000 AND 300000
/*字符串模式匹配*/
SELECT name FROM world WHERE name LIKE 'Y%' /*以y开头*/
SELECT name FROM world WHERE name LIKE 'C%ia' /*以C开头ia结尾*/
SELECT name FROM world WHERE name LIKE '_n%' ORDER BY name /*第二个字符是n,下划线是字母通配符*/
SELECT name FROM world WHERE name LIKE '%a%' AND name LIKE '%e%' AND name LIKE '%i%' AND name LIKE '%o%' AND name LIKE '%u%' AND name NOT LIKE '% %'
/*ROUND(*, n), n为正则精确到小数点位数,n为负则精确到相应位数*/
SELECT name, ROUND(population/1000000,2) FROM world WHERE continent='South America'
SELECT name, ROUND(gdp/population,-3) FROM world WHERE gdp>1000000000000
/* LENGTH()*/
SELECT name, capital FROM world WHERE LENGTH(name)=LENGTH(capital)
/* LEFT(*,n)前n个字符,<>,或!=不等于*/
SELECT name,capital FROM world WHERE LEFT(name,1)=LEFT(capital,1) AND name<>capital
/* SUBSTR(str, -2) str最后两个字符 */
select first_name from employees order by substr(first_name, -2)
/*LIMIT n从0开始取n个数据,LIMIT m,n从m开始取n个数据*/
select * from employees order by hire_date desc limit 2,1 /*取第三个数据*/
/* ||连接符 */
select last_name || ' ' || first_name from employees
/* distinct去重,常用去重计算和去重得到值唯一表 */
SELECT title, COUNT(DISTINCT emp_no) AS t FROM titles
GROUP BY title HAVING t >= 2
/* CAST()改变数据类型 */
SELECT CAST(f.population AS FLOAT)
2.JOIN语句(数据表见数据库SQL实战_牛客网)
select s.*, d.dept_no from salaries as s
inner join dept_manager as d on d.emp_no=s.emp_no
where d.to_date='9999-01-01' and s.to_date='9999-01-01'
/* ,号也可以查询两张表,效果同inner join连接两张表*/
SELECT e.emp_no, s.salary FROM employees AS e, salaries AS s
WHERE e.emp_no = s.emp_no AND e.hire_date = s.from_date
ORDER BY e.emp_no DESC
/* GROUP BY聚合后MAX(), MIN(), AVG(),SUM(), COUNT() 等操作*/
select de.dept_no, de.emp_no, max(s.salary)
from dept_emp as de
inner join salaries as s
on s.emp_no = de.emp_no
where de.to_date = '9999-01-01'
group by de.dept_no
/*多层连接*/
select e.emp_no, (s1.salary-s2.salary) growth
from employees as e
inner join salaries as s1
on e.emp_no = s1.emp_no and s1.to_date = '9999-01-01'
inner join salaries as s2
on e.emp_no = s2.emp_no and e.hire_date = s2.from_date
order by growth asc
3.SELECT within SELECT(数据表见数据库SQL实战_牛客网)
select emp_no from employees where emp_no not in (select emp_no from dept_manager)
select title, count(*) t from (select * from titles group by emp_no, title) group by title having t >= 2
/*select和join结合使用*/
SELECT de.dept_no, s.emp_no, s.salary
FROM dept_emp AS de INNER JOIN salaries AS s ON s.emp_no = de.emp_no AND s.to_date = '9999-01-01'
WHERE de.emp_no NOT IN (SELECT emp_no FROM dept_manager WHERE to_date = '9999-01-01')
4.Self JOIN(数据表见数据库SQL实战_牛客网)
/* from ,号查询两张表,相当于join */
SELECT s2.emp_no, s2.from_date, (s2.salary - s1.salary) AS salary_growth
FROM salaries AS s1, salaries AS s2
WHERE s1.emp_no = s2.emp_no
AND salary_growth > 5000
AND (strftime("%Y",s2.to_date) - strftime("%Y",s1.to_date) = 1 OR strftime("%Y",s2.from_date) - strftime("%Y",s1.from_date) = 1 )
ORDER BY salary_growth DESC
/* inner join复用salaries表 */
select de.emp_no, dm.emp_no, s1.salary, s2.salary
from dept_emp as de
inner join dept_manager as dm on dm.dept_no = de.dept_no
inner join salaries as s1 on s1.emp_no = de.emp_no and s1.to_date='9999-01-01'
inner join salaries as s2 on s2.emp_no = dm.emp_no and s2.to_date='9999-01-01'
where s1.salary > s2.salary
/*复用表*/
SELECT s1.emp_no, s1.salary,
(SELECT SUM(s2.salary) FROM salaries AS s2
WHERE s2.emp_no <= s1.emp_no AND s2.to_date = '9999-01-01') AS running_total
FROM salaries AS s1
WHERE s1.to_date = '9999-01-01'
ORDER BY s1.emp_no
/* 复用表 对于employees表中,给出奇数行的first_name */
SELECT e1.first_name FROM
employees e1
WHERE
(SELECT count(*) FROM employees e2
WHERE e1.first_name <=e2.first_name)%2=1;
5.Readability(数据表见dataquest)
/* with语句创建中间表,是临时的子查询 */
WITH track_info AS
(
SELECT
t.name,
ar.name artist,
al.title album_name,
mt.name media_type,
g.name genre,
t.milliseconds length_milliseconds
FROM track t
INNER JOIN media_type mt ON mt.media_type_id = t.media_type_id
INNER JOIN genre g ON g.genre_id = t.genre_id
INNER JOIN album al ON al.album_id = t.album_id
INNER JOIN artist ar ON ar.artist_id = al.artist_id
)
SELECT * FROM track_info
WHERE album_name = "Jagged Little Pill";
/* chinook为database,可不用。view是永久的子查询 */
CREATE VIEW chinook.customer_gt_90_dollars AS
SELECT
c.*
FROM chinook.invoice i
INNER JOIN chinook.customer c ON i.customer_id = c.customer_id
GROUP BY 1
HAVING SUM(i.total) > 90;
SELECT * FROM chinook.customer_gt_90_dollars;
6.SQL方案语句(数据表见数据库SQL实战_牛客网)
CREATE TABLE actor(
actor_id smallint(5) NOT NULL PRIMARY KEY,
first_name varchar(45) NOT NULL,
last_name varchar(45) NOT NULL,
last_update timestamp NOT NULL default(datetime('now','localtime'))
);
insert into actor values
(1, 'PENELOPE', 'GUINESS', '2006-02-15 12:34:33'),
(2, 'NICK', 'WAHLBERG', '2006-02-15 12:34:33')
INSERT OR IGNORE INTO actor values
(3, 'ED', 'CHASE', '2006-02-15 12:34:33')
create table actor_name as select first_name, last_name from actor;
create unique index uniq_idx_firstname on actor(first_name);
create index idx_lastname on actor(last_name);
create view actor_name_view as
select first_name first_name_v, last_name last_name_v from actor
select * from salaries indexed by idx_emp_no where emp_no=10005
alter table titles_test rename to titles_2017
alter table actor add create_date datetime not null default'0000-00-00 00:00:00'
create trigger audit_log after insert on employees_test
begin
insert into audit values(NEW.id,NEW.NAME);
end;
delete from titles_test where id not in (select min(id) from titles_test group by emp_no)
update titles_test set to_date=null, from_date='2001-01-01' where to_date='9999-01-01'
update titles_test set emp_no = replace(emp_no,10001,10005) where id=5
update salaries set salary = 1.1*salary where to_date='9999-01-01' and emp_no in (select emp_no from emp_bonus)
DROP TABLE audit;
CREATE TABLE audit(
EMP_no INT NOT NULL,
create_date datetime NOT NULL,
FOREIGN KEY(EMP_no) REFERENCES employees_test(ID));
/* 按照dept_no进行汇总,属于同一个部门的emp_no按照逗号进行连接,结果给出dept_no以及连接出的结果employees */
/* group_concat(x, y)函数,其中X是要连接的字段,Y是连接时用的符号,可省略,默认为逗号。此函数必须与 GROUP BY 配合使用 */
select dept_no, group_concat(emp_no) employees from dept_emp group by dept_no
/* 分页查询employees表,每5行一页,返回第2页的数据 */
select * from employees limit 5 offset 5
/* case语句, case when then else end */
select e.emp_no, e.first_name, e.last_name, eb.btype, s.salary,
(case eb.btype
when 1 then 0.1*s.salary
when 2 then 0.2*s.salary
else 0.3*s.salary end) bonus
from employees as e
inner join emp_bonus as eb on eb.emp_no = e.emp_no
inner join salaries as s on s.emp_no = eb.emp_no
where s.to_date = '9999-01-01'
/* 按行连接,要求列相同。 union并集,intersect交集,except差集 */
SELECT * from customer_usa
UNION
SELECT * from customer_gt_90_dollars;
SELECT * from customer_usa
INTERSECT
SELECT * from customer_gt_90_dollars;
SELECT * from customer_usa
EXCEPT
SELECT * from customer_gt_90_dollars;
7.总结
SQL(似乎并不是structured query language的首字母缩写,但当成它却很合理)是结构化查询语言,python结合sql可以高效处理大数据,不过是可视化还是机器学习中都很有用。附件是一个python应用sql数据处理的实例,可供学习。 Answering Business Questions using SQL
The theme of these days: Keep catching up!