leondutoit · April 5, 2021 11:06
diff --git a/sqlite_intro.sql b/sqlite_intro.sql

 -- this is a random comment
 -- anything prefixed by two dashes is a comment
 -- and will not be executed by the database

 -- to start the interactive session on the command prompt
 -- type: $ sqlite3
 -- something will be printed and then you'll see another prompt like this
 -- sqlite>

 -- copy and paste commands into the prompt and press enter
 -- a single command ends with ";"

 ---------------- Part 1: Tables and some SQL ---------------


 -- create a table
 -- we name columns in the table
 -- data types are assigned to column names
 -- id is the primary key and will be automatically handled by the database
 create table people(
    id integer primary key asc,
    name text,
    surname text,
    birthplace text,
    gender text,
    height real
 );

 -- to ask the database what the table looks like
 .schema people

 -- let's put some people into the table
 -- indentation and white space does not matter
 -- it is just to make things more readable
 -- we do not have to name the columns explicitly
 -- but then we have to get the order right
 -- some height values are guesses
 insert into people(name, surname, birthplace, gender, height)
    values('Leon', 'du Toit', 'South Africa', 'male', 1.79);
 insert into people(name, surname, birthplace, gender, height)
    values('Line', 'Simentad', 'Norway', 'female', 1.72);
 insert into people(name, surname, birthplace, gender, height)
    values('Lars', 'Vegstein', 'Norway', 'male', 1.8);
 insert into people(name, surname, birthplace, gender, height)
    values('Monica', 'Larsen', 'Norway', 'female', 1.71);

 -- let's see what is there
 select * from people;

 -- now we have some basic data to play with
 -- so some aspects of SQL can become clearer

 -- only female gender
 select * from people where gender = 'female';

 -- only male gender
 select * from people where gender = 'male';

 -- only male and Norway
 select * from people where gender = 'male' and birthplace = 'Norway';

 -- only people whose names begin with 'L'
 -- like is used instead of = so we can use a pattern
 -- L% means that the pattern has to begin with L and anything is allowed after
 select * from people where name like 'L%';

 -- we can count how many people there are according to birthplace
 select birthplace, count(*) from people group by birthplace;

 -- we can also see what the average height is per country
 -- and by country and gender
 -- absurd but yeah, we can do it nonetheless
 select birthplace, avg(height) from people group by birthplace;
 select birthplace, gender, avg(height) from people group by birthplace, gender;



 -------------------- Part 2: Joins and some SQL ---------------------

 -- suppose we have a table of events, things that the people did
 create table events(
    id integer primary key asc,
    person_name text,
    event_name text,
    event_time timestamp
 );

 -- lets create some events
 insert into events(person_name, event_name, event_time)
    values('Leon', 'coffee', '2014-11-06 08:01:10');
 insert into events(person_name, event_name, event_time)
    values('Leon', 'climb', '2014-11-07 17:15:01');
 insert into events(person_name, event_name, event_time)
    values('Monica', 'yoga', '2014-11-05 18:30:50');
 insert into events(person_name, event_name, event_time)
    values('Line', 'write', '2014-11-07 06:23:10');
 insert into events(person_name, event_name, event_time)
    values('Lars', 'coffee', '2014-11-07 06:45:34');
 insert into events(person_name, event_name, event_time)
    values('Lars', 'read', '2014-11-07 17:11:11');
 insert into events(person_name, event_name, event_time)
    values('Monica', 'read', '2014-11-07 17:10:44');
 insert into events(person_name, event_name, event_time)
    values('Line', 'coffee', '2014-11-07 10:30:03');

 -- let's look at all the events
 select * from events;

 -- let's look only at the person_name and event_name
 select person_name, event_name from events;

 -- we can order the results (to be more readable for example)
 select person_name, event_name from events order by person_name;
 select * from events order by event_time;

 -- or ask when the first an latest events happened
 select min(event_time), max(event_time) from events;

 -- what if we want more information about the people who did things?
 -- we can then join the person information on the event table
 -- because the names appear in both tables
 select * from events, people where events.person_name = people.name;


 -- we can refine what we want to select only specific columns from that join
 -- here we use what is called aliasing:
 -- we give the events and people tables short names 'e' and 'p'
 -- so we don't have to type so much
 -- this is not necessary and the names can be anything
 -- but it helps the database know which columns you refer to
 -- since there might be columns with the same name in both tables
 select
    e.person_name,
    e.event_name,
    e.event_time,
    p.birthplace,
    p.gender
 from
    events e,
    people p
 where e.person_name = p.name;

 -- it is also possible to do the same join explicitly
 -- here we use subqueries:
 -- we select specific columns from each table and give the result
 -- it returns a name: 'e' and 'p' again
 -- we can then say that we want to join the results in 'e' on 'p'
 -- based on the equality of person_name and name
 select * from
    (select person_name, event_name, event_time from events)e
 join
    (select name, gender, birthplace from people)p
 on e.person_name = p.name;


 -- From doing these joins one can see that it is possible to store
 -- information in several different tables, and to join them together
 -- in queries based on how they are related to each other
 -- this is the basic idea behind ER type database designs

 -- what would typically happen is one would, for example, make one entry
 -- through a web form and then the database would get the new information
 -- and split that up into several related pieces of information
 -- and store that in different tables; when one asks for that information
 -- again the database will combine it again and report it as if it was all
 -- in one table



 -------------------- Part 3: Entities and Relationships ----------------

 -- in the examples above we have two tables: people and events
 -- there is an implcit relation between these two tables: events belong to people
 -- for each event there exists a person that this event belongs to
 -- in the schema (the database tables and their specification) we do not
 -- however enforce that there _must_ exits a person in the people table
 -- for an event to be recorded

 -- we can use the features of the database to enforce this 'exists' relation
 -- between people and events so that a person must exist in order to record an
 -- event that belongs to that person; we can do this by using a foreign key constraint

 -- for further reference on FKs in sqlite3: http://www.hwaci.com/sw/sqlite/foreignkeys.html

 -- we must enable FK constraints in the db
 -- this is sqlite3 specific
 PRAGMA foreign_keys = ON;

 -- note that we drop the person_name column
 -- and instead use the person_id with the FK constraint
 create table events2(
    id integer primary key asc,
    event_name text,
    event_time timestamp,
    person_id integer,
    foreign key(person_id) references people(id)
 );

 -- let's add the same events as before but now using the different table
 -- with the FK constraint
 insert into events2(event_name, event_time, person_id)
    values('coffee', '2014-11-06 08:01:10', 1);
 insert into events2(event_name, event_time, person_id)
    values('climb', '2014-11-07 17:15:01', 1);
 insert into events2(event_name, event_time, person_id)
    values('yoga', '2014-11-05 18:30:50', 4);
 insert into events2(event_name, event_time, person_id)
    values('write', '2014-11-07 06:23:10', 2);
 insert into events2(event_name, event_time, person_id)
    values('coffee', '2014-11-07 06:45:34', 3);
 insert into events2(event_name, event_time, person_id)
    values('read', '2014-11-07 17:11:11', 3);
 insert into events2(event_name, event_time, person_id)
    values('read', '2014-11-07 17:10:44', 4);
 insert into events2(event_name, event_time, person_id)
    values('coffee', '2014-11-07 10:30:03', 2);


 -- let's try to add an event associate with a person with id number 5
 -- we don't know who that is yet but we do know that there are only 4 people
 insert into events2(event_name, event_time, person_id)
    values('tea', '2014-11-01 11:19:10', 5);
 -- "Error: foreign key constraint failed" is what we should get

 -- this means that the 'exists' relation in our data model has been violated
 -- by the insert statement
 -- we can get the new event in by creating the mysterious person
 insert into people(name, surname, birthplace, gender, height)
    values('Emilie', 'Ekeberg', 'Denmark', 'female', 1.68);

 -- now we can try again with success
 -- the 'exists' relation that we specified has been ssatisfied
 insert into events2(event_name, event_time, person_id)
    values('tea', '2014-11-01 11:19:10', 5);

 select * from events2;

 -- we can also do the join as before but now we will use the person_id as the common column
 select * from events2, people where events2.person_id = people.id;


 -- if this database was a real world application there would be a insert procedure that
 -- would get the full information about the event, check if the relevant person
 -- exists, if not create them, and then insert the event information while throwing away
 -- the person specifc information that is not needed; it would also use user_ids instead
 -- of simple sequential ids that are automatically generated as integer increments

	-- this is a random comment
	-- anything prefixed by two dashes is a comment
	-- and will not be executed by the database

	-- to start the interactive session on the command prompt
	-- type: $ sqlite3
	-- something will be printed and then you'll see another prompt like this
	-- sqlite>

	-- copy and paste commands into the prompt and press enter
	-- a single command ends with ";"

	---------------- Part 1: Tables and some SQL ---------------


	-- create a table
	-- we name columns in the table
	-- data types are assigned to column names
	-- id is the primary key and will be automatically handled by the database
	create table people(
	id integer primary key asc,
	name text,
	surname text,
	birthplace text,
	gender text,
	height real
	);

	-- to ask the database what the table looks like
	.schema people

	-- let's put some people into the table
	-- indentation and white space does not matter
	-- it is just to make things more readable
	-- we do not have to name the columns explicitly
	-- but then we have to get the order right
	-- some height values are guesses
	insert into people(name, surname, birthplace, gender, height)
	values('Leon', 'du Toit', 'South Africa', 'male', 1.79);
	insert into people(name, surname, birthplace, gender, height)
	values('Line', 'Simentad', 'Norway', 'female', 1.72);
	insert into people(name, surname, birthplace, gender, height)
	values('Lars', 'Vegstein', 'Norway', 'male', 1.8);
	insert into people(name, surname, birthplace, gender, height)
	values('Monica', 'Larsen', 'Norway', 'female', 1.71);

	-- let's see what is there
	select * from people;

	-- now we have some basic data to play with
	-- so some aspects of SQL can become clearer

	-- only female gender
	select * from people where gender = 'female';

	-- only male gender
	select * from people where gender = 'male';

	-- only male and Norway
	select * from people where gender = 'male' and birthplace = 'Norway';

	-- only people whose names begin with 'L'
	-- like is used instead of = so we can use a pattern
	-- L% means that the pattern has to begin with L and anything is allowed after
	select * from people where name like 'L%';

	-- we can count how many people there are according to birthplace
	select birthplace, count(*) from people group by birthplace;

	-- we can also see what the average height is per country
	-- and by country and gender
	-- absurd but yeah, we can do it nonetheless
	select birthplace, avg(height) from people group by birthplace;
	select birthplace, gender, avg(height) from people group by birthplace, gender;



	-------------------- Part 2: Joins and some SQL ---------------------

	-- suppose we have a table of events, things that the people did
	create table events(
	id integer primary key asc,
	person_name text,
	event_name text,
	event_time timestamp
	);

	-- lets create some events
	insert into events(person_name, event_name, event_time)
	values('Leon', 'coffee', '2014-11-06 08:01:10');
	insert into events(person_name, event_name, event_time)
	values('Leon', 'climb', '2014-11-07 17:15:01');
	insert into events(person_name, event_name, event_time)
	values('Monica', 'yoga', '2014-11-05 18:30:50');
	insert into events(person_name, event_name, event_time)
	values('Line', 'write', '2014-11-07 06:23:10');
	insert into events(person_name, event_name, event_time)
	values('Lars', 'coffee', '2014-11-07 06:45:34');
	insert into events(person_name, event_name, event_time)
	values('Lars', 'read', '2014-11-07 17:11:11');
	insert into events(person_name, event_name, event_time)
	values('Monica', 'read', '2014-11-07 17:10:44');
	insert into events(person_name, event_name, event_time)
	values('Line', 'coffee', '2014-11-07 10:30:03');

	-- let's look at all the events
	select * from events;

	-- let's look only at the person_name and event_name
	select person_name, event_name from events;

	-- we can order the results (to be more readable for example)
	select person_name, event_name from events order by person_name;
	select * from events order by event_time;

	-- or ask when the first an latest events happened
	select min(event_time), max(event_time) from events;

	-- what if we want more information about the people who did things?
	-- we can then join the person information on the event table
	-- because the names appear in both tables
	select * from events, people where events.person_name = people.name;


	-- we can refine what we want to select only specific columns from that join
	-- here we use what is called aliasing:
	-- we give the events and people tables short names 'e' and 'p'
	-- so we don't have to type so much
	-- this is not necessary and the names can be anything
	-- but it helps the database know which columns you refer to
	-- since there might be columns with the same name in both tables
	select
	e.person_name,
	e.event_name,
	e.event_time,
	p.birthplace,
	p.gender
	from
	events e,
	people p
	where e.person_name = p.name;

	-- it is also possible to do the same join explicitly
	-- here we use subqueries:
	-- we select specific columns from each table and give the result
	-- it returns a name: 'e' and 'p' again
	-- we can then say that we want to join the results in 'e' on 'p'
	-- based on the equality of person_name and name
	select * from
	(select person_name, event_name, event_time from events)e
	join
	(select name, gender, birthplace from people)p
	on e.person_name = p.name;


	-- From doing these joins one can see that it is possible to store
	-- information in several different tables, and to join them together
	-- in queries based on how they are related to each other
	-- this is the basic idea behind ER type database designs

	-- what would typically happen is one would, for example, make one entry
	-- through a web form and then the database would get the new information
	-- and split that up into several related pieces of information
	-- and store that in different tables; when one asks for that information
	-- again the database will combine it again and report it as if it was all
	-- in one table



	-------------------- Part 3: Entities and Relationships ----------------

	-- in the examples above we have two tables: people and events
	-- there is an implcit relation between these two tables: events belong to people
	-- for each event there exists a person that this event belongs to
	-- in the schema (the database tables and their specification) we do not
	-- however enforce that there _must_ exits a person in the people table
	-- for an event to be recorded

	-- we can use the features of the database to enforce this 'exists' relation
	-- between people and events so that a person must exist in order to record an
	-- event that belongs to that person; we can do this by using a foreign key constraint

	-- for further reference on FKs in sqlite3: http://www.hwaci.com/sw/sqlite/foreignkeys.html

	-- we must enable FK constraints in the db
	-- this is sqlite3 specific
	PRAGMA foreign_keys = ON;

	-- note that we drop the person_name column
	-- and instead use the person_id with the FK constraint
	create table events2(
	id integer primary key asc,
	event_name text,
	event_time timestamp,
	person_id integer,
	foreign key(person_id) references people(id)
	);

	-- let's add the same events as before but now using the different table
	-- with the FK constraint
	insert into events2(event_name, event_time, person_id)
	values('coffee', '2014-11-06 08:01:10', 1);
	insert into events2(event_name, event_time, person_id)
	values('climb', '2014-11-07 17:15:01', 1);
	insert into events2(event_name, event_time, person_id)
	values('yoga', '2014-11-05 18:30:50', 4);
	insert into events2(event_name, event_time, person_id)
	values('write', '2014-11-07 06:23:10', 2);
	insert into events2(event_name, event_time, person_id)
	values('coffee', '2014-11-07 06:45:34', 3);
	insert into events2(event_name, event_time, person_id)
	values('read', '2014-11-07 17:11:11', 3);
	insert into events2(event_name, event_time, person_id)
	values('read', '2014-11-07 17:10:44', 4);
	insert into events2(event_name, event_time, person_id)
	values('coffee', '2014-11-07 10:30:03', 2);


	-- let's try to add an event associate with a person with id number 5
	-- we don't know who that is yet but we do know that there are only 4 people
	insert into events2(event_name, event_time, person_id)
	values('tea', '2014-11-01 11:19:10', 5);
	-- "Error: foreign key constraint failed" is what we should get

	-- this means that the 'exists' relation in our data model has been violated
	-- by the insert statement
	-- we can get the new event in by creating the mysterious person
	insert into people(name, surname, birthplace, gender, height)
	values('Emilie', 'Ekeberg', 'Denmark', 'female', 1.68);

	-- now we can try again with success
	-- the 'exists' relation that we specified has been ssatisfied
	insert into events2(event_name, event_time, person_id)
	values('tea', '2014-11-01 11:19:10', 5);

	select * from events2;

	-- we can also do the join as before but now we will use the person_id as the common column
	select * from events2, people where events2.person_id = people.id;


	-- if this database was a real world application there would be a insert procedure that
	-- would get the full information about the event, check if the relevant person
	-- exists, if not create them, and then insert the event information while throwing away
	-- the person specifc information that is not needed; it would also use user_ids instead
	-- of simple sequential ids that are automatically generated as integer increments