Skip to content

Instantly share code, notes, and snippets.

@MrHen
Last active November 16, 2018 16:21
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save MrHen/036d340ae20d86c8b75c96acaafe24e3 to your computer and use it in GitHub Desktop.
Save MrHen/036d340ae20d86c8b75c96acaafe24e3 to your computer and use it in GitHub Desktop.
Sample Data
INSERT INTO {table_schema}.{table_name} (
name,
city,
customer_id,
last_purchase,
nps,
value_tier,
customer_since
)
SELECT
translate(lpad(floor(random() * 99999)::text, 5, '0'), '1234567890', 'abcdefghij')
, translate(lpad(floor(random() * 99999)::text, 3, '0'), '1234567890', 'abcdefghij')
, floor(random() * 99999)
, floor(random() * 999) / 100
, floor(random() * 9) + 1
, floor(random() * 6)
, date('2020-02-20') + make_interval(days := floor(random() * 20)::integer)
FROM generate_series(1, {count})
DROP TABLE adam_sample_one;
DROP TABLE adam_sample_two;
DROP TABLE adam_sample_three;
CREATE TABLE adam_sample_two
(
aaa SERIAL,
-- aaa INT IDENTITY(0, 1),
bbb TEXT NOT NULL,
ccc INT NOT NULL,
ddd TEXT NOT NULL
)
;
CREATE TABLE adam_sample_three
(
aaa SERIAL,
-- aaa INT IDENTITY(0, 1),
bbb TEXT NOT NULL,
ccc INT NOT NULL,
ddd TEXT NOT NULL
)
;
INSERT INTO adam_sample_two (bbb, ccc, ddd)
SELECT
translate(lpad(floor(random() * 99999)::text, 5, '0'), '1234567890', 'abcdefghij')
, floor(random() * 999)
, translate(lpad(floor(random() * 999)::text, 3, '0'), '1234567890', 'abcdefghij')
FROM adam_sample_two
LIMIT 10000
;
CREATE UNIQUE INDEX adam_sample_two_pk ON adam_sample_two (aaa);
SELECT COUNT(*) FROM adam_sample_two;
INSERT INTO adam_sample_three (
bbb
, ccc
, ddd
, aaa
)
SELECT bbb
, ccc
, case
when (random() < 0.5)
then ddd
else translate(
lpad(floor(random() * (10^char_length(ddd))-1)::text, char_length(ddd), '0'),
'1234567890',
'abcdefghij'
)
end as ddd
, aaa
FROM adam_sample_two
;
CREATE UNIQUE INDEX IF NOT EXISTS adam_sample_three_pk ON adam_sample_three (aaa);
DROP TABLE adam_sample_one;
CREATE TABLE adam_sample_one AS SELECT * FROM adam_sample_two;
CREATE TABLE adam_sample_one AS SELECT * FROM adam_sample_three;
SELECT * FROM adam_sample_one LIMIT 1000;
SELECT * FROM adam_sample_two LIMIT 1000;
SELECT * FROM adam_sample_three LIMIT 1000;
---
DROP TABLE adam_sample_two;
DROP TABLE adam_sample_three;
CREATE TABLE adam_sample_two (
consumer_id INTEGER IDENTITY(0, 1)
, customer_name TEXT NOT NULL
, transactions INTEGER NOT NULL
, sales_last_12 NUMERIC NOT NULL
, total_sales NUMERIC NOT NULL
, customer_since DATE NOT NULL
, value_tier TEXT NOT NULL
, lifestyle_segment TEXT NOT NULL
, sales_last_3 NUMERIC NOT NULL
, avg_basket NUMERIC NOT NULL
, favorite_store TEXT NOT NULL
, age NUMERIC NOT NULL
, education TEXT NOT NULL
, est_income TEXT NOT NULL
, num_of_children INTEGER NOT NULL
, sales_last_6 NUMERIC NOT NULL
)
;
INSERT INTO adam_sample_huge (
customer_name
, transactions
, sales_last_12
, total_sales
, customer_since
, value_tier
, lifestyle_segment
, sales_last_3
, avg_basket
, favorite_store
, age
, education
, est_income
, num_of_children
, sales_last_6
)
SELECT
translate(
lpad(floor(random() * (10^10)-1)::text, 10, '0'),
'1234567890',
'abcdefghij'
)
AS customer_name
, floor(random() * (10^4)-1) AS transactions
, floor(random() * (10^8)-1) / 100 AS sales_last_12
, floor(random() * (10^8)-1) / 100 AS "total_sales"
, ('2018-01-' || (floor(random() * 31) + 1)::text)::date AS customer_since
, floor(random() * 6)::text AS value_tier
, translate(
lpad(floor(random() * (10^10)-1)::text, 10, '0'),
'1234567890',
'abcdefghij'
)
AS "lifestyle_segment"
, floor(random() * (10^8)-1) / 100 AS "sales_last_3"
, floor(random() * (10^4)-1) / 100 AS "avg_basket"
, translate(
lpad(floor(random() * (10^10)-1)::text, 10, '0'),
'1234567890',
'abcdefghij'
)
AS "favorite_store"
, floor(random() * 50 + 20) AS "age"
, translate(
lpad(floor(random() * (10^10)-1)::text, 10, '0'),
'1234567890',
'abcdefghij'
) AS "education"
, translate(
lpad(floor(random() * (10^6)-1)::text, 10, '0'),
'1234567890',
'abcdefghij'
) AS "est_income"
, floor(random() * 4) AS "num_of_children"
, floor(random() * (10^8)-1) / 100 AS sales_last_6
FROM adam_sample_two
LIMIT 10000
;
CREATE TABLE adam_sample_three AS
SELECT
"consumer_id"
, CASE
WHEN (random() < 0.8)
THEN "customer_name"
ELSE translate(
lpad(floor(random() * (10^char_length("customer_name"))-1)::text, char_length("customer_name"), '0'),
'1234567890',
'abcdefghij'
)
END AS customer_name
, CASE
WHEN (random() < 0.9)
THEN "transactions"
ELSE floor(random() * (10^char_length("transactions"::text))-1)
END AS transactions
, CASE
WHEN (random() < 0.9)
THEN "sales_last_12"
ELSE floor(random() * (10^char_length("sales_last_12"::text))-1)
END AS sales_last_12
, "total_sales"
, CASE WHEN (random() < 0.9)
THEN "customer_since"
ELSE ('2018-01-' || (floor(random() * 31) + 1)::text)::date
END AS customer_since
, CASE WHEN (random() < 0.9)
THEN "value_tier"
ELSE floor(random() * 6)::text
END AS value_tier
, "lifestyle_segment"
, "sales_last_3"
, "avg_basket"
, "favorite_store"
, "age","education"
, "est_income"
, "num_of_children"
, "sales_last_6"
FROM adam_sample_two
;
CREATE TABLE adam_sample_two AS SELECT * FROM adam_sample_three;
CREATE UNIQUE INDEX IF NOT EXISTS adam_sample_two_pk ON adam_sample_two (consumer_id);
CREATE UNIQUE INDEX IF NOT EXISTS adam_sample_three_pk ON adam_sample_three (consumer_id);
SELECT * FROM adam_sample_one LIMIT 100;
SELECT * FROM adam_sample_two LIMIT 100;
SELECT * FROM adam_sample_three LIMIT 100;
DROP TABLE adam_sample_one;
CREATE TABLE adam_sample_one AS SELECT * FROM adam_sample_two;
DROP TABLE adam_sample_tiny;
CREATE TABLE adam_sample_tiny AS SELECT * FROM adam_sample_two LIMIT 500;
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment