# import python libraries
import sqlite3,os,pandas as pd,numpy as np
# define a function for creating experimental databases
def connect_to_db(db_file):
sql_connection=None
try: sql_connection=sqlite3.connect(db_file); return sql_connection
except Error as err:
print(err)
if sql_connection is not None: sql_connection.close()
# apply this function
#...
# prepare to SQL query executings
if connection is not None: cursor=connection.cursor()
os.listdir()
For Tests
For Experiments
def get_query(q):
pretty_print(html('SQL Queries'))
tr=[]
# add the executing command
...
# collect the result
result=cursor.fetchall()
for r in result: tr+=[r]
# represent queries
display(table(tr))
For Tests
For Experiments
For Additional Experiments
cursor.execute('''
...
projects(
id integer PRIMARY KEY,
name text NOT NULL,
begin_date text,
end_date text);
''')
cursor.execute('''
...
tasks(
id integer PRIMARY KEY,
name text NOT NULL,
priority integer,
status_id integer NOT NULL,
project_id integer NOT NULL,
begin_date text NOT NULL,
end_date text NOT NULL,
FOREIGN KEY (project_id) REFERENCES projects (id));
''')
def create_project(cursor,project):
q='''INSERT INTO projects(name,begin_date,end_date)
VALUES(?,?,?)'''
cursor.execute(q,project)
return cursor.lastrowid
def create_task(cursor,task):
q='''INSERT INTO tasks(
name,priority,status_id,
project_id,begin_date,end_date)
VALUES(?,?,?,?,?,?)'''
cursor.execute(q,task)
return cursor.lastrowid
For Tests
For Experiments
project1=('SQL CookBook with SageMathCell',
'2020-02-04','2020-02-18')
project2=('SQL Tests',
'2020-01-31','2020-02-14')
project_id1=create_project(cursor,project1)
project_id2=create_project(cursor,project2)
t=[('Page 1',int(1),int(1),
project_id1,'2020-02-04','2020-02-12'),
('Page 2',int(1),int(1),
project_id1,'2020-02-10','2020-02-18'),
('Test 1',int(1),int(1),
project_id2,'2020-01-31','2020-02-01'),
('Test 2',int(1),int(1),
project_id2,'2020-02-02','2020-02-03'),
('Test 3',int(1),int(1),
project_id2,'2020-02-04','2020-02-05'),
('Test 4',int(1),int(1),
project_id2,'2020-02-06','2020-02-07'),
('Test 5',int(1),int(1),
project_id2,'2020-02-08','2020-02-09'),
('Test 6',int(1),int(1),
project_id2,'2020-02-09','2020-02-10'),
('Test 7',int(1),int(1),
project_id2,'2020-02-10','2020-02-11'),
('Dublicate',int(1),int(1),
project_id2,'2020-02-11','2020-02-14')]
for task in t:
...
For Tests
For Experiments
# from pandas DataFrames
df1.to_sql('df1',con=connection,
index=False,if_exists='replace')
df2.to_sql('df2',con=connection,
index=False,if_exists='replace')
# from pandas dataframes and csv files
url=('https://raw.githubusercontent.com/pandas-dev/pandas/main/pandas/'
'tests/io/data/csv/tips.csv')
...
tips.to_sql('tips',con=connection,if_exists='replace')
tips.head(int(10)).style.set_table_styles(style_dict)
For Tests
For Experiments
q1='''
...
FROM df1,df2;
'''
q2='''
...
FROM df1,df2
WHERE df1.key=df2.key AND df1.value>0
ORDER BY key;
'''
q3='''
...
FROM projects,tasks
WHERE projects.id=tasks.project_id;
'''
q4='''
...
FROM projects,tasks
WHERE projects.id=tasks.project_id AND projects.id=2
ORDER BY tasks.id DESC;
'''
for q in [q1,q2]:
display(pd.read_sql_query(q,con=connection)\
.set_axis(['key1','value1','key2','value2'],axis=1)\
.style.set_table_styles(style_dict))
for q in [q3,q4]: get_query(q)
For Tests
For Experiments
q1='''
SELECT total_bill,tip,smoker,time
FROM tips
%s 7;
'''
q2='''
SELECT *
FROM tips
WHERE time='Dinner'
%s 3;
'''
q3='''
SELECT day,sex,AVG(tip),COUNT(*)
FROM tips
GROUP BY day,sex
%s 4;
'''
...
for q in [q1,q2,q3]: get_query(q%s)
pretty_print(html('<p>Pandas Queries</p>'))
display(tips[['total_bill','tip','smoker','time']].head(int(7))\
.style.set_table_styles(style_dict))
pretty_print(html('<p>Pandas Queries</p>'))
display(tips[tips['time']=='Dinner'].head(int(3))\
.style.set_table_styles(style_dict))
pretty_print(html('<p>Pandas Queries</p>'))
tips.groupby(['day','sex']).agg({'tip':'mean','day':np.size})\
.head(int(4)).style.set_table_styles(style_dict)
For Tests
For Experiments
q1='''
SELECT *
FROM df1
INNER %s df2
ON df1.key=df2.key;
'''
q2='''
SELECT *
FROM df1
LEFT OUTER %s df2
ON df1.key=df2.key;
'''
q3='''
SELECT *
FROM df2
LEFT OUTER %s df1
ON df1.key=df2.key;
'''
...
for q in [q1,q2,q3]: get_query(q%s)
pretty_print(html('<p>Pandas Queries</p>'))
display(pd.merge(df1,df2,on='key')\
.style.set_table_styles(style_dict))
pretty_print(html('<p>Pandas Queries</p>'))
display(pd.merge(df1,df2,on='key',how='left')\
.style.set_table_styles(style_dict))
pretty_print(html('<p>Pandas Queries</p>'))
pd.merge(df1,df2,on='key',how='right')\
.style.set_table_styles(style_dict)
For Tests
For Experiments
get_query('''
SELECT key,value
FROM df1
...
SELECT key,value
FROM df2;
''')
pretty_print(html('<p>Pandas Queries</p>'))
pd.concat([df1,df2]).set_index(pd.Index(range(11)))\
.style.set_table_styles(style_dict)
For Tests
For Experiments
For Experiments
For Experiments