SQL AND PYTHON, ETC…
-
Given a database table users, with the following columns:
User
Id
Name
and
friendship_pairs
Users = [
{"id": 0, "name": "Ace"},
{"id": 0, "name": "Most"},
{"id": 0, "name": "Brad"},
{"id": 0, "name": "Jules"},
{"id": 0, "name": "Lars"},
{"id": 0, "name": "Mia"},
{"id": 0, "name": "Cole"},
{"id": 0, "name": "Devin"},
{"id": 0, "name": "Susie"},
{"id": 0, "name": "Shaw"}
]
friendship_pairs = [(0,1), (0,2), (1,2), (1,3), (2,3), (3,4), (4,5), (5,6), (5,7), (6,8), (7,8), (8,9)]
-
Interests = [
(0, "Violin"),(0, "Flute"),(0, "Jazz"),(0, "Choral"),(0, "Bass"),(0, "Storm"),(0, "Klesmer"),
(1, "NoSQL"),(1, "Brass"),(1, "Klesmer"),(1, "Jazz"),(1, "Classical"),
(2, "Funk"),(2, "Woodwinds"),(2, "scipy"),(2, "numpy"),(2, "statsmodels"),(2, "pandas"),
(3, "Dance"),(3, "regression"),(3, "Vocal"),
(4, "Orchestral"),(4, "regression"),(4, "decision trees"),(4, "libsvm"),
(5, "Funk"),(5, "R"),(5, "Choral"),(5, "Haskell"),(5, "programming languages"),
(6, "Vocal"),(6, "Timpani"),(6, "theory"),(6, "Dance"),
(7, "Orchestral"),(7, "scikit-learn"),(7, "Mahout"),(7, "neural networks",
(8, "neural networks"),(8, "Flute"),(8, "Soul"),(8, "Violin"),
(9, "Choral"),(9, "MapReduce"),(9, "Flute"),
(10, "Funk"),(10, "Brass"),(10, "Dance"),(10, "Violin"),
(11, "Dance"),(11, "Classical"),(11, "Flute"),(11, "Vocal")
-
scientist_who_like(target_interest)
-
user_ids_by_interest
-
interests_by_user_id
-
most_common_interests_with(user)
salaries_and_tenures = [ (83000, 8.7), (88000, 8.1), (48000,0.7), (76000, 6), (69000, 6.5), (76000, 7.5), (60000, 2.5), (83000, 10), (48000, 1.9), (63000, 4.2) ]
salary_by_tenure
average_salary_by_tenure
tenure_bucket
salary_by_tenure_bucket
average_salary_by_bucket
words_and_counts
addresses
sorting
Regular Expressions
https://dev.mysql.com/doc/refman/8.0/en/regexp.html https://dev.mysql.com/doc/refman/8.0/en/regexp.html#operator_regexp
Name Description
NOT REGEXP Negation of REGEXP
REGEXP Whether string matches regular expression
REGEXP_INSTR() Starting index of substring matching regular expression
REGEXP_LIKE() Whether string matches regular expression
REGEXP_REPLACE() Replace substrings matching regular expression
REGEXP_SUBSTR() Return substring matching regular expression
RLIKE Regular Expression Functions and Operators
mysql> SELECT REGEXP_INSTR('dog cat dog', 'dog');
+------------------------------------+
| REGEXP_INSTR('dog cat dog', 'dog') |
+------------------------------------+
| 1 |
+------------------------------------+
mysql> SELECT REGEXP_INSTR('dog cat dog', 'dog', 2);
+---------------------------------------+
| REGEXP_INSTR('dog cat dog', 'dog', 2) |
+---------------------------------------+
| 9 |
+---------------------------------------+
mysql> SELECT REGEXP_INSTR('aa aaa aaaa', 'a{2}');
+-------------------------------------+
| REGEXP_INSTR('aa aaa aaaa', 'a{2}') |
+-------------------------------------+
| 1 |
+-------------------------------------+
mysql> SELECT REGEXP_INSTR('aa aaa aaaa', 'a{4}');
+-------------------------------------+
| REGEXP_INSTR('aa aaa aaaa', 'a{4}') |
+-------------------------------------+
| 8 |
+-------------------------------------+
mysql> SELECT REGEXP_LIKE('CamelCase', 'CAMELCASE');
+---------------------------------------+
| REGEXP_LIKE('CamelCase', 'CAMELCASE') |
+---------------------------------------+
| 1 |
+---------------------------------------+
mysql> SELECT REGEXP_LIKE('CamelCase', 'CAMELCASE' COLLATE utf8mb4_0900_as_cs);
+------------------------------------------------------------------+
| REGEXP_LIKE('CamelCase', 'CAMELCASE' COLLATE utf8mb4_0900_as_cs) |
+------------------------------------------------------------------+
| 0 |
+------------------------------------------------------------------+
mysql> SELECT REGEXP_LIKE('Michael!', '.*');
+-------------------------------+
| REGEXP_LIKE('Michael!', '.*') |
+-------------------------------+
| 1 |
+-------------------------------+
mysql> SELECT REGEXP_LIKE('new*\n*line', 'new\\*.\\*line');
+----------------------------------------------+
| REGEXP_LIKE('new*\n*line', 'new\\*.\\*line') |
+----------------------------------------------+
| 0 |
+----------------------------------------------+
mysql> SELECT REGEXP_LIKE('a', '^[a-d]');
+----------------------------+
| REGEXP_LIKE('a', '^[a-d]') |
+----------------------------+
| 1 |
+----------------------------+
mysql> SELECT REGEXP_LIKE('a', 'A'), REGEXP_LIKE('a', BINARY 'A');
+-----------------------+------------------------------+
| REGEXP_LIKE('a', 'A') | REGEXP_LIKE('a', BINARY 'A') |
+-----------------------+------------------------------+
| 1 | 0 |
+-----------------------+------------------------------+
mysql> SELECT REGEXP_LIKE('abc', 'ABC');
+---------------------------+
| REGEXP_LIKE('abc', 'ABC') |
+---------------------------+
| 1 |
+---------------------------+
mysql> SELECT REGEXP_LIKE('abc', 'ABC', 'c');
+--------------------------------+
| REGEXP_LIKE('abc', 'ABC', 'c') |
+--------------------------------+
| 0 |
+--------------------------------+
mysql> SELECT REGEXP_REPLACE('a b c', 'b', 'X');
+-----------------------------------+
| REGEXP_REPLACE('a b c', 'b', 'X') |
+-----------------------------------+
| a X c |
+-----------------------------------+
mysql> SELECT REGEXP_REPLACE('abc def ghi', '[a-z]+', 'X', 1, 3);
+----------------------------------------------------+
| REGEXP_REPLACE('abc def ghi', '[a-z]+', 'X', 1, 3) |
+----------------------------------------------------+
| abc def X |
+----------------------------------------------------+
mysql> SELECT REGEXP_SUBSTR('abc def ghi', '[a-z]+');
+----------------------------------------+
| REGEXP_SUBSTR('abc def ghi', '[a-z]+') |
+----------------------------------------+
| abc |
+----------------------------------------+
mysql> SELECT REGEXP_SUBSTR('abc def ghi', '[a-z]+', 1, 3);
+----------------------------------------------+
| REGEXP_SUBSTR('abc def ghi', '[a-z]+', 1, 3) |
+----------------------------------------------+
| ghi |
+----------------------------------------------+
mysql> SELECT REGEXP_LIKE('fo\nfo', '^fo$'); -> 0
mysql> SELECT REGEXP_LIKE('fofo', '^fo'); -> 1
-
Match the beginning of a string.
mysql> SELECT REGEXP_LIKE('fo\nfo', '^fo$'); -> 0
mysql> SELECT REGEXP_LIKE('fofo', '^fo'); -> 1
-
$
Match the end of a string.
mysql> SELECT REGEXP_LIKE('fo\no', '^fo\no$'); -> 1
mysql> SELECT REGEXP_LIKE('fo\no', '^fo$'); -> 0
-
.
Match any character (including carriage return and newline, although to match these in the middle of a string, the m (multiple line) match-control character or the (?m) within-pattern modifier must be given).
mysql> SELECT REGEXP_LIKE('fofo', '^f.*$'); -> 1
mysql> SELECT REGEXP_LIKE('fo\r\nfo', '^f.*$'); -> 0
mysql> SELECT REGEXP_LIKE('fo\r\nfo', '^f.*$', 'm'); -> 1
mysql> SELECT REGEXP_LIKE('fo\r\nfo', '(?m)^f.*$'); -> 1
-
a*
Match any sequence of zero or more a characters.
mysql> SELECT REGEXP_LIKE('Ban', '^Ba*n'); -> 1
mysql> SELECT REGEXP_LIKE('Baaan', '^Ba*n'); -> 1
mysql> SELECT REGEXP_LIKE('Bn', '^Ba*n'); -> 1
-
a+
Match any sequence of one or more a characters.
mysql> SELECT REGEXP_LIKE('Ban', '^Ba+n'); -> 1
mysql> SELECT REGEXP_LIKE('Bn', '^Ba+n'); -> 0
-
a?
Match either zero or one a character.
mysql> SELECT REGEXP_LIKE('Bn', '^Ba?n'); -> 1
mysql> SELECT REGEXP_LIKE('Ban', '^Ba?n'); -> 1
mysql> SELECT REGEXP_LIKE('Baan', '^Ba?n'); -> 0
-
de | abc |
Alternation; match either of the sequences de or abc.
mysql> SELECT REGEXP_LIKE('pi', 'pi|apa'); -> 1
mysql> SELECT REGEXP_LIKE('axe', 'pi|apa'); -> 0
mysql> SELECT REGEXP_LIKE('apa', 'pi|apa'); -> 1
mysql> SELECT REGEXP_LIKE('apa', '^(pi|apa)$'); -> 1
mysql> SELECT REGEXP_LIKE('pi', '^(pi|apa)$'); -> 1
mysql> SELECT REGEXP_LIKE('pix', '^(pi|apa)$'); -> 0
-
(abc)*
Match zero or more instances of the sequence abc.
mysql> SELECT REGEXP_LIKE('pi', '^(pi)*$'); -> 1
mysql> SELECT REGEXP_LIKE('pip', '^(pi)*$'); -> 0
mysql> SELECT REGEXP_LIKE('pipi', '^(pi)*$'); -> 1
-
{1}, {2,3}
Repetition; {n} and {m,n} notation provide a more general way of writing regular expressions that match many occurrences of the previous atom (or “piece”) of the pattern. m and n are integers.
-
###a* Can be written as a{0,}.
###a+ Can be written as a{1,}.
###a? Can be written as a{0,1}.
To be more precise, a{n} matches exactly n instances of a. a{n,} matches n or more instances of a. a{m,n} matches m through n instances of a, inclusive. If both m and n are given, m must be less than or equal to n.
mysql> SELECT REGEXP_LIKE('abcde', 'a[bcd]{2}e'); -> 0
mysql> SELECT REGEXP_LIKE('abcde', 'a[bcd]{3}e'); -> 1
mysql> SELECT REGEXP_LIKE('abcde', 'a[bcd]{1,10}e'); -> 1
[a-dX], [^a-dX]
-
Matches any character that is (or is not, if ^ is used) either a, b, c, d or X. A - character between two other characters forms a range that matches all characters from the first character to the second. For example, [0-9] matches any decimal digit. To include a literal ] character, it must immediately follow the opening bracket [. To include a literal - character, it must be written first or last. Any character that does not have a defined special meaning inside a [] pair matches only itself.
-
mysql> SELECT REGEXP_LIKE('aXbc', '[a-dXYZ]'); -> 1
mysql> SELECT REGEXP_LIKE('aXbc', '^[a-dXYZ]$'); -> 0
mysql> SELECT REGEXP_LIKE('aXbc', '^[a-dXYZ]+$'); -> 1
mysql> SELECT REGEXP_LIKE('aXbc', '^[^a-dXYZ]+$'); -> 0
mysql> SELECT REGEXP_LIKE('gheis', '^[^a-dXYZ]+$'); -> 1
mysql> SELECT REGEXP_LIKE('gheisa', '^[^a-dXYZ]+$'); -> 0
-
[=character_class=]
Within a bracket expression (written using [ and ]), [=character_class=] represents an equivalence class. It matches all characters with the same collation value, including itself. For example, if o and (+) are the members of an equivalence class, [[=o=]], [[=(+)=]], and [o(+)] are all synonymous. An equivalence class may not be used as an endpoint of a range.
-
[:character_class:]
Within a bracket expression (written using [ and ]), [:character_class:]
represents a character class that matches all characters belonging to that class. The following table lists the standard class names. These names stand for the character classes defined in the ctype(3) manual page. A particular locale may provide other class names. A character class may not be used as an endpoint of a range.
-
Character Class Name Meaning
alnum Alphanumeric characters
alpha Alphabetic characters
blank Whitespace characters
cntrl Control characters
digit Digit characters
graph Graphic characters
lower Lowercase alphabetic characters
print Graphic or space characters
punct Punctuation characters
space Space, tab, newline, and carriage return
upper Uppercase alphabetic characters
xdigit Hexadecimal digit characters
-
mysql> SELECT REGEXP_LIKE('justalnums', '[[:alnum:]]+'); -> 1
mysql> SELECT REGEXP_LIKE('!!', '[[:alnum:]]+'); -> 0
-
To use a literal instance of a special character in a regular expression, precede it by two backslash () characters. The MySQL parser interprets one of the backslashes, and the regular expression library interprets the other. For example, to match the string 1+2 that contains the special + character, only the last of the following regular expressions is the correct one:
mysql> SELECT REGEXP_LIKE('1+2', '1+2'); -> 0
mysql> SELECT REGEXP_LIKE('1+2', '1\+2'); -> 0
mysql> SELECT REGEXP_LIKE('1+2', '1\\+2'); -> 1
-
Regular Expression Resource Control
REGEXP_LIKE() and similar functions use resources that can be controlled by setting system variables:
The match engine uses memory for its internal stack. To control the maximum available memory for the stack in bytes, set the regexp_stack_limit system variable.
The match engine operates in steps. To control the maximum number of steps performed by the engine (and thus indirectly the execution time), set the regexp_time_limit system variable. Because this limit is expressed as number of steps, it affects execution time only indirectly. Typically, it is on the order of milliseconds.
-
[Spencer addressed in seperate deck]
-
For repetition counts ({n} and {m,n} notation), the Spencer library has a maximum of 255. ICU has no such limit, although the maximum number of match engine steps can be limited by setting the regexp_time_limit system variable.
-
ICU interprets parentheses as metacharacters. To specify a literal open or close parenthesis ( in a regular expression, it must be escaped:
mysql> SELECT REGEXP_LIKE('(', '(');
ERROR 3692 (HY000): Mismatched parenthesis in regular expression.
mysql> SELECT REGEXP_LIKE('(', '\\(');
+-------------------------+
| REGEXP_LIKE('(', '\\(') |
+-------------------------+
| 1 |
+-------------------------+
mysql> SELECT REGEXP_LIKE(')', ')');
ERROR 3692 (HY000): Mismatched parenthesis in regular expression.
mysql> SELECT REGEXP_LIKE(')', '\\)');
+-------------------------+
| REGEXP_LIKE(')', '\\)') |
+-------------------------+
| 1 |
+-------------------------+
ICU also interprets square brackets as metacharacters, but only the opening square bracket need be escaped to be used as a literal character:
mysql> SELECT REGEXP_LIKE('[', '[');
ERROR 3696 (HY000): The regular expression contains an
unclosed bracket expression.
mysql> SELECT REGEXP_LIKE('[', '\\[');
+-------------------------+
| REGEXP_LIKE('[', '\\[') |
+-------------------------+
| 1 |
+-------------------------+
mysql> SELECT REGEXP_LIKE(']', ']');
+-----------------------+
| REGEXP_LIKE(']', ']') |
+-----------------------+
| 1 |
+-----------------------+
-
Project -type -Author
Awards
Awards awarded -award -category -date -awardee -project
Height, weight, age Grades is_friend friend_counts
(Sorted)
max_friends
Quantile? http://rpbouman.blogspot.com/2008/07/mysql-percentile-aftermath-calculating.html https://www.codevat.com/articles/mysql-quantiles/
Variance:
======
SELECT VARIANCE(total_cost) FROM purchase;
+----------------------+
| VARIANCE(total_cost) |
+----------------------+
| 99472.222222 |
+----------------------+
1 row in set (0.00 sec)
Median:
======
SELECT AVG(dd.val) as median_val
FROM (
SELECT d.val, @rownum:=@rownum+1 as `row_number`, @total_rows:=@rownum
FROM data d, (SELECT @rownum:=0) r
WHERE d.val is NOT NULL
-- put some where clause here
ORDER BY d.val
) as dd
WHERE dd.row_number IN ( FLOOR((@total_rows+1)/2), FLOOR((@total_rows+2)/2) );
I propose a faster way. Get the row count:
SELECT CEIL(COUNT(*)/2) FROM data;
Then take the middle value in a sorted subquery:
SELECT max(val) FROM (SELECT val FROM data ORDER BY val limit @middlevalue) x;
Mean:
-
SELECT AVG(Price) AS AveragePrice FROM Products;
#Standard Deviation
mysql> SELECT STDDEV(total_cost)
-> FROM purchase;
+--------------------+
| STDDEV(total_cost) |
+--------------------+
| 315.392172 |
+--------------------+
1 row in set (0.00 sec)
======================================
INSERT INTO table_name (col_1, col_2) VALUES (value_1, value_2), (value_1, value_2),
(value_1, value_2), (value_1, value_2),
(value_1, value_2), (value_1, value_2),
(value_1, value_2), (value_1, value_2),
(value_1, value_2), (value_1, value_2),
(value_1, value_2), (value_1, value_2),
(value_1, value_2), (value_1, value_2);
INSERT INTO table_name (col_1, col_2) VALUES (value_1, value_2), (value_1, value_2),
(value_1, value_2), (value_1, value_2),
(value_1, value_2), (value_1, value_2),
(value_1, value_2), (value_1, value_2),
(value_1, value_2), (value_1, value_2),
(value_1, value_2), (value_1, value_2),
(value_1, value_2), (value_1, value_2);
INSERT INTO table_name (col_1, col_2) VALUES (value_1, value_2), (value_1, value_2),
(value_1, value_2), (value_1, value_2),
(value_1, value_2), (value_1, value_2),
(value_1, value_2), (value_1, value_2),
(value_1, value_2), (value_1, value_2),
(value_1, value_2), (value_1, value_2),
(value_1, value_2), (value_1, value_2);
INSERT INTO table_name (col_1, col_2) VALUES (value_1, value_2), (value_1, value_2),
(value_1, value_2), (value_1, value_2),
(value_1, value_2), (value_1, value_2),
(value_1, value_2), (value_1, value_2),
(value_1, value_2), (value_1, value_2),
(value_1, value_2), (value_1, value_2),
(value_1, value_2), (value_1, value_2);
-
Mean:
=======
SELECT AVG(Price) AS AveragePrice FROM Products;
-
Standard Deviation
mysql> SELECT STDDEV(total_cost)
-> FROM purchase;
+--------------------+
| STDDEV(total_cost) |
+--------------------+
| 315.392172 |
+--------------------+
1 row in set (0.00 sec)
-
INSERT INTO table_name (col_1, col_2) VALUES (value_1, value_2), (value_1, value_2),
(value_1, value_2), (value_1, value_2),
(value_1, value_2), (value_1, value_2),
(value_1, value_2), (value_1, value_2),
(value_1, value_2), (value_1, value_2),
(value_1, value_2), (value_1, value_2),
(value_1, value_2), (value_1, value_2);
-
INSERT INTO table_name (col_1, col_2) VALUES (value_1, value_2), (value_1, value_2),
(value_1, value_2), (value_1, value_2),
(value_1, value_2), (value_1, value_2),
(value_1, value_2), (value_1, value_2),
(value_1, value_2), (value_1, value_2),
(value_1, value_2), (value_1, value_2),
(value_1, value_2), (value_1, value_2);
-
INSERT INTO table_name (col_1, col_2) VALUES (value_1, value_2), (value_1, value_2),
(value_1, value_2), (value_1, value_2),
(value_1, value_2), (value_1, value_2),
(value_1, value_2), (value_1, value_2),
(value_1, value_2), (value_1, value_2),
(value_1, value_2), (value_1, value_2),
(value_1, value_2), (value_1, value_2);
-
INSERT INTO table_name (col_1, col_2) VALUES (value_1, value_2), (value_1, value_2),
(value_1, value_2), (value_1, value_2),
(value_1, value_2), (value_1, value_2),
(value_1, value_2), (value_1, value_2),
(value_1, value_2), (value_1, value_2),
(value_1, value_2), (value_1, value_2),
(value_1, value_2), (value_1, value_2);
-