Chapter 1 Data Structures

1.1 Numbers, Strings, Lists and Tuples

1.1.1 Numeric Basics

Go back to fan’s Python Code Examples Repository (bookdown site) or the pyfan Package (API).

import numpy as np

1.1.1.1 Two Digit Numbers Coding Conditional Information

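We have numbers between 0 and 99 that indicate different estimation specifications: the ones digit is the estimation tolerance level, and the tens digit is the minimization algorithm. The example below also runs numbers above 99, for which the "tens" part is the whole number with its ones digit dropped.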

# Split each specification code into its ones digit (estimation tolerance
# level) and the part above it (minimization algorithm).
ls_it_esti_optsalgo = [0, 1, 10, 15, 23, 89, 90, 99, 900, 901, 999, 1000]
for it_esti_optsalgo in ls_it_esti_optsalgo:
    # divmod stays in exact integer arithmetic; int(np.floor(x / 10)) goes
    # through a float and becomes inexact once x exceeds 2**53.
    it_esti_optsalgo_tens, it_esti_optsalgo_digits = divmod(it_esti_optsalgo, 10)
    print(f'{it_esti_optsalgo_tens=}, {it_esti_optsalgo_digits=}')
## it_esti_optsalgo_tens=0, it_esti_optsalgo_digits=0
## it_esti_optsalgo_tens=0, it_esti_optsalgo_digits=1
## it_esti_optsalgo_tens=1, it_esti_optsalgo_digits=0
## it_esti_optsalgo_tens=1, it_esti_optsalgo_digits=5
## it_esti_optsalgo_tens=2, it_esti_optsalgo_digits=3
## it_esti_optsalgo_tens=8, it_esti_optsalgo_digits=9
## it_esti_optsalgo_tens=9, it_esti_optsalgo_digits=0
## it_esti_optsalgo_tens=9, it_esti_optsalgo_digits=9
## it_esti_optsalgo_tens=90, it_esti_optsalgo_digits=0
## it_esti_optsalgo_tens=90, it_esti_optsalgo_digits=1
## it_esti_optsalgo_tens=99, it_esti_optsalgo_digits=9
## it_esti_optsalgo_tens=100, it_esti_optsalgo_digits=0

1.1.2 Tuple

Go back to fan’s Python Code Examples Repository (bookdown site) or the pyfan Package (API).

import numpy as np

1.1.2.1 Deal Variables

Define a number of variables in one line.

# Deal three values out to three names in a single assignment statement
(st_cta, st_ctb, it_ctc) = ('e', '20201025x_esr', 2)
print(f'{st_cta=} and {st_ctb=} and {it_ctc=}')
## st_cta='e' and st_ctb='20201025x_esr' and it_ctc=2

1.1.2.2 Tuple Example

A tuple is created with parentheses on the sides (or with no parentheses at all), not brackets. Values in a tuple can be accessed by index, as in a list.

# Define the same tuple twice: once with parentheses and once without
tp_abc = ('a', 'b', 'c')
tp_abc_noparent = 'a', 'b', 'c'
print(f'{tp_abc=}')
## tp_abc=('a', 'b', 'c')
print(f'{len(tp_abc)=}')
## len(tp_abc)=3
print(f'{tp_abc_noparent=}')
## tp_abc_noparent=('a', 'b', 'c')
# Both spellings build equal tuples
print(f'{(tp_abc==tp_abc_noparent)=}')
## (tp_abc==tp_abc_noparent)=True
# Check type: a tuple is not a list
print(f'{isinstance(tp_abc, list)=}')
## isinstance(tp_abc, list)=False
print(f'{isinstance(tp_abc, tuple)=}')
## isinstance(tp_abc, tuple)=True
# Select an element by position, exactly as with a list
print(f'{tp_abc[1]=}')
## tp_abc[1]='b'

Convert tuple to a list:

# Convert the tuple to a list: list() copies the elements directly, so no
# element-by-element comprehension is needed
ls_abc = list(tp_abc)
print(f'{ls_abc=}')
## ls_abc=['a', 'b', 'c']
# The result is a list and no longer a tuple
print(f'{isinstance(ls_abc, list)=}')
## isinstance(ls_abc, list)=True
print(f'{isinstance(ls_abc, tuple)=}')
## isinstance(ls_abc, tuple)=False

Since the tuple is not mutable, we can not change values inside the tuple:

# Start from a fresh tuple
tp_abc = ('a', 'b', 'c')
# Item assignment on a tuple raises TypeError; catch it and show the message
try:
    tp_abc[0] = 'efg'
except TypeError as error:
    print(f'Caught this error: {error!r}')
## Caught this error: TypeError("'tuple' object does not support item assignment")

1.1.2.3 Function Returns Tuple and Unpack

When a function returns multiple comma-separated items, they are packed into a tuple. Each element of the tuple can be accessed by index, and the tuple can be unpacked:

# Stand-in for a function's multiple return values: comma-separated values
# are packed into one tuple
tp_results = ('a', 123, [1, 2, 3])
# Unpack the three elements into three names, position by position
(a_st, b_int, c_ls_int) = tp_results
# Show the packed tuple, then each unpacked piece
print(f'{tp_results=}')
## tp_results=('a', 123, [1, 2, 3])
print(f'{a_st=}')
## a_st='a'
print(f'{b_int=}')
## b_int=123
print(f'{c_ls_int=}')
## c_ls_int=[1, 2, 3]

Unpack only a subset of the elements in the tuple:

# Keep only the first element; each remaining position gets a throwaway name
(a_st_self_m1, _, _) = tp_results
# Shorter alternative: a starred throwaway absorbs everything after the first
a_st_self_m2, *_ = tp_results
# Both approaches yield the same first element
print(f'{a_st_self_m1=}')
## a_st_self_m1='a'
print(f'{a_st_self_m2=}')
## a_st_self_m2='a'

see unpack the first two elements in list/tuple.

1.1.3 List

Go back to fan’s Python Code Examples Repository (bookdown site) or the pyfan Package (API).

import numpy as np

1.1.3.1 Loop Through a List get Value and Index

There is a list, loop through it, get out both the index and the value of each indexed element together.

# A list whose elements have mixed types: strings, a nested list, an integer
ls_ob_combo_type = [
    "e",
    "20201025x_esr_list_tKap_mlt_ce1a2",
    ["esti_param.kappa_ce9901", "esti_param.kappa_ce0209"],
    1,
    "C1E31M3S3=1",
]
# enumerate yields each element together with its index
for it_idx, ob_val in enumerate(ls_ob_combo_type):
    print(f'{it_idx=} and {ob_val=}')
## it_idx=0 and ob_val='e'
## it_idx=1 and ob_val='20201025x_esr_list_tKap_mlt_ce1a2'
## it_idx=2 and ob_val=['esti_param.kappa_ce9901', 'esti_param.kappa_ce0209']
## it_idx=3 and ob_val=1
## it_idx=4 and ob_val='C1E31M3S3=1'

1.1.3.2 Parse Elements of a List

A list has multiple elements, deal them out.

# A two-element list dealt out to two names by sequence unpacking
list_test = ['C1E126M4S3', 2]
compesti_short_name, esti_top_which = list_test
print(f'{compesti_short_name=} and {esti_top_which=}')
## compesti_short_name='C1E126M4S3' and esti_top_which=2

1.1.3.3 Check if Any Element of a List is in Another List

There is a list with two elements, and another list with, say, four elements. Check whether any element of the first list is in the second list.

# Candidate lists (a1, a2, a3) and the lists they are checked against (b, c)
ar_int_list_a1 = [1, 2]
ar_int_list_a2 = [2, 1111]
ar_int_list_a3 = [2111, 1111]
ar_int_list_b = [1, 2, 3, 11, 999]
ar_int_list_c = [2]

# Two lists share an element exactly when they are not disjoint
check_any_a1_in_b = not set(ar_int_list_a1).isdisjoint(ar_int_list_b)
check_any_a2_in_b = not set(ar_int_list_a2).isdisjoint(ar_int_list_b)
check_any_a3_in_b = not set(ar_int_list_a3).isdisjoint(ar_int_list_b)
check_any_a1_in_c = not set(ar_int_list_a1).isdisjoint(ar_int_list_c)

print(f'{check_any_a1_in_b=}')
## check_any_a1_in_b=True
print(f'{check_any_a2_in_b=}')
## check_any_a2_in_b=True
print(f'{check_any_a3_in_b=}')
## check_any_a3_in_b=False
print(f'{check_any_a1_in_c=}')
## check_any_a1_in_c=True

1.1.3.4 Convert a List to a String List

Given a list of string and numeric values, convert to a list of string values. The MAP function is like apply in R.

# Mixed strings and integers
ls_spec_key = ['ng_s_d', 'esti_test_11_simu', 2, 3]
# map applies str to every element; unpack the lazy map into a list
ls_st_spec_key = [*map(str, ls_spec_key)]
print(ls_st_spec_key)
## ['ng_s_d', 'esti_test_11_simu', '2', '3']

Additionally, append some common element to each element of the string using MAP.

# Mixed strings and integers
ls_spec_key = ['ng_s_d', 'esti_test_11_simu', 2, 3]
# The mapped function converts each element to text and prepends a common tag
ls_st_spec_key = list(map(lambda ob_val: f'add++{ob_val}', ls_spec_key))
print(ls_st_spec_key)
## ['add++ng_s_d', 'add++esti_test_11_simu', 'add++2', 'add++3']

Equivalently, via list comprehension

# Mixed strings and integers
ls_spec_key = ['ng_s_d', 'esti_test_11_simu', 2, 3]
# Same conversion written as a list comprehension instead of map
ls_st_spec_key = [f'list_comprehension{spec_key}' for spec_key in ls_spec_key]
print(ls_st_spec_key)
## ['list_comprehensionng_s_d', 'list_comprehensionesti_test_11_simu', 'list_comprehension2', 'list_comprehension3']

1.1.3.5 Concatenate a List to a String with Separator

Given a list of strings and numeric data types, concatenate list to a string with some separator. Also in reverse, generate a list by breaking a string joined by some separator.

# Mixed strings and integers, to be glued together with a separator
ls_spec_key = ['ng_s_d', 'esti_test_11_simu', 2, 3]
st_separator = '='
# join() accepts only strings, so every element goes through str first.
# map(str, ...) feeds join directly: no lambda wrapper or intermediate list.
st_spec_key = st_separator.join(map(str, ls_spec_key))
print(st_spec_key)
## ng_s_d=esti_test_11_simu=2=3

Now break string apart:

# Wrap every element in dollar signs, then join the pieces with "="
st_spec_key = '='.join([f'${ob_val}$' for ob_val in ['ng_s_d', 'esti_test_11_simu', 2, 3]])
# Splitting on the same separator recovers the pieces (all as strings)
print(st_spec_key.split('='))
## ['$ng_s_d$', '$esti_test_11_simu$', '$2$', '$3$']

1.1.3.6 Add Nth Element to List when Nth Element Does not Exist

There is a list with 2 elements, check if the list has 3 elements, if not, add another element.

ls_string_A = ['c', '20180918']
ls_string_B = ['c', '20180918', ['esti_param.alpha_k']]

# Give two-element lists a None third element; other lists are left alone
for ls_string in (ls_string_A, ls_string_B):
    if len(ls_string) == 2:
        ls_string.append(None)
    print(ls_string)
  
## ['c', '20180918', None]
## ['c', '20180918', ['esti_param.alpha_k']]

1.1.3.7 Check If List Has N Elements of None for Some Elements

In the example below, for A, B and C, do something, for D and E do something else.

ls_string_A = ['c', '20180918']
ls_string_B = ['c', '20180918', None]
ls_string_C = ['c', '20180918', None, None]
ls_string_D = ['c', '20180918', ['esti_param.alpha_k'], None]
ls_string_E = ['c', '20180918', ['esti_param.alpha_k'], 5]

# Print the whole list only when a third element exists and is not None;
# otherwise print just the first two elements
for ls_string in (ls_string_A, ls_string_B, ls_string_C, ls_string_D, ls_string_E):
    bl_has_third = len(ls_string) > 2 and ls_string[2] is not None
    print(ls_string if bl_has_third else ls_string[0:2])
  
## ['c', '20180918']
## ['c', '20180918']
## ['c', '20180918']
## ['c', '20180918', ['esti_param.alpha_k'], None]
## ['c', '20180918', ['esti_param.alpha_k'], 5]

1.1.3.8 Add a Default Value to Nth Element of List

There is a string list with potentially three elements, but sometimes the input only has two elements. Provide a default third element value if the third element is None or if the string list only has two elements.


ls_string_A = ['c', '20180918']
ls_string_B = ['c', '20180918', None]
ls_string_C = ['c', '20180918', ['esti_param.alpha_k']]

for ls_string in (ls_string_A, ls_string_B, ls_string_C):
    if len(ls_string) < 3:
        # Situation A: no third element yet, so add the default
        ls_string.append(['esti_param.alpha_k'])
    elif ls_string[2] is None:
        # Situation B: third element present but None, so overwrite it
        ls_string[2] = ['esti_param.alpha_k']
    # Situation C: third element already set, nothing to do

    print(ls_string)
## ['c', '20180918', ['esti_param.alpha_k']]
## ['c', '20180918', ['esti_param.alpha_k']]
## ['c', '20180918', ['esti_param.alpha_k']]

Now do the same thing for a numeric list:


ls_string_A = [11, 22]
ls_string_B = [11, 22, None]
ls_string_C = [11, 22, 33]

for ls_string in (ls_string_A, ls_string_B, ls_string_C):
    if len(ls_string) < 3:
        # Situation A: no third element yet, so add the default
        ls_string.append(33)
    elif ls_string[2] is None:
        # Situation B: third element present but None, so overwrite it
        ls_string[2] = 33
    # Situation C: third element already set, nothing to do

    print(ls_string)
## [11, 22, 33]
## [11, 22, 33]
## [11, 22, 33]

1.1.4 Strings

Go back to fan’s Python Code Examples Repository (bookdown site) or the pyfan Package (API).

import numpy as np
import string as string
import random as random
import pprint

1.1.4.1 List of Array Count Frequency

There is a list of strings with repeating values; count the frequency of each unique string.

from collections import Counter

ls_st_status = ["success", "running", "running", "running", "finished", "pending", "pending"]
# Counter tallies every status in a single pass. most_common() orders the
# tallies from most to least frequent, with ties kept in first-appearance
# order, so the printout is the same on every run. Iterating set(...) instead
# gives an order that varies with string hash randomization, and calling
# list.count once per unique value rescans the whole list each time.
it_status_total = len(ls_st_status)
ls_freq = [[f'{it_count} of {it_status_total} {st_status}']
           for st_status, it_count in Counter(ls_st_status).most_common()]
pprint.pprint(ls_freq)
## [['3 of 7 running'],
##  ['2 of 7 pending'],
##  ['1 of 7 success'],
##  ['1 of 7 finished']]

1.1.4.2 Get Substring

Given string, get substring after a word.

st_func_stack_code = 'dc_ls_combo_type = pyfan_amto_lsdcconvert.ff_ls2dc(ls_combo_type,'
# Step 1: keep everything that follows the search word
st_search_break = 'ff_ls2dc('
st_string_after = st_func_stack_code.split(st_search_break)[1]
# Step 2: cut that remainder at the first comma. This must split the result
# of step 1, not the original string: re-splitting the original and taking
# the piece after the comma throws step 1 away and returns an empty string.
st_search_break = ','
st_string_after = st_string_after.split(st_search_break)[0]
print(f'{st_string_after=}')
## st_string_after='ls_combo_type'

1.1.4.3 Generate Random Strings

Generate some random strings:

# Fix the seed so the same word is generated on every run
random.seed(123)
it_word_length = 5
# Draw the lowercase letters one at a time, then glue them into one word
ls_st_letters = [random.choice(string.ascii_lowercase) for _ in range(it_word_length)]
st_rand_word = ''.join(ls_st_letters).capitalize()
print(f'{st_rand_word=}')
## st_rand_word='Bicyn'

Generate a block of random text and then convert it to a list of strings:

# Fix the seed so the same block of letters is generated on every run
random.seed(123)
it_words_count = 15
it_word_length = 5
# One long run of random lowercase letters, enough for all the words
it_block_length = it_word_length * it_words_count
st_rand_word_block = ''.join([random.choice(string.ascii_lowercase) for _ in range(it_block_length)])
# Cut the block into consecutive fixed-length words and capitalize each one
ls_st_rand_word = []
for it_start in range(0, it_block_length, it_word_length):
    ls_st_rand_word.append(st_rand_word_block[it_start:it_start + it_word_length].capitalize())
print(f'{ls_st_rand_word=}')
## ls_st_rand_word=['Bicyn', 'Idbmr', 'Rkkbf', 'Ekrkw', 'Hfany', 'Ctmca', 'Kxodb', 'Cveez', 'Ajnsp', 'Ipbyj', 'Kqzpg', 'Tuqsz', 'Kamyu', 'Qnvru', 'Zvtpq']

Reshape the array of words to a matrix:

# Turn the flat list of 15 words into an array with 3 rows and 5 columns
mt_st_rand_word = np.array(ls_st_rand_word).reshape(3, 5)
print(f'{mt_st_rand_word=}')
## mt_st_rand_word=array([['Bicyn', 'Idbmr', 'Rkkbf', 'Ekrkw', 'Hfany'],
##        ['Ctmca', 'Kxodb', 'Cveez', 'Ajnsp', 'Ipbyj'],
##        ['Kqzpg', 'Tuqsz', 'Kamyu', 'Qnvru', 'Zvtpq']], dtype='<U5')
# Confirm the dimensions and the container type
print(f'{mt_st_rand_word.shape=}')
## mt_st_rand_word.shape=(3, 5)
print(f'{type(mt_st_rand_word)=}')
## type(mt_st_rand_word)=<class 'numpy.ndarray'>

1.1.4.4 Add String Suffix to Numeric Array

Given a numeric array, attach a string to each element, for example to generate sequential column names that start with the letter s:

# Put the letter s in front of each integer 1, 2 to form column names
ar_st_colnames = [f's{it_col}' for it_col in np.arange(1, 3)]
print(ar_st_colnames)
## ['s1', 's2']

1.1.4.5 Search if Names Include Strings

Given a list of strings, loop through it but skip any string that contains an element of another list of strings.

# Substrings that mark a string as one to skip, and the strings to loop over
ls_st_ignore = ['abc', 'efg', 'xyz']
ls_st_loop = ['ab cefg sdf', '12345', 'xyz', 'abc xyz', 'good morning']

# Skip a string when any ignore term occurs inside it. any() states the
# yes/no question directly and stops at the first match, instead of summing
# a fully built list of booleans.
for st_loop in ls_st_loop:
    bl_skip = any(st_ignore in st_loop for st_ignore in ls_st_ignore)
    if bl_skip:
        print('skip:', st_loop)
    else:
        print('not skip:', st_loop)
## skip: ab cefg sdf
## not skip: 12345
## skip: xyz
## skip: abc xyz
## not skip: good morning

1.1.4.6 Replace a Set of Strings in String

Replace terms in string

# Text in which terms will be swapped
st_full = """
abc is a great efg, probably xyz. Yes, xyz is great, like efg. 
eft good, EFG capitalized, efg good again. 
A B C or abc or ABC. Interesting xyz. 
"""

# Old terms and their replacements, matched position by position
ls_st_old = ['abc', 'efg', 'xyz']
ls_st_new = ['123', '456', '789']

# Pair each old term with its replacement and apply the swaps in order;
# str.replace is case-sensitive, so EFG and ABC are left untouched
dc_st_swap = dict(zip(ls_st_old, ls_st_new))
for old, new in dc_st_swap.items():
    st_full = st_full.replace(old, new)

# Show the text after all replacements
print(st_full)
## 
## 123 is a great 456, probably 789. Yes, 789 is great, like 456. 
## eft good, EFG capitalized, 456 good again. 
## A B C or 123 or ABC. Interesting 789.

1.1.4.7 Wrap String with Fixed Width

Given a long string, wrap it into multiple lines with fixed width.

import textwrap

# A long path inside a triple-quoted string, so the text starts and ends
# with a line break
st_path = """
C:/Users/fan/Documents/Dropbox (UH-ECON)/Project Emily Minority Survey/EthLang/reg_lang_abi_cls_mino/tab3_fm/attain_m_vs_f/tab3_mand_talk_m2c_hfracle02.tex
"""

# Break the text into lines of at most 20 characters and rejoin them with
# line breaks
it_wrap_width = 20
st_wrapped = '\n'.join(textwrap.wrap(st_path, width=it_wrap_width))
print(st_wrapped)
##  C:/Users/fan/Docume
## nts/Dropbox (UH-
## ECON)/Project Emily
## Minority Survey/EthL
## ang/reg_lang_abi_cls
## _mino/tab3_fm/attain
## _m_vs_f/tab3_mand_ta
## lk_m2c_hfracle02.tex

Combine Strings that are wrapped and not Wrapped


# Two paths: a long one and a shorter one
st_path_a = "C:/Users/fan/Documents/Dropbox (UH-ECON)/Project Emily Minority Survey/EthLang/reg_lang_abi_cls_mino/tab3_fm/attain_m_vs_f/tab3_mand_talk_m2c_hfracle02.tex"
st_path_b = 'C:/Users/fan/R4Econ/support/development/fs_packaging.html'

# Unwrapped upper-case headings combined with wrapped paths; the empty
# string in the middle produces the blank line between the two records
ls_st_record_parts = [
    'First Path:'.upper(),
    textwrap.fill(st_path_a, width=25),
    '',
    'Second Path:'.upper(),
    textwrap.fill(st_path_b, width=25),
]
str_dc_records = '\n'.join(ls_st_record_parts)

# Print
print(str_dc_records)
## FIRST PATH:
## C:/Users/fan/Documents/Dr
## opbox (UH-ECON)/Project
## Emily Minority Survey/Eth
## Lang/reg_lang_abi_cls_min
## o/tab3_fm/attain_m_vs_f/t
## ab3_mand_talk_m2c_hfracle
## 02.tex
## 
## SECOND PATH:
## C:/Users/fan/R4Econ/suppo
## rt/development/fs_packagi
## ng.html

1.1.4.8 Change Round for Lists of String Estimates

Here we have two strings in a list, with point estimates and corresponding standard errors. Estimates are separated by commas. We want to change the number of decimal points shown and set appropriate roundings. Several steps: (1) split string by comma (2) Loop over (3) extract numerical elements (4) recover

# Number of decimals to keep in every estimate and standard error
it_round_decimal = 1
ls_st_all_estimates = ["84.506***, 91.758***, 107.950***, 115.879***, 133.560***\n",
                       "(7.796), (4.848), (4.111), (5.044), (6.961)\n",
                       "68.180***, 47.921***, 47.127***, 51.366***, 41.764***\n",
                       "(8.986), (5.368), (4.995), (5.099), (8.637)\n"]

# Characters that decorate a number: significance stars and the parentheses
# around standard errors. One translate call removes all of them.
dc_strip_decoration = str.maketrans('', '', '*()')
# Every re-rounded line, in input order, so the results stay available after
# the loop instead of only the last line
ls_st_text_out = []

for st_all_estimates in ls_st_all_estimates:

    # Delete the line break at the end of the line, then split on commas
    st_all_estimates = st_all_estimates.replace("\n", "")
    ls_st_estimates = st_all_estimates.split(",")

    # Loop over each value separated by commas
    for it_esti_ctr, st_esti in enumerate(ls_st_estimates):

        # Bare number: surrounding blanks and all decoration removed
        st_esti_numeric = st_esti.strip().translate(dc_strip_decoration)

        # Round, then format with a fixed number of decimals so that
        # trailing zeros (e.g. 108.0) are kept
        fl_esti_rounded = round(float(st_esti_numeric), it_round_decimal)
        st_esti_rounded = f'{fl_esti_rounded:.{it_round_decimal}f}'
        print(f'{st_esti=} + {st_esti_numeric=} + {st_esti_rounded=}')

        # Swap only the number inside the original text, so the leading
        # blank, the stars and the parentheses all stay in place
        ls_st_estimates[it_esti_ctr] = st_esti.replace(st_esti_numeric, st_esti_rounded)

    # Join the updated estimates back into one comma-separated line
    st_text_out = ','.join(ls_st_estimates)
    ls_st_text_out.append(st_text_out)
    print(f'\n{st_text_out=}\n')
    print()
## st_esti='84.506***' + st_esti_numeric='84.506' + st_esti_rounded='84.5'
## st_esti=' 91.758***' + st_esti_numeric='91.758' + st_esti_rounded='91.8'
## st_esti=' 107.950***' + st_esti_numeric='107.950' + st_esti_rounded='108.0'
## st_esti=' 115.879***' + st_esti_numeric='115.879' + st_esti_rounded='115.9'
## st_esti=' 133.560***' + st_esti_numeric='133.560' + st_esti_rounded='133.6'
## 
## st_text_out='84.5***, 91.8***, 108.0***, 115.9***, 133.6***'
## 
## 
## st_esti='(7.796)' + st_esti_numeric='7.796' + st_esti_rounded='7.8'
## st_esti=' (4.848)' + st_esti_numeric='4.848' + st_esti_rounded='4.8'
## st_esti=' (4.111)' + st_esti_numeric='4.111' + st_esti_rounded='4.1'
## st_esti=' (5.044)' + st_esti_numeric='5.044' + st_esti_rounded='5.0'
## st_esti=' (6.961)' + st_esti_numeric='6.961' + st_esti_rounded='7.0'
## 
## st_text_out='(7.8), (4.8), (4.1), (5.0), (7.0)'
## 
## 
## st_esti='68.180***' + st_esti_numeric='68.180' + st_esti_rounded='68.2'
## st_esti=' 47.921***' + st_esti_numeric='47.921' + st_esti_rounded='47.9'
## st_esti=' 47.127***' + st_esti_numeric='47.127' + st_esti_rounded='47.1'
## st_esti=' 51.366***' + st_esti_numeric='51.366' + st_esti_rounded='51.4'
## st_esti=' 41.764***' + st_esti_numeric='41.764' + st_esti_rounded='41.8'
## 
## st_text_out='68.2***, 47.9***, 47.1***, 51.4***, 41.8***'
## 
## 
## st_esti='(8.986)' + st_esti_numeric='8.986' + st_esti_rounded='9.0'
## st_esti=' (5.368)' + st_esti_numeric='5.368' + st_esti_rounded='5.4'
## st_esti=' (4.995)' + st_esti_numeric='4.995' + st_esti_rounded='5.0'
## st_esti=' (5.099)' + st_esti_numeric='5.099' + st_esti_rounded='5.1'
## st_esti=' (8.637)' + st_esti_numeric='8.637' + st_esti_rounded='8.6'
## 
## st_text_out='(9.0), (5.4), (5.0), (5.1), (8.6)'

1.2 Dictionary

1.2.1 Dictionary

Go back to fan’s Python Code Examples Repository (bookdown site) or the pyfan Package (API).

import pprint
import copy as copy

1.2.1.1 Loop Through a Dictionary

Given a dictionary, loop through all of its elements

# Integer keys mapped to specification names
dc_speckey_dict = {
    0: 'mpoly_1',
    1: 'ng_s_t',
    2: 'ng_s_d',
    3: 'ng_p_t',
    4: 'ng_p_d',
}
# items() yields each key together with its value
for speckey_key, speckey_val in dc_speckey_dict.items():
    print(f'speckey_key:{speckey_key}, speckey_val:{speckey_val}')
## speckey_key:0, speckey_val:mpoly_1
## speckey_key:1, speckey_val:ng_s_t
## speckey_key:2, speckey_val:ng_s_d
## speckey_key:3, speckey_val:ng_p_t
## speckey_key:4, speckey_val:ng_p_d

1.2.1.2 Convert a List to a Dictionary

There is a list with different types of elements. Use the name of the list as the key, with value index added in as a part of the key, the value is the value of the dictionary.

# A list with elements of different types
ls_combo_type = ["e", "20201025x_esr_list_tKap_mlt_ce1a2", ["esti_param.kappa_ce9901", "esti_param.kappa_ce0209"], 1, "C1E31M3S3=1"]
# The "=" specifier renders "name=value"; the text before the first "=" is
# therefore the list's own variable name
st_ls_name = f'{ls_combo_type=}'.split('=')[0]
# Each key is built from the list name, the element's index and the list length
it_ls_len = len(ls_combo_type)
dc_from_list = {}
for it_idx, ob_val in enumerate(ls_combo_type):
    dc_from_list[f'{st_ls_name}_i{it_idx}o{it_ls_len}'] = ob_val
# Print one entry per line
pprint.pprint(dc_from_list, width=1)
## {'ls_combo_type_i0o5': 'e',
##  'ls_combo_type_i1o5': '20201025x_esr_list_tKap_mlt_ce1a2',
##  'ls_combo_type_i2o5': ['esti_param.kappa_ce9901',
##                         'esti_param.kappa_ce0209'],
##  'ls_combo_type_i3o5': 1,
##  'ls_combo_type_i4o5': 'C1E31M3S3=1'}

1.2.1.3 Select One Key-value Pair

Given a dictionary, select a single key-value pair, based on either the key or the value.

# Select by key: look up each listed key and keep the pair
ls_it_keys = [0, 4]
dc_speckey_dict_select_by_key = dict((it_key, dc_speckey_dict[it_key]) for it_key in ls_it_keys)
print(f'{dc_speckey_dict_select_by_key=}')
## dc_speckey_dict_select_by_key={0: 'mpoly_1', 4: 'ng_p_d'}
# Select by value: keep the pairs whose value appears in the list
ls_st_keys = ['ng_s_d', 'ng_p_d']
dc_speckey_dict_select_by_val = dict(filter(lambda tp_pair: tp_pair[1] in ls_st_keys,
                                            dc_speckey_dict.items()))
print(f'{dc_speckey_dict_select_by_val=}')
## dc_speckey_dict_select_by_val={2: 'ng_s_d', 4: 'ng_p_d'}

See Get key by value in dictionary.

1.2.1.4 Copying Dictionary and Updating Copied Dictionary

First, below, it looks as if the default dictionary has been copied, and that the updates to the dictionary will only impact the dc_invoke_main_args, but that is not the case:

# Default arguments
dc_invoke_main_args_default = {'speckey': 'ng_s_t',
                               'ge': False,
                               'multiprocess': False,
                               'estimate': False,
                               'graph_panda_list_name': 'min_graphs',
                               'save_directory_main': 'simu',
                               'log_file': False,
                               'log_file_suffix': ''}
# Plain assignment binds a second name to the SAME dictionary object, so
# nothing is copied and the update below also changes the default
dc_invoke_main_args = dc_invoke_main_args_default
dc_invoke_main_args.update({'speckey': 'b_ge_s_t_bis', 'ge': True})
print(f'speckey in dc_invoke_main_args is {dc_invoke_main_args["speckey"]}.')
## speckey in dc_invoke_main_args is b_ge_s_t_bis.
print(f'speckey in dc_invoke_main_args_default is {dc_invoke_main_args_default["speckey"]}.')
## speckey in dc_invoke_main_args_default is b_ge_s_t_bis.

Now this has the intended result. After updating the deep-copied dictionary, the key-values in the original dictionary are preserved:

# Default arguments
dc_invoke_main_args_default = {'speckey': 'ng_s_t',
                               'ge': False,
                               'multiprocess': False,
                               'estimate': False,
                               'graph_panda_list_name': 'min_graphs',
                               'save_directory_main': 'simu',
                               'log_file': False,
                               'log_file_suffix': ''}
# Deep copy first, then update only the copy
dc_invoke_main_args = copy.deepcopy(dc_invoke_main_args_default)
dc_invoke_main_args.update({'speckey': 'b_ge_s_t_bis', 'ge': True})
print(f'speckey in dc_invoke_main_args_default is {dc_invoke_main_args_default["speckey"]}.')
## speckey in dc_invoke_main_args_default is ng_s_t.
print(f'speckey in dc_invoke_main_args is {dc_invoke_main_args["speckey"]}.')
## speckey in dc_invoke_main_args is b_ge_s_t_bis.
# Deep copy and update again, starting from the untouched default
dc_invoke_main_args = copy.deepcopy(dc_invoke_main_args_default)
dc_invoke_main_args.update({'speckey': 'b_ge_s_t_bis_new', 'ge': False})
print(f'speckey in dc_invoke_main_args is {dc_invoke_main_args["speckey"]}.')
## speckey in dc_invoke_main_args is b_ge_s_t_bis_new.

1.2.1.5 Create a List of Dictionaries

import datetime
import pprint
# Three records with the same five keys; "val" holds a different type in each
ls_dc_exa = [
    dict(file="mat_matlab",
         title="One Variable Graphs and Tables",
         description="Frequency table, bar chart and histogram",
         val=1,
         date=datetime.date(2020, 5, 2)),
    dict(file="mat_two",
         title="Second file",
         description="Second file.",
         val=[1, 2, 3],
         date=datetime.date(2020, 5, 2)),
    dict(file="mat_algebra_rules",
         title="Opening a Dataset",
         description="Opening a Dataset.",
         val=1.1,
         date=datetime.date(2018, 12, 1)),
]
# width=1 makes pprint break the output onto as many lines as possible
pprint.pprint(ls_dc_exa, width=1)
## [{'date': datetime.date(2020, 5, 2),
##   'description': 'Frequency '
##                  'table, '
##                  'bar '
##                  'chart '
##                  'and '
##                  'histogram',
##   'file': 'mat_matlab',
##   'title': 'One '
##            'Variable '
##            'Graphs '
##            'and '
##            'Tables',
##   'val': 1},
##  {'date': datetime.date(2020, 5, 2),
##   'description': 'Second '
##                  'file.',
##   'file': 'mat_two',
##   'title': 'Second '
##            'file',
##   'val': [1,
##           2,
##           3]},
##  {'date': datetime.date(2018, 12, 1),
##   'description': 'Opening '
##                  'a '
##                  'Dataset.',
##   'file': 'mat_algebra_rules',
##   'title': 'Opening '
##            'a '
##            'Dataset',
##   'val': 1.1}]

1.2.1.6 Iteratively Add to A Dictionary

Iteratively add additional Key and Value pairs to a dictionary.

# Matching file names, position by position
ls_snm_tex = ["file1.tex", "file2.tex", "file3.tex"]
ls_snm_pdf = ["file1.pdf", "file2.pdf", "file3.pdf"]

# Start empty and add one key-value pair per loop iteration
dc_tex_pdf = dict()
for tex, pdf in zip(ls_snm_tex, ls_snm_pdf):
    dc_tex_pdf.update({tex: pdf})

pprint.pprint(dc_tex_pdf, width=1)
## {'file1.tex': 'file1.pdf',
##  'file2.tex': 'file2.pdf',
##  'file3.tex': 'file3.pdf'}

1.2.1.7 Select by Keys Dictionaries from list of Dictionaries

Given a list of dictionaries, select the dictionaries whose value under a given key is in a list:

# File identifiers to look for
ls_str_file_ids = ['mat_matlab', 'mat_algebra_rules']
# Keep the dictionaries whose "file" value is one of the identifiers
ls_dc_selected = list(filter(lambda dc_exa: dc_exa['file'] in ls_str_file_ids,
                             ls_dc_exa))
# Print one entry per line
pprint.pprint(ls_dc_selected, width=1)
## [{'date': datetime.date(2020, 5, 2),
##   'description': 'Frequency '
##                  'table, '
##                  'bar '
##                  'chart '
##                  'and '
##                  'histogram',
##   'file': 'mat_matlab',
##   'title': 'One '
##            'Variable '
##            'Graphs '
##            'and '
##            'Tables',
##   'val': 1},
##  {'date': datetime.date(2018, 12, 1),
##   'description': 'Opening '
##                  'a '
##                  'Dataset.',
##   'file': 'mat_algebra_rules',
##   'title': 'Opening '
##            'a '
##            'Dataset',
##   'val': 1.1}]

Search and Select by Multiple Keys in Dictionary. Using two keys below:

# File identifiers to look for
ls_str_file_ids = ['mat_matlab', 'mat_algebra_rules']
# Keep a dictionary only when both conditions hold: its "file" value is one
# of the identifiers and its "val" value equals 1
ls_dc_selected = list(filter(
    lambda dc_exa: (dc_exa['file'] in ls_str_file_ids) and (dc_exa['val'] == 1),
    ls_dc_exa))
# Print one entry per line
pprint.pprint(ls_dc_selected, width=1)
## [{'date': datetime.date(2020, 5, 2),
##   'description': 'Frequency '
##                  'table, '
##                  'bar '
##                  'chart '
##                  'and '
##                  'histogram',
##   'file': 'mat_matlab',
##   'title': 'One '
##            'Variable '
##            'Graphs '
##            'and '
##            'Tables',
##   'val': 1}]

1.2.1.8 Drop Element of Dictionary

Drop element of a dictionary inside a list:

# A list holding two dictionaries
dc_test = [{"file": "mat_matlab_1",
            "title": "One Variable Graphs and Tables",
            "description": "Frequency table, bar chart and histogram",
            "val": 1,
            "date": datetime.date(2020, 5, 2)},
           {"file": "mat_matlab_2",
            "val": "mat_matlab_2"}]

# Drop three keys from the first dictionary and one from the second
for st_key_drop in ('val', 'file', 'description'):
    del dc_test[0][st_key_drop]
del dc_test[1]['val']

# Print one entry per line
pprint.pprint(dc_test, width=1)
## [{'date': datetime.date(2020, 5, 2),
##   'title': 'One '
##            'Variable '
##            'Graphs '
##            'and '
##            'Tables'},
##  {'file': 'mat_matlab_2'}]

1.3 Numpy Arrays

1.3.1 Generate Matrix from Arrays

Go back to fan’s Python Code Examples Repository (bookdown site) or the pyfan Package (API).

import numpy as np

1.3.1.1 Generate a Random Matrix

Generate a matrix with random numbers and arbitrary number of rows and columns. Several types of matrix below:

  1. uniform random
  2. integer random
  3. integer random resorted (shuffled)
  4. integer random redrawn (with replacements)

Set size:

# Number of rows and columns for the matrices generated below
it_rows, it_cols = 2, 3
# Seed numpy's global random generator so the draws are reproducible
np.random.seed(123)

uniform random:

# Matrix of uniform draws with the requested number of rows and columns
tp_mat_shape = (it_rows, it_cols)
mt_rand_unif = np.random.rand(*tp_mat_shape)
print(mt_rand_unif)
## [[0.69646919 0.28613933 0.22685145]
##  [0.55131477 0.71946897 0.42310646]]

integer random:

# Matrix of random integers from 0 up to, but not including, it_max_int
it_max_int = 10
mt_rand_integer = np.random.randint(0, it_max_int, size=(it_rows, it_cols))
print(mt_rand_integer)
## [[6 1 0]
##  [1 9 0]]

integer random resorted (shuffled):

# The integers 0 to (matrix size - 1), reordered at random, each used once
it_mat_size = it_rows * it_cols
ar_seq = np.arange(it_mat_size)
# Draw a random ordering of the positions without replacement
ar_idx_resort = np.random.choice(np.arange(it_mat_size), size=it_mat_size, replace=False)
ar_seq_rand_sorted = ar_seq[ar_idx_resort]
tp_mat_shape = (it_rows, it_cols)
mt_seq_rand_sorted = ar_seq_rand_sorted.reshape(tp_mat_shape)
print(mt_seq_rand_sorted)
## [[5 4 2]
##  [3 1 0]]
# Achieve the same objective with a shuffle, which reorders ar_seq in place
np.random.shuffle(ar_seq)
mt_seq_rand_shuffle = ar_seq.reshape(tp_mat_shape)
print(mt_seq_rand_shuffle)
## [[2 1 3]
##  [5 0 4]]

integer random redrawn (with replacements):

# The integers 0 to (matrix size - 1), redrawn at random WITH replacement, so
# values can repeat and some can be missing
it_mat_size = it_rows * it_cols
ar_seq = np.arange(it_mat_size)
ar_idx_resort_withreplacement = np.random.choice(np.arange(it_mat_size), size=it_mat_size, replace=True)
ar_seq_rand_sorted_withreplacement = ar_seq[ar_idx_resort_withreplacement]
tp_mat_shape = (it_rows, it_cols)
mt_seq_rand_sorted_withreplacement = ar_seq_rand_sorted_withreplacement.reshape(tp_mat_shape)
print(mt_seq_rand_sorted_withreplacement)
## [[3 2 4]
##  [2 4 0]]

1.3.1.2 Stack Arrays to Matrix

Given various arrays, generate a matrix by stacking equi-length arrays as columns

# Three equal-length sequences
ar_a = [1, 2, 3]
ar_b = [3, 4, 5]
ar_c = [11, 4, 1]

# Stack along a new second axis, so each sequence becomes one column
mt_abc = np.stack([ar_a, ar_b, ar_c], axis=1)
print(mt_abc)
## [[ 1  3 11]
##  [ 2  4  4]
##  [ 3  5  1]]