Go to the RMD, PDF, or HTML version of this file. Go back to Python Code Examples Repository (bookdown site) or the pyfan Package (API).
import numpy as np
import string as string
import random as random
import pprint
Given a list of strings with repeated values, count the frequency of each unique string.
# Count every distinct status in one pass. Counter keeps first-seen order,
# so the report order is the same on every run (iterating a set() is not).
from collections import Counter

ls_st_status = ["success", "running", "running", "running", "finished", "pending", "pending"]
it_status_total = len(ls_st_status)
# One single-element list per distinct status: "<count> of <total> <status>"
ls_freq = [[f'{it_status_count} of {it_status_total} {st_status}']
           for st_status, it_status_count in Counter(ls_st_status).items()]
pprint.pprint(ls_freq)
## [['1 of 7 success'],
##  ['3 of 7 running'],
##  ['1 of 7 finished'],
##  ['2 of 7 pending']]
Given a string, get the substring after a word.
st_func_stack_code = 'dc_ls_combo_type = pyfan_amto_lsdcconvert.ff_ls2dc(ls_combo_type,'
# Step 1: keep what follows the function-call marker
st_search_break = 'ff_ls2dc('
st_string_after = st_func_stack_code.split(st_search_break)[1]
# Step 2: cut at the first comma. Split the step-1 result, not the full
# string, otherwise step 1 is discarded and the answer is the empty tail.
st_search_break = ','
st_string_after = st_string_after.split(st_search_break)[0]
print(f'{st_string_after=}')
## st_string_after='ls_combo_type'
Generate some random strings:
# Fix the seed so the generated word is reproducible
random.seed(123)
it_word_length = 5
# Draw one lowercase letter per position, glue them, upper-case the first letter
st_rand_word = ''.join(
    [random.choice(string.ascii_lowercase) for _ in range(it_word_length)]
).capitalize()
print(f'{st_rand_word=}')
## st_rand_word='Bicyn'
Generate a block of random text and then convert it to a list of strings:
# Fix the seed so the generated block is reproducible
random.seed(123)
it_words_count = 15
it_word_length = 5
# One long run of lowercase letters, long enough for all the words
st_rand_word_block = ''.join(
    [random.choice(string.ascii_lowercase) for _ in range(it_word_length * it_words_count)]
)
# Cut the run into consecutive fixed-width pieces and capitalize each piece
ls_st_rand_word = list(map(
    str.capitalize,
    (st_rand_word_block[it_start:it_start + it_word_length]
     for it_start in range(0, len(st_rand_word_block), it_word_length)),
))
print(f'{ls_st_rand_word=}')
## ls_st_rand_word=['Bicyn', 'Idbmr', 'Rkkbf', 'Ekrkw', 'Hfany', 'Ctmca', 'Kxodb', 'Cveez', 'Ajnsp', 'Ipbyj', 'Kqzpg', 'Tuqsz', 'Kamyu', 'Qnvru', 'Zvtpq']
Reshape the array of words to a matrix:
# Turn the flat word list into an array, then view it as 3 rows by 5 columns
mt_st_rand_word = np.asarray(ls_st_rand_word).reshape(3, 5)
print(f'{mt_st_rand_word=}')
## mt_st_rand_word=array([['Bicyn', 'Idbmr', 'Rkkbf', 'Ekrkw', 'Hfany'],
## ['Ctmca', 'Kxodb', 'Cveez', 'Ajnsp', 'Ipbyj'],
## ['Kqzpg', 'Tuqsz', 'Kamyu', 'Qnvru', 'Zvtpq']], dtype='<U5')
# Confirm the resulting dimensions
print(f'{mt_st_rand_word.shape=}')
## mt_st_rand_word.shape=(3, 5)
# Confirm the container type
print(f'{type(mt_st_rand_word)=}')
## type(mt_st_rand_word)=<class 'numpy.ndarray'>
Similar to the paste function in R, given a list of strings, paste them together with a connector.
st_separator = "_"
# Two non-empty pieces: joined with the separator between them
st_pasted = st_separator.join([st_piece for st_piece in ['abc', 'efg'] if st_piece])
print(f'{st_pasted=}')
## st_pasted='abc_efg'
# Every piece is empty: nothing is left to join, so the result is empty
st_pasted = st_separator.join([st_piece for st_piece in ['', '', ''] if st_piece])
print(f'{st_pasted=}')
## st_pasted=''
# Only one non-empty piece: it comes back unchanged, with no separator
st_pasted = st_separator.join([st_piece for st_piece in ['abc', ''] if st_piece])
print(f'{st_pasted=}')
## st_pasted='abc'
Given a numeric array, prepend a string to each element, for example to generate sequential column names with the prefix s:
# Build one name per column index by placing the letter 's' before the number
ar_st_colnames = [f's{it_col}' for it_col in np.array(range(1, 3))]
print(ar_st_colnames)
## ['s1', 's2']
Given a list of strings, loop over them but skip any string that contains an element of a second list of strings.
# Substrings that mark a string as one to skip
ls_st_ignore = ['abc', 'efg', 'xyz']
# Strings to loop over
ls_st_loop = ['ab cefg sdf', '12345', 'xyz', 'abc xyz', 'good morning']
# Skip a string as soon as any ignore term occurs anywhere inside it
for st_loop in ls_st_loop:
    if any(st_ignore in st_loop for st_ignore in ls_st_ignore):
        print('skip:', st_loop)
    else:
        print('not skip:', st_loop)
## skip: ab cefg sdf
## not skip: 12345
## skip: xyz
## skip: abc xyz
## not skip: good morning
Replace terms in string
# define string
st_full = """
abc is a great efg, probably xyz. Yes, xyz is great, like efg.
eft good, EFG capitalized, efg good again.
A B C or abc or ABC. Interesting xyz.
"""
# define new and old; position i of one list pairs with position i of the other
ls_st_old = ['abc', 'efg', 'xyz']
ls_st_new = ['123', '456', '789']
# zip and loop and replace; matching is case-sensitive, so 'EFG' and 'ABC' stay
for st_old, st_new in zip(ls_st_old, ls_st_new):
    st_full = st_full.replace(st_old, st_new)
# print
print(st_full)
##
## 123 is a great 456, probably 789. Yes, 789 is great, like 456.
## eft good, EFG capitalized, 456 good again.
## A B C or 123 or ABC. Interesting 789.
Given a long string, wrap it into multiple lines with fixed width.
import textwrap
# A long path, written as a triple-quoted literal (so it starts and ends with a newline)
st_path = """
C:/Users/fan/Documents/Dropbox (UH-ECON)/Project Emily Minority Survey/EthLang/reg_lang_abi_cls_mino/tab3_fm/attain_m_vs_f/tab3_mand_talk_m2c_hfracle02.tex
"""
# Break the path into lines of at most 20 characters, then stack the lines
st_wrapped = '\n'.join(textwrap.wrap(st_path, width=20))
print(st_wrapped)
## C:/Users/fan/Docume
## nts/Dropbox (UH-
## ECON)/Project Emily
## Minority Survey/EthL
## ang/reg_lang_abi_cls
## _mino/tab3_fm/attain
## _m_vs_f/tab3_mand_ta
## lk_m2c_hfracle02.tex
Combine Strings that are wrapped and not Wrapped
# The two paths to report
st_path_a = "C:/Users/fan/Documents/Dropbox (UH-ECON)/Project Emily Minority Survey/EthLang/reg_lang_abi_cls_mino/tab3_fm/attain_m_vs_f/tab3_mand_talk_m2c_hfracle02.tex"
st_path_b = 'C:/Users/fan/R4Econ/support/development/fs_packaging.html'
# Upper-cased headings stay on one line; each path is wrapped to 25 columns.
# The empty entry produces the blank line between the two sections.
str_dc_records = '\n'.join([
    'First Path:'.upper(),
    textwrap.fill(st_path_a, width=25),
    '',
    'Second Path:'.upper(),
    textwrap.fill(st_path_b, width=25),
])
# Show the combined report
print(str_dc_records)
## FIRST PATH:
## C:/Users/fan/Documents/Dr
## opbox (UH-ECON)/Project
## Emily Minority Survey/Eth
## Lang/reg_lang_abi_cls_min
## o/tab3_fm/attain_m_vs_f/t
## ab3_mand_talk_m2c_hfracle
## 02.tex
##
## SECOND PATH:
## C:/Users/fan/R4Econ/suppo
## rt/development/fs_packagi
## ng.html
Here we have a list of strings that alternate between a row of point estimates and a row of the corresponding standard errors. Within each string, values are separated by commas. We want to change the number of decimal places shown, with appropriate rounding. Several steps: (1) split each string by comma, (2) loop over the pieces, (3) extract the numeric part of each piece, (4) round it and put it back into the piece, keeping any stars or parentheses.
# Number of decimal places to keep in the re-formatted output
it_round_decimal = 1
ls_st_all_estimates = ["84.506***, 91.758***, 107.950***, 115.879***, 133.560***\n",
                       "(7.796), (4.848), (4.111), (5.044), (6.961)\n",
                       "68.180***, 47.921***, 47.127***, 51.366***, 41.764***\n",
                       "(8.986), (5.368), (4.995), (5.099), (8.637)\n"]
# Deletes significance stars and standard-error parentheses in a single pass
dc_strip_marks = str.maketrans('', '', '*()')
for st_all_estimates in ls_st_all_estimates:
    # delete linebreak at end of line
    st_all_estimates = st_all_estimates.replace("\n", "")
    # split
    ls_st_estimates = st_all_estimates.split(",")
    # Loop over each value separated by commas
    for it_esti_ctr, st_esti in enumerate(ls_st_estimates):
        # Bare number: no surrounding blanks, stars, or parentheses
        st_esti_numeric = st_esti.strip().translate(dc_strip_marks)
        # Decimal rounding: the fixed-point format spec rounds to the requested places
        st_esti_rounded = f'{float(st_esti_numeric):.{it_round_decimal}f}'
        print(f'{st_esti=} + {st_esti_numeric=} + {st_esti_rounded=}')
        # Swap only the number, so blanks, stars, and parentheses stay in place
        ls_st_estimates[it_esti_ctr] = st_esti.replace(st_esti_numeric, st_esti_rounded)
    # Rebuild the comma-separated line
    st_text_out = ','.join(ls_st_estimates)
    print(f'\n{st_text_out=}\n')
    print()
## st_esti='84.506***' + st_esti_numeric='84.506' + st_esti_rounded='84.5'
## st_esti=' 91.758***' + st_esti_numeric='91.758' + st_esti_rounded='91.8'
## st_esti=' 107.950***' + st_esti_numeric='107.950' + st_esti_rounded='108.0'
## st_esti=' 115.879***' + st_esti_numeric='115.879' + st_esti_rounded='115.9'
## st_esti=' 133.560***' + st_esti_numeric='133.560' + st_esti_rounded='133.6'
##
## st_text_out='84.5***, 91.8***, 108.0***, 115.9***, 133.6***'
##
##
## st_esti='(7.796)' + st_esti_numeric='7.796' + st_esti_rounded='7.8'
## st_esti=' (4.848)' + st_esti_numeric='4.848' + st_esti_rounded='4.8'
## st_esti=' (4.111)' + st_esti_numeric='4.111' + st_esti_rounded='4.1'
## st_esti=' (5.044)' + st_esti_numeric='5.044' + st_esti_rounded='5.0'
## st_esti=' (6.961)' + st_esti_numeric='6.961' + st_esti_rounded='7.0'
##
## st_text_out='(7.8), (4.8), (4.1), (5.0), (7.0)'
##
##
## st_esti='68.180***' + st_esti_numeric='68.180' + st_esti_rounded='68.2'
## st_esti=' 47.921***' + st_esti_numeric='47.921' + st_esti_rounded='47.9'
## st_esti=' 47.127***' + st_esti_numeric='47.127' + st_esti_rounded='47.1'
## st_esti=' 51.366***' + st_esti_numeric='51.366' + st_esti_rounded='51.4'
## st_esti=' 41.764***' + st_esti_numeric='41.764' + st_esti_rounded='41.8'
##
## st_text_out='68.2***, 47.9***, 47.1***, 51.4***, 41.8***'
##
##
## st_esti='(8.986)' + st_esti_numeric='8.986' + st_esti_rounded='9.0'
## st_esti=' (5.368)' + st_esti_numeric='5.368' + st_esti_rounded='5.4'
## st_esti=' (4.995)' + st_esti_numeric='4.995' + st_esti_rounded='5.0'
## st_esti=' (5.099)' + st_esti_numeric='5.099' + st_esti_rounded='5.1'
## st_esti=' (8.637)' + st_esti_numeric='8.637' + st_esti_rounded='8.6'
##
## st_text_out='(9.0), (5.4), (5.0), (5.1), (8.6)'
Some strings have a special structure: if a string has that structure, do one thing; if it does not, do something else. In the following example, the structure is a string that contains one equality sign followed by an integer. Is a given string of this form?
The integer check uses this:
all([st_ele in '1234567890' for st_ele in esti_top_which])
In the example below, the first and last elements are valid.
# examples strings to check
ls_st_exas = ["C1E126M4S3=2",
              "simu_tst/M4S3_top_json.json",
              "M4S3_top_json.json",
              "simu_tst/M4S3=_top_json.json",
              "=====",
              "==$$%%==123123",
              "$$%%=123123"]
# The character that must separate the name from the integer
st_connector = "="
# check
for combo_type_e in ls_st_exas:
    # split
    ls_combo_type_e_split = combo_type_e.split(st_connector)
    # first check length: exactly one connector means exactly two pieces
    if len(ls_combo_type_e_split) != 2:
        bl_esr_json = False
        print(f'{bl_esr_json=}, {combo_type_e=}')
        continue
    compesti_short_name, esti_top_which = ls_combo_type_e_split
    # check type
    bl_first_is_str = isinstance(compesti_short_name, str)
    # all() of an empty sequence is True, so require at least one character;
    # otherwise a string that ends in the connector would count as an integer
    bl_second_is_int = len(esti_top_which) > 0 and all(
        st_ele in '1234567890' for st_ele in esti_top_which)
    bl_esr_json = bl_first_is_str and bl_second_is_int
    if bl_esr_json:
        # Print
        print(f'{bl_esr_json=}, {bl_first_is_str=}, {bl_second_is_int=}')
    else:
        print(f'{bl_esr_json=}, {bl_first_is_str=}, {bl_second_is_int=}, {combo_type_e=}')
## bl_esr_json=True, bl_first_is_str=True, bl_second_is_int=True
## bl_esr_json=False, combo_type_e='simu_tst/M4S3_top_json.json'
## bl_esr_json=False, combo_type_e='M4S3_top_json.json'
## bl_esr_json=False, bl_first_is_str=True, bl_second_is_int=False, combo_type_e='simu_tst/M4S3=_top_json.json'
## bl_esr_json=False, combo_type_e='====='
## bl_esr_json=False, combo_type_e='==$$%%==123123'
## bl_esr_json=True, bl_first_is_str=True, bl_second_is_int=True