Text Processing on Linux Using The Command Line

Common text processing commands on Linux

  • grep
  • sed
  • awk
  • tr
  • sort
  • wc

Filter

Filter lines

echo -e "Foo\nBar" | grep "Fo"

Insert

Insert lines

Insert at a specific line

echo -e 'Foo\nBar' | sed '2i\the new line\' # Foo\nthe new line\nBar

Add a line at the beginning and at the end

echo -e 'Foo\nBar' | sed '1 i\First line'
echo -e 'Foo\nBar' | sed '$aEnd line'

Insert lines after a matching pattern

echo -e 'Foo\nBar' | sed '/Foo/a NewLine1\nNewLine2'
echo -e 'Foo\nBar' | sed '/Foo/r add.txt'

Insert text to the beginning and end

Insert text to the beginning of each line

echo 'foo' | sed 's/^/BeginText/'

Insert text to the end of each line

echo 'foo' | sed 's/$/EndText/'

Insert text at the beginning and end of each line

echo 'foo' | sed 's/^/BeginText/' | sed 's/$/EndText/'

Append a line break (CRLF) to the end of each line

echo -e 'Foo\nBar'| sed 's/$/\r\n/'

Replace

Replace first

echo "old names, old books" | sed 's/old/new/'
# or
echo "old names, old books" | sed '0,/old/{s/old/new/}'

Replace all

echo "old names, old books" | sed 's/old/new/g'

Remove

Remove matched lines

echo -e "Foo\nBar" | sed '/Foo/d'

Remove empty line

echo -e "Foo\n \nBar" | sed '/^\s*$/d'
# or
echo -e "Foo\n \nBar" | sed '/^[[:space:]]*$/d'

Remove comment /**/ or //

# remove lines that start with / or * (optionally preceded by spaces)
sed '/^ *[*/]/d'

Remove n lines after a pattern

# including the line with the pattern
echo -e "Line1\nLine2\nLine3\nLine4" | sed '/Line1/,+2d' # Line4

# excluding the line with the pattern
echo -e "Line1\nLine2\nLine3\nLine4" | sed '/Line1/{n;N;d}' # Line1\nLine4

Remove all lines between two patterns

# including the line with the pattern
sed '/pattern1/,/pattern2/d;'
echo -e "Foo\nAAA\nBBB\nBar\nCCC" | sed '/Foo/,/Bar/d' # CCC

# excluding the line with the pattern
sed '/pattern1/,/pattern2/{//!d;};'
echo -e "Foo\nAAA\nBBB\nBar\nCCC" | sed '/Foo/,/Bar/{//!d;}' # Foo\nBar\nCCC

Substring

Get substring by index

cut -c start1-end1,start2-end2
printf 'hello,world' | cut -c 1-5 # hello

Split and get fields

cut -d DELIMITER -f field_number1,field_number2
printf 'hello,world' | cut -d ',' -f 2 # world

Find String

Find String by Pattern

echo -e 'Hello Java developer!\nHello Web developer!' | sed 's/Hello \(.*\) developer!/\1/'

Join

Join lines

echo -e "Foo\nBar" | tr '\n' ' '

Split

Split to multiple lines

echo "Foo Bar" | tr '[:space:]' '[\n*]'

Deduplication

# sort and deduplication
echo -e "1\n3\n2\n1" | sort -u

Sort

echo -e "1\n3\n2\n1" | sort

Count

Count lines

echo -e "1\n3\n2\n1" | wc -l # 4

Count matched lines

echo -e "1\n3\n2\n1" | grep -c "1" # 2

Count the number of occurrences of a string

echo "hello world" | grep -o -i "o" | wc -l # 2

Format

To Upper/Lower Case

# to upper case
echo "hello WORLD" | tr a-z A-Z
# to lower case
echo "hello WORLD" | tr A-Z a-z

Format JSON string

echo '{"name":"Jack","age":18}' | jq .
echo '{"name":"Jack","age":18}' | jq .name

Crypto

Encode

Base64

Base64 Encode

printf 'hello' | base64
# or
echo -n 'hello' | base64
  • -n: do not output the trailing newline

Base64 Decode

printf 'hello' | base64 | base64 -d

URL encode

URL encode

printf '你好' | jq -sRr @uri
# or
echo -n '你好' | jq -sRr @uri
  • -n: do not output the trailing newline

Hash

md5

md5 -r /path/to/file
printf 'hello' | md5sum

echo -n 'hello' | md5sum
# or
echo -n 'hello' | md5
  • -n: do not output the trailing newline

Use md5sum on Linux and md5 on macOS.

sha

shasum -a 256 /path/to/file
printf 'hello' | shasum -a 256

Examples

Wrap in double quotes and join with comma

echo 'hello
world' | sed 's/^/"/' | sed 's/$/"/' | tr '\n' ','