Skip to content
GitLab
Menu
Projects
Groups
Snippets
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in / Register
Toggle navigation
Menu
Open sidebar
wwwanlingxiao
public-apis
Commits
4808d633
Unverified
Commit
4808d633
authored
Jan 11, 2022
by
Matheus Felipe
Browse files
Implement functions to find links in a text/file
parent
e526f867
Changes
1
Hide whitespace changes
Inline
Side-by-side
scripts/validate/links.py
0 → 100644
View file @
4808d633
# -*- coding: utf-8 -*-
import
sys
import
re
from
typing
import
List
def find_links_in_text(text: str) -> List[str]:
    """Extract every URL-like substring from ``text``.

    Matches are returned in order of appearance, duplicates included,
    with any trailing ``/`` characters stripped from each one.
    """
    url_regex = re.compile(r'((?:https?://|www\d{0,3}[.]|[a-z0-9.\-]+[.][a-z]{2,4}/)(?:[^\s()<>]+|\(([^\s()<>]+|(\([^\s()<>]+\)))*\))+(?:\(([^\s()<>]+|(\([^\s()<>]+\)))*\)|[^\s`!()\[\]{};:\'\".,<>?«»“”‘’]))')

    urls = []
    for match in url_regex.finditer(text):
        # Group 1 wraps the entire pattern, so it holds the full matched URL;
        # the inner groups only exist to balance parentheses.
        urls.append(match.group(1).rstrip('/'))

    return urls
def find_links_in_file(filename: str) -> List[str]:
    """Find links in a file and return a list of URLs from text file.

    The file is read as UTF-8. Only the text starting at the first
    ``## Index`` heading is scanned; when the file has no such heading,
    the whole file is scanned instead.

    Args:
        filename: Path of the text file to read.

    Returns:
        The URLs found by ``find_links_in_text`` in the scanned text.

    Raises:
        OSError: If the file cannot be opened or read.
        UnicodeDecodeError: If the file is not valid UTF-8.
    """
    with open(filename, mode='r', encoding='utf-8') as file:
        readme = file.read()

    # str.find() returns -1 when the marker is missing. Slicing from -1 would
    # keep only the last character of the file and silently drop every link,
    # so fall back to the start of the file in that case.
    index_section = readme.find('## Index')
    if index_section == -1:
        index_section = 0

    return find_links_in_text(readme[index_section:])
if __name__ == '__main__':
    # The markdown file to scan must be given as the first CLI argument;
    # without it there is nothing to do, so report and exit with status 1.
    if len(sys.argv) < 2:
        print('No .md file passed')
        sys.exit(1)

    markdown_path = sys.argv[1]
    links = find_links_in_file(markdown_path)
Write
Preview
Supports
Markdown
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment