Unverified Commit c2bdd9e5 authored by Matheus Felipe's avatar Matheus Felipe
Browse files

Fix false-negative HTTP 404 in link verification

Some links returned HTTP status code 404 when they were checked, even
though the links were working correctly.

This happened because a / character was appended to the link before the
request was made, turning it into a different URL from the original. If
the server did not serve the original path with a trailing /, the
request returned a 404 error.

This made the result a false negative: a working link was reported as broken.
parent 51b4166a
...@@ -17,7 +17,7 @@ def find_links_in_text(text: str) -> List[str]: ...@@ -17,7 +17,7 @@ def find_links_in_text(text: str) -> List[str]:
raw_links = re.findall(link_pattern, text) raw_links = re.findall(link_pattern, text)
links = [ links = [
str(raw_link[0]).rstrip('/') for raw_link in raw_links str(raw_link[0]) for raw_link in raw_links
] ]
return links return links
...@@ -49,6 +49,7 @@ def check_duplicate_links(links: List[str]) -> Tuple[bool, List]: ...@@ -49,6 +49,7 @@ def check_duplicate_links(links: List[str]) -> Tuple[bool, List]:
has_duplicate = False has_duplicate = False
for link in links: for link in links:
link = link.rstrip('/')
if link not in seen: if link not in seen:
seen[link] = 1 seen[link] = 1
else: else:
...@@ -163,7 +164,7 @@ def check_if_link_is_working(link: str) -> Tuple[bool, str]: ...@@ -163,7 +164,7 @@ def check_if_link_is_working(link: str) -> Tuple[bool, str]:
error_message = '' error_message = ''
try: try:
resp = requests.get(link + '/', timeout=25, headers={ resp = requests.get(link, timeout=25, headers={
'User-Agent': fake_user_agent(), 'User-Agent': fake_user_agent(),
'host': get_host_from_link(link) 'host': get_host_from_link(link)
}) })
......
Supports Markdown
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment