Unverified Commit 144fe599 authored by Josiah Sayers's avatar Josiah Sayers Committed by GitHub
Browse files

Merge branch 'master' into shrtlnk

parents cbcdd248 eb4681aa
MIT License
Copyright (c) 2021 public-apis
Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to deal
in the Software without restriction, including without limitation the rights
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
copies of the Software, and to permit persons to whom the Software is
furnished to do so, subject to the following conditions:
The above copyright notice and this permission notice shall be included in all
copies or substantial portions of the Software.
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
SOFTWARE.
This diff is collapsed.
flake8>=3.5.0
httplib2==0.18.0
httplib2==0.19.0
......@@ -20,7 +20,6 @@ num_segments = 5
errors = []
title_links = []
previous_links = []
anchor_re = re.compile(anchor + '\s(.+)')
section_title_re = re.compile('\*\s\[(.*)\]')
link_re = re.compile('\[(.+)\]\((http.*)\)')
......@@ -68,12 +67,6 @@ def check_entry(line_num, segments):
title = title_re_match.group(1)
if title.upper().endswith(' API'):
add_error(line_num, 'Title should not end with "... API". Every entry is an API here!')
# do not allow duplicate links
link = title_re_match.group(2)
if link in previous_links:
add_error(line_num, 'Duplicate link - entries should only be included in one section')
else:
previous_links.append(link)
# END Title
# START Description
# first character should be capitalized
......
......@@ -5,6 +5,12 @@ import re
import socket
import sys
# Links that dup_links() skips when checking for duplicates, so a URL listed
# here may appear more than once in the checked file without being reported.
# dup_links() strips trailing slashes from a link before looking it up here,
# so entries must be written without a trailing slash to match.
ignored_links = [
'https://github.com/public-apis/public-apis/actions?query=workflow%3A%22Run+tests%22',
'https://github.com/public-apis/public-apis/workflows/Validate%20links/badge.svg?branch=master',
'https://github.com/public-apis/public-apis/actions?query=workflow%3A%22Validate+links%22',
'https://github.com/davemachado/public-api',
]
def parse_links(filename):
"""Returns a list of URLs from text file"""
......@@ -16,38 +22,75 @@ def parse_links(filename):
links = [raw_link[0] for raw_link in raw_links]
return links
def dup_links(links):
    """Check a list of links for duplicates and print the outcome.

    Links are compared after stripping trailing slashes, so
    ``http://a.com`` and ``http://a.com/`` count as the same link.
    Links listed in the module-level ``ignored_links`` are skipped.

    Args:
        links: Iterable of URL strings.

    Returns:
        True if at least one link occurs more than once, False otherwise.
    """
    print('Checking for duplicated links...')

    occurrences = {}
    dupes = []
    for link in links:
        link = link.rstrip('/')
        if link in ignored_links:
            continue

        count = occurrences.get(link, 0)
        # Record a duplicate only on its second occurrence so that a link
        # repeated three or more times is still reported exactly once.
        if count == 1:
            dupes.append(link)
        occurrences[link] = count + 1

    if dupes:
        print(f"Found duplicate links: {dupes}")
        return True

    print("No duplicate links")
    return False
def validate_links(links):
"""Checks each entry in JSON file for live link"""
print('Validating {} links...'.format(len(links)))
errors = []
print(f'Validating {len(links)} links...')
hasError = False
for link in links:
h = httplib2.Http(disable_ssl_certificate_validation=True, timeout=25)
try:
resp = h.request(link, headers={'user-agent': 'python-httplib2/0.18.0'})
resp = h.request(link, headers={
# Faking user agent as some hosting services block not-whitelisted UA
'user-agent': 'Mozilla/5.0'
})
code = int(resp[0]['status'])
# check if status code is a client or server error
if code >= 404:
errors.append('{}: {}'.format(code, link))
# Checking status code errors
if (code >= 300):
hasError = True
print(f"ERR:CLT:{code} : {link}")
except TimeoutError:
errors.append("TMO: " + link)
hasError = True
print(f"ERR:TMO: {link}")
except socket.error as socketerror:
errors.append("SOC: {} : {}".format(socketerror, link))
hasError = True
print(f"ERR:SOC: {socketerror} : {link}")
except Exception as e:
hasError = True
# Ignore some exceptions which are not actually errors.
# The list below should be extended with other exceptions in the future if needed
if ((-1 != str(e).find("Content purported to be compressed with gzip but failed to decompress.")) and
(-1 != str(e).find("[SSL: CERTIFICATE_VERIFY_FAILED] certificate verify failed (_ssl.c:852)"))) :
errors.append("ERR: {} : {}".format(e, link))
return errors
if (-1 != str(e).find("[SSL: CERTIFICATE_VERIFY_FAILED] certificate verify failed (_ssl.c:852)")):
print(f"ERR:SSL: {e} : {link}")
elif (-1 != str(e).find("Content purported to be compressed with gzip but failed to decompress.")):
print(f"ERR:GZP: {e} : {link}")
elif (-1 != str(e).find("Unable to find the server at")):
print(f"ERR:SRV: {e} : {link}")
else:
print(f"ERR:UKN: {e} : {link}")
return hasError
if __name__ == "__main__":
num_args = len(sys.argv)
if num_args < 2:
print("No .md file passed")
sys.exit(1)
errors = validate_links(parse_links(sys.argv[1]))
if len(errors) > 0:
for err in errors:
print(err)
links = parse_links(sys.argv[1])
hasError = dup_links(links)
if not hasError:
hasError = validate_links(links)
if hasError:
sys.exit(1)
Supports Markdown
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment