0w1

CTF HR Infinite Links ( Web Crawler )

https://www.hackerrank.com/contests/capture-the-flag/challenges/infinite-links

簡単な Web Crawler を書くだけ。

import urllib.request
from queue import Queue

def retrieve_msg_links( link ):
  """Fetch one page and extract its secret phrase and outgoing links.

  The page is downloaded, decoded as UTF-8 and scanned line by line.

  Args:
    link: URL of the page to download (anything ``urlopen`` accepts).

  Returns:
    A ``(msg, sublinks)`` tuple.  ``msg`` is the text between ``</b>`` and
    ``</font>`` on the last line containing ``Secret Phrase`` (``""`` when
    no such line exists).  ``sublinks`` lists, in page order, the target of
    the first ``href=`` on every other line that mentions ``html``; each
    target runs up to and including the first ``html`` after ``href=`` and
    has surrounding quote characters removed.

  Raises:
    urllib.error.URLError: if the page cannot be fetched.
    UnicodeDecodeError: if the body is not valid UTF-8.
  """
  # Use the response as a context manager so the connection is always closed.
  with urllib.request.urlopen( link ) as response:
    lines = response.read().decode( "utf-8" ).split( '\n' )
  msg, sublinks = "", []
  for s in lines:
    if "Secret Phrase" in s:
      msg = s[ s.find( "</b>" ) + 4 : s.find( "</font>" ) ]
    elif "html" in s:
      start = s.find( "href=" )
      if start == -1:
        # e.g. a bare <html> tag: mentions "html" but carries no link.
        continue
      start += len( "href=" )
      # Look for the extension only after the attribute, so an earlier
      # "html" on the same line cannot produce an empty or bogus slice.
      end = s.find( "html", start )
      if end == -1:
        continue
      # Drop quotes around the attribute value so it can be appended to a URL.
      sublinks.append( s[ start : end + 4 ].strip( "\"'" ) )
  return msg, sublinks

# Directory that every crawled page lives in; page names are appended to it.
url = "https://cdn.hackerrank.com/hackerrank/static/contests/capture-the-flag/infinite/"

# Distinct phrases collected so far (retrieve_msg_links yields "" for a page
# without a phrase line, so "" may end up in this set as well).
msg = set()

# Pages already scheduled, so each one is fetched at most once.
vis = { "qds.html" }

# FIFO work list, seeded with the entry page: a breadth-first traversal.
que = Queue()
que.put( "qds.html" )

while not que.empty():
  page = que.get()
  phrase, links = retrieve_msg_links( url + page )
  msg.add( phrase )
  for target in links:
    if target not in vis:
      vis.add( target )
      que.put( target )

# Emit the phrases one per line, in sorted order.
print( '\n'.join( sorted( msg ) ) )