Ferramentas Pessoais
  •  
Você está aqui: Entrada Sobre Publicações Preserving Websites Of Research & Development Projects BibTex

Preserving Websites of Research & Development Projects BibTeX

preservingRDprojectsACMV2.bib — BibTeX bibliographic data, 20 KB

Conteúdo do ficheiro

@inproceedings{klein2010evaluating,
  author    = {Klein, Martin and Nelson, Michael L.},
  title     = {Evaluating methods to rediscover missing web pages from the web infrastructure},
  booktitle = {Proceedings of the 10th Annual Joint Conference on Digital Libraries},
  pages     = {59--68},
  publisher = {ACM},
  year      = {2010}
}

@article{Kornblum2006,
  author   = {Kornblum, Jesse},
  title    = {Identifying almost identical files using context triggered piecewise hashing},
  journal  = {Digital Investigation},
  volume   = {3},
  number   = {Supplement},
  pages    = {91--97},
  year     = {2006},
  doi      = {10.1016/j.diin.2006.06.015},
  issn     = {1742-2876},
  abstract = {Homologous files share identical sets of bits in the same order. Because such files are not completely identical, traditional techniques such as cryptographic hashing cannot be used to identify them. This paper introduces a new technique for constructing hash signatures by combining a number of traditional hashes whose boundaries are determined by the context of the input. These signatures can be used to identify modified versions of known files even if data has been inserted, modified, or deleted in the new files. The description of this method is followed by a brief analysis of its performance and some sample applications to computer forensics. {\copyright} 2006 DFRWS.}
}

@misc{researchOutputs,
  title        = {Research resources and outputs},
  howpublished = {\url{https://github.com/arquivo/Research-Websites-Preservation}},
  urldate      = {2016-04-23}
}


@misc{OpenSearchAPI,
  title        = {{OpenSearch}},
  howpublished = {\url{http://www.opensearch.org/Home}},
  urldate      = {2016-04-23}
}

@misc{testcollection300,
  title        = {Test collection 300 samples},
  howpublished = {\url{https://github.com/arquivo/Research-Websites-Preservation/blob/master/datasets/fp7-golden-dataset-300.csv}},
  urldate      = {2016-03-03}
}

@misc{fp7dataset,
  title        = {{CORDIS} -- {EU} research projects under {FP7} (2007--2013) Datasets},
  howpublished = {\url{http://open-data.europa.eu/en/data/dataset/cordisfp7projects}},
  urldate      = {2016-03-03}
}

@misc{fp6dataset,
  title        = {{CORDIS} -- {EU} research projects under {FP6} (2002--2006) Datasets},
  howpublished = {\url{https://open-data.europa.eu/en/data/dataset/cordisfp6projects}},
  urldate      = {2016-04-24}
}

@misc{fp5dataset,
  title        = {{CORDIS} -- {EU} research projects under {FP5} (1998--2002) Datasets},
  howpublished = {\url{https://open-data.europa.eu/en/data/dataset/cordisfp5projects}},
  urldate      = {2016-04-24}
}


@misc{fp4dataset,
  title        = {{CORDIS} -- {EU} research projects under {FP4} (1994--1998) Datasets},
  howpublished = {\url{https://open-data.europa.eu/en/data/dataset/cordisfp4projects}},
  urldate      = {2016-04-24}
}


@misc{opendataportal,
  title        = {{European Union Open Data Portal}},
  howpublished = {\url{http://open-data.europa.eu/en/data/}},
  urldate      = {2016-03-03}
}

@misc{BingSearchAPI,
  title        = {{Bing Search API Web | Microsoft Azure Marketplace}},
  howpublished = {\url{http://datamarket.azure.com/dataset/bing/searchweb}},
  urldate      = {2016-03-03}
}

@misc{GoogleSearchAPI,
  title        = {{Google Custom Search Engine}},
  howpublished = {\url{https://cse.google.com/}},
  urldate      = {2016-04-21}
}

@misc{FarooAPI,
  title        = {{FAROO} -- Free Search {API}},
  howpublished = {\url{http://www.faroo.com/hp/api/api.html}},
  urldate      = {2016-04-23}
}

@misc{YahooBossAPI,
  title        = {{Yahoo BOSS API}},
  howpublished = {\url{https://developer.yahoo.com/boss/search/}},
  urldate      = {2016-04-23}
}

@misc{HistoricalArchivesEU,
  title        = {Websites Archives of {EU} Institutions},
  howpublished = {\url{http://www.eui.eu/Research/HistoricalArchivesOfEU/WebsitesArchivesofEUInstitutions.aspx}},
  urldate      = {2016-04-23}
}

@misc{FCT,
  title        = {{FCT} -- {Funda{\c{c}}{\~{a}}o para a Ci{\^{e}}ncia e a Tecnologia}},
  howpublished = {\url{http://www.fct.pt/index.phtml.en}},
  urldate      = {2016-04-23}
}


@misc{resaw,
  title        = {{RESAW} | A Research infrastructure for the Study of Archived {Web} materials},
  howpublished = {\url{http://resaw.eu/}},
  urldate      = {2016-03-09}
}

@misc{Bicho2015,
  author       = {Bicho, Daniel and Gomes, Daniel},
  title        = {A first attempt to archive the {.EU} domain},
  note         = {Technical report},
  pages        = {1--7},
  year         = {2015},
  howpublished = {\url{http://arquivo.pt/crawlreport/Crawling_Domain_EU.pdf}}
}

@article{Ramos2003,
  author        = {Ramos, Juan},
  title         = {Using {TF-IDF} to Determine Word Relevance in Document Queries},
  journal       = {Processing},
  year          = {2003},
  url           = {http://citeseerx.ist.psu.edu/viewdoc/download?doi=10.1.1.121.1424{\&}rep=rep1{\&}type=pdf},
  internal-note = {journal field looks garbled in the original export -- verify the actual venue; former doi field held a CiteSeerX id, not a DOI, and extra "authors" came from an e-mail address}
}

@article{Chakrabarti1999,
  author   = {Chakrabarti, Soumen and {Van Den Berg}, Martin and Dom, Byron},
  title    = {Focused crawling: A new approach to topic-specific {Web} resource discovery},
  journal  = {Computer Networks},
  volume   = {31},
  number   = {11},
  pages    = {1623--1640},
  year     = {1999},
  doi      = {10.1016/S1389-1286(99)00052-3},
  issn     = {1389-1286},
  abstract = {The rapid growth of the World-Wide Web poses unprecedented scaling challenges for general-purpose crawlers and search engines. In this paper we describe a new hypertext resource discovery system called a Focused Crawler. The goal of a focused crawler is to selectively seek out pages that are relevant to a pre-defined set of topics. The topics are specified not using keywords, but using exemplary documents. Rather than collecting and indexing all accessible Web documents to be able to answer all possible ad-hoc queries, a focused crawler analyzes its crawl boundary to find the links that are likely to be most relevant for the crawl, and avoids irrelevant regions of the Web. This leads to significant savings in hardware and network resources, and helps keep the crawl more up-to-date. To achieve such goal-directed crawling, we designed two hypertext mining programs that guide our crawler: a classifier that evaluates the relevance of a hypertext document with respect to the focus topics, and a distiller that identifies hypertext nodes that are great access points to many relevant pages within a few links. We report on extensive focused-crawling experiments using several topics at different levels of specificity. Focused crawling acquires relevant pages steadily while standard crawling quickly loses its way, even though they are started from the same root set. Focused crawling is robust against large perturbations in the starting set of URLs. It discovers largely overlapping sets of resources in spite of these perturbations. It is also capable of exploring out and discovering valuable resources that are dozens of links away from the start set, while carefully pruning the millions of pages that may lie within this same radius. Our anecdotes suggest that focused crawling is very effective for building high-quality collections of Web documents on specific topics, using modest desktop hardware.}
}

@inproceedings{Mohr2004,
  author    = {Mohr, Gordon and Stack, Michael and Ranitovic, Igor and Avery, Dan and Kimpton, Michele},
  title     = {An introduction to {Heritrix}: An open source archival quality {Web} crawler},
  booktitle = {4th International Web Archiving Workshop},
  year      = {2004},
  abstract  = {Heritrix is the Internet Archive's open-source, extensible, web-scale, archival-quality webcrawler project. The Internet Archive started Heritrix development in the early part of 2003. The intention was to develop a crawler for the specific purpose of archiving websites and to support multiple different use cases including focused and broadcrawling. The software is open source to encourage collaboration and joint development across institutions with similar needs. A pluggable, extensible architecture facilitates customization and outside contribution. Now, after over a year of development, the Internet Archive and other institutions are using Heritrix to perform focused and increasingly broad crawls.}
}


@misc{internetarchive,
  title        = {{Internet Archive: Digital Library of Free Books, Movies, Music {\&} Wayback Machine}},
  howpublished = {\url{https://archive.org/index.php}},
  urldate      = {2016-03-31}
}


@techreport{RFC7089,
  author      = {{Van de Sompel}, Herbert and Nelson, Michael and Sanderson, Robert},
  title       = {{HTTP} Framework for Time-Based Access to Resource States -- {Memento}},
  type        = {RFC},
  number      = {7089},
  institution = {RFC Editor},
  month       = dec,
  year        = {2013},
  issn        = {2070-1721},
  url         = {https://tools.ietf.org/html/rfc7089}
}

@techreport{RFC3174,
  author      = {Eastlake, D. and Jones, P.},
  title       = {{US} Secure Hash Algorithm 1 ({SHA1})},
  type        = {RFC},
  number      = {3174},
  institution = {RFC Editor},
  month       = sep,
  year        = {2001},
  issn        = {2070-1721},
  url         = {http://www.rfc-editor.org/rfc/rfc3174.txt}
}

@techreport{RFC1321,
  author      = {Rivest, Ronald L.},
  title       = {The {MD5} Message-Digest Algorithm},
  type        = {RFC},
  number      = {1321},
  institution = {RFC Editor},
  month       = apr,
  year        = {1992},
  issn        = {2070-1721},
  url         = {http://www.rfc-editor.org/rfc/rfc1321.txt}
}

@article{Ainsworth2012,
  author        = {Ainsworth, Scott G. and AlSum, Ahmed and SalahEldeen, Hany and Weigle, Michele C. and Nelson, Michael L.},
  title         = {How Much of the {Web} Is Archived?},
  pages         = {1--10},
  year          = {2012},
  eprint        = {1212.6177},
  archiveprefix = {arXiv},
  url           = {http://arxiv.org/abs/1212.6177},
  abstract      = {Although the Internet Archive's Wayback Machine is the largest and most well-known web archive, there have been a number of public web archives that have emerged in the last several years. With varying resources, audiences and collection development policies, these archives have varying levels of overlap with each other. While individual archives can be measured in terms of number of URIs, number of copies per URI, and intersection with other archives, to date there has been no answer to the question "How much of the Web is archived?" We study the question by approximating the Web using sample URIs from DMOZ, Delicious, Bitly, and search engine indexes; and, counting the number of copies of the sample URIs exist in various public web archives. Each sample set provides its own bias. The results from our sample sets indicate that range from 35{\%}-90{\%} of the Web has at least one archived copy, 17{\%}-49{\%} has between 2-5 copies, 1{\%}-8{\%} has 6-10 copies, and 8{\%}-63{\%} has more than 10 copies in public web archives. The number of URI copies varies as a function of time, but no more than 31.3{\%} of URIs are archived more than once per month.}
}

@inproceedings{Alam2015,
  author    = {Alam, Sawood and Nelson, Michael L. and {Van de Sompel}, Herbert and Balakireva, Lyudmila L. and Shankar, Harihar and Rosenthal, David S. H.},
  title     = {Web Archive Profiling Through {CDX} Summarization},
  booktitle = {Research and Advanced Technology for Digital Libraries},
  volume    = {9316},
  pages     = {3--14},
  year      = {2015},
  doi       = {10.1007/978-3-319-24592-8_1},
  isbn      = {978-3-319-24592-8},
  keywords  = {cdx files,memento,profiling,web archives}
}

@misc{archiveis,
  title        = {{archive.is}},
  howpublished = {\url{http://archive.is/}},
  urldate      = {2016-04-04}
}

@misc{bibalex,
  title        = {{International School of Information Science (ISIS)}},
  howpublished = {\url{http://www.bibalex.org/isis/frontend/home/home.aspx}},
  urldate      = {2016-04-22}
}

@misc{arquivopt,
  title        = {{Arquivo.pt}: pesquisa sobre o passado},
  howpublished = {\url{http://arquivo.pt/}},
  urldate      = {2016-04-22}
}

@misc{archiveit,
  title        = {{Archive-It} -- {Web} Archiving Services for Libraries and Archives},
  howpublished = {\url{https://archive-it.org/}},
  urldate      = {2016-04-22}
}

@misc{libraryofcongress,
  title        = {{Library of Congress}},
  language     = {eng},
  howpublished = {\url{https://www.loc.gov/}},
  urldate      = {2016-04-22}
}

@misc{ukGovWebArchive:url2011,
  author       = {{The National Archives}},
  title        = {{UK Government Web Archive | The National Archives}},
  howpublished = {\url{http://www.nationalarchives.gov.uk/webarchive/}},
  year         = {2011},
  urldate      = {2016-04-22}
}

@misc{ukWebArchive:url2011,
  author       = {{British Library}},
  title        = {{UK Web Archive}},
  howpublished = {\url{http://www.webarchive.org.uk/ukwa/}},
  year         = {2011},
  urldate      = {2016-04-22}
}

@misc{internetmemory,
  author       = {{Internet Memory Foundation}},
  title        = {{Internet Memory Foundation}},
  howpublished = {\url{http://internetmemory.org/en/}},
  urldate      = {2016-07-11}
}

@misc{padicat,
  author       = {{The Web Archive of Catalonia}},
  title        = {{The Web Archive of Catalonia}},
  howpublished = {\url{http://www.padi.cat/en}},
  urldate      = {2016-07-11}
}


@misc{webcite,
  title        = {{WebCite}},
  howpublished = {\url{http://www.webcitation.org/}},
  urldate      = {2016-04-22}
}

@misc{icelandWebArchive:url2011,
  author       = {{National and University Library of Iceland}},
  title        = {{Vefsafn} -- English},
  howpublished = {\url{http://vefsafn.is/index.php?page=english}},
  year         = {2011},
  urldate      = {2016-04-22}
}

@misc{timetravel,
  title        = {{Time Travel}},
  howpublished = {\url{http://timetravel.mementoweb.org/}},
  urldate      = {2016-03-18}
}

@inproceedings{ntoulas04whatsNew,
  author    = {Ntoulas, Alexandros and Cho, Junghoo and Olston, Christopher},
  title     = {What's new on the web?: the evolution of the web from a search engine perspective},
  booktitle = {Proceedings of the 13th International Conference on World Wide Web},
  pages     = {1--12},
  publisher = {ACM Press},
  year      = {2004},
  doi       = {10.1145/988672.988674},
  isbn      = {1-58113-844-X}
}

@inproceedings{gomes06modelling,
  author    = {Gomes, Daniel and Silva, M{\'a}rio J.},
  title     = {Modelling information persistence on the web},
  booktitle = {ICWE '06: Proceedings of the 6th International Conference on Web Engineering},
  year      = {2006},
  pages     = {193--200},
  publisher = {ACM Press},
  address   = {New York, NY, USA},
  location  = {Palo Alto, California, USA},
  doi       = {10.1145/1145581.1145623},
  isbn      = {1-59593-352-2}
}


@misc{shipman2010using,
  author        = {Shipman, Jeffery L. and Klein, Martin and Nelson, Michael L.},
  title         = {Using {Web} Page Titles to Rediscover Lost {Web} Pages},
  year          = {2010},
  eprint        = {1002.2439},
  archiveprefix = {arXiv},
  note          = {arXiv preprint}
}

@misc{arcomem11,
  author       = {{ARCOMEM}},
  title        = {{ARCOMEM}},
  howpublished = {\url{https://web.archive.org/web/20130426060455/http://www.arcomem.eu/}},
  month        = oct,
  year         = {2011},
  url          = {http://www.arcomem.eu/about/}
}


@incollection{faheem2013intelligent,
  author    = {Faheem, Muhammad and Senellart, Pierre},
  title     = {Intelligent and adaptive crawling of web applications for web archiving},
  booktitle = {Web Engineering},
  publisher = {Springer},
  year      = {2013},
  pages     = {306--322}
}

@incollection{risse2012exploiting,
  author    = {Risse, Thomas and Dietze, Stefan and Peters, Wim and Doka, Katerina and Stavrakas, Yannis and Senellart, Pierre},
  title     = {Exploiting the social and semantic web for guided web archiving},
  booktitle = {Theory and Practice of Digital Libraries},
  publisher = {Springer},
  year      = {2012},
  pages     = {426--432}
}

@article{sampath2012decay,
  author    = {{Sampath Kumar}, B. T. and {Manoj Kumar}, K. S.},
  title     = {Decay and half-life period of online citations cited in open access journals},
  journal   = {The International Information \& Library Review},
  volume    = {44},
  number    = {4},
  pages     = {202--211},
  year      = {2012},
  publisher = {Taylor \& Francis}
}

@article{evangelou2005unavailability,
  author    = {Evangelou, Evangelos and Trikalinos, Thomas A. and Ioannidis, John P. A.},
  title     = {Unavailability of online supplementary scientific information from articles published in major journals},
  journal   = {The FASEB Journal},
  volume    = {19},
  number    = {14},
  pages     = {1943--1944},
  year      = {2005},
  publisher = {FASEB}
}

@article{tajeddini2011death,
  author  = {Tajeddini, Oranus and Azimi, Ali and Sadatmoosavi, Ali and Sharif-Moghaddam, Hadi},
  title   = {Death of web citations: a serious alarm for authors},
  journal = {Malaysian Journal of Library \& Information Science},
  volume  = {16},
  number  = {3},
  pages   = {17--29},
  year    = {2011}
}

@article{sife2013persistence,
  author    = {Sife, Alfred S and Bernard, Ronald},
  title     = {Persistence and decay of web citations used in theses and dissertations available at the Sokoine National Agricultural Library, Tanzania},
  journal   = {International Journal of Education and Development using Information and Communication Technology},
  volume    = {9},
  number    = {2},
  pages     = {85},
  year      = {2013},
  publisher = {University of the West Indies}
}

@article{goh2007link,
  author    = {Goh, Dion Hoe-Lian and Ng, Peng Kin},
  title     = {Link decay in leading information science journals},
  journal   = {Journal of the American Society for Information Science and Technology},
  volume    = {58},
  number    = {1},
  pages     = {15--24},
  year      = {2007},
  publisher = {Wiley Online Library}
}

@article{spinellis03,
  author    = {Spinellis, Diomidis},
  title     = {The decay and failures of web references},
  journal   = {Communications of the ACM},
  year      = {2003},
  volume    = {46},
  number    = {1},
  pages     = {71--77},
  doi       = {10.1145/602421.602422},
  issn      = {0001-0782},
  publisher = {ACM Press}
}
Partilhar | |